Diffstat (limited to 'source/application/api')
-rw-r--r--  source/application/api/common/CMakeLists.txt | 59
-rw-r--r--  source/application/api/common/include/AudioUtils.hpp | 172
-rw-r--r--  source/application/api/common/include/BaseProcessing.hpp | 67
-rw-r--r--  source/application/api/common/include/ClassificationResult.hpp | 41
-rw-r--r--  source/application/api/common/include/Classifier.hpp | 89
-rw-r--r--  source/application/api/common/include/DataStructures.hpp | 128
-rw-r--r--  source/application/api/common/include/ImageUtils.hpp | 116
-rw-r--r--  source/application/api/common/include/Mfcc.hpp | 255
-rw-r--r--  source/application/api/common/include/Model.hpp | 152
-rw-r--r--  source/application/api/common/include/TensorFlowLiteMicro.hpp | 91
-rw-r--r--  source/application/api/common/source/Classifier.cc | 169
-rw-r--r--  source/application/api/common/source/ImageUtils.cc | 126
-rw-r--r--  source/application/api/common/source/Mfcc.cc | 353
-rw-r--r--  source/application/api/common/source/Model.cc | 359
-rw-r--r--  source/application/api/common/source/TensorFlowLiteMicro.cc | 46
-rw-r--r--  source/application/api/use_case/ad/CMakeLists.txt | 41
-rw-r--r--  source/application/api/use_case/ad/include/AdMelSpectrogram.hpp | 97
-rw-r--r--  source/application/api/use_case/ad/include/AdModel.hpp | 55
-rw-r--r--  source/application/api/use_case/ad/include/AdProcessing.hpp | 231
-rw-r--r--  source/application/api/use_case/ad/include/MelSpectrogram.hpp | 234
-rw-r--r--  source/application/api/use_case/ad/src/AdMelSpectrogram.cc | 93
-rw-r--r--  source/application/api/use_case/ad/src/AdModel.cc | 41
-rw-r--r--  source/application/api/use_case/ad/src/AdProcessing.cc | 210
-rw-r--r--  source/application/api/use_case/ad/src/MelSpectrogram.cc | 316
-rw-r--r--  source/application/api/use_case/asr/CMakeLists.txt | 43
-rw-r--r--  source/application/api/use_case/asr/include/AsrClassifier.hpp | 63
-rw-r--r--  source/application/api/use_case/asr/include/AsrResult.hpp | 63
-rw-r--r--  source/application/api/use_case/asr/include/OutputDecode.hpp | 40
-rw-r--r--  source/application/api/use_case/asr/include/Wav2LetterMfcc.hpp | 109
-rw-r--r--  source/application/api/use_case/asr/include/Wav2LetterModel.hpp | 67
-rw-r--r--  source/application/api/use_case/asr/include/Wav2LetterPostprocess.hpp | 109
-rw-r--r--  source/application/api/use_case/asr/include/Wav2LetterPreprocess.hpp | 182
-rw-r--r--  source/application/api/use_case/asr/src/AsrClassifier.cc | 144
-rw-r--r--  source/application/api/use_case/asr/src/OutputDecode.cc | 47
-rw-r--r--  source/application/api/use_case/asr/src/Wav2LetterMfcc.cc | 141
-rw-r--r--  source/application/api/use_case/asr/src/Wav2LetterModel.cc | 42
-rw-r--r--  source/application/api/use_case/asr/src/Wav2LetterPostprocess.cc | 214
-rw-r--r--  source/application/api/use_case/asr/src/Wav2LetterPreprocess.cc | 208
-rw-r--r--  source/application/api/use_case/img_class/CMakeLists.txt | 39
-rw-r--r--  source/application/api/use_case/img_class/include/ImgClassProcessing.hpp | 91
-rw-r--r--  source/application/api/use_case/img_class/include/MobileNetModel.hpp | 51
-rw-r--r--  source/application/api/use_case/img_class/src/ImgClassProcessing.cc | 66
-rw-r--r--  source/application/api/use_case/img_class/src/MobileNetModel.cc | 42
-rw-r--r--  source/application/api/use_case/inference_runner/CMakeLists.txt | 37
-rw-r--r--  source/application/api/use_case/inference_runner/include/TestModel.hpp | 43
-rw-r--r--  source/application/api/use_case/inference_runner/src/TestModel.cc | 23
-rw-r--r--  source/application/api/use_case/kws/CMakeLists.txt | 39
-rw-r--r--  source/application/api/use_case/kws/include/KwsProcessing.hpp | 137
-rw-r--r--  source/application/api/use_case/kws/include/KwsResult.hpp | 63
-rw-r--r--  source/application/api/use_case/kws/include/MicroNetKwsMfcc.hpp | 50
-rw-r--r--  source/application/api/use_case/kws/include/MicroNetKwsModel.hpp | 63
-rw-r--r--  source/application/api/use_case/kws/src/KwsProcessing.cc | 211
-rw-r--r--  source/application/api/use_case/kws/src/MicroNetKwsModel.cc | 42
-rw-r--r--  source/application/api/use_case/noise_reduction/CMakeLists.txt | 40
-rw-r--r--  source/application/api/use_case/noise_reduction/include/RNNoiseFeatureProcessor.hpp | 341
-rw-r--r--  source/application/api/use_case/noise_reduction/include/RNNoiseModel.hpp | 78
-rw-r--r--  source/application/api/use_case/noise_reduction/include/RNNoiseProcessing.hpp | 113
-rw-r--r--  source/application/api/use_case/noise_reduction/src/RNNoiseFeatureProcessor.cc | 892
-rw-r--r--  source/application/api/use_case/noise_reduction/src/RNNoiseModel.cc | 96
-rw-r--r--  source/application/api/use_case/noise_reduction/src/RNNoiseProcessing.cc | 100
-rw-r--r--  source/application/api/use_case/object_detection/CMakeLists.txt | 40
-rw-r--r--  source/application/api/use_case/object_detection/include/DetectionResult.hpp | 61
-rw-r--r--  source/application/api/use_case/object_detection/include/DetectorPostProcessing.hpp | 125
-rw-r--r--  source/application/api/use_case/object_detection/include/DetectorPreProcessing.hpp | 60
-rw-r--r--  source/application/api/use_case/object_detection/include/YoloFastestModel.hpp | 56
-rw-r--r--  source/application/api/use_case/object_detection/src/DetectorPostProcessing.cc | 240
-rw-r--r--  source/application/api/use_case/object_detection/src/DetectorPreProcessing.cc | 52
-rw-r--r--  source/application/api/use_case/object_detection/src/YoloFastestModel.cc | 45
-rw-r--r--  source/application/api/use_case/vww/CMakeLists.txt | 39
-rw-r--r--  source/application/api/use_case/vww/include/VisualWakeWordModel.hpp | 50
-rw-r--r--  source/application/api/use_case/vww/include/VisualWakeWordProcessing.hpp | 93
-rw-r--r--  source/application/api/use_case/vww/src/VisualWakeWordModel.cc | 42
-rw-r--r--  source/application/api/use_case/vww/src/VisualWakeWordProcessing.cc | 80
73 files changed, 8653 insertions, 0 deletions
diff --git a/source/application/api/common/CMakeLists.txt b/source/application/api/common/CMakeLists.txt
new file mode 100644
index 0000000..5078adc
--- /dev/null
+++ b/source/application/api/common/CMakeLists.txt
@@ -0,0 +1,59 @@
+#----------------------------------------------------------------------------
+# Copyright (c) 2022 Arm Limited. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#----------------------------------------------------------------------------
+
+#########################################################
+# Common utility library used by use case libraries. #
+# NOTE: this library should not depend on HAL. #
+#########################################################
+
+cmake_minimum_required(VERSION 3.15.6)
+
+set(COMMON_UC_UTILS_TARGET common_api)
+project(${COMMON_UC_UTILS_TARGET}
+ DESCRIPTION "Common Utilities library"
+ LANGUAGES CXX)
+
+# Create static library
+add_library(${COMMON_UC_UTILS_TARGET} STATIC)
+
+## Include directories - public
+target_include_directories(${COMMON_UC_UTILS_TARGET}
+ PUBLIC
+ include
+ ${TENSORFLOW_SRC_PATH}/tensorflow/lite/micro/tools/make/downloads/flatbuffers/include)
+
+## Sources
+target_sources(${COMMON_UC_UTILS_TARGET}
+ PRIVATE
+ source/Classifier.cc
+ source/ImageUtils.cc
+ source/Mfcc.cc
+ source/Model.cc
+ source/TensorFlowLiteMicro.cc)
+
+# Link time library targets:
+target_link_libraries(${COMMON_UC_UTILS_TARGET}
+ PUBLIC
+ log # Logging functions
+ arm_math # Math functions
+ tensorflow-lite-micro) # TensorFlow Lite Micro library
+
+# Display status:
+message(STATUS "*******************************************************")
+message(STATUS "Library : " ${COMMON_UC_UTILS_TARGET})
+message(STATUS "CMAKE_SYSTEM_PROCESSOR : " ${CMAKE_SYSTEM_PROCESSOR})
+message(STATUS "*******************************************************")
diff --git a/source/application/api/common/include/AudioUtils.hpp b/source/application/api/common/include/AudioUtils.hpp
new file mode 100644
index 0000000..cbf7bb7
--- /dev/null
+++ b/source/application/api/common/include/AudioUtils.hpp
@@ -0,0 +1,172 @@
+/*
+ * Copyright (c) 2021 Arm Limited. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef AUDIO_UTILS_HPP
+#define AUDIO_UTILS_HPP
+
+#include <cstddef>
+#include <cstdint>
+
+namespace arm {
+namespace app {
+namespace audio {
+
+ template<class T>
+ class SlidingWindow {
+ public:
+
+ /**
+ * @brief Creates the window slider through the given data.
+ *
+ * @param[in] data Pointer to the data to slide through.
+         * @param[in] dataSize Size of the data in number of T type elements.
+         * @param[in] windowSize Sliding window size in number of T type elements.
+         * @param[in] stride Stride size in number of T type elements.
+ */
+ SlidingWindow(T *data, size_t dataSize,
+ size_t windowSize, size_t stride) {
+ m_start = data;
+ m_dataSize = dataSize;
+ m_size = windowSize;
+ m_stride = stride;
+ }
+
+ SlidingWindow() = default;
+
+ ~SlidingWindow() = default;
+
+ /**
+ * @brief Get the next data window.
+         * @return Pointer to the next window, or nullptr if no further window is available.
+ */
+ virtual T *Next() {
+ if (HasNext()) {
+ m_count++;
+ return m_start + Index() * m_stride;
+ } else {
+ return nullptr;
+ }
+ }
+
+ /**
+ * @brief Checks if the next data portion is available.
+ * @return true if next data portion is available.
+ */
+ virtual bool HasNext() {
+ return m_size + m_count * m_stride <= m_dataSize;
+ }
+
+ /**
+ * @brief Reset the slider to the initial position.
+ */
+ virtual void Reset() {
+ m_count = 0;
+ }
+
+ /**
+ * @brief Resets the slider to the start of the new data.
+ * New data size MUST be the same as the old one.
+ * @param[in] newStart Pointer to the new data to slide through.
+ */
+ virtual void Reset(T *newStart) {
+ m_start = newStart;
+ Reset();
+ }
+
+ /**
+ * @brief Gets current index of the sliding window.
+ * @return Current position of the sliding window in number of strides.
+ */
+ size_t Index() {
+ return m_count == 0? 0: m_count - 1;
+ }
+
+ /**
+         * @brief Gets the index from the start of the data where the next window will begin.
+         * While Index() returns the position of the sliding window in strides,
+         * this function returns the position in data elements.
+         * @return Index from the start of the data where the next sliding window will begin.
+ */
+ virtual uint32_t NextWindowStartIndex() {
+ return m_count == 0? 0: ((m_count) * m_stride);
+ }
+
+ /**
+ * @brief Go to given sliding window index.
+ * @param[in] index New position of the sliding window. If index is invalid
+ * (greater than possible range of strides) then next call to Next() will return nullptr.
+ */
+ void FastForward(size_t index) {
+ m_count = index;
+ }
+
+ /**
+ * @brief Calculates whole number of times the window can stride through the given data.
+ * @return Maximum number of whole strides.
+ */
+ size_t TotalStrides() {
+ if (m_size > m_dataSize) {
+ return 0;
+ }
+ return ((m_dataSize - m_size)/m_stride);
+ }
+
+
+ protected:
+ T *m_start = nullptr;
+ size_t m_dataSize = 0;
+ size_t m_size = 0;
+ size_t m_stride = 0;
+ size_t m_count = 0;
+ };
+
+ /*
+ * Sliding window that will cover the whole length of the input, even if
+ * this means the last window is not a full window length.
+ */
+ template<class T>
+ class FractionalSlidingWindow : public SlidingWindow<T> {
+ public:
+ using SlidingWindow<T>::SlidingWindow;
+
+ /**
+ * @brief Checks if the next data portion is available.
+ * @return true if next data portion is available.
+ */
+        bool HasNext() override {
+ return this->m_count < 1 + this->FractionalTotalStrides() && (this->NextWindowStartIndex() < this->m_dataSize);
+ }
+
+ /**
+ * @brief Calculates number of times the window can stride through the given data.
+ * May not be a whole number.
+ * @return Number of strides to cover all data.
+ */
+ float FractionalTotalStrides() {
+ if (this->m_dataSize < this->m_size) {
+ return 0;
+ } else {
+ return ((this->m_dataSize - this->m_size) / static_cast<float>(this->m_stride));
+ }
+ }
+ };
+
+
+} /* namespace audio */
+} /* namespace app */
+} /* namespace arm */
+
+#endif /* AUDIO_UTILS_HPP */
\ No newline at end of file
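For context, a minimal usage sketch of the SlidingWindow class above; the buffer contents and sizes are invented for the example, and a host build with this header on the include path is assumed:

    #include "AudioUtils.hpp"

    #include <cstdint>
    #include <cstdio>
    #include <vector>

    int main()
    {
        /* 16000 zeroed samples; a 640-sample window advancing by 320 samples. */
        std::vector<int16_t> audio(16000, 0);
        arm::app::audio::SlidingWindow<const int16_t> slider(
            audio.data(), audio.size(), 640, 320);

        while (slider.HasNext()) {
            const int16_t* window = slider.Next();
            (void)window; /* Feature extraction would consume the window here. */
            printf("Window %zu of %zu\n", slider.Index(), slider.TotalStrides());
        }
        return 0;
    }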
diff --git a/source/application/api/common/include/BaseProcessing.hpp b/source/application/api/common/include/BaseProcessing.hpp
new file mode 100644
index 0000000..a54dd12
--- /dev/null
+++ b/source/application/api/common/include/BaseProcessing.hpp
@@ -0,0 +1,67 @@
+/*
+ * Copyright (c) 2022 Arm Limited. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef BASE_PROCESSING_HPP
+#define BASE_PROCESSING_HPP
+
+#include <cstddef>
+
+namespace arm {
+namespace app {
+
+ /**
+ * @brief Base class exposing pre-processing API.
+ * Use cases should provide their own PreProcessing class that inherits from this one.
+ * All steps required to take raw input data and populate tensors ready for inference
+ * should be handled.
+ */
+ class BasePreProcess {
+
+ public:
+ virtual ~BasePreProcess() = default;
+
+ /**
+ * @brief Should perform pre-processing of 'raw' input data and load it into
+         * TFLite Micro input tensors ready for inference.
+ * @param[in] input Pointer to the data that pre-processing will work on.
+ * @param[in] inputSize Size of the input data.
+ * @return true if successful, false otherwise.
+ **/
+ virtual bool DoPreProcess(const void* input, size_t inputSize) = 0;
+ };
+
+ /**
+ * @brief Base class exposing post-processing API.
+ * Use cases should provide their own PostProcessing class that inherits from this one.
+ * All steps required to take inference output and populate results vectors should be handled.
+ */
+ class BasePostProcess {
+
+ public:
+ virtual ~BasePostProcess() = default;
+
+ /**
+         * @brief Should perform post-processing of the result of inference then
+         * populate result data for any later use.
+ * @return true if successful, false otherwise.
+ **/
+ virtual bool DoPostProcess() = 0;
+ };
+
+} /* namespace app */
+} /* namespace arm */
+
+#endif /* BASE_PROCESSING_HPP */
\ No newline at end of file
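As a hypothetical sketch of the intended inheritance pattern (the MyPreProcess name and its internals are invented here, not part of the API):

    #include "BaseProcessing.hpp"

    #include <cstdint>
    #include <cstring>

    /* Copies raw bytes into a TFLite Micro input tensor buffer obtained
     * beforehand; real use cases would do feature extraction here instead. */
    class MyPreProcess : public arm::app::BasePreProcess {
    public:
        MyPreProcess(int8_t* tensorData, size_t tensorSize)
            : m_dst(tensorData), m_dstSize(tensorSize) {}

        bool DoPreProcess(const void* input, size_t inputSize) override
        {
            if (input == nullptr || inputSize > m_dstSize) {
                return false;
            }
            std::memcpy(m_dst, input, inputSize);
            return true;
        }

    private:
        int8_t* m_dst;
        size_t  m_dstSize;
    };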
diff --git a/source/application/api/common/include/ClassificationResult.hpp b/source/application/api/common/include/ClassificationResult.hpp
new file mode 100644
index 0000000..eae28e4
--- /dev/null
+++ b/source/application/api/common/include/ClassificationResult.hpp
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2021 Arm Limited. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef CLASSIFICATION_RESULT_HPP
+#define CLASSIFICATION_RESULT_HPP
+
+#include <cstdint>
+#include <string>
+
+namespace arm {
+namespace app {
+
+ /**
+ * @brief Class representing a single classification result.
+ */
+ class ClassificationResult {
+ public:
+ double m_normalisedVal = 0.0;
+ std::string m_label;
+ uint32_t m_labelIdx = 0;
+
+ ClassificationResult() = default;
+ ~ClassificationResult() = default;
+ };
+
+} /* namespace app */
+} /* namespace arm */
+
+#endif /* CLASSIFICATION_RESULT_HPP */
\ No newline at end of file
diff --git a/source/application/api/common/include/Classifier.hpp b/source/application/api/common/include/Classifier.hpp
new file mode 100644
index 0000000..d641c22
--- /dev/null
+++ b/source/application/api/common/include/Classifier.hpp
@@ -0,0 +1,89 @@
+/*
+ * Copyright (c) 2021 Arm Limited. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef CLASSIFIER_HPP
+#define CLASSIFIER_HPP
+
+#include "ClassificationResult.hpp"
+#include "TensorFlowLiteMicro.hpp"
+
+#include <set>
+#include <vector>
+
+namespace arm {
+namespace app {
+
+ /**
+ * @brief Classifier - a helper class to get certain number of top
+ * results from the output vector from a classification NN.
+ **/
+ class Classifier{
+ public:
+ /** @brief Constructor. */
+ Classifier() = default;
+
+ /**
+ * @brief Gets the top N classification results from the
+ * output vector.
+ * @param[in] outputTensor Inference output tensor from an NN model.
+         * @param[out] vecResults A vector of classification results
+         * populated by this function.
+ * @param[in] labels Labels vector to match classified classes.
+         * @param[in] topNCount Number of top classifications to pick.
+         * @param[in] useSoftmax Whether softmax normalisation should be applied to the output.
+ * @return true if successful, false otherwise.
+ **/
+
+ virtual bool GetClassificationResults(
+ TfLiteTensor* outputTensor,
+ std::vector<ClassificationResult>& vecResults,
+ const std::vector <std::string>& labels, uint32_t topNCount,
+            bool useSoftmax);
+
+ /**
+ * @brief Populate the elements of the Classification Result object.
+         * @param[in] topNSet Ordered set of top N output class scores and their label indices.
+         * @param[out] vecResults A vector of classification results
+         * populated by this function.
+ * @param[in] labels Labels vector to match classified classes.
+ **/
+
+ void SetVectorResults(
+ std::set<std::pair<float, uint32_t>>& topNSet,
+ std::vector<ClassificationResult>& vecResults,
+ const std::vector <std::string>& labels);
+
+ private:
+ /**
+ * @brief Utility function that gets the top N classification results from the
+ * output vector.
+ * @param[in] tensor Inference output tensor from an NN model.
+ * @param[out] vecResults A vector of classification results
+ * populated by this function.
+ * @param[in] topNCount Number of top classifications to pick.
+ * @param[in] labels Labels vector to match classified classes.
+ * @return true if successful, false otherwise.
+ **/
+
+ bool GetTopNResults(const std::vector<float>& tensor,
+ std::vector<ClassificationResult>& vecResults,
+ uint32_t topNCount,
+ const std::vector <std::string>& labels);
+ };
+
+} /* namespace app */
+} /* namespace arm */
+
+#endif /* CLASSIFIER_HPP */
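A sketch of how a populated output tensor might be ranked with this class; the ReportTopResults wrapper is invented, and the tensor and labels are assumed to come from a completed inference:

    #include "Classifier.hpp"

    #include <cinttypes>
    #include <cstdio>
    #include <string>
    #include <vector>

    void ReportTopResults(TfLiteTensor* outputTensor,
                          const std::vector<std::string>& labels)
    {
        arm::app::Classifier classifier;
        std::vector<arm::app::ClassificationResult> results;

        /* Pick the top 3 classes, applying softmax to the raw output. */
        if (classifier.GetClassificationResults(
                outputTensor, results, labels, 3, true)) {
            for (const auto& r : results) {
                printf("%" PRIu32 ": %s (%f)\n",
                       r.m_labelIdx, r.m_label.c_str(), r.m_normalisedVal);
            }
        }
    }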
diff --git a/source/application/api/common/include/DataStructures.hpp b/source/application/api/common/include/DataStructures.hpp
new file mode 100644
index 0000000..0616839
--- /dev/null
+++ b/source/application/api/common/include/DataStructures.hpp
@@ -0,0 +1,128 @@
+/*
+ * Copyright (c) 2021 Arm Limited. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef DATA_STRUCTURES_HPP
+#define DATA_STRUCTURES_HPP
+
+#include <cstddef>
+#include <cstdio>
+#include <iterator>
+
+namespace arm {
+namespace app {
+
+ /**
+ * Class Array2d is a data structure that represents a two dimensional array.
+ * The data is allocated in contiguous memory, arranged row-wise
+ * and individual elements can be accessed with the () operator.
+ * For example a two dimensional array D of size (M, N) can be accessed:
+ *
+ * _|<------------- col size = N -------->|
+     * | D(r=0, c=0)   D(r=0, c=1)   ... D(r=0, c=N-1)
+     * | D(r=1, c=0)   D(r=1, c=1)   ... D(r=1, c=N-1)
+     * | ...
+     * row size = M ...
+     * | ...
+     * _ D(r=M-1, c=0) D(r=M-1, c=1) ... D(r=M-1, c=N-1)
+ *
+ */
+ template<typename T>
+ class Array2d {
+ public:
+ /**
+ * @brief Creates the array2d with the given sizes.
+ * @param[in] rows Number of rows.
+ * @param[in] cols Number of columns.
+ */
+ Array2d(unsigned rows, unsigned cols): m_rows(rows), m_cols(cols)
+ {
+ if (rows == 0 || cols == 0) {
+ printf("Array2d constructor has 0 size.\n");
+ m_data = nullptr;
+ return;
+ }
+ m_data = new T[rows * cols];
+ }
+
+ ~Array2d()
+ {
+ delete[] m_data;
+ }
+
+ T& operator() (unsigned int row, unsigned int col)
+ {
+#if defined(DEBUG)
+ if (row >= m_rows || col >= m_cols || m_data == nullptr) {
+ printf_err("Array2d subscript out of bounds.\n");
+ }
+#endif /* defined(DEBUG) */
+ return m_data[m_cols * row + col];
+ }
+
+ T operator() (unsigned int row, unsigned int col) const
+ {
+#if defined(DEBUG)
+ if (row >= m_rows || col >= m_cols || m_data == nullptr) {
+ printf_err("const Array2d subscript out of bounds.\n");
+ }
+#endif /* defined(DEBUG) */
+ return m_data[m_cols * row + col];
+ }
+
+        /**
+         * @brief Gets the size of the given dimension.
+         * @param[in] dim Dimension to query: 0 for rows, 1 for columns.
+         * @return Size of that dimension, or 0 for an invalid dimension.
+         */
+ size_t size(size_t dim)
+ {
+ switch (dim)
+ {
+ case 0:
+ return m_rows;
+ case 1:
+ return m_cols;
+ default:
+ return 0;
+ }
+ }
+
+ /**
+ * @brief Gets the array2d total size.
+ */
+ size_t totalSize()
+ {
+ return m_rows * m_cols;
+ }
+
+ /**
+ * array2d iterator.
+ */
+ using iterator=T*;
+ using const_iterator=T const*;
+
+ iterator begin() { return m_data; }
+ iterator end() { return m_data + totalSize(); }
+ const_iterator begin() const { return m_data; }
+ const_iterator end() const { return m_data + totalSize(); };
+
+ private:
+ size_t m_rows;
+ size_t m_cols;
+ T* m_data;
+ };
+
+} /* namespace app */
+} /* namespace arm */
+
+#endif /* DATA_STRUCTURES_HPP */
\ No newline at end of file
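A short usage sketch for Array2d (the values are invented for the example):

    #include "DataStructures.hpp"

    #include <cstdio>

    int main()
    {
        /* A 2 x 3 array filled row-wise through the iterator interface. */
        arm::app::Array2d<float> d(2, 3);
        float v = 0.0f;
        for (auto& elem : d) {
            elem = v;
            v += 1.0f;
        }

        /* operator() takes the row first, then the column: d(1, 2) is 5.0. */
        printf("d(1, 2) = %f, rows = %zu, cols = %zu\n",
               d(1, 2), d.size(0), d.size(1));
        return 0;
    }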
diff --git a/source/application/api/common/include/ImageUtils.hpp b/source/application/api/common/include/ImageUtils.hpp
new file mode 100644
index 0000000..a8c7650
--- /dev/null
+++ b/source/application/api/common/include/ImageUtils.hpp
@@ -0,0 +1,116 @@
+/*
+ * Copyright (c) 2022 Arm Limited. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef IMAGE_UTILS_HPP
+#define IMAGE_UTILS_HPP
+
+#include <cstddef>
+#include <cstdint>
+#include <forward_list>
+#include <vector>
+
+/* Helper macro to convert RGB888 to RGB565 format. */
+#define RGB888_TO_RGB565(R8,G8,B8) ((((R8>>3) & 0x1F) << 11) | \
+ (((G8>>2) & 0x3F) << 5) | \
+ ((B8>>3) & 0x1F))
+
+constexpr uint16_t COLOR_BLACK = 0;
+constexpr uint16_t COLOR_GREEN = RGB888_TO_RGB565( 0, 255, 0); // 2016;
+constexpr uint16_t COLOR_YELLOW = RGB888_TO_RGB565(255, 255, 0); // 65504;
+
+
+namespace arm {
+namespace app {
+namespace image {
+
+ /**
+ * Contains the x,y co-ordinates of a box centre along with the box width and height.
+ */
+ struct Box {
+ float x;
+ float y;
+ float w;
+ float h;
+ };
+
+ struct Detection {
+ Box bbox;
+ std::vector<float> prob;
+ float objectness;
+ };
+
+ /**
+ * @brief Calculate the 1D overlap.
+ * @param[in] x1Center First center point.
+ * @param[in] width1 First width.
+ * @param[in] x2Center Second center point.
+ * @param[in] width2 Second width.
+     * @return The overlap of the two segments; negative if they do not overlap.
+ **/
+ float Calculate1DOverlap(float x1Center, float width1, float x2Center, float width2);
+
+ /**
+ * @brief Calculate the intersection between the two given boxes.
+ * @param[in] box1 First box.
+ * @param[in] box2 Second box.
+ * @return The intersection value.
+ **/
+ float CalculateBoxIntersect(Box& box1, Box& box2);
+
+ /**
+ * @brief Calculate the union between the two given boxes.
+ * @param[in] box1 First box.
+ * @param[in] box2 Second box.
+ * @return The two given boxes union value.
+ **/
+ float CalculateBoxUnion(Box& box1, Box& box2);
+
+ /**
+ * @brief Calculate the intersection over union between the two given boxes.
+ * @param[in] box1 First box.
+ * @param[in] box2 Second box.
+ * @return The intersection over union value.
+ **/
+ float CalculateBoxIOU(Box& box1, Box& box2);
+
+ /**
+ * @brief Calculate the Non-Maxima suppression on the given detection boxes.
+ * @param[in] detections List of Detection boxes.
+ * @param[in] classes Number of classes.
+ * @param[in] iouThreshold Intersection over union threshold.
+ **/
+ void CalculateNMS(std::forward_list<Detection>& detections, int classes, float iouThreshold);
+
+ /**
+ * @brief Helper function to convert a UINT8 image to INT8 format.
+ * @param[in,out] data Pointer to the data start.
+ * @param[in] kMaxImageSize Total number of pixels in the image.
+ **/
+ void ConvertImgToInt8(void* data, size_t kMaxImageSize);
+
+ /**
+ * @brief Converts RGB image to grayscale.
+ * @param[in] srcPtr Pointer to RGB source image.
+ * @param[out] dstPtr Pointer to grayscale destination image.
+     * @param[in] dstImgSz Destination image size in pixels.
+ **/
+ void RgbToGrayscale(const uint8_t* srcPtr, uint8_t* dstPtr, size_t dstImgSz);
+
+} /* namespace image */
+} /* namespace app */
+} /* namespace arm */
+
+#endif /* IMAGE_UTILS_HPP */
\ No newline at end of file
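A worked example for the box helpers above (numbers chosen by hand): two 4 x 4 boxes centred at (2, 2) and (3, 3) intersect over a 3 x 3 region, so the IOU is 9 / (16 + 16 - 9) ≈ 0.39.

    #include "ImageUtils.hpp"

    #include <cstdio>

    int main()
    {
        using arm::app::image::Box;

        Box a{2.0f, 2.0f, 4.0f, 4.0f}; /* Spans [0, 4] x [0, 4]. */
        Box b{3.0f, 3.0f, 4.0f, 4.0f}; /* Spans [1, 5] x [1, 5]. */

        printf("IOU: %f\n", arm::app::image::CalculateBoxIOU(a, b));
        return 0;
    }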
diff --git a/source/application/api/common/include/Mfcc.hpp b/source/application/api/common/include/Mfcc.hpp
new file mode 100644
index 0000000..86330ca
--- /dev/null
+++ b/source/application/api/common/include/Mfcc.hpp
@@ -0,0 +1,255 @@
+/*
+ * Copyright (c) 2021 Arm Limited. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef MFCC_HPP
+#define MFCC_HPP
+
+#include "PlatformMath.hpp"
+
+#include <algorithm>
+#include <vector>
+#include <cstdint>
+#include <cmath>
+#include <limits>
+#include <string>
+
+namespace arm {
+namespace app {
+namespace audio {
+
+ /* MFCC's consolidated parameters. */
+ class MfccParams {
+ public:
+ float m_samplingFreq;
+ uint32_t m_numFbankBins;
+ float m_melLoFreq;
+ float m_melHiFreq;
+ uint32_t m_numMfccFeatures;
+ uint32_t m_frameLen;
+ uint32_t m_frameLenPadded;
+ bool m_useHtkMethod;
+
+ /** @brief Constructor */
+ MfccParams(float samplingFreq, uint32_t numFbankBins,
+ float melLoFreq, float melHiFreq,
+ uint32_t numMfccFeats, uint32_t frameLen,
+ bool useHtkMethod);
+
+ MfccParams() = delete;
+
+ ~MfccParams() = default;
+
+ /** @brief Log parameters */
+ void Log() const;
+ };
+
+ /**
+ * @brief Class for MFCC feature extraction.
+ * Based on https://github.com/ARM-software/ML-KWS-for-MCU/blob/master/Deployment/Source/MFCC/mfcc.cpp
+ * This class is designed to be generic and self-sufficient but
+ * certain calculation routines can be overridden to accommodate
+ * use-case specific requirements.
+ */
+ class MFCC {
+ public:
+ /**
+ * @brief Constructor
+ * @param[in] params MFCC parameters
+ */
+ explicit MFCC(const MfccParams& params);
+
+ MFCC() = delete;
+
+ ~MFCC() = default;
+
+ /**
+ * @brief Extract MFCC features for one single small frame of
+ * audio data e.g. 640 samples.
+ * @param[in] audioData Vector of audio samples to calculate
+ * features for.
+ * @return Vector of extracted MFCC features.
+ **/
+ std::vector<float> MfccCompute(const std::vector<int16_t>& audioData);
+
+ /** @brief Initialise. */
+ void Init();
+
+ /**
+ * @brief Extract MFCC features and quantise for one single small
+ * frame of audio data e.g. 640 samples.
+ * @param[in] audioData Vector of audio samples to calculate
+ * features for.
+ * @param[in] quantScale Quantisation scale.
+ * @param[in] quantOffset Quantisation offset.
+ * @return Vector of extracted quantised MFCC features.
+ **/
+ template<typename T>
+ std::vector<T> MfccComputeQuant(const std::vector<int16_t>& audioData,
+ const float quantScale,
+ const int quantOffset)
+ {
+ this->MfccComputePreFeature(audioData);
+ float minVal = std::numeric_limits<T>::min();
+ float maxVal = std::numeric_limits<T>::max();
+
+ std::vector<T> mfccOut(this->m_params.m_numMfccFeatures);
+ const size_t numFbankBins = this->m_params.m_numFbankBins;
+
+ /* Take DCT. Uses matrix mul. */
+ for (size_t i = 0, j = 0; i < mfccOut.size(); ++i, j += numFbankBins) {
+ float sum = 0;
+ for (size_t k = 0; k < numFbankBins; ++k) {
+ sum += this->m_dctMatrix[j + k] * this->m_melEnergies[k];
+ }
+ /* Quantize to T. */
+ sum = std::round((sum / quantScale) + quantOffset);
+ mfccOut[i] = static_cast<T>(std::min<float>(std::max<float>(sum, minVal), maxVal));
+ }
+
+ return mfccOut;
+ }
+
+ /* Constants */
+ static constexpr float ms_logStep = /*logf(6.4)*/ 1.8562979903656 / 27.0;
+ static constexpr float ms_freqStep = 200.0 / 3;
+ static constexpr float ms_minLogHz = 1000.0;
+ static constexpr float ms_minLogMel = ms_minLogHz / ms_freqStep;
+
+ protected:
+ /**
+ * @brief Project input frequency to Mel Scale.
+ * @param[in] freq Input frequency in floating point.
+ * @param[in] useHTKMethod bool to signal if HTK method is to be
+ * used for calculation.
+ * @return Mel transformed frequency in floating point.
+ **/
+ static float MelScale(float freq,
+ bool useHTKMethod = true);
+
+ /**
+ * @brief Inverse Mel transform - convert MEL warped frequency
+ * back to normal frequency.
+ * @param[in] melFreq Mel frequency in floating point.
+ * @param[in] useHTKMethod bool to signal if HTK method is to be
+ * used for calculation.
+ * @return Real world frequency in floating point.
+ **/
+ static float InverseMelScale(float melFreq,
+ bool useHTKMethod = true);
+
+ /**
+ * @brief Populates MEL energies after applying the MEL filter
+ * bank weights and adding them up to be placed into
+ * bins, according to the filter bank's first and last
+ * indices (pre-computed for each filter bank element
+ * by CreateMelFilterBank function).
+ * @param[in] fftVec Vector populated with FFT magnitudes.
+ * @param[in] melFilterBank 2D Vector with filter bank weights.
+ * @param[in] filterBankFilterFirst Vector containing the first indices of filter bank
+ * to be used for each bin.
+ * @param[in] filterBankFilterLast Vector containing the last indices of filter bank
+ * to be used for each bin.
+ * @param[out] melEnergies Pre-allocated vector of MEL energies to be
+ * populated.
+ * @return true if successful, false otherwise.
+ */
+ virtual bool ApplyMelFilterBank(
+ std::vector<float>& fftVec,
+ std::vector<std::vector<float>>& melFilterBank,
+ std::vector<uint32_t>& filterBankFilterFirst,
+ std::vector<uint32_t>& filterBankFilterLast,
+ std::vector<float>& melEnergies);
+
+ /**
+ * @brief Converts the Mel energies for logarithmic scale.
+ * @param[in,out] melEnergies 1D vector of Mel energies.
+ **/
+ virtual void ConvertToLogarithmicScale(std::vector<float>& melEnergies);
+
+ /**
+ * @brief Create a matrix used to calculate Discrete Cosine
+ * Transform.
+ * @param[in] inputLength Input length of the buffer on which
+ * DCT will be performed.
+ * @param[in] coefficientCount Total coefficients per input length.
+ * @return 1D vector with inputLength x coefficientCount elements
+ * populated with DCT coefficients.
+ */
+ virtual std::vector<float> CreateDCTMatrix(
+ int32_t inputLength,
+ int32_t coefficientCount);
+
+ /**
+ * @brief Given the low and high Mel values, get the normaliser
+ * for weights to be applied when populating the filter
+ * bank.
+ * @param[in] leftMel Low Mel frequency value.
+ * @param[in] rightMel High Mel frequency value.
+ * @param[in] useHTKMethod bool to signal if HTK method is to be
+ * used for calculation.
+ * @return Value to use for normalizing.
+ */
+ virtual float GetMelFilterBankNormaliser(
+ const float& leftMel,
+ const float& rightMel,
+ bool useHTKMethod);
+
+ private:
+ MfccParams m_params;
+ std::vector<float> m_frame;
+ std::vector<float> m_buffer;
+ std::vector<float> m_melEnergies;
+ std::vector<float> m_windowFunc;
+ std::vector<std::vector<float>> m_melFilterBank;
+ std::vector<float> m_dctMatrix;
+ std::vector<uint32_t> m_filterBankFilterFirst;
+ std::vector<uint32_t> m_filterBankFilterLast;
+ bool m_filterBankInitialised;
+ arm::app::math::FftInstance m_fftInstance;
+
+ /**
+ * @brief Initialises the filter banks and the DCT matrix. **/
+ void InitMelFilterBank();
+
+ /**
+ * @brief Signals whether the instance of MFCC has had its
+ * required buffers initialised.
+ * @return true if initialised, false otherwise.
+ **/
+ bool IsMelFilterBankInited() const;
+
+ /**
+ * @brief Create mel filter banks for MFCC calculation.
+ * @return 2D vector of floats.
+ **/
+ std::vector<std::vector<float>> CreateMelFilterBank();
+
+ /**
+         * @brief Computes and populates internal member buffers used
+         *        in MFCC feature calculation.
+ * @param[in] audioData 1D vector of 16-bit audio data.
+ */
+ void MfccComputePreFeature(const std::vector<int16_t>& audioData);
+
+ /** @brief Computes the magnitude from an interleaved complex array. */
+ void ConvertToPowerSpectrum();
+
+ };
+
+} /* namespace audio */
+} /* namespace app */
+} /* namespace arm */
+
+#endif /* MFCC_HPP */
\ No newline at end of file
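A sketch of driving the MFCC class for a single frame; the parameter values below are illustrative only, as each use case supplies its own:

    #include "Mfcc.hpp"

    #include <cstdint>
    #include <vector>

    std::vector<float> ExtractFeatures(const std::vector<int16_t>& frame)
    {
        arm::app::audio::MfccParams params(
            16000, /* Sampling frequency in Hz. */
            40,    /* Number of Mel filter bank bins. */
            20,    /* Mel low frequency limit in Hz. */
            4000,  /* Mel high frequency limit in Hz. */
            10,    /* Number of MFCC features. */
            640,   /* Frame length in samples. */
            true); /* Use the HTK Mel scale method. */

        arm::app::audio::MFCC mfcc(params);
        mfcc.Init(); /* Builds the Mel filter bank and DCT matrix. */
        return mfcc.MfccCompute(frame);
    }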
diff --git a/source/application/api/common/include/Model.hpp b/source/application/api/common/include/Model.hpp
new file mode 100644
index 0000000..df1b259
--- /dev/null
+++ b/source/application/api/common/include/Model.hpp
@@ -0,0 +1,152 @@
+/*
+ * Copyright (c) 2021-2022 Arm Limited. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef MODEL_HPP
+#define MODEL_HPP
+
+#include "TensorFlowLiteMicro.hpp"
+
+#include <cstdint>
+#include <vector>
+
+namespace arm {
+namespace app {
+
+ /**
+ * @brief NN model class wrapping the underlying TensorFlow-Lite-Micro API.
+ */
+ class Model {
+ public:
+ /** @brief Constructor. */
+ Model();
+
+ /** @brief Destructor. */
+ ~Model();
+
+ /** @brief Gets the pointer to the model's input tensor at given input index. */
+ TfLiteTensor* GetInputTensor(size_t index) const;
+
+ /** @brief Gets the pointer to the model's output tensor at given output index. */
+ TfLiteTensor* GetOutputTensor(size_t index) const;
+
+ /** @brief Gets the model's data type. */
+ TfLiteType GetType() const;
+
+ /** @brief Gets the pointer to the model's input shape. */
+ TfLiteIntArray* GetInputShape(size_t index) const;
+
+ /** @brief Gets the pointer to the model's output shape at given output index. */
+ TfLiteIntArray* GetOutputShape(size_t index) const;
+
+ /** @brief Gets the number of input tensors the model has. */
+ size_t GetNumInputs() const;
+
+ /** @brief Gets the number of output tensors the model has. */
+ size_t GetNumOutputs() const;
+
+ /** @brief Logs the tensor information to stdout. */
+ void LogTensorInfo(TfLiteTensor* tensor);
+
+ /** @brief Logs the interpreter information to stdout. */
+ void LogInterpreterInfo();
+
+ /** @brief Initialise the model class object.
+         * @param[in] tensorArenaAddr Pointer to the tensor arena buffer.
+         * @param[in] tensorArenaSize Size of the tensor arena buffer in bytes.
+ * @param[in] nnModelAddr Pointer to the model.
+ * @param[in] nnModelSize Size of the model in bytes, if known.
+ * @param[in] allocator Optional: a pre-initialised micro allocator pointer,
+ * if available. If supplied, this allocator will be used
+ * to create the interpreter instance.
+ * @return true if initialisation succeeds, false otherwise.
+ **/
+ bool Init(uint8_t* tensorArenaAddr,
+ uint32_t tensorArenaSize,
+ uint8_t* nnModelAddr,
+ uint32_t nnModelSize,
+ tflite::MicroAllocator* allocator = nullptr);
+
+ /**
+ * @brief Gets the allocator pointer for this instance.
+ * @return Pointer to a tflite::MicroAllocator object, if
+ * available; nullptr otherwise.
+ **/
+ tflite::MicroAllocator* GetAllocator();
+
+ /** @brief Checks if this object has been initialised. */
+ bool IsInited() const;
+
+ /** @brief Checks if the model uses signed data. */
+ bool IsDataSigned() const;
+
+ /** @brief Checks if the model uses Ethos-U operator */
+ bool ContainsEthosUOperator() const;
+
+ /** @brief Runs the inference (invokes the interpreter). */
+ virtual bool RunInference();
+
+ /** @brief Model information handler common to all models.
+ * @return true or false based on execution success.
+ **/
+ bool ShowModelInfoHandler();
+
+ /** @brief Gets a pointer to the tensor arena. */
+ uint8_t* GetTensorArena();
+
+ protected:
+ /** @brief Gets the pointer to the NN model data array.
+ * @return Pointer of uint8_t type.
+ **/
+ const uint8_t* ModelPointer();
+
+ /** @brief Gets the model size.
+         * @return uint32_t, size in bytes.
+ **/
+ uint32_t ModelSize();
+
+ /**
+ * @brief Gets the op resolver for the model instance.
+ * @return const reference to a tflite::MicroOpResolver object.
+ **/
+ virtual const tflite::MicroOpResolver& GetOpResolver() = 0;
+
+ /**
+ * @brief Add all the operators required for the given model.
+ * Implementation of this should come from the use case.
+         * @return true if ops are successfully added, false otherwise.
+ **/
+ virtual bool EnlistOperations() = 0;
+
+ /** @brief Gets the total size of tensor arena available for use. */
+ size_t GetActivationBufferSize();
+
+ private:
+ tflite::ErrorReporter* m_pErrorReporter = nullptr; /* Pointer to the error reporter. */
+ const tflite::Model* m_pModel = nullptr; /* Tflite model pointer. */
+ tflite::MicroInterpreter* m_pInterpreter = nullptr; /* Tflite interpreter. */
+ tflite::MicroAllocator* m_pAllocator = nullptr; /* Tflite micro allocator. */
+ bool m_inited = false; /* Indicates whether this object has been initialised. */
+ uint8_t* m_modelAddr = nullptr; /* Model address */
+ uint32_t m_modelSize = 0; /* Model size */
+
+ std::vector<TfLiteTensor*> m_input = {}; /* Model's input tensor pointers. */
+ std::vector<TfLiteTensor*> m_output = {}; /* Model's output tensor pointers. */
+ TfLiteType m_type = kTfLiteNoType;/* Model's data type. */
+ };
+
+} /* namespace app */
+} /* namespace arm */
+
+#endif /* MODEL_HPP */
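A hypothetical minimal subclass, to illustrate what the two pure virtual methods are for; the MySketchModel name and operator list are invented for this sketch:

    #include "Model.hpp"

    class MySketchModel : public arm::app::Model {
    protected:
        /* Hand back the resolver that EnlistOperations() populated. */
        const tflite::MicroOpResolver& GetOpResolver() override
        {
            return this->m_opResolver;
        }

        /* Register only the operators this network actually uses. */
        bool EnlistOperations() override
        {
            return kTfLiteOk == this->m_opResolver.AddFullyConnected()
                && kTfLiteOk == this->m_opResolver.AddSoftmax();
        }

    private:
        tflite::MicroMutableOpResolver<2> m_opResolver;
    };

An application would then call Init() with the tensor arena and model addresses before invoking RunInference().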
diff --git a/source/application/api/common/include/TensorFlowLiteMicro.hpp b/source/application/api/common/include/TensorFlowLiteMicro.hpp
new file mode 100644
index 0000000..f6639fd
--- /dev/null
+++ b/source/application/api/common/include/TensorFlowLiteMicro.hpp
@@ -0,0 +1,91 @@
+/*
+ * Copyright (c) 2021 Arm Limited. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef TENSORFLOW_LITE_MICRO_LOCAL_HPP
+#define TENSORFLOW_LITE_MICRO_LOCAL_HPP
+
+/* We include all our TensorFlow Lite Micro headers here */
+
+/**
+ * TensorFlow Lite Micro sources can generate a lot of warnings from the usage
+ * of a single macro (TF_LITE_REMOVE_VIRTUAL_DELETE). Suppress the known ones
+ * here to prevent them from masking warnings that might be generated by our
+ * application sources.
+ */
+#if defined(__ARMCC_VERSION) && (__ARMCC_VERSION >= 6010050)
+ #pragma clang diagnostic push
+ #pragma clang diagnostic ignored "-Wunused-parameter"
+ #include "tensorflow/lite/micro/micro_mutable_op_resolver.h"
+ #include "tensorflow/lite/micro/micro_interpreter.h"
+ #include "tensorflow/lite/micro/micro_error_reporter.h"
+ #include "tensorflow/lite/micro/all_ops_resolver.h"
+ #pragma clang diagnostic pop
+#elif defined(__GNUC__)
+ #pragma GCC diagnostic push
+ #pragma GCC diagnostic ignored "-Wunused-parameter"
+ #include "tensorflow/lite/micro/micro_mutable_op_resolver.h"
+ #include "tensorflow/lite/micro/micro_interpreter.h"
+ #include "tensorflow/lite/micro/micro_error_reporter.h"
+ #include "tensorflow/lite/micro/all_ops_resolver.h"
+ #pragma GCC diagnostic pop
+#else
+ #include "tensorflow/lite/micro/micro_mutable_op_resolver.h"
+ #include "tensorflow/lite/micro/micro_interpreter.h"
+ #include "tensorflow/lite/micro/micro_error_reporter.h"
+ #include "tensorflow/lite/micro/all_ops_resolver.h"
+#endif
+
+#include "tensorflow/lite/c/common.h"
+#include "tensorflow/lite/micro/kernels/micro_ops.h"
+#include "tensorflow/lite/schema/schema_generated.h"
+#include "tensorflow/lite/schema/schema_utils.h"
+
+#if defined (TESTS)
+ #include "tensorflow/lite/micro/test_helpers.h"
+#endif /* defined (TESTS) */
+
+namespace arm {
+namespace app {
+
+ /** Struct for quantization parameters. */
+ struct QuantParams {
+ float scale = 1.0;
+ int offset = 0;
+ };
+
+ /**
+     * @brief Gets the quantization parameters from a tensor.
+     * @param[in] tensor Pointer to the tensor.
+ * @return QuantParams object.
+ */
+ QuantParams GetTensorQuantParams(TfLiteTensor* tensor);
+
+ /**
+ * @brief String logging functionality expected to be defined
+ * by TensorFlow Lite Micro's error reporter.
+ * @param[in] s Pointer to the string.
+ */
+ extern "C" void DebugLog(const char* s);
+
+} /* namespace app */
+} /* namespace arm */
+
+/**
+ * @brief Prints the TensorFlow version in use to stdout.
+ */
+void PrintTensorFlowVersion();
+
+#endif /* TENSORFLOW_LITE_MICRO_LOCAL_HPP */
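For example, GetTensorQuantParams can be used to recover real values from a quantised tensor. This helper is invented for illustration; the formula matches the dequantisation used in Classifier.cc below:

    #include "TensorFlowLiteMicro.hpp"

    #include <cstddef>

    /* Dequantise one int8 element: real = scale * (quantised - offset). */
    float DequantiseAt(TfLiteTensor* tensor, size_t idx)
    {
        const arm::app::QuantParams q = arm::app::GetTensorQuantParams(tensor);
        const int8_t* data = tflite::GetTensorData<int8_t>(tensor);
        return q.scale * (static_cast<float>(data[idx]) - q.offset);
    }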
diff --git a/source/application/api/common/source/Classifier.cc b/source/application/api/common/source/Classifier.cc
new file mode 100644
index 0000000..6fabebe
--- /dev/null
+++ b/source/application/api/common/source/Classifier.cc
@@ -0,0 +1,169 @@
+/*
+ * Copyright (c) 2021 Arm Limited. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "Classifier.hpp"
+
+#include "TensorFlowLiteMicro.hpp"
+#include "PlatformMath.hpp"
+#include "log_macros.h"
+
+#include <vector>
+#include <string>
+#include <set>
+#include <cstdint>
+#include <cinttypes>
+
+
+namespace arm {
+namespace app {
+
+ void Classifier::SetVectorResults(std::set<std::pair<float, uint32_t>>& topNSet,
+ std::vector<ClassificationResult>& vecResults,
+ const std::vector <std::string>& labels)
+ {
+
+ /* Reset the iterator to the largest element - use reverse iterator. */
+
+ auto topNIter = topNSet.rbegin();
+ for (size_t i = 0; i < vecResults.size() && topNIter != topNSet.rend(); ++i, ++topNIter) {
+ vecResults[i].m_normalisedVal = topNIter->first;
+ vecResults[i].m_label = labels[topNIter->second];
+ vecResults[i].m_labelIdx = topNIter->second;
+ }
+ }
+
+ bool Classifier::GetTopNResults(const std::vector<float>& tensor,
+ std::vector<ClassificationResult>& vecResults,
+ uint32_t topNCount,
+ const std::vector <std::string>& labels)
+ {
+
+ std::set<std::pair<float , uint32_t>> sortedSet;
+
+        /* NOTE: the tensor's size verification against labels should be
+ * checked by the calling/public function. */
+
+ /* Set initial elements. */
+ for (uint32_t i = 0; i < topNCount; ++i) {
+ sortedSet.insert({tensor[i], i});
+ }
+
+ /* Initialise iterator. */
+ auto setFwdIter = sortedSet.begin();
+
+ /* Scan through the rest of elements with compare operations. */
+ for (uint32_t i = topNCount; i < labels.size(); ++i) {
+ if (setFwdIter->first < tensor[i]) {
+ sortedSet.erase(*setFwdIter);
+ sortedSet.insert({tensor[i], i});
+ setFwdIter = sortedSet.begin();
+ }
+ }
+
+ /* Final results' container. */
+ vecResults = std::vector<ClassificationResult>(topNCount);
+ SetVectorResults(sortedSet, vecResults, labels);
+
+ return true;
+ }
+
+ bool Classifier::GetClassificationResults(
+ TfLiteTensor* outputTensor,
+ std::vector<ClassificationResult>& vecResults,
+ const std::vector <std::string>& labels,
+ uint32_t topNCount,
+ bool useSoftmax)
+ {
+ if (outputTensor == nullptr) {
+ printf_err("Output vector is null pointer.\n");
+ return false;
+ }
+
+ uint32_t totalOutputSize = 1;
+ for (int inputDim = 0; inputDim < outputTensor->dims->size; inputDim++) {
+ totalOutputSize *= outputTensor->dims->data[inputDim];
+ }
+
+ /* Sanity checks. */
+ if (totalOutputSize < topNCount) {
+ printf_err("Output vector is smaller than %" PRIu32 "\n", topNCount);
+ return false;
+ } else if (totalOutputSize != labels.size()) {
+ printf_err("Output size doesn't match the labels' size\n");
+ return false;
+ } else if (topNCount == 0) {
+ printf_err("Top N results cannot be zero\n");
+ return false;
+ }
+
+ bool resultState;
+ vecResults.clear();
+
+ /* De-Quantize Output Tensor */
+ QuantParams quantParams = GetTensorQuantParams(outputTensor);
+
+ /* Floating point tensor data to be populated
+ * NOTE: The assumption here is that the output tensor size isn't too
+         * big and therefore, there's negligible impact on heap usage. */
+ std::vector<float> tensorData(totalOutputSize);
+
+ /* Populate the floating point buffer */
+ switch (outputTensor->type) {
+ case kTfLiteUInt8: {
+ uint8_t *tensor_buffer = tflite::GetTensorData<uint8_t>(outputTensor);
+ for (size_t i = 0; i < totalOutputSize; ++i) {
+ tensorData[i] = quantParams.scale *
+ (static_cast<float>(tensor_buffer[i]) - quantParams.offset);
+ }
+ break;
+ }
+ case kTfLiteInt8: {
+ int8_t *tensor_buffer = tflite::GetTensorData<int8_t>(outputTensor);
+ for (size_t i = 0; i < totalOutputSize; ++i) {
+ tensorData[i] = quantParams.scale *
+ (static_cast<float>(tensor_buffer[i]) - quantParams.offset);
+ }
+ break;
+ }
+ case kTfLiteFloat32: {
+ float *tensor_buffer = tflite::GetTensorData<float>(outputTensor);
+ for (size_t i = 0; i < totalOutputSize; ++i) {
+ tensorData[i] = tensor_buffer[i];
+ }
+ break;
+ }
+ default:
+ printf_err("Tensor type %s not supported by classifier\n",
+ TfLiteTypeGetName(outputTensor->type));
+ return false;
+ }
+
+ if (useSoftmax) {
+ math::MathUtils::SoftmaxF32(tensorData);
+ }
+
+ /* Get the top N results. */
+ resultState = GetTopNResults(tensorData, vecResults, topNCount, labels);
+
+ if (!resultState) {
+ printf_err("Failed to get top N results set\n");
+ return false;
+ }
+
+ return true;
+ }
+} /* namespace app */
+} /* namespace arm */
\ No newline at end of file
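The top-N selection above can be demonstrated in isolation (the scores are made up): seed an ordered set with the first N (score, index) pairs, then replace the smallest element whenever a larger score appears.

    #include <cstdint>
    #include <cstdio>
    #include <set>
    #include <utility>
    #include <vector>

    int main()
    {
        const std::vector<float> scores{0.1f, 0.7f, 0.05f, 0.9f, 0.4f};
        const uint32_t topN = 2;

        std::set<std::pair<float, uint32_t>> top;
        for (uint32_t i = 0; i < topN; ++i) {
            top.insert({scores[i], i});
        }
        for (uint32_t i = topN; i < scores.size(); ++i) {
            if (top.begin()->first < scores[i]) {
                top.erase(top.begin());
                top.insert({scores[i], i});
            }
        }

        /* Reverse iteration yields highest score first: class 3, then 1. */
        for (auto it = top.rbegin(); it != top.rend(); ++it) {
            printf("class %u: %f\n", static_cast<unsigned>(it->second), it->first);
        }
        return 0;
    }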
diff --git a/source/application/api/common/source/ImageUtils.cc b/source/application/api/common/source/ImageUtils.cc
new file mode 100644
index 0000000..31b9493
--- /dev/null
+++ b/source/application/api/common/source/ImageUtils.cc
@@ -0,0 +1,126 @@
+/*
+ * Copyright (c) 2022 Arm Limited. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "ImageUtils.hpp"
+
+#include <limits>
+
+namespace arm {
+namespace app {
+namespace image {
+
+ float Calculate1DOverlap(float x1Center, float width1, float x2Center, float width2)
+ {
+ float left_1 = x1Center - width1/2;
+ float left_2 = x2Center - width2/2;
+ float leftest = left_1 > left_2 ? left_1 : left_2;
+
+ float right_1 = x1Center + width1/2;
+ float right_2 = x2Center + width2/2;
+ float rightest = right_1 < right_2 ? right_1 : right_2;
+
+ return rightest - leftest;
+ }
+
+ float CalculateBoxIntersect(Box& box1, Box& box2)
+ {
+ float width = Calculate1DOverlap(box1.x, box1.w, box2.x, box2.w);
+ if (width < 0) {
+ return 0;
+ }
+ float height = Calculate1DOverlap(box1.y, box1.h, box2.y, box2.h);
+ if (height < 0) {
+ return 0;
+ }
+
+ float total_area = width*height;
+ return total_area;
+ }
+
+ float CalculateBoxUnion(Box& box1, Box& box2)
+ {
+ float boxes_intersection = CalculateBoxIntersect(box1, box2);
+ float boxes_union = box1.w * box1.h + box2.w * box2.h - boxes_intersection;
+ return boxes_union;
+ }
+
+ float CalculateBoxIOU(Box& box1, Box& box2)
+ {
+ float boxes_intersection = CalculateBoxIntersect(box1, box2);
+ if (boxes_intersection == 0) {
+ return 0;
+ }
+
+ float boxes_union = CalculateBoxUnion(box1, box2);
+ if (boxes_union == 0) {
+ return 0;
+ }
+
+ return boxes_intersection / boxes_union;
+ }
+
+ void CalculateNMS(std::forward_list<Detection>& detections, int classes, float iouThreshold)
+ {
+        int idxClass{0};
+        /* Capture idxClass by reference so the comparator tracks the class
+         * currently being processed by the loop below; a by-value capture
+         * would compare class 0 on every iteration. */
+        auto CompareProbs = [&idxClass](Detection& prob1, Detection& prob2) {
+            return prob1.prob[idxClass] > prob2.prob[idxClass];
+        };
+
+ for (idxClass = 0; idxClass < classes; ++idxClass) {
+ detections.sort(CompareProbs);
+
+ for (auto it=detections.begin(); it != detections.end(); ++it) {
+ if (it->prob[idxClass] == 0) continue;
+ for (auto itc=std::next(it, 1); itc != detections.end(); ++itc) {
+ if (itc->prob[idxClass] == 0) {
+ continue;
+ }
+ if (CalculateBoxIOU(it->bbox, itc->bbox) > iouThreshold) {
+ itc->prob[idxClass] = 0;
+ }
+ }
+ }
+ }
+ }
+
+ void ConvertImgToInt8(void* data, const size_t kMaxImageSize)
+ {
+ auto* tmp_req_data = static_cast<uint8_t*>(data);
+ auto* tmp_signed_req_data = static_cast<int8_t*>(data);
+
+ for (size_t i = 0; i < kMaxImageSize; i++) {
+ tmp_signed_req_data[i] = (int8_t) (
+ (int32_t) (tmp_req_data[i]) - 128);
+ }
+ }
+
+ void RgbToGrayscale(const uint8_t* srcPtr, uint8_t* dstPtr, const size_t dstImgSz)
+ {
+ const float R = 0.299;
+ const float G = 0.587;
+ const float B = 0.114;
+ for (size_t i = 0; i < dstImgSz; ++i, srcPtr += 3) {
+ uint32_t int_gray = R * (*srcPtr) +
+ G * (*(srcPtr + 1)) +
+ B * (*(srcPtr + 2));
+ *dstPtr++ = int_gray <= std::numeric_limits<uint8_t>::max() ?
+ int_gray : std::numeric_limits<uint8_t>::max();
+ }
+ }
+
+} /* namespace image */
+} /* namespace app */
+} /* namespace arm */
\ No newline at end of file
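A usage sketch for CalculateNMS (the wrapper, detections and threshold are invented): after the call, suppressed detections have their class probability zeroed rather than being removed from the list, so callers filter on prob afterwards.

    #include "ImageUtils.hpp"

    #include <forward_list>

    void SuppressOverlaps(std::forward_list<arm::app::image::Detection>& dets)
    {
        const int numClasses = 1;         /* Single-class example. */
        const float iouThreshold = 0.45f; /* Example threshold. */
        arm::app::image::CalculateNMS(dets, numClasses, iouThreshold);
    }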
diff --git a/source/application/api/common/source/Mfcc.cc b/source/application/api/common/source/Mfcc.cc
new file mode 100644
index 0000000..3bf5eb3
--- /dev/null
+++ b/source/application/api/common/source/Mfcc.cc
@@ -0,0 +1,353 @@
+/*
+ * Copyright (c) 2021 Arm Limited. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "Mfcc.hpp"
+#include "PlatformMath.hpp"
+#include "log_macros.h"
+
+#include <cfloat>
+#include <cinttypes>
+
+namespace arm {
+namespace app {
+namespace audio {
+
+ MfccParams::MfccParams(
+ const float samplingFreq,
+ const uint32_t numFbankBins,
+ const float melLoFreq,
+ const float melHiFreq,
+ const uint32_t numMfccFeats,
+ const uint32_t frameLen,
+ const bool useHtkMethod):
+ m_samplingFreq(samplingFreq),
+ m_numFbankBins(numFbankBins),
+ m_melLoFreq(melLoFreq),
+ m_melHiFreq(melHiFreq),
+ m_numMfccFeatures(numMfccFeats),
+ m_frameLen(frameLen),
+
+ /* Smallest power of 2 >= frame length. */
+ m_frameLenPadded(pow(2, ceil((log(frameLen)/log(2))))),
+ m_useHtkMethod(useHtkMethod)
+ {}
+
+ void MfccParams::Log() const
+ {
+ debug("MFCC parameters:\n");
+ debug("\t Sampling frequency: %f\n", this->m_samplingFreq);
+ debug("\t Number of filter banks: %" PRIu32 "\n", this->m_numFbankBins);
+ debug("\t Mel frequency limit (low): %f\n", this->m_melLoFreq);
+ debug("\t Mel frequency limit (high): %f\n", this->m_melHiFreq);
+ debug("\t Number of MFCC features: %" PRIu32 "\n", this->m_numMfccFeatures);
+ debug("\t Frame length: %" PRIu32 "\n", this->m_frameLen);
+ debug("\t Padded frame length: %" PRIu32 "\n", this->m_frameLenPadded);
+ debug("\t Using HTK for Mel scale: %s\n", this->m_useHtkMethod ? "yes" : "no");
+ }
+
+ MFCC::MFCC(const MfccParams& params):
+ m_params(params),
+ m_filterBankInitialised(false)
+ {
+ this->m_buffer = std::vector<float>(
+ this->m_params.m_frameLenPadded, 0.0);
+ this->m_frame = std::vector<float>(
+ this->m_params.m_frameLenPadded, 0.0);
+ this->m_melEnergies = std::vector<float>(
+ this->m_params.m_numFbankBins, 0.0);
+
+ this->m_windowFunc = std::vector<float>(this->m_params.m_frameLen);
+ const auto multiplier = static_cast<float>(2 * M_PI / this->m_params.m_frameLen);
+
+ /* Create window function. */
+ for (size_t i = 0; i < this->m_params.m_frameLen; i++) {
+ this->m_windowFunc[i] = (0.5 - (0.5 *
+ math::MathUtils::CosineF32(static_cast<float>(i) * multiplier)));
+ }
+
+ math::MathUtils::FftInitF32(this->m_params.m_frameLenPadded, this->m_fftInstance);
+ this->m_params.Log();
+ }
+
+ void MFCC::Init()
+ {
+ this->InitMelFilterBank();
+ }
+
+ float MFCC::MelScale(const float freq, const bool useHTKMethod)
+ {
+ if (useHTKMethod) {
+ return 1127.0f * logf (1.0f + freq / 700.0f);
+ } else {
+ /* Slaney formula for mel scale. */
+
+ float mel = freq / ms_freqStep;
+
+ if (freq >= ms_minLogHz) {
+ mel = ms_minLogMel + logf(freq / ms_minLogHz) / ms_logStep;
+ }
+ return mel;
+ }
+ }
+
+ float MFCC::InverseMelScale(const float melFreq, const bool useHTKMethod)
+ {
+ if (useHTKMethod) {
+ return 700.0f * (expf (melFreq / 1127.0f) - 1.0f);
+ } else {
+            /* Slaney formula for inverse mel scale. */
+ float freq = ms_freqStep * melFreq;
+
+ if (melFreq >= ms_minLogMel) {
+ freq = ms_minLogHz * expf(ms_logStep * (melFreq - ms_minLogMel));
+ }
+ return freq;
+ }
+ }
+
+
+ bool MFCC::ApplyMelFilterBank(
+ std::vector<float>& fftVec,
+ std::vector<std::vector<float>>& melFilterBank,
+ std::vector<uint32_t>& filterBankFilterFirst,
+ std::vector<uint32_t>& filterBankFilterLast,
+ std::vector<float>& melEnergies)
+ {
+ const size_t numBanks = melEnergies.size();
+
+ if (numBanks != filterBankFilterFirst.size() ||
+ numBanks != filterBankFilterLast.size()) {
+ printf_err("unexpected filter bank lengths\n");
+ return false;
+ }
+
+ for (size_t bin = 0; bin < numBanks; ++bin) {
+ auto filterBankIter = melFilterBank[bin].begin();
+ auto end = melFilterBank[bin].end();
+ float melEnergy = FLT_MIN; /* Avoid log of zero at later stages */
+ const uint32_t firstIndex = filterBankFilterFirst[bin];
+ const uint32_t lastIndex = std::min<uint32_t>(filterBankFilterLast[bin], fftVec.size() - 1);
+
+ for (uint32_t i = firstIndex; i <= lastIndex && filterBankIter != end; i++) {
+ float energyRep = math::MathUtils::SqrtF32(fftVec[i]);
+ melEnergy += (*filterBankIter++ * energyRep);
+ }
+
+ melEnergies[bin] = melEnergy;
+ }
+
+ return true;
+ }
+
+ void MFCC::ConvertToLogarithmicScale(std::vector<float>& melEnergies)
+ {
+ for (float& melEnergy : melEnergies) {
+ melEnergy = logf(melEnergy);
+ }
+ }
+
+ void MFCC::ConvertToPowerSpectrum()
+ {
+ const uint32_t halfDim = this->m_buffer.size() / 2;
+
+        /* Handle the first and last bins separately: the real FFT packs
+         * the purely real DC and Nyquist values into buffer[0] and buffer[1]. */
+        float firstEnergy = this->m_buffer[0] * this->m_buffer[0];
+        float lastEnergy = this->m_buffer[1] * this->m_buffer[1];
+
+ math::MathUtils::ComplexMagnitudeSquaredF32(
+ this->m_buffer.data(),
+ this->m_buffer.size(),
+ this->m_buffer.data(),
+ this->m_buffer.size()/2);
+
+ this->m_buffer[0] = firstEnergy;
+ this->m_buffer[halfDim] = lastEnergy;
+ }
+
+ std::vector<float> MFCC::CreateDCTMatrix(
+ const int32_t inputLength,
+ const int32_t coefficientCount)
+ {
+        std::vector<float> dctMatrix(inputLength * coefficientCount);
+
+ const float normalizer = math::MathUtils::SqrtF32(2.0f/inputLength);
+ const float angleIncr = M_PI/inputLength;
+ float angle = 0;
+
+ for (int32_t k = 0, m = 0; k < coefficientCount; k++, m += inputLength) {
+ for (int32_t n = 0; n < inputLength; n++) {
+                dctMatrix[m+n] = normalizer *
+ math::MathUtils::CosineF32((n + 0.5f) * angle);
+ }
+ angle += angleIncr;
+ }
+
+        return dctMatrix;
+ }
+
+ float MFCC::GetMelFilterBankNormaliser(
+ const float& leftMel,
+ const float& rightMel,
+ const bool useHTKMethod)
+ {
+ UNUSED(leftMel);
+ UNUSED(rightMel);
+ UNUSED(useHTKMethod);
+
+ /* By default, no normalisation => return 1 */
+ return 1.f;
+ }
+
+ void MFCC::InitMelFilterBank()
+ {
+ if (!this->IsMelFilterBankInited()) {
+ this->m_melFilterBank = this->CreateMelFilterBank();
+ this->m_dctMatrix = this->CreateDCTMatrix(
+ this->m_params.m_numFbankBins,
+ this->m_params.m_numMfccFeatures);
+ this->m_filterBankInitialised = true;
+ }
+ }
+
+ bool MFCC::IsMelFilterBankInited() const
+ {
+ return this->m_filterBankInitialised;
+ }
+
+ void MFCC::MfccComputePreFeature(const std::vector<int16_t>& audioData)
+ {
+ this->InitMelFilterBank();
+
+ /* TensorFlow way of normalizing .wav data to (-1, 1). */
+ constexpr float normaliser = 1.0/(1u<<15u);
+ for (size_t i = 0; i < this->m_params.m_frameLen; i++) {
+ this->m_frame[i] = static_cast<float>(audioData[i]) * normaliser;
+ }
+
+ /* Apply window function to input frame. */
+        for (size_t i = 0; i < this->m_params.m_frameLen; i++) {
+ this->m_frame[i] *= this->m_windowFunc[i];
+ }
+
+ /* Set remaining frame values to 0. */
+        std::fill(this->m_frame.begin() + this->m_params.m_frameLen, this->m_frame.end(), 0);
+
+ /* Compute FFT. */
+ math::MathUtils::FftF32(this->m_frame, this->m_buffer, this->m_fftInstance);
+
+ /* Convert to power spectrum. */
+ this->ConvertToPowerSpectrum();
+
+ /* Apply mel filterbanks. */
+ if (!this->ApplyMelFilterBank(this->m_buffer,
+ this->m_melFilterBank,
+ this->m_filterBankFilterFirst,
+ this->m_filterBankFilterLast,
+ this->m_melEnergies)) {
+ printf_err("Failed to apply MEL filter banks\n");
+ }
+
+ /* Convert to logarithmic scale. */
+ this->ConvertToLogarithmicScale(this->m_melEnergies);
+ }
+
+ std::vector<float> MFCC::MfccCompute(const std::vector<int16_t>& audioData)
+ {
+ this->MfccComputePreFeature(audioData);
+
+ std::vector<float> mfccOut(this->m_params.m_numMfccFeatures);
+
+ float * ptrMel = this->m_melEnergies.data();
+ float * ptrDct = this->m_dctMatrix.data();
+ float * ptrMfcc = mfccOut.data();
+
+        /* Take DCT: one dot product of a DCT matrix row with the mel
+         * energies per output coefficient (matrix multiplication). */
+ for (size_t i = 0, j = 0; i < mfccOut.size();
+ ++i, j += this->m_params.m_numFbankBins) {
+ *ptrMfcc++ = math::MathUtils::DotProductF32(
+ ptrDct + j,
+ ptrMel,
+ this->m_params.m_numFbankBins);
+ }
+ return mfccOut;
+ }
+
+ std::vector<std::vector<float>> MFCC::CreateMelFilterBank()
+ {
+ size_t numFftBins = this->m_params.m_frameLenPadded / 2;
+ float fftBinWidth = static_cast<float>(this->m_params.m_samplingFreq) / this->m_params.m_frameLenPadded;
+
+ float melLowFreq = MFCC::MelScale(this->m_params.m_melLoFreq,
+ this->m_params.m_useHtkMethod);
+ float melHighFreq = MFCC::MelScale(this->m_params.m_melHiFreq,
+ this->m_params.m_useHtkMethod);
+ float melFreqDelta = (melHighFreq - melLowFreq) / (this->m_params.m_numFbankBins + 1);
+
+ std::vector<float> thisBin = std::vector<float>(numFftBins);
+ std::vector<std::vector<float>> melFilterBank(
+ this->m_params.m_numFbankBins);
+ this->m_filterBankFilterFirst =
+ std::vector<uint32_t>(this->m_params.m_numFbankBins);
+ this->m_filterBankFilterLast =
+ std::vector<uint32_t>(this->m_params.m_numFbankBins);
+
+ for (size_t bin = 0; bin < this->m_params.m_numFbankBins; bin++) {
+ float leftMel = melLowFreq + bin * melFreqDelta;
+ float centerMel = melLowFreq + (bin + 1) * melFreqDelta;
+ float rightMel = melLowFreq + (bin + 2) * melFreqDelta;
+
+ uint32_t firstIndex = 0;
+ uint32_t lastIndex = 0;
+ bool firstIndexFound = false;
+ const float normaliser = this->GetMelFilterBankNormaliser(leftMel, rightMel, this->m_params.m_useHtkMethod);
+
+ for (size_t i = 0; i < numFftBins; i++) {
+ float freq = (fftBinWidth * i); /* Center freq of this fft bin. */
+ float mel = MFCC::MelScale(freq, this->m_params.m_useHtkMethod);
+ thisBin[i] = 0.0;
+
+ if (mel > leftMel && mel < rightMel) {
+ float weight;
+ if (mel <= centerMel) {
+ weight = (mel - leftMel) / (centerMel - leftMel);
+ } else {
+ weight = (rightMel - mel) / (rightMel - centerMel);
+ }
+
+ thisBin[i] = weight * normaliser;
+ if (!firstIndexFound) {
+ firstIndex = i;
+ firstIndexFound = true;
+ }
+ lastIndex = i;
+ }
+ }
+
+ this->m_filterBankFilterFirst[bin] = firstIndex;
+ this->m_filterBankFilterLast[bin] = lastIndex;
+
+ /* Copy the part we care about. */
+ for (uint32_t i = firstIndex; i <= lastIndex; i++) {
+ melFilterBank[bin].push_back(thisBin[i]);
+ }
+ }
+
+ return melFilterBank;
+ }
+
+} /* namespace audio */
+} /* namespace app */
+} /* namespace arm */
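For reference, the MFCC front end added above is self-contained: construct an MfccParams, wrap it in an MFCC instance and feed it 16-bit PCM frames. A minimal usage sketch follows; the parameter values (16 kHz, 40 filter banks, 10 coefficients, 640-sample frames) are illustrative assumptions, not values mandated by this patch.

    #include "Mfcc.hpp"

    #include <cstdint>
    #include <vector>

    std::vector<float> ExtractMfcc(const std::vector<int16_t>& frame)
    {
        using namespace arm::app::audio;

        /* Illustrative parameters: 16 kHz audio, 40 filter banks, mel range
         * 20 Hz - 4 kHz, 10 MFCC features, 640-sample frames, HTK mel formula. */
        static MfccParams params(16000.0f, 40, 20.0f, 4000.0f, 10, 640, true);
        static MFCC mfcc(params);

        mfcc.Init(); /* No-op after the first call; the filter bank is cached. */
        return mfcc.MfccCompute(frame); /* frame must hold at least 640 samples. */
    }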
diff --git a/source/application/api/common/source/Model.cc b/source/application/api/common/source/Model.cc
new file mode 100644
index 0000000..f1ac91d
--- /dev/null
+++ b/source/application/api/common/source/Model.cc
@@ -0,0 +1,359 @@
+/*
+ * Copyright (c) 2021 Arm Limited. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "Model.hpp"
+#include "log_macros.h"
+
+#include <cinttypes>
+
+/* Clean up the model. */
+arm::app::Model::~Model()
+{
+ delete this->m_pInterpreter;
+ /**
+ * No clean-up function available for allocator in TensorFlow Lite Micro yet.
+ **/
+}
+
+arm::app::Model::Model() :
+ m_inited (false),
+ m_type(kTfLiteNoType)
+{
+ this->m_pErrorReporter = tflite::GetMicroErrorReporter();
+}
+
+bool arm::app::Model::Init(uint8_t* tensorArenaAddr,
+ uint32_t tensorArenaSize,
+ uint8_t* nnModelAddr,
+ uint32_t nnModelSize,
+ tflite::MicroAllocator* allocator)
+{
+ /* Following tf lite micro example:
+ * Map the model into a usable data structure. This doesn't involve any
+ * copying or parsing, it's a very lightweight operation. */
+    debug("loading model @ 0x%p\n", nnModelAddr);
+ debug("model size: %" PRIu32 " bytes.\n", nnModelSize);
+
+ this->m_pModel = ::tflite::GetModel(nnModelAddr);
+
+ if (this->m_pModel->version() != TFLITE_SCHEMA_VERSION) {
+ this->m_pErrorReporter->Report(
+ "[ERROR] model's schema version %d is not equal "
+ "to supported version %d.",
+ this->m_pModel->version(), TFLITE_SCHEMA_VERSION);
+ return false;
+ }
+
+ this->m_modelAddr = nnModelAddr;
+ this->m_modelSize = nnModelSize;
+
+ /* Pull in only the operation implementations we need.
+ * This relies on a complete list of all the ops needed by this graph.
+ * An easier approach is to just use the AllOpsResolver, but this will
+ * incur some penalty in code space for op implementations that are not
+ * needed by this graph.
+ * static ::tflite::ops::micro::AllOpsResolver resolver; */
+ /* NOLINTNEXTLINE(runtime-global-variables) */
+ debug("loading op resolver\n");
+
+ this->EnlistOperations();
+
+ /* Create allocator instance, if it doesn't exist */
+ this->m_pAllocator = allocator;
+ if (!this->m_pAllocator) {
+ /* Create an allocator instance */
+ info("Creating allocator using tensor arena at 0x%p\n", tensorArenaAddr);
+
+ this->m_pAllocator = tflite::MicroAllocator::Create(
+ tensorArenaAddr,
+ tensorArenaSize,
+ this->m_pErrorReporter);
+
+ if (!this->m_pAllocator) {
+ printf_err("Failed to create allocator\n");
+ return false;
+ }
+ debug("Created new allocator @ 0x%p\n", this->m_pAllocator);
+ } else {
+ debug("Using existing allocator @ 0x%p\n", this->m_pAllocator);
+ }
+
+ this->m_pInterpreter = new ::tflite::MicroInterpreter(
+ this->m_pModel, this->GetOpResolver(),
+ this->m_pAllocator, this->m_pErrorReporter);
+
+ if (!this->m_pInterpreter) {
+ printf_err("Failed to allocate interpreter\n");
+ return false;
+ }
+
+ /* Allocate memory from the tensor_arena for the model's tensors. */
+ info("Allocating tensors\n");
+ TfLiteStatus allocate_status = this->m_pInterpreter->AllocateTensors();
+
+ if (allocate_status != kTfLiteOk) {
+ printf_err("tensor allocation failed!\n");
+ delete this->m_pInterpreter;
+ return false;
+ }
+
+ /* Get information about the memory area to use for the model's input. */
+ this->m_input.resize(this->GetNumInputs());
+ for (size_t inIndex = 0; inIndex < this->GetNumInputs(); inIndex++)
+ this->m_input[inIndex] = this->m_pInterpreter->input(inIndex);
+
+ this->m_output.resize(this->GetNumOutputs());
+ for (size_t outIndex = 0; outIndex < this->GetNumOutputs(); outIndex++)
+ this->m_output[outIndex] = this->m_pInterpreter->output(outIndex);
+
+ if (this->m_input.empty() || this->m_output.empty()) {
+ printf_err("failed to get tensors\n");
+ return false;
+ } else {
+ this->m_type = this->m_input[0]->type; /* Input 0 should be the main input */
+
+ /* Clear the input & output tensors */
+ for (size_t inIndex = 0; inIndex < this->GetNumInputs(); inIndex++) {
+ std::memset(this->m_input[inIndex]->data.data, 0, this->m_input[inIndex]->bytes);
+ }
+ for (size_t outIndex = 0; outIndex < this->GetNumOutputs(); outIndex++) {
+ std::memset(this->m_output[outIndex]->data.data, 0, this->m_output[outIndex]->bytes);
+ }
+
+ this->LogInterpreterInfo();
+ }
+
+ this->m_inited = true;
+ return true;
+}
+
+tflite::MicroAllocator* arm::app::Model::GetAllocator()
+{
+ if (this->IsInited()) {
+ return this->m_pAllocator;
+ }
+ return nullptr;
+}
+
+void arm::app::Model::LogTensorInfo(TfLiteTensor* tensor)
+{
+ if (!tensor) {
+ printf_err("Invalid tensor\n");
+ assert(tensor);
+ return;
+ }
+
+ debug("\ttensor is assigned to 0x%p\n", tensor);
+ info("\ttensor type is %s\n", TfLiteTypeGetName(tensor->type));
+ info("\ttensor occupies %zu bytes with dimensions\n",
+ tensor->bytes);
+ for (int i = 0 ; i < tensor->dims->size; ++i) {
+ info ("\t\t%d: %3d\n", i, tensor->dims->data[i]);
+ }
+
+ TfLiteQuantization quant = tensor->quantization;
+ if (kTfLiteAffineQuantization == quant.type) {
+ auto* quantParams = (TfLiteAffineQuantization*)quant.params;
+ info("Quant dimension: %" PRIi32 "\n", quantParams->quantized_dimension);
+ for (int i = 0; i < quantParams->scale->size; ++i) {
+ info("Scale[%d] = %f\n", i, quantParams->scale->data[i]);
+ }
+ for (int i = 0; i < quantParams->zero_point->size; ++i) {
+ info("ZeroPoint[%d] = %d\n", i, quantParams->zero_point->data[i]);
+ }
+ }
+}
+
+void arm::app::Model::LogInterpreterInfo()
+{
+ if (!this->m_pInterpreter) {
+ printf_err("Invalid interpreter\n");
+ return;
+ }
+
+ info("Model INPUT tensors: \n");
+ for (auto input : this->m_input) {
+ this->LogTensorInfo(input);
+ }
+
+ info("Model OUTPUT tensors: \n");
+ for (auto output : this->m_output) {
+ this->LogTensorInfo(output);
+ }
+
+ info("Activation buffer (a.k.a tensor arena) size used: %zu\n",
+ this->m_pInterpreter->arena_used_bytes());
+
+ /* We expect there to be only one subgraph. */
+ const uint32_t nOperators = tflite::NumSubgraphOperators(this->m_pModel, 0);
+ info("Number of operators: %" PRIu32 "\n", nOperators);
+
+ const tflite::SubGraph* subgraph = this->m_pModel->subgraphs()->Get(0);
+
+ auto* opcodes = this->m_pModel->operator_codes();
+
+ /* For each operator, display registration information. */
+ for (size_t i = 0 ; i < nOperators; ++i) {
+ const tflite::Operator* op = subgraph->operators()->Get(i);
+ const tflite::OperatorCode* opcode = opcodes->Get(op->opcode_index());
+ const TfLiteRegistration* reg = nullptr;
+
+ tflite::GetRegistrationFromOpCode(opcode, this->GetOpResolver(),
+ this->m_pErrorReporter, &reg);
+ std::string opName;
+
+ if (reg) {
+ if (tflite::BuiltinOperator_CUSTOM == reg->builtin_code) {
+ opName = std::string(reg->custom_name);
+ } else {
+ opName = std::string(EnumNameBuiltinOperator(
+ tflite::BuiltinOperator(reg->builtin_code)));
+ }
+ }
+ info("\tOperator %zu: %s\n", i, opName.c_str());
+ }
+}
+
+bool arm::app::Model::IsInited() const
+{
+ return this->m_inited;
+}
+
+bool arm::app::Model::IsDataSigned() const
+{
+ return this->GetType() == kTfLiteInt8;
+}
+
+bool arm::app::Model::ContainsEthosUOperator() const
+{
+ /* We expect there to be only one subgraph. */
+ const uint32_t nOperators = tflite::NumSubgraphOperators(this->m_pModel, 0);
+ const tflite::SubGraph* subgraph = this->m_pModel->subgraphs()->Get(0);
+ const auto* opcodes = this->m_pModel->operator_codes();
+
+    /* Check for custom operators. */
+    for (size_t i = 0; i < nOperators; ++i) {
+ const tflite::Operator* op = subgraph->operators()->Get(i);
+ const tflite::OperatorCode* opcode = opcodes->Get(op->opcode_index());
+
+ auto builtin_code = tflite::GetBuiltinCode(opcode);
+        if ((builtin_code == tflite::BuiltinOperator_CUSTOM) &&
+            (nullptr != opcode->custom_code()) &&
+            ("ethos-u" == std::string(opcode->custom_code()->c_str()))) {
+ return true;
+ }
+ }
+ return false;
+}
+
+bool arm::app::Model::RunInference()
+{
+ bool inference_state = false;
+ if (this->m_pModel && this->m_pInterpreter) {
+ if (kTfLiteOk != this->m_pInterpreter->Invoke()) {
+ printf_err("Invoke failed.\n");
+ } else {
+ inference_state = true;
+ }
+ } else {
+ printf_err("Error: No interpreter!\n");
+ }
+ return inference_state;
+}
+
+TfLiteTensor* arm::app::Model::GetInputTensor(size_t index) const
+{
+ if (index < this->GetNumInputs()) {
+ return this->m_input.at(index);
+ }
+ return nullptr;
+}
+
+TfLiteTensor* arm::app::Model::GetOutputTensor(size_t index) const
+{
+ if (index < this->GetNumOutputs()) {
+ return this->m_output.at(index);
+ }
+ return nullptr;
+}
+
+size_t arm::app::Model::GetNumInputs() const
+{
+ if (this->m_pModel && this->m_pInterpreter) {
+ return this->m_pInterpreter->inputs_size();
+ }
+ return 0;
+}
+
+size_t arm::app::Model::GetNumOutputs() const
+{
+ if (this->m_pModel && this->m_pInterpreter) {
+ return this->m_pInterpreter->outputs_size();
+ }
+ return 0;
+}
+
+
+TfLiteType arm::app::Model::GetType() const
+{
+ return this->m_type;
+}
+
+TfLiteIntArray* arm::app::Model::GetInputShape(size_t index) const
+{
+ if (index < this->GetNumInputs()) {
+ return this->m_input.at(index)->dims;
+ }
+ return nullptr;
+}
+
+TfLiteIntArray* arm::app::Model::GetOutputShape(size_t index) const
+{
+ if (index < this->GetNumOutputs()) {
+ return this->m_output.at(index)->dims;
+ }
+ return nullptr;
+}
+
+bool arm::app::Model::ShowModelInfoHandler()
+{
+ if (!this->IsInited()) {
+ printf_err("Model is not initialised! Terminating processing.\n");
+ return false;
+ }
+
+ PrintTensorFlowVersion();
+    info("Model address: 0x%p\n", this->ModelPointer());
+    info("Model size: %" PRIu32 " bytes.\n", this->ModelSize());
+ info("Model info:\n");
+ this->LogInterpreterInfo();
+
+    info("The model is optimised for Ethos-U NPU: %s.\n", this->ContainsEthosUOperator() ? "yes" : "no");
+
+ return true;
+}
+
+const uint8_t* arm::app::Model::ModelPointer()
+{
+ return this->m_modelAddr;
+}
+
+uint32_t arm::app::Model::ModelSize()
+{
+ return this->m_modelSize;
+}
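For reference, a typical driver for the Model class added above, using the AdModel subclass from later in this patch. This is a sketch under assumptions: the arena size is illustrative, the model address and size must be supplied by the caller (e.g. from a memory-mapped model region), and the build must provide Ethos-U support since AdModel registers the Ethos-U custom operator.

    #include "AdModel.hpp"

    #include <cstdint>

    namespace {
        /* Illustrative arena size; the real requirement depends on the network. */
        alignas(16) uint8_t tensorArena[512 * 1024];
    }

    bool RunOnce(uint8_t* modelAddr, uint32_t modelSize)
    {
        arm::app::AdModel model;

        if (!model.Init(tensorArena, sizeof(tensorArena),
                        modelAddr, modelSize, nullptr)) {
            return false;
        }

        /* ... populate model.GetInputTensor(0) here ... */

        if (!model.RunInference()) {
            return false;
        }
        return model.GetOutputTensor(0) != nullptr;
    }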
diff --git a/source/application/api/common/source/TensorFlowLiteMicro.cc b/source/application/api/common/source/TensorFlowLiteMicro.cc
new file mode 100644
index 0000000..8738e5c
--- /dev/null
+++ b/source/application/api/common/source/TensorFlowLiteMicro.cc
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2021 Arm Limited. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "TensorFlowLiteMicro.hpp"
+
+void PrintTensorFlowVersion()
+{}
+
+arm::app::QuantParams arm::app::GetTensorQuantParams(TfLiteTensor* tensor)
+{
+ arm::app::QuantParams params;
+ if (kTfLiteAffineQuantization == tensor->quantization.type) {
+ auto* quantParams = (TfLiteAffineQuantization*) (tensor->quantization.params);
+ if (quantParams && 0 == quantParams->quantized_dimension) {
+ if (quantParams->scale->size) {
+ params.scale = quantParams->scale->data[0];
+ }
+ if (quantParams->zero_point->size) {
+ params.offset = quantParams->zero_point->data[0];
+ }
+ } else if (tensor->params.scale != 0.0) {
+ /* Legacy tensorflow quantisation parameters */
+ params.scale = tensor->params.scale;
+ params.offset = tensor->params.zero_point;
+ }
+ }
+ return params;
+}
+
+extern "C" void DebugLog(const char* s)
+{
+ puts(s);
+}
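The (scale, offset) pair returned by GetTensorQuantParams() above is what the use-case code uses to move between the quantised and floating point domains: real_value = scale * (quantised_value - offset). A small helper sketch, assuming an int8 tensor:

    #include "TensorFlowLiteMicro.hpp"

    float DequantizeElement(TfLiteTensor* tensor, size_t index)
    {
        const arm::app::QuantParams q = arm::app::GetTensorQuantParams(tensor);
        const int8_t* data = tflite::GetTensorData<int8_t>(tensor);

        /* real = scale * (quantised - offset) */
        return q.scale * (static_cast<float>(data[index]) - q.offset);
    }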
diff --git a/source/application/api/use_case/ad/CMakeLists.txt b/source/application/api/use_case/ad/CMakeLists.txt
new file mode 100644
index 0000000..224816f
--- /dev/null
+++ b/source/application/api/use_case/ad/CMakeLists.txt
@@ -0,0 +1,41 @@
+#----------------------------------------------------------------------------
+# Copyright (c) 2022 Arm Limited. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#----------------------------------------------------------------------------
+#########################################################
+# ANOMALY DETECTION API library #
+#########################################################
+cmake_minimum_required(VERSION 3.15.6)
+
+set(AD_API_TARGET ad_api)
+project(${AD_API_TARGET}
+ DESCRIPTION "Anomaly detection use case API library"
+ LANGUAGES C CXX)
+
+# Create static library
+add_library(${AD_API_TARGET} STATIC
+ src/AdModel.cc
+ src/AdProcessing.cc
+ src/AdMelSpectrogram.cc
+ src/MelSpectrogram.cc)
+
+target_include_directories(${AD_API_TARGET} PUBLIC include)
+
+target_link_libraries(${AD_API_TARGET} PUBLIC common_api)
+
+message(STATUS "*******************************************************")
+message(STATUS "Library : " ${AD_API_TARGET})
+message(STATUS "CMAKE_SYSTEM_PROCESSOR : " ${CMAKE_SYSTEM_PROCESSOR})
+message(STATUS "*******************************************************")
diff --git a/source/application/api/use_case/ad/include/AdMelSpectrogram.hpp b/source/application/api/use_case/ad/include/AdMelSpectrogram.hpp
new file mode 100644
index 0000000..05c5bfc
--- /dev/null
+++ b/source/application/api/use_case/ad/include/AdMelSpectrogram.hpp
@@ -0,0 +1,97 @@
+/*
+ * Copyright (c) 2021 Arm Limited. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef ADMELSPECTROGRAM_HPP
+#define ADMELSPECTROGRAM_HPP
+
+#include "MelSpectrogram.hpp"
+
+namespace arm {
+namespace app {
+namespace audio {
+
+    /* Class providing the anomaly-detection-specific Mel spectrogram calculations. */
+ class AdMelSpectrogram : public MelSpectrogram {
+
+ public:
+ static constexpr uint32_t ms_defaultSamplingFreq = 16000;
+ static constexpr uint32_t ms_defaultNumFbankBins = 64;
+ static constexpr uint32_t ms_defaultMelLoFreq = 0;
+ static constexpr uint32_t ms_defaultMelHiFreq = 8000;
+ static constexpr bool ms_defaultUseHtkMethod = false;
+
+ explicit AdMelSpectrogram(const size_t frameLen)
+ : MelSpectrogram(MelSpecParams(
+ ms_defaultSamplingFreq, ms_defaultNumFbankBins,
+ ms_defaultMelLoFreq, ms_defaultMelHiFreq,
+ frameLen, ms_defaultUseHtkMethod))
+ {}
+
+ AdMelSpectrogram() = delete;
+ ~AdMelSpectrogram() = default;
+
+ protected:
+
+ /**
+ * @brief Overrides base class implementation of this function.
+ * @param[in] fftVec Vector populated with FFT magnitudes
+ * @param[in] melFilterBank 2D Vector with filter bank weights
+ * @param[in] filterBankFilterFirst Vector containing the first indices of filter bank
+ * to be used for each bin.
+ * @param[in] filterBankFilterLast Vector containing the last indices of filter bank
+ * to be used for each bin.
+ * @param[out] melEnergies Pre-allocated vector of MEL energies to be
+ * populated.
+ * @return true if successful, false otherwise
+ */
+ virtual bool ApplyMelFilterBank(
+ std::vector<float>& fftVec,
+ std::vector<std::vector<float>>& melFilterBank,
+ std::vector<uint32_t>& filterBankFilterFirst,
+ std::vector<uint32_t>& filterBankFilterLast,
+ std::vector<float>& melEnergies) override;
+
+ /**
+         * @brief      Override of the base class implementation that converts
+         *             mel energies to a logarithmic scale. The difference from
+         *             the default behaviour is that the power is converted to
+         *             dB and subsequently clamped.
+ * @param[in,out] melEnergies - 1D vector of Mel energies
+ **/
+ virtual void ConvertToLogarithmicScale(std::vector<float>& melEnergies) override;
+
+ /**
+ * @brief Given the low and high Mel values, get the normaliser
+ * for weights to be applied when populating the filter
+ * bank. Override for the base class implementation.
+ * @param[in] leftMel - low Mel frequency value
+ * @param[in] rightMel - high Mel frequency value
+ * @param[in] useHTKMethod - bool to signal if HTK method is to be
+ * used for calculation
+ * @return Return float value to be applied
+ * when populating the filter bank.
+ */
+ virtual float GetMelFilterBankNormaliser(
+ const float& leftMel,
+ const float& rightMel,
+ const bool useHTKMethod) override;
+ };
+
+} /* namespace audio */
+} /* namespace app */
+} /* namespace arm */
+
+#endif /* ADMELSPECTROGRAM_HPP */
diff --git a/source/application/api/use_case/ad/include/AdModel.hpp b/source/application/api/use_case/ad/include/AdModel.hpp
new file mode 100644
index 0000000..0436a89
--- /dev/null
+++ b/source/application/api/use_case/ad/include/AdModel.hpp
@@ -0,0 +1,55 @@
+/*
+ * Copyright (c) 2021-2022 Arm Limited. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef AD_MODEL_HPP
+#define AD_MODEL_HPP
+
+#include "Model.hpp"
+
+extern const int g_FrameLength;
+extern const int g_FrameStride;
+extern const float g_ScoreThreshold;
+extern const float g_TrainingMean;
+
+namespace arm {
+namespace app {
+
+ class AdModel : public Model {
+
+ public:
+ /* Indices for the expected model - based on input tensor shape */
+ static constexpr uint32_t ms_inputRowsIdx = 1;
+ static constexpr uint32_t ms_inputColsIdx = 2;
+
+ protected:
+ /** @brief Gets the reference to op resolver interface class */
+ const tflite::MicroOpResolver& GetOpResolver() override;
+
+ /** @brief Adds operations to the op resolver instance */
+ bool EnlistOperations() override;
+
+ private:
+ /* Maximum number of individual operations that can be enlisted */
+ static constexpr int ms_maxOpCnt = 6;
+
+ /* A mutable op resolver instance */
+ tflite::MicroMutableOpResolver<ms_maxOpCnt> m_opResolver;
+ };
+
+} /* namespace app */
+} /* namespace arm */
+
+#endif /* AD_MODEL_HPP */
diff --git a/source/application/api/use_case/ad/include/AdProcessing.hpp b/source/application/api/use_case/ad/include/AdProcessing.hpp
new file mode 100644
index 0000000..abee75e
--- /dev/null
+++ b/source/application/api/use_case/ad/include/AdProcessing.hpp
@@ -0,0 +1,231 @@
+/*
+ * Copyright (c) 2022 Arm Limited. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef AD_PROCESSING_HPP
+#define AD_PROCESSING_HPP
+
+#include "BaseProcessing.hpp"
+#include "TensorFlowLiteMicro.hpp"
+#include "AudioUtils.hpp"
+#include "AdMelSpectrogram.hpp"
+#include "log_macros.h"
+
+namespace arm {
+namespace app {
+
+ /**
+ * @brief Pre-processing class for anomaly detection use case.
+ * Implements methods declared by BasePreProcess and anything else needed
+ * to populate input tensors ready for inference.
+ */
+ class AdPreProcess : public BasePreProcess {
+
+ public:
+ /**
+ * @brief Constructor for AdPreProcess class objects
+ * @param[in] inputTensor input tensor pointer from the tensor arena.
+ * @param[in] melSpectrogramFrameLen MEL spectrogram's frame length
+ * @param[in] melSpectrogramFrameStride MEL spectrogram's frame stride
+ * @param[in] adModelTrainingMean Training mean for the Anomaly detection model being used.
+ */
+ explicit AdPreProcess(TfLiteTensor* inputTensor,
+ uint32_t melSpectrogramFrameLen,
+ uint32_t melSpectrogramFrameStride,
+ float adModelTrainingMean);
+
+ ~AdPreProcess() = default;
+
+ /**
+ * @brief Function to invoke pre-processing and populate the input vector
+         * @param[in]  input      Pointer to input data. For anomaly detection, this is
+         *                        the pointer to the audio data.
+         * @param[in]  inputSize  Size of the data being passed in for pre-processing.
+ * @return True if successful, false otherwise.
+ */
+ bool DoPreProcess(const void* input, size_t inputSize) override;
+
+ /**
+ * @brief Getter function for audio window size computed when constructing
+ * the class object.
+ * @return Audio window size as 32 bit unsigned integer.
+ */
+ uint32_t GetAudioWindowSize();
+
+ /**
+ * @brief Getter function for audio window stride computed when constructing
+ * the class object.
+ * @return Audio window stride as 32 bit unsigned integer.
+ */
+ uint32_t GetAudioDataStride();
+
+ /**
+ * @brief Setter function for current audio index. This is only used for evaluating
+ * if previously computed features can be re-used from cache.
+ */
+ void SetAudioWindowIndex(uint32_t idx);
+
+ private:
+ bool m_validInstance{false}; /**< Indicates the current object is valid. */
+ uint32_t m_melSpectrogramFrameLen{}; /**< MEL spectrogram's window frame length */
+ uint32_t m_melSpectrogramFrameStride{}; /**< MEL spectrogram's window frame stride */
+ uint8_t m_inputResizeScale{}; /**< Downscaling factor for the MEL energy matrix. */
+ uint32_t m_numMelSpecVectorsInAudioStride{}; /**< Number of frames to move across the audio. */
+ uint32_t m_audioDataWindowSize{}; /**< Audio window size computed based on other parameters. */
+ uint32_t m_audioDataStride{}; /**< Audio window stride computed. */
+ uint32_t m_numReusedFeatureVectors{}; /**< Number of MEL vectors that can be re-used */
+ uint32_t m_audioWindowIndex{}; /**< Current audio window index (from audio's sliding window) */
+
+ audio::SlidingWindow<const int16_t> m_melWindowSlider; /**< Internal MEL spectrogram window slider */
+ audio::AdMelSpectrogram m_melSpec; /**< MEL spectrogram computation object */
+ std::function<void
+ (std::vector<int16_t>&, int, bool, size_t, size_t)> m_featureCalc; /**< Feature calculator object */
+ };
+
+ class AdPostProcess : public BasePostProcess {
+ public:
+ /**
+ * @brief Constructor for AdPostProcess object.
+ * @param[in] outputTensor Output tensor pointer.
+ */
+ explicit AdPostProcess(TfLiteTensor* outputTensor);
+
+ ~AdPostProcess() = default;
+
+ /**
+ * @brief Function to do the post-processing on the output tensor.
+ * @return True if successful, false otherwise.
+ */
+ bool DoPostProcess() override;
+
+ /**
+ * @brief Getter function for an element from the de-quantised output vector.
+         * @param[in]  index   Index of the element to be retrieved.
+         * @return     Element at the given index, as a 32-bit floating point number.
+ */
+ float GetOutputValue(uint32_t index);
+
+ private:
+ TfLiteTensor* m_outputTensor{}; /**< Output tensor pointer */
+ std::vector<float> m_dequantizedOutputVec{}; /**< Internal output vector */
+
+ /**
+ * @brief De-quantizes and flattens the output tensor into a vector.
+ * @tparam T template parameter to indicate data type.
+ * @return True if successful, false otherwise.
+ */
+ template<typename T>
+ bool Dequantize()
+ {
+ TfLiteTensor* tensor = this->m_outputTensor;
+ if (tensor == nullptr) {
+ printf_err("Invalid output tensor.\n");
+ return false;
+ }
+ T* tensorData = tflite::GetTensorData<T>(tensor);
+
+ uint32_t totalOutputSize = 1;
+ for (int inputDim = 0; inputDim < tensor->dims->size; inputDim++){
+ totalOutputSize *= tensor->dims->data[inputDim];
+ }
+
+ /* For getting the floating point values, we need quantization parameters */
+ QuantParams quantParams = GetTensorQuantParams(tensor);
+
+ this->m_dequantizedOutputVec = std::vector<float>(totalOutputSize, 0);
+
+ for (size_t i = 0; i < totalOutputSize; ++i) {
+ this->m_dequantizedOutputVec[i] = quantParams.scale * (tensorData[i] - quantParams.offset);
+ }
+
+ return true;
+ }
+ };
+
+ /* Templated instances available: */
+ template bool AdPostProcess::Dequantize<int8_t>();
+
+ /**
+ * @brief Generic feature calculator factory.
+ *
+ * Returns lambda function to compute features using features cache.
+ * Real features math is done by a lambda function provided as a parameter.
+ * Features are written to input tensor memory.
+ *
+ * @tparam T feature vector type.
+ * @param inputTensor model input tensor pointer.
+ * @param cacheSize number of feature vectors to cache. Defined by the sliding window overlap.
+ * @param compute features calculator function.
+ * @return lambda function to compute features.
+ */
+ template<class T>
+ std::function<void (std::vector<int16_t>&, size_t, bool, size_t, size_t)>
+ FeatureCalc(TfLiteTensor* inputTensor, size_t cacheSize,
+ std::function<std::vector<T> (std::vector<int16_t>& )> compute)
+ {
+        /* Feature cache to be captured by the lambda function. */
+ static std::vector<std::vector<T>> featureCache = std::vector<std::vector<T>>(cacheSize);
+
+ return [=](std::vector<int16_t>& audioDataWindow,
+ size_t index,
+ bool useCache,
+ size_t featuresOverlapIndex,
+ size_t resizeScale)
+ {
+ T* tensorData = tflite::GetTensorData<T>(inputTensor);
+ std::vector<T> features;
+
+            /* Reuse features from the cache if the cache is ready and the sliding windows overlap.
+             * The overlap is at the start of the sliding window and is the size of the feature cache. */
+ if (useCache && index < featureCache.size()) {
+ features = std::move(featureCache[index]);
+ } else {
+ features = std::move(compute(audioDataWindow));
+ }
+ auto size = features.size() / resizeScale;
+ auto sizeBytes = sizeof(T);
+
+ /* Input should be transposed and "resized" by skipping elements. */
+ for (size_t outIndex = 0; outIndex < size; outIndex++) {
+ std::memcpy(tensorData + (outIndex*size) + index, &features[outIndex*resizeScale], sizeBytes);
+ }
+
+            /* Start renewing the cache as soon as the iteration moves past the window overlap. */
+ if (index >= featuresOverlapIndex / resizeScale) {
+ featureCache[index - featuresOverlapIndex / resizeScale] = std::move(features);
+ }
+ };
+ }
+
+    template std::function<void (std::vector<int16_t>&, size_t, bool, size_t, size_t)>
+ FeatureCalc<int8_t>(TfLiteTensor* inputTensor,
+ size_t cacheSize,
+ std::function<std::vector<int8_t> (std::vector<int16_t>&)> compute);
+
+ template std::function<void(std::vector<int16_t>&, size_t, bool, size_t, size_t)>
+ FeatureCalc<float>(TfLiteTensor *inputTensor,
+ size_t cacheSize,
+ std::function<std::vector<float>(std::vector<int16_t>&)> compute);
+
+ std::function<void (std::vector<int16_t>&, int, bool, size_t, size_t)>
+ GetFeatureCalculator(audio::AdMelSpectrogram& melSpec,
+ TfLiteTensor* inputTensor,
+ size_t cacheSize,
+ float trainingMean);
+
+} /* namespace app */
+} /* namespace arm */
+
+#endif /* AD_PROCESSING_HPP */
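The two classes above are designed to sandwich a single inference. A hedged end-to-end sketch; it assumes the externs declared in AdModel.hpp (g_FrameLength, g_FrameStride, g_TrainingMean) are defined by the generated model files, and that audioLen is at least GetAudioWindowSize().

    #include "AdModel.hpp"
    #include "AdProcessing.hpp"

    bool ProcessOneWindow(arm::app::Model& model,
                          const int16_t* audio, size_t audioLen,
                          uint32_t windowIndex)
    {
        arm::app::AdPreProcess preProcess(model.GetInputTensor(0),
                                          g_FrameLength,
                                          g_FrameStride,
                                          g_TrainingMean);
        arm::app::AdPostProcess postProcess(model.GetOutputTensor(0));

        preProcess.SetAudioWindowIndex(windowIndex);

        if (!preProcess.DoPreProcess(audio, audioLen) ||
            !model.RunInference() ||
            !postProcess.DoPostProcess()) {
            return false;
        }

        debug("Softmax output[0]: %f\n", postProcess.GetOutputValue(0));
        return true;
    }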
diff --git a/source/application/api/use_case/ad/include/MelSpectrogram.hpp b/source/application/api/use_case/ad/include/MelSpectrogram.hpp
new file mode 100644
index 0000000..d3ea3f7
--- /dev/null
+++ b/source/application/api/use_case/ad/include/MelSpectrogram.hpp
@@ -0,0 +1,234 @@
+/*
+ * Copyright (c) 2021 Arm Limited. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef MELSPECTROGRAM_HPP
+#define MELSPECTROGRAM_HPP
+
+#include "PlatformMath.hpp"
+
+#include <vector>
+#include <cstdint>
+#include <cmath>
+#include <limits>
+#include <string>
+
+namespace arm {
+namespace app {
+namespace audio {
+
+ /* Mel Spectrogram consolidated parameters */
+ class MelSpecParams {
+ public:
+ float m_samplingFreq;
+ uint32_t m_numFbankBins;
+ float m_melLoFreq;
+ float m_melHiFreq;
+ uint32_t m_frameLen;
+ uint32_t m_frameLenPadded;
+ bool m_useHtkMethod;
+
+ /** @brief Constructor */
+ MelSpecParams(const float samplingFreq, const uint32_t numFbankBins,
+ const float melLoFreq, const float melHiFreq,
+ const uint32_t frameLen, const bool useHtkMethod);
+
+ MelSpecParams() = delete;
+ ~MelSpecParams() = default;
+
+ /** @brief String representation of parameters */
+ std::string Str() const;
+ };
+
+ /**
+ * @brief Class for Mel Spectrogram feature extraction.
+ * Based on https://github.com/ARM-software/ML-KWS-for-MCU/blob/master/Deployment/Source/MFCC/mfcc.cpp
+ * This class is designed to be generic and self-sufficient but
+ * certain calculation routines can be overridden to accommodate
+ * use-case specific requirements.
+ */
+ class MelSpectrogram {
+
+ public:
+ /**
+ * @brief Extract Mel Spectrogram for one single small frame of
+ * audio data e.g. 640 samples.
+ * @param[in] audioData Vector of audio samples to calculate
+ * features for.
+         * @param[in]  trainingMean  Value to subtract from the computed mel spectrogram, default 0.
+ * @return Vector of extracted Mel Spectrogram features.
+ **/
+ std::vector<float> ComputeMelSpec(const std::vector<int16_t>& audioData, float trainingMean = 0);
+
+ /**
+ * @brief Constructor
+ * @param[in] params Mel Spectrogram parameters
+ */
+ explicit MelSpectrogram(const MelSpecParams& params);
+
+ MelSpectrogram() = delete;
+ ~MelSpectrogram() = default;
+
+ /** @brief Initialise */
+ void Init();
+
+ /**
+ * @brief Extract Mel Spectrogram features and quantise for one single small
+ * frame of audio data e.g. 640 samples.
+ * @param[in] audioData Vector of audio samples to calculate
+ * features for.
+ * @param[in] quantScale quantisation scale.
+ * @param[in] quantOffset quantisation offset.
+ * @param[in] trainingMean training mean.
+ * @return Vector of extracted quantised Mel Spectrogram features.
+ **/
+ template<typename T>
+ std::vector<T> MelSpecComputeQuant(const std::vector<int16_t>& audioData,
+ const float quantScale,
+ const int quantOffset,
+ float trainingMean = 0)
+ {
+ this->ComputeMelSpec(audioData, trainingMean);
+ float minVal = std::numeric_limits<T>::min();
+ float maxVal = std::numeric_limits<T>::max();
+
+ std::vector<T> melSpecOut(this->m_params.m_numFbankBins);
+ const size_t numFbankBins = this->m_params.m_numFbankBins;
+
+ /* Quantize to T. */
+ for (size_t k = 0; k < numFbankBins; ++k) {
+ auto quantizedEnergy = std::round(((this->m_melEnergies[k]) / quantScale) + quantOffset);
+ melSpecOut[k] = static_cast<T>(std::min<float>(std::max<float>(quantizedEnergy, minVal), maxVal));
+ }
+
+ return melSpecOut;
+ }
+
+ /* Constants */
+ static constexpr float ms_logStep = /*logf(6.4)*/ 1.8562979903656 / 27.0;
+ static constexpr float ms_freqStep = 200.0 / 3;
+ static constexpr float ms_minLogHz = 1000.0;
+ static constexpr float ms_minLogMel = ms_minLogHz / ms_freqStep;
+
+ protected:
+ /**
+ * @brief Project input frequency to Mel Scale.
+ * @param[in] freq input frequency in floating point
+ * @param[in] useHTKMethod bool to signal if HTK method is to be
+ * used for calculation
+ * @return Mel transformed frequency in floating point
+ **/
+ static float MelScale(const float freq,
+ const bool useHTKMethod = true);
+
+ /**
+ * @brief Inverse Mel transform - convert MEL warped frequency
+ * back to normal frequency
+ * @param[in] melFreq Mel frequency in floating point
+ * @param[in] useHTKMethod bool to signal if HTK method is to be
+ * used for calculation
+ * @return Real world frequency in floating point
+ **/
+ static float InverseMelScale(const float melFreq,
+ const bool useHTKMethod = true);
+
+ /**
+ * @brief Populates MEL energies after applying the MEL filter
+ * bank weights and adding them up to be placed into
+ * bins, according to the filter bank's first and last
+ * indices (pre-computed for each filter bank element
+ * by CreateMelFilterBank function).
+ * @param[in] fftVec Vector populated with FFT magnitudes
+ * @param[in] melFilterBank 2D Vector with filter bank weights
+ * @param[in] filterBankFilterFirst Vector containing the first indices of filter bank
+ * to be used for each bin.
+ * @param[in] filterBankFilterLast Vector containing the last indices of filter bank
+ * to be used for each bin.
+ * @param[out] melEnergies Pre-allocated vector of MEL energies to be
+ * populated.
+ * @return true if successful, false otherwise
+ */
+ virtual bool ApplyMelFilterBank(
+ std::vector<float>& fftVec,
+ std::vector<std::vector<float>>& melFilterBank,
+ std::vector<uint32_t>& filterBankFilterFirst,
+ std::vector<uint32_t>& filterBankFilterLast,
+ std::vector<float>& melEnergies);
+
+ /**
+         * @brief       Converts the Mel energies to logarithmic scale.
+ * @param[in,out] melEnergies 1D vector of Mel energies
+ **/
+ virtual void ConvertToLogarithmicScale(std::vector<float>& melEnergies);
+
+ /**
+ * @brief Given the low and high Mel values, get the normaliser
+ * for weights to be applied when populating the filter
+ * bank.
+ * @param[in] leftMel low Mel frequency value
+ * @param[in] rightMel high Mel frequency value
+ * @param[in] useHTKMethod bool to signal if HTK method is to be
+ * used for calculation
+ * @return Return float value to be applied
+ * when populating the filter bank.
+ */
+ virtual float GetMelFilterBankNormaliser(
+ const float& leftMel,
+ const float& rightMel,
+ const bool useHTKMethod);
+
+ private:
+ MelSpecParams m_params;
+ std::vector<float> m_frame;
+ std::vector<float> m_buffer;
+ std::vector<float> m_melEnergies;
+ std::vector<float> m_windowFunc;
+ std::vector<std::vector<float>> m_melFilterBank;
+ std::vector<uint32_t> m_filterBankFilterFirst;
+ std::vector<uint32_t> m_filterBankFilterLast;
+ bool m_filterBankInitialised;
+ arm::app::math::FftInstance m_fftInstance;
+
+ /**
+ * @brief Initialises the filter banks.
+ **/
+ void InitMelFilterBank();
+
+ /**
+ * @brief Signals whether the instance of MelSpectrogram has had its
+ * required buffers initialised
+ * @return True if initialised, false otherwise
+ **/
+ bool IsMelFilterBankInited() const;
+
+ /**
+ * @brief Create mel filter banks for Mel Spectrogram calculation.
+ * @return 2D vector of floats
+ **/
+ std::vector<std::vector<float>> CreateMelFilterBank();
+
+ /**
+ * @brief Computes the magnitude from an interleaved complex array
+ **/
+ void ConvertToPowerSpectrum();
+
+ };
+
+} /* namespace audio */
+} /* namespace app */
+} /* namespace arm */
+
+
+#endif /* MELSPECTROGRAM_HPP */
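MelSpecComputeQuant() above quantises each mel energy with q = round(energy / quantScale + quantOffset), clamped to the numeric range of T. A hedged usage sketch; the frame length and quantisation parameters here are illustrative (in practice they come from the model's input tensor, as in GetFeatureCalculator):

    #include "MelSpectrogram.hpp"

    std::vector<int8_t> QuantisedMelSpec(const std::vector<int16_t>& audioFrame)
    {
        using namespace arm::app::audio;

        /* 16 kHz, 64 filter banks, 0 Hz - 8 kHz, 1024-sample frame, Slaney scale. */
        static MelSpecParams params(16000.0f, 64, 0.0f, 8000.0f, 1024, false);
        static MelSpectrogram melSpec(params);
        melSpec.Init();

        const float quantScale = 0.1f; /* Illustrative. */
        const int quantOffset = -128;  /* Illustrative. */
        return melSpec.MelSpecComputeQuant<int8_t>(audioFrame, quantScale, quantOffset);
    }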
diff --git a/source/application/api/use_case/ad/src/AdMelSpectrogram.cc b/source/application/api/use_case/ad/src/AdMelSpectrogram.cc
new file mode 100644
index 0000000..14b9323
--- /dev/null
+++ b/source/application/api/use_case/ad/src/AdMelSpectrogram.cc
@@ -0,0 +1,93 @@
+/*
+ * Copyright (c) 2021 Arm Limited. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "AdMelSpectrogram.hpp"
+#include "PlatformMath.hpp"
+#include "log_macros.h"
+
+#include <cfloat>
+
+namespace arm {
+namespace app {
+namespace audio {
+
+ bool AdMelSpectrogram::ApplyMelFilterBank(
+ std::vector<float>& fftVec,
+ std::vector<std::vector<float>>& melFilterBank,
+ std::vector<uint32_t>& filterBankFilterFirst,
+ std::vector<uint32_t>& filterBankFilterLast,
+ std::vector<float>& melEnergies)
+ {
+ const size_t numBanks = melEnergies.size();
+
+ if (numBanks != filterBankFilterFirst.size() ||
+ numBanks != filterBankFilterLast.size()) {
+ printf_err("unexpected filter bank lengths\n");
+ return false;
+ }
+
+ for (size_t bin = 0; bin < numBanks; ++bin) {
+ auto filterBankIter = melFilterBank[bin].begin();
+ auto end = melFilterBank[bin].end();
+ float melEnergy = FLT_MIN; /* Avoid log of zero at later stages. */
+ const uint32_t firstIndex = filterBankFilterFirst[bin];
+            const uint32_t lastIndex = std::min<uint32_t>(filterBankFilterLast[bin], fftVec.size() - 1);
+
+ for (uint32_t i = firstIndex; i <= lastIndex && filterBankIter != end; ++i) {
+ melEnergy += (*filterBankIter++ * fftVec[i]);
+ }
+
+ melEnergies[bin] = melEnergy;
+ }
+
+ return true;
+ }
+
+ void AdMelSpectrogram::ConvertToLogarithmicScale(
+ std::vector<float>& melEnergies)
+ {
+ /* Container for natural logarithms of mel energies */
+ std::vector <float> vecLogEnergies(melEnergies.size(), 0.f);
+
+        /* Because we are taking natural logs, we need to multiply by log10(e)
+         * to obtain log10 values. These are then scaled by 10 to express the
+         * power in decibels. */
+ constexpr float multiplier = 10.0 * /* default scalar */
+ 0.4342944819032518; /* log10f(std::exp(1.0))*/
+
+ /* Take log of the whole vector */
+ math::MathUtils::VecLogarithmF32(melEnergies, vecLogEnergies);
+
+ /* Scale the log values. */
+ for (auto iterM = melEnergies.begin(), iterL = vecLogEnergies.begin();
+ iterM != melEnergies.end() && iterL != vecLogEnergies.end(); ++iterM, ++iterL) {
+
+ *iterM = *iterL * multiplier;
+ }
+ }
+
+ float AdMelSpectrogram::GetMelFilterBankNormaliser(
+ const float& leftMel,
+ const float& rightMel,
+ const bool useHTKMethod)
+ {
+ /* Slaney normalization for mel weights. */
+ return (2.0f / (AdMelSpectrogram::InverseMelScale(rightMel, useHTKMethod) -
+ AdMelSpectrogram::InverseMelScale(leftMel, useHTKMethod)));
+ }
+
+} /* namespace audio */
+} /* namespace app */
+} /* namespace arm */
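The multiplier above relies on the change-of-base identity 10 * log10(x) = (10 * log10(e)) * ln(x): the class computes natural logs in bulk via VecLogarithmF32 and then rescales to decibels. A stand-alone check of the identity:

    #include <cassert>
    #include <cmath>

    void CheckDbIdentity(float x)
    {
        constexpr float multiplier = 10.0f * 0.4342944819032518f; /* 10 * log10(e) */
        const float viaLog10 = 10.0f * std::log10(x);
        const float viaNaturalLog = multiplier * std::log(x);
        assert(std::fabs(viaLog10 - viaNaturalLog) < 1e-3f);
    }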
diff --git a/source/application/api/use_case/ad/src/AdModel.cc b/source/application/api/use_case/ad/src/AdModel.cc
new file mode 100644
index 0000000..961c260
--- /dev/null
+++ b/source/application/api/use_case/ad/src/AdModel.cc
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2021 Arm Limited. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "AdModel.hpp"
+#include "log_macros.h"
+
+const tflite::MicroOpResolver& arm::app::AdModel::GetOpResolver()
+{
+ return this->m_opResolver;
+}
+
+bool arm::app::AdModel::EnlistOperations()
+{
+ this->m_opResolver.AddAveragePool2D();
+ this->m_opResolver.AddConv2D();
+ this->m_opResolver.AddDepthwiseConv2D();
+ this->m_opResolver.AddRelu6();
+ this->m_opResolver.AddReshape();
+
+ if (kTfLiteOk == this->m_opResolver.AddEthosU()) {
+ info("Added %s support to op resolver\n",
+ tflite::GetString_ETHOSU());
+ } else {
+ printf_err("Failed to add Arm NPU support to op resolver.");
+ return false;
+ }
+ return true;
+}
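EnlistOperations() registers five builtin operators plus the Ethos-U custom operator, which is why ms_maxOpCnt in AdModel.hpp is 6: the MicroMutableOpResolver template capacity must cover every Add*() call. A sketch of how a variant model would grow both together (the extra operator here is purely illustrative, not part of this patch):

    /* In the header: raise the capacity to cover one more operator. */
    static constexpr int ms_maxOpCnt = 7;
    tflite::MicroMutableOpResolver<ms_maxOpCnt> m_opResolver;

    /* In EnlistOperations(): the additional registration. */
    this->m_opResolver.AddSoftmax();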
diff --git a/source/application/api/use_case/ad/src/AdProcessing.cc b/source/application/api/use_case/ad/src/AdProcessing.cc
new file mode 100644
index 0000000..fb26a83
--- /dev/null
+++ b/source/application/api/use_case/ad/src/AdProcessing.cc
@@ -0,0 +1,210 @@
+/*
+ * Copyright (c) 2022 Arm Limited. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "AdProcessing.hpp"
+
+#include "AdModel.hpp"
+
+namespace arm {
+namespace app {
+
+AdPreProcess::AdPreProcess(TfLiteTensor* inputTensor,
+ uint32_t melSpectrogramFrameLen,
+ uint32_t melSpectrogramFrameStride,
+ float adModelTrainingMean):
+ m_validInstance{false},
+ m_melSpectrogramFrameLen{melSpectrogramFrameLen},
+ m_melSpectrogramFrameStride{melSpectrogramFrameStride},
+ /**< Model is trained on features downsampled 2x */
+ m_inputResizeScale{2},
+ /**< We are choosing to move by 20 frames across the audio for each inference. */
+ m_numMelSpecVectorsInAudioStride{20},
+ m_audioDataStride{m_numMelSpecVectorsInAudioStride * melSpectrogramFrameStride},
+ m_melSpec{melSpectrogramFrameLen}
+{
+ UNUSED(this->m_melSpectrogramFrameStride);
+
+ if (!inputTensor) {
+ printf_err("Invalid input tensor provided to pre-process\n");
+ return;
+ }
+
+ TfLiteIntArray* inputShape = inputTensor->dims;
+
+ if (!inputShape) {
+ printf_err("Invalid input tensor dims\n");
+ return;
+ }
+
+ const uint32_t kNumRows = inputShape->data[AdModel::ms_inputRowsIdx];
+ const uint32_t kNumCols = inputShape->data[AdModel::ms_inputColsIdx];
+
+ /* Deduce the data length required for 1 inference from the network parameters. */
+ this->m_audioDataWindowSize = (((this->m_inputResizeScale * kNumCols) - 1) *
+ melSpectrogramFrameStride) +
+ melSpectrogramFrameLen;
+ this->m_numReusedFeatureVectors = kNumRows -
+ (this->m_numMelSpecVectorsInAudioStride /
+ this->m_inputResizeScale);
+ this->m_melSpec.Init();
+
+ /* Creating a Mel Spectrogram sliding window for the data required for 1 inference.
+ * "resizing" done here by multiplying stride by resize scale. */
+ this->m_melWindowSlider = audio::SlidingWindow<const int16_t>(
+ nullptr, /* to be populated later. */
+ this->m_audioDataWindowSize,
+ melSpectrogramFrameLen,
+ melSpectrogramFrameStride * this->m_inputResizeScale);
+
+ /* Construct feature calculation function. */
+ this->m_featureCalc = GetFeatureCalculator(this->m_melSpec, inputTensor,
+ this->m_numReusedFeatureVectors,
+ adModelTrainingMean);
+ this->m_validInstance = true;
+}
+
+bool AdPreProcess::DoPreProcess(const void* input, size_t inputSize)
+{
+ /* Check that we have a valid instance. */
+ if (!this->m_validInstance) {
+ printf_err("Invalid pre-processor instance\n");
+ return false;
+ }
+
+    /* We expect the input to be at least as long as the window size that the
+     * MEL spectrogram sliding window was initialised with. */
+ if (!input || inputSize < this->m_audioDataWindowSize) {
+ printf_err("Invalid input provided for pre-processing\n");
+ return false;
+ }
+
+    /* We moved to the next window - set the feature sliding window to the new address. */
+ this->m_melWindowSlider.Reset(static_cast<const int16_t*>(input));
+
+ /* The first window does not have cache ready. */
+ const bool useCache = this->m_audioWindowIndex > 0 && this->m_numReusedFeatureVectors > 0;
+
+ /* Start calculating features inside one audio sliding window. */
+ while (this->m_melWindowSlider.HasNext()) {
+ const int16_t* melSpecWindow = this->m_melWindowSlider.Next();
+ std::vector<int16_t> melSpecAudioData = std::vector<int16_t>(
+ melSpecWindow,
+ melSpecWindow + this->m_melSpectrogramFrameLen);
+
+ /* Compute features for this window and write them to input tensor. */
+ this->m_featureCalc(melSpecAudioData,
+ this->m_melWindowSlider.Index(),
+ useCache,
+ this->m_numMelSpecVectorsInAudioStride,
+ this->m_inputResizeScale);
+ }
+
+ return true;
+}
+
+uint32_t AdPreProcess::GetAudioWindowSize()
+{
+ return this->m_audioDataWindowSize;
+}
+
+uint32_t AdPreProcess::GetAudioDataStride()
+{
+ return this->m_audioDataStride;
+}
+
+void AdPreProcess::SetAudioWindowIndex(uint32_t idx)
+{
+ this->m_audioWindowIndex = idx;
+}
+
+AdPostProcess::AdPostProcess(TfLiteTensor* outputTensor) :
+ m_outputTensor {outputTensor}
+{}
+
+bool AdPostProcess::DoPostProcess()
+{
+    if (this->m_outputTensor == nullptr) {
+        printf_err("Invalid output tensor.\n");
+        return false;
+    }
+
+    switch (this->m_outputTensor->type) {
+ case kTfLiteInt8:
+ this->Dequantize<int8_t>();
+ break;
+ default:
+            printf_err("Unsupported tensor type\n");
+ return false;
+ }
+
+ math::MathUtils::SoftmaxF32(this->m_dequantizedOutputVec);
+ return true;
+}
+
+float AdPostProcess::GetOutputValue(uint32_t index)
+{
+ if (index < this->m_dequantizedOutputVec.size()) {
+ return this->m_dequantizedOutputVec[index];
+ }
+ printf_err("Invalid index for output\n");
+ return 0.0;
+}
+
+std::function<void (std::vector<int16_t>&, int, bool, size_t, size_t)>
+GetFeatureCalculator(audio::AdMelSpectrogram& melSpec,
+ TfLiteTensor* inputTensor,
+ size_t cacheSize,
+ float trainingMean)
+{
+ std::function<void (std::vector<int16_t>&, size_t, bool, size_t, size_t)> melSpecFeatureCalc;
+
+ TfLiteQuantization quant = inputTensor->quantization;
+
+ if (kTfLiteAffineQuantization == quant.type) {
+
+ auto* quantParams = static_cast<TfLiteAffineQuantization*>(quant.params);
+ const float quantScale = quantParams->scale->data[0];
+ const int quantOffset = quantParams->zero_point->data[0];
+
+ switch (inputTensor->type) {
+ case kTfLiteInt8: {
+ melSpecFeatureCalc = FeatureCalc<int8_t>(
+ inputTensor,
+ cacheSize,
+ [=, &melSpec](std::vector<int16_t>& audioDataWindow) {
+ return melSpec.MelSpecComputeQuant<int8_t>(
+ audioDataWindow,
+ quantScale,
+ quantOffset,
+ trainingMean);
+ }
+ );
+ break;
+ }
+ default:
+ printf_err("Tensor type %s not supported\n", TfLiteTypeGetName(inputTensor->type));
+ }
+ } else {
+ melSpecFeatureCalc = FeatureCalc<float>(
+ inputTensor,
+ cacheSize,
+ [=, &melSpec](
+ std::vector<int16_t>& audioDataWindow) {
+ return melSpec.ComputeMelSpec(
+ audioDataWindow,
+ trainingMean);
+ });
+ }
+ return melSpecFeatureCalc;
+}
+
+} /* namespace app */
+} /* namespace arm */
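The caching performed by FeatureCalc (declared in AdProcessing.hpp) exploits the overlap between consecutive audio windows: when the window advances, the leading feature rows are identical to rows already computed for the previous window, so they are moved out of the cache instead of recomputed, and fresh rows past the overlap point refill the cache for the next window. A simplified stand-alone analogue of the pattern, ignoring the 2x resize scaling used above:

    #include <cstddef>
    #include <utility>
    #include <vector>

    struct FeatureCache {
        explicit FeatureCache(size_t overlap) : rows(overlap), overlap(overlap) {}

        /* Returns one feature row, reusing the cache where windows overlap,
         * and saves rows past the overlap point for the next window. */
        std::vector<float> Get(size_t rowIndex, bool useCache,
                               std::vector<float> computed)
        {
            std::vector<float> features = (useCache && rowIndex < rows.size())
                                        ? std::move(rows[rowIndex])
                                        : std::move(computed);
            if (rowIndex >= overlap) {
                rows[rowIndex - overlap] = features; /* Copy kept for reuse. */
            }
            return features;
        }

        std::vector<std::vector<float>> rows;
        size_t overlap;
    };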
diff --git a/source/application/api/use_case/ad/src/MelSpectrogram.cc b/source/application/api/use_case/ad/src/MelSpectrogram.cc
new file mode 100644
index 0000000..ff0c536
--- /dev/null
+++ b/source/application/api/use_case/ad/src/MelSpectrogram.cc
@@ -0,0 +1,316 @@
+/*
+ * Copyright (c) 2021 Arm Limited. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "MelSpectrogram.hpp"
+
+#include "PlatformMath.hpp"
+#include "log_macros.h"
+
+#include <cfloat>
+#include <cinttypes>
+
+namespace arm {
+namespace app {
+namespace audio {
+
+ MelSpecParams::MelSpecParams(
+ const float samplingFreq,
+ const uint32_t numFbankBins,
+ const float melLoFreq,
+ const float melHiFreq,
+ const uint32_t frameLen,
+ const bool useHtkMethod):
+ m_samplingFreq(samplingFreq),
+ m_numFbankBins(numFbankBins),
+ m_melLoFreq(melLoFreq),
+ m_melHiFreq(melHiFreq),
+ m_frameLen(frameLen),
+
+ /* Smallest power of 2 >= frame length. */
+ m_frameLenPadded(pow(2, ceil((log(frameLen)/log(2))))),
+ m_useHtkMethod(useHtkMethod)
+ {}
+
+ std::string MelSpecParams::Str() const
+ {
+ char strC[1024];
+ snprintf(strC, sizeof(strC) - 1, "\n \
+ \n\t Sampling frequency: %f\
+ \n\t Number of filter banks: %" PRIu32 "\
+ \n\t Mel frequency limit (low): %f\
+ \n\t Mel frequency limit (high): %f\
+ \n\t Frame length: %" PRIu32 "\
+ \n\t Padded frame length: %" PRIu32 "\
+ \n\t Using HTK for Mel scale: %s\n",
+ this->m_samplingFreq, this->m_numFbankBins, this->m_melLoFreq,
+ this->m_melHiFreq, this->m_frameLen,
+ this->m_frameLenPadded, this->m_useHtkMethod ? "yes" : "no");
+ return std::string{strC};
+ }
+
+ MelSpectrogram::MelSpectrogram(const MelSpecParams& params):
+ m_params(params),
+ m_filterBankInitialised(false)
+ {
+ this->m_buffer = std::vector<float>(
+ this->m_params.m_frameLenPadded, 0.0);
+ this->m_frame = std::vector<float>(
+ this->m_params.m_frameLenPadded, 0.0);
+ this->m_melEnergies = std::vector<float>(
+ this->m_params.m_numFbankBins, 0.0);
+
+ this->m_windowFunc = std::vector<float>(this->m_params.m_frameLen);
+ const auto multiplier = static_cast<float>(2 * M_PI / this->m_params.m_frameLen);
+
+ /* Create Hann window function: w[i] = 0.5 - 0.5 * cos(2 * pi * i / frameLen). */
+ for (size_t i = 0; i < this->m_params.m_frameLen; ++i) {
+ this->m_windowFunc[i] = (0.5 - (0.5 *
+ math::MathUtils::CosineF32(static_cast<float>(i) * multiplier)));
+ }
+
+ math::MathUtils::FftInitF32(this->m_params.m_frameLenPadded, this->m_fftInstance);
+ debug("Instantiated Mel Spectrogram object: %s\n", this->m_params.Str().c_str());
+ }
+
+ void MelSpectrogram::Init()
+ {
+ this->InitMelFilterBank();
+ }
+
+ float MelSpectrogram::MelScale(const float freq, const bool useHTKMethod)
+ {
+ if (useHTKMethod) {
+ return 1127.0f * logf (1.0f + freq / 700.0f);
+ } else {
+ /* Slaney formula for mel scale. */
+ float mel = freq / ms_freqStep;
+
+ if (freq >= ms_minLogHz) {
+ mel = ms_minLogMel + logf(freq / ms_minLogHz) / ms_logStep;
+ }
+ return mel;
+ }
+ }
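+ /* Sanity check for the formulas above: with the HTK method,
+ * MelScale(1000.0f, true) = 1127 * ln(1 + 1000/700) ~= 1000 mel by
+ * construction, while the Slaney variant is linear below ms_minLogHz
+ * and logarithmic above it. */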
+
+ float MelSpectrogram::InverseMelScale(const float melFreq, const bool useHTKMethod)
+ {
+ if (useHTKMethod) {
+ return 700.0f * (expf (melFreq / 1127.0f) - 1.0f);
+ } else {
+ /* Slaney formula for inverse mel scale. */
+ float freq = ms_freqStep * melFreq;
+
+ if (melFreq >= ms_minLogMel) {
+ freq = ms_minLogHz * expf(ms_logStep * (melFreq - ms_minLogMel));
+ }
+ return freq;
+ }
+ }
+
+ bool MelSpectrogram::ApplyMelFilterBank(
+ std::vector<float>& fftVec,
+ std::vector<std::vector<float>>& melFilterBank,
+ std::vector<uint32_t>& filterBankFilterFirst,
+ std::vector<uint32_t>& filterBankFilterLast,
+ std::vector<float>& melEnergies)
+ {
+ const size_t numBanks = melEnergies.size();
+
+ if (numBanks != filterBankFilterFirst.size() ||
+ numBanks != filterBankFilterLast.size()) {
+ printf_err("unexpected filter bank lengths\n");
+ return false;
+ }
+
+ for (size_t bin = 0; bin < numBanks; ++bin) {
+ auto filterBankIter = melFilterBank[bin].begin();
+ auto end = melFilterBank[bin].end();
+ float melEnergy = FLT_MIN; /* Avoid log of zero at later stages */
+ const uint32_t firstIndex = filterBankFilterFirst[bin];
+ const uint32_t lastIndex = std::min<int32_t>(filterBankFilterLast[bin], fftVec.size() - 1);
+
+ for (uint32_t i = firstIndex; i <= lastIndex && filterBankIter != end; ++i) {
+ float energyRep = math::MathUtils::SqrtF32(fftVec[i]);
+ melEnergy += (*filterBankIter++ * energyRep);
+ }
+
+ melEnergies[bin] = melEnergy;
+ }
+
+ return true;
+ }
+
+ void MelSpectrogram::ConvertToLogarithmicScale(std::vector<float>& melEnergies)
+ {
+ for (float& melEnergy : melEnergies) {
+ melEnergy = logf(melEnergy);
+ }
+ }
+
+ void MelSpectrogram::ConvertToPowerSpectrum()
+ {
+ const uint32_t halfDim = this->m_buffer.size() / 2;
+
+ /* Handle the first (DC) and last (Nyquist) bins: the packed
+ * real-FFT output stores these purely real values at indices
+ * 0 and 1 respectively. */
+ float firstEnergy = this->m_buffer[0] * this->m_buffer[0];
+ float lastEnergy = this->m_buffer[1] * this->m_buffer[1];
+
+ math::MathUtils::ComplexMagnitudeSquaredF32(
+ this->m_buffer.data(),
+ this->m_buffer.size(),
+ this->m_buffer.data(),
+ this->m_buffer.size()/2);
+
+ this->m_buffer[0] = firstEnergy;
+ this->m_buffer[halfDim] = lastEnergy;
+ }
+
+ float MelSpectrogram::GetMelFilterBankNormaliser(
+ const float& leftMel,
+ const float& rightMel,
+ const bool useHTKMethod)
+ {
+ UNUSED(leftMel);
+ UNUSED(rightMel);
+ UNUSED(useHTKMethod);
+
+ /* By default, no normalisation => return 1 */
+ return 1.f;
+ }
+
+ void MelSpectrogram::InitMelFilterBank()
+ {
+ if (!this->IsMelFilterBankInited()) {
+ this->m_melFilterBank = this->CreateMelFilterBank();
+ this->m_filterBankInitialised = true;
+ }
+ }
+
+ bool MelSpectrogram::IsMelFilterBankInited() const
+ {
+ return this->m_filterBankInitialised;
+ }
+
+ std::vector<float> MelSpectrogram::ComputeMelSpec(const std::vector<int16_t>& audioData, float trainingMean)
+ {
+ this->InitMelFilterBank();
+
+ /* TensorFlow way of normalizing .wav data to (-1, 1). */
+ constexpr float normaliser = 1.0f / (1 << 15);
+ for (size_t i = 0; i < this->m_params.m_frameLen; ++i) {
+ this->m_frame[i] = static_cast<float>(audioData[i]) * normaliser;
+ }
+
+ /* Apply window function to input frame. */
+ for (size_t i = 0; i < this->m_params.m_frameLen; ++i) {
+ this->m_frame[i] *= this->m_windowFunc[i];
+ }
+
+ /* Set remaining frame values to 0. */
+ std::fill(this->m_frame.begin() + this->m_params.m_frameLen,this->m_frame.end(), 0);
+
+ /* Compute FFT. */
+ math::MathUtils::FftF32(this->m_frame, this->m_buffer, this->m_fftInstance);
+
+ /* Convert to power spectrum. */
+ this->ConvertToPowerSpectrum();
+
+ /* Apply mel filterbanks. */
+ if (!this->ApplyMelFilterBank(this->m_buffer,
+ this->m_melFilterBank,
+ this->m_filterBankFilterFirst,
+ this->m_filterBankFilterLast,
+ this->m_melEnergies)) {
+ printf_err("Failed to apply MEL filter banks\n");
+ }
+
+ /* Convert to logarithmic scale */
+ this->ConvertToLogarithmicScale(this->m_melEnergies);
+
+ /* Perform mean subtraction. */
+ for (auto& energy:this->m_melEnergies) {
+ energy -= trainingMean;
+ }
+
+ return this->m_melEnergies;
+ }
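+ /* Minimal usage sketch; the parameter values here are illustrative only:
+ *
+ *   MelSpecParams params(16000, 64, 0.0f, 8000.0f, 1024, true);
+ *   MelSpectrogram melSpec(params);
+ *   melSpec.Init();
+ *   std::vector<float> feats = melSpec.ComputeMelSpec(audioWindow, 0.0f);
+ *
+ * where audioWindow holds at least params.m_frameLen int16_t samples. */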
+
+ std::vector<std::vector<float>> MelSpectrogram::CreateMelFilterBank()
+ {
+ size_t numFftBins = this->m_params.m_frameLenPadded / 2;
+ float fftBinWidth = static_cast<float>(this->m_params.m_samplingFreq) / this->m_params.m_frameLenPadded;
+
+ float melLowFreq = MelSpectrogram::MelScale(this->m_params.m_melLoFreq,
+ this->m_params.m_useHtkMethod);
+ float melHighFreq = MelSpectrogram::MelScale(this->m_params.m_melHiFreq,
+ this->m_params.m_useHtkMethod);
+ float melFreqDelta = (melHighFreq - melLowFreq) / (this->m_params.m_numFbankBins + 1);
+
+ std::vector<float> thisBin = std::vector<float>(numFftBins);
+ std::vector<std::vector<float>> melFilterBank(
+ this->m_params.m_numFbankBins);
+ this->m_filterBankFilterFirst =
+ std::vector<uint32_t>(this->m_params.m_numFbankBins);
+ this->m_filterBankFilterLast =
+ std::vector<uint32_t>(this->m_params.m_numFbankBins);
+
+ for (size_t bin = 0; bin < this->m_params.m_numFbankBins; bin++) {
+ float leftMel = melLowFreq + bin * melFreqDelta;
+ float centerMel = melLowFreq + (bin + 1) * melFreqDelta;
+ float rightMel = melLowFreq + (bin + 2) * melFreqDelta;
+
+ uint32_t firstIndex = 0;
+ uint32_t lastIndex = 0;
+ bool firstIndexFound = false;
+ const float normaliser = this->GetMelFilterBankNormaliser(leftMel, rightMel, this->m_params.m_useHtkMethod);
+
+ for (size_t i = 0; i < numFftBins; ++i) {
+ float freq = (fftBinWidth * i); /* Center freq of this fft bin. */
+ float mel = MelSpectrogram::MelScale(freq, this->m_params.m_useHtkMethod);
+ thisBin[i] = 0.0;
+
+ if (mel > leftMel && mel < rightMel) {
+ float weight;
+ if (mel <= centerMel) {
+ weight = (mel - leftMel) / (centerMel - leftMel);
+ } else {
+ weight = (rightMel - mel) / (rightMel - centerMel);
+ }
+
+ thisBin[i] = weight * normaliser;
+ if (!firstIndexFound) {
+ firstIndex = i;
+ firstIndexFound = true;
+ }
+ lastIndex = i;
+ }
+ }
+
+ this->m_filterBankFilterFirst[bin] = firstIndex;
+ this->m_filterBankFilterLast[bin] = lastIndex;
+
+ /* Copy the part we care about. */
+ for (uint32_t i = firstIndex; i <= lastIndex; ++i) {
+ melFilterBank[bin].push_back(thisBin[i]);
+ }
+ }
+
+ return melFilterBank;
+ }
+
+} /* namespace audio */
+} /* namespace app */
+} /* namespace arm */
diff --git a/source/application/api/use_case/asr/CMakeLists.txt b/source/application/api/use_case/asr/CMakeLists.txt
new file mode 100644
index 0000000..77e3d6a
--- /dev/null
+++ b/source/application/api/use_case/asr/CMakeLists.txt
@@ -0,0 +1,43 @@
+#----------------------------------------------------------------------------
+# Copyright (c) 2022 Arm Limited. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#----------------------------------------------------------------------------
+#########################################################
+# AUTOMATIC SPEECH RECOGNITION API library #
+#########################################################
+cmake_minimum_required(VERSION 3.15.6)
+
+set(ASR_API_TARGET asr_api)
+project(${ASR_API_TARGET}
+ DESCRIPTION "Automatic speech recognition use case API library"
+ LANGUAGES C CXX)
+
+# Create static library
+add_library(${ASR_API_TARGET} STATIC
+ src/Wav2LetterPreprocess.cc
+ src/Wav2LetterPostprocess.cc
+ src/Wav2LetterMfcc.cc
+ src/AsrClassifier.cc
+ src/OutputDecode.cc
+ src/Wav2LetterModel.cc)
+
+target_include_directories(${ASR_API_TARGET} PUBLIC include)
+
+target_link_libraries(${ASR_API_TARGET} PUBLIC common_api)
+
+message(STATUS "*******************************************************")
+message(STATUS "Library : " ${ASR_API_TARGET})
+message(STATUS "CMAKE_SYSTEM_PROCESSOR : " ${CMAKE_SYSTEM_PROCESSOR})
+message(STATUS "*******************************************************")
diff --git a/source/application/api/use_case/asr/include/AsrClassifier.hpp b/source/application/api/use_case/asr/include/AsrClassifier.hpp
new file mode 100644
index 0000000..a07a721
--- /dev/null
+++ b/source/application/api/use_case/asr/include/AsrClassifier.hpp
@@ -0,0 +1,63 @@
+/*
+ * Copyright (c) 2021 Arm Limited. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef ASR_CLASSIFIER_HPP
+#define ASR_CLASSIFIER_HPP
+
+#include "Classifier.hpp"
+
+namespace arm {
+namespace app {
+
+ class AsrClassifier : public Classifier {
+ public:
+ /**
+ * @brief Gets the top N classification results from the
+ * output vector.
+ * @param[in] outputTensor Inference output tensor from an NN model.
+ * @param[out] vecResults A vector of classification results
+ * populated by this function.
+ * @param[in] labels Labels vector to match classified classes
+ * @param[in] topNCount Number of top classifications to pick.
+ * @param[in] use_softmax Whether softmax scaling should be applied to model output.
+ * @return true if successful, false otherwise.
+ **/
+ bool GetClassificationResults(TfLiteTensor* outputTensor,
+ std::vector<ClassificationResult>& vecResults,
+ const std::vector<std::string>& labels,
+ uint32_t topNCount, bool use_softmax = false) override;
+
+ private:
+ /**
+ * @brief Utility function that gets the top 1 classification result for
+ * each row of the output tensor (logically 2D: time steps x labels).
+ * @param[in] tensor Inference output tensor from an NN model.
+ * @param[out] vecResults Vector of classification results populated by this function.
+ * @param[in] labels Labels vector to match classified classes.
+ * @param[in] scale Quantization scale.
+ * @param[in] zeroPoint Quantization zero point.
+ * @return true if successful, false otherwise.
+ **/
+ template<typename T>
+ bool GetTopResults(TfLiteTensor* tensor,
+ std::vector<ClassificationResult>& vecResults,
+ const std::vector<std::string>& labels, double scale, double zeroPoint);
+ };
+
+} /* namespace app */
+} /* namespace arm */
+
+#endif /* ASR_CLASSIFIER_HPP */
\ No newline at end of file
diff --git a/source/application/api/use_case/asr/include/AsrResult.hpp b/source/application/api/use_case/asr/include/AsrResult.hpp
new file mode 100644
index 0000000..ed826d0
--- /dev/null
+++ b/source/application/api/use_case/asr/include/AsrResult.hpp
@@ -0,0 +1,63 @@
+/*
+ * Copyright (c) 2021 Arm Limited. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef ASR_RESULT_HPP
+#define ASR_RESULT_HPP
+
+#include "ClassificationResult.hpp"
+
+#include <vector>
+
+namespace arm {
+namespace app {
+namespace asr {
+
+ using ResultVec = std::vector<arm::app::ClassificationResult>;
+
+ /* Structure for holding ASR result. */
+ class AsrResult {
+
+ public:
+ ResultVec m_resultVec; /* Container for "thresholded" classification results. */
+ float m_timeStamp; /* Audio timestamp for this result. */
+ uint32_t m_inferenceNumber; /* Corresponding inference number. */
+ float m_threshold; /* Threshold value for `m_resultVec`. */
+
+ AsrResult() = delete;
+ AsrResult(ResultVec& resultVec,
+ const float timestamp,
+ const uint32_t inferenceIdx,
+ const float scoreThreshold) {
+
+ this->m_threshold = scoreThreshold;
+ this->m_timeStamp = timestamp;
+ this->m_inferenceNumber = inferenceIdx;
+
+ this->m_resultVec = ResultVec();
+ for (auto& i : resultVec) {
+ if (i.m_normalisedVal >= this->m_threshold) {
+ this->m_resultVec.emplace_back(i);
+ }
+ }
+ }
+ ~AsrResult() = default;
+ };
+
+} /* namespace asr */
+} /* namespace app */
+} /* namespace arm */
+
+#endif /* ASR_RESULT_HPP */
\ No newline at end of file
diff --git a/source/application/api/use_case/asr/include/OutputDecode.hpp b/source/application/api/use_case/asr/include/OutputDecode.hpp
new file mode 100644
index 0000000..9d39057
--- /dev/null
+++ b/source/application/api/use_case/asr/include/OutputDecode.hpp
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2021 Arm Limited. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef ASR_OUTPUT_DECODE_HPP
+#define ASR_OUTPUT_DECODE_HPP
+
+#include "AsrClassifier.hpp"
+
+namespace arm {
+namespace app {
+namespace audio {
+namespace asr {
+
+ /**
+ * @brief Decodes the classifier output (one result per time step)
+ * into a readable string, collapsing repeated labels and
+ * dropping the '$' token.
+ * @param[in] vecResults Label output from classifier.
+ * @return String containing the decoded output.
+ **/
+ std::string DecodeOutput(const std::vector<ClassificationResult>& vecResults);
+
+} /* namespace asr */
+} /* namespace audio */
+} /* namespace app */
+} /* namespace arm */
+
+#endif /* ASR_OUTPUT_DECODE_HPP */
\ No newline at end of file
diff --git a/source/application/api/use_case/asr/include/Wav2LetterMfcc.hpp b/source/application/api/use_case/asr/include/Wav2LetterMfcc.hpp
new file mode 100644
index 0000000..b5a21d3
--- /dev/null
+++ b/source/application/api/use_case/asr/include/Wav2LetterMfcc.hpp
@@ -0,0 +1,109 @@
+/*
+ * Copyright (c) 2021 Arm Limited. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef ASR_WAV2LETTER_MFCC_HPP
+#define ASR_WAV2LETTER_MFCC_HPP
+
+#include "Mfcc.hpp"
+
+namespace arm {
+namespace app {
+namespace audio {
+
+ /* Class to provide Wav2Letter specific MFCC calculation requirements. */
+ class Wav2LetterMFCC : public MFCC {
+
+ public:
+ static constexpr uint32_t ms_defaultSamplingFreq = 16000;
+ static constexpr uint32_t ms_defaultNumFbankBins = 128;
+ static constexpr uint32_t ms_defaultMelLoFreq = 0;
+ static constexpr uint32_t ms_defaultMelHiFreq = 8000;
+ static constexpr bool ms_defaultUseHtkMethod = false;
+
+ explicit Wav2LetterMFCC(const size_t numFeats, const size_t frameLen)
+ : MFCC(MfccParams(
+ ms_defaultSamplingFreq, ms_defaultNumFbankBins,
+ ms_defaultMelLoFreq, ms_defaultMelHiFreq,
+ numFeats, frameLen, ms_defaultUseHtkMethod))
+ {}
+
+ Wav2LetterMFCC() = delete;
+ ~Wav2LetterMFCC() = default;
+
+ protected:
+
+ /**
+ * @brief Overrides base class implementation of this function.
+ * @param[in] fftVec Vector populated with FFT magnitudes
+ * @param[in] melFilterBank 2D Vector with filter bank weights
+ * @param[in] filterBankFilterFirst Vector containing the first indices of filter bank
+ * to be used for each bin.
+ * @param[in] filterBankFilterLast Vector containing the last indices of filter bank
+ * to be used for each bin.
+ * @param[out] melEnergies Pre-allocated vector of MEL energies to be
+ * populated.
+ * @return true if successful, false otherwise
+ */
+ bool ApplyMelFilterBank(
+ std::vector<float>& fftVec,
+ std::vector<std::vector<float>>& melFilterBank,
+ std::vector<uint32_t>& filterBankFilterFirst,
+ std::vector<uint32_t>& filterBankFilterLast,
+ std::vector<float>& melEnergies) override;
+
+ /**
+ * @brief Override for the base class implementation convert mel
+ * energies to logarithmic scale. The difference from
+ * default behaviour is that the power is converted to dB
+ * and subsequently clamped.
+ * @param[in,out] melEnergies 1D vector of Mel energies
+ **/
+ void ConvertToLogarithmicScale(std::vector<float>& melEnergies) override;
+
+ /**
+ * @brief Create a matrix used to calculate Discrete Cosine
+ * Transform. Override for the base class' default
+ * implementation as the first and last elements
+ * use a different normaliser.
+ * @param[in] inputLength input length of the buffer on which
+ * DCT will be performed
+ * @param[in] coefficientCount Total coefficients per input length.
+ * @return 1D vector with inputLength x coefficientCount elements
+ * populated with DCT coefficients.
+ */
+ std::vector<float> CreateDCTMatrix(int32_t inputLength,
+ int32_t coefficientCount) override;
+
+ /**
+ * @brief Given the low and high Mel values, get the normaliser
+ * for weights to be applied when populating the filter
+ * bank. Override for the base class implementation.
+ * @param[in] leftMel Low Mel frequency value.
+ * @param[in] rightMel High Mel frequency value.
+ * @param[in] useHTKMethod bool to signal if HTK method is to be
+ * used for calculation.
+ * @return Value to use for normalising.
+ */
+ float GetMelFilterBankNormaliser(const float& leftMel,
+ const float& rightMel,
+ bool useHTKMethod) override;
+ };
+
+} /* namespace audio */
+} /* namespace app */
+} /* namespace arm */
+
+#endif /* ASR_WAV2LETTER_MFCC_HPP */
\ No newline at end of file
diff --git a/source/application/api/use_case/asr/include/Wav2LetterModel.hpp b/source/application/api/use_case/asr/include/Wav2LetterModel.hpp
new file mode 100644
index 0000000..a02eed1
--- /dev/null
+++ b/source/application/api/use_case/asr/include/Wav2LetterModel.hpp
@@ -0,0 +1,67 @@
+/*
+ * Copyright (c) 2021 Arm Limited. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef ASR_WAV2LETTER_MODEL_HPP
+#define ASR_WAV2LETTER_MODEL_HPP
+
+#include "Model.hpp"
+
+namespace arm {
+namespace app {
+namespace asr {
+ extern const int g_FrameLength;
+ extern const int g_FrameStride;
+ extern const float g_ScoreThreshold;
+ extern const int g_ctxLen;
+} /* namespace asr */
+} /* namespace app */
+} /* namespace arm */
+
+namespace arm {
+namespace app {
+
+ class Wav2LetterModel : public Model {
+
+ public:
+ /* Indices for the expected model - based on input and output tensor shapes */
+ static constexpr uint32_t ms_inputRowsIdx = 1;
+ static constexpr uint32_t ms_inputColsIdx = 2;
+ static constexpr uint32_t ms_outputRowsIdx = 2;
+ static constexpr uint32_t ms_outputColsIdx = 3;
+
+ /* Model specific constants. */
+ static constexpr uint32_t ms_blankTokenIdx = 28;
+ static constexpr uint32_t ms_numMfccFeatures = 13;
+
+ protected:
+ /** @brief Gets the reference to op resolver interface class. */
+ const tflite::MicroOpResolver& GetOpResolver() override;
+
+ /** @brief Adds operations to the op resolver instance. */
+ bool EnlistOperations() override;
+
+ private:
+ /* Maximum number of individual operations that can be enlisted. */
+ static constexpr int ms_maxOpCnt = 5;
+
+ /* A mutable op resolver instance. */
+ tflite::MicroMutableOpResolver<ms_maxOpCnt> m_opResolver;
+ };
+
+} /* namespace app */
+} /* namespace arm */
+
+#endif /* ASR_WAV2LETTER_MODEL_HPP */
diff --git a/source/application/api/use_case/asr/include/Wav2LetterPostprocess.hpp b/source/application/api/use_case/asr/include/Wav2LetterPostprocess.hpp
new file mode 100644
index 0000000..02738bc
--- /dev/null
+++ b/source/application/api/use_case/asr/include/Wav2LetterPostprocess.hpp
@@ -0,0 +1,109 @@
+/*
+ * Copyright (c) 2021-2022 Arm Limited. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef ASR_WAV2LETTER_POSTPROCESS_HPP
+#define ASR_WAV2LETTER_POSTPROCESS_HPP
+
+#include "TensorFlowLiteMicro.hpp" /* TensorFlow headers. */
+#include "BaseProcessing.hpp"
+#include "Model.hpp"
+#include "AsrClassifier.hpp"
+#include "AsrResult.hpp"
+#include "log_macros.h"
+
+namespace arm {
+namespace app {
+
+ /**
+ * @brief Helper class to manage tensor post-processing for "wav2letter"
+ * output.
+ */
+ class AsrPostProcess : public BasePostProcess {
+ public:
+ bool m_lastIteration = false; /* Flag to set if processing the last set of data for a clip. */
+
+ /**
+ * @brief Constructor
+ * @param[in] outputTensor Pointer to the TFLite Micro output Tensor.
+ * @param[in] classifier Object used to get top N results from classification.
+ * @param[in] labels Vector of string labels to identify each output of the model.
+ * @param[in,out] result Vector of classification results to store decoded outputs.
+ * @param[in] outputContextLen Left/right context length for output tensor.
+ * @param[in] blankTokenIdx Index in the labels that the "Blank token" takes.
+ * @param[in] reductionAxis The axis that the logits of each time step are on.
+ **/
+ AsrPostProcess(TfLiteTensor* outputTensor, AsrClassifier& classifier,
+ const std::vector<std::string>& labels, asr::ResultVec& result,
+ uint32_t outputContextLen,
+ uint32_t blankTokenIdx, uint32_t reductionAxis);
+
+ /**
+ * @brief Should perform post-processing of the result of inference then
+ * populate ASR result data for any later use.
+ * @return true if successful, false otherwise.
+ **/
+ bool DoPostProcess() override;
+
+ /** @brief Gets the output inner length for post-processing. */
+ static uint32_t GetOutputInnerLen(const TfLiteTensor*, uint32_t outputCtxLen);
+
+ /** @brief Gets the output context length (left/right) for post-processing. */
+ static uint32_t GetOutputContextLen(const Model& model, uint32_t inputCtxLen);
+
+ /** @brief Gets the number of feature vectors to be computed. */
+ static uint32_t GetNumFeatureVectors(const Model& model);
+
+ private:
+ AsrClassifier& m_classifier; /* ASR Classifier object. */
+ TfLiteTensor* m_outputTensor; /* Model output tensor. */
+ const std::vector<std::string>& m_labels; /* ASR Labels. */
+ asr::ResultVec & m_results; /* Results vector for a single inference. */
+ uint32_t m_outputContextLen; /* Length of left/right context for output. */
+ uint32_t m_outputInnerLen; /* Length of output inner context. */
+ uint32_t m_totalLen; /* Total length of the required axis. */
+ uint32_t m_countIterations; /* Current number of iterations. */
+ uint32_t m_blankTokenIdx; /* Index of the labels blank token. */
+ uint32_t m_reductionAxisIdx; /* Axis containing output logits for a single step. */
+
+ /**
+ * @brief Checks if the tensor and axis index are valid
+ * inputs to the object - based on how it has been initialised.
+ * @return true if valid, false otherwise.
+ */
+ bool IsInputValid(TfLiteTensor* tensor,
+ uint32_t axisIdx) const;
+
+ /**
+ * @brief Gets the tensor data element size in bytes based
+ * on the tensor type.
+ * @return Size in bytes, 0 if not supported.
+ */
+ static uint32_t GetTensorElementSize(TfLiteTensor* tensor);
+
+ /**
+ * @brief Erases sections from the data assuming row-wise
+ * arrangement along the context axis.
+ * @return true if successful, false otherwise.
+ */
+ bool EraseSectionsRowWise(uint8_t* ptrData,
+ uint32_t strideSzBytes,
+ bool lastIteration);
+ };
+
+} /* namespace app */
+} /* namespace arm */
+
+#endif /* ASR_WAV2LETTER_POSTPROCESS_HPP */
\ No newline at end of file
diff --git a/source/application/api/use_case/asr/include/Wav2LetterPreprocess.hpp b/source/application/api/use_case/asr/include/Wav2LetterPreprocess.hpp
new file mode 100644
index 0000000..9943946
--- /dev/null
+++ b/source/application/api/use_case/asr/include/Wav2LetterPreprocess.hpp
@@ -0,0 +1,182 @@
+/*
+ * Copyright (c) 2021-2022 Arm Limited. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef ASR_WAV2LETTER_PREPROCESS_HPP
+#define ASR_WAV2LETTER_PREPROCESS_HPP
+
+#include "TensorFlowLiteMicro.hpp"
+#include "Wav2LetterMfcc.hpp"
+#include "AudioUtils.hpp"
+#include "DataStructures.hpp"
+#include "BaseProcessing.hpp"
+#include "log_macros.h"
+
+namespace arm {
+namespace app {
+
+ using AudioWindow = audio::SlidingWindow<const int16_t>;
+
+ /* Class to facilitate pre-processing calculation for the Wav2Letter
+ * model for ASR. */
+ class AsrPreProcess : public BasePreProcess {
+ public:
+ /**
+ * @brief Constructor.
+ * @param[in] inputTensor Pointer to the TFLite Micro input Tensor.
+ * @param[in] numMfccFeatures Number of MFCC features per window.
+ * @param[in] numFeatureFrames Number of MFCC vectors that need to be calculated
+ * for an inference.
+ * @param[in] mfccWindowLen Number of audio elements to calculate MFCC features per window.
+ * @param[in] mfccWindowStride Stride (in number of elements) for moving the MFCC window.
+ */
+ AsrPreProcess(TfLiteTensor* inputTensor,
+ uint32_t numMfccFeatures,
+ uint32_t numFeatureFrames,
+ uint32_t mfccWindowLen,
+ uint32_t mfccWindowStride);
+
+ /**
+ * @brief Calculates the features required from audio data. This
+ * includes MFCC, first and second order deltas,
+ * normalisation and finally, quantisation. The tensor is
+ * populated with features from a given window placed along
+ * in a single row.
+ * @param[in] audioData Pointer to the first element of audio data.
+ * @param[in] audioDataLen Number of elements in the audio data.
+ * @return true if successful, false in case of error.
+ */
+ bool DoPreProcess(const void* audioData, size_t audioDataLen) override;
+
+ protected:
+ /**
+ * @brief Computes the first and second order deltas for the
+ * MFCC buffers - they are assumed to be populated.
+ *
+ * @param[in] mfcc MFCC buffers.
+ * @param[out] delta1 Result of the first diff computation.
+ * @param[out] delta2 Result of the second diff computation.
+ * @return true if successful, false otherwise.
+ */
+ static bool ComputeDeltas(Array2d<float>& mfcc,
+ Array2d<float>& delta1,
+ Array2d<float>& delta2);
+
+ /**
+ * @brief Given a 2D vector of floats, rescale it to have mean of 0 and
+ * standard deviation of 1.
+ * @param[in,out] vec Vector of vector of floats.
+ */
+ static void StandardizeVecF32(Array2d<float>& vec);
+
+ /**
+ * @brief Standardizes all the MFCC and delta buffers to have mean 0 and std. dev 1.
+ */
+ void Standarize();
+
+ /**
+ * @brief Given the quantisation and data type limits, computes
+ * the quantised values of a floating point input data.
+ * @param[in] elem Element to be quantised.
+ * @param[in] quantScale Scale.
+ * @param[in] quantOffset Offset.
+ * @param[in] minVal Numerical limit - minimum.
+ * @param[in] maxVal Numerical limit - maximum.
+ * @return Floating point quantised value.
+ */
+ static float GetQuantElem(
+ float elem,
+ float quantScale,
+ int quantOffset,
+ float minVal,
+ float maxVal);
+
+ /**
+ * @brief Quantises the MFCC and delta buffers, and places them
+ * in the output buffer. While doing so, it transposes
+ * the data: buffers in this class keep the "time" axis
+ * row-major, mainly for the convolution speed-up that
+ * contiguous memory allows, whereas the output requires
+ * the time axis to be column-major.
+ * @param[in] outputBuf Pointer to the output buffer.
+ * @param[in] outputBufSz Output buffer's size.
+ * @param[in] quantScale Quantisation scale.
+ * @param[in] quantOffset Quantisation offset.
+ */
+ template <typename T>
+ bool Quantise(
+ T* outputBuf,
+ const uint32_t outputBufSz,
+ const float quantScale,
+ const int quantOffset)
+ {
+ /* Check the output size will fit everything. */
+ if (outputBufSz < (this->m_mfccBuf.size(0) * 3 * sizeof(T))) {
+ printf_err("Tensor size too small for features\n");
+ return false;
+ }
+
+ /* Populate. */
+ T* outputBufMfcc = outputBuf;
+ T* outputBufD1 = outputBuf + this->m_numMfccFeats;
+ T* outputBufD2 = outputBufD1 + this->m_numMfccFeats;
+ const uint32_t ptrIncr = this->m_numMfccFeats * 2; /* Skip the two sections already written for this frame. */
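+
+ /* Layout produced below (implied by the pointer arithmetic): each
+ * output row holds one feature frame j as
+ *   [ mfcc(0..N-1, j) | delta1(0..N-1, j) | delta2(0..N-1, j) ]
+ * with N = m_numMfccFeats, i.e. 3 * N elements per row. After the
+ * inner loop each pointer sits at the start of the next section, so
+ * advancing by ptrIncr = 2 * N lands it on the next row. */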
+
+ const float minVal = std::numeric_limits<T>::min();
+ const float maxVal = std::numeric_limits<T>::max();
+
+ /* Need to transpose while copying and concatenating the tensor. */
+ for (uint32_t j = 0; j < this->m_numFeatureFrames; ++j) {
+ for (uint32_t i = 0; i < this->m_numMfccFeats; ++i) {
+ *outputBufMfcc++ = static_cast<T>(AsrPreProcess::GetQuantElem(
+ this->m_mfccBuf(i, j), quantScale,
+ quantOffset, minVal, maxVal));
+ *outputBufD1++ = static_cast<T>(AsrPreProcess::GetQuantElem(
+ this->m_delta1Buf(i, j), quantScale,
+ quantOffset, minVal, maxVal));
+ *outputBufD2++ = static_cast<T>(AsrPreProcess::GetQuantElem(
+ this->m_delta2Buf(i, j), quantScale,
+ quantOffset, minVal, maxVal));
+ }
+ outputBufMfcc += ptrIncr;
+ outputBufD1 += ptrIncr;
+ outputBufD2 += ptrIncr;
+ }
+
+ return true;
+ }
+
+ private:
+ audio::Wav2LetterMFCC m_mfcc; /* MFCC instance. */
+ TfLiteTensor* m_inputTensor; /* Model input tensor. */
+
+ /* Actual buffers to be populated. */
+ Array2d<float> m_mfccBuf; /* Contiguous buffer 1D: MFCC */
+ Array2d<float> m_delta1Buf; /* Contiguous buffer 1D: Delta 1 */
+ Array2d<float> m_delta2Buf; /* Contiguous buffer 1D: Delta 2 */
+
+ uint32_t m_mfccWindowLen; /* Window length for MFCC. */
+ uint32_t m_mfccWindowStride; /* Window stride len for MFCC. */
+ uint32_t m_numMfccFeats; /* Number of MFCC features per window. */
+ uint32_t m_numFeatureFrames; /* How many sets of m_numMfccFeats. */
+ AudioWindow m_mfccSlidingWindow; /* Sliding window to calculate MFCCs. */
+
+ };
+
+} /* namespace app */
+} /* namespace arm */
+
+#endif /* ASR_WAV2LETTER_PREPROCESS_HPP */
\ No newline at end of file
diff --git a/source/application/api/use_case/asr/src/AsrClassifier.cc b/source/application/api/use_case/asr/src/AsrClassifier.cc
new file mode 100644
index 0000000..4ba8c7b
--- /dev/null
+++ b/source/application/api/use_case/asr/src/AsrClassifier.cc
@@ -0,0 +1,144 @@
+/*
+ * Copyright (c) 2021 Arm Limited. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "AsrClassifier.hpp"
+
+#include "log_macros.h"
+#include "TensorFlowLiteMicro.hpp"
+#include "Wav2LetterModel.hpp"
+
+namespace arm {
+namespace app {
+
+ template<typename T>
+ bool AsrClassifier::GetTopResults(TfLiteTensor* tensor,
+ std::vector<ClassificationResult>& vecResults,
+ const std::vector <std::string>& labels, double scale, double zeroPoint)
+ {
+ const uint32_t nElems = tensor->dims->data[Wav2LetterModel::ms_outputRowsIdx];
+ const uint32_t nLetters = tensor->dims->data[Wav2LetterModel::ms_outputColsIdx];
+
+ if (nLetters != labels.size()) {
+ printf("Output size doesn't match the labels' size\n");
+ return false;
+ }
+
+ /* NOTE: tensor's size verification against labels should be
+ * checked by the calling/public function. */
+ if (nLetters < 1) {
+ return false;
+ }
+
+ /* Final results' container. */
+ vecResults = std::vector<ClassificationResult>(nElems);
+
+ T* tensorData = tflite::GetTensorData<T>(tensor);
+
+ /* Get the top 1 results. */
+ for (uint32_t i = 0, row = 0; i < nElems; ++i, row+=nLetters) {
+ std::pair<T, uint32_t> top_1 = std::make_pair(tensorData[row + 0], 0);
+
+ for (uint32_t j = 1; j < nLetters; ++j) {
+ if (top_1.first < tensorData[row + j]) {
+ top_1.first = tensorData[row + j];
+ top_1.second = j;
+ }
+ }
+
+ double score = static_cast<int> (top_1.first);
+ vecResults[i].m_normalisedVal = scale * (score - zeroPoint);
+ vecResults[i].m_label = labels[top_1.second];
+ vecResults[i].m_labelIdx = top_1.second;
+ }
+
+ return true;
+ }
+ template bool AsrClassifier::GetTopResults<uint8_t>(TfLiteTensor* tensor,
+ std::vector<ClassificationResult>& vecResults,
+ const std::vector <std::string>& labels,
+ double scale, double zeroPoint);
+ template bool AsrClassifier::GetTopResults<int8_t>(TfLiteTensor* tensor,
+ std::vector<ClassificationResult>& vecResults,
+ const std::vector <std::string>& labels,
+ double scale, double zeroPoint);
+
+ bool AsrClassifier::GetClassificationResults(
+ TfLiteTensor* outputTensor,
+ std::vector<ClassificationResult>& vecResults,
+ const std::vector <std::string>& labels, uint32_t topNCount, bool use_softmax)
+ {
+ UNUSED(use_softmax);
+ vecResults.clear();
+
+ constexpr int minTensorDims = static_cast<int>(
+ (Wav2LetterModel::ms_outputRowsIdx > Wav2LetterModel::ms_outputColsIdx)?
+ Wav2LetterModel::ms_outputRowsIdx : Wav2LetterModel::ms_outputColsIdx);
+
+ constexpr uint32_t outColsIdx = Wav2LetterModel::ms_outputColsIdx;
+
+ /* Sanity checks. */
+ if (outputTensor == nullptr) {
+ printf_err("Output vector is null pointer.\n");
+ return false;
+ } else if (outputTensor->dims->size < minTensorDims) {
+ printf_err("Output tensor expected to be %dD\n", minTensorDims);
+ return false;
+ } else if (static_cast<uint32_t>(outputTensor->dims->data[outColsIdx]) < topNCount) {
+ printf_err("Output vectors are smaller than %" PRIu32 "\n", topNCount);
+ return false;
+ } else if (static_cast<uint32_t>(outputTensor->dims->data[outColsIdx]) != labels.size()) {
+ printf("Output size doesn't match the labels' size\n");
+ return false;
+ }
+
+ if (topNCount != 1) {
+ warn("TopNCount value ignored in this implementation\n");
+ }
+
+ /* To return the floating point values, we need quantization parameters. */
+ QuantParams quantParams = GetTensorQuantParams(outputTensor);
+
+ bool resultState;
+
+ switch (outputTensor->type) {
+ case kTfLiteUInt8:
+ resultState = this->GetTopResults<uint8_t>(
+ outputTensor, vecResults,
+ labels, quantParams.scale,
+ quantParams.offset);
+ break;
+ case kTfLiteInt8:
+ resultState = this->GetTopResults<int8_t>(
+ outputTensor, vecResults,
+ labels, quantParams.scale,
+ quantParams.offset);
+ break;
+ default:
+ printf_err("Tensor type %s not supported by classifier\n",
+ TfLiteTypeGetName(outputTensor->type));
+ return false;
+ }
+
+ if (!resultState) {
+ printf_err("Failed to get sorted set\n");
+ return false;
+ }
+
+ return true;
+ }
+
+} /* namespace app */
+} /* namespace arm */
\ No newline at end of file
diff --git a/source/application/api/use_case/asr/src/OutputDecode.cc b/source/application/api/use_case/asr/src/OutputDecode.cc
new file mode 100644
index 0000000..41fbe07
--- /dev/null
+++ b/source/application/api/use_case/asr/src/OutputDecode.cc
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2021 Arm Limited. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "OutputDecode.hpp"
+
+namespace arm {
+namespace app {
+namespace audio {
+namespace asr {
+
+ std::string DecodeOutput(const std::vector<ClassificationResult>& vecResults)
+ {
+ std::string CleanOutputBuffer;
+
+ for (size_t i = 0; i < vecResults.size(); ++i) /* For all elements in vector. */
+ {
+ while (i+1 < vecResults.size() &&
+ vecResults[i].m_label == vecResults[i+1].m_label) /* While the current element is equal to the next, ignore it and move on. */
+ {
+ ++i;
+ }
+ if (vecResults[i].m_label != "$") /* $ is a character used to represent unknown and double characters so should not be in output. */
+ {
+ CleanOutputBuffer += vecResults[i].m_label; /* If the element is different to the next, it will be appended to CleanOutputBuffer. */
+ }
+ }
+
+ return CleanOutputBuffer; /* Return string type containing clean output. */
+ }
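+
+ /* Worked example (illustrative): for per-step labels
+ * {h, h, e, $, e, l, l, o} the function returns "heelo": the repeated
+ * 'h' and 'l' collapse to single characters, while '$' is dropped and
+ * keeps the two genuine 'e's from being merged. */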
+
+} /* namespace asr */
+} /* namespace audio */
+} /* namespace app */
+} /* namespace arm */
diff --git a/source/application/api/use_case/asr/src/Wav2LetterMfcc.cc b/source/application/api/use_case/asr/src/Wav2LetterMfcc.cc
new file mode 100644
index 0000000..bb29b0f
--- /dev/null
+++ b/source/application/api/use_case/asr/src/Wav2LetterMfcc.cc
@@ -0,0 +1,141 @@
+/*
+ * Copyright (c) 2021 Arm Limited. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "Wav2LetterMfcc.hpp"
+
+#include "PlatformMath.hpp"
+#include "log_macros.h"
+
+#include <cfloat>
+
+namespace arm {
+namespace app {
+namespace audio {
+
+ bool Wav2LetterMFCC::ApplyMelFilterBank(
+ std::vector<float>& fftVec,
+ std::vector<std::vector<float>>& melFilterBank,
+ std::vector<uint32_t>& filterBankFilterFirst,
+ std::vector<uint32_t>& filterBankFilterLast,
+ std::vector<float>& melEnergies)
+ {
+ const size_t numBanks = melEnergies.size();
+
+ if (numBanks != filterBankFilterFirst.size() ||
+ numBanks != filterBankFilterLast.size()) {
+ printf_err("Unexpected filter bank lengths\n");
+ return false;
+ }
+
+ for (size_t bin = 0; bin < numBanks; ++bin) {
+ auto filterBankIter = melFilterBank[bin].begin();
+ auto end = melFilterBank[bin].end();
+ /* Avoid log of zero at later stages; the same floor value is used in
+ * librosa and was used during our default wav2letter model training. */
+ float melEnergy = 1e-10;
+ const uint32_t firstIndex = filterBankFilterFirst[bin];
+ const uint32_t lastIndex = std::min<uint32_t>(filterBankFilterLast[bin], fftVec.size() - 1);
+
+ for (uint32_t i = firstIndex; i <= lastIndex && filterBankIter != end; ++i) {
+ melEnergy += (*filterBankIter++ * fftVec[i]);
+ }
+
+ melEnergies[bin] = melEnergy;
+ }
+
+ return true;
+ }
+
+ void Wav2LetterMFCC::ConvertToLogarithmicScale(
+ std::vector<float>& melEnergies)
+ {
+ float maxMelEnergy = -FLT_MAX;
+
+ /* Container for natural logarithms of mel energies. */
+ std::vector <float> vecLogEnergies(melEnergies.size(), 0.f);
+
+ /* Because we are taking natural logs, we need to multiply by log10(e).
+ * Also, for wav2letter model, we scale our log10 values by 10. */
+ constexpr float multiplier = 10.0 * /* Default scalar. */
+ 0.4342944819032518; /* log10f(std::exp(1.0)) */
+
+ /* Take log of the whole vector. */
+ math::MathUtils::VecLogarithmF32(melEnergies, vecLogEnergies);
+
+ /* Scale the log values and get the max. */
+ for (auto iterM = melEnergies.begin(), iterL = vecLogEnergies.begin();
+ iterM != melEnergies.end() && iterL != vecLogEnergies.end(); ++iterM, ++iterL) {
+
+ *iterM = *iterL * multiplier;
+
+ /* Save the max mel energy. */
+ if (*iterM > maxMelEnergy) {
+ maxMelEnergy = *iterM;
+ }
+ }
+
+ /* Clamp the mel energies. */
+ constexpr float maxDb = 80.0;
+ const float clampLevelLowdB = maxMelEnergy - maxDb;
+ for (float& melEnergy : melEnergies) {
+ melEnergy = std::max(melEnergy, clampLevelLowdB);
+ }
+ }
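+
+ /* Net effect of the above: melEnergies[i] becomes 10 * log10(energy[i]),
+ * clamped to within 80 dB of the loudest bin. This mirrors the common
+ * power-to-dB conversion with an 80 dB dynamic-range cap (for example,
+ * librosa's power_to_db with top_db=80). */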
+
+ std::vector<float> Wav2LetterMFCC::CreateDCTMatrix(
+ const int32_t inputLength,
+ const int32_t coefficientCount)
+ {
+ std::vector<float> dctMatix(inputLength * coefficientCount);
+
+ /* Orthonormal normalization. */
+ const float normalizerK0 = 2 * math::MathUtils::SqrtF32(1.0f /
+ static_cast<float>(4*inputLength));
+ const float normalizer = 2 * math::MathUtils::SqrtF32(1.0f /
+ static_cast<float>(2*inputLength));
+
+ const float angleIncr = M_PI / inputLength;
+ float angle = angleIncr; /* We start using it at k = 1 loop. */
+
+ /* First row of DCT will use normalizer K0. */
+ for (int32_t n = 0; n < inputLength; ++n) {
+ dctMatrix[n] = normalizerK0 /* cos(0) = 1 */;
+ }
+
+ /* Second row (index = 1) onwards, we use standard normalizer. */
+ for (int32_t k = 1, m = inputLength; k < coefficientCount; ++k, m += inputLength) {
+ for (int32_t n = 0; n < inputLength; ++n) {
+ dctMatrix[m+n] = normalizer *
+ math::MathUtils::CosineF32((n + 0.5f) * angle);
+ }
+ angle += angleIncr;
+ }
+ return dctMatrix;
+ }
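+
+ /* For reference, the rows built above form the orthonormal DCT-II basis:
+ *
+ *   d(k, n) = sqrt(2/L) * cos(pi * (n + 0.5) * k / L)  for k > 0,
+ *   d(0, n) = sqrt(1/L),
+ *
+ * with L = inputLength. normalizerK0 = 2*sqrt(1/(4L)) = 1/sqrt(L) and
+ * normalizer = 2*sqrt(1/(2L)) = sqrt(2/L) match these scale factors. */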
+
+ float Wav2LetterMFCC::GetMelFilterBankNormaliser(
+ const float& leftMel,
+ const float& rightMel,
+ const bool useHTKMethod)
+ {
+ /* Slaney normalization for mel weights. */
+ return (2.0f / (MFCC::InverseMelScale(rightMel, useHTKMethod) -
+ MFCC::InverseMelScale(leftMel, useHTKMethod)));
+ }
+
+} /* namespace audio */
+} /* namespace app */
+} /* namespace arm */
diff --git a/source/application/api/use_case/asr/src/Wav2LetterModel.cc b/source/application/api/use_case/asr/src/Wav2LetterModel.cc
new file mode 100644
index 0000000..7b1e521
--- /dev/null
+++ b/source/application/api/use_case/asr/src/Wav2LetterModel.cc
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2021 Arm Limited. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "Wav2LetterModel.hpp"
+
+#include "log_macros.h"
+
+
+const tflite::MicroOpResolver& arm::app::Wav2LetterModel::GetOpResolver()
+{
+ return this->m_opResolver;
+}
+
+bool arm::app::Wav2LetterModel::EnlistOperations()
+{
+ this->m_opResolver.AddConv2D();
+ this->m_opResolver.AddReshape();
+ this->m_opResolver.AddLeakyRelu();
+ this->m_opResolver.AddSoftmax();
+
+ if (kTfLiteOk == this->m_opResolver.AddEthosU()) {
+ info("Added %s support to op resolver\n",
+ tflite::GetString_ETHOSU());
+ } else {
+ printf_err("Failed to add Arm NPU support to op resolver.");
+ return false;
+ }
+ return true;
+}
diff --git a/source/application/api/use_case/asr/src/Wav2LetterPostprocess.cc b/source/application/api/use_case/asr/src/Wav2LetterPostprocess.cc
new file mode 100644
index 0000000..00e689b
--- /dev/null
+++ b/source/application/api/use_case/asr/src/Wav2LetterPostprocess.cc
@@ -0,0 +1,214 @@
+/*
+ * Copyright (c) 2021-2022 Arm Limited. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "Wav2LetterPostprocess.hpp"
+
+#include "Wav2LetterModel.hpp"
+#include "log_macros.h"
+
+#include <cmath>
+
+namespace arm {
+namespace app {
+
+ AsrPostProcess::AsrPostProcess(TfLiteTensor* outputTensor, AsrClassifier& classifier,
+ const std::vector<std::string>& labels, std::vector<ClassificationResult>& results,
+ const uint32_t outputContextLen,
+ const uint32_t blankTokenIdx, const uint32_t reductionAxisIdx
+ ):
+ m_classifier(classifier),
+ m_outputTensor(outputTensor),
+ m_labels{labels},
+ m_results(results),
+ m_outputContextLen(outputContextLen),
+ m_countIterations(0),
+ m_blankTokenIdx(blankTokenIdx),
+ m_reductionAxisIdx(reductionAxisIdx)
+ {
+ this->m_outputInnerLen = AsrPostProcess::GetOutputInnerLen(this->m_outputTensor, this->m_outputContextLen);
+ this->m_totalLen = (2 * this->m_outputContextLen + this->m_outputInnerLen);
+ }
+
+ bool AsrPostProcess::DoPostProcess()
+ {
+ /* Basic checks. */
+ if (!this->IsInputValid(this->m_outputTensor, this->m_reductionAxisIdx)) {
+ return false;
+ }
+
+ /* Irrespective of tensor type, we use unsigned "byte" */
+ auto* ptrData = tflite::GetTensorData<uint8_t>(this->m_outputTensor);
+ const uint32_t elemSz = AsrPostProcess::GetTensorElementSize(this->m_outputTensor);
+
+ /* Other sanity checks. */
+ if (0 == elemSz) {
+ printf_err("Tensor type not supported for post processing\n");
+ return false;
+ } else if (elemSz * this->m_totalLen > this->m_outputTensor->bytes) {
+ printf_err("Insufficient number of tensor bytes\n");
+ return false;
+ }
+
+ /* Which axis do we need to process? */
+ switch (this->m_reductionAxisIdx) {
+ case Wav2LetterModel::ms_outputRowsIdx:
+ this->EraseSectionsRowWise(
+ ptrData, elemSz * this->m_outputTensor->dims->data[Wav2LetterModel::ms_outputColsIdx],
+ this->m_lastIteration);
+ break;
+ default:
+ printf_err("Unsupported axis index: %" PRIu32 "\n", this->m_reductionAxisIdx);
+ return false;
+ }
+ this->m_classifier.GetClassificationResults(this->m_outputTensor,
+ this->m_results, this->m_labels, 1);
+
+ return true;
+ }
+
+ bool AsrPostProcess::IsInputValid(TfLiteTensor* tensor, const uint32_t axisIdx) const
+ {
+ if (nullptr == tensor) {
+ return false;
+ }
+
+ if (static_cast<int>(axisIdx) >= tensor->dims->size) {
+ printf_err("Invalid axis index: %" PRIu32 "; Max: %d\n",
+ axisIdx, tensor->dims->size);
+ return false;
+ }
+
+ if (static_cast<int>(this->m_totalLen) !=
+ tensor->dims->data[axisIdx]) {
+ printf_err("Unexpected tensor dimension for axis %" PRIu32", got %d.\n",
+ axisIdx, tensor->dims->data[axisIdx]);
+ return false;
+ }
+
+ return true;
+ }
+
+ uint32_t AsrPostProcess::GetTensorElementSize(TfLiteTensor* tensor)
+ {
+ switch(tensor->type) {
+ case kTfLiteUInt8:
+ case kTfLiteInt8:
+ return 1;
+ case kTfLiteInt16:
+ return 2;
+ case kTfLiteInt32:
+ case kTfLiteFloat32:
+ return 4;
+ default:
+ printf_err("Unsupported tensor type %s\n",
+ TfLiteTypeGetName(tensor->type));
+ }
+
+ return 0;
+ }
+
+ bool AsrPostProcess::EraseSectionsRowWise(
+ uint8_t* ptrData,
+ const uint32_t strideSzBytes,
+ const bool lastIteration)
+ {
+ /* In this case, the "zero-ing" is quite simple as the region
+ * to be zeroed sits in contiguous memory (row-major). */
+ const uint32_t eraseLen = strideSzBytes * this->m_outputContextLen;
+
+ /* Erase left context? */
+ if (this->m_countIterations > 0) {
+ /* Set output of each classification window to the blank token. */
+ std::memset(ptrData, 0, eraseLen);
+ for (size_t windowIdx = 0; windowIdx < this->m_outputContextLen; windowIdx++) {
+ ptrData[windowIdx*strideSzBytes + this->m_blankTokenIdx] = 1;
+ }
+ }
+
+ /* Erase right context? */
+ if (false == lastIteration) {
+ uint8_t* rightCtxPtr = ptrData + (strideSzBytes * (this->m_outputContextLen + this->m_outputInnerLen));
+ /* Set output of each classification window to the blank token. */
+ std::memset(rightCtxPtr, 0, eraseLen);
+ for (size_t windowIdx = 0; windowIdx < this->m_outputContextLen; windowIdx++) {
+ rightCtxPtr[windowIdx*strideSzBytes + this->m_blankTokenIdx] = 1;
+ }
+ }
+
+ if (lastIteration) {
+ this->m_countIterations = 0;
+ } else {
+ ++this->m_countIterations;
+ }
+
+ return true;
+ }
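+
+ /* Rationale for the erasing above: each inference sees outputContextLen
+ * rows of left context, outputInnerLen rows of fresh results, and
+ * outputContextLen rows of right context. Context rows that overlap a
+ * neighbouring inference are forced to the blank token (scores zeroed,
+ * blank set to 1) so the later top-1 classification and decode steps do
+ * not emit duplicate characters for the shared regions. */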
+
+ uint32_t AsrPostProcess::GetNumFeatureVectors(const Model& model)
+ {
+ TfLiteTensor* inputTensor = model.GetInputTensor(0);
+ const int inputRows = std::max(inputTensor->dims->data[Wav2LetterModel::ms_inputRowsIdx], 0);
+ if (inputRows == 0) {
+ printf_err("Error getting number of input rows for axis: %" PRIu32 "\n",
+ Wav2LetterModel::ms_inputRowsIdx);
+ }
+ return inputRows;
+ }
+
+ uint32_t AsrPostProcess::GetOutputInnerLen(const TfLiteTensor* outputTensor, const uint32_t outputCtxLen)
+ {
+ const uint32_t outputRows = std::max(outputTensor->dims->data[Wav2LetterModel::ms_outputRowsIdx], 0);
+ if (outputRows == 0) {
+ printf_err("Error getting number of output rows for axis: %" PRIu32 "\n",
+ Wav2LetterModel::ms_outputRowsIdx);
+ }
+
+ /* Guard against underflow by subtracting in signed arithmetic. */
+ const int innerLen = static_cast<int>(outputRows) - static_cast<int>(2 * outputCtxLen);
+
+ return std::max(innerLen, 0);
+ }
+
+ uint32_t AsrPostProcess::GetOutputContextLen(const Model& model, const uint32_t inputCtxLen)
+ {
+ const uint32_t inputRows = AsrPostProcess::GetNumFeatureVectors(model);
+ const uint32_t inputInnerLen = inputRows - (2 * inputCtxLen);
+ constexpr uint32_t ms_outputRowsIdx = Wav2LetterModel::ms_outputRowsIdx;
+
+ /* Check to make sure that the input tensor supports the above
+ * context and inner lengths. */
+ if (inputRows <= 2 * inputCtxLen || inputRows <= inputInnerLen) {
+ printf_err("Input rows not compatible with ctx of %" PRIu32 "\n",
+ inputCtxLen);
+ return 0;
+ }
+
+ TfLiteTensor* outputTensor = model.GetOutputTensor(0);
+ const uint32_t outputRows = std::max(outputTensor->dims->data[ms_outputRowsIdx], 0);
+ if (outputRows == 0) {
+ printf_err("Error getting number of output rows for axis: %" PRIu32 "\n",
+ Wav2LetterModel::ms_outputRowsIdx);
+ return 0;
+ }
+
+ const float inOutRowRatio = static_cast<float>(inputRows) /
+ static_cast<float>(outputRows);
+
+ return std::round(static_cast<float>(inputCtxLen) / inOutRowRatio);
+ }
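+
+ /* Worked example (numbers illustrative): with 296 input feature frames,
+ * an input context of 98 and 148 output rows, the input/output row ratio
+ * is 296 / 148 = 2, so the function returns round(98 / 2) = 49. */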
+
+} /* namespace app */
+} /* namespace arm */
diff --git a/source/application/api/use_case/asr/src/Wav2LetterPreprocess.cc b/source/application/api/use_case/asr/src/Wav2LetterPreprocess.cc
new file mode 100644
index 0000000..92b0631
--- /dev/null
+++ b/source/application/api/use_case/asr/src/Wav2LetterPreprocess.cc
@@ -0,0 +1,208 @@
+/*
+ * Copyright (c) 2021-2022 Arm Limited. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "Wav2LetterPreprocess.hpp"
+
+#include "PlatformMath.hpp"
+#include "TensorFlowLiteMicro.hpp"
+
+#include <algorithm>
+#include <cmath>
+
+namespace arm {
+namespace app {
+
+ AsrPreProcess::AsrPreProcess(TfLiteTensor* inputTensor, const uint32_t numMfccFeatures,
+ const uint32_t numFeatureFrames, const uint32_t mfccWindowLen,
+ const uint32_t mfccWindowStride
+ ):
+ m_mfcc(numMfccFeatures, mfccWindowLen),
+ m_inputTensor(inputTensor),
+ m_mfccBuf(numMfccFeatures, numFeatureFrames),
+ m_delta1Buf(numMfccFeatures, numFeatureFrames),
+ m_delta2Buf(numMfccFeatures, numFeatureFrames),
+ m_mfccWindowLen(mfccWindowLen),
+ m_mfccWindowStride(mfccWindowStride),
+ m_numMfccFeats(numMfccFeatures),
+ m_numFeatureFrames(numFeatureFrames)
+ {
+ if (numMfccFeatures > 0 && mfccWindowLen > 0) {
+ this->m_mfcc.Init();
+ }
+ }
+
+ bool AsrPreProcess::DoPreProcess(const void* audioData, const size_t audioDataLen)
+ {
+ this->m_mfccSlidingWindow = audio::SlidingWindow<const int16_t>(
+ static_cast<const int16_t*>(audioData), audioDataLen,
+ this->m_mfccWindowLen, this->m_mfccWindowStride);
+
+ uint32_t mfccBufIdx = 0;
+
+ std::fill(m_mfccBuf.begin(), m_mfccBuf.end(), 0.f);
+ std::fill(m_delta1Buf.begin(), m_delta1Buf.end(), 0.f);
+ std::fill(m_delta2Buf.begin(), m_delta2Buf.end(), 0.f);
+
+ /* While we can slide over the audio. */
+ while (this->m_mfccSlidingWindow.HasNext()) {
+ const int16_t* mfccWindow = this->m_mfccSlidingWindow.Next();
+ auto mfccAudioData = std::vector<int16_t>(
+ mfccWindow,
+ mfccWindow + this->m_mfccWindowLen);
+ auto mfcc = this->m_mfcc.MfccCompute(mfccAudioData);
+ for (size_t i = 0; i < this->m_mfccBuf.size(0); ++i) {
+ this->m_mfccBuf(i, mfccBufIdx) = mfcc[i];
+ }
+ ++mfccBufIdx;
+ }
+
+ /* If needed, pad the feature buffer with MFCCs computed over zeroed samples. */
+ if (mfccBufIdx != this->m_numFeatureFrames) {
+ std::vector<int16_t> zerosWindow = std::vector<int16_t>(this->m_mfccWindowLen, 0);
+ std::vector<float> mfccZeros = this->m_mfcc.MfccCompute(zerosWindow);
+
+ while (mfccBufIdx != this->m_numFeatureFrames) {
+ memcpy(&this->m_mfccBuf(0, mfccBufIdx),
+ mfccZeros.data(), sizeof(float) * m_numMfccFeats);
+ ++mfccBufIdx;
+ }
+ }
+
+ /* Compute first and second order deltas from MFCCs. */
+ AsrPreProcess::ComputeDeltas(this->m_mfccBuf, this->m_delta1Buf, this->m_delta2Buf);
+
+ /* Standardize calculated features. */
+ this->Standarize();
+
+ /* Quantise. */
+ QuantParams quantParams = GetTensorQuantParams(this->m_inputTensor);
+
+ if (0 == quantParams.scale) {
+ printf_err("Quantisation scale can't be 0\n");
+ return false;
+ }
+
+ switch(this->m_inputTensor->type) {
+ case kTfLiteUInt8:
+ return this->Quantise<uint8_t>(
+ tflite::GetTensorData<uint8_t>(this->m_inputTensor), this->m_inputTensor->bytes,
+ quantParams.scale, quantParams.offset);
+ case kTfLiteInt8:
+ return this->Quantise<int8_t>(
+ tflite::GetTensorData<int8_t>(this->m_inputTensor), this->m_inputTensor->bytes,
+ quantParams.scale, quantParams.offset);
+ default:
+ printf_err("Unsupported tensor type %s\n",
+ TfLiteTypeGetName(this->m_inputTensor->type));
+ }
+
+ return false;
+ }
+
+ bool AsrPreProcess::ComputeDeltas(Array2d<float>& mfcc,
+ Array2d<float>& delta1,
+ Array2d<float>& delta2)
+ {
+ const std::vector <float> delta1Coeffs =
+ {6.66666667e-02, 5.00000000e-02, 3.33333333e-02,
+ 1.66666667e-02, -3.46944695e-18, -1.66666667e-02,
+ -3.33333333e-02, -5.00000000e-02, -6.66666667e-02};
+
+ const std::vector <float> delta2Coeffs =
+ {0.06060606, 0.01515152, -0.01731602,
+ -0.03679654, -0.04329004, -0.03679654,
+ -0.01731602, 0.01515152, 0.06060606};
+
+ if (delta1.size(0) == 0 || delta2.size(0) != delta1.size(0) ||
+ mfcc.size(0) == 0 || mfcc.size(1) == 0) {
+ return false;
+ }
+
+ /* Get the middle index; coeff vec len should always be odd. */
+ const size_t coeffLen = delta1Coeffs.size();
+ const size_t fMidIdx = (coeffLen - 1)/2;
+ const size_t numFeatures = mfcc.size(0);
+ const size_t numFeatVectors = mfcc.size(1);
+
+ /* Iterate through features in MFCC vector. */
+ for (size_t i = 0; i < numFeatures; ++i) {
+ /* For each feature, iterate through time (t) samples representing feature evolution and
+ * calculate d/dt and d^2/dt^2, using 1D convolution with differential kernels.
+ * Convolution padding = valid, result size is `time length - kernel length + 1`.
+ * The result is padded with zeros on both sides to match the size of the initial time-sample data.
+ *
+ * For a small filter like this, implementing conv1D as a simple loop is efficient enough.
+ * Larger filters would call for CMSIS-DSP functions such as arm_fir_f32.
+ */
+
+ for (size_t j = fMidIdx; j < numFeatVectors - fMidIdx; ++j) {
+ float d1 = 0;
+ float d2 = 0;
+ const size_t mfccStIdx = j - fMidIdx;
+
+ for (size_t k = 0, m = coeffLen - 1; k < coeffLen; ++k, --m) {
+
+ d1 += mfcc(i,mfccStIdx + k) * delta1Coeffs[m];
+ d2 += mfcc(i,mfccStIdx + k) * delta2Coeffs[m];
+ }
+
+ delta1(i,j) = d1;
+ delta2(i,j) = d2;
+ }
+ }
+
+ return true;
+ }
+
+ void AsrPreProcess::StandardizeVecF32(Array2d<float>& vec)
+ {
+ auto mean = math::MathUtils::MeanF32(vec.begin(), vec.totalSize());
+ auto stddev = math::MathUtils::StdDevF32(vec.begin(), vec.totalSize(), mean);
+
+ debug("Mean: %f, Stddev: %f\n", mean, stddev);
+ if (stddev == 0) {
+ std::fill(vec.begin(), vec.end(), 0);
+ } else {
+ const float stddevInv = 1.f/stddev;
+ const float normalisedMean = mean/stddev;
+
+ auto NormalisingFunction = [=](float& value) {
+ value = value * stddevInv - normalisedMean;
+ };
+ std::for_each(vec.begin(), vec.end(), NormalisingFunction);
+ }
+ }
+
+ void AsrPreProcess::Standarize()
+ {
+ AsrPreProcess::StandardizeVecF32(this->m_mfccBuf);
+ AsrPreProcess::StandardizeVecF32(this->m_delta1Buf);
+ AsrPreProcess::StandardizeVecF32(this->m_delta2Buf);
+ }
+
+ float AsrPreProcess::GetQuantElem(
+ const float elem,
+ const float quantScale,
+ const int quantOffset,
+ const float minVal,
+ const float maxVal)
+ {
+ float val = std::round((elem/quantScale) + quantOffset);
+ return std::min<float>(std::max<float>(val, minVal), maxVal);
+ }
+
+} /* namespace app */
+} /* namespace arm */ \ No newline at end of file
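A note on the delta kernels above: delta1Coeffs is the standard 9-tap linear-regression delta filter (coefficient n/60 for n = 4..-4) and delta2Coeffs is the matching second-order fit. A minimal sketch exercising ComputeDeltas, assuming the Array2d API used above and that ComputeDeltas is reachable from the call site:

    /* A linear ramp should give a first delta of ~1 away from the
     * zero-padded edges and a second delta of ~0 everywhere. */
    arm::app::Array2d<float> mfcc(1, 16), d1(1, 16), d2(1, 16);
    for (size_t t = 0; t < 16; ++t) {
        mfcc(0, t) = static_cast<float>(t);
    }
    std::fill(d1.begin(), d1.end(), 0.f);
    std::fill(d2.begin(), d2.end(), 0.f);
    arm::app::AsrPreProcess::ComputeDeltas(mfcc, d1, d2);
    /* Expect d1(0, t) ~= 1.f for 4 <= t <= 11; the edges stay 0 ("valid" padding). */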
diff --git a/source/application/api/use_case/img_class/CMakeLists.txt b/source/application/api/use_case/img_class/CMakeLists.txt
new file mode 100644
index 0000000..f4818d8
--- /dev/null
+++ b/source/application/api/use_case/img_class/CMakeLists.txt
@@ -0,0 +1,39 @@
+#----------------------------------------------------------------------------
+# Copyright (c) 2022 Arm Limited. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#----------------------------------------------------------------------------
+#########################################################
+# IMG CLASS API library #
+#########################################################
+cmake_minimum_required(VERSION 3.15.6)
+
+set(IMG_CLASS_API_TARGET img_class_api)
+project(${IMG_CLASS_API_TARGET}
+ DESCRIPTION "Image classification use case API library"
+ LANGUAGES C CXX)
+
+# Create static library
+add_library(${IMG_CLASS_API_TARGET} STATIC
+ src/ImgClassProcessing.cc
+ src/MobileNetModel.cc)
+
+target_include_directories(${IMG_CLASS_API_TARGET} PUBLIC include)
+
+target_link_libraries(${IMG_CLASS_API_TARGET} PUBLIC common_api)
+
+message(STATUS "*******************************************************")
+message(STATUS "Library : " ${IMG_CLASS_API_TARGET})
+message(STATUS "CMAKE_SYSTEM_PROCESSOR : " ${CMAKE_SYSTEM_PROCESSOR})
+message(STATUS "*******************************************************")
diff --git a/source/application/api/use_case/img_class/include/ImgClassProcessing.hpp b/source/application/api/use_case/img_class/include/ImgClassProcessing.hpp
new file mode 100644
index 0000000..55b5ce1
--- /dev/null
+++ b/source/application/api/use_case/img_class/include/ImgClassProcessing.hpp
@@ -0,0 +1,91 @@
+/*
+ * Copyright (c) 2022 Arm Limited. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef IMG_CLASS_PROCESSING_HPP
+#define IMG_CLASS_PROCESSING_HPP
+
+#include "BaseProcessing.hpp"
+#include "Classifier.hpp"
+
+namespace arm {
+namespace app {
+
+ /**
+ * @brief Pre-processing class for Image Classification use case.
+ * Implements methods declared by BasePreProcess and anything else needed
+ * to populate input tensors ready for inference.
+ */
+ class ImgClassPreProcess : public BasePreProcess {
+
+ public:
+ /**
+ * @brief Constructor
+ * @param[in] inputTensor Pointer to the TFLite Micro input Tensor.
+ * @param[in] convertToInt8 Whether the image data should be converted to the int8 range.
+ **/
+ explicit ImgClassPreProcess(TfLiteTensor* inputTensor, bool convertToInt8);
+
+ /**
+ * @brief Should perform pre-processing of 'raw' input image data and load it into
+ * TFLite Micro input tensors ready for inference.
+ * @param[in] input Pointer to the data that pre-processing will work on.
+ * @param[in] inputSize Size of the input data.
+ * @return true if successful, false otherwise.
+ **/
+ bool DoPreProcess(const void* input, size_t inputSize) override;
+
+ private:
+ TfLiteTensor* m_inputTensor;
+ bool m_convertToInt8;
+ };
+
+ /**
+ * @brief Post-processing class for Image Classification use case.
+ * Implements methods declared by BasePostProcess and anything else needed
+ * to populate result vector.
+ */
+ class ImgClassPostProcess : public BasePostProcess {
+
+ public:
+ /**
+ * @brief Constructor
+ * @param[in] outputTensor Pointer to the TFLite Micro output Tensor.
+ * @param[in] classifier Classifier object used to get top N results from classification.
+ * @param[in] labels Vector of string labels to identify each output of the model.
+ * @param[in,out] results Vector of classification results to store decoded outputs.
+ **/
+ ImgClassPostProcess(TfLiteTensor* outputTensor, Classifier& classifier,
+ const std::vector<std::string>& labels,
+ std::vector<ClassificationResult>& results);
+
+ /**
+ * @brief Should perform post-processing of the result of inference and then
+ * populate classification result data for any later use.
+ * @return true if successful, false otherwise.
+ **/
+ bool DoPostProcess() override;
+
+ private:
+ TfLiteTensor* m_outputTensor;
+ Classifier& m_imgClassifier;
+ const std::vector<std::string>& m_labels;
+ std::vector<ClassificationResult>& m_results;
+ };
+
+} /* namespace app */
+} /* namespace arm */
+
+#endif /* IMG_CLASS_PROCESSING_HPP */ \ No newline at end of file
diff --git a/source/application/api/use_case/img_class/include/MobileNetModel.hpp b/source/application/api/use_case/img_class/include/MobileNetModel.hpp
new file mode 100644
index 0000000..adaa9c2
--- /dev/null
+++ b/source/application/api/use_case/img_class/include/MobileNetModel.hpp
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2021 Arm Limited. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef IMG_CLASS_MOBILENETMODEL_HPP
+#define IMG_CLASS_MOBILENETMODEL_HPP
+
+#include "Model.hpp"
+
+namespace arm {
+namespace app {
+
+ class MobileNetModel : public Model {
+
+ public:
+ /* Indices for the expected model - based on input tensor shape */
+ static constexpr uint32_t ms_inputRowsIdx = 1;
+ static constexpr uint32_t ms_inputColsIdx = 2;
+ static constexpr uint32_t ms_inputChannelsIdx = 3;
+
+ protected:
+ /** @brief Gets the reference to op resolver interface class. */
+ const tflite::MicroOpResolver& GetOpResolver() override;
+
+ /** @brief Adds operations to the op resolver instance. */
+ bool EnlistOperations() override;
+
+ private:
+ /* Maximum number of individual operations that can be enlisted. */
+ static constexpr int ms_maxOpCnt = 7;
+
+ /* A mutable op resolver instance. */
+ tflite::MicroMutableOpResolver<ms_maxOpCnt> m_opResolver;
+ };
+
+} /* namespace app */
+} /* namespace arm */
+
+#endif /* IMG_CLASS_MOBILENETMODEL_HPP */
diff --git a/source/application/api/use_case/img_class/src/ImgClassProcessing.cc b/source/application/api/use_case/img_class/src/ImgClassProcessing.cc
new file mode 100644
index 0000000..491e751
--- /dev/null
+++ b/source/application/api/use_case/img_class/src/ImgClassProcessing.cc
@@ -0,0 +1,66 @@
+/*
+ * Copyright (c) 2022 Arm Limited. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "ImgClassProcessing.hpp"
+
+#include "ImageUtils.hpp"
+#include "log_macros.h"
+
+namespace arm {
+namespace app {
+
+ ImgClassPreProcess::ImgClassPreProcess(TfLiteTensor* inputTensor, bool convertToInt8)
+ :m_inputTensor{inputTensor},
+ m_convertToInt8{convertToInt8}
+ {}
+
+ bool ImgClassPreProcess::DoPreProcess(const void* data, size_t inputSize)
+ {
+ if (data == nullptr) {
+ printf_err("Data pointer is null");
+ return false;
+ }
+
+ auto input = static_cast<const uint8_t*>(data);
+
+ std::memcpy(this->m_inputTensor->data.data, input, inputSize);
+ debug("Input tensor populated \n");
+
+ if (this->m_convertToInt8) {
+ image::ConvertImgToInt8(this->m_inputTensor->data.data, this->m_inputTensor->bytes);
+ }
+
+ return true;
+ }
+
+ ImgClassPostProcess::ImgClassPostProcess(TfLiteTensor* outputTensor, Classifier& classifier,
+ const std::vector<std::string>& labels,
+ std::vector<ClassificationResult>& results)
+ :m_outputTensor{outputTensor},
+ m_imgClassifier{classifier},
+ m_labels{labels},
+ m_results{results}
+ {}
+
+ bool ImgClassPostProcess::DoPostProcess()
+ {
+ return this->m_imgClassifier.GetClassificationResults(
+ this->m_outputTensor, this->m_results,
+ this->m_labels, 5, false);
+ }
+
+} /* namespace app */
+} /* namespace arm */ \ No newline at end of file
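A hedged usage sketch tying the two classes together (model and label plumbing elided; `model`, `labels`, `imageData` and `imageSize` are assumed to be set up by the caller, and the Model accessors follow those used elsewhere in this patch):

    arm::app::ImgClassPreProcess preProcess(model.GetInputTensor(0), true);
    arm::app::Classifier classifier;
    std::vector<arm::app::ClassificationResult> results;
    arm::app::ImgClassPostProcess postProcess(model.GetOutputTensor(0),
                                              classifier, labels, results);

    if (preProcess.DoPreProcess(imageData, imageSize) &&
        model.RunInference() &&
        postProcess.DoPostProcess()) {
        /* `results` now holds the top 5 classifications. */
    }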
diff --git a/source/application/api/use_case/img_class/src/MobileNetModel.cc b/source/application/api/use_case/img_class/src/MobileNetModel.cc
new file mode 100644
index 0000000..b700d70
--- /dev/null
+++ b/source/application/api/use_case/img_class/src/MobileNetModel.cc
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2021 Arm Limited. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "MobileNetModel.hpp"
+#include "log_macros.h"
+
+const tflite::MicroOpResolver& arm::app::MobileNetModel::GetOpResolver()
+{
+ return this->m_opResolver;
+}
+
+bool arm::app::MobileNetModel::EnlistOperations()
+{
+ this->m_opResolver.AddDepthwiseConv2D();
+ this->m_opResolver.AddConv2D();
+ this->m_opResolver.AddAveragePool2D();
+ this->m_opResolver.AddAdd();
+ this->m_opResolver.AddReshape();
+ this->m_opResolver.AddSoftmax();
+
+ if (kTfLiteOk == this->m_opResolver.AddEthosU()) {
+ info("Added %s support to op resolver\n",
+ tflite::GetString_ETHOSU());
+ } else {
+ printf_err("Failed to add Arm NPU support to op resolver.");
+ return false;
+ }
+ return true;
+}
diff --git a/source/application/api/use_case/inference_runner/CMakeLists.txt b/source/application/api/use_case/inference_runner/CMakeLists.txt
new file mode 100644
index 0000000..d0fe629
--- /dev/null
+++ b/source/application/api/use_case/inference_runner/CMakeLists.txt
@@ -0,0 +1,37 @@
+#----------------------------------------------------------------------------
+# Copyright (c) 2022 Arm Limited. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#----------------------------------------------------------------------------
+#########################################################
+# INFERENCE RUNNER API library #
+#########################################################
+cmake_minimum_required(VERSION 3.15.6)
+
+set(INFERENCE_RUNNER_API_TARGET inference_runner_api)
+project(${INFERENCE_RUNNER_API_TARGET}
+ DESCRIPTION "Inference runner use case API library"
+ LANGUAGES C CXX)
+
+# Create static library
+add_library(${INFERENCE_RUNNER_API_TARGET} STATIC src/TestModel.cc)
+
+target_include_directories(${INFERENCE_RUNNER_API_TARGET} PUBLIC include)
+
+target_link_libraries(${INFERENCE_RUNNER_API_TARGET} PUBLIC common_api)
+
+message(STATUS "*******************************************************")
+message(STATUS "Library : " ${INFERENCE_RUNNER_API_TARGET})
+message(STATUS "CMAKE_SYSTEM_PROCESSOR : " ${CMAKE_SYSTEM_PROCESSOR})
+message(STATUS "*******************************************************")
diff --git a/source/application/api/use_case/inference_runner/include/TestModel.hpp b/source/application/api/use_case/inference_runner/include/TestModel.hpp
new file mode 100644
index 0000000..648198c
--- /dev/null
+++ b/source/application/api/use_case/inference_runner/include/TestModel.hpp
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2021 Arm Limited. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef INF_RUNNER_TESTMODEL_HPP
+#define INF_RUNNER_TESTMODEL_HPP
+
+#include "Model.hpp"
+
+namespace arm {
+namespace app {
+
+ class TestModel : public Model {
+
+ protected:
+ /** @brief Gets the reference to op resolver interface class. */
+ const tflite::AllOpsResolver& GetOpResolver() override;
+
+ /** @brief Adds operations to the op resolver instance, not needed as using AllOpsResolver. */
+ bool EnlistOperations() override {return false;}
+
+ private:
+
+ /* No need to define individual ops at the cost of extra memory. */
+ tflite::AllOpsResolver m_opResolver;
+ };
+
+} /* namespace app */
+} /* namespace arm */
+
+#endif /* INF_RUNNER_TESTMODEL_HPP */
diff --git a/source/application/api/use_case/inference_runner/src/TestModel.cc b/source/application/api/use_case/inference_runner/src/TestModel.cc
new file mode 100644
index 0000000..1891e44
--- /dev/null
+++ b/source/application/api/use_case/inference_runner/src/TestModel.cc
@@ -0,0 +1,23 @@
+/*
+ * Copyright (c) 2021 Arm Limited. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "TestModel.hpp"
+#include "log_macros.h"
+
+const tflite::AllOpsResolver& arm::app::TestModel::GetOpResolver()
+{
+ return this->m_opResolver;
+}
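The AllOpsResolver keeps the inference runner generic at the cost of linking every built-in kernel. If code size mattered more than generality, a trimmed resolver could be swapped in along the lines of the other use cases in this patch; the ops below are purely illustrative:

    /* Hypothetical slimmer alternative, if the test model's ops were known: */
    static tflite::MicroMutableOpResolver<2> resolver;
    resolver.AddConv2D();
    resolver.AddFullyConnected();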
diff --git a/source/application/api/use_case/kws/CMakeLists.txt b/source/application/api/use_case/kws/CMakeLists.txt
new file mode 100644
index 0000000..3256d03
--- /dev/null
+++ b/source/application/api/use_case/kws/CMakeLists.txt
@@ -0,0 +1,39 @@
+#----------------------------------------------------------------------------
+# Copyright (c) 2022 Arm Limited. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#----------------------------------------------------------------------------
+#########################################################
+# KEYWORD SPOTTING API library #
+#########################################################
+cmake_minimum_required(VERSION 3.15.6)
+
+set(KWS_API_TARGET kws_api)
+project(${KWS_API_TARGET}
+ DESCRIPTION "Keyword spotting use case API library"
+ LANGUAGES C CXX)
+
+# Create static library
+add_library(${KWS_API_TARGET} STATIC
+ src/KwsProcessing.cc
+ src/MicroNetKwsModel.cc)
+
+target_include_directories(${KWS_API_TARGET} PUBLIC include)
+
+target_link_libraries(${KWS_API_TARGET} PUBLIC common_api)
+
+message(STATUS "*******************************************************")
+message(STATUS "Library : " ${KWS_API_TARGET})
+message(STATUS "CMAKE_SYSTEM_PROCESSOR : " ${CMAKE_SYSTEM_PROCESSOR})
+message(STATUS "*******************************************************")
diff --git a/source/application/api/use_case/kws/include/KwsProcessing.hpp b/source/application/api/use_case/kws/include/KwsProcessing.hpp
new file mode 100644
index 0000000..0ede425
--- /dev/null
+++ b/source/application/api/use_case/kws/include/KwsProcessing.hpp
@@ -0,0 +1,137 @@
+/*
+ * Copyright (c) 2022 Arm Limited. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef KWS_PROCESSING_HPP
+#define KWS_PROCESSING_HPP
+
+#include "AudioUtils.hpp"
+#include "BaseProcessing.hpp"
+#include "Classifier.hpp"
+#include "MicroNetKwsMfcc.hpp"
+
+#include <functional>
+
+namespace arm {
+namespace app {
+
+ /**
+ * @brief Pre-processing class for Keyword Spotting use case.
+ * Implements methods declared by BasePreProcess and anything else needed
+ * to populate input tensors ready for inference.
+ */
+ class KwsPreProcess : public BasePreProcess {
+
+ public:
+ /**
+ * @brief Constructor
+ * @param[in] inputTensor Pointer to the TFLite Micro input Tensor.
+ * @param[in] numFeatures How many MFCC features to use.
+ * @param[in] numFeatureFrames Number of MFCC vectors that need to be calculated
+ * for an inference.
+ * @param[in] mfccFrameLength Number of audio samples used to calculate one set of MFCC values when
+ * sliding a window through the audio sample.
+ * @param[in] mfccFrameStride Number of audio samples between consecutive windows.
+ **/
+ explicit KwsPreProcess(TfLiteTensor* inputTensor, size_t numFeatures, size_t numFeatureFrames,
+ int mfccFrameLength, int mfccFrameStride);
+
+ /**
+ * @brief Should perform pre-processing of 'raw' input audio data and load it into
+ * TFLite Micro input tensors ready for inference.
+ * @param[in] input Pointer to the data that pre-processing will work on.
+ * @param[in] inputSize Size of the input data.
+ * @return true if successful, false otherwise.
+ **/
+ bool DoPreProcess(const void* input, size_t inputSize) override;
+
+ size_t m_audioWindowIndex = 0; /* Index of audio slider, used when caching features in longer clips. */
+ size_t m_audioDataWindowSize; /* Amount of audio needed for 1 inference. */
+ size_t m_audioDataStride; /* Amount of audio to stride across if doing >1 inference in longer clips. */
+
+ private:
+ TfLiteTensor* m_inputTensor; /* Model input tensor. */
+ const int m_mfccFrameLength;
+ const int m_mfccFrameStride;
+ const size_t m_numMfccFrames; /* Number of MFCC feature frames needed for one inference. */
+
+ audio::MicroNetKwsMFCC m_mfcc;
+ audio::SlidingWindow<const int16_t> m_mfccSlidingWindow;
+ size_t m_numMfccVectorsInAudioStride;
+ size_t m_numReusedMfccVectors;
+ std::function<void (std::vector<int16_t>&, int, bool, size_t)> m_mfccFeatureCalculator;
+
+ /**
+ * @brief Returns a function to perform feature calculation and populates input tensor data with
+ * MFCC data.
+ *
+ * The input tensor data type is checked to choose the correct MFCC feature data type.
+ * If the tensor has an integer data type, the original features are quantised.
+ *
+ * Warning: the MFCC calculator provided as input must outlive the returned function.
+ *
+ * @param[in] mfcc MFCC feature calculator.
+ * @param[in,out] inputTensor Input tensor pointer to store calculated features.
+ * @param[in] cacheSize Size of the feature vectors cache (number of feature vectors).
+ * @return Function to be called with the audio samples and the sliding window index.
+ */
+ std::function<void (std::vector<int16_t>&, int, bool, size_t)>
+ GetFeatureCalculator(audio::MicroNetKwsMFCC& mfcc,
+ TfLiteTensor* inputTensor,
+ size_t cacheSize);
+
+ template<class T>
+ std::function<void (std::vector<int16_t>&, size_t, bool, size_t)>
+ FeatureCalc(TfLiteTensor* inputTensor, size_t cacheSize,
+ std::function<std::vector<T> (std::vector<int16_t>& )> compute);
+ };
+
+ /**
+ * @brief Post-processing class for Keyword Spotting use case.
+ * Implements methods declared by BasePostProcess and anything else needed
+ * to populate result vector.
+ */
+ class KwsPostProcess : public BasePostProcess {
+
+ private:
+ TfLiteTensor* m_outputTensor; /* Model output tensor. */
+ Classifier& m_kwsClassifier; /* KWS Classifier object. */
+ const std::vector<std::string>& m_labels; /* KWS Labels. */
+ std::vector<ClassificationResult>& m_results; /* Results vector for a single inference. */
+
+ public:
+ /**
+ * @brief Constructor
+ * @param[in] outputTensor Pointer to the TFLite Micro output Tensor.
+ * @param[in] classifier Classifier object used to get top N results from classification.
+ * @param[in] labels Vector of string labels to identify each output of the model.
+ * @param[in,out] results Vector of classification results to store decoded outputs.
+ **/
+ KwsPostProcess(TfLiteTensor* outputTensor, Classifier& classifier,
+ const std::vector<std::string>& labels,
+ std::vector<ClassificationResult>& results);
+
+ /**
+ * @brief Should perform post-processing of the result of inference and then
+ * populate KWS result data for any later use.
+ * @return true if successful, false otherwise.
+ **/
+ bool DoPostProcess() override;
+ };
+
+} /* namespace app */
+} /* namespace arm */
+
+#endif /* KWS_PROCESSING_HPP */ \ No newline at end of file
diff --git a/source/application/api/use_case/kws/include/KwsResult.hpp b/source/application/api/use_case/kws/include/KwsResult.hpp
new file mode 100644
index 0000000..38f32b4
--- /dev/null
+++ b/source/application/api/use_case/kws/include/KwsResult.hpp
@@ -0,0 +1,63 @@
+/*
+ * Copyright (c) 2021 Arm Limited. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef KWS_RESULT_HPP
+#define KWS_RESULT_HPP
+
+#include "ClassificationResult.hpp"
+
+#include <vector>
+
+namespace arm {
+namespace app {
+namespace kws {
+
+ using ResultVec = std::vector<arm::app::ClassificationResult>;
+
+ /* Structure for holding kws result. */
+ class KwsResult {
+
+ public:
+ ResultVec m_resultVec; /* Container for "thresholded" classification results. */
+ float m_timeStamp; /* Audio timestamp for this result. */
+ uint32_t m_inferenceNumber; /* Corresponding inference number. */
+ float m_threshold; /* Threshold value for `m_resultVec`. */
+
+ KwsResult() = delete;
+ KwsResult(ResultVec& resultVec,
+ const float timestamp,
+ const uint32_t inferenceIdx,
+ const float scoreThreshold) {
+
+ this->m_threshold = scoreThreshold;
+ this->m_timeStamp = timestamp;
+ this->m_inferenceNumber = inferenceIdx;
+
+ this->m_resultVec = ResultVec();
+ for (auto & i : resultVec) {
+ if (i.m_normalisedVal >= this->m_threshold) {
+ this->m_resultVec.emplace_back(i);
+ }
+ }
+ }
+ ~KwsResult() = default;
+ };
+
+} /* namespace kws */
+} /* namespace app */
+} /* namespace arm */
+
+#endif /* KWS_RESULT_HPP */ \ No newline at end of file
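A small illustration of the thresholding done by the constructor (the values are hypothetical and `results` is assumed to be a populated ResultVec):

    /* Keep only classifications scoring >= 0.7 for inference #3 at t = 1.5 s. */
    arm::app::kws::KwsResult kwsResult(results, 1.5f, 3, 0.7f);
    /* kwsResult.m_resultVec now contains only the entries of `results`
     * whose m_normalisedVal was at least 0.7. */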
diff --git a/source/application/api/use_case/kws/include/MicroNetKwsMfcc.hpp b/source/application/api/use_case/kws/include/MicroNetKwsMfcc.hpp
new file mode 100644
index 0000000..b2565a3
--- /dev/null
+++ b/source/application/api/use_case/kws/include/MicroNetKwsMfcc.hpp
@@ -0,0 +1,50 @@
+/*
+ * Copyright (c) 2021 Arm Limited. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef KWS_MICRONET_MFCC_HPP
+#define KWS_MICRONET_MFCC_HPP
+
+#include "Mfcc.hpp"
+
+namespace arm {
+namespace app {
+namespace audio {
+
+ /* Class to provide MicroNet specific MFCC calculation requirements. */
+ class MicroNetKwsMFCC : public MFCC {
+
+ public:
+ static constexpr uint32_t ms_defaultSamplingFreq = 16000;
+ static constexpr uint32_t ms_defaultNumFbankBins = 40;
+ static constexpr uint32_t ms_defaultMelLoFreq = 20;
+ static constexpr uint32_t ms_defaultMelHiFreq = 4000;
+ static constexpr bool ms_defaultUseHtkMethod = true;
+
+ explicit MicroNetKwsMFCC(const size_t numFeats, const size_t frameLen)
+ : MFCC(MfccParams(
+ ms_defaultSamplingFreq, ms_defaultNumFbankBins,
+ ms_defaultMelLoFreq, ms_defaultMelHiFreq,
+ numFeats, frameLen, ms_defaultUseHtkMethod))
+ {}
+ MicroNetKwsMFCC() = delete;
+ ~MicroNetKwsMFCC() = default;
+ };
+
+} /* namespace audio */
+} /* namespace app */
+} /* namespace arm */
+
+#endif /* KWS_MICRONET_MFCC_HPP */ \ No newline at end of file
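For instance (illustrative values; the real numbers come from the model configuration): 10 MFCC features over 640-sample frames, i.e. 40 ms at the default 16 kHz sampling rate:

    arm::app::audio::MicroNetKwsMFCC mfcc(10, 640);
    mfcc.Init();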
diff --git a/source/application/api/use_case/kws/include/MicroNetKwsModel.hpp b/source/application/api/use_case/kws/include/MicroNetKwsModel.hpp
new file mode 100644
index 0000000..3d2f3de
--- /dev/null
+++ b/source/application/api/use_case/kws/include/MicroNetKwsModel.hpp
@@ -0,0 +1,63 @@
+/*
+ * Copyright (c) 2021 Arm Limited. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef KWS_MICRONETMODEL_HPP
+#define KWS_MICRONETMODEL_HPP
+
+#include "Model.hpp"
+
+namespace arm {
+namespace app {
+namespace kws {
+ extern const int g_FrameLength;
+ extern const int g_FrameStride;
+ extern const float g_ScoreThreshold;
+ extern const uint32_t g_NumMfcc;
+ extern const uint32_t g_NumAudioWins;
+} /* namespace kws */
+} /* namespace app */
+} /* namespace arm */
+
+namespace arm {
+namespace app {
+
+ class MicroNetKwsModel : public Model {
+ public:
+ /* Indices for the expected model - based on input and output tensor shapes */
+ static constexpr uint32_t ms_inputRowsIdx = 1;
+ static constexpr uint32_t ms_inputColsIdx = 2;
+ static constexpr uint32_t ms_outputRowsIdx = 2;
+ static constexpr uint32_t ms_outputColsIdx = 3;
+
+ protected:
+ /** @brief Gets the reference to op resolver interface class. */
+ const tflite::MicroOpResolver& GetOpResolver() override;
+
+ /** @brief Adds operations to the op resolver instance. */
+ bool EnlistOperations() override;
+
+ private:
+ /* Maximum number of individual operations that can be enlisted. */
+ static constexpr int ms_maxOpCnt = 7;
+
+ /* A mutable op resolver instance. */
+ tflite::MicroMutableOpResolver<ms_maxOpCnt> m_opResolver;
+ };
+
+} /* namespace app */
+} /* namespace arm */
+
+#endif /* KWS_MICRONETMODEL_HPP */
diff --git a/source/application/api/use_case/kws/src/KwsProcessing.cc b/source/application/api/use_case/kws/src/KwsProcessing.cc
new file mode 100644
index 0000000..40de498
--- /dev/null
+++ b/source/application/api/use_case/kws/src/KwsProcessing.cc
@@ -0,0 +1,212 @@
+/*
+ * Copyright (c) 2022 Arm Limited. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "KwsProcessing.hpp"
+#include "log_macros.h"
+#include "MicroNetKwsModel.hpp"
+
+namespace arm {
+namespace app {
+
+ KwsPreProcess::KwsPreProcess(TfLiteTensor* inputTensor, size_t numFeatures, size_t numMfccFrames,
+ int mfccFrameLength, int mfccFrameStride
+ ):
+ m_inputTensor{inputTensor},
+ m_mfccFrameLength{mfccFrameLength},
+ m_mfccFrameStride{mfccFrameStride},
+ m_numMfccFrames{numMfccFrames},
+ m_mfcc{audio::MicroNetKwsMFCC(numFeatures, mfccFrameLength)}
+ {
+ this->m_mfcc.Init();
+
+ /* Deduce the data length required for 1 inference from the network parameters. */
+ this->m_audioDataWindowSize = this->m_numMfccFrames * this->m_mfccFrameStride +
+ (this->m_mfccFrameLength - this->m_mfccFrameStride);
+
+ /* Creating an MFCC feature sliding window for the data required for 1 inference. */
+ this->m_mfccSlidingWindow = audio::SlidingWindow<const int16_t>(nullptr, this->m_audioDataWindowSize,
+ this->m_mfccFrameLength, this->m_mfccFrameStride);
+
+ /* For longer audio clips we choose to move by half the audio window size
+ * => for a 1 second window size there is an overlap of 0.5 seconds. */
+ this->m_audioDataStride = this->m_audioDataWindowSize / 2;
+
+ /* For the previously calculated features to be reusable, the stride must be a
+ * multiple of the MFCC window stride. Reduce the stride through audio if needed. */
+ if (0 != this->m_audioDataStride % this->m_mfccFrameStride) {
+ this->m_audioDataStride -= this->m_audioDataStride % this->m_mfccFrameStride;
+ }
+
+ this->m_numMfccVectorsInAudioStride = this->m_audioDataStride / this->m_mfccFrameStride;
+
+ /* Calculate the number of feature vectors in the window overlap region.
+ * These feature vectors will be reused. */
+ this->m_numReusedMfccVectors = this->m_mfccSlidingWindow.TotalStrides() + 1
+ - this->m_numMfccVectorsInAudioStride;
+
+ /* Construct feature calculation function. */
+ this->m_mfccFeatureCalculator = GetFeatureCalculator(this->m_mfcc, this->m_inputTensor,
+ this->m_numReusedMfccVectors);
+
+ if (!this->m_mfccFeatureCalculator) {
+ printf_err("Feature calculator not initialized.");
+ }
+ }
+
+ bool KwsPreProcess::DoPreProcess(const void* data, size_t inputSize)
+ {
+ UNUSED(inputSize);
+ if (data == nullptr) {
+ printf_err("Data pointer is null\n");
+ return false;
+ }
+
+ /* Set the features sliding window to the new address. */
+ auto input = static_cast<const int16_t*>(data);
+ this->m_mfccSlidingWindow.Reset(input);
+
+ /* Cache is only usable if we have more than 1 inference in an audio clip. */
+ bool useCache = this->m_audioWindowIndex > 0 && this->m_numReusedMfccVectors > 0;
+
+ /* Use a sliding window to calculate MFCC features frame by frame. */
+ while (this->m_mfccSlidingWindow.HasNext()) {
+ const int16_t* mfccWindow = this->m_mfccSlidingWindow.Next();
+
+ std::vector<int16_t> mfccFrameAudioData = std::vector<int16_t>(mfccWindow,
+ mfccWindow + this->m_mfccFrameLength);
+
+ /* Compute features for this window and write them to input tensor. */
+ this->m_mfccFeatureCalculator(mfccFrameAudioData, this->m_mfccSlidingWindow.Index(),
+ useCache, this->m_numMfccVectorsInAudioStride);
+ }
+
+ debug("Input tensor populated \n");
+
+ return true;
+ }
+
+ /**
+ * @brief Generic feature calculator factory.
+ *
+ * Returns a lambda function that computes features using a feature cache.
+ * The actual feature math is done by the compute function provided as a parameter.
+ * Features are written to the input tensor memory.
+ *
+ * @tparam T Feature vector type.
+ * @param[in] inputTensor Model input tensor pointer.
+ * @param[in] cacheSize Number of feature vectors to cache. Defined by the sliding window overlap.
+ * @param[in] compute Features calculator function.
+ * @return Lambda function to compute features.
+ */
+ template<class T>
+ std::function<void (std::vector<int16_t>&, size_t, bool, size_t)>
+ KwsPreProcess::FeatureCalc(TfLiteTensor* inputTensor, size_t cacheSize,
+ std::function<std::vector<T> (std::vector<int16_t>& )> compute)
+ {
+ /* Feature cache to be captured by lambda function. */
+ static std::vector<std::vector<T>> featureCache = std::vector<std::vector<T>>(cacheSize);
+
+ return [=](std::vector<int16_t>& audioDataWindow,
+ size_t index,
+ bool useCache,
+ size_t featuresOverlapIndex)
+ {
+ T* tensorData = tflite::GetTensorData<T>(inputTensor);
+ std::vector<T> features;
+
+ /* Reuse features from the cache if the cache is ready and the sliding windows overlap.
+ * The overlap is at the beginning of the sliding window, with the size of the feature cache. */
+ if (useCache && index < featureCache.size()) {
+ features = std::move(featureCache[index]);
+ } else {
+ features = std::move(compute(audioDataWindow));
+ }
+ auto size = features.size();
+ auto sizeBytes = sizeof(T) * size;
+ std::memcpy(tensorData + (index * size), features.data(), sizeBytes);
+
+ /* Start renewing the cache as soon as the iteration moves out of the window overlap. */
+ if (index >= featuresOverlapIndex) {
+ featureCache[index - featuresOverlapIndex] = std::move(features);
+ }
+ };
+ }
+
+ template std::function<void (std::vector<int16_t>&, size_t, bool, size_t)>
+ KwsPreProcess::FeatureCalc<int8_t>(TfLiteTensor* inputTensor,
+ size_t cacheSize,
+ std::function<std::vector<int8_t> (std::vector<int16_t>&)> compute);
+
+ template std::function<void(std::vector<int16_t>&, size_t, bool, size_t)>
+ KwsPreProcess::FeatureCalc<float>(TfLiteTensor* inputTensor,
+ size_t cacheSize,
+ std::function<std::vector<float>(std::vector<int16_t>&)> compute);
+
+
+ std::function<void (std::vector<int16_t>&, int, bool, size_t)>
+ KwsPreProcess::GetFeatureCalculator(audio::MicroNetKwsMFCC& mfcc, TfLiteTensor* inputTensor, size_t cacheSize)
+ {
+ std::function<void (std::vector<int16_t>&, size_t, bool, size_t)> mfccFeatureCalc;
+
+ TfLiteQuantization quant = inputTensor->quantization;
+
+ if (kTfLiteAffineQuantization == quant.type) {
+ auto *quantParams = (TfLiteAffineQuantization *) quant.params;
+ const float quantScale = quantParams->scale->data[0];
+ const int quantOffset = quantParams->zero_point->data[0];
+
+ switch (inputTensor->type) {
+ case kTfLiteInt8: {
+ mfccFeatureCalc = this->FeatureCalc<int8_t>(inputTensor,
+ cacheSize,
+ [=, &mfcc](std::vector<int16_t>& audioDataWindow) {
+ return mfcc.MfccComputeQuant<int8_t>(audioDataWindow,
+ quantScale,
+ quantOffset);
+ }
+ );
+ break;
+ }
+ default:
+ printf_err("Tensor type %s not supported\n", TfLiteTypeGetName(inputTensor->type));
+ }
+ } else {
+ mfccFeatureCalc = this->FeatureCalc<float>(inputTensor, cacheSize,
+ [&mfcc](std::vector<int16_t>& audioDataWindow) {
+ return mfcc.MfccCompute(audioDataWindow); }
+ );
+ }
+ return mfccFeatureCalc;
+ }
+
+ KwsPostProcess::KwsPostProcess(TfLiteTensor* outputTensor, Classifier& classifier,
+ const std::vector<std::string>& labels,
+ std::vector<ClassificationResult>& results)
+ :m_outputTensor{outputTensor},
+ m_kwsClassifier{classifier},
+ m_labels{labels},
+ m_results{results}
+ {}
+
+ bool KwsPostProcess::DoPostProcess()
+ {
+ return this->m_kwsClassifier.GetClassificationResults(
+ this->m_outputTensor, this->m_results,
+ this->m_labels, 1, true);
+ }
+
+} /* namespace app */
+} /* namespace arm */ \ No newline at end of file
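Putting the constructor's stride arithmetic into numbers, under assumed MicroNet-style parameters (16 kHz audio, frameLength 640, frameStride 320, 49 feature frames of 10 MFCCs):

    /* m_audioDataWindowSize = 49*320 + (640 - 320) = 16000 samples (1 s).  */
    /* m_audioDataStride     = 16000/2 = 8000, already a multiple of 320.   */
    /* m_numMfccVectorsInAudioStride = 8000/320 = 25.                       */
    /* Frames per window = (16000 - 640)/320 + 1 = 49, so                   */
    /* m_numReusedMfccVectors = 49 - 25 = 24 cached feature vectors.        */

And a hedged sketch of driving the pre- and post-processing over a longer clip (`model`, `postProcess`, `clip` and `clipLen` are assumed to be set up by the caller):

    arm::app::KwsPreProcess preProcess(model.GetInputTensor(0), 10, 49, 640, 320);
    auto slider = arm::app::audio::SlidingWindow<const int16_t>(
        clip, clipLen, preProcess.m_audioDataWindowSize, preProcess.m_audioDataStride);

    while (slider.HasNext()) {
        const int16_t* window = slider.Next();
        preProcess.m_audioWindowIndex = slider.Index();
        if (!preProcess.DoPreProcess(window, preProcess.m_audioDataWindowSize) ||
            !model.RunInference() ||
            !postProcess.DoPostProcess()) {
            break;
        }
    }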
diff --git a/source/application/api/use_case/kws/src/MicroNetKwsModel.cc b/source/application/api/use_case/kws/src/MicroNetKwsModel.cc
new file mode 100644
index 0000000..bedca99
--- /dev/null
+++ b/source/application/api/use_case/kws/src/MicroNetKwsModel.cc
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2021 Arm Limited. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "MicroNetKwsModel.hpp"
+#include "log_macros.h"
+
+const tflite::MicroOpResolver& arm::app::MicroNetKwsModel::GetOpResolver()
+{
+ return this->m_opResolver;
+}
+
+bool arm::app::MicroNetKwsModel::EnlistOperations()
+{
+ this->m_opResolver.AddReshape();
+ this->m_opResolver.AddAveragePool2D();
+ this->m_opResolver.AddConv2D();
+ this->m_opResolver.AddDepthwiseConv2D();
+ this->m_opResolver.AddFullyConnected();
+ this->m_opResolver.AddRelu();
+
+ if (kTfLiteOk == this->m_opResolver.AddEthosU()) {
+ info("Added %s support to op resolver\n",
+ tflite::GetString_ETHOSU());
+ } else {
+ printf_err("Failed to add Arm NPU support to op resolver.");
+ return false;
+ }
+ return true;
+}
diff --git a/source/application/api/use_case/noise_reduction/CMakeLists.txt b/source/application/api/use_case/noise_reduction/CMakeLists.txt
new file mode 100644
index 0000000..5fa9a73
--- /dev/null
+++ b/source/application/api/use_case/noise_reduction/CMakeLists.txt
@@ -0,0 +1,40 @@
+#----------------------------------------------------------------------------
+# Copyright (c) 2022 Arm Limited. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#----------------------------------------------------------------------------
+#########################################################
+# NOISE REDUCTION API library #
+#########################################################
+cmake_minimum_required(VERSION 3.15.6)
+
+set(NOISE_REDUCTION_API_TARGET noise_reduction_api)
+project(${NOISE_REDUCTION_API_TARGET}
+ DESCRIPTION "Noise reduction use case API library"
+ LANGUAGES C CXX)
+
+# Create static library
+add_library(${NOISE_REDUCTION_API_TARGET} STATIC
+ src/RNNoiseProcessing.cc
+ src/RNNoiseFeatureProcessor.cc
+ src/RNNoiseModel.cc)
+
+target_include_directories(${NOISE_REDUCTION_API_TARGET} PUBLIC include)
+
+target_link_libraries(${NOISE_REDUCTION_API_TARGET} PUBLIC common_api)
+
+message(STATUS "*******************************************************")
+message(STATUS "Library : " ${NOISE_REDUCTION_API_TARGET})
+message(STATUS "CMAKE_SYSTEM_PROCESSOR : " ${CMAKE_SYSTEM_PROCESSOR})
+message(STATUS "*******************************************************")
diff --git a/source/application/api/use_case/noise_reduction/include/RNNoiseFeatureProcessor.hpp b/source/application/api/use_case/noise_reduction/include/RNNoiseFeatureProcessor.hpp
new file mode 100644
index 0000000..cbf0e4e
--- /dev/null
+++ b/source/application/api/use_case/noise_reduction/include/RNNoiseFeatureProcessor.hpp
@@ -0,0 +1,341 @@
+/*
+ * Copyright (c) 2021-2022 Arm Limited. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef RNNOISE_FEATURE_PROCESSOR_HPP
+#define RNNOISE_FEATURE_PROCESSOR_HPP
+
+#include "PlatformMath.hpp"
+#include <cstdint>
+#include <vector>
+#include <array>
+#include <tuple>
+
+namespace arm {
+namespace app {
+namespace rnn {
+
+ using vec1D32F = std::vector<float>;
+ using vec2D32F = std::vector<vec1D32F>;
+ using arrHp = std::array<float, 2>;
+ using math::FftInstance;
+ using math::FftType;
+
+ class FrameFeatures {
+ public:
+ bool m_silence{false}; /* If frame contains silence or not. */
+ vec1D32F m_featuresVec{}; /* Calculated feature vector to feed to model. */
+ vec1D32F m_fftX{}; /* Vector of floats arranged to represent complex numbers. */
+ vec1D32F m_fftP{}; /* Vector of floats arranged to represent complex numbers. */
+ vec1D32F m_Ex{}; /* Spectral band energy for audio x. */
+ vec1D32F m_Ep{}; /* Spectral band energy for pitch p. */
+ vec1D32F m_Exp{}; /* Correlated spectral energy between x and p. */
+ };
+
+ /**
+ * @brief RNNoise pre- and post-processing class based on the 2018 paper by
+ * Jean-Marc Valin. Recommended reading:
+ * - https://jmvalin.ca/demo/rnnoise/
+ * - https://arxiv.org/abs/1709.08243
+ **/
+ class RNNoiseFeatureProcessor {
+ /* Public interface */
+ public:
+ RNNoiseFeatureProcessor();
+ ~RNNoiseFeatureProcessor() = default;
+
+ /**
+ * @brief Calculates the features from a given audio buffer ready to be sent to RNNoise model.
+ * @param[in] audioData Pointer to the floating point vector
+ * with audio data (within the numerical
+ * limits of int16_t type).
+ * @param[in] audioLen Number of elements in the audio window.
+ * @param[out] features FrameFeatures object reference.
+ **/
+ void PreprocessFrame(const float* audioData,
+ size_t audioLen,
+ FrameFeatures& features);
+
+ /**
+ * @brief Use the RNNoise model output gain values with pre-processing features
+ * to generate audio with noise suppressed.
+ * @param[in] modelOutput Output gain values from model.
+ * @param[in] features Calculated features from pre-processing step.
+ * @param[out] outFrame Output frame to be populated.
+ **/
+ void PostProcessFrame(vec1D32F& modelOutput, FrameFeatures& features, vec1D32F& outFrame);
+
+
+ /* Public constants */
+ public:
+ static constexpr uint32_t FRAME_SIZE_SHIFT{2};
+ static constexpr uint32_t FRAME_SIZE{512};
+ static constexpr uint32_t WINDOW_SIZE{2 * FRAME_SIZE};
+ static constexpr uint32_t FREQ_SIZE{FRAME_SIZE + 1};
+
+ static constexpr uint32_t PITCH_MIN_PERIOD{64};
+ static constexpr uint32_t PITCH_MAX_PERIOD{820};
+ static constexpr uint32_t PITCH_FRAME_SIZE{1024};
+ static constexpr uint32_t PITCH_BUF_SIZE{PITCH_MAX_PERIOD + PITCH_FRAME_SIZE};
+
+ static constexpr uint32_t NB_BANDS{22};
+ static constexpr uint32_t CEPS_MEM{8};
+ static constexpr uint32_t NB_DELTA_CEPS{6};
+
+ static constexpr uint32_t NB_FEATURES{NB_BANDS + 3*NB_DELTA_CEPS + 2};
+
+ /* Private functions */
+ private:
+
+ /**
+ * @brief Initialises the half window and DCT tables.
+ */
+ void InitTables();
+
+ /**
+ * @brief Applies a bi-quadratic filter over the audio window.
+ * @param[in] bHp Constant coefficient set b (arrHp type).
+ * @param[in] aHp Constant coefficient set a (arrHp type).
+ * @param[in,out] memHpX Coefficients populated by this function.
+ * @param[in,out] audioWindow Floating point vector with audio data.
+ **/
+ void BiQuad(
+ const arrHp& bHp,
+ const arrHp& aHp,
+ arrHp& memHpX,
+ vec1D32F& audioWindow);
+
+ /**
+ * @brief Computes features from the "filtered" audio window.
+ * @param[in] audioWindow Floating point vector with audio data.
+ * @param[out] features FrameFeatures object reference.
+ **/
+ void ComputeFrameFeatures(vec1D32F& audioWindow, FrameFeatures& features);
+
+ /**
+ * @brief Runs analysis on the audio buffer.
+ * @param[in] audioWindow Floating point vector with audio data.
+ * @param[out] fft Floating point FFT vector containing real and
+ * imaginary pairs of elements. NOTE: this vector
+ * does not contain the mirror image (conjugates)
+ * part of the spectrum.
+ * @param[out] energy Computed energy for each band in the Bark scale.
+ * @param[out] analysisMem Analysis memory buffer, partially updated
+ * with the newest audio samples for use by the next frame.
+ **/
+ void FrameAnalysis(
+ const vec1D32F& audioWindow,
+ vec1D32F& fft,
+ vec1D32F& energy,
+ vec1D32F& analysisMem);
+
+ /**
+ * @brief Applies the window function, in-place, over the given
+ * floating point buffer.
+ * @param[in,out] x Buffer the window will be applied to.
+ **/
+ void ApplyWindow(vec1D32F& x);
+
+ /**
+ * @brief Computes the FFT for a given vector.
+ * @param[in] x Vector to compute the FFT from.
+ * @param[out] fft Floating point FFT vector containing real and
+ * imaginary pairs of elements. NOTE: this vector
+ * does not contain the mirror image (conjugates)
+ * part of the spectrum.
+ **/
+ void ForwardTransform(
+ vec1D32F& x,
+ vec1D32F& fft);
+
+ /**
+ * @brief Computes band energy for each of the 22 Bark scale bands.
+ * @param[in] fft_X FFT spectrum (as computed by ForwardTransform).
+ * @param[out] bandE Vector with 22 elements populated with energy for
+ * each band.
+ **/
+ void ComputeBandEnergy(const vec1D32F& fft_X, vec1D32F& bandE);
+
+ /**
+ * @brief Computes band energy correlation.
+ * @param[in] X FFT vector X.
+ * @param[in] P FFT vector P.
+ * @param[out] bandC Vector with 22 elements populated with band energy
+ * correlation for the two input FFT vectors.
+ **/
+ void ComputeBandCorr(const vec1D32F& X, const vec1D32F& P, vec1D32F& bandC);
+
+ /**
+ * @brief Performs pitch auto-correlation for a given vector for
+ * given lag.
+ * @param[in] x Input vector.
+ * @param[out] ac Auto-correlation output vector.
+ * @param[in] lag Lag value.
+ * @param[in] n Number of elements to consider for correlation
+ * computation.
+ **/
+ void AutoCorr(const vec1D32F &x,
+ vec1D32F &ac,
+ size_t lag,
+ size_t n);
+
+ /**
+ * @brief Computes pitch cross-correlation.
+ * @param[in] x Input vector 1.
+ * @param[in] y Input vector 2.
+ * @param[out] xCorr Cross-correlation output vector.
+ * @param[in] len Number of elements to consider for correlation
+ * computation.
+ * @param[in] maxPitch Maximum pitch.
+ **/
+ void PitchXCorr(
+ const vec1D32F& x,
+ const vec1D32F& y,
+ vec1D32F& xCorr,
+ size_t len,
+ size_t maxPitch);
+
+ /**
+ * @brief Computes "Linear Predictor Coefficients".
+ * @param[in] ac Correlation vector.
+ * @param[in] p Number of elements of input vector to consider.
+ * @param[out] lpc Output coefficients vector.
+ **/
+ void LPC(const vec1D32F& ac, int32_t p, vec1D32F& lpc);
+
+ /**
+ * @brief Custom FIR implementation.
+ * @param[in] num FIR coefficient vector.
+ * @param[in] N Number of elements.
+ * @param[out] x Vector to be processed.
+ **/
+ void Fir5(const vec1D32F& num, uint32_t N, vec1D32F& x);
+
+ /**
+ * @brief Down-sample the pitch buffer.
+ * @param[in,out] pitchBuf Pitch buffer.
+ * @param[in] pitchBufSz Buffer size.
+ **/
+ void PitchDownsample(vec1D32F& pitchBuf, size_t pitchBufSz);
+
+ /**
+ * @brief Pitch search function.
+ * @param[in] xLp Shifted pitch buffer input.
+ * @param[in] y Pitch buffer input.
+ * @param[in] len Length to search for.
+ * @param[in] maxPitch Maximum pitch.
+ * @return pitch index.
+ **/
+ int PitchSearch(vec1D32F& xLp, vec1D32F& y, uint32_t len, uint32_t maxPitch);
+
+ /**
+ * @brief Finds the "best" pitch from the buffer.
+ * @param[in] xCorr Pitch correlation vector.
+ * @param[in] y Pitch buffer input.
+ * @param[in] len Length to search for.
+ * @param[in] maxPitch Maximum pitch.
+ * @return pitch array (2 elements).
+ **/
+ arrHp FindBestPitch(vec1D32F& xCorr, vec1D32F& y, uint32_t len, uint32_t maxPitch);
+
+ /**
+ * @brief Remove pitch period doubling errors.
+ * @param[in,out] pitchBuf Pitch buffer vector.
+ * @param[in] maxPeriod Maximum period.
+ * @param[in] minPeriod Minimum period.
+ * @param[in] frameSize Frame size.
+ * @param[in] pitchIdx0_ Pitch index 0.
+ * @return pitch index.
+ **/
+ int RemoveDoubling(
+ vec1D32F& pitchBuf,
+ uint32_t maxPeriod,
+ uint32_t minPeriod,
+ uint32_t frameSize,
+ size_t pitchIdx0_);
+
+ /**
+ * @brief Computes pitch gain.
+ * @param[in] xy Single xy cross correlation value.
+ * @param[in] xx Single xx auto correlation value.
+ * @param[in] yy Single yy auto correlation value.
+ * @return Calculated pitch gain.
+ **/
+ float ComputePitchGain(float xy, float xx, float yy);
+
+ /**
+ * @brief Computes DCT vector from the given input.
+ * @param[in] input Input vector.
+ * @param[out] output Output vector with DCT coefficients.
+ **/
+ void DCT(vec1D32F& input, vec1D32F& output);
+
+ /**
+ * @brief Perform inverse fourier transform on complex spectral vector.
+ * @param[out] out Output vector.
+ * @param[in] fftXIn Vector of floats arranged to represent complex numbers interleaved.
+ **/
+ void InverseTransform(vec1D32F& out, vec1D32F& fftXIn);
+
+ /**
+ * @brief Perform pitch filtering.
+ * @param[in] features Object with pre-processing calculated frame features.
+ * @param[in] g Gain values.
+ **/
+ void PitchFilter(FrameFeatures& features, vec1D32F& g);
+
+ /**
+ * @brief Interpolate the band gain values.
+ * @param[out] g Gain values.
+ * @param[in] bandE Vector with 22 elements populated with energy for
+ * each band.
+ **/
+ void InterpBandGain(vec1D32F& g, vec1D32F& bandE);
+
+ /**
+ * @brief Create de-noised frame.
+ * @param[out] outFrame Output vector for storing the created audio frame.
+ * @param[in] fftY Gain adjusted complex spectral vector.
+ */
+ void FrameSynthesis(vec1D32F& outFrame, vec1D32F& fftY);
+
+ /* Private objects */
+ private:
+ FftInstance m_fftInstReal; /* FFT instance for real numbers */
+ FftInstance m_fftInstCmplx; /* FFT instance for complex numbers */
+ vec1D32F m_halfWindow; /* Window coefficients */
+ vec1D32F m_dctTable; /* DCT table */
+ vec1D32F m_analysisMem; /* Buffer used for frame analysis */
+ vec2D32F m_cepstralMem; /* Cepstral coefficients */
+ size_t m_memId; /* memory ID */
+ vec1D32F m_synthesisMem; /* Synthesis mem (used by post-processing) */
+ vec1D32F m_pitchBuf; /* Pitch buffer */
+ float m_lastGain; /* Last gain calculated */
+ int m_lastPeriod; /* Last period calculated */
+ arrHp m_memHpX; /* HpX coefficients. */
+ vec1D32F m_lastGVec; /* Last gain vector (used by post-processing) */
+
+ /* Constants */
+ const std::array <uint32_t, NB_BANDS> m_eband5ms {
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 10, 12,
+ 14, 16, 20, 24, 28, 34, 40, 48, 60, 78, 100};
+ };
+
+
+} /* namespace rnn */
+} /* namespace app */
+} /* namespace arm */
+
+#endif /* RNNOISE_FEATURE_PROCESSOR_HPP */
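The header above declares the full RNNoise pitch-analysis chain. As a rough, illustrative sketch of how the private helpers compose (the concrete wiring lives in ComputeFrameFeatures() in RNNoiseFeatureProcessor.cc, later in this diff; variable names here are placeholders):

    /* Illustrative call order only; see ComputeFrameFeatures() for the real code. */
    PitchDownsample(pitchBuf, PITCH_BUF_SIZE);      /* 2x decimation, then AutoCorr -> LPC -> Fir5 whitening. */
    int idx = PitchSearch(xLp, pitchBuf, PITCH_FRAME_SIZE,
                          PITCH_MAX_PERIOD - 3 * PITCH_MIN_PERIOD);   /* Coarse-to-fine correlation search. */
    idx = RemoveDoubling(pitchBuf, PITCH_MAX_PERIOD, PITCH_MIN_PERIOD,
                         PITCH_FRAME_SIZE, PITCH_MAX_PERIOD - idx);   /* Reject octave (period-doubling) errors. */
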
diff --git a/source/application/api/use_case/noise_reduction/include/RNNoiseModel.hpp b/source/application/api/use_case/noise_reduction/include/RNNoiseModel.hpp
new file mode 100644
index 0000000..3d2f23c
--- /dev/null
+++ b/source/application/api/use_case/noise_reduction/include/RNNoiseModel.hpp
@@ -0,0 +1,78 @@
+/*
+ * Copyright (c) 2021 Arm Limited. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef RNNOISE_MODEL_HPP
+#define RNNOISE_MODEL_HPP
+
+#include "Model.hpp"
+
+extern const uint32_t g_NumInputFeatures;
+extern const uint32_t g_FrameLength;
+extern const uint32_t g_FrameStride;
+
+namespace arm {
+namespace app {
+
+ class RNNoiseModel : public Model {
+ public:
+ /**
+ * @brief Runs inference for RNNoise model.
+ *
+         * Call CopyGruStates() before the inference run so that the GRU state outputs are copied to the GRU state inputs.
+         * Call ResetGruState() to zero the states before starting to process a new sequence of logically related data.
+         * @return True if inference succeeded, false otherwise.
+ */
+ bool RunInference() override;
+
+ /**
+ * @brief Sets GRU input states to zeros.
+         * Call this method before starting to process a new sequence of logically related data.
+ */
+ void ResetGruState();
+
+ /**
+ * @brief Copy current GRU output states to input states.
+         * Call this method before starting to process the next sequence of logically related data.
+         * @return True if the states were copied successfully, false otherwise.
+         */
+ bool CopyGruStates();
+
+        /* Index of the model output that holds the main result (the gains). */
+ const size_t m_indexForModelOutput = 1;
+
+ protected:
+ /** @brief Gets the reference to op resolver interface class. */
+ const tflite::MicroOpResolver& GetOpResolver() override;
+
+ /** @brief Adds operations to the op resolver instance. */
+ bool EnlistOperations() override;
+
+        /*
+         Each inference after the first needs to copy 3 GRU states from an output index to an input index (model dependent):
+         0 -> 3, 2 -> 2, 3 -> 1
+         */
+ const std::vector<std::pair<size_t, size_t>> m_gruStateMap = {{0,3}, {2, 2}, {3, 1}};
+ private:
+ /* Maximum number of individual operations that can be enlisted. */
+ static constexpr int ms_maxOpCnt = 15;
+
+ /* A mutable op resolver instance. */
+ tflite::MicroMutableOpResolver<ms_maxOpCnt> m_opResolver;
+ };
+
+} /* namespace app */
+} /* namespace arm */
+
+#endif /* RNNOISE_MODEL_HPP */
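Taken together, the state-handling methods above imply a per-clip inference loop along the following lines. This is a minimal sketch: model initialisation is elided, and PopulateInput()/ConsumeGains() are hypothetical placeholders for application code, not part of this API.

    arm::app::RNNoiseModel model;
    /* ... model initialisation as for any other Model subclass ... */

    model.ResetGruState();                        /* Zero the GRU input states for a new clip. */
    for (const auto& frame : audioFrames) {
        PopulateInput(model.GetInputTensor(0), frame);   /* Hypothetical helper. */
        if (!model.RunInference()) {
            break;
        }
        ConsumeGains(model.GetOutputTensor(model.m_indexForModelOutput)); /* Hypothetical helper. */
        model.CopyGruStates();                    /* Feed output states back in for the next frame. */
    }
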
diff --git a/source/application/api/use_case/noise_reduction/include/RNNoiseProcessing.hpp b/source/application/api/use_case/noise_reduction/include/RNNoiseProcessing.hpp
new file mode 100644
index 0000000..15e62d9
--- /dev/null
+++ b/source/application/api/use_case/noise_reduction/include/RNNoiseProcessing.hpp
@@ -0,0 +1,113 @@
+/*
+ * Copyright (c) 2022 Arm Limited. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef RNNOISE_PROCESSING_HPP
+#define RNNOISE_PROCESSING_HPP
+
+#include "BaseProcessing.hpp"
+#include "Model.hpp"
+#include "RNNoiseFeatureProcessor.hpp"
+
+namespace arm {
+namespace app {
+
+ /**
+ * @brief Pre-processing class for Noise Reduction use case.
+ * Implements methods declared by BasePreProcess and anything else needed
+ * to populate input tensors ready for inference.
+ */
+ class RNNoisePreProcess : public BasePreProcess {
+
+ public:
+ /**
+ * @brief Constructor
+ * @param[in] inputTensor Pointer to the TFLite Micro input Tensor.
+         * @param[in,out] featureProcessor   RNNoise specific feature extractor object.
+         * @param[in,out] frameFeatures      RNNoise specific features shared between pre & post-processing.
+ **/
+ explicit RNNoisePreProcess(TfLiteTensor* inputTensor,
+ std::shared_ptr<rnn::RNNoiseFeatureProcessor> featureProcessor,
+ std::shared_ptr<rnn::FrameFeatures> frameFeatures);
+
+ /**
+ * @brief Should perform pre-processing of 'raw' input audio data and load it into
+         *        TFLite Micro input tensors ready for inference.
+ * @param[in] input Pointer to the data that pre-processing will work on.
+ * @param[in] inputSize Size of the input data.
+ * @return true if successful, false otherwise.
+ **/
+ bool DoPreProcess(const void* input, size_t inputSize) override;
+
+ private:
+ TfLiteTensor* m_inputTensor; /* Model input tensor. */
+ std::shared_ptr<rnn::RNNoiseFeatureProcessor> m_featureProcessor; /* RNNoise feature processor shared between pre & post-processing. */
+ std::shared_ptr<rnn::FrameFeatures> m_frameFeatures; /* RNNoise features shared between pre & post-processing. */
+ rnn::vec1D32F m_audioFrame; /* Audio frame cast to FP32 */
+
+ /**
+ * @brief Quantize the given features and populate the input Tensor.
+ * @param[in] inputFeatures Vector of floating point features to quantize.
+ * @param[in] quantScale Quantization scale for the inputTensor.
+ * @param[in] quantOffset Quantization offset for the inputTensor.
+ * @param[in,out] inputTensor TFLite micro tensor to populate.
+ **/
+ static void QuantizeAndPopulateInput(rnn::vec1D32F& inputFeatures,
+ float quantScale, int quantOffset,
+ TfLiteTensor* inputTensor);
+ };
+
+ /**
+ * @brief Post-processing class for Noise Reduction use case.
+ * Implements methods declared by BasePostProcess and anything else needed
+ * to populate result vector.
+ */
+ class RNNoisePostProcess : public BasePostProcess {
+
+ public:
+ /**
+ * @brief Constructor
+ * @param[in] outputTensor Pointer to the TFLite Micro output Tensor.
+ * @param[out] denoisedAudioFrame Vector to store the final denoised audio frame.
+         * @param[in,out] featureProcessor    RNNoise specific feature extractor object.
+         * @param[in,out] frameFeatures       RNNoise specific features shared between pre & post-processing.
+ **/
+ RNNoisePostProcess(TfLiteTensor* outputTensor,
+ std::vector<int16_t>& denoisedAudioFrame,
+ std::shared_ptr<rnn::RNNoiseFeatureProcessor> featureProcessor,
+ std::shared_ptr<rnn::FrameFeatures> frameFeatures);
+
+ /**
+         * @brief    Should perform post-processing of the inference result and then
+         *           populate the result data for any later use.
+ * @return true if successful, false otherwise.
+ **/
+ bool DoPostProcess() override;
+
+ private:
+ TfLiteTensor* m_outputTensor; /* Model output tensor. */
+ std::vector<int16_t>& m_denoisedAudioFrame; /* Vector to store the final denoised frame. */
+ rnn::vec1D32F m_denoisedAudioFrameFloat; /* Internal vector to store the final denoised frame (FP32). */
+ std::shared_ptr<rnn::RNNoiseFeatureProcessor> m_featureProcessor; /* RNNoise feature processor shared between pre & post-processing. */
+ std::shared_ptr<rnn::FrameFeatures> m_frameFeatures; /* RNNoise features shared between pre & post-processing. */
+ std::vector<float> m_modelOutputFloat; /* Internal vector to store de-quantized model output. */
+
+ };
+
+} /* namespace app */
+} /* namespace arm */
+
+#endif /* RNNOISE_PROCESSING_HPP */
\ No newline at end of file
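The two classes above are designed to share a single feature processor and one FrameFeatures object. A minimal usage sketch for one audio frame (assuming an initialised RNNoiseModel `model`; error handling elided):

    auto featureProcessor = std::make_shared<rnn::RNNoiseFeatureProcessor>();
    auto frameFeatures    = std::make_shared<rnn::FrameFeatures>();
    std::vector<int16_t> denoisedFrame(g_FrameLength);

    RNNoisePreProcess  preProcess(model.GetInputTensor(0), featureProcessor, frameFeatures);
    RNNoisePostProcess postProcess(model.GetOutputTensor(model.m_indexForModelOutput),
                                   denoisedFrame, featureProcessor, frameFeatures);

    preProcess.DoPreProcess(audioFrame.data(), audioFrame.size()); /* int16_t samples in. */
    model.RunInference();
    postProcess.DoPostProcess();                                   /* denoisedFrame now holds the result. */
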
diff --git a/source/application/api/use_case/noise_reduction/src/RNNoiseFeatureProcessor.cc b/source/application/api/use_case/noise_reduction/src/RNNoiseFeatureProcessor.cc
new file mode 100644
index 0000000..036894c
--- /dev/null
+++ b/source/application/api/use_case/noise_reduction/src/RNNoiseFeatureProcessor.cc
@@ -0,0 +1,892 @@
+/*
+ * Copyright (c) 2021-2022 Arm Limited. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "RNNoiseFeatureProcessor.hpp"
+#include "log_macros.h"
+
+#include <algorithm>
+#include <cmath>
+#include <cstring>
+
+namespace arm {
+namespace app {
+namespace rnn {
+
+#define VERIFY(x) \
+do { \
+ if (!(x)) { \
+        printf_err("Assert failed: " #x "\n");  \
+ exit(1); \
+ } \
+} while(0)
+
+RNNoiseFeatureProcessor::RNNoiseFeatureProcessor() :
+ m_halfWindow(FRAME_SIZE, 0),
+ m_dctTable(NB_BANDS * NB_BANDS),
+ m_analysisMem(FRAME_SIZE, 0),
+ m_cepstralMem(CEPS_MEM, vec1D32F(NB_BANDS, 0)),
+ m_memId{0},
+ m_synthesisMem(FRAME_SIZE, 0),
+ m_pitchBuf(PITCH_BUF_SIZE, 0),
+ m_lastGain{0.0},
+ m_lastPeriod{0},
+ m_memHpX{},
+ m_lastGVec(NB_BANDS, 0)
+{
+ constexpr uint32_t numFFt = 2 * FRAME_SIZE;
+ static_assert(numFFt != 0, "Num FFT can't be 0");
+
+ math::MathUtils::FftInitF32(numFFt, this->m_fftInstReal, FftType::real);
+ math::MathUtils::FftInitF32(numFFt, this->m_fftInstCmplx, FftType::complex);
+ this->InitTables();
+}
+
+void RNNoiseFeatureProcessor::PreprocessFrame(const float* audioData,
+ const size_t audioLen,
+ FrameFeatures& features)
+{
+ /* Note audioWindow is modified in place */
+ const arrHp aHp {-1.99599, 0.99600 };
+ const arrHp bHp {-2.00000, 1.00000 };
+
+ vec1D32F audioWindow{audioData, audioData + audioLen};
+
+ this->BiQuad(bHp, aHp, this->m_memHpX, audioWindow);
+ this->ComputeFrameFeatures(audioWindow, features);
+}
+
+void RNNoiseFeatureProcessor::PostProcessFrame(vec1D32F& modelOutput, FrameFeatures& features, vec1D32F& outFrame)
+{
+ std::vector<float> outputBands = modelOutput;
+ std::vector<float> gain(FREQ_SIZE, 0);
+
+ if (!features.m_silence) {
+ PitchFilter(features, outputBands);
+ for (size_t i = 0; i < NB_BANDS; i++) {
+ float alpha = .6f;
+ outputBands[i] = std::max(outputBands[i], alpha * m_lastGVec[i]);
+ m_lastGVec[i] = outputBands[i];
+ }
+ InterpBandGain(gain, outputBands);
+        for (size_t i = 0; i < FREQ_SIZE; i++) {
+            features.m_fftX[2 * i] *= gain[i];      /* Real. */
+            features.m_fftX[2 * i + 1] *= gain[i];  /* Imaginary. */
+        }
+    }
+
+ FrameSynthesis(outFrame, features.m_fftX);
+}
+
+void RNNoiseFeatureProcessor::InitTables()
+{
+ constexpr float pi = M_PI;
+ constexpr float halfPi = M_PI / 2;
+ constexpr float halfPiOverFrameSz = halfPi/FRAME_SIZE;
+
+ for (uint32_t i = 0; i < FRAME_SIZE; i++) {
+ const float sinVal = math::MathUtils::SineF32(halfPiOverFrameSz * (i + 0.5f));
+ m_halfWindow[i] = math::MathUtils::SineF32(halfPi * sinVal * sinVal);
+ }
+
+ for (uint32_t i = 0; i < NB_BANDS; i++) {
+ for (uint32_t j = 0; j < NB_BANDS; j++) {
+ m_dctTable[i * NB_BANDS + j] = math::MathUtils::CosineF32((i + 0.5f) * j * pi / NB_BANDS);
+ }
+ m_dctTable[i * NB_BANDS] *= math::MathUtils::SqrtF32(0.5f);
+ }
+}
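+
+/* A note on the window built above: reading off the loop, m_halfWindow holds the
+ * Vorbis-style power-complementary window
+ *     w[i] = sin((pi/2) * sin^2((pi/2) * (i + 0.5) / FRAME_SIZE)),
+ * applied to both halves of each WINDOW_SIZE frame in ApplyWindow(). Since
+ * sin((pi/2)(1 - s)) = cos((pi/2) s), it satisfies
+ *     w[i]^2 + w[FRAME_SIZE - 1 - i]^2 = 1,
+ * the Princen-Bradley condition that lets the overlap-add in FrameSynthesis()
+ * reconstruct the signal exactly when no spectral modification is applied. */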
+
+void RNNoiseFeatureProcessor::BiQuad(
+ const arrHp& bHp,
+ const arrHp& aHp,
+ arrHp& memHpX,
+ vec1D32F& audioWindow)
+{
+ for (float& audioElement : audioWindow) {
+ const auto xi = audioElement;
+ const auto yi = audioElement + memHpX[0];
+ memHpX[0] = memHpX[1] + (bHp[0] * xi - aHp[0] * yi);
+ memHpX[1] = (bHp[1] * xi - aHp[1] * yi);
+ audioElement = yi;
+ }
+}
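+
+/* For reference: with the coefficients passed in from PreprocessFrame()
+ * (implicit leading taps b0 = a0 = 1, then bHp = {-2, 1}, aHp = {-1.99599, 0.99600}),
+ * the transposed direct form II loop above realises the high-pass transfer function
+ *     H(z) = (1 - 2 z^-1 + z^-2) / (1 - 1.99599 z^-1 + 0.99600 z^-2),
+ * i.e. a DC-blocking filter with a double zero at z = 1. */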
+
+void RNNoiseFeatureProcessor::ComputeFrameFeatures(vec1D32F& audioWindow,
+ FrameFeatures& features)
+{
+ this->FrameAnalysis(audioWindow,
+ features.m_fftX,
+ features.m_Ex,
+ this->m_analysisMem);
+
+ float energy = 0.0;
+
+ vec1D32F Ly(NB_BANDS, 0);
+ vec1D32F p(WINDOW_SIZE, 0);
+ vec1D32F pitchBuf(PITCH_BUF_SIZE >> 1, 0);
+
+ VERIFY(PITCH_BUF_SIZE >= this->m_pitchBuf.size());
+ std::copy_n(this->m_pitchBuf.begin() + FRAME_SIZE,
+ PITCH_BUF_SIZE - FRAME_SIZE,
+ this->m_pitchBuf.begin());
+
+ VERIFY(FRAME_SIZE <= audioWindow.size() && PITCH_BUF_SIZE > FRAME_SIZE);
+ std::copy_n(audioWindow.begin(),
+ FRAME_SIZE,
+ this->m_pitchBuf.begin() + PITCH_BUF_SIZE - FRAME_SIZE);
+
+ this->PitchDownsample(pitchBuf, PITCH_BUF_SIZE);
+
+ VERIFY(pitchBuf.size() > PITCH_MAX_PERIOD/2);
+ vec1D32F xLp(pitchBuf.size() - PITCH_MAX_PERIOD/2, 0);
+ std::copy_n(pitchBuf.begin() + PITCH_MAX_PERIOD/2, xLp.size(), xLp.begin());
+
+ int pitchIdx = this->PitchSearch(xLp, pitchBuf,
+ PITCH_FRAME_SIZE, (PITCH_MAX_PERIOD - (3*PITCH_MIN_PERIOD)));
+
+ pitchIdx = this->RemoveDoubling(
+ pitchBuf,
+ PITCH_MAX_PERIOD,
+ PITCH_MIN_PERIOD,
+ PITCH_FRAME_SIZE,
+ PITCH_MAX_PERIOD - pitchIdx);
+
+ size_t stIdx = PITCH_BUF_SIZE - WINDOW_SIZE - pitchIdx;
+ VERIFY((static_cast<int>(PITCH_BUF_SIZE) - static_cast<int>(WINDOW_SIZE) - pitchIdx) >= 0);
+ std::copy_n(this->m_pitchBuf.begin() + stIdx, WINDOW_SIZE, p.begin());
+
+ this->ApplyWindow(p);
+ this->ForwardTransform(p, features.m_fftP);
+ this->ComputeBandEnergy(features.m_fftP, features.m_Ep);
+ this->ComputeBandCorr(features.m_fftX, features.m_fftP, features.m_Exp);
+
+ for (uint32_t i = 0 ; i < NB_BANDS; ++i) {
+ features.m_Exp[i] /= math::MathUtils::SqrtF32(
+ 0.001f + features.m_Ex[i] * features.m_Ep[i]);
+ }
+
+ vec1D32F dctVec(NB_BANDS, 0);
+ this->DCT(features.m_Exp, dctVec);
+
+ features.m_featuresVec = vec1D32F (NB_FEATURES, 0);
+ for (uint32_t i = 0; i < NB_DELTA_CEPS; ++i) {
+ features.m_featuresVec[NB_BANDS + 2*NB_DELTA_CEPS + i] = dctVec[i];
+ }
+
+ features.m_featuresVec[NB_BANDS + 2*NB_DELTA_CEPS] -= 1.3;
+ features.m_featuresVec[NB_BANDS + 2*NB_DELTA_CEPS + 1] -= 0.9;
+ features.m_featuresVec[NB_BANDS + 3*NB_DELTA_CEPS] = 0.01 * (static_cast<int>(pitchIdx) - 300);
+
+ float logMax = -2.f;
+ float follow = -2.f;
+ for (uint32_t i = 0; i < NB_BANDS; ++i) {
+ Ly[i] = log10f(1e-2f + features.m_Ex[i]);
+ Ly[i] = std::max<float>(logMax - 7, std::max<float>(follow - 1.5, Ly[i]));
+ logMax = std::max<float>(logMax, Ly[i]);
+ follow = std::max<float>(follow - 1.5, Ly[i]);
+ energy += features.m_Ex[i];
+ }
+
+ /* If there's no audio avoid messing up the state. */
+    features.m_silence = (energy < 0.04);
+    if (features.m_silence) {
+        return;
+    }
+
+ this->DCT(Ly, features.m_featuresVec);
+ features.m_featuresVec[0] -= 12.0;
+ features.m_featuresVec[1] -= 4.0;
+
+ VERIFY(CEPS_MEM > 2);
+ uint32_t stIdx1 = this->m_memId < 1 ? CEPS_MEM + this->m_memId - 1 : this->m_memId - 1;
+ uint32_t stIdx2 = this->m_memId < 2 ? CEPS_MEM + this->m_memId - 2 : this->m_memId - 2;
+ VERIFY(stIdx1 < this->m_cepstralMem.size());
+ VERIFY(stIdx2 < this->m_cepstralMem.size());
+ auto ceps1 = this->m_cepstralMem[stIdx1];
+ auto ceps2 = this->m_cepstralMem[stIdx2];
+
+ /* Ceps 0 */
+ for (uint32_t i = 0; i < NB_BANDS; ++i) {
+ this->m_cepstralMem[this->m_memId][i] = features.m_featuresVec[i];
+ }
+
+ for (uint32_t i = 0; i < NB_DELTA_CEPS; ++i) {
+ features.m_featuresVec[i] = this->m_cepstralMem[this->m_memId][i] + ceps1[i] + ceps2[i];
+ features.m_featuresVec[NB_BANDS + i] = this->m_cepstralMem[this->m_memId][i] - ceps2[i];
+ features.m_featuresVec[NB_BANDS + NB_DELTA_CEPS + i] =
+ this->m_cepstralMem[this->m_memId][i] - 2 * ceps1[i] + ceps2[i];
+ }
+
+ /* Spectral variability features. */
+ this->m_memId += 1;
+ if (this->m_memId == CEPS_MEM) {
+ this->m_memId = 0;
+ }
+
+ float specVariability = 0.f;
+
+ VERIFY(this->m_cepstralMem.size() >= CEPS_MEM);
+ for (size_t i = 0; i < CEPS_MEM; ++i) {
+ float minDist = 1e15;
+ for (size_t j = 0; j < CEPS_MEM; ++j) {
+ float dist = 0.f;
+ for (size_t k = 0; k < NB_BANDS; ++k) {
+ VERIFY(this->m_cepstralMem[i].size() >= NB_BANDS);
+ auto tmp = this->m_cepstralMem[i][k] - this->m_cepstralMem[j][k];
+ dist += tmp * tmp;
+ }
+
+ if (j != i) {
+ minDist = std::min<float>(minDist, dist);
+ }
+ }
+ specVariability += minDist;
+ }
+
+ VERIFY(features.m_featuresVec.size() >= NB_BANDS + 3 * NB_DELTA_CEPS + 1);
+ features.m_featuresVec[NB_BANDS + 3 * NB_DELTA_CEPS + 1] = specVariability / CEPS_MEM - 2.1;
+}
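+
+/* For reference, assuming the standard RNNoise sizes (NB_BANDS = 22,
+ * NB_DELTA_CEPS = 6, NB_FEATURES = 42), the feature vector produced above is:
+ *   [0..21]  band cepstrum (DCT of the log band energies), with [0..5] replaced
+ *            by the temporally smoothed cepstrum ceps0 + ceps1 + ceps2;
+ *   [22..27] first cepstral derivative  (ceps0 - ceps2);
+ *   [28..33] second cepstral derivative (ceps0 - 2*ceps1 + ceps2);
+ *   [34..39] DCT of the pitch correlation (fixed offsets applied to [34], [35]);
+ *   [40]     scaled pitch period;
+ *   [41]     spectral variability. */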
+
+void RNNoiseFeatureProcessor::FrameAnalysis(
+ const vec1D32F& audioWindow,
+ vec1D32F& fft,
+ vec1D32F& energy,
+ vec1D32F& analysisMem)
+{
+ vec1D32F x(WINDOW_SIZE, 0);
+
+ /* Move old audio down and populate end with latest audio window. */
+ VERIFY(x.size() >= FRAME_SIZE && analysisMem.size() >= FRAME_SIZE);
+ VERIFY(audioWindow.size() >= FRAME_SIZE);
+
+ std::copy_n(analysisMem.begin(), FRAME_SIZE, x.begin());
+ std::copy_n(audioWindow.begin(), x.size() - FRAME_SIZE, x.begin() + FRAME_SIZE);
+ std::copy_n(audioWindow.begin(), FRAME_SIZE, analysisMem.begin());
+
+ this->ApplyWindow(x);
+
+ /* Calculate FFT. */
+ ForwardTransform(x, fft);
+
+ /* Compute band energy. */
+ ComputeBandEnergy(fft, energy);
+}
+
+void RNNoiseFeatureProcessor::ApplyWindow(vec1D32F& x)
+{
+ if (WINDOW_SIZE != x.size()) {
+ printf_err("Invalid size for vector to be windowed\n");
+ return;
+ }
+
+ VERIFY(this->m_halfWindow.size() >= FRAME_SIZE);
+
+ /* Multiply input by sinusoidal function. */
+ for (size_t i = 0; i < FRAME_SIZE; i++) {
+ x[i] *= this->m_halfWindow[i];
+ x[WINDOW_SIZE - 1 - i] *= this->m_halfWindow[i];
+ }
+}
+
+void RNNoiseFeatureProcessor::ForwardTransform(
+ vec1D32F& x,
+ vec1D32F& fft)
+{
+ /* The input vector can be modified by the fft function. */
+ fft.reserve(x.size() + 2);
+ fft.resize(x.size() + 2, 0);
+ math::MathUtils::FftF32(x, fft, this->m_fftInstReal);
+
+ /* Normalise. */
+ for (auto& f : fft) {
+ f /= this->m_fftInstReal.m_fftLen;
+ }
+
+    /* The real FFT packs the last bin's real part into fft[1]; move it to its
+     * natural position at the end and zero the vacated slot. */
+    fft[fft.size()-2] = fft[1];
+    fft[1] = 0;
+
+    /* NOTE: We don't truncate our FFT vector as it already contains only the
+     * first half of the FFT; the conjugate-symmetric bins are not present. */
+}
+
+void RNNoiseFeatureProcessor::ComputeBandEnergy(const vec1D32F& fftX, vec1D32F& bandE)
+{
+ bandE = vec1D32F(NB_BANDS, 0);
+
+ VERIFY(this->m_eband5ms.size() >= NB_BANDS);
+ for (uint32_t i = 0; i < NB_BANDS - 1; i++) {
+ const auto bandSize = (this->m_eband5ms[i + 1] - this->m_eband5ms[i])
+ << FRAME_SIZE_SHIFT;
+
+ for (uint32_t j = 0; j < bandSize; j++) {
+ const auto frac = static_cast<float>(j) / bandSize;
+ const auto idx = (this->m_eband5ms[i] << FRAME_SIZE_SHIFT) + j;
+
+ auto tmp = fftX[2 * idx] * fftX[2 * idx]; /* Real part */
+ tmp += fftX[2 * idx + 1] * fftX[2 * idx + 1]; /* Imaginary part */
+
+ bandE[i] += (1 - frac) * tmp;
+ bandE[i + 1] += frac * tmp;
+ }
+ }
+ bandE[0] *= 2;
+ bandE[NB_BANDS - 1] *= 2;
+}
+
+void RNNoiseFeatureProcessor::ComputeBandCorr(const vec1D32F& X, const vec1D32F& P, vec1D32F& bandC)
+{
+ bandC = vec1D32F(NB_BANDS, 0);
+ VERIFY(this->m_eband5ms.size() >= NB_BANDS);
+
+ for (uint32_t i = 0; i < NB_BANDS - 1; i++) {
+ const auto bandSize = (this->m_eband5ms[i + 1] - this->m_eband5ms[i]) << FRAME_SIZE_SHIFT;
+
+ for (uint32_t j = 0; j < bandSize; j++) {
+ const auto frac = static_cast<float>(j) / bandSize;
+ const auto idx = (this->m_eband5ms[i] << FRAME_SIZE_SHIFT) + j;
+
+ auto tmp = X[2 * idx] * P[2 * idx]; /* Real part */
+ tmp += X[2 * idx + 1] * P[2 * idx + 1]; /* Imaginary part */
+
+ bandC[i] += (1 - frac) * tmp;
+ bandC[i + 1] += frac * tmp;
+ }
+ }
+ bandC[0] *= 2;
+ bandC[NB_BANDS - 1] *= 2;
+}
+
+void RNNoiseFeatureProcessor::DCT(vec1D32F& input, vec1D32F& output)
+{
+ VERIFY(this->m_dctTable.size() >= NB_BANDS * NB_BANDS);
+ for (uint32_t i = 0; i < NB_BANDS; ++i) {
+ float sum = 0;
+
+ for (uint32_t j = 0, k = 0; j < NB_BANDS; ++j, k += NB_BANDS) {
+ sum += input[j] * this->m_dctTable[k + i];
+ }
+ output[i] = sum * math::MathUtils::SqrtF32(2.0/22);
+ }
+}
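+
+/* Reading off the table built in InitTables(), the loop above computes an
+ * orthonormal DCT-II over the NB_BANDS (= 22) bands:
+ *     output[i] = sqrt(2/22) * c_i * sum_{j=0..21} input[j] * cos(pi * i * (j + 0.5) / 22),
+ * with c_0 = 1/sqrt(2) (folded into the first column of m_dctTable) and c_i = 1 otherwise. */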
+
+void RNNoiseFeatureProcessor::PitchDownsample(vec1D32F& pitchBuf, size_t pitchBufSz) {
+ for (size_t i = 1; i < (pitchBufSz >> 1); ++i) {
+ pitchBuf[i] = 0.5 * (
+ 0.5 * (this->m_pitchBuf[2 * i - 1] + this->m_pitchBuf[2 * i + 1])
+ + this->m_pitchBuf[2 * i]);
+ }
+
+ pitchBuf[0] = 0.5*(0.5*(this->m_pitchBuf[1]) + this->m_pitchBuf[0]);
+
+ vec1D32F ac(5, 0);
+ size_t numLags = 4;
+
+ this->AutoCorr(pitchBuf, ac, numLags, pitchBufSz >> 1);
+
+    /* Noise floor at -40 dB. */
+ ac[0] *= 1.0001;
+
+ /* Lag windowing. */
+ for (size_t i = 1; i < numLags + 1; ++i) {
+ ac[i] -= ac[i] * (0.008 * i) * (0.008 * i);
+ }
+
+ vec1D32F lpc(numLags, 0);
+ this->LPC(ac, numLags, lpc);
+
+ float tmp = 1.0;
+ for (size_t i = 0; i < numLags; ++i) {
+ tmp = 0.9f * tmp;
+ lpc[i] = lpc[i] * tmp;
+ }
+
+ vec1D32F lpc2(numLags + 1, 0);
+ float c1 = 0.8;
+
+ /* Add a zero. */
+    lpc2[0] = lpc[0] + c1;
+ lpc2[1] = lpc[1] + (c1 * lpc[0]);
+ lpc2[2] = lpc[2] + (c1 * lpc[1]);
+ lpc2[3] = lpc[3] + (c1 * lpc[2]);
+ lpc2[4] = (c1 * lpc[3]);
+
+ this->Fir5(lpc2, pitchBufSz >> 1, pitchBuf);
+}
+
+int RNNoiseFeatureProcessor::PitchSearch(vec1D32F& xLp, vec1D32F& y, uint32_t len, uint32_t maxPitch) {
+ uint32_t lag = len + maxPitch;
+ vec1D32F xLp4(len >> 2, 0);
+ vec1D32F yLp4(lag >> 2, 0);
+ vec1D32F xCorr(maxPitch >> 1, 0);
+
+ /* Downsample by 2 again. */
+ for (size_t j = 0; j < (len >> 2); ++j) {
+ xLp4[j] = xLp[2*j];
+ }
+ for (size_t j = 0; j < (lag >> 2); ++j) {
+ yLp4[j] = y[2*j];
+ }
+
+ this->PitchXCorr(xLp4, yLp4, xCorr, len >> 2, maxPitch >> 2);
+
+ /* Coarse search with 4x decimation. */
+ arrHp bestPitch = this->FindBestPitch(xCorr, yLp4, len >> 2, maxPitch >> 2);
+
+ /* Finer search with 2x decimation. */
+ const int maxIdx = (maxPitch >> 1);
+ for (int i = 0; i < maxIdx; ++i) {
+ xCorr[i] = 0;
+ if (std::abs(i - 2*bestPitch[0]) > 2 and std::abs(i - 2*bestPitch[1]) > 2) {
+ continue;
+ }
+ float sum = 0;
+ for (size_t j = 0; j < len >> 1; ++j) {
+ sum += xLp[j] * y[i+j];
+ }
+
+ xCorr[i] = std::max(-1.0f, sum);
+ }
+
+ bestPitch = this->FindBestPitch(xCorr, y, len >> 1, maxPitch >> 1);
+
+ int offset;
+ /* Refine by pseudo-interpolation. */
+ if ( 0 < bestPitch[0] && bestPitch[0] < ((maxPitch >> 1) - 1)) {
+ float a = xCorr[bestPitch[0] - 1];
+ float b = xCorr[bestPitch[0]];
+ float c = xCorr[bestPitch[0] + 1];
+
+ if ( (c-a) > 0.7*(b-a) ) {
+ offset = 1;
+ } else if ( (a-c) > 0.7*(b-c) ) {
+ offset = -1;
+ } else {
+ offset = 0;
+ }
+ } else {
+ offset = 0;
+ }
+
+ return 2*bestPitch[0] - offset;
+}
+
+arrHp RNNoiseFeatureProcessor::FindBestPitch(vec1D32F& xCorr, vec1D32F& y, uint32_t len, uint32_t maxPitch)
+{
+ float Syy = 1;
+ arrHp bestNum {-1, -1};
+ arrHp bestDen {0, 0};
+ arrHp bestPitch {0, 1};
+
+ for (size_t j = 0; j < len; ++j) {
+ Syy += (y[j] * y[j]);
+ }
+
+ for (size_t i = 0; i < maxPitch; ++i ) {
+ if (xCorr[i] > 0) {
+ float xCorr16 = xCorr[i] * 1e-12f; /* Avoid problems when squaring. */
+
+ float num = xCorr16 * xCorr16;
+ if (num*bestDen[1] > bestNum[1]*Syy) {
+ if (num*bestDen[0] > bestNum[0]*Syy) {
+ bestNum[1] = bestNum[0];
+ bestDen[1] = bestDen[0];
+ bestPitch[1] = bestPitch[0];
+ bestNum[0] = num;
+ bestDen[0] = Syy;
+ bestPitch[0] = i;
+ } else {
+ bestNum[1] = num;
+ bestDen[1] = Syy;
+ bestPitch[1] = i;
+ }
+ }
+ }
+
+ Syy += (y[i+len]*y[i+len]) - (y[i]*y[i]);
+ Syy = std::max(1.0f, Syy);
+ }
+
+ return bestPitch;
+}
+
+int RNNoiseFeatureProcessor::RemoveDoubling(
+ vec1D32F& pitchBuf,
+ uint32_t maxPeriod,
+ uint32_t minPeriod,
+ uint32_t frameSize,
+ size_t pitchIdx0_)
+{
+ constexpr std::array<size_t, 16> secondCheck {0, 0, 3, 2, 3, 2, 5, 2, 3, 2, 3, 2, 5, 2, 3, 2};
+ uint32_t minPeriod0 = minPeriod;
+ float lastPeriod = static_cast<float>(this->m_lastPeriod)/2;
+ float lastGain = static_cast<float>(this->m_lastGain);
+
+ maxPeriod /= 2;
+ minPeriod /= 2;
+ pitchIdx0_ /= 2;
+ frameSize /= 2;
+ uint32_t xStart = maxPeriod;
+
+ if (pitchIdx0_ >= maxPeriod) {
+ pitchIdx0_ = maxPeriod - 1;
+ }
+
+ size_t pitchIdx = pitchIdx0_;
+ const size_t pitchIdx0 = pitchIdx0_;
+
+ float xx = 0;
+ for ( size_t i = xStart; i < xStart+frameSize; ++i) {
+ xx += (pitchBuf[i] * pitchBuf[i]);
+ }
+
+ float xy = 0;
+ for ( size_t i = xStart; i < xStart+frameSize; ++i) {
+ xy += (pitchBuf[i] * pitchBuf[i-pitchIdx0]);
+ }
+
+ vec1D32F yyLookup (maxPeriod+1, 0);
+ yyLookup[0] = xx;
+ float yy = xx;
+
+ for ( size_t i = 1; i < yyLookup.size(); ++i) {
+ yy = yy + (pitchBuf[xStart-i] * pitchBuf[xStart-i]) -
+ (pitchBuf[xStart+frameSize-i] * pitchBuf[xStart+frameSize-i]);
+ yyLookup[i] = std::max(0.0f, yy);
+ }
+
+ yy = yyLookup[pitchIdx0];
+ float bestXy = xy;
+ float bestYy = yy;
+
+ float g = this->ComputePitchGain(xy, xx, yy);
+ float g0 = g;
+
+ /* Look for any pitch at pitchIndex/k. */
+ for ( size_t k = 2; k < 16; ++k) {
+ size_t pitchIdx1 = (2*pitchIdx0+k) / (2*k);
+ if (pitchIdx1 < minPeriod) {
+ break;
+ }
+
+ size_t pitchIdx1b;
+ /* Look for another strong correlation at T1b. */
+ if (k == 2) {
+ if ((pitchIdx1 + pitchIdx0) > maxPeriod) {
+ pitchIdx1b = pitchIdx0;
+ } else {
+ pitchIdx1b = pitchIdx0 + pitchIdx1;
+ }
+ } else {
+ pitchIdx1b = (2*(secondCheck[k])*pitchIdx0 + k) / (2*k);
+ }
+
+ xy = 0;
+ for ( size_t i = xStart; i < xStart+frameSize; ++i) {
+ xy += (pitchBuf[i] * pitchBuf[i-pitchIdx1]);
+ }
+
+ float xy2 = 0;
+ for ( size_t i = xStart; i < xStart+frameSize; ++i) {
+ xy2 += (pitchBuf[i] * pitchBuf[i-pitchIdx1b]);
+ }
+ xy = 0.5f * (xy + xy2);
+ VERIFY(pitchIdx1b < maxPeriod+1);
+ yy = 0.5f * (yyLookup[pitchIdx1] + yyLookup[pitchIdx1b]);
+
+ float g1 = this->ComputePitchGain(xy, xx, yy);
+
+ float cont;
+ if (std::abs(pitchIdx1-lastPeriod) <= 1) {
+ cont = lastGain;
+ } else if (std::abs(pitchIdx1-lastPeriod) <= 2 and 5*k*k < pitchIdx0) {
+ cont = 0.5f*lastGain;
+ } else {
+ cont = 0.0f;
+ }
+
+ float thresh = std::max(0.3, 0.7*g0-cont);
+
+ /* Bias against very high pitch (very short period) to avoid false-positives
+ * due to short-term correlation */
+ if (pitchIdx1 < 3*minPeriod) {
+ thresh = std::max(0.4, 0.85*g0-cont);
+ } else if (pitchIdx1 < 2*minPeriod) {
+ thresh = std::max(0.5, 0.9*g0-cont);
+ }
+ if (g1 > thresh) {
+ bestXy = xy;
+ bestYy = yy;
+ pitchIdx = pitchIdx1;
+ g = g1;
+ }
+ }
+
+ bestXy = std::max(0.0f, bestXy);
+ float pg;
+ if (bestYy <= bestXy) {
+ pg = 1.0;
+ } else {
+ pg = bestXy/(bestYy+1);
+ }
+
+ std::array<float, 3> xCorr {0};
+ for ( size_t k = 0; k < 3; ++k ) {
+ for ( size_t i = xStart; i < xStart+frameSize; ++i) {
+ xCorr[k] += (pitchBuf[i] * pitchBuf[i-(pitchIdx+k-1)]);
+ }
+ }
+
+    int offset;
+ if ((xCorr[2]-xCorr[0]) > 0.7*(xCorr[1]-xCorr[0])) {
+ offset = 1;
+ } else if ((xCorr[0]-xCorr[2]) > 0.7*(xCorr[1]-xCorr[2])) {
+ offset = -1;
+ } else {
+ offset = 0;
+ }
+
+ if (pg > g) {
+ pg = g;
+ }
+
+ pitchIdx0_ = 2*pitchIdx + offset;
+
+ if (pitchIdx0_ < minPeriod0) {
+ pitchIdx0_ = minPeriod0;
+ }
+
+ this->m_lastPeriod = pitchIdx0_;
+ this->m_lastGain = pg;
+
+ return this->m_lastPeriod;
+}
+
+float RNNoiseFeatureProcessor::ComputePitchGain(float xy, float xx, float yy)
+{
+ return xy / math::MathUtils::SqrtF32(1+xx*yy);
+}
+
+void RNNoiseFeatureProcessor::AutoCorr(
+ const vec1D32F& x,
+ vec1D32F& ac,
+ size_t lag,
+ size_t n)
+{
+ if (n < lag) {
+ printf_err("Invalid parameters for AutoCorr\n");
+ return;
+ }
+
+ auto fastN = n - lag;
+
+ /* Auto-correlation - can be done by PlatformMath functions */
+ this->PitchXCorr(x, x, ac, fastN, lag + 1);
+
+ /* Modify auto-correlation by summing with auto-correlation for different lags. */
+ for (size_t k = 0; k < lag + 1; k++) {
+ float d = 0;
+ for (size_t i = k + fastN; i < n; i++) {
+ d += x[i] * x[i - k];
+ }
+ ac[k] += d;
+ }
+}
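+
+/* The two-stage computation above is simply the full lag-k autocorrelation
+ * split in two: for each k in 0..lag,
+ *     ac[k] = sum_{i=0..fastN-1} x[i] * x[i+k]          (from PitchXCorr(x, x, ...))
+ *           + sum_{i=k+fastN..n-1} x[i] * x[i-k],
+ * which together give ac[k] = sum_{i=k..n-1} x[i] * x[i-k]. */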
+
+
+void RNNoiseFeatureProcessor::PitchXCorr(
+ const vec1D32F& x,
+ const vec1D32F& y,
+ vec1D32F& xCorr,
+ size_t len,
+ size_t maxPitch)
+{
+ for (size_t i = 0; i < maxPitch; i++) {
+ float sum = 0;
+ for (size_t j = 0; j < len; j++) {
+ sum += x[j] * y[i + j];
+ }
+ xCorr[i] = sum;
+ }
+}
+
+/* Linear predictor coefficients */
+void RNNoiseFeatureProcessor::LPC(
+ const vec1D32F& correlation,
+ int32_t p,
+ vec1D32F& lpc)
+{
+ auto error = correlation[0];
+
+ if (error != 0) {
+ for (int i = 0; i < p; i++) {
+
+ /* Sum up this iteration's reflection coefficient */
+ float rr = 0;
+ for (int j = 0; j < i; j++) {
+ rr += lpc[j] * correlation[i - j];
+ }
+
+ rr += correlation[i + 1];
+ auto r = -rr / error;
+
+ /* Update LP coefficients and total error */
+ lpc[i] = r;
+ for (int j = 0; j < ((i + 1) >> 1); j++) {
+ auto tmp1 = lpc[j];
+ auto tmp2 = lpc[i - 1 - j];
+ lpc[j] = tmp1 + (r * tmp2);
+ lpc[i - 1 - j] = tmp2 + (r * tmp1);
+ }
+
+ error = error - (r * r * error);
+
+            /* Bail out once we reach 30 dB of prediction gain. */
+ if (error < (0.001 * correlation[0])) {
+ break;
+ }
+ }
+ }
+}
+
+void RNNoiseFeatureProcessor::Fir5(
+ const vec1D32F &num,
+ uint32_t N,
+ vec1D32F &x)
+{
+ auto num0 = num[0];
+ auto num1 = num[1];
+ auto num2 = num[2];
+ auto num3 = num[3];
+ auto num4 = num[4];
+    float mem0 = 0;
+    float mem1 = 0;
+    float mem2 = 0;
+    float mem3 = 0;
+    float mem4 = 0;
+ for (uint32_t i = 0; i < N; i++)
+ {
+ auto sum_ = x[i] + (num0 * mem0) + (num1 * mem1) +
+ (num2 * mem2) + (num3 * mem3) + (num4 * mem4);
+ mem4 = mem3;
+ mem3 = mem2;
+ mem2 = mem1;
+ mem1 = mem0;
+ mem0 = x[i];
+ x[i] = sum_;
+ }
+}
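+
+/* Written out, the loop realises a 6-tap direct-form FIR with an implicit
+ * leading coefficient of 1:
+ *     y[n] = x[n] + num[0]*x[n-1] + num[1]*x[n-2] + num[2]*x[n-3]
+ *                 + num[3]*x[n-4] + num[4]*x[n-5],
+ * where mem0..mem4 carry the five most recent past inputs across iterations. */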
+
+void RNNoiseFeatureProcessor::PitchFilter(FrameFeatures &features, vec1D32F &gain) {
+ std::vector<float> r(NB_BANDS, 0);
+ std::vector<float> rf(FREQ_SIZE, 0);
+ std::vector<float> newE(NB_BANDS);
+
+ for (size_t i = 0; i < NB_BANDS; i++) {
+ if (features.m_Exp[i] > gain[i]) {
+ r[i] = 1;
+        } else {
+            r[i] = std::pow(features.m_Exp[i], 2) * (1 - std::pow(gain[i], 2)) /
+                   (.001 + std::pow(gain[i], 2) * (1 - std::pow(features.m_Exp[i], 2)));
+        }
+        r[i] = math::MathUtils::SqrtF32(std::min(1.0f, std::max(0.0f, r[i])));
+        r[i] *= math::MathUtils::SqrtF32(features.m_Ex[i] / (1e-8f + features.m_Ep[i]));
+ }
+
+ InterpBandGain(rf, r);
+ for (size_t i = 0; i < FREQ_SIZE - 1; i++) {
+ features.m_fftX[2 * i] += rf[i] * features.m_fftP[2 * i]; /* Real. */
+ features.m_fftX[2 * i + 1] += rf[i] * features.m_fftP[2 * i + 1]; /* Imaginary. */
+    }
+ ComputeBandEnergy(features.m_fftX, newE);
+ std::vector<float> norm(NB_BANDS);
+ std::vector<float> normf(FRAME_SIZE, 0);
+ for (size_t i = 0; i < NB_BANDS; i++) {
+ norm[i] = math::MathUtils::SqrtF32(features.m_Ex[i] / (1e-8f + newE[i]));
+ }
+
+ InterpBandGain(normf, norm);
+ for (size_t i = 0; i < FREQ_SIZE - 1; i++) {
+ features.m_fftX[2 * i] *= normf[i]; /* Real. */
+ features.m_fftX[2 * i + 1] *= normf[i]; /* Imaginary. */
+    }
+}
+
+void RNNoiseFeatureProcessor::FrameSynthesis(vec1D32F& outFrame, vec1D32F& fftY) {
+ std::vector<float> x(WINDOW_SIZE, 0);
+ InverseTransform(x, fftY);
+ ApplyWindow(x);
+ for (size_t i = 0; i < FRAME_SIZE; i++) {
+ outFrame[i] = x[i] + m_synthesisMem[i];
+ }
+    memcpy(m_synthesisMem.data(), &x[FRAME_SIZE], FRAME_SIZE * sizeof(float));
+}
+
+void RNNoiseFeatureProcessor::InterpBandGain(vec1D32F& g, vec1D32F& bandE) {
+ for (size_t i = 0; i < NB_BANDS - 1; i++) {
+ int bandSize = (m_eband5ms[i + 1] - m_eband5ms[i]) << FRAME_SIZE_SHIFT;
+ for (int j = 0; j < bandSize; j++) {
+ float frac = static_cast<float>(j) / bandSize;
+ g[(m_eband5ms[i] << FRAME_SIZE_SHIFT) + j] = (1 - frac) * bandE[i] + frac * bandE[i + 1];
+ }
+ }
+}
+
+void RNNoiseFeatureProcessor::InverseTransform(vec1D32F& out, vec1D32F& fftXIn) {
+
+    std::vector<float> x(WINDOW_SIZE * 2); /* Interleaved complex (re, im) pairs. */
+
+ size_t i;
+ for (i = 0; i < FREQ_SIZE * 2; i++) {
+ x[i] = fftXIn[i];
+ }
+ for (i = FREQ_SIZE; i < WINDOW_SIZE; i++) {
+ x[2 * i] = x[2 * (WINDOW_SIZE - i)]; /* Real. */
+ x[2 * i + 1] = -x[2 * (WINDOW_SIZE - i) + 1]; /* Imaginary. */
+ }
+
+ constexpr uint32_t numFFt = 2 * FRAME_SIZE;
+ static_assert(numFFt != 0, "numFFt cannot be 0!");
+
+ vec1D32F fftOut = vec1D32F(x.size(), 0);
+    math::MathUtils::FftF32(x, fftOut, m_fftInstCmplx);
+
+ /* Normalize. */
+ for (auto &f: fftOut) {
+ f /= numFFt;
+ }
+
+ out[0] = WINDOW_SIZE * fftOut[0]; /* Real. */
+ for (i = 1; i < WINDOW_SIZE; i++) {
+ out[i] = WINDOW_SIZE * fftOut[(WINDOW_SIZE * 2) - (2 * i)]; /* Real. */
+ }
+}
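+
+/* Two standard identities drive the function above: the spectrum of a real
+ * signal is conjugate-symmetric, X[N - k] = conj(X[k]), which is what the
+ * mirroring loop reconstructs; and an inverse DFT can be obtained from a
+ * forward DFT via x[n] = (1/N) * Y[(N - n) mod N] with Y = DFT(X), which is
+ * why the output is read from fftOut backwards. */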
+
+
+} /* namespace rnn */
+} /* namespace app */
+} /* namespace arm */
diff --git a/source/application/api/use_case/noise_reduction/src/RNNoiseModel.cc b/source/application/api/use_case/noise_reduction/src/RNNoiseModel.cc
new file mode 100644
index 0000000..457cda9
--- /dev/null
+++ b/source/application/api/use_case/noise_reduction/src/RNNoiseModel.cc
@@ -0,0 +1,96 @@
+/*
+ * Copyright (c) 2021 Arm Limited. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "RNNoiseModel.hpp"
+#include "log_macros.h"
+
+const tflite::MicroOpResolver& arm::app::RNNoiseModel::GetOpResolver()
+{
+ return this->m_opResolver;
+}
+
+bool arm::app::RNNoiseModel::EnlistOperations()
+{
+ this->m_opResolver.AddUnpack();
+ this->m_opResolver.AddFullyConnected();
+ this->m_opResolver.AddSplit();
+ this->m_opResolver.AddSplitV();
+ this->m_opResolver.AddAdd();
+ this->m_opResolver.AddLogistic();
+ this->m_opResolver.AddMul();
+ this->m_opResolver.AddSub();
+ this->m_opResolver.AddTanh();
+ this->m_opResolver.AddPack();
+ this->m_opResolver.AddReshape();
+ this->m_opResolver.AddQuantize();
+ this->m_opResolver.AddConcatenation();
+ this->m_opResolver.AddRelu();
+
+ if (kTfLiteOk == this->m_opResolver.AddEthosU()) {
+ info("Added %s support to op resolver\n",
+ tflite::GetString_ETHOSU());
+ } else {
+        printf_err("Failed to add Arm NPU support to op resolver.\n");
+ return false;
+ }
+ return true;
+}
+
+bool arm::app::RNNoiseModel::RunInference()
+{
+ return Model::RunInference();
+}
+
+void arm::app::RNNoiseModel::ResetGruState()
+{
+ for (auto& stateMapping: this->m_gruStateMap) {
+ TfLiteTensor* inputGruStateTensor = this->GetInputTensor(stateMapping.second);
+ auto* inputGruState = tflite::GetTensorData<int8_t>(inputGruStateTensor);
+ /* Initial value of states is 0, but this is affected by quantization zero point. */
+ auto quantParams = arm::app::GetTensorQuantParams(inputGruStateTensor);
+ memset(inputGruState, quantParams.offset, inputGruStateTensor->bytes);
+ }
+}
+
+bool arm::app::RNNoiseModel::CopyGruStates()
+{
+ std::vector<std::pair<size_t, std::vector<int8_t>>> tempOutGruStates;
+    /* Save the output states before copying them to the input states, to avoid modifying the output tensors:
+     * TFLite Micro can share memory between input and output tensors, so writing to an input tensor may change output tensor values. */
+ for (auto& stateMapping: this->m_gruStateMap) {
+ TfLiteTensor* outputGruStateTensor = this->GetOutputTensor(stateMapping.first);
+ std::vector<int8_t> tempOutGruState(outputGruStateTensor->bytes);
+ auto* outGruState = tflite::GetTensorData<int8_t>(outputGruStateTensor);
+ memcpy(tempOutGruState.data(), outGruState, outputGruStateTensor->bytes);
+ /* Index of the input tensor and the data to copy. */
+ tempOutGruStates.emplace_back(stateMapping.second, std::move(tempOutGruState));
+ }
+ /* Updating input GRU states with saved GRU output states. */
+ for (auto& stateMapping: tempOutGruStates) {
+        auto& outputGruStateTensorData = stateMapping.second;
+ TfLiteTensor* inputGruStateTensor = this->GetInputTensor(stateMapping.first);
+ if (outputGruStateTensorData.size() != inputGruStateTensor->bytes) {
+            printf_err("Unexpected number of bytes for GRU state mapping. Input = %zu, output = %zu.\n",
+                       inputGruStateTensor->bytes,
+                       outputGruStateTensorData.size());
+ return false;
+ }
+ auto* inputGruState = tflite::GetTensorData<int8_t>(inputGruStateTensor);
+ auto* outGruState = outputGruStateTensorData.data();
+ memcpy(inputGruState, outGruState, inputGruStateTensor->bytes);
+ }
+ return true;
+}
diff --git a/source/application/api/use_case/noise_reduction/src/RNNoiseProcessing.cc b/source/application/api/use_case/noise_reduction/src/RNNoiseProcessing.cc
new file mode 100644
index 0000000..f6a3ec4
--- /dev/null
+++ b/source/application/api/use_case/noise_reduction/src/RNNoiseProcessing.cc
@@ -0,0 +1,100 @@
+/*
+ * Copyright (c) 2022 Arm Limited. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "RNNoiseProcessing.hpp"
+#include "log_macros.h"
+
+namespace arm {
+namespace app {
+
+ RNNoisePreProcess::RNNoisePreProcess(TfLiteTensor* inputTensor,
+ std::shared_ptr<rnn::RNNoiseFeatureProcessor> featureProcessor, std::shared_ptr<rnn::FrameFeatures> frameFeatures)
+ : m_inputTensor{inputTensor},
+ m_featureProcessor{featureProcessor},
+ m_frameFeatures{frameFeatures}
+ {}
+
+ bool RNNoisePreProcess::DoPreProcess(const void* data, size_t inputSize)
+ {
+ if (data == nullptr) {
+            printf_err("Data pointer is null\n");
+ return false;
+ }
+
+ auto input = static_cast<const int16_t*>(data);
+ this->m_audioFrame = rnn::vec1D32F(input, input + inputSize);
+ m_featureProcessor->PreprocessFrame(this->m_audioFrame.data(), inputSize, *this->m_frameFeatures);
+
+ QuantizeAndPopulateInput(this->m_frameFeatures->m_featuresVec,
+ this->m_inputTensor->params.scale, this->m_inputTensor->params.zero_point,
+ this->m_inputTensor);
+
+        debug("Input tensor populated\n");
+
+ return true;
+ }
+
+ void RNNoisePreProcess::QuantizeAndPopulateInput(rnn::vec1D32F& inputFeatures,
+ const float quantScale, const int quantOffset,
+ TfLiteTensor* inputTensor)
+ {
+ const float minVal = std::numeric_limits<int8_t>::min();
+ const float maxVal = std::numeric_limits<int8_t>::max();
+
+ auto* inputTensorData = tflite::GetTensorData<int8_t>(inputTensor);
+
+ for (size_t i=0; i < inputFeatures.size(); ++i) {
+ float quantValue = ((inputFeatures[i] / quantScale) + quantOffset);
+ inputTensorData[i] = static_cast<int8_t>(std::min<float>(std::max<float>(quantValue, minVal), maxVal));
+ }
+ }
+
+ RNNoisePostProcess::RNNoisePostProcess(TfLiteTensor* outputTensor,
+ std::vector<int16_t>& denoisedAudioFrame,
+ std::shared_ptr<rnn::RNNoiseFeatureProcessor> featureProcessor,
+ std::shared_ptr<rnn::FrameFeatures> frameFeatures)
+ : m_outputTensor{outputTensor},
+ m_denoisedAudioFrame{denoisedAudioFrame},
+ m_featureProcessor{featureProcessor},
+ m_frameFeatures{frameFeatures}
+ {
+ this->m_denoisedAudioFrameFloat.reserve(denoisedAudioFrame.size());
+ this->m_modelOutputFloat.resize(outputTensor->bytes);
+ }
+
+ bool RNNoisePostProcess::DoPostProcess()
+ {
+ const auto* outputData = tflite::GetTensorData<int8_t>(this->m_outputTensor);
+ auto outputQuantParams = GetTensorQuantParams(this->m_outputTensor);
+
+ for (size_t i = 0; i < this->m_outputTensor->bytes; ++i) {
+ this->m_modelOutputFloat[i] = (static_cast<float>(outputData[i]) - outputQuantParams.offset)
+ * outputQuantParams.scale;
+ }
+
+ this->m_featureProcessor->PostProcessFrame(this->m_modelOutputFloat,
+ *this->m_frameFeatures, this->m_denoisedAudioFrameFloat);
+
+ for (size_t i = 0; i < this->m_denoisedAudioFrame.size(); ++i) {
+ this->m_denoisedAudioFrame[i] = static_cast<int16_t>(
+ std::roundf(this->m_denoisedAudioFrameFloat[i]));
+ }
+
+ return true;
+ }
+
+} /* namespace app */
+} /* namespace arm */
\ No newline at end of file
diff --git a/source/application/api/use_case/object_detection/CMakeLists.txt b/source/application/api/use_case/object_detection/CMakeLists.txt
new file mode 100644
index 0000000..797ff55
--- /dev/null
+++ b/source/application/api/use_case/object_detection/CMakeLists.txt
@@ -0,0 +1,40 @@
+#----------------------------------------------------------------------------
+# Copyright (c) 2022 Arm Limited. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#----------------------------------------------------------------------------
+#########################################################
+# OBJECT DETECTION API library #
+#########################################################
+cmake_minimum_required(VERSION 3.15.6)
+
+set(OBJECT_DETECTION_API_TARGET object_detection_api)
+project(${OBJECT_DETECTION_API_TARGET}
+ DESCRIPTION "Object detection use case API library"
+ LANGUAGES C CXX)
+
+# Create static library
+add_library(${OBJECT_DETECTION_API_TARGET} STATIC
+ src/DetectorPreProcessing.cc
+ src/DetectorPostProcessing.cc
+ src/YoloFastestModel.cc)
+
+target_include_directories(${OBJECT_DETECTION_API_TARGET} PUBLIC include)
+
+target_link_libraries(${OBJECT_DETECTION_API_TARGET} PUBLIC common_api)
+
+message(STATUS "*******************************************************")
+message(STATUS "Library : " ${OBJECT_DETECTION_API_TARGET})
+message(STATUS "CMAKE_SYSTEM_PROCESSOR : " ${CMAKE_SYSTEM_PROCESSOR})
+message(STATUS "*******************************************************")
diff --git a/source/application/api/use_case/object_detection/include/DetectionResult.hpp b/source/application/api/use_case/object_detection/include/DetectionResult.hpp
new file mode 100644
index 0000000..aa74d90
--- /dev/null
+++ b/source/application/api/use_case/object_detection/include/DetectionResult.hpp
@@ -0,0 +1,61 @@
+/*
+ * Copyright (c) 2022 Arm Limited. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef DETECTION_RESULT_HPP
+#define DETECTION_RESULT_HPP
+
+
+namespace arm {
+namespace app {
+namespace object_detection {
+
+ /**
+ * @brief Class representing a single detection result.
+ */
+ class DetectionResult {
+ public:
+ /**
+ * @brief Constructor
+         * @param[in] normalisedVal   Normalised result value.
+         * @param[in] x0              Top-left corner x coordinate.
+         * @param[in] y0              Top-left corner y coordinate.
+         * @param[in] w               Detection result width.
+         * @param[in] h               Detection result height.
+         **/
+        DetectionResult(double normalisedVal, int x0, int y0, int w, int h) :
+ m_normalisedVal(normalisedVal),
+ m_x0(x0),
+ m_y0(y0),
+ m_w(w),
+ m_h(h)
+ {
+ }
+
+ DetectionResult() = default;
+ ~DetectionResult() = default;
+
+ double m_normalisedVal{0.0};
+ int m_x0{0};
+ int m_y0{0};
+ int m_w{0};
+ int m_h{0};
+ };
+
+} /* namespace object_detection */
+} /* namespace app */
+} /* namespace arm */
+
+#endif /* DETECTION_RESULT_HPP */
diff --git a/source/application/api/use_case/object_detection/include/DetectorPostProcessing.hpp b/source/application/api/use_case/object_detection/include/DetectorPostProcessing.hpp
new file mode 100644
index 0000000..30bc123
--- /dev/null
+++ b/source/application/api/use_case/object_detection/include/DetectorPostProcessing.hpp
@@ -0,0 +1,125 @@
+/*
+ * Copyright (c) 2022 Arm Limited. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef DETECTOR_POST_PROCESSING_HPP
+#define DETECTOR_POST_PROCESSING_HPP
+
+#include "ImageUtils.hpp"
+#include "DetectionResult.hpp"
+#include "YoloFastestModel.hpp"
+#include "BaseProcessing.hpp"
+
+#include <forward_list>
+
+namespace arm {
+namespace app {
+
+namespace object_detection {
+
+ struct Branch {
+ int resolution;
+ int numBox;
+ const float* anchor;
+ int8_t* modelOutput;
+ float scale;
+ int zeroPoint;
+ size_t size;
+ };
+
+ struct Network {
+ int inputWidth;
+ int inputHeight;
+ int numClasses;
+ std::vector<Branch> branches;
+ int topN;
+ };
+
+} /* namespace object_detection */
+
+ /**
+ * @brief Post-processing class for Object Detection use case.
+ * Implements methods declared by BasePostProcess and anything else needed
+ * to populate result vector.
+ */
+ class DetectorPostProcess : public BasePostProcess {
+ public:
+ /**
+ * @brief Constructor.
+ * @param[in] outputTensor0 Pointer to the TFLite Micro output Tensor at index 0.
+ * @param[in] outputTensor1 Pointer to the TFLite Micro output Tensor at index 1.
+ * @param[out] results Vector of detected results.
+ * @param[in] inputImgRows Number of rows in the input image.
+ * @param[in] inputImgCols Number of columns in the input image.
+ * @param[in] threshold Post-processing threshold.
+         * @param[in]   nms             Non-maximum suppression (NMS) threshold.
+ * @param[in] numClasses Number of classes.
+ * @param[in] topN Top N for each class.
+ **/
+ explicit DetectorPostProcess(TfLiteTensor* outputTensor0,
+ TfLiteTensor* outputTensor1,
+ std::vector<object_detection::DetectionResult>& results,
+ int inputImgRows,
+ int inputImgCols,
+ float threshold = 0.5f,
+ float nms = 0.45f,
+ int numClasses = 1,
+ int topN = 0);
+
+ /**
+ * @brief Should perform YOLO post-processing of the result of inference then
+ * populate Detection result data for any later use.
+ * @return true if successful, false otherwise.
+ **/
+ bool DoPostProcess() override;
+
+ private:
+ TfLiteTensor* m_outputTensor0; /* Output tensor index 0 */
+ TfLiteTensor* m_outputTensor1; /* Output tensor index 1 */
+ std::vector<object_detection::DetectionResult>& m_results; /* Single inference results. */
+ int m_inputImgRows; /* Number of rows for model input. */
+ int m_inputImgCols; /* Number of cols for model input. */
+ float m_threshold; /* Post-processing threshold. */
+ float m_nms; /* NMS threshold. */
+ int m_numClasses; /* Number of classes. */
+ int m_topN; /* TopN. */
+ object_detection::Network m_net; /* YOLO network object. */
+
+ /**
+         * @brief          Insert the given detection into the list, keeping it sorted by objectness.
+         * @param[in,out]  detections   List of detections.
+         * @param[in]      det          Detection to be inserted.
+ **/
+ void InsertTopNDetections(std::forward_list<image::Detection>& detections, image::Detection& det);
+
+ /**
+ * @brief Given a Network calculate the detection boxes.
+ * @param[in] net Network.
+ * @param[in] imageWidth Original image width.
+ * @param[in] imageHeight Original image height.
+ * @param[in] threshold Detections threshold.
+ * @param[out] detections Detection boxes.
+ **/
+ void GetNetworkBoxes(object_detection::Network& net,
+ int imageWidth,
+ int imageHeight,
+ float threshold,
+ std::forward_list<image::Detection>& detections);
+ };
+
+} /* namespace app */
+} /* namespace arm */
+
+#endif /* DETECTOR_POST_PROCESSING_HPP */
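A minimal usage sketch for the post-processor declared above (assuming an initialised YoloFastestModel `model` whose input shape supplies the row/column counts; error handling elided):

    std::vector<object_detection::DetectionResult> results;
    DetectorPostProcess postProcess(model.GetOutputTensor(0),
                                    model.GetOutputTensor(1),
                                    results,
                                    inputImgRows, inputImgCols);   /* Defaults: threshold 0.5, NMS 0.45. */

    /* ... pre-process the image and run inference ... */

    if (postProcess.DoPostProcess()) {
        for (const auto& r : results) {
            /* r.m_x0/m_y0/m_w/m_h are in original-image coordinates;
             * r.m_normalisedVal is the detection score. */
        }
    }
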
diff --git a/source/application/api/use_case/object_detection/include/DetectorPreProcessing.hpp b/source/application/api/use_case/object_detection/include/DetectorPreProcessing.hpp
new file mode 100644
index 0000000..4936048
--- /dev/null
+++ b/source/application/api/use_case/object_detection/include/DetectorPreProcessing.hpp
@@ -0,0 +1,60 @@
+/*
+ * Copyright (c) 2022 Arm Limited. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef DETECTOR_PRE_PROCESSING_HPP
+#define DETECTOR_PRE_PROCESSING_HPP
+
+#include "BaseProcessing.hpp"
+#include "Classifier.hpp"
+
+namespace arm {
+namespace app {
+
+ /**
+ * @brief Pre-processing class for Object detection use case.
+ * Implements methods declared by BasePreProcess and anything else needed
+ * to populate input tensors ready for inference.
+ */
+ class DetectorPreProcess : public BasePreProcess {
+
+ public:
+ /**
+ * @brief Constructor
+ * @param[in] inputTensor Pointer to the TFLite Micro input Tensor.
+ * @param[in] rgb2Gray Convert image from 3 channel RGB to 1 channel grayscale.
+ * @param[in] convertToInt8 Convert the image from uint8 to int8 range.
+ **/
+ explicit DetectorPreProcess(TfLiteTensor* inputTensor, bool rgb2Gray, bool convertToInt8);
+
+ /**
+ * @brief Should perform pre-processing of 'raw' input image data and load it into
+         *        TFLite Micro input tensor ready for inference.
+ * @param[in] input Pointer to the data that pre-processing will work on.
+ * @param[in] inputSize Size of the input data.
+ * @return true if successful, false otherwise.
+ **/
+ bool DoPreProcess(const void* input, size_t inputSize) override;
+
+ private:
+ TfLiteTensor* m_inputTensor;
+ bool m_rgb2Gray;
+ bool m_convertToInt8;
+ };
+
+} /* namespace app */
+} /* namespace arm */
+
+#endif /* DETECTOR_PRE_PROCESSING_HPP */
\ No newline at end of file
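Correspondingly, a sketch of driving the pre-processor above; `model`, the raw image buffer and the two flag values are placeholders for application code and depend on the deployed model:

    DetectorPreProcess preProcess(model.GetInputTensor(0),
                                  true,    /* rgb2Gray: assumes the model expects single-channel input. */
                                  true);   /* convertToInt8: assumes an int8 tensor fed from a uint8 image. */
    preProcess.DoPreProcess(rgbImage.data(), rgbImageSize);
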
diff --git a/source/application/api/use_case/object_detection/include/YoloFastestModel.hpp b/source/application/api/use_case/object_detection/include/YoloFastestModel.hpp
new file mode 100644
index 0000000..4c64433
--- /dev/null
+++ b/source/application/api/use_case/object_detection/include/YoloFastestModel.hpp
@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) 2022 Arm Limited. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef YOLO_FASTEST_MODEL_HPP
+#define YOLO_FASTEST_MODEL_HPP
+
+#include "Model.hpp"
+
+extern const int originalImageSize;
+extern const int channelsImageDisplayed;
+extern const float anchor1[];
+extern const float anchor2[];
+
+namespace arm {
+namespace app {
+
+ class YoloFastestModel : public Model {
+
+ public:
+ /* Indices for the expected model - based on input tensor shape */
+ static constexpr uint32_t ms_inputRowsIdx = 1;
+ static constexpr uint32_t ms_inputColsIdx = 2;
+ static constexpr uint32_t ms_inputChannelsIdx = 3;
+
+ protected:
+ /** @brief Gets the reference to op resolver interface class. */
+ const tflite::MicroOpResolver& GetOpResolver() override;
+
+ /** @brief Adds operations to the op resolver instance. */
+ bool EnlistOperations() override;
+
+ private:
+ /* Maximum number of individual operations that can be enlisted. */
+ static constexpr int ms_maxOpCnt = 8;
+
+ /* A mutable op resolver instance. */
+ tflite::MicroMutableOpResolver<ms_maxOpCnt> m_opResolver;
+ };
+
+} /* namespace app */
+} /* namespace arm */
+
+#endif /* YOLO_FASTEST_MODEL_HPP */
diff --git a/source/application/api/use_case/object_detection/src/DetectorPostProcessing.cc b/source/application/api/use_case/object_detection/src/DetectorPostProcessing.cc
new file mode 100644
index 0000000..fb1606a
--- /dev/null
+++ b/source/application/api/use_case/object_detection/src/DetectorPostProcessing.cc
@@ -0,0 +1,240 @@
+/*
+ * Copyright (c) 2022 Arm Limited. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "DetectorPostProcessing.hpp"
+#include "PlatformMath.hpp"
+
+#include <cmath>
+
+namespace arm {
+namespace app {
+
+ DetectorPostProcess::DetectorPostProcess(
+ TfLiteTensor* modelOutput0,
+ TfLiteTensor* modelOutput1,
+ std::vector<object_detection::DetectionResult>& results,
+ int inputImgRows,
+ int inputImgCols,
+ const float threshold,
+ const float nms,
+ int numClasses,
+ int topN)
+ : m_outputTensor0{modelOutput0},
+ m_outputTensor1{modelOutput1},
+ m_results{results},
+ m_inputImgRows{inputImgRows},
+ m_inputImgCols{inputImgCols},
+ m_threshold(threshold),
+ m_nms(nms),
+ m_numClasses(numClasses),
+ m_topN(topN)
+{
+ /* Init PostProcessing */
+ this->m_net =
+ object_detection::Network {
+ .inputWidth = inputImgCols,
+ .inputHeight = inputImgRows,
+ .numClasses = numClasses,
+ .branches = {
+ object_detection::Branch {
+ .resolution = inputImgCols/32,
+ .numBox = 3,
+ .anchor = anchor1,
+ .modelOutput = this->m_outputTensor0->data.int8,
+ .scale = (static_cast<TfLiteAffineQuantization*>(
+ this->m_outputTensor0->quantization.params))->scale->data[0],
+ .zeroPoint = (static_cast<TfLiteAffineQuantization*>(
+ this->m_outputTensor0->quantization.params))->zero_point->data[0],
+ .size = this->m_outputTensor0->bytes
+ },
+ object_detection::Branch {
+ .resolution = inputImgCols/16,
+ .numBox = 3,
+ .anchor = anchor2,
+ .modelOutput = this->m_outputTensor1->data.int8,
+ .scale = (static_cast<TfLiteAffineQuantization*>(
+ this->m_outputTensor1->quantization.params))->scale->data[0],
+ .zeroPoint = (static_cast<TfLiteAffineQuantization*>(
+ this->m_outputTensor1->quantization.params))->zero_point->data[0],
+ .size = this->m_outputTensor1->bytes
+ }
+ },
+ .topN = m_topN
+ };
+ /* End init */
+}
+
+bool DetectorPostProcess::DoPostProcess()
+{
+    /* Gather candidate detections from both network output branches. */
+ int originalImageWidth = originalImageSize;
+ int originalImageHeight = originalImageSize;
+
+ std::forward_list<image::Detection> detections;
+ GetNetworkBoxes(this->m_net, originalImageWidth, originalImageHeight, m_threshold, detections);
+
+    /* Remove overlapping boxes with non-maximum suppression (NMS). */
+ CalculateNMS(detections, this->m_net.numClasses, m_nms);
+
+ for (auto& it: detections) {
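+        /* Convert the centre/width-height box to corner coordinates and
+         * clamp it to the image bounds. */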
+ float xMin = it.bbox.x - it.bbox.w / 2.0f;
+ float xMax = it.bbox.x + it.bbox.w / 2.0f;
+ float yMin = it.bbox.y - it.bbox.h / 2.0f;
+ float yMax = it.bbox.y + it.bbox.h / 2.0f;
+
+ if (xMin < 0) {
+ xMin = 0;
+ }
+ if (yMin < 0) {
+ yMin = 0;
+ }
+ if (xMax > originalImageWidth) {
+ xMax = originalImageWidth;
+ }
+ if (yMax > originalImageHeight) {
+ yMax = originalImageHeight;
+ }
+
+ float boxX = xMin;
+ float boxY = yMin;
+ float boxWidth = xMax - xMin;
+ float boxHeight = yMax - yMin;
+
+ for (int j = 0; j < this->m_net.numClasses; ++j) {
+ if (it.prob[j] > 0) {
+
+ object_detection::DetectionResult tmpResult = {};
+ tmpResult.m_normalisedVal = it.prob[j];
+ tmpResult.m_x0 = boxX;
+ tmpResult.m_y0 = boxY;
+ tmpResult.m_w = boxWidth;
+ tmpResult.m_h = boxHeight;
+
+ this->m_results.push_back(tmpResult);
+ }
+ }
+ }
+ return true;
+}
+
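+/* Insert a detection into a list kept sorted by ascending objectness,
+ * evicting the lowest-scoring entry so the list size stays at topN. */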
+void DetectorPostProcess::InsertTopNDetections(std::forward_list<image::Detection>& detections, image::Detection& det)
+{
+ std::forward_list<image::Detection>::iterator it;
+ std::forward_list<image::Detection>::iterator last_it;
+    for (it = detections.begin(); it != detections.end(); ++it) {
+        if (it->objectness > det.objectness) {
+            break;
+        }
+        last_it = it;
+    }
+    if (it != detections.begin()) {
+        detections.emplace_after(last_it, det);
+        detections.pop_front();
+    }
+}
+
+void DetectorPostProcess::GetNetworkBoxes(
+ object_detection::Network& net,
+ int imageWidth,
+ int imageHeight,
+ float threshold,
+ std::forward_list<image::Detection>& detections)
+{
+ int numClasses = net.numClasses;
+ int num = 0;
+ auto det_objectness_comparator = [](image::Detection& pa, image::Detection& pb) {
+ return pa.objectness < pb.objectness;
+ };
+ for (size_t i = 0; i < net.branches.size(); ++i) {
+ int height = net.branches[i].resolution;
+ int width = net.branches[i].resolution;
+ int channel = net.branches[i].numBox*(5+numClasses);
+
+ for (int h = 0; h < net.branches[i].resolution; h++) {
+ for (int w = 0; w < net.branches[i].resolution; w++) {
+ for (int anc = 0; anc < net.branches[i].numBox; anc++) {
+
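+                    /* Each anchor prediction is laid out as
+                     * [x, y, w, h, objectness, class scores...]. */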
+ /* Objectness score */
+ int bbox_obj_offset = h * width * channel + w * channel + anc * (numClasses + 5) + 4;
+ float objectness = math::MathUtils::SigmoidF32(
+ (static_cast<float>(net.branches[i].modelOutput[bbox_obj_offset])
+ - net.branches[i].zeroPoint
+ ) * net.branches[i].scale);
+
+                if (objectness > threshold) {
+ image::Detection det;
+ det.objectness = objectness;
+ /* Get bbox prediction data for each anchor, each feature point */
+                    int bbox_x_offset = bbox_obj_offset - 4;
+ int bbox_y_offset = bbox_x_offset + 1;
+ int bbox_w_offset = bbox_x_offset + 2;
+ int bbox_h_offset = bbox_x_offset + 3;
+ int bbox_scores_offset = bbox_x_offset + 5;
+
+ det.bbox.x = (static_cast<float>(net.branches[i].modelOutput[bbox_x_offset])
+ - net.branches[i].zeroPoint) * net.branches[i].scale;
+ det.bbox.y = (static_cast<float>(net.branches[i].modelOutput[bbox_y_offset])
+ - net.branches[i].zeroPoint) * net.branches[i].scale;
+ det.bbox.w = (static_cast<float>(net.branches[i].modelOutput[bbox_w_offset])
+ - net.branches[i].zeroPoint) * net.branches[i].scale;
+ det.bbox.h = (static_cast<float>(net.branches[i].modelOutput[bbox_h_offset])
+ - net.branches[i].zeroPoint) * net.branches[i].scale;
+
+ float bbox_x, bbox_y;
+
+                    /* Decode the box centre: sigmoid plus grid cell offset,
+                     * normalised by the grid size (the YOLOv4 grid sensitivity
+                     * scaling is not applied here). */
+ bbox_x = math::MathUtils::SigmoidF32(det.bbox.x);
+ bbox_y = math::MathUtils::SigmoidF32(det.bbox.y);
+ det.bbox.x = (bbox_x + w) / width;
+ det.bbox.y = (bbox_y + h) / height;
+
+ det.bbox.w = std::exp(det.bbox.w) * net.branches[i].anchor[anc*2] / net.inputWidth;
+ det.bbox.h = std::exp(det.bbox.h) * net.branches[i].anchor[anc*2+1] / net.inputHeight;
+
+ for (int s = 0; s < numClasses; s++) {
+ float sig = math::MathUtils::SigmoidF32(
+ (static_cast<float>(net.branches[i].modelOutput[bbox_scores_offset + s]) -
+ net.branches[i].zeroPoint) * net.branches[i].scale
+ ) * objectness;
+ det.prob.emplace_back((sig > threshold) ? sig : 0);
+ }
+
+                    /* Scale normalised box coordinates back to the image size. */
+ det.bbox.x *= imageWidth;
+ det.bbox.w *= imageWidth;
+ det.bbox.y *= imageHeight;
+ det.bbox.h *= imageHeight;
+
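+                    /* Keep at most topN detections: fill the list freely until
+                     * topN entries exist, sort it once by objectness, then
+                     * insert-and-evict so only the best topN remain. */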
+                    if (num < net.topN || net.topN <= 0) {
+                        detections.emplace_front(det);
+                        num += 1;
+                    } else if (num == net.topN) {
+                        detections.sort(det_objectness_comparator);
+                        InsertTopNDetections(detections, det);
+                        num += 1;
+                    } else {
+                        InsertTopNDetections(detections, det);
+                    }
+ }
+ }
+ }
+ }
+ }
+    if (num > net.topN) {
+        num -= 1;
+    }
+}
+
+} /* namespace app */
+} /* namespace arm */
diff --git a/source/application/api/use_case/object_detection/src/DetectorPreProcessing.cc b/source/application/api/use_case/object_detection/src/DetectorPreProcessing.cc
new file mode 100644
index 0000000..7212046
--- /dev/null
+++ b/source/application/api/use_case/object_detection/src/DetectorPreProcessing.cc
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2022 Arm Limited. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "DetectorPreProcessing.hpp"
+#include "ImageUtils.hpp"
+#include "log_macros.h"
+
+namespace arm {
+namespace app {
+
+ DetectorPreProcess::DetectorPreProcess(TfLiteTensor* inputTensor, bool rgb2Gray, bool convertToInt8)
+ : m_inputTensor{inputTensor},
+ m_rgb2Gray{rgb2Gray},
+ m_convertToInt8{convertToInt8}
+ {}
+
+ bool DetectorPreProcess::DoPreProcess(const void* data, size_t inputSize) {
+        if (data == nullptr) {
+            printf_err("Data pointer is null");
+            return false;
+        }
+
+ auto input = static_cast<const uint8_t*>(data);
+
+ if (this->m_rgb2Gray) {
+ image::RgbToGrayscale(input, this->m_inputTensor->data.uint8, this->m_inputTensor->bytes);
+ } else {
+ std::memcpy(this->m_inputTensor->data.data, input, inputSize);
+ }
+        debug("Input tensor populated\n");
+
+ if (this->m_convertToInt8) {
+ image::ConvertImgToInt8(this->m_inputTensor->data.data, this->m_inputTensor->bytes);
+ }
+
+ return true;
+ }
+
+} /* namespace app */
+} /* namespace arm */ \ No newline at end of file
diff --git a/source/application/api/use_case/object_detection/src/YoloFastestModel.cc b/source/application/api/use_case/object_detection/src/YoloFastestModel.cc
new file mode 100644
index 0000000..e293181
--- /dev/null
+++ b/source/application/api/use_case/object_detection/src/YoloFastestModel.cc
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2022 Arm Limited. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "YoloFastestModel.hpp"
+
+#include "log_macros.h"
+
+const tflite::MicroOpResolver& arm::app::YoloFastestModel::GetOpResolver()
+{
+ return this->m_opResolver;
+}
+
+bool arm::app::YoloFastestModel::EnlistOperations()
+{
+ this->m_opResolver.AddDepthwiseConv2D();
+ this->m_opResolver.AddConv2D();
+ this->m_opResolver.AddAdd();
+ this->m_opResolver.AddResizeNearestNeighbor();
+    /* These ops are needed for the unit tests to run; they are not required
+     * on the FVP. */
+ this->m_opResolver.AddPad();
+ this->m_opResolver.AddMaxPool2D();
+ this->m_opResolver.AddConcatenation();
+
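+    /* The Ethos-U custom operator registration is mandatory here: failure to
+     * add it is treated as a fatal error. */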
+ if (kTfLiteOk == this->m_opResolver.AddEthosU()) {
+ info("Added %s support to op resolver\n",
+ tflite::GetString_ETHOSU());
+ } else {
+ printf_err("Failed to add Arm NPU support to op resolver.");
+ return false;
+ }
+ return true;
+}
diff --git a/source/application/api/use_case/vww/CMakeLists.txt b/source/application/api/use_case/vww/CMakeLists.txt
new file mode 100644
index 0000000..b933d32
--- /dev/null
+++ b/source/application/api/use_case/vww/CMakeLists.txt
@@ -0,0 +1,39 @@
+#----------------------------------------------------------------------------
+# Copyright (c) 2022 Arm Limited. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#----------------------------------------------------------------------------
+#########################################################
+# VISUAL WAKE WORD API library #
+#########################################################
+cmake_minimum_required(VERSION 3.15.6)
+
+set(VWW_API_TARGET vww_api)
+project(${VWW_API_TARGET}
+ DESCRIPTION "Visual wake word use case API library"
+ LANGUAGES C CXX)
+
+# Create static library
+add_library(${VWW_API_TARGET} STATIC
+ src/VisualWakeWordProcessing.cc
+ src/VisualWakeWordModel.cc)
+
+target_include_directories(${VWW_API_TARGET} PUBLIC include)
+
+target_link_libraries(${VWW_API_TARGET} PUBLIC common_api)
+
+message(STATUS "*******************************************************")
+message(STATUS "Library : " ${VWW_API_TARGET})
+message(STATUS "CMAKE_SYSTEM_PROCESSOR : " ${CMAKE_SYSTEM_PROCESSOR})
+message(STATUS "*******************************************************")
diff --git a/source/application/api/use_case/vww/include/VisualWakeWordModel.hpp b/source/application/api/use_case/vww/include/VisualWakeWordModel.hpp
new file mode 100644
index 0000000..a34b904
--- /dev/null
+++ b/source/application/api/use_case/vww/include/VisualWakeWordModel.hpp
@@ -0,0 +1,50 @@
+/*
+ * Copyright (c) 2021 - 2022 Arm Limited. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef VISUAL_WAKE_WORD_MODEL_HPP
+#define VISUAL_WAKE_WORD_MODEL_HPP
+
+#include "Model.hpp"
+
+namespace arm {
+namespace app {
+
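+    /* Model class for the Visual Wake Word use case. */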
+ class VisualWakeWordModel : public Model {
+
+ public:
+ /* Indices for the expected model - based on input tensor shape */
+ static constexpr uint32_t ms_inputRowsIdx = 1;
+ static constexpr uint32_t ms_inputColsIdx = 2;
+ static constexpr uint32_t ms_inputChannelsIdx = 3;
+
+ protected:
+ /** @brief Gets the reference to op resolver interface class. */
+ const tflite::MicroOpResolver& GetOpResolver() override;
+
+ /** @brief Adds operations to the op resolver instance. */
+ bool EnlistOperations() override;
+
+    private:
+ /* Maximum number of individual operations that can be enlisted. */
+ static constexpr int ms_maxOpCnt = 7;
+
+ /* A mutable op resolver instance. */
+ tflite::MicroMutableOpResolver<ms_maxOpCnt> m_opResolver;
+ };
+
+} /* namespace app */
+} /* namespace arm */
+
+#endif /* VISUAL_WAKE_WORD_MODEL_HPP */
diff --git a/source/application/api/use_case/vww/include/VisualWakeWordProcessing.hpp b/source/application/api/use_case/vww/include/VisualWakeWordProcessing.hpp
new file mode 100644
index 0000000..f9f9d72
--- /dev/null
+++ b/source/application/api/use_case/vww/include/VisualWakeWordProcessing.hpp
@@ -0,0 +1,93 @@
+/*
+ * Copyright (c) 2022 Arm Limited. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef VWW_PROCESSING_HPP
+#define VWW_PROCESSING_HPP
+
+#include "BaseProcessing.hpp"
+#include "Model.hpp"
+#include "Classifier.hpp"
+
+namespace arm {
+namespace app {
+
+ /**
+ * @brief Pre-processing class for Visual Wake Word use case.
+ * Implements methods declared by BasePreProcess and anything else needed
+ * to populate input tensors ready for inference.
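+     *
+     *          Minimal usage sketch (assumes a loaded Model object exposing
+     *          GetInputTensor(), as in the common Model API):
+     *
+     *              VisualWakeWordPreProcess preProcess(model.GetInputTensor(0));
+     *              preProcess.DoPreProcess(imageData, imageSize);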
+ */
+ class VisualWakeWordPreProcess : public BasePreProcess {
+
+ public:
+ /**
+ * @brief Constructor
+ * @param[in] inputTensor Pointer to the TFLite Micro input Tensor.
+ * @param[in] rgb2Gray Convert image from 3 channel RGB to 1 channel grayscale.
+ **/
+ explicit VisualWakeWordPreProcess(TfLiteTensor* inputTensor, bool rgb2Gray=true);
+
+ /**
+ * @brief Should perform pre-processing of 'raw' input image data and load it into
+         *        TFLite Micro input tensors ready for inference.
+ * @param[in] input Pointer to the data that pre-processing will work on.
+ * @param[in] inputSize Size of the input data.
+ * @return true if successful, false otherwise.
+ **/
+ bool DoPreProcess(const void* input, size_t inputSize) override;
+
+ private:
+ TfLiteTensor* m_inputTensor;
+ bool m_rgb2Gray;
+ };
+
+ /**
+ * @brief Post-processing class for Visual Wake Word use case.
+ * Implements methods declared by BasePostProcess and anything else needed
+ * to populate result vector.
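+     *
+     *          Minimal usage sketch (assumes the output tensor, classifier,
+     *          labels and results vector have been set up by the caller):
+     *
+     *              VisualWakeWordPostProcess postProcess(outputTensor,
+     *                  classifier, labels, results);
+     *              postProcess.DoPostProcess();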
+ */
+ class VisualWakeWordPostProcess : public BasePostProcess {
+
+ private:
+ TfLiteTensor* m_outputTensor;
+ Classifier& m_vwwClassifier;
+ const std::vector<std::string>& m_labels;
+ std::vector<ClassificationResult>& m_results;
+
+ public:
+ /**
+ * @brief Constructor
+ * @param[in] outputTensor Pointer to the TFLite Micro output Tensor.
+ * @param[in] classifier Classifier object used to get top N results from classification.
+ * @param[in] labels Vector of string labels to identify each output of the model.
+ * @param[out] results Vector of classification results to store decoded outputs.
+ **/
+ VisualWakeWordPostProcess(TfLiteTensor* outputTensor, Classifier& classifier,
+ const std::vector<std::string>& labels,
+ std::vector<ClassificationResult>& results);
+
+ /**
+ * @brief Should perform post-processing of the result of inference then
+ * populate classification result data for any later use.
+ * @return true if successful, false otherwise.
+ **/
+ bool DoPostProcess() override;
+ };
+
+} /* namespace app */
+} /* namespace arm */
+
+#endif /* VWW_PROCESSING_HPP */ \ No newline at end of file
diff --git a/source/application/api/use_case/vww/src/VisualWakeWordModel.cc b/source/application/api/use_case/vww/src/VisualWakeWordModel.cc
new file mode 100644
index 0000000..2d8a125
--- /dev/null
+++ b/source/application/api/use_case/vww/src/VisualWakeWordModel.cc
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2021 Arm Limited. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "VisualWakeWordModel.hpp"
+#include "log_macros.h"
+
+const tflite::MicroOpResolver& arm::app::VisualWakeWordModel::GetOpResolver()
+{
+ return this->m_opResolver;
+}
+
+bool arm::app::VisualWakeWordModel::EnlistOperations()
+{
+ this->m_opResolver.AddDepthwiseConv2D();
+ this->m_opResolver.AddConv2D();
+ this->m_opResolver.AddAveragePool2D();
+ this->m_opResolver.AddReshape();
+ this->m_opResolver.AddPad();
+ this->m_opResolver.AddAdd();
+
+ if (kTfLiteOk == this->m_opResolver.AddEthosU()) {
+ info("Added %s support to op resolver\n",
+ tflite::GetString_ETHOSU());
+ } else {
+ printf_err("Failed to add Arm NPU support to op resolver.");
+ return false;
+ }
+ return true;
+}
diff --git a/source/application/api/use_case/vww/src/VisualWakeWordProcessing.cc b/source/application/api/use_case/vww/src/VisualWakeWordProcessing.cc
new file mode 100644
index 0000000..4ae8a54
--- /dev/null
+++ b/source/application/api/use_case/vww/src/VisualWakeWordProcessing.cc
@@ -0,0 +1,80 @@
+/*
+ * Copyright (c) 2022 Arm Limited. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "VisualWakeWordProcessing.hpp"
+
+#include "ImageUtils.hpp"
+#include "VisualWakeWordModel.hpp"
+#include "log_macros.h"
+
+namespace arm {
+namespace app {
+
+ VisualWakeWordPreProcess::VisualWakeWordPreProcess(TfLiteTensor* inputTensor, bool rgb2Gray)
+ :m_inputTensor{inputTensor},
+ m_rgb2Gray{rgb2Gray}
+ {}
+
+ bool VisualWakeWordPreProcess::DoPreProcess(const void* data, size_t inputSize)
+ {
+        if (data == nullptr) {
+            printf_err("Data pointer is null");
+            return false;
+        }
+
+ auto input = static_cast<const uint8_t*>(data);
+
+ uint8_t* unsignedDstPtr = this->m_inputTensor->data.uint8;
+
+ if (this->m_rgb2Gray) {
+ image::RgbToGrayscale(input, unsignedDstPtr, inputSize);
+ } else {
+ std::memcpy(unsignedDstPtr, input, inputSize);
+ }
+
+        /* VWW model pre-processing converts the uint8 image to [0, 1] float
+         * values and then quantises them using the input tensor's quantization
+         * parameters. */
+ QuantParams inQuantParams = GetTensorQuantParams(this->m_inputTensor);
+
+ int8_t* signedDstPtr = this->m_inputTensor->data.int8;
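+        /* Note: signedDstPtr aliases the same tensor buffer as unsignedDstPtr,
+         * so the uint8 -> int8 conversion happens in place, element by element. */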
+ for (size_t i = 0; i < this->m_inputTensor->bytes; i++) {
+ auto i_data_int8 = static_cast<int8_t>(
+ ((static_cast<float>(unsignedDstPtr[i]) / 255.0f) / inQuantParams.scale) + inQuantParams.offset
+ );
+ signedDstPtr[i] = std::min<int8_t>(INT8_MAX, std::max<int8_t>(i_data_int8, INT8_MIN));
+ }
+
+        debug("Input tensor populated\n");
+
+ return true;
+ }
+
+ VisualWakeWordPostProcess::VisualWakeWordPostProcess(TfLiteTensor* outputTensor, Classifier& classifier,
+ const std::vector<std::string>& labels, std::vector<ClassificationResult>& results)
+ :m_outputTensor{outputTensor},
+ m_vwwClassifier{classifier},
+ m_labels{labels},
+ m_results{results}
+ {}
+
+ bool VisualWakeWordPostProcess::DoPostProcess()
+ {
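+        /* Request the single (top 1) classification result; the final flag is
+         * assumed to apply softmax to the model output. */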
+ return this->m_vwwClassifier.GetClassificationResults(
+ this->m_outputTensor, this->m_results,
+ this->m_labels, 1, true);
+ }
+
+} /* namespace app */
+} /* namespace arm */ \ No newline at end of file