diff options
Diffstat (limited to 'source/application/main/include')
-rw-r--r-- | source/application/main/include/AudioUtils.hpp | 172 | ||||
-rw-r--r-- | source/application/main/include/BaseProcessing.hpp | 67 | ||||
-rw-r--r-- | source/application/main/include/BufAttributes.hpp | 72 | ||||
-rw-r--r-- | source/application/main/include/ClassificationResult.hpp | 41 | ||||
-rw-r--r-- | source/application/main/include/Classifier.hpp | 89 | ||||
-rw-r--r-- | source/application/main/include/DataStructures.hpp | 128 | ||||
-rw-r--r-- | source/application/main/include/ImageUtils.hpp | 116 | ||||
-rw-r--r-- | source/application/main/include/Mfcc.hpp | 255 | ||||
-rw-r--r-- | source/application/main/include/UseCaseCommonUtils.hpp | 1 |
9 files changed, 73 insertions, 868 deletions
diff --git a/source/application/main/include/AudioUtils.hpp b/source/application/main/include/AudioUtils.hpp deleted file mode 100644 index cbf7bb7..0000000 --- a/source/application/main/include/AudioUtils.hpp +++ /dev/null @@ -1,172 +0,0 @@ -/* - * Copyright (c) 2021 Arm Limited. All rights reserved. - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#ifndef AUDIO_UTILS_HPP -#define AUDIO_UTILS_HPP - -#include <cstddef> -#include <cstdint> - -namespace arm { -namespace app { -namespace audio { - - template<class T> - class SlidingWindow { - public: - - /** - * @brief Creates the window slider through the given data. - * - * @param[in] data Pointer to the data to slide through. - * @param[in] dataSize Size in T type elements wise. - * @param[in] windowSize Sliding window size in T type wise elements. - * @param[in] stride Stride size in T type wise elements. - */ - SlidingWindow(T *data, size_t dataSize, - size_t windowSize, size_t stride) { - m_start = data; - m_dataSize = dataSize; - m_size = windowSize; - m_stride = stride; - } - - SlidingWindow() = default; - - ~SlidingWindow() = default; - - /** - * @brief Get the next data window. - * @return Pointer to the next window, if next window is not available nullptr is returned. - */ - virtual T *Next() { - if (HasNext()) { - m_count++; - return m_start + Index() * m_stride; - } else { - return nullptr; - } - } - - /** - * @brief Checks if the next data portion is available. - * @return true if next data portion is available. - */ - virtual bool HasNext() { - return m_size + m_count * m_stride <= m_dataSize; - } - - /** - * @brief Reset the slider to the initial position. - */ - virtual void Reset() { - m_count = 0; - } - - /** - * @brief Resets the slider to the start of the new data. - * New data size MUST be the same as the old one. - * @param[in] newStart Pointer to the new data to slide through. - */ - virtual void Reset(T *newStart) { - m_start = newStart; - Reset(); - } - - /** - * @brief Gets current index of the sliding window. - * @return Current position of the sliding window in number of strides. - */ - size_t Index() { - return m_count == 0? 0: m_count - 1; - } - - /** - * @brief Gets the index from the start of the data where the next window will begin. - * While Index() returns the index of sliding window itself this function - * returns the index of the data element itself. - * @return Index from the start of the data where the next sliding window will begin. - */ - virtual uint32_t NextWindowStartIndex() { - return m_count == 0? 0: ((m_count) * m_stride); - } - - /** - * @brief Go to given sliding window index. - * @param[in] index New position of the sliding window. If index is invalid - * (greater than possible range of strides) then next call to Next() will return nullptr. - */ - void FastForward(size_t index) { - m_count = index; - } - - /** - * @brief Calculates whole number of times the window can stride through the given data. - * @return Maximum number of whole strides. - */ - size_t TotalStrides() { - if (m_size > m_dataSize) { - return 0; - } - return ((m_dataSize - m_size)/m_stride); - } - - - protected: - T *m_start = nullptr; - size_t m_dataSize = 0; - size_t m_size = 0; - size_t m_stride = 0; - size_t m_count = 0; - }; - - /* - * Sliding window that will cover the whole length of the input, even if - * this means the last window is not a full window length. - */ - template<class T> - class FractionalSlidingWindow : public SlidingWindow<T> { - public: - using SlidingWindow<T>::SlidingWindow; - - /** - * @brief Checks if the next data portion is available. - * @return true if next data portion is available. - */ - bool HasNext() { - return this->m_count < 1 + this->FractionalTotalStrides() && (this->NextWindowStartIndex() < this->m_dataSize); - } - - /** - * @brief Calculates number of times the window can stride through the given data. - * May not be a whole number. - * @return Number of strides to cover all data. - */ - float FractionalTotalStrides() { - if (this->m_dataSize < this->m_size) { - return 0; - } else { - return ((this->m_dataSize - this->m_size) / static_cast<float>(this->m_stride)); - } - } - }; - - -} /* namespace audio */ -} /* namespace app */ -} /* namespace arm */ - -#endif /* AUDIO_UTILS_HPP */
\ No newline at end of file diff --git a/source/application/main/include/BaseProcessing.hpp b/source/application/main/include/BaseProcessing.hpp deleted file mode 100644 index c099db2..0000000 --- a/source/application/main/include/BaseProcessing.hpp +++ /dev/null @@ -1,67 +0,0 @@ -/* - * Copyright (c) 2022 Arm Limited. All rights reserved. - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#ifndef BASE_PROCESSING_HPP -#define BASE_PROCESSING_HPP - -#include "Model.hpp" - -namespace arm { -namespace app { - - /** - * @brief Base class exposing pre-processing API. - * Use cases should provide their own PreProcessing class that inherits from this one. - * All steps required to take raw input data and populate tensors ready for inference - * should be handled. - */ - class BasePreProcess { - - public: - virtual ~BasePreProcess() = default; - - /** - * @brief Should perform pre-processing of 'raw' input data and load it into - * TFLite Micro input tensors ready for inference - * @param[in] input Pointer to the data that pre-processing will work on. - * @param[in] inputSize Size of the input data. - * @return true if successful, false otherwise. - **/ - virtual bool DoPreProcess(const void* input, size_t inputSize) = 0; - }; - - /** - * @brief Base class exposing post-processing API. - * Use cases should provide their own PostProcessing class that inherits from this one. - * All steps required to take inference output and populate results vectors should be handled. - */ - class BasePostProcess { - - public: - virtual ~BasePostProcess() = default; - - /** - * @brief Should perform post-processing of the result of inference then populate - * populate result data for any later use. - * @return true if successful, false otherwise. - **/ - virtual bool DoPostProcess() = 0; - }; - -} /* namespace app */ -} /* namespace arm */ - -#endif /* BASE_PROCESSING_HPP */
\ No newline at end of file diff --git a/source/application/main/include/BufAttributes.hpp b/source/application/main/include/BufAttributes.hpp new file mode 100644 index 0000000..0146443 --- /dev/null +++ b/source/application/main/include/BufAttributes.hpp @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2021 Arm Limited. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef BUF_ATTRIBUTES_HPP +#define BUF_ATTRIBUTES_HPP + +#if defined(ARM_NPU) + /* When Arm NPU is defined, we use the config set by NPU mem parameters */ + #include "ethosu_mem_config.h" + #define BYTE_ALIGNMENT ETHOS_U_MEM_BYTE_ALIGNMENT +#else /* defined(ARM_NPU) */ + /* otherwise, we use the default ones here. */ + #define ACTIVATION_BUF_SECTION section(".bss.NoInit.activation_buf_sram") + #define ACTIVATION_BUF_SECTION_NAME ("SRAM") + #define BYTE_ALIGNMENT 16 +#endif /* defined(ARM_NPU) */ + +#ifdef __has_attribute +#define HAVE_ATTRIBUTE(x) __has_attribute(x) +#else /* __has_attribute */ +#define HAVE_ATTRIBUTE(x) 0 +#endif /* __has_attribute */ + +#if HAVE_ATTRIBUTE(aligned) || (defined(__GNUC__) && !defined(__clang__)) + +/* We want all buffers/sections to be aligned to 16 byte. */ +#define ALIGNMENT_REQ aligned(BYTE_ALIGNMENT) + +#define MODEL_SECTION section("nn_model") + +/* Label section name */ +#define LABEL_SECTION section("labels") + +#ifndef ACTIVATION_BUF_SZ + #warning "ACTIVATION_BUF_SZ needs to be defined. Using default value" + #define ACTIVATION_BUF_SZ 0x00200000 +#endif /* ACTIVATION_BUF_SZ */ + +/* IFM section name. */ +#define IFM_BUF_SECTION section("ifm") + +/* Form the attributes, alignment is mandatory. */ +#define MAKE_ATTRIBUTE(x) __attribute__((ALIGNMENT_REQ, x)) +#define MODEL_TFLITE_ATTRIBUTE MAKE_ATTRIBUTE(MODEL_SECTION) +#define ACTIVATION_BUF_ATTRIBUTE MAKE_ATTRIBUTE(ACTIVATION_BUF_SECTION) +#define IFM_BUF_ATTRIBUTE MAKE_ATTRIBUTE(IFM_BUF_SECTION) +#define LABELS_ATTRIBUTE MAKE_ATTRIBUTE(LABEL_SECTION) + +#else /* HAVE_ATTRIBUTE(aligned) || (defined(__GNUC__) && !defined(__clang__)) */ + +#define MODEL_TFLITE_ATTRIBUTE +#define ACTIVATION_BUF_ATTRIBUTE +#define IFM_BUF_ATTRIBUTE +#define LABELS_ATTRIBUTE + +#endif /* HAVE_ATTRIBUTE(aligned) || (defined(__GNUC__) && !defined(__clang__)) */ + +#endif /* BUF_ATTRIBUTES_HPP */ diff --git a/source/application/main/include/ClassificationResult.hpp b/source/application/main/include/ClassificationResult.hpp deleted file mode 100644 index eae28e4..0000000 --- a/source/application/main/include/ClassificationResult.hpp +++ /dev/null @@ -1,41 +0,0 @@ -/* - * Copyright (c) 2021 Arm Limited. All rights reserved. - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#ifndef CLASSIFICATION_RESULT_HPP -#define CLASSIFICATION_RESULT_HPP - -#include <string> - -namespace arm { -namespace app { - - /** - * @brief Class representing a single classification result. - */ - class ClassificationResult { - public: - double m_normalisedVal = 0.0; - std::string m_label; - uint32_t m_labelIdx = 0; - - ClassificationResult() = default; - ~ClassificationResult() = default; - }; - -} /* namespace app */ -} /* namespace arm */ - -#endif /* CLASSIFICATION_RESULT_HPP */
\ No newline at end of file diff --git a/source/application/main/include/Classifier.hpp b/source/application/main/include/Classifier.hpp deleted file mode 100644 index d641c22..0000000 --- a/source/application/main/include/Classifier.hpp +++ /dev/null @@ -1,89 +0,0 @@ -/* - * Copyright (c) 2021 Arm Limited. All rights reserved. - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#ifndef CLASSIFIER_HPP -#define CLASSIFIER_HPP - -#include "ClassificationResult.hpp" -#include "TensorFlowLiteMicro.hpp" - -#include <vector> - -namespace arm { -namespace app { - - /** - * @brief Classifier - a helper class to get certain number of top - * results from the output vector from a classification NN. - **/ - class Classifier{ - public: - /** @brief Constructor. */ - Classifier() = default; - - /** - * @brief Gets the top N classification results from the - * output vector. - * @param[in] outputTensor Inference output tensor from an NN model. - * @param[out] vecResults A vector of classification results. - * populated by this function. - * @param[in] labels Labels vector to match classified classes. - * @param[in] topNCount Number of top classifications to pick. Default is 1. - * @param[in] useSoftmax Whether Softmax normalisation should be applied to output. Default is false. - * @return true if successful, false otherwise. - **/ - - virtual bool GetClassificationResults( - TfLiteTensor* outputTensor, - std::vector<ClassificationResult>& vecResults, - const std::vector <std::string>& labels, uint32_t topNCount, - bool use_softmax); - - /** - * @brief Populate the elements of the Classification Result object. - * @param[in] topNSet Ordered set of top 5 output class scores and labels. - * @param[out] vecResults A vector of classification results. - * populated by this function. - * @param[in] labels Labels vector to match classified classes. - **/ - - void SetVectorResults( - std::set<std::pair<float, uint32_t>>& topNSet, - std::vector<ClassificationResult>& vecResults, - const std::vector <std::string>& labels); - - private: - /** - * @brief Utility function that gets the top N classification results from the - * output vector. - * @param[in] tensor Inference output tensor from an NN model. - * @param[out] vecResults A vector of classification results - * populated by this function. - * @param[in] topNCount Number of top classifications to pick. - * @param[in] labels Labels vector to match classified classes. - * @return true if successful, false otherwise. - **/ - - bool GetTopNResults(const std::vector<float>& tensor, - std::vector<ClassificationResult>& vecResults, - uint32_t topNCount, - const std::vector <std::string>& labels); - }; - -} /* namespace app */ -} /* namespace arm */ - -#endif /* CLASSIFIER_HPP */ diff --git a/source/application/main/include/DataStructures.hpp b/source/application/main/include/DataStructures.hpp deleted file mode 100644 index 0616839..0000000 --- a/source/application/main/include/DataStructures.hpp +++ /dev/null @@ -1,128 +0,0 @@ -/* - * Copyright (c) 2021 Arm Limited. All rights reserved. - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#ifndef DATA_STRUCTURES_HPP -#define DATA_STRUCTURES_HPP - -#include <iterator> - -namespace arm { -namespace app { - - /** - * Class Array2d is a data structure that represents a two dimensional array. - * The data is allocated in contiguous memory, arranged row-wise - * and individual elements can be accessed with the () operator. - * For example a two dimensional array D of size (M, N) can be accessed: - * - * _|<------------- col size = N -------->| - * | D(r=0, c=0) D(r=0, c=1)... D(r=0, c=N) - * | D(r=1, c=0) D(r=1, c=1)... D(r=1, c=N) - * | ... - * row size = M ... - * | ... - * _ D(r=M, c=0) D(r=M, c=1)... D(r=M, c=N) - * - */ - template<typename T> - class Array2d { - public: - /** - * @brief Creates the array2d with the given sizes. - * @param[in] rows Number of rows. - * @param[in] cols Number of columns. - */ - Array2d(unsigned rows, unsigned cols): m_rows(rows), m_cols(cols) - { - if (rows == 0 || cols == 0) { - printf("Array2d constructor has 0 size.\n"); - m_data = nullptr; - return; - } - m_data = new T[rows * cols]; - } - - ~Array2d() - { - delete[] m_data; - } - - T& operator() (unsigned int row, unsigned int col) - { -#if defined(DEBUG) - if (row >= m_rows || col >= m_cols || m_data == nullptr) { - printf_err("Array2d subscript out of bounds.\n"); - } -#endif /* defined(DEBUG) */ - return m_data[m_cols * row + col]; - } - - T operator() (unsigned int row, unsigned int col) const - { -#if defined(DEBUG) - if (row >= m_rows || col >= m_cols || m_data == nullptr) { - printf_err("const Array2d subscript out of bounds.\n"); - } -#endif /* defined(DEBUG) */ - return m_data[m_cols * row + col]; - } - - /** - * @brief Gets rows number of the current array2d. - * @return Number of rows. - */ - size_t size(size_t dim) - { - switch (dim) - { - case 0: - return m_rows; - case 1: - return m_cols; - default: - return 0; - } - } - - /** - * @brief Gets the array2d total size. - */ - size_t totalSize() - { - return m_rows * m_cols; - } - - /** - * array2d iterator. - */ - using iterator=T*; - using const_iterator=T const*; - - iterator begin() { return m_data; } - iterator end() { return m_data + totalSize(); } - const_iterator begin() const { return m_data; } - const_iterator end() const { return m_data + totalSize(); }; - - private: - size_t m_rows; - size_t m_cols; - T* m_data; - }; - -} /* namespace app */ -} /* namespace arm */ - -#endif /* DATA_STRUCTURES_HPP */
\ No newline at end of file diff --git a/source/application/main/include/ImageUtils.hpp b/source/application/main/include/ImageUtils.hpp deleted file mode 100644 index a8c7650..0000000 --- a/source/application/main/include/ImageUtils.hpp +++ /dev/null @@ -1,116 +0,0 @@ -/* - * Copyright (c) 2022 Arm Limited. All rights reserved. - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#ifndef IMAGE_UTILS_HPP -#define IMAGE_UTILS_HPP - -#include <cstddef> -#include <cstdint> -#include <forward_list> -#include <vector> - -/* Helper macro to convert RGB888 to RGB565 format. */ -#define RGB888_TO_RGB565(R8,G8,B8) ((((R8>>3) & 0x1F) << 11) | \ - (((G8>>2) & 0x3F) << 5) | \ - ((B8>>3) & 0x1F)) - -constexpr uint16_t COLOR_BLACK = 0; -constexpr uint16_t COLOR_GREEN = RGB888_TO_RGB565( 0, 255, 0); // 2016; -constexpr uint16_t COLOR_YELLOW = RGB888_TO_RGB565(255, 255, 0); // 65504; - - -namespace arm { -namespace app { -namespace image { - - /** - * Contains the x,y co-ordinates of a box centre along with the box width and height. - */ - struct Box { - float x; - float y; - float w; - float h; - }; - - struct Detection { - Box bbox; - std::vector<float> prob; - float objectness; - }; - - /** - * @brief Calculate the 1D overlap. - * @param[in] x1Center First center point. - * @param[in] width1 First width. - * @param[in] x2Center Second center point. - * @param[in] width2 Second width. - * @return The overlap between the two lines. - **/ - float Calculate1DOverlap(float x1Center, float width1, float x2Center, float width2); - - /** - * @brief Calculate the intersection between the two given boxes. - * @param[in] box1 First box. - * @param[in] box2 Second box. - * @return The intersection value. - **/ - float CalculateBoxIntersect(Box& box1, Box& box2); - - /** - * @brief Calculate the union between the two given boxes. - * @param[in] box1 First box. - * @param[in] box2 Second box. - * @return The two given boxes union value. - **/ - float CalculateBoxUnion(Box& box1, Box& box2); - - /** - * @brief Calculate the intersection over union between the two given boxes. - * @param[in] box1 First box. - * @param[in] box2 Second box. - * @return The intersection over union value. - **/ - float CalculateBoxIOU(Box& box1, Box& box2); - - /** - * @brief Calculate the Non-Maxima suppression on the given detection boxes. - * @param[in] detections List of Detection boxes. - * @param[in] classes Number of classes. - * @param[in] iouThreshold Intersection over union threshold. - **/ - void CalculateNMS(std::forward_list<Detection>& detections, int classes, float iouThreshold); - - /** - * @brief Helper function to convert a UINT8 image to INT8 format. - * @param[in,out] data Pointer to the data start. - * @param[in] kMaxImageSize Total number of pixels in the image. - **/ - void ConvertImgToInt8(void* data, size_t kMaxImageSize); - - /** - * @brief Converts RGB image to grayscale. - * @param[in] srcPtr Pointer to RGB source image. - * @param[out] dstPtr Pointer to grayscale destination image. - * @param[in] imgSz Destination image size. - **/ - void RgbToGrayscale(const uint8_t* srcPtr, uint8_t* dstPtr, size_t dstImgSz); - -} /* namespace image */ -} /* namespace app */ -} /* namespace arm */ - -#endif /* IMAGE_UTILS_HPP */
\ No newline at end of file diff --git a/source/application/main/include/Mfcc.hpp b/source/application/main/include/Mfcc.hpp deleted file mode 100644 index 86330ca..0000000 --- a/source/application/main/include/Mfcc.hpp +++ /dev/null @@ -1,255 +0,0 @@ -/* - * Copyright (c) 2021 Arm Limited. All rights reserved. - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#ifndef MFCC_HPP -#define MFCC_HPP - -#include "PlatformMath.hpp" - -#include <vector> -#include <cstdint> -#include <cmath> -#include <limits> -#include <string> - -namespace arm { -namespace app { -namespace audio { - - /* MFCC's consolidated parameters. */ - class MfccParams { - public: - float m_samplingFreq; - uint32_t m_numFbankBins; - float m_melLoFreq; - float m_melHiFreq; - uint32_t m_numMfccFeatures; - uint32_t m_frameLen; - uint32_t m_frameLenPadded; - bool m_useHtkMethod; - - /** @brief Constructor */ - MfccParams(float samplingFreq, uint32_t numFbankBins, - float melLoFreq, float melHiFreq, - uint32_t numMfccFeats, uint32_t frameLen, - bool useHtkMethod); - - MfccParams() = delete; - - ~MfccParams() = default; - - /** @brief Log parameters */ - void Log() const; - }; - - /** - * @brief Class for MFCC feature extraction. - * Based on https://github.com/ARM-software/ML-KWS-for-MCU/blob/master/Deployment/Source/MFCC/mfcc.cpp - * This class is designed to be generic and self-sufficient but - * certain calculation routines can be overridden to accommodate - * use-case specific requirements. - */ - class MFCC { - public: - /** - * @brief Constructor - * @param[in] params MFCC parameters - */ - explicit MFCC(const MfccParams& params); - - MFCC() = delete; - - ~MFCC() = default; - - /** - * @brief Extract MFCC features for one single small frame of - * audio data e.g. 640 samples. - * @param[in] audioData Vector of audio samples to calculate - * features for. - * @return Vector of extracted MFCC features. - **/ - std::vector<float> MfccCompute(const std::vector<int16_t>& audioData); - - /** @brief Initialise. */ - void Init(); - - /** - * @brief Extract MFCC features and quantise for one single small - * frame of audio data e.g. 640 samples. - * @param[in] audioData Vector of audio samples to calculate - * features for. - * @param[in] quantScale Quantisation scale. - * @param[in] quantOffset Quantisation offset. - * @return Vector of extracted quantised MFCC features. - **/ - template<typename T> - std::vector<T> MfccComputeQuant(const std::vector<int16_t>& audioData, - const float quantScale, - const int quantOffset) - { - this->MfccComputePreFeature(audioData); - float minVal = std::numeric_limits<T>::min(); - float maxVal = std::numeric_limits<T>::max(); - - std::vector<T> mfccOut(this->m_params.m_numMfccFeatures); - const size_t numFbankBins = this->m_params.m_numFbankBins; - - /* Take DCT. Uses matrix mul. */ - for (size_t i = 0, j = 0; i < mfccOut.size(); ++i, j += numFbankBins) { - float sum = 0; - for (size_t k = 0; k < numFbankBins; ++k) { - sum += this->m_dctMatrix[j + k] * this->m_melEnergies[k]; - } - /* Quantize to T. */ - sum = std::round((sum / quantScale) + quantOffset); - mfccOut[i] = static_cast<T>(std::min<float>(std::max<float>(sum, minVal), maxVal)); - } - - return mfccOut; - } - - /* Constants */ - static constexpr float ms_logStep = /*logf(6.4)*/ 1.8562979903656 / 27.0; - static constexpr float ms_freqStep = 200.0 / 3; - static constexpr float ms_minLogHz = 1000.0; - static constexpr float ms_minLogMel = ms_minLogHz / ms_freqStep; - - protected: - /** - * @brief Project input frequency to Mel Scale. - * @param[in] freq Input frequency in floating point. - * @param[in] useHTKMethod bool to signal if HTK method is to be - * used for calculation. - * @return Mel transformed frequency in floating point. - **/ - static float MelScale(float freq, - bool useHTKMethod = true); - - /** - * @brief Inverse Mel transform - convert MEL warped frequency - * back to normal frequency. - * @param[in] melFreq Mel frequency in floating point. - * @param[in] useHTKMethod bool to signal if HTK method is to be - * used for calculation. - * @return Real world frequency in floating point. - **/ - static float InverseMelScale(float melFreq, - bool useHTKMethod = true); - - /** - * @brief Populates MEL energies after applying the MEL filter - * bank weights and adding them up to be placed into - * bins, according to the filter bank's first and last - * indices (pre-computed for each filter bank element - * by CreateMelFilterBank function). - * @param[in] fftVec Vector populated with FFT magnitudes. - * @param[in] melFilterBank 2D Vector with filter bank weights. - * @param[in] filterBankFilterFirst Vector containing the first indices of filter bank - * to be used for each bin. - * @param[in] filterBankFilterLast Vector containing the last indices of filter bank - * to be used for each bin. - * @param[out] melEnergies Pre-allocated vector of MEL energies to be - * populated. - * @return true if successful, false otherwise. - */ - virtual bool ApplyMelFilterBank( - std::vector<float>& fftVec, - std::vector<std::vector<float>>& melFilterBank, - std::vector<uint32_t>& filterBankFilterFirst, - std::vector<uint32_t>& filterBankFilterLast, - std::vector<float>& melEnergies); - - /** - * @brief Converts the Mel energies for logarithmic scale. - * @param[in,out] melEnergies 1D vector of Mel energies. - **/ - virtual void ConvertToLogarithmicScale(std::vector<float>& melEnergies); - - /** - * @brief Create a matrix used to calculate Discrete Cosine - * Transform. - * @param[in] inputLength Input length of the buffer on which - * DCT will be performed. - * @param[in] coefficientCount Total coefficients per input length. - * @return 1D vector with inputLength x coefficientCount elements - * populated with DCT coefficients. - */ - virtual std::vector<float> CreateDCTMatrix( - int32_t inputLength, - int32_t coefficientCount); - - /** - * @brief Given the low and high Mel values, get the normaliser - * for weights to be applied when populating the filter - * bank. - * @param[in] leftMel Low Mel frequency value. - * @param[in] rightMel High Mel frequency value. - * @param[in] useHTKMethod bool to signal if HTK method is to be - * used for calculation. - * @return Value to use for normalizing. - */ - virtual float GetMelFilterBankNormaliser( - const float& leftMel, - const float& rightMel, - bool useHTKMethod); - - private: - MfccParams m_params; - std::vector<float> m_frame; - std::vector<float> m_buffer; - std::vector<float> m_melEnergies; - std::vector<float> m_windowFunc; - std::vector<std::vector<float>> m_melFilterBank; - std::vector<float> m_dctMatrix; - std::vector<uint32_t> m_filterBankFilterFirst; - std::vector<uint32_t> m_filterBankFilterLast; - bool m_filterBankInitialised; - arm::app::math::FftInstance m_fftInstance; - - /** - * @brief Initialises the filter banks and the DCT matrix. **/ - void InitMelFilterBank(); - - /** - * @brief Signals whether the instance of MFCC has had its - * required buffers initialised. - * @return true if initialised, false otherwise. - **/ - bool IsMelFilterBankInited() const; - - /** - * @brief Create mel filter banks for MFCC calculation. - * @return 2D vector of floats. - **/ - std::vector<std::vector<float>> CreateMelFilterBank(); - - /** - * @brief Computes and populates internal memeber buffers used - * in MFCC feature calculation - * @param[in] audioData 1D vector of 16-bit audio data. - */ - void MfccComputePreFeature(const std::vector<int16_t>& audioData); - - /** @brief Computes the magnitude from an interleaved complex array. */ - void ConvertToPowerSpectrum(); - - }; - -} /* namespace audio */ -} /* namespace app */ -} /* namespace arm */ - -#endif /* MFCC_HPP */
\ No newline at end of file diff --git a/source/application/main/include/UseCaseCommonUtils.hpp b/source/application/main/include/UseCaseCommonUtils.hpp index 9b6d550..b0f2e7a 100644 --- a/source/application/main/include/UseCaseCommonUtils.hpp +++ b/source/application/main/include/UseCaseCommonUtils.hpp @@ -24,6 +24,7 @@ #include "UseCaseHandler.hpp" /* Handlers for different user options. */ #include "Classifier.hpp" /* Classifier. */ #include "InputFiles.hpp" +#include "BufAttributes.hpp" /* Buffer attributes */ void DisplayCommonMenu(); |