author     Richard Burton <richard.burton@arm.com>    2022-05-04 09:45:02 +0100
committer  Richard Burton <richard.burton@arm.com>    2022-05-04 09:45:02 +0100
commit     4e002791bc6781b549c6951cfe44f918289d7e82 (patch)
tree       b639243b5fa433657c207783a384bad1ed248536 /source/use_case/ad
parent     dd6d07b24bbf9023ebe8e8927be8aac3291d0f58 (diff)
download   ml-embedded-evaluation-kit-4e002791bc6781b549c6951cfe44f918289d7e82.tar.gz
MLECO-3173: Add AD, KWS_ASR and Noise reduction use case APIs

Signed-off-by: Richard Burton <richard.burton@arm.com>
Change-Id: I36f61ce74bf17f7b327cdae9704a22ca54144f37
Diffstat (limited to 'source/use_case/ad')
-rw-r--r--  source/use_case/ad/include/AdModel.hpp          |   8
-rw-r--r--  source/use_case/ad/include/AdPostProcessing.hpp |  50
-rw-r--r--  source/use_case/ad/include/AdProcessing.hpp     | 230
-rw-r--r--  source/use_case/ad/src/AdPostProcessing.cc      | 115
-rw-r--r--  source/use_case/ad/src/AdProcessing.cc          | 208
-rw-r--r--  source/use_case/ad/src/MainLoop.cc              |   7
-rw-r--r--  source/use_case/ad/src/UseCaseHandler.cc        | 291
7 files changed, 516 insertions, 393 deletions
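
The headline change here is that the AD use case moves from the free helper functions in AdPostProcessing.hpp/.cc to AdPreProcess and AdPostProcess classes implementing the shared BasePreProcess/BasePostProcess API. For orientation, a condensed sketch of how the reworked handler drives the new classes (taken from the UseCaseHandler.cc hunk below; model, tensor and audio set-up are elided, and the inference call is shown as the usual RunInference helper, which sits in context lines not visible in this diff):

    arm::app::AdPreProcess preProcess{inputTensor,
                                      melSpecFrameLength,   /* g_FrameLength from the use case config */
                                      melSpecFrameStride,   /* g_FrameStride from the use case config */
                                      trainingMean};        /* g_TrainingMean from the use case config */
    arm::app::AdPostProcess postProcess{outputTensor};

    /* Slide over the whole clip using the window size/stride deduced by the pre-processor. */
    auto audioDataSlider = audio::SlidingWindow<const int16_t>(
        get_audio_array(currentIndex),
        get_audio_array_size(currentIndex),
        preProcess.GetAudioWindowSize(),
        preProcess.GetAudioDataStride());

    float result = 0;
    while (audioDataSlider.HasNext()) {
        const int16_t* inferenceWindow = audioDataSlider.Next();

        preProcess.SetAudioWindowIndex(audioDataSlider.Index());
        preProcess.DoPreProcess(inferenceWindow, preProcess.GetAudioWindowSize());

        if (!RunInference(model, profiler)) {   /* common helper; call is in unchanged context lines */
            return false;
        }

        postProcess.DoPostProcess();
        /* Negative softmax score at the machine's output index is used as the outlier score. */
        result += -postProcess.GetOutputValue(machineOutputIndex);
    }
    /* The handler then averages the result over the inferences and compares it
     * against scoreThreshold (unchanged context, not shown in this diff). */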
diff --git a/source/use_case/ad/include/AdModel.hpp b/source/use_case/ad/include/AdModel.hpp
index 8d914c4..2195a7c 100644
--- a/source/use_case/ad/include/AdModel.hpp
+++ b/source/use_case/ad/include/AdModel.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2021 Arm Limited. All rights reserved.
+ * Copyright (c) 2021-2022 Arm Limited. All rights reserved.
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the "License");
@@ -28,6 +28,12 @@ namespace arm {
namespace app {
class AdModel : public Model {
+
+ public:
+ /* Indices for the expected model - based on input tensor shape */
+ static constexpr uint32_t ms_inputRowsIdx = 1;
+ static constexpr uint32_t ms_inputColsIdx = 2;
+
protected:
/** @brief Gets the reference to op resolver interface class */
const tflite::MicroOpResolver& GetOpResolver() override;
diff --git a/source/use_case/ad/include/AdPostProcessing.hpp b/source/use_case/ad/include/AdPostProcessing.hpp
deleted file mode 100644
index 7eaec84..0000000
--- a/source/use_case/ad/include/AdPostProcessing.hpp
+++ /dev/null
@@ -1,50 +0,0 @@
-/*
- * Copyright (c) 2021 Arm Limited. All rights reserved.
- * SPDX-License-Identifier: Apache-2.0
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#ifndef ADPOSTPROCESSING_HPP
-#define ADPOSTPROCESSING_HPP
-
-#include "TensorFlowLiteMicro.hpp"
-
-#include <vector>
-
-namespace arm {
-namespace app {
-
- /** @brief Dequantize TensorFlow Lite Micro tensor.
- * @param[in] tensor Pointer to the TensorFlow Lite Micro tensor to be dequantized.
- * @return Vector with the dequantized tensor values.
- **/
- template<typename T>
- std::vector<float> Dequantize(TfLiteTensor* tensor);
-
- /**
- * @brief Calculates the softmax of vector in place. **/
- void Softmax(std::vector<float>& inputVector);
-
-
- /** @brief Given a wav file name return AD model output index.
- * @param[in] wavFileName Audio WAV filename.
- * File name should be in format anything_goes_XX_here.wav
- * where XX is the machine ID e.g. 00, 02, 04 or 06
- * @return AD model output index as 8 bit integer.
- **/
- int8_t OutputIndexFromFileName(std::string wavFileName);
-
-} /* namespace app */
-} /* namespace arm */
-
-#endif /* ADPOSTPROCESSING_HPP */
diff --git a/source/use_case/ad/include/AdProcessing.hpp b/source/use_case/ad/include/AdProcessing.hpp
new file mode 100644
index 0000000..9abf6f1
--- /dev/null
+++ b/source/use_case/ad/include/AdProcessing.hpp
@@ -0,0 +1,230 @@
+/*
+ * Copyright (c) 2022 Arm Limited. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef AD_PROCESSING_HPP
+#define AD_PROCESSING_HPP
+
+#include "BaseProcessing.hpp"
+#include "AudioUtils.hpp"
+#include "AdMelSpectrogram.hpp"
+#include "log_macros.h"
+
+namespace arm {
+namespace app {
+
+ /**
+ * @brief Pre-processing class for anomaly detection use case.
+ * Implements methods declared by BasePreProcess and anything else needed
+ * to populate input tensors ready for inference.
+ */
+ class AdPreProcess : public BasePreProcess {
+
+ public:
+ /**
+ * @brief Constructor for AdPreProcess class objects
+ * @param[in] inputTensor input tensor pointer from the tensor arena.
+ * @param[in] melSpectrogramFrameLen MEL spectrogram's frame length
+ * @param[in] melSpectrogramFrameStride MEL spectrogram's frame stride
+ * @param[in] adModelTrainingMean Training mean for the Anomaly detection model being used.
+ */
+ explicit AdPreProcess(TfLiteTensor* inputTensor,
+ uint32_t melSpectrogramFrameLen,
+ uint32_t melSpectrogramFrameStride,
+ float adModelTrainingMean);
+
+ ~AdPreProcess() = default;
+
+ /**
+ * @brief Function to invoke pre-processing and populate the input vector
+ * @param input pointer to input data. For anomaly detection, this is the pointer to
+ * the audio data.
+ * @param inputSize Size of the data being passed in for pre-processing.
+ * @return True if successful, false otherwise.
+ */
+ bool DoPreProcess(const void* input, size_t inputSize) override;
+
+ /**
+ * @brief Getter function for audio window size computed when constructing
+ * the class object.
+ * @return Audio window size as 32 bit unsigned integer.
+ */
+ uint32_t GetAudioWindowSize();
+
+ /**
+ * @brief Getter function for audio window stride computed when constructing
+ * the class object.
+ * @return Audio window stride as 32 bit unsigned integer.
+ */
+ uint32_t GetAudioDataStride();
+
+ /**
+ * @brief Setter function for current audio index. This is only used for evaluating
+ * if previously computed features can be re-used from cache.
+ */
+ void SetAudioWindowIndex(uint32_t idx);
+
+ private:
+ bool m_validInstance{false}; /**< Indicates the current object is valid. */
+ uint32_t m_melSpectrogramFrameLen{}; /**< MEL spectrogram's window frame length */
+ uint32_t m_melSpectrogramFrameStride{}; /**< MEL spectrogram's window frame stride */
+ uint8_t m_inputResizeScale{}; /**< Downscaling factor for the MEL energy matrix. */
+ uint32_t m_numMelSpecVectorsInAudioStride{}; /**< Number of frames to move across the audio. */
+ uint32_t m_audioDataWindowSize{}; /**< Audio window size computed based on other parameters. */
+ uint32_t m_audioDataStride{}; /**< Audio window stride computed. */
+ uint32_t m_numReusedFeatureVectors{}; /**< Number of MEL vectors that can be re-used */
+ uint32_t m_audioWindowIndex{}; /**< Current audio window index (from audio's sliding window) */
+
+ audio::SlidingWindow<const int16_t> m_melWindowSlider; /**< Internal MEL spectrogram window slider */
+ audio::AdMelSpectrogram m_melSpec; /**< MEL spectrogram computation object */
+ std::function<void
+ (std::vector<int16_t>&, int, bool, size_t, size_t)> m_featureCalc; /**< Feature calculator object */
+ };
+
+ class AdPostProcess : public BasePostProcess {
+ public:
+ /**
+ * @brief Constructor for AdPostProcess object.
+ * @param[in] outputTensor Output tensor pointer.
+ */
+ explicit AdPostProcess(TfLiteTensor* outputTensor);
+
+ ~AdPostProcess() = default;
+
+ /**
+ * @brief Function to do the post-processing on the output tensor.
+ * @return True if successful, false otherwise.
+ */
+ bool DoPostProcess() override;
+
+ /**
+ * @brief Getter function for an element from the de-quantised output vector.
+ * @param index Index of the element to be retrieved.
+ * @return Value at the given index as a 32 bit floating point number.
+ */
+ float GetOutputValue(uint32_t index);
+
+ private:
+ TfLiteTensor* m_outputTensor{}; /**< Output tensor pointer */
+ std::vector<float> m_dequantizedOutputVec{}; /**< Internal output vector */
+
+ /**
+ * @brief De-quantizes and flattens the output tensor into a vector.
+ * @tparam T template parameter to indicate data type.
+ * @return True if successful, false otherwise.
+ */
+ template<typename T>
+ bool Dequantize()
+ {
+ TfLiteTensor* tensor = this->m_outputTensor;
+ if (tensor == nullptr) {
+ printf_err("Invalid output tensor.\n");
+ return false;
+ }
+ T* tensorData = tflite::GetTensorData<T>(tensor);
+
+ uint32_t totalOutputSize = 1;
+ for (int inputDim = 0; inputDim < tensor->dims->size; inputDim++){
+ totalOutputSize *= tensor->dims->data[inputDim];
+ }
+
+ /* For getting the floating point values, we need quantization parameters */
+ QuantParams quantParams = GetTensorQuantParams(tensor);
+
+ this->m_dequantizedOutputVec = std::vector<float>(totalOutputSize, 0);
+
+ for (size_t i = 0; i < totalOutputSize; ++i) {
+ this->m_dequantizedOutputVec[i] = quantParams.scale * (tensorData[i] - quantParams.offset);
+ }
+
+ return true;
+ }
+ };
+
+ /* Templated instances available: */
+ template bool AdPostProcess::Dequantize<int8_t>();
+
+ /**
+ * @brief Generic feature calculator factory.
+ *
+ * Returns a lambda function that computes features using a feature cache.
+ * The actual feature maths is done by the lambda function provided as a parameter.
+ * Features are written to the input tensor memory.
+ *
+ * @tparam T feature vector type.
+ * @param inputTensor model input tensor pointer.
+ * @param cacheSize number of feature vectors to cache. Defined by the sliding window overlap.
+ * @param compute features calculator function.
+ * @return lambda function to compute features.
+ */
+ template<class T>
+ std::function<void (std::vector<int16_t>&, size_t, bool, size_t, size_t)>
+ FeatureCalc(TfLiteTensor* inputTensor, size_t cacheSize,
+ std::function<std::vector<T> (std::vector<int16_t>& )> compute)
+ {
+ /* Feature cache to be captured by lambda function. */
+ static std::vector<std::vector<T>> featureCache = std::vector<std::vector<T>>(cacheSize);
+
+ return [=](std::vector<int16_t>& audioDataWindow,
+ size_t index,
+ bool useCache,
+ size_t featuresOverlapIndex,
+ size_t resizeScale)
+ {
+ T* tensorData = tflite::GetTensorData<T>(inputTensor);
+ std::vector<T> features;
+
+ /* Reuse features from cache if cache is ready and sliding windows overlap.
+ * The overlap is at the beginning of the sliding window and has the size of the feature cache. */
+ if (useCache && index < featureCache.size()) {
+ features = std::move(featureCache[index]);
+ } else {
+ features = std::move(compute(audioDataWindow));
+ }
+ auto size = features.size() / resizeScale;
+ auto sizeBytes = sizeof(T);
+
+ /* Input should be transposed and "resized" by skipping elements. */
+ for (size_t outIndex = 0; outIndex < size; outIndex++) {
+ std::memcpy(tensorData + (outIndex*size) + index, &features[outIndex*resizeScale], sizeBytes);
+ }
+
+ /* Start renewing the cache as soon as the iteration goes out of the windows overlap. */
+ if (index >= featuresOverlapIndex / resizeScale) {
+ featureCache[index - featuresOverlapIndex / resizeScale] = std::move(features);
+ }
+ };
+ }
+
+ template std::function<void (std::vector<int16_t>&, size_t , bool, size_t, size_t)>
+ FeatureCalc<int8_t>(TfLiteTensor* inputTensor,
+ size_t cacheSize,
+ std::function<std::vector<int8_t> (std::vector<int16_t>&)> compute);
+
+ template std::function<void(std::vector<int16_t>&, size_t, bool, size_t, size_t)>
+ FeatureCalc<float>(TfLiteTensor *inputTensor,
+ size_t cacheSize,
+ std::function<std::vector<float>(std::vector<int16_t>&)> compute);
+
+ std::function<void (std::vector<int16_t>&, int, bool, size_t, size_t)>
+ GetFeatureCalculator(audio::AdMelSpectrogram& melSpec,
+ TfLiteTensor* inputTensor,
+ size_t cacheSize,
+ float trainingMean);
+
+} /* namespace app */
+} /* namespace arm */
+
+#endif /* AD_PROCESSING_HPP */
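
AdPostProcess::Dequantize above flattens the output tensor and applies the usual affine de-quantisation, real = scale * (quantised - zero_point), using the parameters returned by GetTensorQuantParams. A minimal standalone sketch of just that arithmetic, with made-up scale/offset values for illustration:

    #include <cstdint>
    #include <cstdio>

    int main()
    {
        /* Illustrative quantisation parameters - real values come from the output tensor. */
        const float  scale  = 0.05f;
        const int    offset = -128;                     /* zero point */
        const int8_t quantised[] = {-128, -60, 10, 127};

        for (const int8_t q : quantised) {
            /* Same mapping as AdPostProcess::Dequantize: scale * (q - offset). */
            const float real = scale * (static_cast<int>(q) - offset);
            std::printf("%4d -> %f\n", q, real);        /* e.g. 10 -> 6.90 */
        }
        return 0;
    }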
diff --git a/source/use_case/ad/src/AdPostProcessing.cc b/source/use_case/ad/src/AdPostProcessing.cc
deleted file mode 100644
index c461875..0000000
--- a/source/use_case/ad/src/AdPostProcessing.cc
+++ /dev/null
@@ -1,115 +0,0 @@
-/*
- * Copyright (c) 2021 Arm Limited. All rights reserved.
- * SPDX-License-Identifier: Apache-2.0
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include "AdPostProcessing.hpp"
-#include "log_macros.h"
-
-#include <numeric>
-#include <cmath>
-#include <string>
-
-namespace arm {
-namespace app {
-
- template<typename T>
- std::vector<float> Dequantize(TfLiteTensor* tensor) {
-
- if (tensor == nullptr) {
- printf_err("Tensor is null pointer can not dequantize.\n");
- return std::vector<float>();
- }
- T* tensorData = tflite::GetTensorData<T>(tensor);
-
- uint32_t totalOutputSize = 1;
- for (int inputDim = 0; inputDim < tensor->dims->size; inputDim++){
- totalOutputSize *= tensor->dims->data[inputDim];
- }
-
- /* For getting the floating point values, we need quantization parameters */
- QuantParams quantParams = GetTensorQuantParams(tensor);
-
- std::vector<float> dequantizedOutput(totalOutputSize);
-
- for (size_t i = 0; i < totalOutputSize; ++i) {
- dequantizedOutput[i] = quantParams.scale * (tensorData[i] - quantParams.offset);
- }
-
- return dequantizedOutput;
- }
-
- void Softmax(std::vector<float>& inputVector) {
- auto start = inputVector.begin();
- auto end = inputVector.end();
-
- /* Fix for numerical stability and apply exp. */
- float maxValue = *std::max_element(start, end);
- for (auto it = start; it!=end; ++it) {
- *it = std::exp((*it) - maxValue);
- }
-
- float sumExp = std::accumulate(start, end, 0.0f);
-
- for (auto it = start; it!=end; ++it) {
- *it = (*it)/sumExp;
- }
- }
-
- int8_t OutputIndexFromFileName(std::string wavFileName) {
- /* Filename is assumed in the form machine_id_00.wav */
- std::string delimiter = "_"; /* First character used to split the file name up. */
- size_t delimiterStart;
- std::string subString;
- size_t machineIdxInString = 3; /* Which part of the file name the machine id should be at. */
-
- for (size_t i = 0; i < machineIdxInString; ++i) {
- delimiterStart = wavFileName.find(delimiter);
- subString = wavFileName.substr(0, delimiterStart);
- wavFileName.erase(0, delimiterStart + delimiter.length());
- }
-
- /* At this point substring should be 00.wav */
- delimiter = "."; /* Second character used to split the file name up. */
- delimiterStart = subString.find(delimiter);
- subString = (delimiterStart != std::string::npos) ? subString.substr(0, delimiterStart) : subString;
-
- auto is_number = [](const std::string& str) -> bool
- {
- std::string::const_iterator it = str.begin();
- while (it != str.end() && std::isdigit(*it)) ++it;
- return !str.empty() && it == str.end();
- };
-
- const int8_t machineIdx = is_number(subString) ? std::stoi(subString) : -1;
-
- /* Return corresponding index in the output vector. */
- if (machineIdx == 0) {
- return 0;
- } else if (machineIdx == 2) {
- return 1;
- } else if (machineIdx == 4) {
- return 2;
- } else if (machineIdx == 6) {
- return 3;
- } else {
- printf_err("%d is an invalid machine index \n", machineIdx);
- return -1;
- }
- }
-
- template std::vector<float> Dequantize<uint8_t>(TfLiteTensor* tensor);
- template std::vector<float> Dequantize<int8_t>(TfLiteTensor* tensor);
-} /* namespace app */
-} /* namespace arm */
\ No newline at end of file
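
OutputIndexFromFileName is not dropped for good: it reappears further down as a static helper in UseCaseHandler.cc. The machine-ID-to-output-index mapping it implements, shown on its own for clarity (the driver code below is hypothetical; the parsing of "anything_goes_XX_here.wav" itself is unchanged):

    #include <cstdio>
    #include <initializer_list>

    /* Machine ID embedded in the WAV file name -> AD model output index. */
    static int OutputIndexForMachineId(int machineId)
    {
        switch (machineId) {
            case 0:  return 0;
            case 2:  return 1;
            case 4:  return 2;
            case 6:  return 3;
            default: return -1;   /* unsupported machine ID */
        }
    }

    int main()
    {
        for (const int id : {0, 2, 4, 6, 5}) {
            std::printf("machine id %02d -> output index %d\n", id, OutputIndexForMachineId(id));
        }
        return 0;
    }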
diff --git a/source/use_case/ad/src/AdProcessing.cc b/source/use_case/ad/src/AdProcessing.cc
new file mode 100644
index 0000000..a33131c
--- /dev/null
+++ b/source/use_case/ad/src/AdProcessing.cc
@@ -0,0 +1,208 @@
+/*
+ * Copyright (c) 2022 Arm Limited. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "AdProcessing.hpp"
+
+#include "AdModel.hpp"
+
+namespace arm {
+namespace app {
+
+AdPreProcess::AdPreProcess(TfLiteTensor* inputTensor,
+ uint32_t melSpectrogramFrameLen,
+ uint32_t melSpectrogramFrameStride,
+ float adModelTrainingMean):
+ m_validInstance{false},
+ m_melSpectrogramFrameLen{melSpectrogramFrameLen},
+ m_melSpectrogramFrameStride{melSpectrogramFrameStride},
+ /**< Model is trained on features downsampled 2x */
+ m_inputResizeScale{2},
+ /**< We are choosing to move by 20 frames across the audio for each inference. */
+ m_numMelSpecVectorsInAudioStride{20},
+ m_audioDataStride{m_numMelSpecVectorsInAudioStride * melSpectrogramFrameStride},
+ m_melSpec{melSpectrogramFrameLen}
+{
+ if (!inputTensor) {
+ printf_err("Invalid input tensor provided to pre-process\n");
+ return;
+ }
+
+ TfLiteIntArray* inputShape = inputTensor->dims;
+
+ if (!inputShape) {
+ printf_err("Invalid input tensor dims\n");
+ return;
+ }
+
+ const uint32_t kNumRows = inputShape->data[AdModel::ms_inputRowsIdx];
+ const uint32_t kNumCols = inputShape->data[AdModel::ms_inputColsIdx];
+
+ /* Deduce the data length required for 1 inference from the network parameters. */
+ this->m_audioDataWindowSize = (((this->m_inputResizeScale * kNumCols) - 1) *
+ melSpectrogramFrameStride) +
+ melSpectrogramFrameLen;
+ this->m_numReusedFeatureVectors = kNumRows -
+ (this->m_numMelSpecVectorsInAudioStride /
+ this->m_inputResizeScale);
+ this->m_melSpec.Init();
+
+ /* Creating a Mel Spectrogram sliding window for the data required for 1 inference.
+ * "resizing" done here by multiplying stride by resize scale. */
+ this->m_melWindowSlider = audio::SlidingWindow<const int16_t>(
+ nullptr, /* to be populated later. */
+ this->m_audioDataWindowSize,
+ melSpectrogramFrameLen,
+ melSpectrogramFrameStride * this->m_inputResizeScale);
+
+ /* Construct feature calculation function. */
+ this->m_featureCalc = GetFeatureCalculator(this->m_melSpec, inputTensor,
+ this->m_numReusedFeatureVectors,
+ adModelTrainingMean);
+ this->m_validInstance = true;
+}
+
+bool AdPreProcess::DoPreProcess(const void* input, size_t inputSize)
+{
+ /* Check that we have a valid instance. */
+ if (!this->m_validInstance) {
+ printf_err("Invalid pre-processor instance\n");
+ return false;
+ }
+
+ /* We expect to be able to traverse the size with which the MEL spectrogram
+ * sliding window was initialised. */
+ if (!input || inputSize < this->m_audioDataWindowSize) {
+ printf_err("Invalid input provided for pre-processing\n");
+ return false;
+ }
+
+ /* We moved to the next window - set the features sliding to the new address. */
+ this->m_melWindowSlider.Reset(static_cast<const int16_t*>(input));
+
+ /* The first window does not have cache ready. */
+ const bool useCache = this->m_audioWindowIndex > 0 && this->m_numReusedFeatureVectors > 0;
+
+ /* Start calculating features inside one audio sliding window. */
+ while (this->m_melWindowSlider.HasNext()) {
+ const int16_t* melSpecWindow = this->m_melWindowSlider.Next();
+ std::vector<int16_t> melSpecAudioData = std::vector<int16_t>(
+ melSpecWindow,
+ melSpecWindow + this->m_melSpectrogramFrameLen);
+
+ /* Compute features for this window and write them to input tensor. */
+ this->m_featureCalc(melSpecAudioData,
+ this->m_melWindowSlider.Index(),
+ useCache,
+ this->m_numMelSpecVectorsInAudioStride,
+ this->m_inputResizeScale);
+ }
+
+ return true;
+}
+
+uint32_t AdPreProcess::GetAudioWindowSize()
+{
+ return this->m_audioDataWindowSize;
+}
+
+uint32_t AdPreProcess::GetAudioDataStride()
+{
+ return this->m_audioDataStride;
+}
+
+void AdPreProcess::SetAudioWindowIndex(uint32_t idx)
+{
+ this->m_audioWindowIndex = idx;
+}
+
+AdPostProcess::AdPostProcess(TfLiteTensor* outputTensor) :
+ m_outputTensor {outputTensor}
+{}
+
+bool AdPostProcess::DoPostProcess()
+{
+ switch (this->m_outputTensor->type) {
+ case kTfLiteInt8:
+ this->Dequantize<int8_t>();
+ break;
+ default:
+ printf_err("Unsupported tensor type");
+ return false;
+ }
+
+ math::MathUtils::SoftmaxF32(this->m_dequantizedOutputVec);
+ return true;
+}
+
+float AdPostProcess::GetOutputValue(uint32_t index)
+{
+ if (index < this->m_dequantizedOutputVec.size()) {
+ return this->m_dequantizedOutputVec[index];
+ }
+ printf_err("Invalid index for output\n");
+ return 0.0;
+}
+
+std::function<void (std::vector<int16_t>&, int, bool, size_t, size_t)>
+GetFeatureCalculator(audio::AdMelSpectrogram& melSpec,
+ TfLiteTensor* inputTensor,
+ size_t cacheSize,
+ float trainingMean)
+{
+ std::function<void (std::vector<int16_t>&, size_t, bool, size_t, size_t)> melSpecFeatureCalc;
+
+ TfLiteQuantization quant = inputTensor->quantization;
+
+ if (kTfLiteAffineQuantization == quant.type) {
+
+ auto* quantParams = static_cast<TfLiteAffineQuantization*>(quant.params);
+ const float quantScale = quantParams->scale->data[0];
+ const int quantOffset = quantParams->zero_point->data[0];
+
+ switch (inputTensor->type) {
+ case kTfLiteInt8: {
+ melSpecFeatureCalc = FeatureCalc<int8_t>(
+ inputTensor,
+ cacheSize,
+ [=, &melSpec](std::vector<int16_t>& audioDataWindow) {
+ return melSpec.MelSpecComputeQuant<int8_t>(
+ audioDataWindow,
+ quantScale,
+ quantOffset,
+ trainingMean);
+ }
+ );
+ break;
+ }
+ default:
+ printf_err("Tensor type %s not supported\n", TfLiteTypeGetName(inputTensor->type));
+ }
+ } else {
+ melSpecFeatureCalc = FeatureCalc<float>(
+ inputTensor,
+ cacheSize,
+ [=, &melSpec](
+ std::vector<int16_t>& audioDataWindow) {
+ return melSpec.ComputeMelSpec(
+ audioDataWindow,
+ trainingMean);
+ });
+ }
+ return melSpecFeatureCalc;
+}
+
+} /* namespace app */
+} /* namespace arm */
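
DoPostProcess de-quantises the output tensor and applies SoftmaxF32 in place; the handler then negates the probability at the machine's output index to obtain the outlier score. A standalone sketch of that scoring step, mirroring the numerically stable Softmax helper deleted above (output values are illustrative):

    #include <algorithm>
    #include <cmath>
    #include <cstdio>
    #include <numeric>
    #include <vector>

    int main()
    {
        /* Illustrative de-quantised model outputs (one element per machine ID). */
        std::vector<float> outputs{1.2f, 0.3f, -0.5f, 0.1f};

        /* Numerically stable softmax in place. */
        const float maxValue = *std::max_element(outputs.begin(), outputs.end());
        for (float& v : outputs) {
            v = std::exp(v - maxValue);
        }
        const float sumExp = std::accumulate(outputs.begin(), outputs.end(), 0.0f);
        for (float& v : outputs) {
            v /= sumExp;
        }

        /* Outlier score: negative softmax probability at the machine's output index. */
        const int machineOutputIndex = 1;    /* e.g. machine ID 02 */
        const float outlierScore = -outputs[machineOutputIndex];
        std::printf("outlier score: %f\n", outlierScore);
        return 0;
    }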
diff --git a/source/use_case/ad/src/MainLoop.cc b/source/use_case/ad/src/MainLoop.cc
index 23d1e51..140359b 100644
--- a/source/use_case/ad/src/MainLoop.cc
+++ b/source/use_case/ad/src/MainLoop.cc
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2021 Arm Limited. All rights reserved.
+ * Copyright (c) 2021-2022 Arm Limited. All rights reserved.
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the "License");
@@ -14,7 +14,6 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-#include "hal.h" /* Brings in platform definitions */
#include "InputFiles.hpp" /* For input data */
#include "AdModel.hpp" /* Model class for running inference */
#include "UseCaseCommonUtils.hpp" /* Utils functions */
@@ -63,8 +62,8 @@ void main_loop()
caseContext.Set<arm::app::Profiler&>("profiler", profiler);
caseContext.Set<arm::app::Model&>("model", model);
caseContext.Set<uint32_t>("clipIndex", 0);
- caseContext.Set<int>("frameLength", g_FrameLength);
- caseContext.Set<int>("frameStride", g_FrameStride);
+ caseContext.Set<uint32_t>("frameLength", g_FrameLength);
+ caseContext.Set<uint32_t>("frameStride", g_FrameStride);
caseContext.Set<float>("scoreThreshold", g_ScoreThreshold);
caseContext.Set<float>("trainingMean", g_TrainingMean);
diff --git a/source/use_case/ad/src/UseCaseHandler.cc b/source/use_case/ad/src/UseCaseHandler.cc
index 5585f36..0179d6b 100644
--- a/source/use_case/ad/src/UseCaseHandler.cc
+++ b/source/use_case/ad/src/UseCaseHandler.cc
@@ -24,8 +24,8 @@
#include "AudioUtils.hpp"
#include "ImageUtils.hpp"
#include "UseCaseCommonUtils.hpp"
-#include "AdPostProcessing.hpp"
#include "log_macros.h"
+#include "AdProcessing.hpp"
namespace arm {
namespace app {
@@ -39,32 +39,17 @@ namespace app {
**/
static bool PresentInferenceResult(float result, float threshold);
- /**
- * @brief Returns a function to perform feature calculation and populates input tensor data with
- * MelSpe data.
- *
- * Input tensor data type check is performed to choose correct MFCC feature data type.
- * If tensor has an integer data type then original features are quantised.
- *
- * Warning: mfcc calculator provided as input must have the same life scope as returned function.
- *
- * @param[in] melSpec MFCC feature calculator.
- * @param[in,out] inputTensor Input tensor pointer to store calculated features.
- * @param[in] cacheSize Size of the feture vectors cache (number of feature vectors).
- * @param[in] trainingMean Training mean.
- * @return function function to be called providing audio sample and sliding window index.
- */
- static std::function<void (std::vector<int16_t>&, int, bool, size_t, size_t)>
- GetFeatureCalculator(audio::AdMelSpectrogram& melSpec,
- TfLiteTensor* inputTensor,
- size_t cacheSize,
- float trainingMean);
-
- /* Vibration classification handler */
+ /** @brief Given a WAV file name, return the AD model output index.
+ * @param[in] wavFileName Audio WAV filename.
+ * The file name should be in the format anything_goes_XX_here.wav,
+ * where XX is the machine ID, e.g. 00, 02, 04 or 06.
+ * @return AD model output index as 8 bit integer.
+ **/
+ static int8_t OutputIndexFromFileName(std::string wavFileName);
+
+ /* Anomaly Detection inference handler */
bool ClassifyVibrationHandler(ApplicationContext& ctx, uint32_t clipIndex, bool runAll)
{
- auto& profiler = ctx.Get<Profiler&>("profiler");
-
constexpr uint32_t dataPsnTxtInfStartX = 20;
constexpr uint32_t dataPsnTxtInfStartY = 40;
@@ -81,8 +66,9 @@ namespace app {
return false;
}
- const auto frameLength = ctx.Get<int>("frameLength");
- const auto frameStride = ctx.Get<int>("frameStride");
+ auto& profiler = ctx.Get<Profiler&>("profiler");
+ const auto melSpecFrameLength = ctx.Get<uint32_t>("frameLength");
+ const auto melSpecFrameStride = ctx.Get<uint32_t>("frameStride");
const auto scoreThreshold = ctx.Get<float>("scoreThreshold");
const auto trainingMean = ctx.Get<float>("trainingMean");
auto startClipIdx = ctx.Get<uint32_t>("clipIndex");
@@ -95,21 +81,13 @@ namespace app {
return false;
}
- TfLiteIntArray* inputShape = model.GetInputShape(0);
- const uint32_t kNumRows = inputShape->data[1];
- const uint32_t kNumCols = inputShape->data[2];
+ AdPreProcess preProcess{
+ inputTensor,
+ melSpecFrameLength,
+ melSpecFrameStride,
+ trainingMean};
- audio::AdMelSpectrogram melSpec = audio::AdMelSpectrogram(frameLength);
- melSpec.Init();
-
- /* Deduce the data length required for 1 inference from the network parameters. */
- const uint8_t inputResizeScale = 2;
- const uint32_t audioDataWindowSize = (((inputResizeScale * kNumCols) - 1) * frameStride) + frameLength;
-
- /* We are choosing to move by 20 frames across the audio for each inference. */
- const uint8_t nMelSpecVectorsInAudioStride = 20;
-
- auto audioDataStride = nMelSpecVectorsInAudioStride * frameStride;
+ AdPostProcess postProcess{outputTensor};
do {
hal_lcd_clear(COLOR_BLACK);
@@ -122,29 +100,12 @@ namespace app {
return false;
}
- /* Creating a Mel Spectrogram sliding window for the data required for 1 inference.
- * "resizing" done here by multiplying stride by resize scale. */
- auto audioMelSpecWindowSlider = audio::SlidingWindow<const int16_t>(
- get_audio_array(currentIndex),
- audioDataWindowSize, frameLength,
- frameStride * inputResizeScale);
-
/* Creating a sliding window through the whole audio clip. */
auto audioDataSlider = audio::SlidingWindow<const int16_t>(
- get_audio_array(currentIndex),
- get_audio_array_size(currentIndex),
- audioDataWindowSize, audioDataStride);
-
- /* Calculate number of the feature vectors in the window overlap region taking into account resizing.
- * These feature vectors will be reused.*/
- auto numberOfReusedFeatureVectors = kNumRows - (nMelSpecVectorsInAudioStride / inputResizeScale);
-
- /* Construct feature calculation function. */
- auto melSpecFeatureCalc = GetFeatureCalculator(melSpec, inputTensor,
- numberOfReusedFeatureVectors, trainingMean);
- if (!melSpecFeatureCalc){
- return false;
- }
+ get_audio_array(currentIndex),
+ get_audio_array_size(currentIndex),
+ preProcess.GetAudioWindowSize(),
+ preProcess.GetAudioDataStride());
/* Result is an averaged sum over inferences. */
float result = 0;
@@ -152,30 +113,18 @@ namespace app {
/* Display message on the LCD - inference running. */
std::string str_inf{"Running inference... "};
hal_lcd_display_text(
- str_inf.c_str(), str_inf.size(),
- dataPsnTxtInfStartX, dataPsnTxtInfStartY, 0);
- info("Running inference on audio clip %" PRIu32 " => %s\n", currentIndex, get_filename(currentIndex));
+ str_inf.c_str(), str_inf.size(),
+ dataPsnTxtInfStartX, dataPsnTxtInfStartY, 0);
+
+ info("Running inference on audio clip %" PRIu32 " => %s\n",
+ currentIndex, get_filename(currentIndex));
/* Start sliding through audio clip. */
while (audioDataSlider.HasNext()) {
- const int16_t *inferenceWindow = audioDataSlider.Next();
-
- /* We moved to the next window - set the features sliding to the new address. */
- audioMelSpecWindowSlider.Reset(inferenceWindow);
+ const int16_t* inferenceWindow = audioDataSlider.Next();
- /* The first window does not have cache ready. */
- bool useCache = audioDataSlider.Index() > 0 && numberOfReusedFeatureVectors > 0;
-
- /* Start calculating features inside one audio sliding window. */
- while (audioMelSpecWindowSlider.HasNext()) {
- const int16_t *melSpecWindow = audioMelSpecWindowSlider.Next();
- std::vector<int16_t> melSpecAudioData = std::vector<int16_t>(melSpecWindow,
- melSpecWindow + frameLength);
-
- /* Compute features for this window and write them to input tensor. */
- melSpecFeatureCalc(melSpecAudioData, audioMelSpecWindowSlider.Index(),
- useCache, nMelSpecVectorsInAudioStride, inputResizeScale);
- }
+ preProcess.SetAudioWindowIndex(audioDataSlider.Index());
+ preProcess.DoPreProcess(inferenceWindow, preProcess.GetAudioWindowSize());
info("Inference %zu/%zu\n", audioDataSlider.Index() + 1,
audioDataSlider.TotalStrides() + 1);
@@ -185,13 +134,11 @@ namespace app {
return false;
}
- /* Use the negative softmax score of the corresponding index as the outlier score */
- std::vector<float> dequantOutput = Dequantize<int8_t>(outputTensor);
- Softmax(dequantOutput);
- result += -dequantOutput[machineOutputIndex];
+ postProcess.DoPostProcess();
+ result += 0 - postProcess.GetOutputValue(machineOutputIndex);
#if VERIFY_TEST_OUTPUT
- arm::app::DumpTensor(outputTensor);
+ DumpTensor(outputTensor);
#endif /* VERIFY_TEST_OUTPUT */
} /* while (audioDataSlider.HasNext()) */
@@ -218,7 +165,6 @@ namespace app {
return true;
}
-
static bool PresentInferenceResult(float result, float threshold)
{
constexpr uint32_t dataPsnTxtStartX1 = 20;
@@ -251,148 +197,47 @@ namespace app {
return true;
}
- /**
- * @brief Generic feature calculator factory.
- *
- * Returns lambda function to compute features using features cache.
- * Real features math is done by a lambda function provided as a parameter.
- * Features are written to input tensor memory.
- *
- * @tparam T feature vector type.
- * @param inputTensor model input tensor pointer.
- * @param cacheSize number of feature vectors to cache. Defined by the sliding window overlap.
- * @param compute features calculator function.
- * @return lambda function to compute features.
- */
- template<class T>
- std::function<void (std::vector<int16_t>&, size_t, bool, size_t, size_t)>
- FeatureCalc(TfLiteTensor* inputTensor, size_t cacheSize,
- std::function<std::vector<T> (std::vector<int16_t>& )> compute)
+ static int8_t OutputIndexFromFileName(std::string wavFileName)
{
- /* Feature cache to be captured by lambda function*/
- static std::vector<std::vector<T>> featureCache = std::vector<std::vector<T>>(cacheSize);
-
- return [=](std::vector<int16_t>& audioDataWindow,
- size_t index,
- bool useCache,
- size_t featuresOverlapIndex,
- size_t resizeScale)
- {
- T *tensorData = tflite::GetTensorData<T>(inputTensor);
- std::vector<T> features;
-
- /* Reuse features from cache if cache is ready and sliding windows overlap.
- * Overlap is in the beginning of sliding window with a size of a feature cache. */
- if (useCache && index < featureCache.size()) {
- features = std::move(featureCache[index]);
- } else {
- features = std::move(compute(audioDataWindow));
- }
- auto size = features.size() / resizeScale;
- auto sizeBytes = sizeof(T);
+ /* Filename is assumed in the form machine_id_00.wav */
+ std::string delimiter = "_"; /* First character used to split the file name up. */
+ size_t delimiterStart;
+ std::string subString;
+ size_t machineIdxInString = 3; /* Which part of the file name the machine id should be at. */
+
+ for (size_t i = 0; i < machineIdxInString; ++i) {
+ delimiterStart = wavFileName.find(delimiter);
+ subString = wavFileName.substr(0, delimiterStart);
+ wavFileName.erase(0, delimiterStart + delimiter.length());
+ }
- /* Input should be transposed and "resized" by skipping elements. */
- for (size_t outIndex = 0; outIndex < size; outIndex++) {
- std::memcpy(tensorData + (outIndex*size) + index, &features[outIndex*resizeScale], sizeBytes);
- }
+ /* At this point substring should be 00.wav */
+ delimiter = "."; /* Second character used to split the file name up. */
+ delimiterStart = subString.find(delimiter);
+ subString = (delimiterStart != std::string::npos) ? subString.substr(0, delimiterStart) : subString;
- /* Start renewing cache as soon iteration goes out of the windows overlap. */
- if (index >= featuresOverlapIndex / resizeScale) {
- featureCache[index - featuresOverlapIndex / resizeScale] = std::move(features);
- }
+ auto is_number = [](const std::string& str) -> bool
+ {
+ std::string::const_iterator it = str.begin();
+ while (it != str.end() && std::isdigit(*it)) ++it;
+ return !str.empty() && it == str.end();
};
- }
-
- template std::function<void (std::vector<int16_t>&, size_t , bool, size_t, size_t)>
- FeatureCalc<int8_t>(TfLiteTensor* inputTensor,
- size_t cacheSize,
- std::function<std::vector<int8_t> (std::vector<int16_t>&)> compute);
-
- template std::function<void (std::vector<int16_t>&, size_t , bool, size_t, size_t)>
- FeatureCalc<uint8_t>(TfLiteTensor* inputTensor,
- size_t cacheSize,
- std::function<std::vector<uint8_t> (std::vector<int16_t>&)> compute);
-
- template std::function<void (std::vector<int16_t>&, size_t , bool, size_t, size_t)>
- FeatureCalc<int16_t>(TfLiteTensor* inputTensor,
- size_t cacheSize,
- std::function<std::vector<int16_t> (std::vector<int16_t>&)> compute);
-
- template std::function<void(std::vector<int16_t>&, size_t, bool, size_t, size_t)>
- FeatureCalc<float>(TfLiteTensor *inputTensor,
- size_t cacheSize,
- std::function<std::vector<float>(std::vector<int16_t>&)> compute);
-
-
- static std::function<void (std::vector<int16_t>&, int, bool, size_t, size_t)>
- GetFeatureCalculator(audio::AdMelSpectrogram& melSpec, TfLiteTensor* inputTensor, size_t cacheSize, float trainingMean)
- {
- std::function<void (std::vector<int16_t>&, size_t, bool, size_t, size_t)> melSpecFeatureCalc;
-
- TfLiteQuantization quant = inputTensor->quantization;
-
- if (kTfLiteAffineQuantization == quant.type) {
-
- auto *quantParams = (TfLiteAffineQuantization *) quant.params;
- const float quantScale = quantParams->scale->data[0];
- const int quantOffset = quantParams->zero_point->data[0];
-
- switch (inputTensor->type) {
- case kTfLiteInt8: {
- melSpecFeatureCalc = FeatureCalc<int8_t>(inputTensor,
- cacheSize,
- [=, &melSpec](std::vector<int16_t>& audioDataWindow) {
- return melSpec.MelSpecComputeQuant<int8_t>(
- audioDataWindow,
- quantScale,
- quantOffset,
- trainingMean);
- }
- );
- break;
- }
- case kTfLiteUInt8: {
- melSpecFeatureCalc = FeatureCalc<uint8_t>(inputTensor,
- cacheSize,
- [=, &melSpec](std::vector<int16_t>& audioDataWindow) {
- return melSpec.MelSpecComputeQuant<uint8_t>(
- audioDataWindow,
- quantScale,
- quantOffset,
- trainingMean);
- }
- );
- break;
- }
- case kTfLiteInt16: {
- melSpecFeatureCalc = FeatureCalc<int16_t>(inputTensor,
- cacheSize,
- [=, &melSpec](std::vector<int16_t>& audioDataWindow) {
- return melSpec.MelSpecComputeQuant<int16_t>(
- audioDataWindow,
- quantScale,
- quantOffset,
- trainingMean);
- }
- );
- break;
- }
- default:
- printf_err("Tensor type %s not supported\n", TfLiteTypeGetName(inputTensor->type));
- }
-
+ const int8_t machineIdx = is_number(subString) ? std::stoi(subString) : -1;
+
+ /* Return corresponding index in the output vector. */
+ if (machineIdx == 0) {
+ return 0;
+ } else if (machineIdx == 2) {
+ return 1;
+ } else if (machineIdx == 4) {
+ return 2;
+ } else if (machineIdx == 6) {
+ return 3;
} else {
- melSpecFeatureCalc = melSpecFeatureCalc = FeatureCalc<float>(inputTensor,
- cacheSize,
- [=, &melSpec](
- std::vector<int16_t>& audioDataWindow) {
- return melSpec.ComputeMelSpec(
- audioDataWindow,
- trainingMean);
- });
+ printf_err("%d is an invalid machine index \n", machineIdx);
+ return -1;
}
- return melSpecFeatureCalc;
}
} /* namespace app */