MLECO-2599: Replace DSCNN with MicroNet for KWS

Added SoftMax function to Mathutils to allow MicroNet to output probability as it does not nativelu have this layer. Minor refactoring to accommodate Softmax Calculations Extensive renaming and updating of documentation and resource download script. Added SoftMax function to Mathutils to allow MicroNet to output probability. Change-Id: I7cbbda1024d14b85c9ac1beea7ca8fbffd0b6eb5 Signed-off-by: Liam Barry <liam.barry@arm.com>
author: Kshitij Sisodia <kshitij.sisodia@arm.com> 2021-12-24 11:05:11 +0000
committer: Liam Barry <liam.barry@arm.com> 2021-12-24 14:20:36 +0000
commit: 76a1580861210e0310db23acbc29e1064ae30ead (patch)
tree: f947145cffd944aa3724c90745fc0e9d8e2fb2f4 /source
parent: 871fcdc755173b9f7ecb8cf9dc8dc6306329958c (diff)
download: ml-embedded-evaluation-kit-76a1580861210e0310db23acbc29e1064ae30ead.tar.gz
20 files changed, 185 insertions, 143 deletions
diff --git a/source/application/main/Classifier.cc b/source/application/main/Classifier.cc
index c5519fb..a6ff532 100644
--- a/source/application/main/Classifier.cc
+++ b/source/application/main/Classifier.cc
@@ -24,61 +24,40 @@
 #include <set>
 #include <cstdint>
 #include <inttypes.h>
+#include "PlatformMath.hpp"
 
 namespace arm {
 namespace app {
 
-    template<typename T>
-    void SetVectorResults(std::set<std::pair<T, uint32_t>>& topNSet,
+    void Classifier::SetVectorResults(std::set<std::pair<float, uint32_t>>& topNSet,
                           std::vector<ClassificationResult>& vecResults,
-                          TfLiteTensor* tensor,
-                          const std::vector <std::string>& labels) {
-
-        /* For getting the floating point values, we need quantization parameters. */
-        QuantParams quantParams = GetTensorQuantParams(tensor);
+                          const std::vector <std::string>& labels)
+    {
 
         /* Reset the iterator to the largest element - use reverse iterator. */
-        auto topNIter = topNSet.rbegin();
-        for (size_t i = 0; i < vecResults.size() && topNIter != topNSet.rend(); ++i, ++topNIter) {
-            T score = topNIter->first;
-            vecResults[i].m_normalisedVal = quantParams.scale * (score - quantParams.offset);
-            vecResults[i].m_label = labels[topNIter->second];
-            vecResults[i].m_labelIdx = topNIter->second;
-        }
 
-    }
-
-    template<>
-    void SetVectorResults<float>(std::set<std::pair<float, uint32_t>>& topNSet,
-                          std::vector<ClassificationResult>& vecResults,
-                          TfLiteTensor* tensor,
-                          const std::vector <std::string>& labels) {
-        UNUSED(tensor);
-        /* Reset the iterator to the largest element - use reverse iterator. */
         auto topNIter = topNSet.rbegin();
         for (size_t i = 0; i < vecResults.size() && topNIter != topNSet.rend(); ++i, ++topNIter) {
             vecResults[i].m_normalisedVal = topNIter->first;
             vecResults[i].m_label = labels[topNIter->second];
             vecResults[i].m_labelIdx = topNIter->second;
         }
-
     }
 
-    template<typename T>
-    bool Classifier::GetTopNResults(TfLiteTensor* tensor,
+    bool Classifier::GetTopNResults(const std::vector<float>& tensor,
                                     std::vector<ClassificationResult>& vecResults,
                                     uint32_t topNCount,
                                     const std::vector <std::string>& labels)
     {
-        std::set<std::pair<T, uint32_t>> sortedSet;
+
+        std::set<std::pair<float , uint32_t>> sortedSet;
 
         /* NOTE: inputVec's size verification against labels should be
          *       checked by the calling/public function. */
-        T* tensorData = tflite::GetTensorData<T>(tensor);
 
         /* Set initial elements. */
         for (uint32_t i = 0; i < topNCount; ++i) {
-            sortedSet.insert({tensorData[i], i});
+            sortedSet.insert({tensor[i], i});
         }
 
         /* Initialise iterator. */
@@ -86,33 +65,26 @@ namespace app {
 
         /* Scan through the rest of elements with compare operations. */
         for (uint32_t i = topNCount; i < labels.size(); ++i) {
-            if (setFwdIter->first < tensorData[i]) {
+            if (setFwdIter->first < tensor[i]) {
                 sortedSet.erase(*setFwdIter);
-                sortedSet.insert({tensorData[i], i});
+                sortedSet.insert({tensor[i], i});
                 setFwdIter = sortedSet.begin();
             }
         }
 
         /* Final results' container. */
         vecResults = std::vector<ClassificationResult>(topNCount);
-
-        SetVectorResults<T>(sortedSet, vecResults, tensor, labels);
+        SetVectorResults(sortedSet, vecResults, labels);
 
         return true;
     }
 
-    template bool  Classifier::GetTopNResults<uint8_t>(TfLiteTensor* tensor,
-                                                       std::vector<ClassificationResult>& vecResults,
-                                                       uint32_t topNCount, const std::vector <std::string>& labels);
-
-    template bool  Classifier::GetTopNResults<int8_t>(TfLiteTensor* tensor,
-                                                      std::vector<ClassificationResult>& vecResults,
-                                                      uint32_t topNCount, const std::vector <std::string>& labels);
-
     bool  Classifier::GetClassificationResults(
         TfLiteTensor* outputTensor,
         std::vector<ClassificationResult>& vecResults,
-        const std::vector <std::string>& labels, uint32_t topNCount)
+        const std::vector <std::string>& labels,
+        uint32_t topNCount,
+        bool useSoftmax)
     {
         if (outputTensor == nullptr) {
             printf_err("Output vector is null pointer.\n");
@@ -120,7 +92,7 @@ namespace app {
         }
 
         uint32_t totalOutputSize = 1;
-        for (int inputDim = 0; inputDim < outputTensor->dims->size; inputDim++){
+        for (int inputDim = 0; inputDim < outputTensor->dims->size; inputDim++) {
             totalOutputSize *= outputTensor->dims->data[inputDim];
         }
 
@@ -139,22 +111,52 @@ namespace app {
         bool resultState;
         vecResults.clear();
 
-        /* Get the top N results. */
+        /* De-Quantize Output Tensor */
+        QuantParams quantParams = GetTensorQuantParams(outputTensor);
+
+        /* Floating point tensor data to be populated
+         * NOTE: The assumption here is that the output tensor size isn't too
+         * big and therefore, there's neglibible impact on heap usage. */
+        std::vector<float> tensorData(totalOutputSize);
+
+        /* Populate the floating point buffer */
         switch (outputTensor->type) {
-            case kTfLiteUInt8:
-                resultState = GetTopNResults<uint8_t>(outputTensor, vecResults, topNCount, labels);
+            case kTfLiteUInt8: {
+                uint8_t *tensor_buffer = tflite::GetTensorData<uint8_t>(outputTensor);
+                for (size_t i = 0; i < totalOutputSize; ++i) {
+                    tensorData[i] = quantParams.scale *
+                        (static_cast<float>(tensor_buffer[i]) - quantParams.offset);
+                }
                 break;
-            case kTfLiteInt8:
-                resultState = GetTopNResults<int8_t>(outputTensor, vecResults, topNCount, labels);
+            }
+            case kTfLiteInt8: {
+                int8_t *tensor_buffer = tflite::GetTensorData<int8_t>(outputTensor);
+                for (size_t i = 0; i < totalOutputSize; ++i) {
+                    tensorData[i] = quantParams.scale *
+                        (static_cast<float>(tensor_buffer[i]) - quantParams.offset);
+                }
                 break;
-            case kTfLiteFloat32:
-                resultState = GetTopNResults<float>(outputTensor, vecResults, topNCount, labels);
+            }
+            case kTfLiteFloat32: {
+                float *tensor_buffer = tflite::GetTensorData<float>(outputTensor);
+                for (size_t i = 0; i < totalOutputSize; ++i) {
+                    tensorData[i] = tensor_buffer[i];
+                }
                 break;
+            }
             default:
-                printf_err("Tensor type %s not supported by classifier\n", TfLiteTypeGetName(outputTensor->type));
+                printf_err("Tensor type %s not supported by classifier\n",
+                    TfLiteTypeGetName(outputTensor->type));
                 return false;
         }
 
+        if (useSoftmax) {
+            math::MathUtils::SoftmaxF32(tensorData);
+        }
+
+        /* Get the top N results. */
+        resultState = GetTopNResults(tensorData, vecResults, topNCount, labels);
+
         if (!resultState) {
             printf_err("Failed to get top N results set\n");
             return false;
@@ -162,6 +164,5 @@ namespace app {
 
         return true;
     }
-
 } /* namespace app */
 } /* namespace arm */
 \ No newline at end of file
diff --git a/source/application/main/PlatformMath.cc b/source/application/main/PlatformMath.cc
index 0b8882a..26b4b72 100644
--- a/source/application/main/PlatformMath.cc
+++ b/source/application/main/PlatformMath.cc
@@ -15,6 +15,8 @@
  * limitations under the License.
  */
 #include "PlatformMath.hpp"
+#include <algorithm>
+#include <numeric>
 
 #if 0 == ARM_DSP_AVAILABLE
     #include <cmath>
@@ -290,6 +292,24 @@ namespace math {
         return true;
     }
 
+    void MathUtils::SoftmaxF32(std::vector<float>& vec)
+    {
+        /* Fix for numerical stability and apply exp. */
+        auto start = vec.begin();
+        auto end = vec.end();
+
+        float maxValue = *std::max_element(start, end);
+        for (auto it = start; it != end; ++it) {
+            *it = std::exp((*it) - maxValue);
+        }
+
+        float sumExp = std::accumulate(start, end, 0.0f);
+
+        for (auto it = start; it != end; ++it) {
+            *it = (*it)/sumExp;
+        }
+    }
+
 } /* namespace math */
 } /* namespace app */
 } /* namespace arm */
diff --git a/source/application/main/include/Classifier.hpp b/source/application/main/include/Classifier.hpp
index 3ee3148..d899e8e 100644
--- a/source/application/main/include/Classifier.hpp
+++ b/source/application/main/include/Classifier.hpp
@@ -42,18 +42,33 @@ namespace app {
          *                             populated by this function.
          * @param[in]   labels         Labels vector to match classified classes.
          * @param[in]   topNCount      Number of top classifications to pick. Default is 1.
+         * @param[in]   useSoftmax     Whether Softmax normalisation should be applied to output. Default is false.
          * @return      true if successful, false otherwise.
          **/
+
         virtual bool GetClassificationResults(
             TfLiteTensor* outputTensor,
             std::vector<ClassificationResult>& vecResults,
-            const std::vector <std::string>& labels, uint32_t topNCount);
+            const std::vector <std::string>& labels, uint32_t topNCount,
+            bool use_softmax = false);
+
+        /**
+        * @brief       Populate the elements of the Classification Result object.
+        * @param[in]   topNSet        Ordered set of top 5 output class scores and labels.
+        * @param[out]  vecResults     A vector of classification results.
+        *                             populated by this function.
+        * @param[in]   labels         Labels vector to match classified classes.
+        **/
+
+        void SetVectorResults(
+            std::set<std::pair<float, uint32_t>>& topNSet,
+            std::vector<ClassificationResult>& vecResults,
+            const std::vector <std::string>& labels);
 
     private:
         /**
          * @brief       Utility function that gets the top N classification results from the
          *              output vector.
-         * @tparam T value type
          * @param[in]   tensor       Inference output tensor from an NN model.
          * @param[out]  vecResults   A vector of classification results
          *                           populated by this function.
@@ -61,8 +76,8 @@ namespace app {
          * @param[in]   labels       Labels vector to match classified classes.
          * @return      true if successful, false otherwise.
          **/
-        template<typename T>
-        bool GetTopNResults(TfLiteTensor* tensor,
+
+        bool GetTopNResults(const std::vector<float>& tensor,
                             std::vector<ClassificationResult>& vecResults,
                             uint32_t topNCount,
                             const std::vector <std::string>& labels);
diff --git a/source/application/main/include/PlatformMath.hpp b/source/application/main/include/PlatformMath.hpp
index 6804025..fdb51b2 100644
--- a/source/application/main/include/PlatformMath.hpp
+++ b/source/application/main/include/PlatformMath.hpp
@@ -161,7 +161,14 @@ namespace math {
                                                float* ptrDst,
                                                const uint32_t dstLen);
 
+        /**
+        * @brief       Scales output scores for an arbitrary number of classes so
+        *              that they sum to 1, allowing output to be expressed as a probability.
+        * @param[in]   vector Vector of floats modified in-place
+        */
+        static void SoftmaxF32(std::vector<float>& vec);
     };
+
 } /* namespace math */
 } /* namespace app */
 } /* namespace arm */
diff --git a/source/use_case/asr/include/AsrClassifier.hpp b/source/use_case/asr/include/AsrClassifier.hpp
index 2c97a39..67a200e 100644
--- a/source/use_case/asr/include/AsrClassifier.hpp
+++ b/source/use_case/asr/include/AsrClassifier.hpp
@@ -32,12 +32,13 @@ namespace app {
          *                             populated by this function.
          * @param[in]   labels         Labels vector to match classified classes
          * @param[in]   topNCount      Number of top classifications to pick.
+         * @param[in]   use_softmax    Whether softmax scaling should be applied to model output.
          * @return      true if successful, false otherwise.
          **/
         bool GetClassificationResults(
             TfLiteTensor* outputTensor,
             std::vector<ClassificationResult>& vecResults,
-            const std::vector <std::string>& labels, uint32_t topNCount) override;
+            const std::vector <std::string>& labels, uint32_t topNCount, bool use_softmax = false) override;
 
     private:
         /**
diff --git a/source/use_case/asr/src/AsrClassifier.cc b/source/use_case/asr/src/AsrClassifier.cc
index c18bd88..a715068 100644
--- a/source/use_case/asr/src/AsrClassifier.cc
+++ b/source/use_case/asr/src/AsrClassifier.cc
@@ -73,8 +73,9 @@ template bool arm::app::AsrClassifier::GetTopResults<int8_t>(TfLiteTensor* tenso
 bool arm::app::AsrClassifier::GetClassificationResults(
             TfLiteTensor* outputTensor,
             std::vector<ClassificationResult>& vecResults,
-            const std::vector <std::string>& labels, uint32_t topNCount)
+            const std::vector <std::string>& labels, uint32_t topNCount, bool use_softmax)
 {
+        UNUSED(use_softmax);
         vecResults.clear();
 
         constexpr int minTensorDims = static_cast<int>(
diff --git a/source/use_case/kws/include/DsCnnMfcc.hpp b/source/use_case/kws/include/MicroNetKwsMfcc.hpp
index 3f681af..b2565a3 100644
--- a/source/use_case/kws/include/DsCnnMfcc.hpp
+++ b/source/use_case/kws/include/MicroNetKwsMfcc.hpp
@@ -14,8 +14,8 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-#ifndef KWS_DSCNN_MFCC_HPP
-#define KWS_DSCNN_MFCC_HPP
+#ifndef KWS_MICRONET_MFCC_HPP
+#define KWS_MICRONET_MFCC_HPP
 
 #include "Mfcc.hpp"
 
@@ -23,8 +23,8 @@ namespace arm {
 namespace app {
 namespace audio {
 
-    /* Class to provide DS-CNN specific MFCC calculation requirements. */
-    class DsCnnMFCC : public MFCC {
+    /* Class to provide MicroNet specific MFCC calculation requirements. */
+    class MicroNetKwsMFCC : public MFCC {
 
     public:
         static constexpr uint32_t  ms_defaultSamplingFreq = 16000;
@@ -33,18 +33,18 @@ namespace audio {
         static constexpr uint32_t  ms_defaultMelHiFreq    =  4000;
         static constexpr bool      ms_defaultUseHtkMethod =  true;
 
-        explicit DsCnnMFCC(const size_t numFeats, const size_t frameLen)
+        explicit MicroNetKwsMFCC(const size_t numFeats, const size_t frameLen)
             :  MFCC(MfccParams(
                         ms_defaultSamplingFreq, ms_defaultNumFbankBins,
                         ms_defaultMelLoFreq, ms_defaultMelHiFreq,
                         numFeats, frameLen, ms_defaultUseHtkMethod))
         {}
-        DsCnnMFCC()  = delete;
-        ~DsCnnMFCC() = default;
+        MicroNetKwsMFCC()  = delete;
+        ~MicroNetKwsMFCC() = default;
     };
 
 } /* namespace audio */
 } /* namespace app */
 } /* namespace arm */
 
-#endif /* KWS_DSCNN_MFCC_HPP */
-\ No newline at end of file
+#endif /* KWS_MICRONET_MFCC_HPP */
+\ No newline at end of file
diff --git a/source/use_case/kws/include/DsCnnModel.hpp b/source/use_case/kws/include/MicroNetKwsModel.hpp
index a1a45cd..3259c45 100644
--- a/source/use_case/kws/include/DsCnnModel.hpp
+++ b/source/use_case/kws/include/MicroNetKwsModel.hpp
@@ -14,8 +14,8 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-#ifndef KWS_DSCNNMODEL_HPP
-#define KWS_DSCNNMODEL_HPP
+#ifndef KWS_MICRONETMODEL_HPP
+#define KWS_MICRONETMODEL_HPP
 
 #include "Model.hpp"
 
@@ -26,11 +26,11 @@ extern const float g_ScoreThreshold;
 namespace arm {
 namespace app {
 
-    class DsCnnModel : public Model {
+    class MicroNetKwsModel : public Model {
     public:
         /* Indices for the expected model - based on input and output tensor shapes */
-        static constexpr uint32_t ms_inputRowsIdx = 2;
-        static constexpr uint32_t ms_inputColsIdx = 3;
+        static constexpr uint32_t ms_inputRowsIdx = 1;
+        static constexpr uint32_t ms_inputColsIdx = 2;
         static constexpr uint32_t ms_outputRowsIdx = 2;
         static constexpr uint32_t ms_outputColsIdx = 3;
     
@@ -47,7 +47,7 @@ namespace app {
 
     private:
         /* Maximum number of individual operations that can be enlisted. */
-        static constexpr int ms_maxOpCnt = 8;
+        static constexpr int ms_maxOpCnt = 7;
 
         /* A mutable op resolver instance. */
         tflite::MicroMutableOpResolver<ms_maxOpCnt> m_opResolver;
@@ -56,4 +56,4 @@ namespace app {
 } /* namespace app */
 } /* namespace arm */
 
-#endif /* KWS_DSCNNMODEL_HPP */
+#endif /* KWS_MICRONETMODEL_HPP */
diff --git a/source/use_case/kws/src/MainLoop.cc b/source/use_case/kws/src/MainLoop.cc
index c683e71..bde246b 100644
--- a/source/use_case/kws/src/MainLoop.cc
+++ b/source/use_case/kws/src/MainLoop.cc
@@ -16,7 +16,7 @@
  */
 #include "InputFiles.hpp"           /* For input audio clips. */
 #include "Classifier.hpp"           /* Classifier. */
-#include "DsCnnModel.hpp"           /* Model class for running inference. */
+#include "MicroNetKwsModel.hpp"     /* Model class for running inference. */
 #include "hal.h"                    /* Brings in platform definitions. */
 #include "Labels.hpp"               /* For label strings. */
 #include "UseCaseHandler.hpp"       /* Handlers for different user options. */
@@ -49,7 +49,7 @@ static void DisplayMenu()
 
 void main_loop(hal_platform& platform)
 {
-    arm::app::DsCnnModel model;  /* Model wrapper object. */
+    arm::app::MicroNetKwsModel model;  /* Model wrapper object. */
 
     /* Load the model. */
     if (!model.Init()) {
diff --git a/source/use_case/kws/src/DsCnnModel.cc b/source/use_case/kws/src/MicroNetKwsModel.cc
index 4edfc04..48a9b8c 100644
--- a/source/use_case/kws/src/DsCnnModel.cc
+++ b/source/use_case/kws/src/MicroNetKwsModel.cc
@@ -14,16 +14,16 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-#include "DsCnnModel.hpp"
+#include "MicroNetKwsModel.hpp"
 
 #include "hal.h"
 
-const tflite::MicroOpResolver& arm::app::DsCnnModel::GetOpResolver()
+const tflite::MicroOpResolver& arm::app::MicroNetKwsModel::GetOpResolver()
 {
     return this->m_opResolver;
 }
 
-bool arm::app::DsCnnModel::EnlistOperations()
+bool arm::app::MicroNetKwsModel::EnlistOperations()
 {
     this->m_opResolver.AddReshape();
     this->m_opResolver.AddAveragePool2D();
@@ -31,7 +31,6 @@ bool arm::app::DsCnnModel::EnlistOperations()
     this->m_opResolver.AddDepthwiseConv2D();
     this->m_opResolver.AddFullyConnected();
     this->m_opResolver.AddRelu();
-    this->m_opResolver.AddSoftmax();
 
 #if defined(ARM_NPU)
     if (kTfLiteOk == this->m_opResolver.AddEthosU()) {
@@ -46,13 +45,13 @@ bool arm::app::DsCnnModel::EnlistOperations()
 }
 
 extern uint8_t* GetModelPointer();
-const uint8_t* arm::app::DsCnnModel::ModelPointer()
+const uint8_t* arm::app::MicroNetKwsModel::ModelPointer()
 {
     return GetModelPointer();
 }
 
 extern size_t GetModelLen();
-size_t arm::app::DsCnnModel::ModelSize()
+size_t arm::app::MicroNetKwsModel::ModelSize()
 {
     return GetModelLen();
 }
 \ No newline at end of file
diff --git a/source/use_case/kws/src/UseCaseHandler.cc b/source/use_case/kws/src/UseCaseHandler.cc
index 3d95753..8085af7 100644
--- a/source/use_case/kws/src/UseCaseHandler.cc
+++ b/source/use_case/kws/src/UseCaseHandler.cc
@@ -18,9 +18,9 @@
 
 #include "InputFiles.hpp"
 #include "Classifier.hpp"
-#include "DsCnnModel.hpp"
+#include "MicroNetKwsModel.hpp"
 #include "hal.h"
-#include "DsCnnMfcc.hpp"
+#include "MicroNetKwsMfcc.hpp"
 #include "AudioUtils.hpp"
 #include "UseCaseCommonUtils.hpp"
 #include "KwsResult.hpp"
@@ -59,7 +59,7 @@ namespace app {
      * @return          Function to be called providing audio sample and sliding window index.
      */
     static std::function<void (std::vector<int16_t>&, int, bool, size_t)>
-            GetFeatureCalculator(audio::DsCnnMFCC&  mfcc,
+            GetFeatureCalculator(audio::MicroNetKwsMFCC&  mfcc,
                                  TfLiteTensor*      inputTensor,
                                  size_t             cacheSize);
 
@@ -72,8 +72,8 @@ namespace app {
         constexpr uint32_t dataPsnTxtInfStartX = 20;
         constexpr uint32_t dataPsnTxtInfStartY = 40;
         constexpr int minTensorDims = static_cast<int>(
-            (arm::app::DsCnnModel::ms_inputRowsIdx > arm::app::DsCnnModel::ms_inputColsIdx)?
-             arm::app::DsCnnModel::ms_inputRowsIdx : arm::app::DsCnnModel::ms_inputColsIdx);
+            (arm::app::MicroNetKwsModel::ms_inputRowsIdx > arm::app::MicroNetKwsModel::ms_inputColsIdx)?
+             arm::app::MicroNetKwsModel::ms_inputRowsIdx : arm::app::MicroNetKwsModel::ms_inputColsIdx);
 
         auto& model = ctx.Get<Model&>("model");
 
@@ -105,10 +105,10 @@ namespace app {
         }
 
         TfLiteIntArray* inputShape = model.GetInputShape(0);
-        const uint32_t kNumCols = inputShape->data[arm::app::DsCnnModel::ms_inputColsIdx];
-        const uint32_t kNumRows = inputShape->data[arm::app::DsCnnModel::ms_inputRowsIdx];
+        const uint32_t kNumCols = inputShape->data[arm::app::MicroNetKwsModel::ms_inputColsIdx];
+        const uint32_t kNumRows = inputShape->data[arm::app::MicroNetKwsModel::ms_inputRowsIdx];
 
-        audio::DsCnnMFCC mfcc = audio::DsCnnMFCC(kNumCols, frameLength);
+        audio::MicroNetKwsMFCC mfcc = audio::MicroNetKwsMFCC(kNumCols, frameLength);
         mfcc.Init();
 
         /* Deduce the data length required for 1 inference from the network parameters. */
@@ -132,7 +132,7 @@ namespace app {
 
         /* We expect to be sampling 1 second worth of data at a time.
          * NOTE: This is only used for time stamp calculation. */
-        const float secondsPerSample = 1.0/audio::DsCnnMFCC::ms_defaultSamplingFreq;
+        const float secondsPerSample = 1.0/audio::MicroNetKwsMFCC::ms_defaultSamplingFreq;
 
         do {
             platform.data_psn->clear(COLOR_BLACK);
@@ -208,7 +208,7 @@ namespace app {
                 std::vector<ClassificationResult> classificationResult;
                 auto& classifier = ctx.Get<KwsClassifier&>("classifier");
                 classifier.GetClassificationResults(outputTensor, classificationResult,
-                                                    ctx.Get<std::vector<std::string>&>("labels"), 1);
+                                                    ctx.Get<std::vector<std::string>&>("labels"), 1, true);
 
                 results.emplace_back(kws::KwsResult(classificationResult,
                     audioDataSlider.Index() * secondsPerSample * audioDataStride,
@@ -240,7 +240,6 @@ namespace app {
         return true;
     }
 
-    
     static bool PresentInferenceResult(hal_platform& platform,
                                        const std::vector<arm::app::kws::KwsResult>& results)
     {
@@ -259,7 +258,6 @@ namespace app {
 
             std::string topKeyword{"<none>"};
             float score = 0.f;
-
             if (!results[i].m_resultVec.empty()) {
                 topKeyword = results[i].m_resultVec[0].m_label;
                 score = results[i].m_resultVec[0].m_normalisedVal;
@@ -366,7 +364,7 @@ namespace app {
 
 
     static std::function<void (std::vector<int16_t>&, int, bool, size_t)>
-    GetFeatureCalculator(audio::DsCnnMFCC& mfcc, TfLiteTensor* inputTensor, size_t cacheSize)
+    GetFeatureCalculator(audio::MicroNetKwsMFCC& mfcc, TfLiteTensor* inputTensor, size_t cacheSize)
     {
         std::function<void (std::vector<int16_t>&, size_t, bool, size_t)> mfccFeatureCalc;
 
diff --git a/source/use_case/kws/usecase.cmake b/source/use_case/kws/usecase.cmake
index 34e39e4..9f3736e 100644
--- a/source/use_case/kws/usecase.cmake
+++ b/source/use_case/kws/usecase.cmake
@@ -20,7 +20,7 @@ USER_OPTION(${use_case}_FILE_PATH "Directory with custom WAV input files, or pat
     PATH_OR_FILE)
 
 USER_OPTION(${use_case}_LABELS_TXT_FILE "Labels' txt file for the chosen model."
-    ${CMAKE_CURRENT_SOURCE_DIR}/resources/${use_case}/labels/ds_cnn_labels.txt
+    ${CMAKE_CURRENT_SOURCE_DIR}/resources/${use_case}/labels/micronet_kws_labels.txt
     FILEPATH)
 
 USER_OPTION(${use_case}_AUDIO_RATE "Specify the target sampling rate. Default is 16000."
@@ -48,7 +48,7 @@ USER_OPTION(${use_case}_AUDIO_MIN_SAMPLES "Specify the minimum number of samples
     STRING)
 
 USER_OPTION(${use_case}_MODEL_SCORE_THRESHOLD "Specify the score threshold [0.0, 1.0) that must be applied to the inference results for a label to be deemed valid."
-    0.9
+    0.7
     STRING)
 
 # Generate input files
@@ -73,10 +73,11 @@ USER_OPTION(${use_case}_ACTIVATION_BUF_SZ "Activation buffer size for the chosen
     0x00100000
     STRING)
 
+
 if (ETHOS_U_NPU_ENABLED)
-    set(DEFAULT_MODEL_PATH      ${DEFAULT_MODEL_DIR}/ds_cnn_clustered_int8_vela_${ETHOS_U_NPU_CONFIG_ID}.tflite)
+    set(DEFAULT_MODEL_PATH      ${DEFAULT_MODEL_DIR}/kws_micronet_m_vela_${ETHOS_U_NPU_CONFIG_ID}.tflite)
 else()
-    set(DEFAULT_MODEL_PATH      ${DEFAULT_MODEL_DIR}/ds_cnn_clustered_int8.tflite)
+    set(DEFAULT_MODEL_PATH      ${DEFAULT_MODEL_DIR}/kws_micronet_m.tflite)
 endif()
 
 set(EXTRA_MODEL_CODE
diff --git a/source/use_case/kws_asr/include/AsrClassifier.hpp b/source/use_case/kws_asr/include/AsrClassifier.hpp
index 7dbb6e9..6ab9685 100644
--- a/source/use_case/kws_asr/include/AsrClassifier.hpp
+++ b/source/use_case/kws_asr/include/AsrClassifier.hpp
@@ -32,12 +32,14 @@ namespace app {
          *                             populated by this function.
          * @param[in]   labels         Labels vector to match classified classes
          * @param[in]   topNCount      Number of top classifications to pick.
+         * @param[in]   use_softmax    Whether softmax scaling should be applied to model output.
          * @return      true if successful, false otherwise.
          **/
         bool GetClassificationResults(
                 TfLiteTensor* outputTensor,
                 std::vector<ClassificationResult>& vecResults,
-                const std::vector <std::string>& labels, uint32_t topNCount) override;
+                const std::vector <std::string>& labels, uint32_t topNCount,
+                bool use_softmax = false) override;
 
     private:
 
diff --git a/source/use_case/kws_asr/include/DsCnnMfcc.hpp b/source/use_case/kws_asr/include/MicroNetKwsMfcc.hpp
index c97dd9d..43bd390 100644
--- a/source/use_case/kws_asr/include/DsCnnMfcc.hpp
+++ b/source/use_case/kws_asr/include/MicroNetKwsMfcc.hpp
@@ -14,8 +14,8 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-#ifndef KWS_ASR_DSCNN_MFCC_HPP
-#define KWS_ASR_DSCNN_MFCC_HPP
+#ifndef KWS_ASR_MICRONET_MFCC_HPP
+#define KWS_ASR_MICRONET_MFCC_HPP
 
 #include "Mfcc.hpp"
 
@@ -23,8 +23,8 @@ namespace arm {
 namespace app {
 namespace audio {
 
-    /* Class to provide DS-CNN specific MFCC calculation requirements. */
-    class DsCnnMFCC : public MFCC {
+    /* Class to provide MicroNet specific MFCC calculation requirements. */
+    class MicroNetMFCC : public MFCC {
 
     public:
         static constexpr uint32_t  ms_defaultSamplingFreq = 16000;
@@ -34,18 +34,18 @@ namespace audio {
         static constexpr bool      ms_defaultUseHtkMethod =  true;
 
 
-        explicit DsCnnMFCC(const size_t numFeats, const size_t frameLen)
+        explicit MicroNetMFCC(const size_t numFeats, const size_t frameLen)
             :  MFCC(MfccParams(
                         ms_defaultSamplingFreq, ms_defaultNumFbankBins,
                         ms_defaultMelLoFreq, ms_defaultMelHiFreq,
                         numFeats, frameLen, ms_defaultUseHtkMethod))
         {}
-        DsCnnMFCC()  = delete;
-        ~DsCnnMFCC() = default;
+        MicroNetMFCC()  = delete;
+        ~MicroNetMFCC() = default;
     };
 
 } /* namespace audio */
 } /* namespace app */
 } /* namespace arm */
 
-#endif /* KWS_ASR_DSCNN_MFCC_HPP */
+#endif /* KWS_ASR_MICRONET_MFCC_HPP */
diff --git a/source/use_case/kws_asr/include/DsCnnModel.hpp b/source/use_case/kws_asr/include/MicroNetKwsModel.hpp
index 92d96b9..22cf916 100644
--- a/source/use_case/kws_asr/include/DsCnnModel.hpp
+++ b/source/use_case/kws_asr/include/MicroNetKwsModel.hpp
@@ -14,8 +14,8 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-#ifndef KWS_ASR_DSCNNMODEL_HPP
-#define KWS_ASR_DSCNNMODEL_HPP
+#ifndef KWS_ASR_MICRONETMODEL_HPP
+#define KWS_ASR_MICRONETMODEL_HPP
 
 #include "Model.hpp"
 
@@ -33,12 +33,11 @@ namespace kws {
 
 namespace arm {
 namespace app {
-
-    class DsCnnModel : public Model {
+    class MicroNetKwsModel : public Model {
     public:
         /* Indices for the expected model - based on input and output tensor shapes */
-        static constexpr uint32_t ms_inputRowsIdx = 2;
-        static constexpr uint32_t ms_inputColsIdx = 3;
+        static constexpr uint32_t ms_inputRowsIdx = 1;
+        static constexpr uint32_t ms_inputColsIdx = 2;
         static constexpr uint32_t ms_outputRowsIdx = 2;
         static constexpr uint32_t ms_outputColsIdx = 3;
 
@@ -55,7 +54,7 @@ namespace app {
 
     private:
         /* Maximum number of individual operations that can be enlisted. */
-        static constexpr int ms_maxOpCnt = 10;
+        static constexpr int ms_maxOpCnt = 7;
 
         /* A mutable op resolver instance. */
         tflite::MicroMutableOpResolver<ms_maxOpCnt> m_opResolver;
@@ -64,4 +63,4 @@ namespace app {
 } /* namespace app */
 } /* namespace arm */
 
-#endif /* KWS_DSCNNMODEL_HPP */
+#endif /* KWS_ASR_MICRONETMODEL_HPP */
diff --git a/source/use_case/kws_asr/src/AsrClassifier.cc b/source/use_case/kws_asr/src/AsrClassifier.cc
index 57d5058..3f9cd7b 100644
--- a/source/use_case/kws_asr/src/AsrClassifier.cc
+++ b/source/use_case/kws_asr/src/AsrClassifier.cc
@@ -73,8 +73,9 @@ template bool arm::app::AsrClassifier::GetTopResults<int8_t>(TfLiteTensor* tenso
 bool arm::app::AsrClassifier::GetClassificationResults(
             TfLiteTensor* outputTensor,
             std::vector<ClassificationResult>& vecResults,
-            const std::vector <std::string>& labels, uint32_t topNCount)
+            const std::vector <std::string>& labels, uint32_t topNCount, bool use_softmax)
 {
+        UNUSED(use_softmax);
         vecResults.clear();
 
         constexpr int minTensorDims = static_cast<int>(
diff --git a/source/use_case/kws_asr/src/MainLoop.cc b/source/use_case/kws_asr/src/MainLoop.cc
index d5a2c2b..30cb084 100644
--- a/source/use_case/kws_asr/src/MainLoop.cc
+++ b/source/use_case/kws_asr/src/MainLoop.cc
@@ -16,11 +16,11 @@
  */
 #include "hal.h"                    /* Brings in platform definitions. */
 #include "InputFiles.hpp"           /* For input images. */
-#include "Labels_dscnn.hpp"         /* For DS-CNN label strings. */
+#include "Labels_micronetkws.hpp"   /* For MicroNetKws label strings. */
 #include "Labels_wav2letter.hpp"    /* For Wav2Letter label strings. */
 #include "Classifier.hpp"           /* KWS classifier. */
 #include "AsrClassifier.hpp"        /* ASR classifier. */
-#include "DsCnnModel.hpp"           /* KWS model class for running inference. */
+#include "MicroNetKwsModel.hpp"     /* KWS model class for running inference. */
 #include "Wav2LetterModel.hpp"      /* ASR model class for running inference. */
 #include "UseCaseCommonUtils.hpp"   /* Utils functions. */
 #include "UseCaseHandler.hpp"       /* Handlers for different user options. */
@@ -69,7 +69,7 @@ static uint32_t GetOutputInnerLen(const arm::app::Model& model,
 void main_loop(hal_platform& platform)
 {
     /* Model wrapper objects. */
-    arm::app::DsCnnModel kwsModel;
+    arm::app::MicroNetKwsModel kwsModel;
     arm::app::Wav2LetterModel asrModel;
 
     /* Load the models. */
@@ -81,7 +81,7 @@ void main_loop(hal_platform& platform)
     /* Initialise the asr model using the same allocator from KWS
      * to re-use the tensor arena. */
     if (!asrModel.Init(kwsModel.GetAllocator())) {
-        printf_err("Failed to initalise ASR model\n");
+        printf_err("Failed to initialise ASR model\n");
         return;
     }
 
@@ -137,7 +137,7 @@ void main_loop(hal_platform& platform)
     caseContext.Set<const std::vector <std::string>&>("kwslabels", kwsLabels);
 
     /* Index of the kws outputs we trigger ASR on. */
-    caseContext.Set<uint32_t>("keywordindex", 2);
+    caseContext.Set<uint32_t>("keywordindex", 9 );
 
     /* Loop. */
     bool executionSuccessful = true;
diff --git a/source/use_case/kws_asr/src/DsCnnModel.cc b/source/use_case/kws_asr/src/MicroNetKwsModel.cc
index 71d4ceb..4b44580 100644
--- a/source/use_case/kws_asr/src/DsCnnModel.cc
+++ b/source/use_case/kws_asr/src/MicroNetKwsModel.cc
@@ -14,7 +14,7 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-#include "DsCnnModel.hpp"
+#include "MicroNetKwsModel.hpp"
 
 #include "hal.h"
 
@@ -27,21 +27,18 @@ namespace kws {
 } /* namespace app */
 } /* namespace arm */
 
-const tflite::MicroOpResolver& arm::app::DsCnnModel::GetOpResolver()
+const tflite::MicroOpResolver& arm::app::MicroNetKwsModel::GetOpResolver()
 {
     return this->m_opResolver;
 }
 
-bool arm::app::DsCnnModel::EnlistOperations()
+bool arm::app::MicroNetKwsModel::EnlistOperations()
 {
     this->m_opResolver.AddAveragePool2D();
     this->m_opResolver.AddConv2D();
     this->m_opResolver.AddDepthwiseConv2D();
     this->m_opResolver.AddFullyConnected();
     this->m_opResolver.AddRelu();
-    this->m_opResolver.AddSoftmax();
-    this->m_opResolver.AddQuantize();
-    this->m_opResolver.AddDequantize();
     this->m_opResolver.AddReshape();
 
 #if defined(ARM_NPU)
@@ -56,12 +53,12 @@ bool arm::app::DsCnnModel::EnlistOperations()
     return true;
 }
 
-const uint8_t* arm::app::DsCnnModel::ModelPointer()
+const uint8_t* arm::app::MicroNetKwsModel::ModelPointer()
 {
     return arm::app::kws::GetModelPointer();
 }
 
-size_t arm::app::DsCnnModel::ModelSize()
+size_t arm::app::MicroNetKwsModel::ModelSize()
 {
     return arm::app::kws::GetModelLen();
 }
 \ No newline at end of file
diff --git a/source/use_case/kws_asr/src/UseCaseHandler.cc b/source/use_case/kws_asr/src/UseCaseHandler.cc
index 1d88ba1..c67be22 100644
--- a/source/use_case/kws_asr/src/UseCaseHandler.cc
+++ b/source/use_case/kws_asr/src/UseCaseHandler.cc
@@ -20,8 +20,8 @@
 #include "InputFiles.hpp"
 #include "AudioUtils.hpp"
 #include "UseCaseCommonUtils.hpp"
-#include "DsCnnModel.hpp"
-#include "DsCnnMfcc.hpp"
+#include "MicroNetKwsModel.hpp"
+#include "MicroNetKwsMfcc.hpp"
 #include "Classifier.hpp"
 #include "KwsResult.hpp"
 #include "Wav2LetterMfcc.hpp"
@@ -77,12 +77,12 @@ namespace app {
      *
      * @param[in]           mfcc            MFCC feature calculator.
      * @param[in,out]       inputTensor     Input tensor pointer to store calculated features.
-     * @param[in]            cacheSize      Size of the feture vectors cache (number of feature vectors).
+     * @param[in]           cacheSize       Size of the feature vectors cache (number of feature vectors).
      *
      * @return function     function to be called providing audio sample and sliding window index.
      **/
     static std::function<void (std::vector<int16_t>&, int, bool, size_t)>
-    GetFeatureCalculator(audio::DsCnnMFCC&  mfcc,
+    GetFeatureCalculator(audio::MicroNetMFCC&  mfcc,
                          TfLiteTensor*      inputTensor,
                          size_t             cacheSize);
 
@@ -98,8 +98,8 @@ namespace app {
         constexpr uint32_t dataPsnTxtInfStartY = 40;
 
         constexpr int minTensorDims = static_cast<int>(
-            (arm::app::DsCnnModel::ms_inputRowsIdx > arm::app::DsCnnModel::ms_inputColsIdx)?
-             arm::app::DsCnnModel::ms_inputRowsIdx : arm::app::DsCnnModel::ms_inputColsIdx);
+            (arm::app::MicroNetKwsModel::ms_inputRowsIdx > arm::app::MicroNetKwsModel::ms_inputColsIdx)?
+             arm::app::MicroNetKwsModel::ms_inputRowsIdx : arm::app::MicroNetKwsModel::ms_inputColsIdx);
 
         KWSOutput output;
 
@@ -128,7 +128,7 @@ namespace app {
         const uint32_t kwsNumMfccFeats = ctx.Get<uint32_t>("kwsNumMfcc");
         const uint32_t kwsNumAudioWindows = ctx.Get<uint32_t>("kwsNumAudioWins");
 
-        audio::DsCnnMFCC kwsMfcc = audio::DsCnnMFCC(kwsNumMfccFeats, kwsFrameLength);
+        audio::MicroNetMFCC kwsMfcc = audio::MicroNetMFCC(kwsNumMfccFeats, kwsFrameLength);
         kwsMfcc.Init();
 
         /* Deduce the data length required for 1 KWS inference from the network parameters. */
@@ -152,7 +152,7 @@ namespace app {
 
         /* We expect to be sampling 1 second worth of data at a time
          * NOTE: This is only used for time stamp calculation. */
-        const float kwsAudioParamsSecondsPerSample = 1.0/audio::DsCnnMFCC::ms_defaultSamplingFreq;
+        const float kwsAudioParamsSecondsPerSample = 1.0/audio::MicroNetMFCC::ms_defaultSamplingFreq;
 
         auto currentIndex = ctx.Get<uint32_t>("clipIndex");
 
@@ -230,7 +230,7 @@ namespace app {
 
             kwsClassifier.GetClassificationResults(
                             kwsOutputTensor, kwsClassificationResult,
-                            ctx.Get<std::vector<std::string>&>("kwslabels"), 1);
+                            ctx.Get<std::vector<std::string>&>("kwslabels"), 1, true);
 
             kwsResults.emplace_back(
                 kws::KwsResult(
@@ -604,7 +604,7 @@ namespace app {
 
 
     static std::function<void (std::vector<int16_t>&, int, bool, size_t)>
-    GetFeatureCalculator(audio::DsCnnMFCC& mfcc, TfLiteTensor* inputTensor, size_t cacheSize)
+    GetFeatureCalculator(audio::MicroNetMFCC& mfcc, TfLiteTensor* inputTensor, size_t cacheSize)
     {
         std::function<void (std::vector<int16_t>&, size_t, bool, size_t)> mfccFeatureCalc;
 
diff --git a/source/use_case/kws_asr/usecase.cmake b/source/use_case/kws_asr/usecase.cmake
index d8629b6..b3fe020 100644
--- a/source/use_case/kws_asr/usecase.cmake
+++ b/source/use_case/kws_asr/usecase.cmake
@@ -45,7 +45,7 @@ USER_OPTION(${use_case}_AUDIO_MIN_SAMPLES "Specify the minimum number of samples
 
 # Generate kws labels file:
 USER_OPTION(${use_case}_LABELS_TXT_FILE_KWS "Labels' txt file for the chosen model."
-    ${CMAKE_CURRENT_SOURCE_DIR}/resources/${use_case}/labels/ds_cnn_labels.txt
+    ${CMAKE_CURRENT_SOURCE_DIR}/resources/${use_case}/labels/micronet_kws_labels.txt
     FILEPATH)
 
 # Generate asr labels file:
@@ -67,10 +67,10 @@ USER_OPTION(${use_case}_MODEL_SCORE_THRESHOLD_ASR "Specify the score threshold [
     STRING)
 
 if (ETHOS_U_NPU_ENABLED)
-    set(DEFAULT_MODEL_PATH_KWS      ${DEFAULT_MODEL_DIR}/ds_cnn_clustered_int8_vela_${ETHOS_U_NPU_CONFIG_ID}.tflite)
+    set(DEFAULT_MODEL_PATH_KWS      ${DEFAULT_MODEL_DIR}/kws_micronet_m_vela_${ETHOS_U_NPU_CONFIG_ID}.tflite)
     set(DEFAULT_MODEL_PATH_ASR      ${DEFAULT_MODEL_DIR}/wav2letter_pruned_int8_vela_${ETHOS_U_NPU_CONFIG_ID}.tflite)
 else()
-    set(DEFAULT_MODEL_PATH_KWS      ${DEFAULT_MODEL_DIR}/ds_cnn_clustered_int8.tflite)
+    set(DEFAULT_MODEL_PATH_KWS      ${DEFAULT_MODEL_DIR}/kws_micronet_m.tflite)
     set(DEFAULT_MODEL_PATH_ASR      ${DEFAULT_MODEL_DIR}/wav2letter_pruned_int8.tflite)
 endif()
 
@@ -134,7 +134,7 @@ generate_labels_code(
         INPUT           "${${use_case}_LABELS_TXT_FILE_KWS}"
         DESTINATION_SRC ${SRC_GEN_DIR}
         DESTINATION_HDR ${INC_GEN_DIR}
-        OUTPUT_FILENAME "Labels_dscnn"
+        OUTPUT_FILENAME "Labels_micronetkws"
         NAMESPACE       "arm" "app" "kws"
 )
author	Kshitij Sisodia <kshitij.sisodia@arm.com>	2021-12-24 11:05:11 +0000
committer	Liam Barry <liam.barry@arm.com>	2021-12-24 14:20:36 +0000
commit	76a1580861210e0310db23acbc29e1064ae30ead (patch)
tree	f947145cffd944aa3724c90745fc0e9d8e2fb2f4 /source
parent	871fcdc755173b9f7ecb8cf9dc8dc6306329958c (diff)
download	ml-embedded-evaluation-kit-76a1580861210e0310db23acbc29e1064ae30ead.tar.gz