author     Isabella Gottardi <isabella.gottardi@arm.com>  2021-10-20 15:52:32 +0100
committer  Isabella Gottardi <isabella.gottardi@arm.com>  2021-10-25 08:51:10 +0000
commit     79d4154ee071d0e7ef2d1eecdde149d488bb9d8b (patch)
tree       21c8c0a2bd187a925f28045d4a57e9e4ef05be82
parent     14ab8d447c5f12df2ac7fd4217fc0d2005b02dca (diff)
download   ml-embedded-evaluation-kit-79d4154ee071d0e7ef2d1eecdde149d488bb9d8b.tar.gz
MLECO-2458 and MLECO-2476 [Fix] VWW IFM quant step
* Changed the image->cc conversion to match the preprocessing of the
  img_class and vww models: images are scaled maintaining the aspect
  ratio, and then a centre crop of the correct size is taken.
* VWW applies the input quantization info to the int8 image (after
  first converting it to the [0, 1] float range).
* Replaced adult_blur with an image that contains no person.
* Fixed the menu prompt when selecting a specific IFM to run (the
  "Select" message was only displayed after the user typed something).

Change-Id: Ie6cde7ab4835ea842667b87397458a5d32131df3
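As a minimal sketch of the quantization step described above (illustrative only; the patch's actual loop appears in source/use_case/vww/src/UseCaseHandler.cc below, and this helper clamps before the narrowing cast, whereas the patch clamps after it):

#include <algorithm>
#include <cstdint>

/* Sketch of the VWW IFM quantization from the commit message:
 * uint8 pixel -> [0, 1] float -> int8, using the input tensor's
 * scale and zero point (offset). */
int8_t QuantizePixel(uint8_t pixel, float scale, int offset)
{
    const float normalized = static_cast<float>(pixel) / 255.0f;
    const float quantized  = normalized / scale + static_cast<float>(offset);
    /* Clamp to the int8 range before narrowing. */
    return static_cast<int8_t>(std::min(127.0f, std::max(-128.0f, quantized)));
}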
-rw-r--r--  resources/vww/samples/adult_blur.png                 bin 28081 -> 0 bytes
-rw-r--r--  resources/vww/samples/files.md                         2
-rw-r--r--  resources/vww/samples/st_paul_s_cathedral.png        bin 0 -> 50321 bytes
-rw-r--r--  scripts/py/gen_rgb_cpp.py                             23
-rw-r--r--  source/application/main/UseCaseCommonUtils.cc          4
-rw-r--r--  source/use_case/ad/src/MainLoop.cc                     1
-rw-r--r--  source/use_case/asr/src/MainLoop.cc                    1
-rw-r--r--  source/use_case/img_class/src/MainLoop.cc              1
-rw-r--r--  source/use_case/img_class/src/UseCaseHandler.cc        2
-rw-r--r--  source/use_case/kws/src/MainLoop.cc                    1
-rw-r--r--  source/use_case/kws_asr/src/MainLoop.cc                1
-rw-r--r--  source/use_case/vww/src/MainLoop.cc                    1
-rw-r--r--  source/use_case/vww/src/UseCaseHandler.cc             18
-rw-r--r--  tests/use_case/ad/InferenceTestAD.cc                   4
-rw-r--r--  tests/use_case/asr/InferenceTestWav2Letter.cc          2
-rw-r--r--  tests/use_case/img_class/InferenceTestMobilenetV2.cc   2
-rw-r--r--  tests/use_case/kws/InferenceTestDSCNN.cc               2
-rw-r--r--  tests/use_case/kws_asr/InferenceTestDSCNN.cc           2
-rw-r--r--  tests/use_case/kws_asr/InferenceTestWav2Letter.cc      2
-rw-r--r--  tests/use_case/vww/VisualWakeWordUCTests.cc            2
-rw-r--r--  tests/utils/ImageUtils.cc                              8
21 files changed, 52 insertions, 27 deletions
diff --git a/resources/vww/samples/adult_blur.png b/resources/vww/samples/adult_blur.png
deleted file mode 100644
index 5f2cc94..0000000
--- a/resources/vww/samples/adult_blur.png
+++ /dev/null
Binary files differ
diff --git a/resources/vww/samples/files.md b/resources/vww/samples/files.md
index 13f5de6..6aca607 100644
--- a/resources/vww/samples/files.md
+++ b/resources/vww/samples/files.md
@@ -7,4 +7,4 @@ The paper for the Visual Wake Word Model:
The sample images provided are under Creative Commons License. The links are documented here for traceability:
* https://www.pexels.com/photo/man-in-red-jacket-1681010/
-* https://www.pexels.com/photo/adult-blur-camera-casual-598917/
+* https://www.pexels.com/photo/low-angle-photo-of-st-paul-s-cathedral-under-sunset-colors-1043121/
diff --git a/resources/vww/samples/st_paul_s_cathedral.png b/resources/vww/samples/st_paul_s_cathedral.png
new file mode 100644
index 0000000..a6e826a
--- /dev/null
+++ b/resources/vww/samples/st_paul_s_cathedral.png
Binary files differ
diff --git a/scripts/py/gen_rgb_cpp.py b/scripts/py/gen_rgb_cpp.py
index 957d2d0..c53fbd7 100644
--- a/scripts/py/gen_rgb_cpp.py
+++ b/scripts/py/gen_rgb_cpp.py
@@ -70,12 +70,23 @@ def write_individual_img_cc_file(image_filename, cc_filename, header_template_fi
gen_time=datetime.datetime.now(),
file_name=os.path.basename(image_filename),
year=datetime.datetime.now().year)
-
- original_image.thumbnail(image_size)
- delta_w = abs(image_size[0] - original_image.size[0])
- delta_h = abs(image_size[1] - original_image.size[1])
- resized_image = Image.new('RGB', args.image_size, (255, 255, 255, 0))
- resized_image.paste(original_image, (int(delta_w / 2), int(delta_h / 2)))
+ # IFM size
+ ifm_width = image_size[0]
+ ifm_height = image_size[1]
+
+ # Aspect ratio resize
+ scale_ratio = float(max(ifm_width, ifm_height)) / float(min(original_image.size[0], original_image.size[1]))
+ resized_width = int(original_image.size[0] * scale_ratio)
+ resized_height = int(original_image.size[1] * scale_ratio)
+ resized_image = original_image.resize([resized_width, resized_height], Image.BILINEAR)
+
+ # Crop the center of the image
+ resized_image = resized_image.crop((
+ (resized_width - ifm_width) / 2, # left
+ (resized_height - ifm_height) / 2, # top
+ (resized_width + ifm_width) / 2, # right
+ (resized_height + ifm_height) / 2 # bottom
+ ))
# Convert the image and write it to the cc file
rgb_data = np.array(resized_image, dtype=np.uint8).flatten()
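To make the new gen_rgb_cpp.py geometry concrete, here is a self-contained walk-through with assumed sizes (a hypothetical 640x480 source image and a 128x128 IFM; none of this is code from the patch):

#include <algorithm>
#include <cstdio>

int main()
{
    const int src_w = 640, src_h = 480;   /* assumed source image size */
    const int ifm_w = 128, ifm_h = 128;   /* assumed IFM size          */

    /* Scale so the smaller source dimension reaches the larger IFM
     * dimension; the aspect ratio is preserved. */
    const float scale = static_cast<float>(std::max(ifm_w, ifm_h)) /
                        static_cast<float>(std::min(src_w, src_h));
    const int resized_w = static_cast<int>(src_w * scale);   /* 170 */
    const int resized_h = static_cast<int>(src_h * scale);   /* 128 */

    /* Centre crop of the IFM size. */
    const int left = (resized_w - ifm_w) / 2;   /* 21 */
    const int top  = (resized_h - ifm_h) / 2;   /* 0  */
    printf("resized %dx%d, crop box (%d, %d, %d, %d)\n",
           resized_w, resized_h, left, top, left + ifm_w, top + ifm_h);
    return 0;
}

This prints "resized 170x128, crop box (21, 0, 149, 128)": the 640x480 input is scaled to 170x128 without distortion and the central 128x128 window is kept.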
diff --git a/source/application/main/UseCaseCommonUtils.cc b/source/application/main/UseCaseCommonUtils.cc
index a99e05d..e48e308 100644
--- a/source/application/main/UseCaseCommonUtils.cc
+++ b/source/application/main/UseCaseCommonUtils.cc
@@ -35,8 +35,8 @@ void DisplayCommonMenu()
void image::ConvertImgToInt8(void* data, const size_t kMaxImageSize)
{
- auto* tmp_req_data = (uint8_t*) data;
- auto* tmp_signed_req_data = (int8_t*) data;
+ auto* tmp_req_data = static_cast<uint8_t *>(data);
+ auto* tmp_signed_req_data = static_cast<int8_t *>(data);
for (size_t i = 0; i < kMaxImageSize; i++) {
tmp_signed_req_data[i] = (int8_t) (
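The cast changes in this hunk, and the matching ones in the use-case handlers and tests below, are mechanical: static_cast states the intended conversion and the compiler rejects conversions it cannot justify, whereas a C-style cast would silently fall back to a reinterpret_cast. A small illustration (not repository code):

#include <cstdint>

void example(void* data)
{
    /* void* -> uint8_t* is an explicit, checked conversion. */
    auto* bytes = static_cast<uint8_t*>(data);

    /* Between unrelated pointer types, static_cast refuses where a
     * C-style cast such as (float*)bytes would silently compile: */
    /* auto* wrong = static_cast<float*>(bytes); */  /* does not compile */
    (void)bytes;
}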
diff --git a/source/use_case/ad/src/MainLoop.cc b/source/use_case/ad/src/MainLoop.cc
index 3c2f9cc..a323610 100644
--- a/source/use_case/ad/src/MainLoop.cc
+++ b/source/use_case/ad/src/MainLoop.cc
@@ -90,6 +90,7 @@ void main_loop(hal_platform& platform)
case MENU_OPT_RUN_INF_CHOSEN: {
printf(" Enter the data index [0, %d]: ",
NUMBER_OF_FILES-1);
+ fflush(stdout);
auto audioIndex = static_cast<uint32_t>(
arm::app::ReadUserInputAsInt(platform));
executionSuccessful = ClassifyVibrationHandler(caseContext,
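The fflush(stdout) added here, and in each MainLoop.cc below, addresses the prompt-ordering bug from the commit message: the prompt has no trailing newline, so it can sit in stdout's buffer while the program blocks reading input, and the user only sees it after typing. A minimal standalone illustration (not repository code):

#include <cstdio>

int main()
{
    int index = 0;
    /* Without the flush, this prompt may stay buffered until after
     * scanf returns, because it has no trailing '\n'. */
    printf(" Enter the data index: ");
    fflush(stdout);
    if (scanf("%d", &index) == 1) {
        printf("Got %d\n", index);
    }
    return 0;
}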
diff --git a/source/use_case/asr/src/MainLoop.cc b/source/use_case/asr/src/MainLoop.cc
index 42bb08e..87145f4 100644
--- a/source/use_case/asr/src/MainLoop.cc
+++ b/source/use_case/asr/src/MainLoop.cc
@@ -133,6 +133,7 @@ void main_loop(hal_platform& platform)
case MENU_OPT_RUN_INF_CHOSEN: {
printf(" Enter the audio clip index [0, %d]: ",
NUMBER_OF_FILES-1);
+ fflush(stdout);
auto clipIndex = static_cast<uint32_t>(
arm::app::ReadUserInputAsInt(platform));
executionSuccessful = ClassifyAudioHandler(caseContext,
diff --git a/source/use_case/img_class/src/MainLoop.cc b/source/use_case/img_class/src/MainLoop.cc
index 79f6018..ea9f14a 100644
--- a/source/use_case/img_class/src/MainLoop.cc
+++ b/source/use_case/img_class/src/MainLoop.cc
@@ -68,6 +68,7 @@ void main_loop(hal_platform& platform)
break;
case common::MENU_OPT_RUN_INF_CHOSEN: {
printf(" Enter the image index [0, %d]: ", NUMBER_OF_FILES-1);
+ fflush(stdout);
auto imgIndex = static_cast<uint32_t>(arm::app::ReadUserInputAsInt(platform));
executionSuccessful = ClassifyImageHandler(caseContext, imgIndex, false);
break;
diff --git a/source/use_case/img_class/src/UseCaseHandler.cc b/source/use_case/img_class/src/UseCaseHandler.cc
index 66df1da..effc06f 100644
--- a/source/use_case/img_class/src/UseCaseHandler.cc
+++ b/source/use_case/img_class/src/UseCaseHandler.cc
@@ -97,7 +97,7 @@ namespace app {
/* Display this image on the LCD. */
platform.data_psn->present_data_image(
- (uint8_t*) inputTensor->data.data,
+ static_cast<uint8_t *>(inputTensor->data.data),
nCols, nRows, nChannels,
dataPsnImgStartX, dataPsnImgStartY, dataPsnImgDownscaleFactor);
diff --git a/source/use_case/kws/src/MainLoop.cc b/source/use_case/kws/src/MainLoop.cc
index 80254d0..c683e71 100644
--- a/source/use_case/kws/src/MainLoop.cc
+++ b/source/use_case/kws/src/MainLoop.cc
@@ -95,6 +95,7 @@ void main_loop(hal_platform& platform)
break;
case MENU_OPT_RUN_INF_CHOSEN: {
printf(" Enter the audio clip index [0, %d]: ", NUMBER_OF_FILES-1);
+ fflush(stdout);
auto clipIndex = static_cast<uint32_t>(arm::app::ReadUserInputAsInt(platform));
executionSuccessful = ClassifyAudioHandler(caseContext, clipIndex, false);
break;
diff --git a/source/use_case/kws_asr/src/MainLoop.cc b/source/use_case/kws_asr/src/MainLoop.cc
index ed9034f..d5a2c2b 100644
--- a/source/use_case/kws_asr/src/MainLoop.cc
+++ b/source/use_case/kws_asr/src/MainLoop.cc
@@ -161,6 +161,7 @@ void main_loop(hal_platform& platform)
case MENU_OPT_RUN_INF_CHOSEN: {
printf(" Enter the audio clip index [0, %d]: ",
NUMBER_OF_FILES-1);
+ fflush(stdout);
auto clipIndex = static_cast<uint32_t>(
arm::app::ReadUserInputAsInt(platform));
executionSuccessful = ClassifyAudioHandler(caseContext,
diff --git a/source/use_case/vww/src/MainLoop.cc b/source/use_case/vww/src/MainLoop.cc
index f026cc2..b29238c 100644
--- a/source/use_case/vww/src/MainLoop.cc
+++ b/source/use_case/vww/src/MainLoop.cc
@@ -67,6 +67,7 @@ void main_loop(hal_platform &platform)
break;
case common::MENU_OPT_RUN_INF_CHOSEN: {
printf(" Enter the image index [0, %d]: ", NUMBER_OF_FILES-1);
+ fflush(stdout);
auto imgIndex = static_cast<uint32_t>(arm::app::ReadUserInputAsInt(platform));
executionSuccessful = ClassifyImageHandler(caseContext, imgIndex, false);
break;
diff --git a/source/use_case/vww/src/UseCaseHandler.cc b/source/use_case/vww/src/UseCaseHandler.cc
index fb2e837..d384032 100644
--- a/source/use_case/vww/src/UseCaseHandler.cc
+++ b/source/use_case/vww/src/UseCaseHandler.cc
@@ -21,6 +21,8 @@
#include "UseCaseCommonUtils.hpp"
#include "hal.h"
+#include <algorithm>
+
namespace arm {
namespace app {
@@ -94,13 +96,19 @@ namespace app {
/* Display this image on the LCD. */
platform.data_psn->present_data_image(
- (uint8_t *) inputTensor->data.data,
+ static_cast<uint8_t *>(inputTensor->data.data),
nCols, nRows, nChannels,
dataPsnImgStartX, dataPsnImgStartY, dataPsnImgDownscaleFactor);
- /* If the data is signed. */
- if (model.IsDataSigned()) {
- image::ConvertImgToInt8(inputTensor->data.data, inputTensor->bytes);
+ /* VWW model preprocessing converts the image from uint8 to [0, 1] float
+  * values, then quantizes them with the input quantization info. */
+ QuantParams inQuantParams = GetTensorQuantParams(inputTensor);
+
+ auto* req_data = static_cast<uint8_t *>(inputTensor->data.data);
+ auto* signed_req_data = static_cast<int8_t *>(inputTensor->data.data);
+ for (size_t i = 0; i < inputTensor->bytes; i++) {
+ auto i_data_int8 = static_cast<int8_t>(((static_cast<float>(req_data[i]) / 255.0f) / inQuantParams.scale) + inQuantParams.offset);
+ signed_req_data[i] = std::min<int8_t>(INT8_MAX, std::max<int8_t>(i_data_int8, INT8_MIN));
}
/* Display message on the LCD - inference running. */
@@ -159,7 +167,7 @@ namespace app {
const uint32_t nChannels = (inputTensor->dims->size == 4) ? inputTensor->dims->data[3] : 1;
const uint8_t* srcPtr = get_img_array(imIdx);
- auto* dstPtr = (uint8_t*)inputTensor->data.data;
+ auto* dstPtr = static_cast<uint8_t *>(inputTensor->data.data);
if (1 == nChannels) {
/**
* Visual Wake Word model accepts only one channel =>
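One detail of the quantization loop in the hunk above: req_data and signed_req_data alias the same tensor buffer, which is well defined because each iteration reads element i in full before overwriting it. A standalone sketch of the same in-place pattern, with assumed quantization parameters:

#include <algorithm>
#include <cstdint>
#include <cstdio>

int main()
{
    /* Hypothetical 4-pixel buffer standing in for the input tensor. */
    uint8_t buffer[4] = {0, 64, 128, 255};
    const float scale  = 1.0f / 127.5f;  /* assumed input scale      */
    const int   offset = -128;           /* assumed input zero point */

    auto* src = buffer;                             /* uint8 view */
    auto* dst = reinterpret_cast<int8_t*>(buffer);  /* int8 view  */
    for (int i = 0; i < 4; i++) {
        /* Element i is read before it is overwritten in place. */
        const float q = (static_cast<float>(src[i]) / 255.0f) / scale
                        + static_cast<float>(offset);
        dst[i] = static_cast<int8_t>(
            std::min(127.0f, std::max(-128.0f, q)));
    }
    for (int i = 0; i < 4; i++) {
        printf("%d ", dst[i]);
    }
    printf("\n");
    return 0;
}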
diff --git a/tests/use_case/ad/InferenceTestAD.cc b/tests/use_case/ad/InferenceTestAD.cc
index d5e21c2..ad785e8 100644
--- a/tests/use_case/ad/InferenceTestAD.cc
+++ b/tests/use_case/ad/InferenceTestAD.cc
@@ -64,7 +64,7 @@ bool RunInferenceRandom(arm::app::Model& model)
template <typename T>
void TestInference(const T *input_goldenFV, const T *output_goldenFV, arm::app::Model& model)
{
- REQUIRE(RunInference(model, (int8_t*)input_goldenFV));
+ REQUIRE(RunInference(model, static_cast<const T*>(input_goldenFV)));
TfLiteTensor *outputTensor = model.GetOutputTensor(0);
@@ -75,7 +75,7 @@ void TestInference(const T *input_goldenFV, const T *output_goldenFV, arm::app::
for (size_t i = 0; i < outputTensor->bytes; i++)
{
- REQUIRE((int)tensorData[i] == (int)((T)output_goldenFV[i]));
+ REQUIRE(static_cast<int>(tensorData[i]) == static_cast<int>(((T)output_goldenFV[i])));
}
}
diff --git a/tests/use_case/asr/InferenceTestWav2Letter.cc b/tests/use_case/asr/InferenceTestWav2Letter.cc
index d5e6c35..1f9cb80 100644
--- a/tests/use_case/asr/InferenceTestWav2Letter.cc
+++ b/tests/use_case/asr/InferenceTestWav2Letter.cc
@@ -81,7 +81,7 @@ void TestInference(const T* input_goldenFV, const T* output_goldenFV, arm::app::
REQUIRE(tensorData);
for (size_t i = 0; i < outputTensor->bytes; i++) {
- REQUIRE((int)tensorData[i] == (int)((T)output_goldenFV[i]));
+ REQUIRE(static_cast<int>(tensorData[i]) == static_cast<int>(((T)output_goldenFV[i])));
}
}
diff --git a/tests/use_case/img_class/InferenceTestMobilenetV2.cc b/tests/use_case/img_class/InferenceTestMobilenetV2.cc
index 6fbf374..bb89c99 100644
--- a/tests/use_case/img_class/InferenceTestMobilenetV2.cc
+++ b/tests/use_case/img_class/InferenceTestMobilenetV2.cc
@@ -56,7 +56,7 @@ void TestInference(int imageIdx, arm::app::Model& model, T tolerance) {
REQUIRE(tensorData);
for (size_t i = 0; i < outputTensor->bytes; i++) {
- REQUIRE((int)tensorData[i] == Approx((int)((T)goldenFV[i])).epsilon(tolerance));
+ REQUIRE(static_cast<int>(tensorData[i]) == Approx(static_cast<int>((T)goldenFV[i])).epsilon(tolerance));
}
}
diff --git a/tests/use_case/kws/InferenceTestDSCNN.cc b/tests/use_case/kws/InferenceTestDSCNN.cc
index d02e33c..7ce55dd 100644
--- a/tests/use_case/kws/InferenceTestDSCNN.cc
+++ b/tests/use_case/kws/InferenceTestDSCNN.cc
@@ -70,7 +70,7 @@ void TestInference(const T* input_goldenFV, const T* output_goldenFV, arm::app::
REQUIRE(tensorData);
for (size_t i = 0; i < outputTensor->bytes; i++) {
- REQUIRE((int)tensorData[i] == (int)((T)output_goldenFV[i]));
+ REQUIRE(static_cast<int>(tensorData[i]) == static_cast<int>(((T)output_goldenFV[i])));
}
}
diff --git a/tests/use_case/kws_asr/InferenceTestDSCNN.cc b/tests/use_case/kws_asr/InferenceTestDSCNN.cc
index e210c33..134003d 100644
--- a/tests/use_case/kws_asr/InferenceTestDSCNN.cc
+++ b/tests/use_case/kws_asr/InferenceTestDSCNN.cc
@@ -68,7 +68,7 @@ void TestInference(const T* input_goldenFV, const T* output_goldenFV, arm::app::
REQUIRE(tensorData);
for (size_t i = 0; i < outputTensor->bytes; i++) {
- REQUIRE((int) tensorData[i] == (int) ((T) output_goldenFV[i]));
+ REQUIRE(static_cast<int>(tensorData[i]) == static_cast<int>((T) output_goldenFV[i]));
}
}
diff --git a/tests/use_case/kws_asr/InferenceTestWav2Letter.cc b/tests/use_case/kws_asr/InferenceTestWav2Letter.cc
index 5f5ad98..1b14a42 100644
--- a/tests/use_case/kws_asr/InferenceTestWav2Letter.cc
+++ b/tests/use_case/kws_asr/InferenceTestWav2Letter.cc
@@ -83,7 +83,7 @@ void TestInference(const T* input_goldenFV, const T* output_goldenFV, arm::app::
REQUIRE(tensorData);
for (size_t i = 0; i < outputTensor->bytes; i++) {
- REQUIRE((int)tensorData[i] == (int)((T)output_goldenFV[i]));
+ REQUIRE(static_cast<int>(tensorData[i]) == static_cast<int>(((T)output_goldenFV[i])));
}
}
diff --git a/tests/use_case/vww/VisualWakeWordUCTests.cc b/tests/use_case/vww/VisualWakeWordUCTests.cc
index 891423b..700a6bb 100644
--- a/tests/use_case/vww/VisualWakeWordUCTests.cc
+++ b/tests/use_case/vww/VisualWakeWordUCTests.cc
@@ -73,7 +73,7 @@ TEST_CASE("Inference by index")
auto results = caseContext.Get<std::vector<arm::app::ClassificationResult>>("results");
- REQUIRE(results[0].m_labelIdx == 0);
+ REQUIRE(results[0].m_labelIdx == 1);
}
TEST_CASE("Inference run all images")
diff --git a/tests/utils/ImageUtils.cc b/tests/utils/ImageUtils.cc
index f77ce1e..506040f 100644
--- a/tests/utils/ImageUtils.cc
+++ b/tests/utils/ImageUtils.cc
@@ -18,12 +18,12 @@
void convertImgIoInt8(void * data, const size_t sz)
{
- uint8_t * tmp_req_data = (uint8_t *)data;
- int8_t * tmp_signed_req_data = (int8_t *) data;
+ uint8_t * tmp_req_data = static_cast<uint8_t *>(data);
+ int8_t * tmp_signed_req_data = static_cast<int8_t *>(data);
for (size_t i = 0; i < sz; ++i) {
- tmp_signed_req_data[i] = (int8_t)(
- (int32_t)(tmp_req_data[i]) - 128);
+ tmp_signed_req_data[i] = static_cast<int8_t>(
+ static_cast<int32_t>(tmp_req_data[i]) - 128);
}
}
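For reference, the conversion convertImgIoInt8 performs above is the standard re-centring of [0, 255] data onto [-128, 127]: a fixed zero point of 128 with a scale of 1, unlike the VWW path, which now uses the tensor's own quantization parameters. A small self-contained check of that mapping:

#include <cstdint>
#include <cstdio>

int main()
{
    const uint8_t samples[] = {0, 1, 127, 128, 254, 255};
    for (uint8_t s : samples) {
        /* Subtracting 128 maps [0, 255] onto [-128, 127]. */
        const int8_t converted =
            static_cast<int8_t>(static_cast<int32_t>(s) - 128);
        printf("%3d -> %4d\n", s, converted);
    }
    return 0;
}

Expected output: 0 -> -128, 1 -> -127, 127 -> -1, 128 -> 0, 254 -> 126, 255 -> 127.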