author     Isabella Gottardi <isabella.gottardi@arm.com>  2021-10-20 15:52:32 +0100
committer  Isabella Gottardi <isabella.gottardi@arm.com>  2021-10-25 08:51:10 +0000
commit     79d4154ee071d0e7ef2d1eecdde149d488bb9d8b (patch)
tree       21c8c0a2bd187a925f28045d4a57e9e4ef05be82
parent     14ab8d447c5f12df2ac7fd4217fc0d2005b02dca (diff)
download   ml-embedded-evaluation-kit-79d4154ee071d0e7ef2d1eecdde149d488bb9d8b.tar.gz
MLECO-2458 and MLECO-2476 [Fix] VWW IFM quant step
* Changed the image->cc conversion to match the preprocessing of the
  img_class and vww models: images are scaled maintaining the aspect
  ratio, and then a centre crop of the correct size is taken.
* VWW applies the input quantization info to the int8 image (after
  first converting it to the [0, 1] float range).
* Replaced adult_blur with an image that contains no person.
* Fixed the menu prompt when selecting a specific IFM to run (the
  "Select" message was only displayed after the user typed something).

Change-Id: Ie6cde7ab4835ea842667b87397458a5d32131df3
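As a minimal sketch of the quantization step described above (illustrative only; the patch's actual loop appears in source/use_case/vww/src/UseCaseHandler.cc below, and this helper clamps before the narrowing cast, whereas the patch clamps after it):

#include <algorithm>
#include <cstdint>

/* Sketch of the VWW IFM quantization from the commit message:
 * uint8 pixel -> [0, 1] float -> int8, using the input tensor's
 * scale and zero point (offset). */
int8_t QuantizePixel(uint8_t pixel, float scale, int offset)
{
    const float normalized = static_cast<float>(pixel) / 255.0f;
    const float quantized  = normalized / scale + static_cast<float>(offset);
    /* Clamp to the int8 range before narrowing. */
    return static_cast<int8_t>(std::min(127.0f, std::max(-128.0f, quantized)));
}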
-rw-r--r--  resources/vww/samples/adult_blur.png                 bin 28081 -> 0 bytes
-rw-r--r--  resources/vww/samples/files.md                         2
-rw-r--r--  resources/vww/samples/st_paul_s_cathedral.png        bin 0 -> 50321 bytes
-rw-r--r--  scripts/py/gen_rgb_cpp.py                             23
-rw-r--r--  source/application/main/UseCaseCommonUtils.cc          4
-rw-r--r--  source/use_case/ad/src/MainLoop.cc                     1
-rw-r--r--  source/use_case/asr/src/MainLoop.cc                    1
-rw-r--r--  source/use_case/img_class/src/MainLoop.cc              1
-rw-r--r--  source/use_case/img_class/src/UseCaseHandler.cc        2
-rw-r--r--  source/use_case/kws/src/MainLoop.cc                    1
-rw-r--r--  source/use_case/kws_asr/src/MainLoop.cc                1
-rw-r--r--  source/use_case/vww/src/MainLoop.cc                    1
-rw-r--r--  source/use_case/vww/src/UseCaseHandler.cc             18
-rw-r--r--  tests/use_case/ad/InferenceTestAD.cc                   4
-rw-r--r--  tests/use_case/asr/InferenceTestWav2Letter.cc          2
-rw-r--r--  tests/use_case/img_class/InferenceTestMobilenetV2.cc   2
-rw-r--r--  tests/use_case/kws/InferenceTestDSCNN.cc               2
-rw-r--r--  tests/use_case/kws_asr/InferenceTestDSCNN.cc           2
-rw-r--r--  tests/use_case/kws_asr/InferenceTestWav2Letter.cc      2
-rw-r--r--  tests/use_case/vww/VisualWakeWordUCTests.cc            2
-rw-r--r--  tests/utils/ImageUtils.cc                              8
21 files changed, 52 insertions, 27 deletions
diff --git a/resources/vww/samples/adult_blur.png b/resources/vww/samples/adult_blur.png
deleted file mode 100644
index 5f2cc94..0000000
--- a/resources/vww/samples/adult_blur.png
+++ /dev/null
Binary files differ
diff --git a/resources/vww/samples/files.md b/resources/vww/samples/files.md
index 13f5de6..6aca607 100644
--- a/resources/vww/samples/files.md
+++ b/resources/vww/samples/files.md
@@ -7,4 +7,4 @@ The paper for the Visual Wake Word Model:
The sample images provided are under Creative Commons License. The links are documented here for traceability:
* https://www.pexels.com/photo/man-in-red-jacket-1681010/
-* https://www.pexels.com/photo/adult-blur-camera-casual-598917/
+* https://www.pexels.com/photo/low-angle-photo-of-st-paul-s-cathedral-under-sunset-colors-1043121/
diff --git a/resources/vww/samples/st_paul_s_cathedral.png b/resources/vww/samples/st_paul_s_cathedral.png
new file mode 100644
index 0000000..a6e826a
--- /dev/null
+++ b/resources/vww/samples/st_paul_s_cathedral.png
Binary files differ
diff --git a/scripts/py/gen_rgb_cpp.py b/scripts/py/gen_rgb_cpp.py
index 957d2d0..c53fbd7 100644
--- a/scripts/py/gen_rgb_cpp.py
+++ b/scripts/py/gen_rgb_cpp.py
@@ -70,12 +70,23 @@ def write_individual_img_cc_file(image_filename, cc_filename, header_template_fi
gen_time=datetime.datetime.now(),
file_name=os.path.basename(image_filename),
year=datetime.datetime.now().year)
-
- original_image.thumbnail(image_size)
- delta_w = abs(image_size[0] - original_image.size[0])
- delta_h = abs(image_size[1] - original_image.size[1])
- resized_image = Image.new('RGB', args.image_size, (255, 255, 255, 0))
- resized_image.paste(original_image, (int(delta_w / 2), int(delta_h / 2)))
+ # IFM size
+ ifm_width = image_size[0]
+ ifm_height = image_size[1]
+
+ # Aspect ratio resize
+ scale_ratio = float(max(ifm_width, ifm_height)) / float(min(original_image.size[0], original_image.size[1]))
+ resized_width = int(original_image.size[0] * scale_ratio)
+ resized_height = int(original_image.size[1] * scale_ratio)
+ resized_image = original_image.resize([resized_width, resized_height], Image.BILINEAR)
+
+ # Crop the center of the image
+ resized_image = resized_image.crop((
+ (resized_width - ifm_width) / 2, # left
+ (resized_height - ifm_height) / 2, # top
+ (resized_width + ifm_width) / 2, # right
+ (resized_height + ifm_height) / 2 # bottom
+ ))
# Convert the image and write it to the cc file
rgb_data = np.array(resized_image, dtype=np.uint8).flatten()
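To make the new gen_rgb_cpp.py geometry concrete, here is a self-contained walk-through with assumed sizes (a hypothetical 640x480 source image and a 128x128 IFM; none of this is code from the patch):

#include <algorithm>
#include <cstdio>

int main()
{
    const int src_w = 640, src_h = 480;   /* assumed source image size */
    const int ifm_w = 128, ifm_h = 128;   /* assumed IFM size          */

    /* Scale so the smaller source dimension reaches the larger IFM
     * dimension; the aspect ratio is preserved. */
    const float scale = static_cast<float>(std::max(ifm_w, ifm_h)) /
                        static_cast<float>(std::min(src_w, src_h));
    const int resized_w = static_cast<int>(src_w * scale);   /* 170 */
    const int resized_h = static_cast<int>(src_h * scale);   /* 128 */

    /* Centre crop of the IFM size. */
    const int left = (resized_w - ifm_w) / 2;   /* 21 */
    const int top  = (resized_h - ifm_h) / 2;   /* 0  */
    printf("resized %dx%d, crop box (%d, %d, %d, %d)\n",
           resized_w, resized_h, left, top, left + ifm_w, top + ifm_h);
    return 0;
}

This prints "resized 170x128, crop box (21, 0, 149, 128)": the 640x480 input is scaled to 170x128 without distortion and the central 128x128 window is kept.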
diff --git a/source/application/main/UseCaseCommonUtils.cc b/source/application/main/UseCaseCommonUtils.cc
index a99e05d..e48e308 100644
--- a/source/application/main/UseCaseCommonUtils.cc
+++ b/source/application/main/UseCaseCommonUtils.cc
@@ -35,8 +35,8 @@ void DisplayCommonMenu()
void image::ConvertImgToInt8(void* data, const size_t kMaxImageSize)
{
- auto* tmp_req_data = (uint8_t*) data;
- auto* tmp_signed_req_data = (int8_t*) data;
+ auto* tmp_req_data = static_cast<uint8_t *>(data);
+ auto* tmp_signed_req_data = static_cast<int8_t *>(data);
for (size_t i = 0; i < kMaxImageSize; i++) {
tmp_signed_req_data[i] = (int8_t) (
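The cast changes in this hunk, and the matching ones in the use-case handlers and tests below, are mechanical: static_cast states the intended conversion and the compiler rejects conversions it cannot justify, whereas a C-style cast would silently fall back to a reinterpret_cast. A small illustration (not repository code):

#include <cstdint>

void example(void* data)
{
    /* void* -> uint8_t* is an explicit, checked conversion. */
    auto* bytes = static_cast<uint8_t*>(data);

    /* Between unrelated pointer types, static_cast refuses where a
     * C-style cast such as (float*)bytes would silently compile: */
    /* auto* wrong = static_cast<float*>(bytes); */  /* does not compile */
    (void)bytes;
}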
diff --git a/source/use_case/ad/src/MainLoop.cc b/source/use_case/ad/src/MainLoop.cc
index 3c2f9cc..a323610 100644
--- a/source/use_case/ad/src/MainLoop.cc
+++ b/source/use_case/ad/src/MainLoop.cc
@@ -90,6 +90,7 @@ void main_loop(hal_platform& platform)
case MENU_OPT_RUN_INF_CHOSEN: {
printf(" Enter the data index [0, %d]: ",
NUMBER_OF_FILES-1);
+ fflush(stdout);
auto audioIndex = static_cast<uint32_t>(
arm::app::ReadUserInputAsInt(platform));
executionSuccessful = ClassifyVibrationHandler(caseContext,
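The fflush(stdout) added here, and in each MainLoop.cc below, addresses the prompt-ordering bug from the commit message: the prompt has no trailing newline, so it can sit in stdout's buffer while the program blocks reading input, and the user only sees it after typing. A minimal standalone illustration (not repository code):

#include <cstdio>

int main()
{
    int index = 0;
    /* Without the flush, this prompt may stay buffered until after
     * scanf returns, because it has no trailing '\n'. */
    printf(" Enter the data index: ");
    fflush(stdout);
    if (scanf("%d", &index) == 1) {
        printf("Got %d\n", index);
    }
    return 0;
}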
diff --git a/source/use_case/asr/src/MainLoop.cc b/source/use_case/asr/src/MainLoop.cc
index 42bb08e..87145f4 100644
--- a/source/use_case/asr/src/MainLoop.cc
+++ b/source/use_case/asr/src/MainLoop.cc
@@ -133,6 +133,7 @@ void main_loop(hal_platform& platform)
case MENU_OPT_RUN_INF_CHOSEN: {
printf(" Enter the audio clip index [0, %d]: ",
NUMBER_OF_FILES-1);
+ fflush(stdout);
auto clipIndex = static_cast<uint32_t>(
arm::app::ReadUserInputAsInt(platform));
executionSuccessful = ClassifyAudioHandler(caseContext,
diff --git a/source/use_case/img_class/src/MainLoop.cc b/source/use_case/img_class/src/MainLoop.cc
index 79f6018..ea9f14a 100644
--- a/source/use_case/img_class/src/MainLoop.cc
+++ b/source/use_case/img_class/src/MainLoop.cc
@@ -68,6 +68,7 @@ void main_loop(hal_platform& platform)
break;
case common::MENU_OPT_RUN_INF_CHOSEN: {
printf(" Enter the image index [0, %d]: ", NUMBER_OF_FILES-1);
+ fflush(stdout);
auto imgIndex = static_cast<uint32_t>(arm::app::ReadUserInputAsInt(platform));
executionSuccessful = ClassifyImageHandler(caseContext, imgIndex, false);
break;
diff --git a/source/use_case/img_class/src/UseCaseHandler.cc b/source/use_case/img_class/src/UseCaseHandler.cc
index 66df1da..effc06f 100644
--- a/source/use_case/img_class/src/UseCaseHandler.cc
+++ b/source/use_case/img_class/src/UseCaseHandler.cc
@@ -97,7 +97,7 @@ namespace app {
/* Display this image on the LCD. */
platform.data_psn->present_data_image(
- (uint8_t*) inputTensor->data.data,
+ static_cast<uint8_t *>(inputTensor->data.data),
nCols, nRows, nChannels,
dataPsnImgStartX, dataPsnImgStartY, dataPsnImgDownscaleFactor);
diff --git a/source/use_case/kws/src/MainLoop.cc b/source/use_case/kws/src/MainLoop.cc
index 80254d0..c683e71 100644
--- a/source/use_case/kws/src/MainLoop.cc
+++ b/source/use_case/kws/src/MainLoop.cc
@@ -95,6 +95,7 @@ void main_loop(hal_platform& platform)
break;
case MENU_OPT_RUN_INF_CHOSEN: {
printf(" Enter the audio clip index [0, %d]: ", NUMBER_OF_FILES-1);
+ fflush(stdout);
auto clipIndex = static_cast<uint32_t>(arm::app::ReadUserInputAsInt(platform));
executionSuccessful = ClassifyAudioHandler(caseContext, clipIndex, false);
break;
diff --git a/source/use_case/kws_asr/src/MainLoop.cc b/source/use_case/kws_asr/src/MainLoop.cc
index ed9034f..d5a2c2b 100644
--- a/source/use_case/kws_asr/src/MainLoop.cc
+++ b/source/use_case/kws_asr/src/MainLoop.cc
@@ -161,6 +161,7 @@ void main_loop(hal_platform& platform)
case MENU_OPT_RUN_INF_CHOSEN: {
printf(" Enter the audio clip index [0, %d]: ",
NUMBER_OF_FILES-1);
+ fflush(stdout);
auto clipIndex = static_cast<uint32_t>(
arm::app::ReadUserInputAsInt(platform));
executionSuccessful = ClassifyAudioHandler(caseContext,
diff --git a/source/use_case/vww/src/MainLoop.cc b/source/use_case/vww/src/MainLoop.cc
index f026cc2..b29238c 100644
--- a/source/use_case/vww/src/MainLoop.cc
+++ b/source/use_case/vww/src/MainLoop.cc
@@ -67,6 +67,7 @@ void main_loop(hal_platform &platform)
break;
case common::MENU_OPT_RUN_INF_CHOSEN: {
printf(" Enter the image index [0, %d]: ", NUMBER_OF_FILES-1);
+ fflush(stdout);
auto imgIndex = static_cast<uint32_t>(arm::app::ReadUserInputAsInt(platform));
executionSuccessful = ClassifyImageHandler(caseContext, imgIndex, false);
break;
diff --git a/source/use_case/vww/src/UseCaseHandler.cc b/source/use_case/vww/src/UseCaseHandler.cc
index fb2e837..d384032 100644
--- a/source/use_case/vww/src/UseCaseHandler.cc
+++ b/source/use_case/vww/src/UseCaseHandler.cc
@@ -21,6 +21,8 @@
#include "UseCaseCommonUtils.hpp"
#include "hal.h"
+#include <algorithm>
+
namespace arm {
namespace app {
@@ -94,13 +96,19 @@ namespace app {
/* Display this image on the LCD. */
platform.data_psn->present_data_image(
- (uint8_t *) inputTensor->data.data,
+ static_cast<uint8_t *>(inputTensor->data.data),
nCols, nRows, nChannels,
dataPsnImgStartX, dataPsnImgStartY, dataPsnImgDownscaleFactor);
- /* If the data is signed. */
- if (model.IsDataSigned()) {
- image::ConvertImgToInt8(inputTensor->data.data, inputTensor->bytes);
+ /* VWW model preprocessing converts the image from uint8 to [0, 1] float
+  * values, then quantizes them with the input quantization info. */
+ QuantParams inQuantParams = GetTensorQuantParams(inputTensor);
+
+ auto* req_data = static_cast<uint8_t *>(inputTensor->data.data);
+ auto* signed_req_data = static_cast<int8_t *>(inputTensor->data.data);
+ for (size_t i = 0; i < inputTensor->bytes; i++) {
+ auto i_data_int8 = static_cast<int8_t>(((static_cast<float>(req_data[i]) / 255.0f) / inQuantParams.scale) + inQuantParams.offset);
+ signed_req_data[i] = std::min<int8_t>(INT8_MAX, std::max<int8_t>(i_data_int8, INT8_MIN));
}
/* Display message on the LCD - inference running. */
@@ -159,7 +167,7 @@ namespace app {
const uint32_t nChannels = (inputTensor->dims->size == 4) ? inputTensor->dims->data[3] : 1;
const uint8_t* srcPtr = get_img_array(imIdx);
- auto* dstPtr = (uint8_t*)inputTensor->data.data;
+ auto* dstPtr = static_cast<uint8_t *>(inputTensor->data.data);
if (1 == nChannels) {
/**
* Visual Wake Word model accepts only one channel =>
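One detail of the quantization loop in the hunk above: req_data and signed_req_data alias the same tensor buffer, which is well defined because each iteration reads element i in full before overwriting it. A standalone sketch of the same in-place pattern, with assumed quantization parameters:

#include <algorithm>
#include <cstdint>
#include <cstdio>

int main()
{
    /* Hypothetical 4-pixel buffer standing in for the input tensor. */
    uint8_t buffer[4] = {0, 64, 128, 255};
    const float scale  = 1.0f / 127.5f;  /* assumed input scale      */
    const int   offset = -128;           /* assumed input zero point */

    auto* src = buffer;                             /* uint8 view */
    auto* dst = reinterpret_cast<int8_t*>(buffer);  /* int8 view  */
    for (int i = 0; i < 4; i++) {
        /* Element i is read before it is overwritten in place. */
        const float q = (static_cast<float>(src[i]) / 255.0f) / scale
                        + static_cast<float>(offset);
        dst[i] = static_cast<int8_t>(
            std::min(127.0f, std::max(-128.0f, q)));
    }
    for (int i = 0; i < 4; i++) {
        printf("%d ", dst[i]);
    }
    printf("\n");
    return 0;
}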
diff --git a/tests/use_case/ad/InferenceTestAD.cc b/tests/use_case/ad/InferenceTestAD.cc
index d5e21c2..ad785e8 100644
--- a/tests/use_case/ad/InferenceTestAD.cc
+++ b/tests/use_case/ad/InferenceTestAD.cc
@@ -64,7 +64,7 @@ bool RunInferenceRandom(arm::app::Model& model)
template <typename T>
void TestInference(const T *input_goldenFV, const T *output_goldenFV, arm::app::Model& model)
{
- REQUIRE(RunInference(model, (int8_t*)input_goldenFV));
+ REQUIRE(RunInference(model, static_cast<const T*>(input_goldenFV)));
TfLiteTensor *outputTensor = model.GetOutputTensor(0);
@@ -75,7 +75,7 @@ void TestInference(const T *input_goldenFV, const T *output_goldenFV, arm::app::
for (size_t i = 0; i < outputTensor->bytes; i++)
{
- REQUIRE((int)tensorData[i] == (int)((T)output_goldenFV[i]));
+ REQUIRE(static_cast<int>(tensorData[i]) == static_cast<int>(((T)output_goldenFV[i])));
}
}
diff --git a/tests/use_case/asr/InferenceTestWav2Letter.cc b/tests/use_case/asr/InferenceTestWav2Letter.cc
index d5e6c35..1f9cb80 100644
--- a/tests/use_case/asr/InferenceTestWav2Letter.cc
+++ b/tests/use_case/asr/InferenceTestWav2Letter.cc
@@ -81,7 +81,7 @@ void TestInference(const T* input_goldenFV, const T* output_goldenFV, arm::app::
REQUIRE(tensorData);
for (size_t i = 0; i < outputTensor->bytes; i++) {
- REQUIRE((int)tensorData[i] == (int)((T)output_goldenFV[i]));
+ REQUIRE(static_cast<int>(tensorData[i]) == static_cast<int>(((T)output_goldenFV[i])));
}
}
diff --git a/tests/use_case/img_class/InferenceTestMobilenetV2.cc b/tests/use_case/img_class/InferenceTestMobilenetV2.cc
index 6fbf374..bb89c99 100644
--- a/tests/use_case/img_class/InferenceTestMobilenetV2.cc
+++ b/tests/use_case/img_class/InferenceTestMobilenetV2.cc
@@ -56,7 +56,7 @@ void TestInference(int imageIdx, arm::app::Model& model, T tolerance) {
REQUIRE(tensorData);
for (size_t i = 0; i < outputTensor->bytes; i++) {
- REQUIRE((int)tensorData[i] == Approx((int)((T)goldenFV[i])).epsilon(tolerance));
+ REQUIRE(static_cast<int>(tensorData[i]) == Approx(static_cast<int>((T)goldenFV[i])).epsilon(tolerance));
}
}
diff --git a/tests/use_case/kws/InferenceTestDSCNN.cc b/tests/use_case/kws/InferenceTestDSCNN.cc
index d02e33c..7ce55dd 100644
--- a/tests/use_case/kws/InferenceTestDSCNN.cc
+++ b/tests/use_case/kws/InferenceTestDSCNN.cc
@@ -70,7 +70,7 @@ void TestInference(const T* input_goldenFV, const T* output_goldenFV, arm::app::
REQUIRE(tensorData);
for (size_t i = 0; i < outputTensor->bytes; i++) {
- REQUIRE((int)tensorData[i] == (int)((T)output_goldenFV[i]));
+ REQUIRE(static_cast<int>(tensorData[i]) == static_cast<int>(((T)output_goldenFV[i])));
}
}
diff --git a/tests/use_case/kws_asr/InferenceTestDSCNN.cc b/tests/use_case/kws_asr/InferenceTestDSCNN.cc
index e210c33..134003d 100644
--- a/tests/use_case/kws_asr/InferenceTestDSCNN.cc
+++ b/tests/use_case/kws_asr/InferenceTestDSCNN.cc
@@ -68,7 +68,7 @@ void TestInference(const T* input_goldenFV, const T* output_goldenFV, arm::app::
REQUIRE(tensorData);
for (size_t i = 0; i < outputTensor->bytes; i++) {
- REQUIRE((int) tensorData[i] == (int) ((T) output_goldenFV[i]));
+ REQUIRE(static_cast<int>(tensorData[i]) == static_cast<int>((T) output_goldenFV[i]));
}
}
diff --git a/tests/use_case/kws_asr/InferenceTestWav2Letter.cc b/tests/use_case/kws_asr/InferenceTestWav2Letter.cc
index 5f5ad98..1b14a42 100644
--- a/tests/use_case/kws_asr/InferenceTestWav2Letter.cc
+++ b/tests/use_case/kws_asr/InferenceTestWav2Letter.cc
@@ -83,7 +83,7 @@ void TestInference(const T* input_goldenFV, const T* output_goldenFV, arm::app::
REQUIRE(tensorData);
for (size_t i = 0; i < outputTensor->bytes; i++) {
- REQUIRE((int)tensorData[i] == (int)((T)output_goldenFV[i]));
+ REQUIRE(static_cast<int>(tensorData[i]) == static_cast<int>(((T)output_goldenFV[i])));
}
}
diff --git a/tests/use_case/vww/VisualWakeWordUCTests.cc b/tests/use_case/vww/VisualWakeWordUCTests.cc
index 891423b..700a6bb 100644
--- a/tests/use_case/vww/VisualWakeWordUCTests.cc
+++ b/tests/use_case/vww/VisualWakeWordUCTests.cc
@@ -73,7 +73,7 @@ TEST_CASE("Inference by index")
auto results = caseContext.Get<std::vector<arm::app::ClassificationResult>>("results");
- REQUIRE(results[0].m_labelIdx == 0);
+ REQUIRE(results[0].m_labelIdx == 1);
}
TEST_CASE("Inference run all images")
diff --git a/tests/utils/ImageUtils.cc b/tests/utils/ImageUtils.cc
index f77ce1e..506040f 100644
--- a/tests/utils/ImageUtils.cc
+++ b/tests/utils/ImageUtils.cc
@@ -18,12 +18,12 @@
void convertImgIoInt8(void * data, const size_t sz)
{
- uint8_t * tmp_req_data = (uint8_t *)data;
- int8_t * tmp_signed_req_data = (int8_t *) data;
+ uint8_t * tmp_req_data = static_cast<uint8_t *>(data);
+ int8_t * tmp_signed_req_data = static_cast<int8_t *>(data);
for (size_t i = 0; i < sz; ++i) {
- tmp_signed_req_data[i] = (int8_t)(
- (int32_t)(tmp_req_data[i]) - 128);
+ tmp_signed_req_data[i] = static_cast<int8_t>(
+ static_cast<int32_t>(tmp_req_data[i]) - 128);
}
}
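For reference, the conversion convertImgIoInt8 performs above is the standard re-centring of [0, 255] data onto [-128, 127]: a fixed zero point of 128 with a scale of 1, unlike the VWW path, which now uses the tensor's own quantization parameters. A small self-contained check of that mapping:

#include <cstdint>
#include <cstdio>

int main()
{
    const uint8_t samples[] = {0, 1, 127, 128, 254, 255};
    for (uint8_t s : samples) {
        /* Subtracting 128 maps [0, 255] onto [-128, 127]. */
        const int8_t converted =
            static_cast<int8_t>(static_cast<int32_t>(s) - 128);
        printf("%3d -> %4d\n", s, converted);
    }
    return 0;
}

Expected output: 0 -> -128, 1 -> -127, 127 -> -1, 128 -> 0, 254 -> 126, 255 -> 127.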