author    | Isabella Gottardi <isabella.gottardi@arm.com> | 2021-10-20 15:52:32 +0100
committer | Isabella Gottardi <isabella.gottardi@arm.com> | 2021-10-25 08:51:10 +0000
commit    | 79d4154ee071d0e7ef2d1eecdde149d488bb9d8b (patch)
tree      | 21c8c0a2bd187a925f28045d4a57e9e4ef05be82
parent    | 14ab8d447c5f12df2ac7fd4217fc0d2005b02dca (diff)
download  | ml-embedded-evaluation-kit-79d4154ee071d0e7ef2d1eecdde149d488bb9d8b.tar.gz
MLECO-2458 and MLECO-2476 [Fix] VWW IFM quant step
* Changed the image->cc conversion to match the preprocessing used by the
  img_class and vww models: images are scaled while maintaining the
  aspect ratio, and then a centre crop of the required size is taken
  (see the Python sketch after this list).
* VWW now applies the input quantization info to the image after it has
  first been converted to the [0, 1] float range (see the quantization
  sketch after this list).
* Replaced the adult_blur sample with an image that does not contain a person.
* Fixed the menu prompt when selecting a specific IFM to run
  (the selection prompt was only displayed after the user had typed something).
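
For reference, a minimal standalone Pillow sketch of the new resize-and-crop behaviour (the helper name and the 128x128 target size are illustrative; the actual logic lives in scripts/py/gen_rgb_cpp.py in the diff below):

```python
from PIL import Image


def resize_and_centre_crop(image: Image.Image, ifm_width: int, ifm_height: int) -> Image.Image:
    """Scale the image to cover the IFM while keeping its aspect ratio, then centre-crop."""
    # Scale so the smaller source dimension maps onto the larger IFM dimension,
    # instead of thumbnailing and padding with a white border as before.
    scale_ratio = float(max(ifm_width, ifm_height)) / float(min(image.size[0], image.size[1]))
    resized_width = int(image.size[0] * scale_ratio)
    resized_height = int(image.size[1] * scale_ratio)
    resized = image.resize((resized_width, resized_height), Image.BILINEAR)

    # Take the centre crop of the requested IFM size.
    return resized.crop((
        (resized_width - ifm_width) // 2,    # left
        (resized_height - ifm_height) // 2,  # top
        (resized_width + ifm_width) // 2,    # right
        (resized_height + ifm_height) // 2,  # bottom
    ))


# Hypothetical usage: prepare a sample image for a 128x128 RGB input tensor.
# ifm = resize_and_centre_crop(Image.open("st_paul_s_cathedral.png").convert("RGB"), 128, 128)
```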
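And a NumPy sketch of the per-pixel arithmetic the updated VWW handler performs (scale and offset here are example values; the real ones are read from the input tensor's quantization parameters via GetTensorQuantParams):

```python
import numpy as np


def quantize_vww_input(rgb_u8: np.ndarray, scale: float, offset: int) -> np.ndarray:
    """Map uint8 pixels to the [0, 1] float range, then quantize with the input tensor's params."""
    normalized = rgb_u8.astype(np.float32) / 255.0         # uint8 -> [0, 1] float
    quantized = normalized / scale + offset                # apply input quantization info
    return np.clip(quantized, -128, 127).astype(np.int8)   # saturate to the int8 range


# Hypothetical usage with made-up quantization parameters (scale=1/255, offset=-128):
# ifm = quantize_vww_input(np.zeros((128, 128, 3), dtype=np.uint8), 1.0 / 255.0, -128)
```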
Change-Id: Ie6cde7ab4835ea842667b87397458a5d32131df3
21 files changed, 52 insertions, 27 deletions
diff --git a/resources/vww/samples/adult_blur.png b/resources/vww/samples/adult_blur.png
deleted file mode 100644
index 5f2cc94..0000000
--- a/resources/vww/samples/adult_blur.png
+++ /dev/null
Binary files differ
diff --git a/resources/vww/samples/files.md b/resources/vww/samples/files.md
index 13f5de6..6aca607 100644
--- a/resources/vww/samples/files.md
+++ b/resources/vww/samples/files.md
@@ -7,4 +7,4 @@ The paper for the Visual Wake Word Model:
 The sample images provided are under Creative Commons License. The links are
 documented here for traceability:
 * https://www.pexels.com/photo/man-in-red-jacket-1681010/
-* https://www.pexels.com/photo/adult-blur-camera-casual-598917/
+* https://www.pexels.com/photo/low-angle-photo-of-st-paul-s-cathedral-under-sunset-colors-1043121/
diff --git a/resources/vww/samples/st_paul_s_cathedral.png b/resources/vww/samples/st_paul_s_cathedral.png
new file mode 100644
index 0000000..a6e826a
--- /dev/null
+++ b/resources/vww/samples/st_paul_s_cathedral.png
Binary files differ
diff --git a/scripts/py/gen_rgb_cpp.py b/scripts/py/gen_rgb_cpp.py
index 957d2d0..c53fbd7 100644
--- a/scripts/py/gen_rgb_cpp.py
+++ b/scripts/py/gen_rgb_cpp.py
@@ -70,12 +70,23 @@ def write_individual_img_cc_file(image_filename, cc_filename, header_template_fi
                                          gen_time=datetime.datetime.now(),
                                          file_name=os.path.basename(image_filename),
                                          year=datetime.datetime.now().year)
-
-    original_image.thumbnail(image_size)
-    delta_w = abs(image_size[0] - original_image.size[0])
-    delta_h = abs(image_size[1] - original_image.size[1])
-    resized_image = Image.new('RGB', args.image_size, (255, 255, 255, 0))
-    resized_image.paste(original_image, (int(delta_w / 2), int(delta_h / 2)))
+    # IFM size
+    ifm_width = image_size[0]
+    ifm_height = image_size[1]
+
+    # Aspect ratio resize
+    scale_ratio = (float)(max(ifm_width, ifm_height)) / (float)(min(original_image.size[0], original_image.size[1]))
+    resized_width = (int)(original_image.size[0] * scale_ratio)
+    resized_height = (int)(original_image.size[1] * scale_ratio)
+    resized_image = original_image.resize([resized_width,resized_height], Image.BILINEAR)
+
+    # Crop the center of the image
+    resized_image = resized_image.crop((
+        (resized_width - ifm_width) / 2,    # left
+        (resized_height - ifm_height) / 2,  # top
+        (resized_width + ifm_width) / 2,    # right
+        (resized_height + ifm_height) / 2   # bottom
+        ))
 
     # Convert the image and write it to the cc file
     rgb_data = np.array(resized_image, dtype=np.uint8).flatten()
diff --git a/source/application/main/UseCaseCommonUtils.cc b/source/application/main/UseCaseCommonUtils.cc
index a99e05d..e48e308 100644
--- a/source/application/main/UseCaseCommonUtils.cc
+++ b/source/application/main/UseCaseCommonUtils.cc
@@ -35,8 +35,8 @@ void DisplayCommonMenu()
 
 void image::ConvertImgToInt8(void* data, const size_t kMaxImageSize)
 {
-    auto* tmp_req_data = (uint8_t*) data;
-    auto* tmp_signed_req_data = (int8_t*) data;
+    auto* tmp_req_data = static_cast<uint8_t *>(data);
+    auto* tmp_signed_req_data = static_cast<int8_t *>(data);
 
     for (size_t i = 0; i < kMaxImageSize; i++) {
         tmp_signed_req_data[i] = (int8_t) (
diff --git a/source/use_case/ad/src/MainLoop.cc b/source/use_case/ad/src/MainLoop.cc
index 3c2f9cc..a323610 100644
--- a/source/use_case/ad/src/MainLoop.cc
+++ b/source/use_case/ad/src/MainLoop.cc
@@ -90,6 +90,7 @@ void main_loop(hal_platform& platform)
             case MENU_OPT_RUN_INF_CHOSEN: {
                 printf(" Enter the data index [0, %d]: ", NUMBER_OF_FILES-1);
+                fflush(stdout);
                 auto audioIndex = static_cast<uint32_t>(
                         arm::app::ReadUserInputAsInt(platform));
                 executionSuccessful = ClassifyVibrationHandler(caseContext,
diff --git a/source/use_case/asr/src/MainLoop.cc b/source/use_case/asr/src/MainLoop.cc
index 42bb08e..87145f4 100644
--- a/source/use_case/asr/src/MainLoop.cc
+++ b/source/use_case/asr/src/MainLoop.cc
@@ -133,6 +133,7 @@ void main_loop(hal_platform& platform)
             case MENU_OPT_RUN_INF_CHOSEN: {
                 printf(" Enter the audio clip index [0, %d]: ", NUMBER_OF_FILES-1);
+                fflush(stdout);
                 auto clipIndex = static_cast<uint32_t>(
                         arm::app::ReadUserInputAsInt(platform));
                 executionSuccessful = ClassifyAudioHandler(caseContext,
diff --git a/source/use_case/img_class/src/MainLoop.cc b/source/use_case/img_class/src/MainLoop.cc
index 79f6018..ea9f14a 100644
--- a/source/use_case/img_class/src/MainLoop.cc
+++ b/source/use_case/img_class/src/MainLoop.cc
@@ -68,6 +68,7 @@ void main_loop(hal_platform& platform)
                 break;
             case common::MENU_OPT_RUN_INF_CHOSEN: {
                 printf(" Enter the image index [0, %d]: ", NUMBER_OF_FILES-1);
+                fflush(stdout);
                 auto imgIndex = static_cast<uint32_t>(arm::app::ReadUserInputAsInt(platform));
                 executionSuccessful = ClassifyImageHandler(caseContext, imgIndex, false);
                 break;
diff --git a/source/use_case/img_class/src/UseCaseHandler.cc b/source/use_case/img_class/src/UseCaseHandler.cc
index 66df1da..effc06f 100644
--- a/source/use_case/img_class/src/UseCaseHandler.cc
+++ b/source/use_case/img_class/src/UseCaseHandler.cc
@@ -97,7 +97,7 @@ namespace app {
 
             /* Display this image on the LCD. */
             platform.data_psn->present_data_image(
-                (uint8_t*) inputTensor->data.data,
+                static_cast<uint8_t *>(inputTensor->data.data),
                 nCols, nRows, nChannels,
                 dataPsnImgStartX, dataPsnImgStartY, dataPsnImgDownscaleFactor);
diff --git a/source/use_case/kws/src/MainLoop.cc b/source/use_case/kws/src/MainLoop.cc
index 80254d0..c683e71 100644
--- a/source/use_case/kws/src/MainLoop.cc
+++ b/source/use_case/kws/src/MainLoop.cc
@@ -95,6 +95,7 @@ void main_loop(hal_platform& platform)
                 break;
             case MENU_OPT_RUN_INF_CHOSEN: {
                 printf(" Enter the audio clip index [0, %d]: ", NUMBER_OF_FILES-1);
+                fflush(stdout);
                 auto clipIndex = static_cast<uint32_t>(arm::app::ReadUserInputAsInt(platform));
                 executionSuccessful = ClassifyAudioHandler(caseContext, clipIndex, false);
                 break;
diff --git a/source/use_case/kws_asr/src/MainLoop.cc b/source/use_case/kws_asr/src/MainLoop.cc
index ed9034f..d5a2c2b 100644
--- a/source/use_case/kws_asr/src/MainLoop.cc
+++ b/source/use_case/kws_asr/src/MainLoop.cc
@@ -161,6 +161,7 @@ void main_loop(hal_platform& platform)
             case MENU_OPT_RUN_INF_CHOSEN: {
                 printf(" Enter the audio clip index [0, %d]: ", NUMBER_OF_FILES-1);
+                fflush(stdout);
                 auto clipIndex = static_cast<uint32_t>(
                         arm::app::ReadUserInputAsInt(platform));
                 executionSuccessful = ClassifyAudioHandler(caseContext,
diff --git a/source/use_case/vww/src/MainLoop.cc b/source/use_case/vww/src/MainLoop.cc
index f026cc2..b29238c 100644
--- a/source/use_case/vww/src/MainLoop.cc
+++ b/source/use_case/vww/src/MainLoop.cc
@@ -67,6 +67,7 @@ void main_loop(hal_platform &platform)
                 break;
             case common::MENU_OPT_RUN_INF_CHOSEN: {
                 printf(" Enter the image index [0, %d]: ", NUMBER_OF_FILES-1);
+                fflush(stdout);
                 auto imgIndex = static_cast<uint32_t>(arm::app::ReadUserInputAsInt(platform));
                 executionSuccessful = ClassifyImageHandler(caseContext, imgIndex, false);
                 break;
diff --git a/source/use_case/vww/src/UseCaseHandler.cc b/source/use_case/vww/src/UseCaseHandler.cc
index fb2e837..d384032 100644
--- a/source/use_case/vww/src/UseCaseHandler.cc
+++ b/source/use_case/vww/src/UseCaseHandler.cc
@@ -21,6 +21,8 @@
 #include "UseCaseCommonUtils.hpp"
 #include "hal.h"
 
+#include <algorithm>
+
 namespace arm {
 namespace app {
 
@@ -94,13 +96,19 @@ namespace app {
 
             /* Display this image on the LCD. */
             platform.data_psn->present_data_image(
-                (uint8_t *) inputTensor->data.data,
+                static_cast<uint8_t *>(inputTensor->data.data),
                 nCols, nRows, nChannels,
                 dataPsnImgStartX, dataPsnImgStartY, dataPsnImgDownscaleFactor);
 
-            /* If the data is signed. */
-            if (model.IsDataSigned()) {
-                image::ConvertImgToInt8(inputTensor->data.data, inputTensor->bytes);
+            /* Vww model preprocessing is image conversion from uint8 to [0,1] float values,
+             * then quantize them with input quantization info. */
+            QuantParams inQuantParams = GetTensorQuantParams(inputTensor);
+
+            auto* req_data = static_cast<uint8_t *>(inputTensor->data.data);
+            auto* signed_req_data = static_cast<int8_t *>(inputTensor->data.data);
+            for (size_t i = 0; i < inputTensor->bytes; i++) {
+                auto i_data_int8 = static_cast<int8_t>(((static_cast<float>(req_data[i]) / 255.0f) / inQuantParams.scale) + inQuantParams.offset);
+                signed_req_data[i] = std::min<int8_t>(INT8_MAX, std::max<int8_t>(i_data_int8, INT8_MIN));
             }
 
             /* Display message on the LCD - inference running. */
@@ -159,7 +167,7 @@ namespace app {
             const uint32_t nChannels = (inputTensor->dims->size == 4) ? inputTensor->dims->data[3] : 1;
 
             const uint8_t* srcPtr = get_img_array(imIdx);
-            auto* dstPtr = (uint8_t*)inputTensor->data.data;
+            auto* dstPtr = static_cast<uint8_t *>(inputTensor->data.data);
 
             if (1 == nChannels) {
                 /**
                  * Visual Wake Word model accepts only one channel =>
diff --git a/tests/use_case/ad/InferenceTestAD.cc b/tests/use_case/ad/InferenceTestAD.cc
index d5e21c2..ad785e8 100644
--- a/tests/use_case/ad/InferenceTestAD.cc
+++ b/tests/use_case/ad/InferenceTestAD.cc
@@ -64,7 +64,7 @@ bool RunInferenceRandom(arm::app::Model& model)
 template <typename T>
 void TestInference(const T *input_goldenFV, const T *output_goldenFV, arm::app::Model& model)
 {
-    REQUIRE(RunInference(model, (int8_t*)input_goldenFV));
+    REQUIRE(RunInference(model, static_cast<const T*>(input_goldenFV)));
 
     TfLiteTensor *outputTensor = model.GetOutputTensor(0);
 
@@ -75,7 +75,7 @@ void TestInference(const T *input_goldenFV, const T *output_goldenFV, arm::app::
 
     for (size_t i = 0; i < outputTensor->bytes; i++) {
-        REQUIRE((int)tensorData[i] == (int)((T)output_goldenFV[i]));
+        REQUIRE(static_cast<int>(tensorData[i]) == static_cast<int>(((T)output_goldenFV[i])));
     }
 }
diff --git a/tests/use_case/asr/InferenceTestWav2Letter.cc b/tests/use_case/asr/InferenceTestWav2Letter.cc
index d5e6c35..1f9cb80 100644
--- a/tests/use_case/asr/InferenceTestWav2Letter.cc
+++ b/tests/use_case/asr/InferenceTestWav2Letter.cc
@@ -81,7 +81,7 @@ void TestInference(const T* input_goldenFV, const T* output_goldenFV, arm::app::
     REQUIRE(tensorData);
 
     for (size_t i = 0; i < outputTensor->bytes; i++) {
-        REQUIRE((int)tensorData[i] == (int)((T)output_goldenFV[i]));
+        REQUIRE(static_cast<int>(tensorData[i]) == static_cast<int>(((T)output_goldenFV[i])));
     }
 }
diff --git a/tests/use_case/img_class/InferenceTestMobilenetV2.cc b/tests/use_case/img_class/InferenceTestMobilenetV2.cc
index 6fbf374..bb89c99 100644
--- a/tests/use_case/img_class/InferenceTestMobilenetV2.cc
+++ b/tests/use_case/img_class/InferenceTestMobilenetV2.cc
@@ -56,7 +56,7 @@ void TestInference(int imageIdx, arm::app::Model& model, T tolerance) {
     REQUIRE(tensorData);
 
     for (size_t i = 0; i < outputTensor->bytes; i++) {
-        REQUIRE((int)tensorData[i] == Approx((int)((T)goldenFV[i])).epsilon(tolerance));
+        REQUIRE(static_cast<int>(tensorData[i]) == Approx(static_cast<int>((T)goldenFV[i])).epsilon(tolerance));
     }
 }
diff --git a/tests/use_case/kws/InferenceTestDSCNN.cc b/tests/use_case/kws/InferenceTestDSCNN.cc
index d02e33c..7ce55dd 100644
--- a/tests/use_case/kws/InferenceTestDSCNN.cc
+++ b/tests/use_case/kws/InferenceTestDSCNN.cc
@@ -70,7 +70,7 @@ void TestInference(const T* input_goldenFV, const T* output_goldenFV, arm::app::
     REQUIRE(tensorData);
 
     for (size_t i = 0; i < outputTensor->bytes; i++) {
-        REQUIRE((int)tensorData[i] == (int)((T)output_goldenFV[i]));
+        REQUIRE(static_cast<int>(tensorData[i]) == static_cast<int>(((T)output_goldenFV[i])));
     }
 }
diff --git a/tests/use_case/kws_asr/InferenceTestDSCNN.cc b/tests/use_case/kws_asr/InferenceTestDSCNN.cc
index e210c33..134003d 100644
--- a/tests/use_case/kws_asr/InferenceTestDSCNN.cc
+++ b/tests/use_case/kws_asr/InferenceTestDSCNN.cc
@@ -68,7 +68,7 @@ void TestInference(const T* input_goldenFV, const T* output_goldenFV, arm::app::
     REQUIRE(tensorData);
 
     for (size_t i = 0; i < outputTensor->bytes; i++) {
-        REQUIRE((int) tensorData[i] == (int) ((T) output_goldenFV[i]));
+        REQUIRE(static_cast<int>(tensorData[i]) == static_cast<int>((T) output_goldenFV[i]));
     }
 }
diff --git a/tests/use_case/kws_asr/InferenceTestWav2Letter.cc b/tests/use_case/kws_asr/InferenceTestWav2Letter.cc
index 5f5ad98..1b14a42 100644
--- a/tests/use_case/kws_asr/InferenceTestWav2Letter.cc
+++ b/tests/use_case/kws_asr/InferenceTestWav2Letter.cc
@@ -83,7 +83,7 @@ void TestInference(const T* input_goldenFV, const T* output_goldenFV, arm::app::
     REQUIRE(tensorData);
 
     for (size_t i = 0; i < outputTensor->bytes; i++) {
-        REQUIRE((int)tensorData[i] == (int)((T)output_goldenFV[i]));
+        REQUIRE(static_cast<int>(tensorData[i]) == static_cast<int>(((T)output_goldenFV[i])));
     }
 }
diff --git a/tests/use_case/vww/VisualWakeWordUCTests.cc b/tests/use_case/vww/VisualWakeWordUCTests.cc
index 891423b..700a6bb 100644
--- a/tests/use_case/vww/VisualWakeWordUCTests.cc
+++ b/tests/use_case/vww/VisualWakeWordUCTests.cc
@@ -73,7 +73,7 @@ TEST_CASE("Inference by index")
 
     auto results = caseContext.Get<std::vector<arm::app::ClassificationResult>>("results");
 
-    REQUIRE(results[0].m_labelIdx == 0);
+    REQUIRE(results[0].m_labelIdx == 1);
 }
 
 TEST_CASE("Inference run all images")
diff --git a/tests/utils/ImageUtils.cc b/tests/utils/ImageUtils.cc
index f77ce1e..506040f 100644
--- a/tests/utils/ImageUtils.cc
+++ b/tests/utils/ImageUtils.cc
@@ -18,12 +18,12 @@
 
 void convertImgIoInt8(void * data, const size_t sz)
 {
-    uint8_t * tmp_req_data = (uint8_t *)data;
-    int8_t * tmp_signed_req_data = (int8_t *) data;
+    uint8_t * tmp_req_data = static_cast<uint8_t *>(data);
+    int8_t * tmp_signed_req_data = static_cast<int8_t *>(data);
 
     for (size_t i = 0; i < sz; ++i)
     {
-        tmp_signed_req_data[i] = (int8_t)(
-            (int32_t)(tmp_req_data[i]) - 128);
+        tmp_signed_req_data[i] = static_cast<int8_t>(
+            static_cast<int32_t>(tmp_req_data[i]) - 128);
     }
 }