From e12ac836d2110403475d0e8b4bdfec03a0874f6c Mon Sep 17 00:00:00 2001 From: Kshitij Sisodia Date: Thu, 20 May 2021 11:18:53 +0100 Subject: MLECO-1883: Updating wav2letter model Using the new pruned wav2letter model from Arm Model Zoo. The new model when optimised by Vela, produces a tflite file ~10 MB smaller than the current. Change-Id: I4ab6007c5b6111f41d8097e29b2af6cde2abc457 --- docs/documentation.md | 4 ++-- docs/quick_start.md | 20 +++++++++----------- docs/sections/memory_considerations.md | 8 +++----- set_up_default_resources.py | 16 ++++++++-------- source/use_case/asr/usecase.cmake | 6 +++--- source/use_case/kws_asr/usecase.cmake | 4 ++-- 6 files changed, 27 insertions(+), 31 deletions(-) diff --git a/docs/documentation.md b/docs/documentation.md index 9ec73a3..7f8fbf9 100644 --- a/docs/documentation.md +++ b/docs/documentation.md @@ -10,7 +10,7 @@ - [Deployment](#deployment) - [Implementing custom ML application](#implementing-custom-ml-application) - [Testing and benchmarking](#testing-and-benchmarking) - - [Memory considerations](#memory-considerations) + - [Memory considerations](#memory-considerations) - [Troubleshooting](#troubleshooting) - [Appendix](#appendix) @@ -184,7 +184,7 @@ from [Arm ML-Zoo](https://github.com/ARM-software/ML-zoo/). - [Mobilenet V2](https://github.com/ARM-software/ML-zoo/blob/master/models/image_classification/mobilenet_v2_1.0_224/tflite_uint8). - [DS-CNN](https://github.com/ARM-software/ML-zoo/blob/master/models/keyword_spotting/ds_cnn_large/tflite_clustered_int8). -- [Wav2Letter](https://github.com/ARM-software/ML-zoo/blob/master/models/speech_recognition/wav2letter/tflite_int8). +- [Wav2Letter](https://github.com/ARM-software/ML-zoo/tree/1a92aa08c0de49a7304e0a7f3f59df6f4fd33ac8/models/speech_recognition/wav2letter/tflite_pruned_int8). - [Anomaly Detection](https://github.com/ARM-software/ML-zoo/raw/7c32b097f7d94aae2cd0b98a8ed5a3ba81e66b18/models/anomaly_detection/micronet_medium/tflite_int8/ad_medium_int8.tflite). 
When using Ethos-U55 NPU backend, the NN model is assumed to be optimized by Vela compiler. diff --git a/docs/quick_start.md b/docs/quick_start.md index abf8f50..6aea7b1 100644 --- a/docs/quick_start.md +++ b/docs/quick_start.md @@ -3,7 +3,7 @@ This is a quick start guide that will show you how to run the keyword spotting example application. The aim of this quick start guide is to enable you to run an application quickly on the Fixed Virtual Platform. The assumption we are making is that your Arm® Ethos™-U55 NPU is configured to use 128 Multiply-Accumulate units, -is using a shared SRAM with the Arm® Cortex®-M55. +is using a shared SRAM with the Arm® Cortex®-M55. 1. Verify you have installed [the required prerequisites](sections/building.md#Build-prerequisites). @@ -58,11 +58,11 @@ curl -L https://github.com/ARM-software/ML-zoo/raw/7c32b097f7d94aae2cd0b98a8ed5a --output ./resources_downloaded/ad/ifm0.npy curl -L https://github.com/ARM-software/ML-zoo/raw/7c32b097f7d94aae2cd0b98a8ed5a3ba81e66b18/models/anomaly_detection/micronet_medium/tflite_int8/testing_output/Identity/0.npy \ --output ./resources_downloaded/ad/ofm0.npy -curl -L https://github.com/ARM-software/ML-zoo/raw/68b5fbc77ed28e67b2efc915997ea4477c1d9d5b/models/speech_recognition/wav2letter/tflite_int8/wav2letter_int8.tflite \ - --output ./resources_downloaded/asr/wav2letter_int8.tflite -curl -L https://github.com/ARM-software/ML-zoo/raw/68b5fbc77ed28e67b2efc915997ea4477c1d9d5b/models/speech_recognition/wav2letter/tflite_int8/testing_input/input_2_int8/0.npy \ +curl -L https://github.com/ARM-software/ML-zoo/raw/1a92aa08c0de49a7304e0a7f3f59df6f4fd33ac8/models/speech_recognition/wav2letter/tflite_pruned_int8/wav2letter_pruned_int8.tflite \ + --output ./resources_downloaded/asr/wav2letter_pruned_int8.tflite +curl -L https://github.com/ARM-software/ML-zoo/raw/1a92aa08c0de49a7304e0a7f3f59df6f4fd33ac8/models/speech_recognition/wav2letter/tflite_pruned_int8/testing_input/input_2_int8/0.npy \ --output 
./resources_downloaded/asr/ifm0.npy -curl -L https://github.com/ARM-software/ML-zoo/raw/68b5fbc77ed28e67b2efc915997ea4477c1d9d5b/models/speech_recognition/wav2letter/tflite_int8/testing_output/Identity_int8/0.npy \ +curl -L https://github.com/ARM-software/ML-zoo/raw/1a92aa08c0de49a7304e0a7f3f59df6f4fd33ac8/models/speech_recognition/wav2letter/tflite_pruned_int8/testing_output/Identity_int8/0.npy \ --output ./resources_downloaded/asr/ofm0.npy curl -L https://github.com/ARM-software/ML-zoo/raw/68b5fbc77ed28e67b2efc915997ea4477c1d9d5b/models/image_classification/mobilenet_v2_1.0_224/tflite_uint8/mobilenet_v2_1.0_224_quantized_1_default_1.tflite \ --output ./resources_downloaded/img_class/mobilenet_v2_1.0_224_quantized_1_default_1.tflite @@ -76,13 +76,11 @@ curl -L https://github.com/ARM-software/ML-zoo/raw/68b5fbc77ed28e67b2efc915997ea --output ./resources_downloaded/kws/ifm0.npy curl -L https://github.com/ARM-software/ML-zoo/raw/68b5fbc77ed28e67b2efc915997ea4477c1d9d5b/models/keyword_spotting/ds_cnn_large/tflite_clustered_int8/testing_output/Identity/0.npy \ --output ./resources_downloaded/kws/ofm0.npy -curl -L https://github.com/ARM-software/ML-zoo/raw/68b5fbc77ed28e67b2efc915997ea4477c1d9d5b/models/speech_recognition/wav2letter/tflite_int8/wav2letter_int8.tflite \ - --output ./resources_downloaded/kws_asr/wav2letter_int8.tflite -curl -L https://github.com/ARM-software/ML-zoo/raw/68b5fbc77ed28e67b2efc915997ea4477c1d9d5b/models/speech_recognition/wav2letter/tflite_int8/testing_input/input_2_int8/0.npy \ +curl -L https://github.com/ARM-software/ML-zoo/raw/1a92aa08c0de49a7304e0a7f3f59df6f4fd33ac8/models/speech_recognition/wav2letter/tflite_pruned_int8/wav2letter_pruned_int8.tflite \ + --output ./resources_downloaded/kws_asr/wav2letter_pruned_int8.tflite +curl -L https://github.com/ARM-software/ML-zoo/raw/1a92aa08c0de49a7304e0a7f3f59df6f4fd33ac8/models/speech_recognition/wav2letter/tflite_pruned_int8/testing_input/input_2_int8/0.npy \ --output 
./resources_downloaded/kws_asr/asr/ifm0.npy -curl -L https://github.com/ARM-software/ML-zoo/raw/68b5fbc77ed28e67b2efc915997ea4477c1d9d5b/models/speech_recognition/wav2letter/tflite_int8/testing_input/input_2_int8/0.npy - --output ./resources_downloaded/kws_asr/asr/ifm0.npy -curl -L https://github.com/ARM-software/ML-zoo/raw/68b5fbc77ed28e67b2efc915997ea4477c1d9d5b/models/speech_recognition/wav2letter/tflite_int8/testing_output/Identity_int8/0.npy \ +curl -L https://github.com/ARM-software/ML-zoo/raw/1a92aa08c0de49a7304e0a7f3f59df6f4fd33ac8/models/speech_recognition/wav2letter/tflite_pruned_int8/testing_output/Identity_int8/0.npy \ --output ./resources_downloaded/kws_asr/asr/ofm0.npy curl -L https://github.com/ARM-software/ML-zoo/raw/68b5fbc77ed28e67b2efc915997ea4477c1d9d5b/models/keyword_spotting/ds_cnn_large/tflite_clustered_int8/ds_cnn_clustered_int8.tflite \ --output ./resources_downloaded/kws_asr/ds_cnn_clustered_int8.tflite diff --git a/docs/sections/memory_considerations.md b/docs/sections/memory_considerations.md index 48651f1..4727711 100644 --- a/docs/sections/memory_considerations.md +++ b/docs/sections/memory_considerations.md @@ -1,7 +1,5 @@ # Memory considerations -## Contents - - [Memory considerations](#memory-considerations) - [Introduction](#introduction) - [Understanding memory usage from Vela output](#understanding-memory-usage-from-vela-output) @@ -114,9 +112,9 @@ under [Total SRAM used](#total-sram-used). - 638.6 KiB of SRAM - 3.1 MB of flash memory. -- [Automated speech recognition](https://github.com/ARM-software/ML-zoo/tree/master/models/speech_recognition/wav2letter/tflite_int8) requires - - 635.3 KiB of SRAM - - 21.1 MB of flash memory. +- [Automated speech recognition](https://github.com/ARM-software/ML-zoo/tree/1a92aa08c0de49a7304e0a7f3f59df6f4fd33ac8/models/speech_recognition/wav2letter/tflite_pruned_int8) requires + - 655.16 KiB of SRAM + - 13.42 MB of flash memory. 
## Memory constraints diff --git a/set_up_default_resources.py b/set_up_default_resources.py index 79b0333..7639364 100755 --- a/set_up_default_resources.py +++ b/set_up_default_resources.py @@ -36,12 +36,12 @@ json_uc_res = [{ }, { "use_case_name": "asr", - "resources": [{"name": "wav2letter_int8.tflite", - "url": "https://github.com/ARM-software/ML-zoo/raw/68b5fbc77ed28e67b2efc915997ea4477c1d9d5b/models/speech_recognition/wav2letter/tflite_int8/wav2letter_int8.tflite"}, + "resources": [{"name": "wav2letter_pruned_int8.tflite", + "url": "https://github.com/ARM-software/ML-zoo/raw/1a92aa08c0de49a7304e0a7f3f59df6f4fd33ac8/models/speech_recognition/wav2letter/tflite_pruned_int8/wav2letter_pruned_int8.tflite"}, {"name": "ifm0.npy", - "url": "https://github.com/ARM-software/ML-zoo/raw/68b5fbc77ed28e67b2efc915997ea4477c1d9d5b/models/speech_recognition/wav2letter/tflite_int8/testing_input/input_2_int8/0.npy"}, + "url": "https://github.com/ARM-software/ML-zoo/raw/1a92aa08c0de49a7304e0a7f3f59df6f4fd33ac8/models/speech_recognition/wav2letter/tflite_pruned_int8/testing_input/input_2_int8/0.npy"}, {"name": "ofm0.npy", - "url": "https://github.com/ARM-software/ML-zoo/raw/68b5fbc77ed28e67b2efc915997ea4477c1d9d5b/models/speech_recognition/wav2letter/tflite_int8/testing_output/Identity_int8/0.npy"}] + "url": "https://github.com/ARM-software/ML-zoo/raw/1a92aa08c0de49a7304e0a7f3f59df6f4fd33ac8/models/speech_recognition/wav2letter/tflite_pruned_int8/testing_output/Identity_int8/0.npy"}] }, { "use_case_name": "img_class", @@ -63,12 +63,12 @@ json_uc_res = [{ }, { "use_case_name": "kws_asr", - "resources": [{"name": "wav2letter_int8.tflite", - "url": "https://github.com/ARM-software/ML-zoo/raw/68b5fbc77ed28e67b2efc915997ea4477c1d9d5b/models/speech_recognition/wav2letter/tflite_int8/wav2letter_int8.tflite"}, + "resources": [{"name": "wav2letter_pruned_int8.tflite", + "url": 
"https://github.com/ARM-software/ML-zoo/raw/1a92aa08c0de49a7304e0a7f3f59df6f4fd33ac8/models/speech_recognition/wav2letter/tflite_pruned_int8/wav2letter_pruned_int8.tflite"}, {"sub_folder": "asr", "name": "ifm0.npy", - "url": "https://github.com/ARM-software/ML-zoo/raw/68b5fbc77ed28e67b2efc915997ea4477c1d9d5b/models/speech_recognition/wav2letter/tflite_int8/testing_input/input_2_int8/0.npy"}, + "url": "https://github.com/ARM-software/ML-zoo/raw/1a92aa08c0de49a7304e0a7f3f59df6f4fd33ac8/models/speech_recognition/wav2letter/tflite_pruned_int8/testing_input/input_2_int8/0.npy"}, {"sub_folder": "asr", "name": "ofm0.npy", - "url": "https://github.com/ARM-software/ML-zoo/raw/68b5fbc77ed28e67b2efc915997ea4477c1d9d5b/models/speech_recognition/wav2letter/tflite_int8/testing_output/Identity_int8/0.npy"}, + "url": "https://github.com/ARM-software/ML-zoo/raw/1a92aa08c0de49a7304e0a7f3f59df6f4fd33ac8/models/speech_recognition/wav2letter/tflite_pruned_int8/testing_output/Identity_int8/0.npy"}, {"name": "ds_cnn_clustered_int8.tflite", "url": "https://github.com/ARM-software/ML-zoo/raw/68b5fbc77ed28e67b2efc915997ea4477c1d9d5b/models/keyword_spotting/ds_cnn_large/tflite_clustered_int8/ds_cnn_clustered_int8.tflite"}, {"sub_folder": "kws", "name": "ifm0.npy", diff --git a/source/use_case/asr/usecase.cmake b/source/use_case/asr/usecase.cmake index 3d18997..89014de 100644 --- a/source/use_case/asr/usecase.cmake +++ b/source/use_case/asr/usecase.cmake @@ -63,7 +63,7 @@ generate_audio_code(${${use_case}_FILE_PATH} ${SRC_GEN_DIR} ${INC_GEN_DIR} # Generate labels file set(${use_case}_LABELS_CPP_FILE Labels) generate_labels_code( - INPUT "${${use_case}_LABELS_TXT_FILE}" + INPUT "${${use_case}_LABELS_TXT_FILE}" DESTINATION_SRC ${SRC_GEN_DIR} DESTINATION_HDR ${INC_GEN_DIR} OUTPUT_FILENAME "${${use_case}_LABELS_CPP_FILE}" @@ -75,9 +75,9 @@ USER_OPTION(${use_case}_ACTIVATION_BUF_SZ "Activation buffer size for the chosen STRING) if (ETHOS_U55_ENABLED) - set(DEFAULT_MODEL_PATH 
${DEFAULT_MODEL_DIR}/wav2letter_int8_vela_H128.tflite) + set(DEFAULT_MODEL_PATH ${DEFAULT_MODEL_DIR}/wav2letter_pruned_int8_vela_H128.tflite) else() - set(DEFAULT_MODEL_PATH ${DEFAULT_MODEL_DIR}/wav2letter_int8.tflite) + set(DEFAULT_MODEL_PATH ${DEFAULT_MODEL_DIR}/wav2letter_pruned_int8.tflite) endif() set(EXTRA_MODEL_CODE diff --git a/source/use_case/kws_asr/usecase.cmake b/source/use_case/kws_asr/usecase.cmake index 9ff4fff..c47d633 100644 --- a/source/use_case/kws_asr/usecase.cmake +++ b/source/use_case/kws_asr/usecase.cmake @@ -68,10 +68,10 @@ USER_OPTION(${use_case}_MODEL_SCORE_THRESHOLD_ASR "Specify the score threshold [ if (ETHOS_U55_ENABLED) set(DEFAULT_MODEL_PATH_KWS ${DEFAULT_MODEL_DIR}/ds_cnn_clustered_int8_vela_H128.tflite) - set(DEFAULT_MODEL_PATH_ASR ${DEFAULT_MODEL_DIR}/wav2letter_int8_vela_H128.tflite) + set(DEFAULT_MODEL_PATH_ASR ${DEFAULT_MODEL_DIR}/wav2letter_pruned_int8_vela_H128.tflite) else() set(DEFAULT_MODEL_PATH_KWS ${DEFAULT_MODEL_DIR}/ds_cnn_clustered_int8.tflite) - set(DEFAULT_MODEL_PATH_ASR ${DEFAULT_MODEL_DIR}/wav2letter_int8.tflite) + set(DEFAULT_MODEL_PATH_ASR ${DEFAULT_MODEL_DIR}/wav2letter_pruned_int8.tflite) endif() USER_OPTION(${use_case}_MODEL_TFLITE_PATH_KWS "NN models file to be used for KWS in the evaluation application. Model files must be in tflite format." -- cgit v1.2.1