about | summary | refs | log | tree | commit | diff
path: root/python/pyarmnn/examples/speech_recognition
diff options
context:
space:
mode:
author: Raviv Shalev <raviv.shalev@arm.com> 2021-12-07 15:18:09 +0200
committer: TeresaARM <teresa.charlinreyes@arm.com> 2022-04-13 15:33:31 +0000
commit: 97ddc06e52fbcabfd8ede7a00e9494c663186b92 (patch)
tree: 43c84d352c3a67aa45d89760fba6c79b81c8f8dc /python/pyarmnn/examples/speech_recognition
parent: 2f0ddb67d8f9267ab600a8a26308cab32f9e16ac (diff)
download: armnn-97ddc06e52fbcabfd8ede7a00e9494c663186b92.tar.gz
MLECO-2493 Add python OD example with TFLite delegate
Signed-off-by: Raviv Shalev <raviv.shalev@arm.com>
Change-Id: I25fcccbf912be0c5bd4fbfd2e97552341958af35
Diffstat (limited to 'python/pyarmnn/examples/speech_recognition')
-rw-r--r-- python/pyarmnn/examples/speech_recognition/README.md | 4
-rw-r--r-- python/pyarmnn/examples/speech_recognition/run_audio_file.py | 7
2 files changed, 6 insertions, 5 deletions
diff --git a/python/pyarmnn/examples/speech_recognition/README.md b/python/pyarmnn/examples/speech_recognition/README.md
index 2cdc8691d2..854cdaf03b 100644
--- a/python/pyarmnn/examples/speech_recognition/README.md
+++ b/python/pyarmnn/examples/speech_recognition/README.md
@@ -151,7 +151,7 @@ for i in range(features.shape[1]):
# audio_utils.py
# Quantize the input data and create input tensors with PyArmNN
input_tensor = quantize_input(input_tensor, input_binding_info)
-input_tensors = ann.make_input_tensors([input_binding_info], [input_tensor])
+input_tensors = ann.make_input_tensors([input_binding_info], [input_data])
```
Note: `ArmnnNetworkExecutor` has already created the output tensors for you.
@@ -172,4 +172,4 @@ Having now gained a solid understanding of performing automatic speech recogniti
An important step to improving accuracy of the generated output sentences is by providing cleaner data to the network. This can be done by including additional preprocessing steps such as noise reduction of your audio data.
-In this application, we had used a greedy decoder to decode the integer-encoded output however, better results can be achieved by implementing a beam search decoder. You may even try adding a language model at the end to aim to correct any spelling mistakes the model may produce.
\ No newline at end of file
+In this application, we had used a greedy decoder to decode the integer-encoded output however, better results can be achieved by implementing a beam search decoder. You may even try adding a language model at the end to aim to correct any spelling mistakes the model may produce.
diff --git a/python/pyarmnn/examples/speech_recognition/run_audio_file.py b/python/pyarmnn/examples/speech_recognition/run_audio_file.py
index 0430f68c16..ddf6cb704c 100644
--- a/python/pyarmnn/examples/speech_recognition/run_audio_file.py
+++ b/python/pyarmnn/examples/speech_recognition/run_audio_file.py
@@ -12,7 +12,7 @@ sys.path.insert(1, os.path.join(script_dir, '..', 'common'))
from argparse import ArgumentParser
from network_executor import ArmnnNetworkExecutor
-from utils import prepare_input_tensors
+from utils import prepare_input_data
from audio_capture import AudioCaptureParams, capture_audio
from audio_utils import decode_text, display_text
from wav2letter_mfcc import Wav2LetterMFCC, W2LAudioPreprocessor
@@ -78,10 +78,11 @@ def main(args):
print("Processing Audio Frames...")
for audio_data in buffer:
# Prepare the input Tensors
- input_tensors = prepare_input_tensors(audio_data, network.input_binding_info, preprocessor)
+ input_data = prepare_input_data(audio_data, network.get_data_type(), network.get_input_quantization_scale(0),
+ network.get_input_quantization_offset(0), preprocessor)
# Run inference
- output_result = network.run(input_tensors)
+ output_result = network.run([input_data])
# Slice and Decode the text, and store the right context
current_r_context, text = decode_text(is_first_window, labels, output_result)