about summary refs log tree commit diff
path: root/python/pyarmnn/examples/speech_recognition
diff options
context:
space:
mode:
Diffstat (limited to 'python/pyarmnn/examples/speech_recognition')
-rw-r--r-- python/pyarmnn/examples/speech_recognition/README.md 4
-rw-r--r-- python/pyarmnn/examples/speech_recognition/run_audio_file.py 7
2 files changed, 6 insertions, 5 deletions
diff --git a/python/pyarmnn/examples/speech_recognition/README.md b/python/pyarmnn/examples/speech_recognition/README.md
index 2cdc8691d2..854cdaf03b 100644
--- a/python/pyarmnn/examples/speech_recognition/README.md
+++ b/python/pyarmnn/examples/speech_recognition/README.md
@@ -151,7 +151,7 @@ for i in range(features.shape[1]):
# audio_utils.py
# Quantize the input data and create input tensors with PyArmNN
input_tensor = quantize_input(input_tensor, input_binding_info)
-input_tensors = ann.make_input_tensors([input_binding_info], [input_tensor])
+input_tensors = ann.make_input_tensors([input_binding_info], [input_data])
```
Note: `ArmnnNetworkExecutor` has already created the output tensors for you.
@@ -172,4 +172,4 @@ Having now gained a solid understanding of performing automatic speech recogniti
An important step to improving accuracy of the generated output sentences is by providing cleaner data to the network. This can be done by including additional preprocessing steps such as noise reduction of your audio data.
-In this application, we had used a greedy decoder to decode the integer-encoded output however, better results can be achieved by implementing a beam search decoder. You may even try adding a language model at the end to aim to correct any spelling mistakes the model may produce.
\ No newline at end of file
+In this application, we had used a greedy decoder to decode the integer-encoded output however, better results can be achieved by implementing a beam search decoder. You may even try adding a language model at the end to aim to correct any spelling mistakes the model may produce.
diff --git a/python/pyarmnn/examples/speech_recognition/run_audio_file.py b/python/pyarmnn/examples/speech_recognition/run_audio_file.py
index 0430f68c16..ddf6cb704c 100644
--- a/python/pyarmnn/examples/speech_recognition/run_audio_file.py
+++ b/python/pyarmnn/examples/speech_recognition/run_audio_file.py
@@ -12,7 +12,7 @@ sys.path.insert(1, os.path.join(script_dir, '..', 'common'))
from argparse import ArgumentParser
from network_executor import ArmnnNetworkExecutor
-from utils import prepare_input_tensors
+from utils import prepare_input_data
from audio_capture import AudioCaptureParams, capture_audio
from audio_utils import decode_text, display_text
from wav2letter_mfcc import Wav2LetterMFCC, W2LAudioPreprocessor
@@ -78,10 +78,11 @@ def main(args):
print("Processing Audio Frames...")
for audio_data in buffer:
# Prepare the input Tensors
- input_tensors = prepare_input_tensors(audio_data, network.input_binding_info, preprocessor)
+ input_data = prepare_input_data(audio_data, network.get_data_type(), network.get_input_quantization_scale(0),
+ network.get_input_quantization_offset(0), preprocessor)
# Run inference
- output_result = network.run(input_tensors)
+ output_result = network.run([input_data])
# Slice and Decode the text, and store the right context
current_r_context, text = decode_text(is_first_window, labels, output_result)