diff options
author | alexander <alexander.efremov@arm.com> | 2021-07-16 11:30:56 +0100 |
---|---|---|
committer | Jim Flynn <jim.flynn@arm.com> | 2022-02-04 09:55:21 +0000 |
commit | f42f56870c6201a876f025a423eb5540d7438e83 (patch) | |
tree | e8e57e371c851cbb9a51a2f3ec35059addd2e93e /python/pyarmnn/examples/speech_recognition/audio_utils.py | |
parent | 9d74ba6e85a043e9603445e062315f5c4965fbd6 (diff) | |
download | armnn-f42f56870c6201a876f025a423eb5540d7438e83.tar.gz |
MLECO-2079 Adding the python KWS example
Signed-off-by: Eanna O Cathain <eanna.ocathain@arm.com>
Change-Id: Ie1463aaeb5e3cade22df8f560ae99a8e1c4a9c17
Diffstat (limited to 'python/pyarmnn/examples/speech_recognition/audio_utils.py')
-rw-r--r-- | python/pyarmnn/examples/speech_recognition/audio_utils.py | 53 |
1 file changed, 1 insertion(+), 52 deletions(-)
diff --git a/python/pyarmnn/examples/speech_recognition/audio_utils.py b/python/pyarmnn/examples/speech_recognition/audio_utils.py index f03d2e1290..1ac78e8074 100644 --- a/python/pyarmnn/examples/speech_recognition/audio_utils.py +++ b/python/pyarmnn/examples/speech_recognition/audio_utils.py @@ -1,10 +1,9 @@ -# Copyright © 2020 Arm Ltd and Contributors. All rights reserved. +# Copyright © 2021 Arm Ltd and Contributors. All rights reserved. # SPDX-License-Identifier: MIT """Utilities for speech recognition apps.""" import numpy as np -import pyarmnn as ann def decode(model_output: np.ndarray, labels: dict) -> str: @@ -50,33 +49,6 @@ def display_text(text: str): print(text, sep="", end="", flush=True) -def quantize_input(data, input_binding_info): - """Quantize the float input to (u)int8 ready for inputting to model.""" - if data.ndim != 2: - raise RuntimeError("Audio data must have 2 dimensions for quantization") - - quant_scale = input_binding_info[1].GetQuantizationScale() - quant_offset = input_binding_info[1].GetQuantizationOffset() - data_type = input_binding_info[1].GetDataType() - - if data_type == ann.DataType_QAsymmS8: - data_type = np.int8 - elif data_type == ann.DataType_QAsymmU8: - data_type = np.uint8 - else: - raise ValueError("Could not quantize data to required data type") - - d_min = np.iinfo(data_type).min - d_max = np.iinfo(data_type).max - - for row in range(data.shape[0]): - for col in range(data.shape[1]): - data[row, col] = (data[row, col] / quant_scale) + quant_offset - data[row, col] = np.clip(data[row, col], d_min, d_max) - data = data.astype(data_type) - return data - - def decode_text(is_first_window, labels, output_result): """ Slices the text appropriately depending on the window, and decodes for wav2letter output. 
@@ -88,7 +60,6 @@ def decode_text(is_first_window, labels, output_result): is_first_window: Boolean to show if it is the first window we are running inference on labels: the label set output_result: the output from the inference - text: the current text string, to be displayed at the end Returns: current_r_context: the current right context text: the current text string, with the latest output decoded and appended @@ -109,25 +80,3 @@ def decode_text(is_first_window, labels, output_result): # Store the right context, we will need it after the last inference current_r_context = decode(output_result[0][0][0][right_context_start:], labels) return current_r_context, text - - -def prepare_input_tensors(audio_data, input_binding_info, mfcc_preprocessor): - """ - Takes a block of audio data, extracts the MFCC features, quantizes the array, and uses ArmNN to create the - input tensors. - - Args: - audio_data: The audio data to process - mfcc_instance: the mfcc class instance - input_binding_info: the model input binding info - mfcc_preprocessor: the mfcc preprocessor instance - Returns: - input_tensors: the prepared input tensors, ready to be consumed by the ArmNN NetworkExecutor - """ - - data_type = input_binding_info[1].GetDataType() - input_tensor = mfcc_preprocessor.extract_features(audio_data) - if data_type != ann.DataType_Float32: - input_tensor = quantize_input(input_tensor, input_binding_info) - input_tensors = ann.make_input_tensors([input_binding_info], [input_tensor]) - return input_tensors |