diff options
author | alexander <alexander.efremov@arm.com> | 2021-03-26 21:42:19 +0000 |
---|---|---|
committer | Kshitij Sisodia <kshitij.sisodia@arm.com> | 2021-03-29 16:29:55 +0100 |
commit | 3c79893217bc632c9b0efa815091bef3c779490c (patch) | |
tree | ad06b444557eb8124652b45621d736fa1b92f65d /scripts/py/gen_utils.py | |
parent | 6ad6d55715928de72979b04194da1bdf04a4c51b (diff) | |
download | ml-embedded-evaluation-kit-3c79893217bc632c9b0efa815091bef3c779490c.tar.gz |
Opensource ML embedded evaluation kit 21.03
Change-Id: I12e807f19f5cacad7cef82572b6dd48252fd61fd
Diffstat (limited to 'scripts/py/gen_utils.py')
-rw-r--r-- | scripts/py/gen_utils.py | 115 |
1 files changed, 115 insertions, 0 deletions
#!env/bin/python3

# Copyright (c) 2021 Arm Limited. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import numpy as np


class AudioUtils:
    """
    Static helpers to validate resample options and to load, resample and pad audio clips.
    """

    @staticmethod
    def res_data_type(res_type_value):
        """
        Returns the input string if it is one of the valid resample types.

        Parameters:
        ----------
        res_type_value (string):    Candidate resample type name.

        Returns:
        ----------
        res_type_value (string):    The input, unchanged, when it is listed by `res_type_list()`.

        Raises:
        ----------
        argparse.ArgumentTypeError: If the value is not a supported resample type. Raising this
                                    exception type lets the function be used as an argparse
                                    `type=` callable.
        """
        import argparse
        if res_type_value not in AudioUtils.res_type_list():
            raise argparse.ArgumentTypeError(f"{res_type_value} not valid. Supported only {AudioUtils.res_type_list()}")
        return res_type_value

    @staticmethod
    def res_type_list():
        """
        Returns the list of supported resample types (a new list on every call).
        """
        return ['kaiser_best', 'kaiser_fast']

    @staticmethod
    def load_resample_audio_clip(path, target_sr=16000, mono=True, offset=0.0, duration=0, res_type='kaiser_best',
                                 min_len=16000):
        """
        Load and resample an audio clip with the given desired specs.

        Parameters:
        ----------
        path (string):              Path to the input audio clip.
        target_sr (int, optional):  Target sampling rate. Positive numbers are considered valid,
                                    if zero or negative the native sampling rate of the file will be preserved.
                                    Default is 16000.
        mono (bool, optional):      Specify if the audio file needs to be converted to mono (the channels
                                    are averaged). Default is True.
        offset (float, optional):   Start position of the read, in seconds from the beginning of the clip.
                                    Default is 0.0.
        duration (float, optional): Target duration in seconds. Positive numbers are considered valid,
                                    if zero or negative the clip is read up to its end. Default is 0.
        res_type (string, optional): Resample type to use, see `res_type_list()`. Default is 'kaiser_best'.
        min_len (int, optional):    Minimum length, in samples, of the output audio time series. Shorter
                                    outputs are zero-padded at the end; zero or negative disables padding.
                                    Default is 16000.

        Returns:
        ----------
        y (np.ndarray): Output audio time series of shape (n,) for single-channel (or mono-converted)
                        audio, or (channels, n) otherwise.
        sr (int):       A scalar number > 0 that represents the sampling rate of `y`.

        Raises:
        ----------
        Exception:      Whatever error the audio backend raised while opening, seeking or reading
                        `path`; a short message is printed before the error is propagated.
        """
        # Imported here, like argparse above, so that the light-weight validators of this class
        # stay usable when the audio back-end is not installed.
        import soundfile as sf

        try:
            with sf.SoundFile(path) as audio_file:
                origin_sr = audio_file.samplerate

                if offset:
                    # Seek to the start of the target read
                    audio_file.seek(int(offset * origin_sr))

                if duration > 0:
                    num_frame_duration = int(duration * origin_sr)
                else:
                    # -1 asks the reader for every remaining frame
                    num_frame_duration = -1

                # Load the target number of frames; transpose so that samples lie on the last axis
                y = audio_file.read(frames=num_frame_duration, dtype=np.float32, always_2d=False).T

        except Exception:
            # Keep the original diagnostic, but propagate the real error: carrying on would only
            # fail later with an unrelated "variable referenced before assignment".
            print(f"Failed to open {path} as an audio.")
            raise

        # Convert to mono if requested and if audio has more than one dimension
        if mono and (y.ndim > 1):
            y = np.mean(y, axis=0)

        if (origin_sr != target_sr) and (target_sr > 0):
            y = AudioUtils._resample(y, origin_sr, target_sr, res_type)
            sr = target_sr
        else:
            sr = origin_sr

        # Pad if necessary and a minimum length is set (min_len > 0)
        y = AudioUtils._pad_to_min_len(y, min_len)

        return y, sr

    @staticmethod
    def _resample(y, origin_sr, target_sr, res_type):
        """
        Resample `y` from `origin_sr` to `target_sr` along its last axis and force the
        result to have exactly ceil(n * target_sr / origin_sr) samples.
        """
        import resampy

        axis = -1
        ratio = float(target_sr) / origin_sr
        n_samples = int(np.ceil(y.shape[axis] * ratio))

        # Resample using resampy
        y_rs = resampy.resample(y, origin_sr, target_sr, filter=res_type, axis=axis)

        # Adjust the size. The resampled data is returned in every case, including when
        # its length already matches the expected one.
        return AudioUtils._fit_length(y_rs, n_samples, axis)

    @staticmethod
    def _fit_length(y, n_samples, axis=-1):
        """
        Return `y` with exactly `n_samples` entries along `axis`: longer inputs are truncated,
        shorter ones are zero-padded at the end, matching ones are returned as they are.
        """
        n_current = y.shape[axis]

        if n_current > n_samples:
            slices = [slice(None)] * y.ndim
            slices[axis] = slice(0, n_samples)
            return y[tuple(slices)]

        if n_current < n_samples:
            lengths = [(0, 0)] * y.ndim
            lengths[axis] = (0, n_samples - n_current)
            return np.pad(y, lengths, 'constant', constant_values=0)

        return y

    @staticmethod
    def _pad_to_min_len(y, min_len):
        """
        Zero-pad the end of the sample (last) axis of `y` up to `min_len` samples.
        `y` is returned untouched when `min_len` is not positive or `y` is already long enough.
        """
        # The sample axis is the last one for both (n,) and (channels, n) layouts, so the
        # channel count is never mistaken for the clip length.
        if (min_len <= 0) or (y.shape[-1] >= min_len):
            return y
        return AudioUtils._fit_length(y, min_len, axis=-1)