summaryrefslogtreecommitdiff
path: root/scripts/py/gen_utils.py
diff options
context:
space:
mode:
Diffstat (limited to 'scripts/py/gen_utils.py')
-rw-r--r--scripts/py/gen_utils.py115
1 files changed, 115 insertions, 0 deletions
diff --git a/scripts/py/gen_utils.py b/scripts/py/gen_utils.py
new file mode 100644
index 0000000..4a56646
--- /dev/null
+++ b/scripts/py/gen_utils.py
@@ -0,0 +1,115 @@
+#!env/bin/python3
+
+# Copyright (c) 2021 Arm Limited. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import soundfile as sf
+import resampy
+import numpy as np
+
+
class AudioUtils:
    """
    Static helpers to validate resample types and to load/resample audio clips.
    """

    @staticmethod
    def res_data_type(res_type_value):
        """
        Validate a resample type name.

        Parameters:
        ----------
        res_type_value (string): Candidate resample type.

        Returns:
        ----------
        The input string unchanged, if it is one of `res_type_list()`.

        Raises:
        ----------
        argparse.ArgumentTypeError: if the value is not a supported resample type.
        """
        import argparse
        if res_type_value not in AudioUtils.res_type_list():
            raise argparse.ArgumentTypeError(f"{res_type_value} not valid. Supported only {AudioUtils.res_type_list()}")
        return res_type_value

    @staticmethod
    def res_type_list():
        """
        Returns the list of supported resample type names.
        """
        return ['kaiser_best', 'kaiser_fast']

    @staticmethod
    def _fix_length(data, size, axis=-1):
        """
        Return `data` trimmed or zero-padded (at the end) to exactly `size` entries along `axis`.
        """
        current = data.shape[axis]

        if current > size:
            slices = [slice(None)] * data.ndim
            slices[axis] = slice(0, size)
            return data[tuple(slices)]

        if current < size:
            lengths = [(0, 0)] * data.ndim
            lengths[axis] = (0, size - current)
            return np.pad(data, lengths, 'constant', constant_values=0)

        # Already the requested length: nothing to adjust.
        return data

    @staticmethod
    def _pad_to_min_len(data, min_len, axis=-1):
        """
        Zero-pad `data` at the end of `axis` up to `min_len` entries; no-op if `min_len` <= 0 or already long enough.
        """
        if min_len > 0 and data.shape[axis] < min_len:
            return AudioUtils._fix_length(data, min_len, axis=axis)
        return data

    @staticmethod
    def load_resample_audio_clip(path, target_sr=16000, mono=True, offset=0.0, duration=0, res_type='kaiser_best',
                                 min_len=16000):
        """
        Load and resample an audio clip with the given desired specs.

        Parameters:
        ----------
        path (string):             Path to the input audio clip.
        target_sr (int, optional): Target sampling rate. Positive numbers are considered valid,
                                    if zero or negative the native sampling rate of the file will be preserved.
                                    Default is 16000.
        mono (bool, optional):     Specify if the audio file needs to be converted to mono. Default is True.
        offset (float, optional):  Start reading after this time, expressed in seconds of the
                                    original file (it is multiplied by the native sampling rate). Default is 0.0.
        duration (float, optional): Target duration, expressed in seconds of the original file.
                                    Positive numbers are considered valid, if zero or negative the whole
                                    remaining file is read. Default is 0.
        res_type (string, optional): Resample type to use (see `res_type_list()`). Default is 'kaiser_best'.
        min_len (int, optional):   Minimum length, in samples, of the output audio time series.
                                    Ignored if zero or negative. Default is 16000.

        Returns:
        ----------
        y (np.ndarray): Output audio time series of shape (n,) or (channels, n).
        sr (int):       A scalar number > 0 that represents the sampling rate of `y`.

        Raises:
        ----------
        Any exception raised while opening or reading the file is re-raised after
        printing a diagnostic message.
        """
        try:
            with sf.SoundFile(path) as audio_file:
                origin_sr = audio_file.samplerate

                if offset:
                    # Seek to the start of the target read
                    audio_file.seek(int(offset * origin_sr))

                # A negative frame count asks soundfile to read up to the end of the file
                num_frame_duration = int(duration * origin_sr) if duration > 0 else -1

                # Load the target number of frames; transpose so that samples are on the last axis
                y = audio_file.read(frames=num_frame_duration, dtype=np.float32, always_2d=False).T

        except Exception:
            # Report which file failed, then propagate the real error instead of
            # falling through with `y`/`origin_sr` undefined.
            print(f"Failed to open {path} as an audio.")
            raise

        # Convert to mono if requested and if audio has more than one dimension
        if mono and (y.ndim > 1):
            y = np.mean(y, axis=0)

        if (target_sr > 0) and (origin_sr != target_sr):
            ratio = float(target_sr) / origin_sr
            axis = -1
            n_samples = int(np.ceil(y.shape[axis] * ratio))

            # Resample using resampy
            y_rs = resampy.resample(y, origin_sr, target_sr, filter=res_type, axis=axis)

            # Always continue with the resampled data, trimming/padding it to the expected
            # size (this also covers the case where the size already matches).
            y = AudioUtils._fix_length(y_rs, n_samples, axis=axis)

            sr = target_sr
        else:
            sr = origin_sr

        # Pad along the sample axis if necessary and a minimum length is set (min_len > 0)
        y = AudioUtils._pad_to_min_len(y, min_len, axis=-1)

        return y, sr