Source code for mltk.utils.audio_dataset_generator.generator

from typing import List, Callable, Dict, Union, Tuple
import random
import threading
import functools
import os
import time
from multiprocessing.pool import ThreadPool



from mltk.utils.string_formatting import format_units

from .generator_types import (
    BackendBase,
    Voice,
    GenerationConfig,
    Keyword,
    Augmentation,
    logger
)
from .backends import BACKENDS


class AudioDatasetGenerator:
    """Utility for generating synthetic keyword datasets

    See the `Synthetic Audio Dataset Generation <https://siliconlabs.github.io/mltk/mltk/tutorials/synthetic_audio_dataset_generation.html>`_ tutorial for more details.

    .. note::
        The generated audio files are 16kHz, 16-bit PCM ``.wav`` files.

    Generation requests are queued per backend by :py:func:`~generate` and
    dispatched to a thread pool by a background daemon thread.

    Args:
        out_dir: Directory where dataset will be generated
        n_jobs: Number of parallel processing jobs
    """

    def __init__(self, out_dir: str, n_jobs: int = 4):
        self._backends: Dict[str, BackendBase] = {}
        self._out_dir = out_dir
        self._pool = _ProcessingPool(n_jobs=n_jobs)
        # A re-entrant lock guards the pending queues and the in-flight counter;
        # the condition shares it so waiters wake up when either one changes.
        self._lock = threading.RLock()
        self._condition = threading.Condition(lock=self._lock)
        self._pending_configs: Dict[BackendBase, List[Tuple[GenerationConfig, Callable]]] = {}
        self._n_config_processing = 0

        worker = threading.Thread(
            target=self._generation_loop,
            name='AudioDatasetGenerator',
            daemon=True
        )
        worker.start()

    @staticmethod
    def list_supported_backends() -> List[str]:
        """Return a list of the available backends"""
        return list(BACKENDS.keys())

    @property
    def is_running(self) -> bool:
        """Return if the processing pool is active"""
        return self._pool.is_running

    @property
    def out_dir(self) -> str:
        """Return the output directory where the dataset is generated"""
        return self._out_dir

    def is_backend_loaded(self, backend: str, raise_exception=False) -> bool:
        """Return if the given backend has been loaded

        Args:
            backend: Name of the backend to check
            raise_exception: If true, raise a ``ValueError`` instead of returning false

        Returns:
            True if the backend is supported and has been loaded, false else

        Raises:
            ValueError: If ``raise_exception=True`` and the backend is unknown or not loaded
        """
        if backend not in BACKENDS:
            if raise_exception:
                raise ValueError(
                    f'Unknown backend: {backend}, supported backends are: {", ".join(AudioDatasetGenerator.list_supported_backends())}'
                )
            return False

        if backend not in self._backends:
            if raise_exception:
                raise ValueError(f'Backend: {backend} not loaded')
            return False

        return True

    def load_backend(self, name: str, install_python_package=False, **kwargs):
        """Load the specified backend

        NOTE: The backend's corresponding "credentials" must be provided

        Additional kwargs may be passed to the backend's initialization.
        Refer to the backend's docs for the available kwargs:

        - ``name=aws`` --> `boto3.session.Session <https://boto3.amazonaws.com/v1/documentation/api/latest/guide/session.html>`_
        - ``name=azure`` --> `azure.cognitiveservices.speech.SpeechConfig <https://learn.microsoft.com/en-us/python/api/azure-cognitiveservices-speech/azure.cognitiveservices.speech.speechconfig?source=recommendations&view=azure-python>`_
        - ``name=gcp`` --> `google.cloud.texttospeech.TextToSpeechClient <https://cloud.google.com/python/docs/reference/texttospeech/latest/google.cloud.texttospeech_v1beta1.services.text_to_speech.TextToSpeechClient>`_

        Args:
            name: The name of the cloud backend, see :py:func:`~list_supported_backends`
            install_python_package: If true, then automatically install the backend's corresponding Python package (if necessary)
            kwargs: Additional keyword args to pass to the backend's Python package (see comment above)

        Raises:
            ValueError: If the backend name is not supported
            RuntimeError: If the backend has already been loaded
        """
        if name not in BACKENDS:
            raise ValueError(
                f'Unknown backend: {name}, supported backends are: {", ".join(AudioDatasetGenerator.list_supported_backends())}'
            )
        if self.is_backend_loaded(name):
            raise RuntimeError(f'Backend {name} already loaded')

        backend = BACKENDS[name]()
        backend.load(install_python_package=install_python_package, **kwargs)

        # The background thread iterates _pending_configs, so register the
        # new backend under the lock to avoid resizing the dict mid-iteration.
        with self._lock:
            self._backends[name] = backend
            self._pending_configs[backend] = []

    def list_languages(self, backend: str = None) -> List[str]:
        """Return a list of the available language codes

        Args:
            backend: If provided, then only return languages supported by backend, else return languages for all loaded backends

        Returns:
            Sorted list of language codes
        """
        languages = set()
        for backend_name in self._get_backend_list(backend):
            languages.update(self._backends[backend_name].list_languages())
        return sorted(languages)

    def list_voices(self, language_code: str = None, backend: str = None) -> List[Voice]:
        """Return a list of the available "voices"

        Args:
            language_code: If provided, then only return voices that support the given language code, else return all languages
            backend: If provided, then only return voices supported by backend, else return voices for all loaded backends

        Returns:
            List of voices sorted by backend, language code and name
        """
        retval: List[Voice] = []
        for backend_name in self._get_backend_list(backend):
            retval.extend(self._backends[backend_name].list_voices(language_code=language_code))
        return sorted(retval, key=lambda x: (x.backend, x.language_code, x.name))

    def list_configurations(
        self,
        keywords: List[Keyword],
        augmentations: List[Augmentation],
        voices: List[Voice],
        truncate=False,
        seed: int = None,
    ) -> Dict[Keyword, List[GenerationConfig]]:
        """Generate a list of generation configurations

        Generate a list of all possible combinations of the given keywords, augmentations, and voices.
        If the ``truncate`` argument is provided, then shuffle the generated list and return the truncated
        list based on the ``max_count`` specified in the ``keywords``.

        Args:
            keywords: List of keywords to use for the generation configurations
            augmentations: List of augmentations to apply to each keyword
            voices: List of voices to use for keyword generation
            truncate: If true, then randomly shuffle all possible combinations and return a truncated list of configurations.
                The truncated count is specified in the ``max_count`` field of the keywords
            seed: Seed to use for randomly shuffling the truncated list

        Returns:
            Dictionary of keywords and corresponding list of configurations
        """
        retval: Dict[Keyword, List[GenerationConfig]] = {}

        for keyword in keywords:
            keyword_configs: List[GenerationConfig] = []
            for voice in voices:
                base_configs = self._backends[voice.backend].list_configurations(
                    augmentations=augmentations,
                    voice=voice
                )
                for kw in keyword.as_list():
                    for base_config in base_configs:
                        # Copy so each keyword variant gets its own config instance
                        config = base_config.copy()
                        config.keyword = kw
                        config.keyword_group = keyword.value
                        keyword_configs.append(config)

            if truncate and keyword.max_count:
                # Compare against None so that seed=0 is honoured as a valid seed
                if seed is not None:
                    random.seed(seed)
                random.shuffle(keyword_configs)
                keyword_configs = keyword_configs[:keyword.max_count]

            retval[keyword] = sorted(
                keyword_configs,
                key=lambda x: (x.keyword, x.voice.backend, x.voice.language_code, x.voice.name, x.rate, x.pitch)
            )

        return retval

    def count_characters(
        self,
        config: Dict[Keyword, List[GenerationConfig]],
    ) -> Dict[Keyword, Dict[str, int]]:
        """Count the number of characters that will be sent to each backend

        The cloud backends charge per character that is sent.
        This API returns the number of characters required for each keyword.

        Args:
            config: Dictionary of keywords and corresponding list of configurations returned by :py:func:`~list_configurations`

        Returns:
            Dictionary<keyword, Dictionary<backend, char count>>
        """
        retval: Dict[Keyword, Dict[str, int]] = {}
        for keyword, config_list in config.items():
            stats: Dict[str, int] = {}
            for cfg in config_list:
                backend_name = cfg.voice.backend
                n_chars = self._backends[backend_name].count_characters(cfg)
                stats[backend_name] = stats.get(backend_name, 0) + n_chars
            retval[keyword] = stats
        return retval

    def get_summary(
        self,
        config: Dict[Keyword, List[GenerationConfig]],
        as_dict=False
    ) -> Union[dict, str]:
        """Generate a summary of the given configurations

        Args:
            config: Dictionary of keywords and corresponding list of configurations returned by :py:func:`~list_configurations`
            as_dict: If true then return the summary as a dictionary, else return the summary as a string

        Returns:
            If ``as_dict=True`` then return the summary as a dictionary, else return the summary as a string
        """
        # Number of distinct voices used per backend
        voice_stats = {}
        seen_voices = set()
        for config_list in config.values():
            for cfg in config_list:
                if cfg.voice not in seen_voices:
                    seen_voices.add(cfg.voice)
                    backend = cfg.voice.backend
                    voice_stats[backend] = voice_stats.get(backend, 0) + 1

        # Number of samples per keyword per backend
        sample_stats = {}
        for keyword, config_list in config.items():
            stats = {}
            for cfg in config_list:
                backend = cfg.voice.backend
                stats[backend] = stats.get(backend, 0) + 1
            sample_stats[keyword] = stats

        retval = dict(
            voices=voice_stats,
            samples=sample_stats,
            characters=self.count_characters(config)
        )

        if as_dict:
            return retval
        return self._format_summary(retval)

    @staticmethod
    def _format_summary(summary: dict) -> str:
        """Render the summary dictionary built by ``get_summary`` as text"""
        fmt = functools.partial(format_units, add_space=False, precision=1)

        lines = ['Voice Counts\n', '---------------------\n']
        n_voices = 0
        for backend, count in summary['voices'].items():
            lines.append(f' {backend:<6s}: {count}\n')
            n_voices += count
        lines.append(f' Total : {n_voices}\n')
        lines.append('\n')

        lines.append('Keyword Counts\n')
        lines.append('---------------------\n')
        total_samples = 0
        for keyword, stats in summary['samples'].items():
            n_samples = 0
            lines.append(f' {keyword}:\n')
            for backend, count in stats.items():
                lines.append(f' {backend:<6s}: {fmt(count)}\n')
                n_samples += count
                total_samples += count
            lines.append(f' Total : {fmt(n_samples)}\n')
        lines.append(f' Overall total: {fmt(total_samples)}\n')
        lines.append('\n')

        lines.append('Character Counts\n')
        lines.append('---------------------\n')
        backend_totals = {}
        for keyword, stats in summary['characters'].items():
            lines.append(f' {keyword}:\n')
            for backend, count in stats.items():
                lines.append(f' {backend:<6s}: {fmt(count)}\n')
                backend_totals[backend] = backend_totals.get(backend, 0) + count
        lines.append(' Backend totals:\n')
        for backend, count in backend_totals.items():
            lines.append(f' {backend:<6s}: {fmt(count)}\n')

        return ''.join(lines)

    def generate(
        self,
        config: GenerationConfig,
        on_finished: Callable[[str], None] = None
    ):
        """Generate a keyword using the given configuration

        This will generate a keyword using the given configuration in the specified :py:attr:`~out_dir`.
        Processing is done asynchronously in a thread pool. The ``on_finished`` will be invoked when processing is complete.
        Alternatively, call :py:func:`~join` to wait for all processing to complete.

        Args:
            config: The configuration to use for keyword generation
            on_finished: Optional callback to be invoked when generation completes.
                The parameter given to the callback contains the file path to the generated audio file

        Raises:
            RuntimeError: If the generator has been shut down
        """
        if not self.is_running:
            raise RuntimeError('Not running')

        with self._lock:
            backend = self._backends[config.voice.backend]
            self._pending_configs[backend].append((config, on_finished))
            # Wake the background thread so the new request is dispatched promptly
            self._condition.notify_all()

    def join(self, timeout: float = None) -> bool:
        """Wait for all generation tasks to complete

        Args:
            timeout: The maximum amount of time in seconds to wait.
                If not specified then wait forever.
                A timeout of ``0`` checks once and returns immediately.

        Returns:
            True if processing has completed, false else
        """
        # Use the monotonic clock so wall-clock adjustments cannot distort the timeout
        deadline = None if timeout is None else time.monotonic() + timeout

        with self._lock:
            while self.is_running:
                if not self._has_outstanding_work():
                    return True
                if deadline is not None and time.monotonic() >= deadline:
                    return False
                # Short wait so shutdown and the deadline are re-checked regularly
                self._condition.wait(0.100)

            # The pool was shut down: report whether anything was left unfinished
            return not self._has_outstanding_work()

    def shutdown(self):
        """Shutdown the underlying thread pool"""
        self._pool.shutdown()

    def _has_outstanding_work(self) -> bool:
        """Return if any request is queued or in flight (caller must hold the lock)"""
        return self._n_config_processing > 0 or any(self._pending_configs.values())

    def _get_backend_list(self, backend: str = None) -> List[str]:
        """Return the backend names to query: the given one, or all loaded backends"""
        if backend:
            self.is_backend_loaded(backend, raise_exception=True)
        if not self._backends:
            raise RuntimeError('No backends loaded')
        return [backend] if backend else list(self._backends)

    def _generation_loop(self):
        """Background thread: dispatch queued requests until the pool is shut down"""
        while self.is_running:
            with self._lock:
                try:
                    # Woken early by generate() or a finished task, otherwise polls
                    self._condition.wait(timeout=0.010)
                except Exception:
                    # Best effort: a failed wait must not stop the dispatch loop
                    pass
                self._process_once()

    def _process_once(self):
        """Submit at most one queued request per backend to the thread pool"""
        if not self.is_running:
            return

        # Re-entrant lock: harmless when the caller already holds it
        with self._lock:
            for backend, configs in self._pending_configs.items():
                if not configs or backend.is_rate_limited:
                    continue

                config, callback = configs.pop(0)
                self._n_config_processing += 1
                try:
                    out_dir = f'{self._out_dir}/{config.keyword_group}'
                    os.makedirs(out_dir, exist_ok=True)
                    self._pool(
                        backend.generate,
                        config=config,
                        out_dir=out_dir,
                        _on_finished=functools.partial(self._on_finished, callback=callback),
                        _on_error=self._on_error
                    )
                except Exception as e:
                    # Submission failed (e.g. directory creation error or pool closed):
                    # release the in-flight slot so join() cannot hang on it.
                    self._on_error(e)

    def _on_finished(self, sample_path: str, callback: Callable = None):
        """Pool callback: notify the user and mark one request as completed"""
        try:
            if callback is not None:
                try:
                    callback(sample_path)
                except Exception:
                    # A failing user callback must not break the pool's result handling
                    logger.exception('on_finished callback failed')
        finally:
            # Decrement only after the callback returned so that join() does not
            # report completion while a callback is still running.
            with self._lock:
                self._n_config_processing -= 1
                self._condition.notify_all()

    def _on_error(self, e: Exception):
        """Pool error callback: log the failure and mark one request as completed"""
        with self._lock:
            logger.exception(f'{e}', exc_info=e)
            self._n_config_processing -= 1
            self._condition.notify_all()

    def __enter__(self):
        return self

    def __exit__(self, dtype, value, tb):
        # Always release the pool, even if waiting is interrupted
        try:
            self.join()
        finally:
            self.shutdown()
class _ProcessingPool:
    """Thread pool wrapper that runs each task through ``_process_with_retries``

    Args:
        n_jobs: Number of worker threads in the pool
    """

    def __init__(self, n_jobs: int):
        self.pool = ThreadPool(processes=n_jobs)
        # Set once shutdown() has been requested
        self._shutdown_event = threading.Event()

    @property
    def is_running(self) -> bool:
        """Return if the pool has not been shut down"""
        return not self._shutdown_event.is_set()

    def shutdown(self):
        """Mark the pool as stopped and stop accepting new tasks"""
        self._shutdown_event.set()
        self.pool.close()

    def __call__(self, func, *, _on_finished, _on_error, **kwargs):
        """Asynchronously run ``func(**kwargs)`` with retries

        Args:
            func: Callable to execute in a worker thread
            _on_finished: Invoked with the return value of ``func`` on success
            _on_error: Invoked with the exception if all attempts fail
            kwargs: Keyword arguments forwarded to ``func``
        """
        self.pool.apply_async(
            _process_with_retries,
            args=(func,),
            kwds=kwargs,
            callback=_on_finished,
            error_callback=_on_error
        )


def _process_with_retries(_func, **kwargs):
    """Call ``_func(**kwargs)``, retrying on failure, and return its result

    Up to three attempts are made with a 100ms pause between them.
    The exception of the last attempt is re-raised.
    A ``KeyboardInterrupt`` aborts immediately and returns ``None``.
    """
    max_attempts = 3
    for attempt in range(1, max_attempts + 1):
        try:
            # Propagate the result so the pool's success callback receives it
            return _func(**kwargs)
        except KeyboardInterrupt:
            return None
        except Exception:
            if attempt == max_attempts:
                raise
            time.sleep(0.100)
    return None