class AudioDatasetGenerator:
    """Utility for generating synthetic keyword datasets

    See the `Synthetic Audio Dataset Generation <https://siliconlabs.github.io/mltk/mltk/tutorials/synthetic_audio_dataset_generation.html>`_ tutorial for more details.

    .. note:: The generated audio files are 16kHz, 16-bit PCM ``.wav`` files.

    Args:
        out_dir: Directory where dataset will be generated
        n_jobs: Number of parallel processing jobs
    """

    @staticmethod
    def list_supported_backends() -> List[str]:
        """Return a list of the available backends"""
        return list(BACKENDS.keys())

    @property
    def is_running(self) -> bool:
        """Return if the processing pool is active"""
        return self._pool.is_running

    @property
    def out_dir(self) -> str:
        """Return the output directory where the dataset is generated"""
        return self._out_dir

    def is_backend_loaded(self, backend: str, raise_exception=False) -> bool:
        """Return if the given backend has been loaded

        Args:
            backend: Name of the backend to check
            raise_exception: If true, raise an exception instead of returning False

        Returns:
            True if the backend is known and loaded, False otherwise

        Raises:
            ValueError: If ``raise_exception=True`` and the backend is unknown or not loaded
        """
        if backend not in BACKENDS:
            if raise_exception:
                raise ValueError(f'Unknown backend: {backend}, supported backends are: {", ".join(AudioDatasetGenerator.list_supported_backends())}')
            return False

        if backend not in self._backends:
            if raise_exception:
                raise ValueError(f'Backend: {backend} not loaded')
            return False

        return True

    def load_backend(self, name: str, install_python_package=False, **kwargs):
        """Load the specified backend

        NOTE: The backend's corresponding "credentials" must be provided

        Additional kwargs may be passed to the backend's initialization.
        Refer to the backend's docs for the available kwargs:

        - ``name=aws`` --> `boto3.session.Session <https://boto3.amazonaws.com/v1/documentation/api/latest/guide/session.html>`_
        - ``name=azure`` --> `azure.cognitiveservices.speech.SpeechConfig <https://learn.microsoft.com/en-us/python/api/azure-cognitiveservices-speech/azure.cognitiveservices.speech.speechconfig?source=recommendations&view=azure-python>`_
        - ``name=gcp`` --> `google.cloud.texttospeech.TextToSpeechClient <https://cloud.google.com/python/docs/reference/texttospeech/latest/google.cloud.texttospeech_v1beta1.services.text_to_speech.TextToSpeechClient>`_

        Args:
            name: The name of the cloud backend, see :py:func:`~list_supported_backends`
            install_python_package: If true, then automatically install the backend's corresponding Python package (if necessary)
            kwargs: Additional keyword args to pass to the backend's Python package (see comment above)

        Raises:
            ValueError: If ``name`` is not a supported backend
            RuntimeError: If the backend has already been loaded
        """
        if name not in BACKENDS:
            raise ValueError(f'Unknown backend: {name}, supported backends are: {", ".join(AudioDatasetGenerator.list_supported_backends())}')
        if self.is_backend_loaded(name):
            raise RuntimeError(f'Backend {name} already loaded')

        backend = BACKENDS[name]()
        backend.load(install_python_package=install_python_package, **kwargs)

        # Only register the backend once it has loaded successfully
        self._backends[name] = backend
        # Pending work is queued per backend instance
        self._pending_configs[backend] = []

    def list_languages(self, backend: str = None) -> List[str]:
        """Return a list of the available language codes

        Args:
            backend: If provided, then only return languages supported by backend,
                else return languages for all loaded backends

        Returns:
            Sorted list of unique language codes
        """
        languages = set()
        for backend_name in self._get_backend_list(backend):
            languages.update(self._backends[backend_name].list_languages())
        return sorted(languages)

    def list_voices(self, language_code: str = None, backend: str = None) -> List[Voice]:
        """Return a list of the available "voices"

        Args:
            language_code: If provided, then only returned voices that support given language code,
                else return all languages
            backend: If provided, then only return voices supported by backend,
                else return voices for all loaded backends

        Returns:
            List of voices sorted by backend, language code, then name
        """
        retval: List[Voice] = []
        for backend_name in self._get_backend_list(backend):
            retval.extend(self._backends[backend_name].list_voices(language_code=language_code))
        return sorted(retval, key=lambda v: (v.backend, v.language_code, v.name))

    def list_configurations(
        self,
        keywords: List[Keyword],
        augmentations: List[Augmentation],
        voices: List[Voice],
        truncate=False,
        seed: int = None,
    ) -> Dict[Keyword, List[GenerationConfig]]:
        """Generate a list of generation configurations

        Generate a list of all possible combinations of the given keywords, augmentations, and voices.

        If the ``truncate`` argument is provided, then shuffle the generated list and
        return the truncated list based on the ``max_count`` specified in the ``keywords``.

        Args:
            keywords: List of keywords to use for the generation configurations
            augmentations: List of augmentations to apply to each keyword
            voices: List of voices to use for keyword generation
            truncate: If true, then randomly shuffle all possible combinations and return a truncated list of configurations.
                The truncated count is specified in the ``max_count`` field of the keywords
            seed: Seed to use for randomly shuffling the truncated list.
                Any integer (including ``0``) is honored; ``None`` leaves the random state untouched

        Returns:
            Dictionary of keywords and corresponding list of configurations
        """
        retval: Dict[Keyword, List[GenerationConfig]] = {}

        for keyword in keywords:
            keyword_configs: List[GenerationConfig] = []

            for voice in voices:
                base_configs = self._backends[voice.backend].list_configurations(
                    augmentations=augmentations,
                    voice=voice
                )
                for kw in keyword.as_list():
                    for base_config in base_configs:
                        # Copy so each keyword variant owns its own configuration object
                        config = base_config.copy()
                        config.keyword = kw
                        config.keyword_group = keyword.value
                        keyword_configs.append(config)

            if truncate and keyword.max_count:
                # "is not None" so that seed=0 is a valid, reproducible seed.
                # The generator is re-seeded for every keyword, as before.
                if seed is not None:
                    random.seed(seed)
                random.shuffle(keyword_configs)
                keyword_configs = keyword_configs[:keyword.max_count]

            retval[keyword] = sorted(
                keyword_configs,
                key=lambda cfg: (
                    cfg.keyword,
                    cfg.voice.backend,
                    cfg.voice.language_code,
                    cfg.voice.name,
                    cfg.rate,
                    cfg.pitch
                )
            )

        return retval

    def count_characters(
        self,
        config: Dict[Keyword, List[GenerationConfig]],
    ) -> Dict[Keyword, Dict[str, int]]:
        """Count the number of characters that will be sent to each backend

        The cloud backends charge per character that is sent.
        This API returns the number of characters required for each keyword.

        Args:
            config: Dictionary of keywords and corresponding list of configurations returned by :py:func:`~list_configurations`

        Returns:
            Dictionary<keyword, Dictionary<backend, char count>>
        """
        retval: Dict[Keyword, Dict[str, int]] = {}

        for keyword, config_list in config.items():
            stats: Dict[str, int] = {}
            for cfg in config_list:
                backend_name = cfg.voice.backend
                n_chars = self._backends[backend_name].count_characters(cfg)
                stats[backend_name] = stats.get(backend_name, 0) + n_chars
            retval[keyword] = stats

        return retval

    def get_summary(
        self,
        config: Dict[Keyword, List[GenerationConfig]],
        as_dict=False
    ) -> Union[dict, str]:
        """Generate a summary of the given configurations

        Args:
            config: Dictionary of keywords and corresponding list of configurations returned by :py:func:`~list_configurations`
            as_dict: If true then return the summary as a dictionary, else return the summary as a string

        Returns:
            If ``as_dict=True`` then return the summary as a dictionary with the keys
            ``voices``, ``samples`` and ``characters``, else return the summary as a string
        """
        # Number of unique voices per backend
        voice_stats = {}
        seen_voices = set()
        for config_list in config.values():
            for cfg in config_list:
                if cfg.voice not in seen_voices:
                    seen_voices.add(cfg.voice)
                    backend = cfg.voice.backend
                    voice_stats[backend] = voice_stats.get(backend, 0) + 1

        # Number of samples per keyword, per backend
        sample_stats = {}
        for keyword, config_list in config.items():
            stats = {}
            for cfg in config_list:
                backend = cfg.voice.backend
                stats[backend] = stats.get(backend, 0) + 1
            sample_stats[keyword] = stats

        character_stats = self.count_characters(config)

        retval = dict(
            voices=voice_stats,
            samples=sample_stats,
            characters=character_stats
        )

        if as_dict:
            return retval

        def _fmt(value) -> str:
            return format_units(value, add_space=False, precision=1)

        # Collect the text pieces and join once at the end
        parts = []

        parts.append('Voice Counts\n')
        parts.append('---------------------\n')
        n_voices = 0
        for backend, count in retval['voices'].items():
            parts.append(f' {backend:<6s}: {count}\n')
            n_voices += count
        parts.append(f' Total : {n_voices}\n')
        parts.append('\n')

        parts.append('Keyword Counts\n')
        parts.append('---------------------\n')
        total_samples = 0
        for keyword, stats in retval['samples'].items():
            n_samples = 0
            parts.append(f' {keyword}:\n')
            for backend, count in stats.items():
                parts.append(f' {backend:<6s}: {_fmt(count)}\n')
                n_samples += count
                total_samples += count
            parts.append(f' Total : {_fmt(n_samples)}\n')
        parts.append(f' Overall total: {_fmt(total_samples)}\n')
        parts.append('\n')

        parts.append('Character Counts\n')
        parts.append('---------------------\n')
        backend_totals = {}
        for keyword, stats in retval['characters'].items():
            parts.append(f' {keyword}:\n')
            for backend, count in stats.items():
                parts.append(f' {backend:<6s}: {_fmt(count)}\n')
                backend_totals[backend] = backend_totals.get(backend, 0) + count
        parts.append(' Backend totals:\n')
        for backend, count in backend_totals.items():
            parts.append(f' {backend:<6s}: {_fmt(count)}\n')

        return ''.join(parts)

    def generate(
        self,
        config: GenerationConfig,
        on_finished: Callable[[str], None] = None
    ):
        """Generate a keyword using the given configuration

        This will generate a keyword using the given configuration in the specified :py:attr:`~out_dir`.
        Processing is done asynchronously in a thread pool.
        The ``on_finished`` will be invoked when processing is complete.
        Alternatively, call :py:func:`~join` to wait for all processing to complete.

        Args:
            config: The configuration to use for keyword generation
            on_finished: Optional callback to be invoked when generation completes
                The parameter given to the callback contains the file path to the generated audio file

        Raises:
            RuntimeError: If the processing pool is not running
            KeyError: If the backend of the configuration's voice has not been loaded
        """
        if not self.is_running:
            raise RuntimeError('Not running')

        with self._lock:
            backend = self._backends[config.voice.backend]
            self._pending_configs[backend].append((config, on_finished))
            # Signal anything waiting on the condition that new work was queued
            self._condition.notify_all()

    def join(self, timeout: float = None) -> bool:
        """Wait for all generation tasks to complete

        Args:
            timeout: The maximum amount of time in seconds to wait
                If not specified then wait forever.
                A value of ``0`` checks the current state once and returns immediately

        Returns:
            True if processing has completed, false else
        """
        # Monotonic clock so wall-clock adjustments cannot distort the timeout
        start_time = time.monotonic()
        is_processing = True

        with self._lock:
            while is_processing and self.is_running:
                is_processing = any(
                    self._n_config_processing > 0 or len(configs) > 0
                    for configs in self._pending_configs.values()
                )
                if not is_processing:
                    break

                # "is not None" so that timeout=0 is treated as "do not wait"
                # rather than "wait forever"
                if timeout is not None and (time.monotonic() - start_time) >= timeout:
                    break

                # Wake up periodically to re-check the state and the timeout
                self._condition.wait(0.100)

        return not is_processing

    def shutdown(self):
        """Shutdown the underlying thread pool"""
        self._pool.shutdown()
Important: We use cookies only for functional and traffic analytics.
We DO NOT use cookies for any marketing purposes. By using our site you acknowledge you have read and understood our Cookie Policy.