"""Yes/No
=======================

This is a `synthetically <https://siliconlabs.github.io/mltk/mltk/tutorials/synthetic_audio_dataset_generation.html>`_ generated dataset with the keywords:

- **yes**
- **no**

The samples are 16kHz, 16-bit PCM ``.wav`` files.

.. seealso::

   - `AudioDatasetGenerator <https://siliconlabs.github.io/mltk/docs/python_api/utils/audio_dataset_generator/index.html>`_
   - `Synthetic Audio Dataset Generation Tutorial <https://siliconlabs.github.io/mltk/mltk/tutorials/synthetic_audio_dataset_generation.html>`_

"""
import logging
import os
import json

from mltk.utils.archive_downloader import download_verify_extract
from mltk.utils.path import create_user_dir, fullpath
from mltk.utils.audio_dataset_generator import (
    AudioDatasetGenerator,
    Keyword,
    Augmentation,
    VoiceRate,
    VoicePitch
)


# Archive location handed to download_verify_extract() by download()
DOWNLOAD_URL = 'https://www.silabs.com/public/files/github/mltk/datasets/sl_synthetic_yes_no.7z'
"""Public download URL"""
# Passed as ``file_hash`` alongside DOWNLOAD_URL when the archive is fetched
VERIFY_SHA1 = 'abf31f3444f17e94b5bded4d4e2a001b5a6cb1b7'
"""SHA1 hash of the downloaded archive file"""
# The same two words are used as the keywords in generate_dataset()
CLASSES = [
    'yes',
    'no',
]
"""The class labels of the dataset samples"""
def download(
    dest_dir: str = None,
    dest_subdir='datasets/yes_no',
    logger: logging.Logger = None,
    clean_dest_dir=False
) -> str:
    """Download and extract the dataset

    The archive at ``DOWNLOAD_URL`` is fetched, checked against
    ``VERIFY_SHA1`` and extracted by ``download_verify_extract()``.

    Args:
        dest_dir: Explicit destination directory. When given (truthy),
            ``dest_subdir`` is ignored.
        dest_subdir: Destination sub-directory forwarded to
            ``download_verify_extract()`` when ``dest_dir`` is not given.
        logger: Optional logger forwarded to ``download_verify_extract()``.
        clean_dest_dir: Forwarded unchanged to ``download_verify_extract()``.

    Returns:
        The directory path to the extracted dataset
    """
    # An explicit destination takes precedence over the default sub-directory
    effective_subdir = None if dest_dir else dest_subdir

    return download_verify_extract(
        url=DOWNLOAD_URL,
        dest_dir=dest_dir,
        dest_subdir=effective_subdir,
        file_hash=VERIFY_SHA1,
        show_progress=False,
        remove_root_dir=False,
        clean_dest_dir=clean_dest_dir,
        logger=logger,
    )
def generate_dataset(out_dir: str = None):
    """Generate the dataset

    This generates the dataset using the
    `AudioDatasetGenerator <https://siliconlabs.github.io/mltk/docs/python_api/utils/audio_dataset_generator/index.html>`_
    Python package provided by the MLTK.

    A cloud text-to-speech backend is loaded only when its credentials are
    detected:

    - GCP: the ``GOOGLE_APPLICATION_CREDENTIALS`` environment variable
    - AWS: the ``AWS_ACCESS_KEY_ID`` environment variable or a ``~/.aws`` path
    - Azure: the ``SPEECH_KEY`` environment variable

    A summary of the configurations is printed and the function then blocks
    on ``input()`` until the user presses "enter", because running the cloud
    backends is not free.

    Args:
        out_dir: Output directory for the generated samples. When omitted,
            the directory returned by
            ``create_user_dir('datasets/generated/yes_no')`` is used.
    """
    import tqdm

    KEYWORDS = [
        Keyword('yes', max_count=10000),
        Keyword('no', max_count=10000),
    ]

    AUGMENTATIONS = [
        Augmentation(rate=VoiceRate.xslow, pitch=VoicePitch.low),
        Augmentation(rate=VoiceRate.xslow, pitch=VoicePitch.medium),
        Augmentation(rate=VoiceRate.xslow, pitch=VoicePitch.high),
        Augmentation(rate=VoiceRate.medium, pitch=VoicePitch.low),
        Augmentation(rate=VoiceRate.medium, pitch=VoicePitch.medium),
        Augmentation(rate=VoiceRate.medium, pitch=VoicePitch.high),
        # Augmentation(rate=VoiceRate.xfast, pitch=VoicePitch.low),
        # Augmentation(rate=VoiceRate.xfast, pitch=VoicePitch.medium),
        # Augmentation(rate=VoiceRate.xfast, pitch=VoicePitch.high),
    ]

    out_dir = out_dir or create_user_dir('datasets/generated/yes_no')

    with AudioDatasetGenerator(out_dir=out_dir, n_jobs=8) as generator:
        # Load the cloud backends, installing the Python packages if necessary

        # See: https://codelabs.developers.google.com/codelabs/cloud-text-speech-python3
        if 'GOOGLE_APPLICATION_CREDENTIALS' in os.environ:
            # Best-effort normalization of the credentials path. Only ordinary
            # errors are suppressed, so Ctrl-C / SystemExit still propagate.
            try:
                os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = fullpath(
                    os.environ['GOOGLE_APPLICATION_CREDENTIALS']
                )
            except Exception:
                pass

            # JSON is UTF-8; do not depend on the platform's default encoding
            with open(os.environ['GOOGLE_APPLICATION_CREDENTIALS'], 'r', encoding='utf-8') as f:
                credentials = json.load(f)
                os.environ['PROJECT_ID'] = credentials['project_id']

            generator.load_backend('gcp', install_python_package=True)
            print('Loaded GCP backend')
        else:
            print('GOOGLE_APPLICATION_CREDENTIALS env not found, *not* loading GCP backend')

        # See: https://docs.aws.amazon.com/polly/latest/dg/get-started-what-next.html
        if 'AWS_ACCESS_KEY_ID' in os.environ or os.path.exists(os.path.expanduser('~/.aws')):
            generator.load_backend('aws', install_python_package=True)
            print('Loaded AWS backend')
        else:
            print('AWS_ACCESS_KEY_ID env not found, *not* loading AWS backend')

        # See: https://learn.microsoft.com/en-us/azure/cognitive-services/speech-service/get-started-text-to-speech?pivots=programming-language-python
        if 'SPEECH_KEY' in os.environ:
            generator.load_backend('azure', install_python_package=True)
            print('Loaded Azure backend')
        else:
            print('SPEECH_KEY env not found, *not* loading Azure backend')

        print('Listing voices ...')
        voices = generator.list_voices()

        # Generate a list of all possible configurations, randomly shuffle, then truncate
        # based on the "max_count" specified for each keyword
        print('Listing configurations ...')
        all_configurations = generator.list_configurations(
            keywords=KEYWORDS,
            augmentations=AUGMENTATIONS,
            voices=voices,
            truncate=True,
            seed=42
        )
        n_configs = sum(len(x) for x in all_configurations.values())

        # Print a summary of the configurations
        print(generator.get_summary(all_configurations))

        # Block until the user explicitly confirms, since generation costs money
        input(
            '\nWARNING: Running this script is NOT FREE!\n\n'
            'Each cloud backend charges a different rate per character.\n'
            'The character counts are listed above.\n\n'
            'Refer to each backend\'s docs for the latest pricing:\n'
            '- AWS: https://aws.amazon.com/polly/pricing\n'
            '- Azure: https://azure.microsoft.com/en-us/pricing/details/cognitive-services/speech-services\n'
            '- Google: https://cloud.google.com/text-to-speech/pricing\n'
            '\nPress "enter" to continue and generate the dataset\n'
        )

        # Generate the dataset (with pretty progress bars)
        print(f'Generating keywords at: {generator.out_dir}\n')
        with tqdm.tqdm(total=n_configs, desc='Overall'.rjust(10), unit='word', position=1) as pb_outer:
            for keyword, config_list in all_configurations.items():
                with tqdm.tqdm(desc=keyword.value.rjust(10), total=len(config_list), unit='word', position=0) as pb_inner:
                    for config in config_list:
                        # The callback advances both the per-keyword and the overall bar
                        generator.generate(
                            config,
                            on_finished=lambda _: (pb_inner.update(1), pb_outer.update(1))
                        )
                    # Wait for the current keyword to finish before continuing to the next.
                    # NOTE(review): the join stays inside the inner ``with`` so that,
                    # presumably, no callback touches pb_inner after it is closed.
                    generator.join()
Important: We use cookies only for functional and traffic analytics.
We DO NOT use cookies for any marketing purposes. By using our site you acknowledge you have read and understood our Cookie Policy.