Source code for mltk.datasets.audio.mit_ir_survey

"""MIT Impulse Response Survey
=================================

This is a dataset of environmental impulse responses from various real-world locations,
collected in the `MIT IR Survey <https://mcdermottlab.mit.edu/Reverb/IR_Survey.html>`_ by Traer and McDermott.

Each audio file is a waveform which contains the impulse response of a location.
That is to say, how an instantaneous pressure at $t = 0$ is reflected, damped and scattered in the environment.

By convolving the dataset impulse responses with an audio clip,
we can simulate how that audio clip would sound if emitted and recorded in the environment
where the impulse response was recorded. This is a technique commonly used for data augmentation in
audio processing problems, commonly referred to as
multi-style training (see `Deep Spoken Keyword Spotting: An Overview <https://arxiv.org/pdf/2111.10592.pdf>`_), simulated reverberation
(see e.g. `End-to-End Streaming Keyword Spotting <https://arxiv.org/pdf/1812.02802.pdf>`_) or acoustic simulation.

License
---------

CC-BY 4.0 (see `MIT Creative Commons License <https://creativecommons.org/licenses/by/4.0>`_ for details).

Credits
--------------

Traer and McDermott 2016 paper `Statistics of natural reverberation enable perceptual separation of sound and space <https://www.pnas.org/doi/full/10.1073/pnas.1612524113>`_

"""
from typing import List
import os
import logging
import numpy as np

from mltk.core.preprocess.utils import audio as audio_utils
from mltk.utils.python import append_exception_msg
from mltk.utils.archive_downloader import download_verify_extract





# Archive location; download() passes this as ``url`` to download_verify_extract()
DOWNLOAD_URL = 'https://mcdermottlab.mit.edu/Reverb/IRMAudio/Audio.zip'
"""Public download URL"""
# Expected archive digest; download() passes this as ``file_hash`` to download_verify_extract()
VERIFY_SHA1 = 'de04f5be419c12f4f847f65d7ef8e2356b73aa38'
"""SHA1 hash of the downloaded archive file"""



def download(
    dest_dir:str=None,
    dest_subdir='datasets/mit_ir_survey',
    logger:logging.Logger=None,
    clean_dest_dir=False,
    sample_rate_hz=16000,
) -> str:
    """Download and extract the dataset, then convert its samples to ``sample_rate_hz``

    The archive at ``DOWNLOAD_URL`` is retrieved via ``download_verify_extract()``.
    Every ``.wav`` file found in the extracted ``Audio`` sub-directory is then read,
    cast to float32, resampled when its native rate differs from ``sample_rate_hz``,
    and written directly into the returned directory under the same file name.

    Converted files that already exist are left untouched, so calling this function
    again with a different ``sample_rate_hz`` does NOT regenerate them.

    Args:
        dest_dir: Forwarded to ``download_verify_extract()``. When given, ``dest_subdir`` is ignored
        dest_subdir: Forwarded to ``download_verify_extract()`` when ``dest_dir`` is not given
        logger: Forwarded to ``download_verify_extract()``
        clean_dest_dir: Forwarded to ``download_verify_extract()``
        sample_rate_hz: Sample rate, in Hz, of the converted ``.wav`` files

    Returns:
        The directory path to the extracted dataset

    Raises:
        ModuleNotFoundError: If the ``soundfile`` package is not installed
    """
    try:
        import soundfile
    except ModuleNotFoundError as e:
        # Attach an install hint, then re-raise the original exception unchanged
        append_exception_msg(e, 'Try running the command: pip install soundfile')
        raise

    # An explicit destination directory takes precedence over the default sub-directory
    if dest_dir:
        dest_subdir = None

    sample_dir = download_verify_extract(
        url=DOWNLOAD_URL,
        archive_fname='mit_ir_survey.zip',
        dest_dir=dest_dir,
        dest_subdir=dest_subdir,
        file_hash=VERIFY_SHA1,
        show_progress=False,
        remove_root_dir=False,
        clean_dest_dir=clean_dest_dir,
        logger=logger
    )

    # The archive keeps its root directory (remove_root_dir=False),
    # so the raw samples are read from the "Audio" sub-directory
    src_dir = f'{sample_dir}/Audio'
    for fn in os.listdir(src_dir):
        if not fn.endswith('.wav'):
            continue

        dst_path = f'{sample_dir}/{fn}'
        if os.path.exists(dst_path):
            # Already converted by a previous call
            continue

        data, sr = soundfile.read(f'{src_dir}/{fn}')
        data = data.astype(np.float32)
        if sr != sample_rate_hz:
            data = audio_utils.resample(data, orig_sr=sr, target_sr=sample_rate_hz)
        audio_utils.write_audio_file(dst_path, data, sample_rate=sample_rate_hz)

    return sample_dir



def apply_ir(
    audio: np.ndarray,
    ir: np.ndarray
) -> np.ndarray:
    """Apply an impulse response to the given audio sample

    The audio is convolved with the impulse response using an FFT-based
    convolution in ``full`` mode, i.e. the reverberation tail is retained.
    For 1-D inputs the result therefore has ``len(audio) + len(ir) - 1`` samples.

    Args:
        audio: The audio sample to process
        ir: The impulse response to apply to the audio sample

    Returns:
        The full convolution of ``audio`` and ``ir``

    Raises:
        ModuleNotFoundError: If the ``scipy`` package is not installed
    """
    try:
        from scipy import signal
    except ModuleNotFoundError as e:
        # Attach an install hint, then re-raise the original exception unchanged
        append_exception_msg(e, 'Try running the command: pip install scipy')
        raise

    # mode='full' is scipy's default; spelled out so the output length is explicit
    return signal.fftconvolve(audio, ir, mode='full')


def load_dataset(dataset_dir:str) -> List[np.ndarray]:
    """Load the impulse response dataset directory into RAM

    Every ``.wav`` file directly inside ``dataset_dir`` is read and cast to float32.
    The sample rate stored in each file is discarded.

    Files are loaded in sorted file name order so that the returned list
    (and therefore any seeded index into it) is the same on every platform.

    Args:
        dataset_dir: Directory containing the ``.wav`` impulse responses

    Returns:
        List of float32 arrays, one per ``.wav`` file

    Raises:
        ModuleNotFoundError: If the ``soundfile`` package is not installed
    """
    try:
        import soundfile
    except ModuleNotFoundError as e:
        # Attach an install hint, then re-raise the original exception unchanged
        append_exception_msg(e, 'Try running the command: pip install soundfile')
        raise

    retval = []
    # os.listdir() returns entries in arbitrary order; sort for a reproducible result
    for fn in sorted(os.listdir(dataset_dir)):
        if not fn.endswith('.wav'):
            continue
        data, _ = soundfile.read(f'{dataset_dir}/{fn}')
        retval.append(data.astype(np.float32))

    return retval



def apply_random_ir(
    audio: np.ndarray,
    ir_samples:List[np.ndarray],
    seed:int=42
) -> np.ndarray:
    """Apply a random impulse response to the given audio sample

    A new random generator is created from ``seed`` on every call and used to
    pick one entry of ``ir_samples``. The selection is therefore fully determined
    by ``seed`` and ``len(ir_samples)``.

    NOTE: With the default ``seed`` every call selects the same impulse response.
    Pass a different seed per call to vary the selection.

    Args:
        audio: The audio sample to process
        ir_samples: Candidate impulse responses, e.g. as returned by ``load_dataset()``
        seed: Seed of the random generator used to select the impulse response

    Returns:
        ``audio`` convolved with the selected impulse response, see ``apply_ir()``

    Raises:
        ValueError: If ``ir_samples`` is empty
    """
    # len() rather than truthiness so that array-like containers are accepted as well
    if len(ir_samples) == 0:
        raise ValueError('ir_samples must contain at least one impulse response')

    rgen = np.random.RandomState(seed=seed)
    index = rgen.choice(len(ir_samples))

    return apply_ir(audio, ir_samples[index])