from typing import Union, Tuple, List, Callable
import types
import numpy as np
from mltk.utils.python import prepend_exception_msg
from mltk.utils.process_pool_manager import ProcessPoolManager
from mltk.core.utils import get_mltk_logger
from .data_generator_dataset_mixin import (DataGeneratorDatasetMixin, DataGeneratorContext)
from ..model_attributes import MltkModelAttributesDecorator
from ..model_event import MltkModelEvent
@MltkModelAttributesDecorator()
class AudioDatasetMixin(DataGeneratorDatasetMixin):
    """Provides audio dataset properties to the base :py:class:`~MltkModel`

    .. seealso::

       - `AudioFeatureGenerator documentation <https://siliconlabs.github.io/mltk/docs/audio/audio_feature_generator.html>`_
       - `AudioFeatureGenerator API docs <https://siliconlabs.github.io/mltk/docs/python_api/data_preprocessing/audio_feature_generator.html>`_
       - `ParallelAudioDataGenerator API docs <https://siliconlabs.github.io/mltk/docs/python_api/data_preprocessing/audio_data_generator.html>`_
    """
    @property
    def dataset(self) -> Union[str,types.ModuleType,Callable]:
        """Path to the audio dataset's python module, a function
        that manually loads the dataset, or the file path to a directory of samples.

        If a Python module is provided, it must implement the function:

        .. highlight:: python
        .. code-block:: python

           def load_data():
               ...

        which should return the file path to the dataset's directory.

        If a function is provided, the function should return
        the path to a directory containing the dataset's samples.
        """
        return self._attributes['dataset.dataset']

    @dataset.setter
    def dataset(self, v: Union[str,types.ModuleType,Callable]):
        self._attributes['dataset.dataset'] = v
    @property
    def follow_links(self) -> bool:
        """Whether to follow symlinks inside class subdirectories

        Default: ``True``
        """
        return self._attributes.get_value('audio.follow_links', default=True)

    @follow_links.setter
    def follow_links(self, v: bool):
        self._attributes['audio.follow_links'] = v
    @property
    def shuffle_dataset_enabled(self) -> bool:
        """Shuffle the dataset directory once

        Default: ``False``

        - If True, the dataset directory will be shuffled the first time it is processed and
          an index containing the shuffled file names is generated in the training log directory.
          The index is reused to maintain the shuffled order for subsequent processing.
        - If False, then the dataset samples are sorted alphabetically and saved to an index in the dataset directory.
          The alphabetical index file is used for subsequent processing.
        """
        return self._attributes.get_value('audio.shuffle_dataset_enabled', default=False)

    @shuffle_dataset_enabled.setter
    def shuffle_dataset_enabled(self, v: bool):
        self._attributes['audio.shuffle_dataset_enabled'] = v
    @property
    def class_mode(self) -> str:
        """Determines the type of label arrays that are returned.

        Default: ``categorical``

        - **categorical** - 2D one-hot encoded labels
        - **binary** - 1D binary labels
        - **sparse** - 1D integer labels
        - **input** - images identical to input images (mainly used to work with autoencoders)
        """
        return self._attributes.get_value('audio.class_mode', default='categorical')

    @class_mode.setter
    def class_mode(self, v: str):
        self._attributes['audio.class_mode'] = v
    @property
    def audio_classes(self) -> List[str]:
        """List of class labels the model should classify"""
        return self._attributes['audio.classes']

    @audio_classes.setter
    def audio_classes(self, v: List[str]):
        self._attributes['audio.classes'] = v
@property
def audio_input_shape(self) -> Tuple[int, int, int]:
"""Get the shape of the spectrogram generated by the :py:class:`mltk.core.preprocess.audio.audio_feature_generator.AudioFeatureGenerator` as (height, width, 1)
.. note::
If frontend_enabled = True then the input size is automatically calculated based the on the
:py:class:`mltk.core.preprocess.audio.audio_feature_generator.AudioFeatureGeneratorSettings`
If frontend_enabled = False then the input size must be manually specified.
"""
if self.datagen is not None and self.datagen.frontend_enabled:
if self._attributes.value_is_set('audio.manual_in_shape'):
get_mltk_logger().warning('When ParallelAudioDataGenerator.frontend_enabled=True, the model.input shape is automatically calculated, however, the input_shape has been manually set')
spectrogram_shape = self.datagen.frontend_settings.spectrogram_shape
if self.datagen.add_channel_dimension:
spectrogram_shape = spectrogram_shape + (1,)
return spectrogram_shape
else:
return self._attributes['audio.manual_in_shape']
@audio_input_shape.setter
def audio_input_shape(self, v):
if self.datagen is not None and self.datagen.frontend_enabled:
raise Exception('mltk_model.input_shape is determined dynamically based on the AudioFeatureGeneratorSettings when datagen.frontend_enabled=True. In this case, it cannot be manually set')
self._attributes['audio.manual_in_shape'] = v
@property
def sample_length_ms(self) -> int:
"""Get the data generator sample length in milliseconds"""
if self.datagen is None:
raise Exception('You must specify mltk_model.datagen')
return self.datagen.sample_length_ms
@sample_length_ms.setter
def sample_length_ms(self, v: int):
if self.datagen is None:
raise Exception('You must specify mltk_model.datagen')
self.datagen.sample_length_ms = v
@property
def sample_rate_hz(self) -> int:
"""Get the data generator sample rate in hertz"""
if self.datagen is None:
raise Exception('You must specify mltk_model.datagen')
return self.datagen.sample_rate_hz
@sample_rate_hz.setter
def sample_rate_hz(self, v: int):
if self.datagen is None:
raise Exception('You must specify mltk_model.datagen')
self.datagen.sample_rate_hz = v
@property
def frontend_settings(self):
"""Get the data generator's :py:class:`mltk.core.preprocess.audio.audio_feature_generator.AudioFeatureGeneratorSettingsSettings` """
if self.datagen is None:
raise Exception('You must specify mltk_model.datagen')
return self.datagen.frontend_settings
    @property
    def datagen(self):
        """Training data generator.

        Should be a reference to a :py:class:`mltk.core.preprocess.audio.parallel_generator.ParallelAudioDataGenerator` instance
        """
        return self._attributes.get_value('audio.datagen', default=None)

    @datagen.setter
    def datagen(self, v):
        self._attributes['audio.datagen'] = v
    @property
    def validation_datagen(self):
        """Validation/evaluation data generator.

        If omitted, then :py:attr:`~datagen` is used for validation and evaluation.

        Should be a reference to a :py:class:`mltk.core.preprocess.audio.parallel_generator.ParallelAudioDataGenerator` instance
        """
        return self._attributes.get_value('audio.validation_datagen', default=None)

    @validation_datagen.setter
    def validation_datagen(self, v):
        self._attributes['audio.validation_datagen'] = v
[docs] def load_dataset(
self,
subset: str,
classes: List[str]=None,
max_samples_per_class: int=-1,
test:bool = False,
**kwargs,
): # pylint: disable=arguments-differ
"""Pre-process the dataset and prepare the model dataset attributes
Args:
subset: Data subset name
"""
self.loaded_subset = subset
logger = get_mltk_logger()
ProcessPoolManager.set_logger(logger)
if self.datagen is None:
raise Exception('Must specify mltk_model.datgen')
if not classes:
if not self.classes or not isinstance(self.classes, (list,tuple)):
raise Exception('Must specify mltk_model.classes which must be a list of class labels')
classes = self.classes
# First download the dataset if necessary
if self.dataset is None:
raise Exception('Must specify mltk_model.dataset')
self.trigger_event(
MltkModelEvent.BEFORE_LOAD_DATASET,
subset=subset,
test=test,
**kwargs
)
dataset_dir = _load_dataset(self.dataset)
if not isinstance(dataset_dir, str):
raise Exception('Dataset must be a path to a directory')
if not hasattr(self, 'batch_size'):
logger.warning('MltkModel does not define batch_size, defaulting to 32')
batch_size = 32
else:
batch_size = self.batch_size
shuffle_index_dir = None
if self.shuffle_dataset_enabled:
shuffle_index_dir = self.get_shuffle_index_dir()
logger.debug(f'shuffle_index_dir={shuffle_index_dir}')
eval_shuffle = False
eval_augmentation_enabled = False
if test:
batch_size = 3
max_samples_per_class = batch_size
if hasattr(self, 'batch_size'):
self.batch_size = batch_size
self.datagen.max_batches_pending = 1
logger.debug(f'Test mode enabled, forcing max_samples_per_class={max_samples_per_class}, batch_size={batch_size}')
if self.loaded_subset == 'evaluation':
if hasattr(self, 'eval_shuffle'):
eval_shuffle = self.eval_shuffle
if hasattr(self, 'eval_augment'):
eval_augmentation_enabled = self.eval_augment
if max_samples_per_class == -1 and hasattr(self, 'eval_max_samples_per_class'):
max_samples_per_class = self.eval_max_samples_per_class
train_datagen = None
validation_datagen = None
if self.loaded_subset == 'training':
training_datagen_creator = self.get_datagen_creator('training')
if training_datagen_creator is None:
raise Exception('Must specify mltk_model.datagen for model')
# Get the validation data generator if one was specified
# otherwise fallback to the training data generator
validation_datagen_creator = self.get_datagen_creator('validation')
logger.debug(f'Dataset directory: {dataset_dir}')
kwargs = dict(
directory=dataset_dir,
target_size=self.input_shape[:2], # Get the height and width
classes=classes,
class_mode=self.class_mode,
follow_links=self.follow_links,
batch_size=batch_size,
max_samples_per_class=max_samples_per_class,
shuffle_index_dir=shuffle_index_dir,
list_valid_filenames_in_directory_function=_get_list_valid_filenames_function(self.dataset),
)
if self.loaded_subset == 'training':
train_datagen = training_datagen_creator.flow_from_directory(
subset='training',
shuffle=True,
class_counts=self.class_counts['training'],
**kwargs
)
if self.loaded_subset in ('training', 'validation'):
validation_datagen = validation_datagen_creator.flow_from_directory(
subset='validation',
shuffle=True,
class_counts=self.class_counts['validation'],
**kwargs
)
if self.loaded_subset == 'evaluation':
validation_datagen_creator.validation_augmentation_enabled = eval_augmentation_enabled
validation_datagen = validation_datagen_creator.flow_from_directory(
subset='validation',
shuffle=eval_shuffle,
class_counts=self.class_counts['validation'],
**kwargs
)
self.x = None
self.validation_data = None
if self.loaded_subset == 'training':
self.x = train_datagen
if self.loaded_subset in ('training', 'validation'):
self.validation_data = validation_datagen
if self.loaded_subset == 'evaluation':
self.x = train_datagen if validation_datagen is None else validation_datagen
self.datagen_context = DataGeneratorContext(
subset = self.loaded_subset,
train_datagen = train_datagen,
validation_datagen = validation_datagen,
train_class_counts=self.class_counts['training'],
validation_class_counts=self.class_counts['validation']
)
self.trigger_event(
MltkModelEvent.AFTER_LOAD_DATASET,
subset=subset,
test=test,
**kwargs
)
    def _register_attributes(self):
        """Register this mixin's attributes with the model's attribute manager"""
        # Imported locally (presumably to avoid a circular import at module load — TODO confirm)
        from mltk.core.preprocess.audio.parallel_generator import ParallelAudioDataGenerator

        self._attributes.register('audio.follow_links', dtype=bool)
        self._attributes.register('audio.shuffle_dataset_enabled', dtype=bool)
        self._attributes.register('audio.class_mode', dtype=str)
        self._attributes.register('audio.datagen', dtype=ParallelAudioDataGenerator)
        self._attributes.register('audio.validation_datagen', dtype=ParallelAudioDataGenerator)

        def _set_audio_input_shape(v):
            self.audio_input_shape = v

        # If datagen.frontend_enabled = True then this is used
        self._attributes.register('audio.input_shape', lambda: self.audio_input_shape, setter=_set_audio_input_shape)
        # If datagen.frontend_enabled = False then this is used
        self._attributes.register('audio.manual_in_shape', dtype=(list,tuple))

        self._attributes.register('audio.classes', dtype=(list,tuple))

        # We cannot call attributes while we're registering them
        # So we return a function that will be called after
        # all the attributes are registered
        def register_parameters_populator():
            self.add_model_parameter_populate_callback(self._populate_audio_dataset_model_parameters)

        return register_parameters_populator
    def _populate_audio_dataset_model_parameters(self):
        """Populate the AudioPipeline parameters required by the device at runtime

        These parameters will be added to the compiled .tflite ModelParameters metadata.
        The device retrieves these parameters from the generated .tflite at run-time and
        uses them to process the microphone audio in the AudioPipeline.

        NOTE: This is invoked during the compile_model() API execution.
        """
        if self.datagen is not None:
            parameters = self.model_parameters
            # "datagen.rescale or 0" maps a None/0 rescale to 0.0
            parameters['samplewise_norm.rescale'] = float(self.datagen.rescale or 0)
            # True only when BOTH centering and std normalization are enabled
            parameters['samplewise_norm.mean_and_std'] = self.datagen.samplewise_center and self.datagen.samplewise_std_normalization
            # Embed the AudioFeatureGenerator settings as well
            parameters.update(self.frontend_settings)
def _load_dataset(dataset) -> Union[str,tuple]:
if isinstance(dataset,str):
return dataset
if callable(dataset):
try:
return dataset()
except Exception as e:
prepend_exception_msg(e, f'Exception while invoking mltk_model.dataset function: {dataset}')
raise
if isinstance(dataset, (types.ModuleType, object)):
if not hasattr(dataset, 'load_data'):
raise Exception('If a module or class is set in mltk_model.dataset, the the module/class must specify the function: load_data()')
try:
return dataset.load_data()
except Exception as e:
prepend_exception_msg(e, f'Exception while invoking mltk_model.dataset.load_data(): {dataset}')
raise
raise Exception('mltk_model.dataset must either be file path to a dictionary or callback function')
def _get_list_valid_filenames_function(dataset):
if isinstance(dataset, (types.ModuleType, object)):
if hasattr(dataset, 'list_valid_filenames_in_directory'):
return getattr(dataset, 'list_valid_filenames_in_directory')
return None