Source code for mltk.core.preprocess.audio.audio_feature_generator.audio_feature_generator_settings

from __future__ import annotations
from typing import Tuple
import copy



[docs]class AudioFeatureGeneratorSettings(dict):
    """Settings for the `AudioFeatureGenerator <https://siliconlabs.github.io/mltk/docs/python_api/data_preprocessing/audio_feature_generator.html>`_


    **Example Usage**

    .. highlight:: python
    .. code-block:: python

        from mltk.core.preprocess.audio.audio_feature_generator import AudioFeatureGeneratorSettings

        # Define the settings used to convert the audio into a spectrogram
        frontend_settings = AudioFeatureGeneratorSettings()

        frontend_settings.sample_rate_hz = 16000
        frontend_settings.sample_length_ms = 1200
        frontend_settings.window_size_ms = 30
        frontend_settings.window_step_ms = 10
        frontend_settings.filterbank_n_channels = 108
        frontend_settings.filterbank_upper_band_limit = 7500.0
        frontend_settings.filterbank_lower_band_limit = 125.0
        frontend_settings.noise_reduction_enable = True
        frontend_settings.noise_reduction_smoothing_bits = 10
        frontend_settings.noise_reduction_even_smoothing =  0.025
        frontend_settings.noise_reduction_odd_smoothing = 0.06
        frontend_settings.noise_reduction_min_signal_remaining = 0.40
        frontend_settings.quantize_dynamic_scale_enable = True # Enable dynamic quantization
        frontend_settings.quantize_dynamic_scale_range_db = 40.0

        # If this is used in a model specification file,
        # be sure to add the Audio Feature generator settings to the model parameters.
        # This way, they are included in the generated .tflite model file
        # See https://siliconlabs.github.io/mltk/docs/guides/model_parameters.html
        my_model.model_parameters.update(frontend_settings)


    See the `Audio Feature Generator <https://siliconlabs.github.io/mltk/docs/audio/audio_feature_generator.html>`_ guide for more details.
    """

[docs]    def __init__(self, **kwargs):
        super().__init__()
        self.sample_rate_hz = 16000
        self.sample_length_ms = 1000
        self.window_size_ms = 25
        self.window_step_ms = 10

        self.filterbank_n_channels = 32
        self.filterbank_upper_band_limit = 7500.0
        self.filterbank_lower_band_limit = 125.0
        self.noise_reduction_enable = False
        self.noise_reduction_smoothing_bits = 10
        self.noise_reduction_even_smoothing = 0.025
        self.noise_reduction_odd_smoothing =  0.06
        self.noise_reduction_min_signal_remaining = 0.05
        self.pcan_enable = False
        self.pcan_strength = 0.95
        self.pcan_offset = 80.0
        self.pcan_gain_bits = 21
        self.log_scale_enable = True
        self.log_scale_shift = 6
        self.activity_detection_enable = False
        self.activity_detection_alpha_a = 0.5
        self.activity_detection_alpha_b = 0.8
        self.activity_detection_arm_threshold = 0.75
        self.activity_detection_trip_threshold = 0.8
        self.dc_notch_filter_enable = False
        self.dc_notch_filter_coefficient = 0.95
        self.quantize_dynamic_scale_enable = False
        self.quantize_dynamic_scale_range_db = 40.0

        # Update the dict with the given values
        # AFTER setting the defaults
        super().update(kwargs)



    @property
    def spectrogram_shape(self) -> Tuple[int, int]:
        """Return the generated spectrogram shape as (height, width) i.e. (n_features, filterbank_n_channels)"""
        window_size_length = int((self.window_size_ms * self.sample_rate_hz) / 1000)
        window_step_length = int((self.window_step_ms * self.sample_rate_hz) / 1000)
        height = (self.sample_length - window_size_length) // window_step_length + 1
        width = self.filterbank_n_channels
        return (height, width)

    @property
    def sample_rate_hz(self) -> int:
        """The sample rate of the audio in Hz, default 16000"""
        return self.get('fe.sample_rate_hz', 0)
    @sample_rate_hz.setter
    def sample_rate_hz(self, v: int):
        s = int(v)
        if s <= 0 or s > 10e6:
            raise ValueError(f'Invalid sample_rate_hz, {v}')
        self['fe.sample_rate_hz'] = s
        self._update_fft_length()

    @property
    def sample_length_ms(self) -> int:
        """The length of an audio sample in milliseconds, default 1000"""
        return self['fe.sample_length_ms']
    @sample_length_ms.setter
    def sample_length_ms(self, v: int):
        s = int(v)
        if s <= 0 or s > 10e6:
            msg = ''
            if isinstance(v, float) and v < 1:
                msg = '. You may need to multiply this value by 1000'
            raise ValueError(f'Invalid sample_length_ms, {v}{msg}')
        self['fe.sample_length_ms'] = s

    @property
    def sample_length(self) -> int:
        """Calculated length of an audio sample in frames
        sample_length = (self.sample_length_ms * self.sample_rate_hz) // 1000
        """
        sample_length      = int((self.sample_length_ms * self.sample_rate_hz) / 1000)
        return sample_length

    @property
    def window_size_ms(self) -> int:
        """length of desired time frames in ms, default 25"""
        return self.get('fe.window_size_ms', 0)
    @window_size_ms.setter
    def window_size_ms(self, v: int):
        s = int(v)
        if s <= 0 or s > 10e6:
            msg = ''
            if isinstance(v, float) and v < 1:
                msg = '. You may need to multiply this value by 1000'
            raise ValueError(f'Invalid window_size_ms, {v}{msg}')
        self['fe.window_size_ms'] = s
        self._update_fft_length()

    @property
    def window_step_ms(self) -> int:
        """length of step size for the next frame in ms, default 10"""
        return self['fe.window_step_ms']
    @window_step_ms.setter
    def window_step_ms(self, v: int):
        s = int(v)
        if s <= 0 or s > 10e6:
            msg = ''
            if isinstance(v, float) and v < 1:
                msg = '. You may need to multiply this value by 1000'
            raise ValueError(f'Invalid window_step_ms, {v}{msg}')
        self['fe.window_step_ms'] = s

    @property
    def filterbank_n_channels(self) -> int:
        """the number of filterbank channels to use, default 32"""
        return self['fe.filterbank_n_channels']
    @filterbank_n_channels.setter
    def filterbank_n_channels(self, v: int):
        s = int(v)
        if s <= 0 or s > 10e6:
            raise ValueError(f'Invalid filterbank_n_channels, {v}')
        self['fe.filterbank_n_channels'] = s

    @property
    def filterbank_upper_band_limit(self) -> float:
        """ Float, the highest frequency included in the filterbanks, default 7500.0
        NOTE: This should be no more than sample_rate_hz / 2
        """
        return self['fe.filterbank_upper_band_limit']
    @filterbank_upper_band_limit.setter
    def filterbank_upper_band_limit(self, v: float):
        self['fe.filterbank_upper_band_limit'] = float(v)

    @property
    def filterbank_lower_band_limit(self) -> float:
        """ the lowest frequency included in the filterbanks, default 125.0"""
        return self['fe.filterbank_lower_band_limit']
    @filterbank_lower_band_limit.setter
    def filterbank_lower_band_limit(self, v: float):
        self['fe.filterbank_lower_band_limit'] = float(v)

    @property
    def noise_reduction_enable(self) -> bool:
        """Enable/disable noise reduction module, default false"""
        return self['fe.noise_reduction_enable']
    @noise_reduction_enable.setter
    def noise_reduction_enable(self, v: bool):
        self['fe.noise_reduction_enable'] = bool(v)

    @property
    def noise_reduction_smoothing_bits(self) -> int:
        """scale up signal by 2^(smoothing_bits) before reduction, default 10"""
        return self['fe.noise_reduction_smoothing_bits']
    @noise_reduction_smoothing_bits.setter
    def noise_reduction_smoothing_bits(self, v: int):
        self['fe.noise_reduction_smoothing_bits'] = int(v)

    @property
    def noise_reduction_even_smoothing(self) -> float:
        """smoothing coefficient for even-numbered channels, default 0.025"""
        return self['fe.noise_reduction_even_smoothing']
    @noise_reduction_even_smoothing.setter
    def noise_reduction_even_smoothing(self, v: float):
        self['fe.noise_reduction_even_smoothing'] = float(v)

    @property
    def noise_reduction_odd_smoothing(self) -> float:
        """smoothing coefficient for odd-numbered channels, default 0.06"""
        return self['fe.noise_reduction_odd_smoothing']
    @noise_reduction_odd_smoothing.setter
    def noise_reduction_odd_smoothing(self, v: float):
        self['fe.noise_reduction_odd_smoothing'] = float(v)

    @property
    def noise_reduction_min_signal_remaining(self) -> float:
        """fraction of signal to preserve in smoothing, default 0.05"""
        return self['fe.noise_reduction_min_signal_remaining']
    @noise_reduction_min_signal_remaining.setter
    def noise_reduction_min_signal_remaining(self, v: float):
        self['fe.noise_reduction_min_signal_remaining'] = float(v)

    @property
    def pcan_enable(self) -> bool:
        """enable PCAN auto gain control, default false"""
        return self['fe.pcan_enable']
    @pcan_enable.setter
    def pcan_enable(self, v: bool):
        self['fe.pcan_enable'] = bool(v)

    @property
    def pcan_strength(self) -> float:
        """ gain normalization exponent, default 0.95"""
        return self['fe.pcan_strength']
    @pcan_strength.setter
    def pcan_strength(self, v: float):
        self['fe.pcan_strength'] = float(v)

    @property
    def pcan_offset(self) -> float:
        """positive value added in the normalization denominator, default 80.0"""
        return self['fe.pcan_offset']
    @pcan_offset.setter
    def pcan_offset(self, v: float):
        self['fe.pcan_offset'] = float(v)

    @property
    def pcan_gain_bits(self) -> int:
        """number of fractional bits in the gain, default 21"""
        return self['fe.pcan_gain_bits']
    @pcan_gain_bits.setter
    def pcan_gain_bits(self, v: int):
        self['fe.pcan_gain_bits'] = int(v)

    @property
    def log_scale_enable(self) -> bool:
        """enable logarithmic scaling of filterbanks, default true"""
        return self['fe.log_scale_enable']
    @log_scale_enable.setter
    def log_scale_enable(self, v: bool):
        self['fe.log_scale_enable'] = bool(v)

    @property
    def log_scale_shift(self) -> int:
        """scale filterbanks by 2^(scale_shift), default 6"""
        return self['fe.log_scale_shift']
    @log_scale_shift.setter
    def log_scale_shift(self, v: int):
        self['fe.log_scale_shift'] = int(v)

    @property
    def activity_detection_enable(self) -> bool:
        """Enable the activity detection block.
        This indicates when activity, such as a speech command, is detected in the audio stream,
        default False"""
        return self['fe.activity_detection_enable']
    @activity_detection_enable.setter
    def activity_detection_enable(self, v: bool):
        self['fe.activity_detection_enable'] = bool(v)

    @property
    def activity_detection_alpha_a(self) -> float:
        """Activity detection filter A coefficient
        The activity detection "fast filter" coefficient.
        The filter is a 1-real pole IIR filter: ``computes out = (1-k)*in + k*out``
        Default 0.5"""
        return self['fe.activity_detection_alpha_a']
    @activity_detection_alpha_a.setter
    def activity_detection_alpha_a(self, v: float):
        self['fe.activity_detection_alpha_a'] = float(v)

    @property
    def activity_detection_alpha_b(self) -> float:
        """Activity detection filter B coefficient
        The activity detection "slow filter" coefficient.
        The filter is a 1-real pole IIR filter: ``computes out = (1-k)*in + k*out``
        Default 0.8"""
        return self['fe.activity_detection_alpha_b']
    @activity_detection_alpha_b.setter
    def activity_detection_alpha_b(self, v: float):
        self['fe.activity_detection_alpha_b'] = float(v)

    @property
    def activity_detection_arm_threshold(self) -> float:
        """Threshold for arming the detection block
        The threshold for when there should be considered possible activity in the audio stream
        Default 0.75"""
        return self['fe.activity_detection_arm_threshold']
    @activity_detection_arm_threshold.setter
    def activity_detection_arm_threshold(self, v: float):
        self['fe.activity_detection_arm_threshold'] = float(v)

    @property
    def activity_detection_trip_threshold(self) -> float:
        """Threshold for tripping the detection block
        The threshold for when activity is considered detected in the audio stream
        Default 0.8"""
        return self['fe.activity_detection_trip_threshold']
    @activity_detection_trip_threshold.setter
    def activity_detection_trip_threshold(self, v: float):
        self['fe.activity_detection_trip_threshold'] = float(v)

    @property
    def dc_notch_filter_enable(self) -> bool:
        """Enable the DC notch filter
        This will help negate any DC components in the audio signal
        Default False"""
        return self['fe.dc_notch_filter_enable']
    @dc_notch_filter_enable.setter
    def dc_notch_filter_enable(self, v: bool):
        self['fe.dc_notch_filter_enable'] = bool(v)

    @property
    def dc_notch_filter_coefficient(self) -> float:
        """Coefficient used by DC notch filter

        The DC notch filter coefficient k in Q(16,15) format, ``H(z) = (1 - z^-1)/(1 - k*z^-1)``
        Default 0.95"""
        return self['fe.dc_notch_filter_coefficient']
    @dc_notch_filter_coefficient.setter
    def dc_notch_filter_coefficient(self, v: float):
        self['fe.dc_notch_filter_coefficient'] = float(v)

    @property
    def quantize_dynamic_scale_enable(self) -> bool:
        """Enable dynamic quantization

        Enable dynamic quantization of the generated audio spectrogram.
        With this, the max spectrogram value is mapped to +127,
        and the max spectrogram minus :py:class:`~quantize_dynamic_scale_range_db` is mapped to -128.
        Anything below max spectrogram minus :py:class:`~quantize_dynamic_scale_range_db` is mapped to -128.
        Default False"""
        return self['fe.quantize_dynamic_scale_enable']
    @quantize_dynamic_scale_enable.setter
    def quantize_dynamic_scale_enable(self, v: bool):
        self['fe.quantize_dynamic_scale_enable'] = bool(v)

    @property
    def quantize_dynamic_scale_range_db(self) -> float:
        """Rhe dynamic range in dB used by the dynamic quantization, default 40.0"""
        return self['fe.quantize_dynamic_scale_range_db']
    @quantize_dynamic_scale_range_db.setter
    def quantize_dynamic_scale_range_db(self, v: float):
        self['fe.quantize_dynamic_scale_range_db'] = float(v)


    @property
    def fft_length(self) -> int:
        """The calculated size required to do an FFT.
        This is dependent on the window_size_ms and sample_rate_hz values"""
        return self['fe.fft_length']


[docs]    def copy(self) -> AudioFeatureGeneratorSettings:
        """Return a deep copy of the current settings"""
        return copy.deepcopy(self)

    def _update_fft_length(self):
        windows_size = int((self.window_size_ms * self.sample_rate_hz) / 1000)
        # The FFT length is the smallest power of 2 that
        # is larger than the window size
        fft_length = 1
        while fft_length < windows_size:
            fft_length <<= 1
        self['fe.fft_length'] = fft_length