# Source code for mltk.core.preprocess.audio.audio_feature_generator.audio_feature_generator_settings

from __future__ import annotations
from typing import Tuple
import copy



class AudioFeatureGeneratorSettings(dict):
    """Settings for the `AudioFeatureGenerator <https://siliconlabs.github.io/mltk/docs/python_api/data_preprocessing/audio_feature_generator.html>`_

    All values are stored in the underlying ``dict`` under ``fe.``-prefixed keys
    (e.g. ``fe.sample_rate_hz``) so the settings can be merged directly into a
    model's parameters with ``my_model.model_parameters.update(frontend_settings)``.

    **Example Usage**

    .. highlight:: python
    .. code-block:: python

       from mltk.core.preprocess.audio.audio_feature_generator import AudioFeatureGeneratorSettings

       # Define the settings used to convert the audio into a spectrogram
       frontend_settings = AudioFeatureGeneratorSettings()

       frontend_settings.sample_rate_hz = 16000
       frontend_settings.sample_length_ms = 1200
       frontend_settings.window_size_ms = 30
       frontend_settings.window_step_ms = 10
       frontend_settings.filterbank_n_channels = 108
       frontend_settings.filterbank_upper_band_limit = 7500.0
       frontend_settings.filterbank_lower_band_limit = 125.0
       frontend_settings.noise_reduction_enable = True
       frontend_settings.noise_reduction_smoothing_bits = 10
       frontend_settings.noise_reduction_even_smoothing = 0.025
       frontend_settings.noise_reduction_odd_smoothing = 0.06
       frontend_settings.noise_reduction_min_signal_remaining = 0.40
       frontend_settings.quantize_dynamic_scale_enable = True  # Enable dynamic quantization
       frontend_settings.quantize_dynamic_scale_range_db = 40.0

       # If this is used in a model specification file,
       # be sure to add the Audio Feature generator settings to the model parameters.
       # This way, they are included in the generated .tflite model file
       # See https://siliconlabs.github.io/mltk/docs/guides/model_parameters.html
       my_model.model_parameters.update(frontend_settings)

    See the `Audio Feature Generator <https://siliconlabs.github.io/mltk/docs/audio/audio_feature_generator.html>`_ guide for more details.
    """

    def __init__(self, **kwargs):
        """Initialize all settings to their documented defaults, then apply ``kwargs``.

        Args:
            **kwargs: Optional ``fe.``-prefixed key/value overrides that are written
                directly into the underlying dict AFTER the defaults are set.
                NOTE: values supplied this way bypass the property setters (no
                validation, and derived values such as ``fe.fft_length`` are not
                recomputed).
        """
        super().__init__()
        # Going through the property setters (rather than the dict directly)
        # validates each value and keeps fe.fft_length in sync.
        self.sample_rate_hz = 16000
        self.sample_length_ms = 1000
        self.window_size_ms = 25
        self.window_step_ms = 10
        self.filterbank_n_channels = 32
        self.filterbank_upper_band_limit = 7500.0
        self.filterbank_lower_band_limit = 125.0
        self.noise_reduction_enable = False
        self.noise_reduction_smoothing_bits = 10
        self.noise_reduction_even_smoothing = 0.025
        self.noise_reduction_odd_smoothing = 0.06
        self.noise_reduction_min_signal_remaining = 0.05
        self.pcan_enable = False
        self.pcan_strength = 0.95
        self.pcan_offset = 80.0
        self.pcan_gain_bits = 21
        self.log_scale_enable = True
        self.log_scale_shift = 6
        self.activity_detection_enable = False
        self.activity_detection_alpha_a = 0.5
        self.activity_detection_alpha_b = 0.8
        self.activity_detection_arm_threshold = 0.75
        self.activity_detection_trip_threshold = 0.8
        self.dc_notch_filter_enable = False
        self.dc_notch_filter_coefficient = 0.95
        self.quantize_dynamic_scale_enable = False
        self.quantize_dynamic_scale_range_db = 40.0

        # Update the dict with the given values
        # AFTER setting the defaults
        super().update(kwargs)

    @property
    def spectrogram_shape(self) -> Tuple[int, int]:
        """Return the generated spectrogram shape as (height, width) i.e. (n_features, filterbank_n_channels)"""
        window_size_length = int((self.window_size_ms * self.sample_rate_hz) / 1000)
        window_step_length = int((self.window_step_ms * self.sample_rate_hz) / 1000)
        # Number of full windows that fit in the sample, sliding by one step each time
        height = (self.sample_length - window_size_length) // window_step_length + 1
        width = self.filterbank_n_channels
        return (height, width)

    @property
    def sample_rate_hz(self) -> int:
        """The sample rate of the audio in Hz, default 16000"""
        # .get() with a default so _update_fft_length() can be called
        # before this setting has been assigned
        return self.get('fe.sample_rate_hz', 0)

    @sample_rate_hz.setter
    def sample_rate_hz(self, v: int):
        s = int(v)
        if s <= 0 or s > 10e6:
            raise ValueError(f'Invalid sample_rate_hz, {v}')
        self['fe.sample_rate_hz'] = s
        # The FFT length depends on the sample rate
        self._update_fft_length()

    @property
    def sample_length_ms(self) -> int:
        """The length of an audio sample in milliseconds, default 1000"""
        return self['fe.sample_length_ms']

    @sample_length_ms.setter
    def sample_length_ms(self, v: int):
        s = int(v)
        if s <= 0 or s > 10e6:
            msg = ''
            # A float < 1 suggests the caller passed seconds instead of milliseconds
            if isinstance(v, float) and v < 1:
                msg = '. You may need to multiply this value by 1000'
            raise ValueError(f'Invalid sample_length_ms, {v}{msg}')
        self['fe.sample_length_ms'] = s

    @property
    def sample_length(self) -> int:
        """Calculated length of an audio sample in frames

        sample_length = (self.sample_length_ms * self.sample_rate_hz) // 1000
        """
        sample_length = int((self.sample_length_ms * self.sample_rate_hz) / 1000)
        return sample_length

    @property
    def window_size_ms(self) -> int:
        """length of desired time frames in ms, default 25"""
        # .get() with a default so _update_fft_length() can be called
        # before this setting has been assigned
        return self.get('fe.window_size_ms', 0)

    @window_size_ms.setter
    def window_size_ms(self, v: int):
        s = int(v)
        if s <= 0 or s > 10e6:
            msg = ''
            # A float < 1 suggests the caller passed seconds instead of milliseconds
            if isinstance(v, float) and v < 1:
                msg = '. You may need to multiply this value by 1000'
            raise ValueError(f'Invalid window_size_ms, {v}{msg}')
        self['fe.window_size_ms'] = s
        # The FFT length depends on the window size
        self._update_fft_length()

    @property
    def window_step_ms(self) -> int:
        """length of step size for the next frame in ms, default 10"""
        return self['fe.window_step_ms']

    @window_step_ms.setter
    def window_step_ms(self, v: int):
        s = int(v)
        if s <= 0 or s > 10e6:
            msg = ''
            # A float < 1 suggests the caller passed seconds instead of milliseconds
            if isinstance(v, float) and v < 1:
                msg = '. You may need to multiply this value by 1000'
            raise ValueError(f'Invalid window_step_ms, {v}{msg}')
        self['fe.window_step_ms'] = s

    @property
    def filterbank_n_channels(self) -> int:
        """the number of filterbank channels to use, default 32"""
        return self['fe.filterbank_n_channels']

    @filterbank_n_channels.setter
    def filterbank_n_channels(self, v: int):
        s = int(v)
        if s <= 0 or s > 10e6:
            raise ValueError(f'Invalid filterbank_n_channels, {v}')
        self['fe.filterbank_n_channels'] = s

    @property
    def filterbank_upper_band_limit(self) -> float:
        """Float, the highest frequency included in the filterbanks, default 7500.0

        NOTE: This should be no more than sample_rate_hz / 2
        """
        return self['fe.filterbank_upper_band_limit']

    @filterbank_upper_band_limit.setter
    def filterbank_upper_band_limit(self, v: float):
        self['fe.filterbank_upper_band_limit'] = float(v)

    @property
    def filterbank_lower_band_limit(self) -> float:
        """the lowest frequency included in the filterbanks, default 125.0"""
        return self['fe.filterbank_lower_band_limit']

    @filterbank_lower_band_limit.setter
    def filterbank_lower_band_limit(self, v: float):
        self['fe.filterbank_lower_band_limit'] = float(v)

    @property
    def noise_reduction_enable(self) -> bool:
        """Enable/disable noise reduction module, default false"""
        return self['fe.noise_reduction_enable']

    @noise_reduction_enable.setter
    def noise_reduction_enable(self, v: bool):
        self['fe.noise_reduction_enable'] = bool(v)

    @property
    def noise_reduction_smoothing_bits(self) -> int:
        """scale up signal by 2^(smoothing_bits) before reduction, default 10"""
        return self['fe.noise_reduction_smoothing_bits']

    @noise_reduction_smoothing_bits.setter
    def noise_reduction_smoothing_bits(self, v: int):
        self['fe.noise_reduction_smoothing_bits'] = int(v)

    @property
    def noise_reduction_even_smoothing(self) -> float:
        """smoothing coefficient for even-numbered channels, default 0.025"""
        return self['fe.noise_reduction_even_smoothing']

    @noise_reduction_even_smoothing.setter
    def noise_reduction_even_smoothing(self, v: float):
        self['fe.noise_reduction_even_smoothing'] = float(v)

    @property
    def noise_reduction_odd_smoothing(self) -> float:
        """smoothing coefficient for odd-numbered channels, default 0.06"""
        return self['fe.noise_reduction_odd_smoothing']

    @noise_reduction_odd_smoothing.setter
    def noise_reduction_odd_smoothing(self, v: float):
        self['fe.noise_reduction_odd_smoothing'] = float(v)

    @property
    def noise_reduction_min_signal_remaining(self) -> float:
        """fraction of signal to preserve in smoothing, default 0.05"""
        return self['fe.noise_reduction_min_signal_remaining']

    @noise_reduction_min_signal_remaining.setter
    def noise_reduction_min_signal_remaining(self, v: float):
        self['fe.noise_reduction_min_signal_remaining'] = float(v)

    @property
    def pcan_enable(self) -> bool:
        """enable PCAN auto gain control, default false"""
        return self['fe.pcan_enable']

    @pcan_enable.setter
    def pcan_enable(self, v: bool):
        self['fe.pcan_enable'] = bool(v)

    @property
    def pcan_strength(self) -> float:
        """gain normalization exponent, default 0.95"""
        return self['fe.pcan_strength']

    @pcan_strength.setter
    def pcan_strength(self, v: float):
        self['fe.pcan_strength'] = float(v)

    @property
    def pcan_offset(self) -> float:
        """positive value added in the normalization denominator, default 80.0"""
        return self['fe.pcan_offset']

    @pcan_offset.setter
    def pcan_offset(self, v: float):
        self['fe.pcan_offset'] = float(v)

    @property
    def pcan_gain_bits(self) -> int:
        """number of fractional bits in the gain, default 21"""
        return self['fe.pcan_gain_bits']

    @pcan_gain_bits.setter
    def pcan_gain_bits(self, v: int):
        self['fe.pcan_gain_bits'] = int(v)

    @property
    def log_scale_enable(self) -> bool:
        """enable logarithmic scaling of filterbanks, default true"""
        return self['fe.log_scale_enable']

    @log_scale_enable.setter
    def log_scale_enable(self, v: bool):
        self['fe.log_scale_enable'] = bool(v)

    @property
    def log_scale_shift(self) -> int:
        """scale filterbanks by 2^(scale_shift), default 6"""
        return self['fe.log_scale_shift']

    @log_scale_shift.setter
    def log_scale_shift(self, v: int):
        self['fe.log_scale_shift'] = int(v)

    @property
    def activity_detection_enable(self) -> bool:
        """Enable the activity detection block.

        This indicates when activity, such as a speech command,
        is detected in the audio stream, default False
        """
        return self['fe.activity_detection_enable']

    @activity_detection_enable.setter
    def activity_detection_enable(self, v: bool):
        self['fe.activity_detection_enable'] = bool(v)

    @property
    def activity_detection_alpha_a(self) -> float:
        """Activity detection filter A coefficient

        The activity detection "fast filter" coefficient.
        The filter is a 1-real pole IIR filter: ``computes out = (1-k)*in + k*out``

        Default 0.5
        """
        return self['fe.activity_detection_alpha_a']

    @activity_detection_alpha_a.setter
    def activity_detection_alpha_a(self, v: float):
        self['fe.activity_detection_alpha_a'] = float(v)

    @property
    def activity_detection_alpha_b(self) -> float:
        """Activity detection filter B coefficient

        The activity detection "slow filter" coefficient.
        The filter is a 1-real pole IIR filter: ``computes out = (1-k)*in + k*out``

        Default 0.8
        """
        return self['fe.activity_detection_alpha_b']

    @activity_detection_alpha_b.setter
    def activity_detection_alpha_b(self, v: float):
        self['fe.activity_detection_alpha_b'] = float(v)

    @property
    def activity_detection_arm_threshold(self) -> float:
        """Threshold for arming the detection block

        The threshold for when there should be considered possible activity in the audio stream

        Default 0.75
        """
        return self['fe.activity_detection_arm_threshold']

    @activity_detection_arm_threshold.setter
    def activity_detection_arm_threshold(self, v: float):
        self['fe.activity_detection_arm_threshold'] = float(v)

    @property
    def activity_detection_trip_threshold(self) -> float:
        """Threshold for tripping the detection block

        The threshold for when activity is considered detected in the audio stream

        Default 0.8
        """
        return self['fe.activity_detection_trip_threshold']

    @activity_detection_trip_threshold.setter
    def activity_detection_trip_threshold(self, v: float):
        self['fe.activity_detection_trip_threshold'] = float(v)

    @property
    def dc_notch_filter_enable(self) -> bool:
        """Enable the DC notch filter

        This will help negate any DC components in the audio signal

        Default False
        """
        return self['fe.dc_notch_filter_enable']

    @dc_notch_filter_enable.setter
    def dc_notch_filter_enable(self, v: bool):
        self['fe.dc_notch_filter_enable'] = bool(v)

    @property
    def dc_notch_filter_coefficient(self) -> float:
        """Coefficient used by DC notch filter

        The DC notch filter coefficient k in Q(16,15) format,
        ``H(z) = (1 - z^-1)/(1 - k*z^-1)``

        Default 0.95
        """
        return self['fe.dc_notch_filter_coefficient']

    @dc_notch_filter_coefficient.setter
    def dc_notch_filter_coefficient(self, v: float):
        self['fe.dc_notch_filter_coefficient'] = float(v)

    @property
    def quantize_dynamic_scale_enable(self) -> bool:
        """Enable dynamic quantization

        Enable dynamic quantization of the generated audio spectrogram.
        With this, the max spectrogram value is mapped to +127,
        and the max spectrogram minus :py:class:`~quantize_dynamic_scale_range_db` is mapped to -128.
        Anything below max spectrogram minus :py:class:`~quantize_dynamic_scale_range_db` is mapped to -128.

        Default False
        """
        return self['fe.quantize_dynamic_scale_enable']

    @quantize_dynamic_scale_enable.setter
    def quantize_dynamic_scale_enable(self, v: bool):
        self['fe.quantize_dynamic_scale_enable'] = bool(v)

    @property
    def quantize_dynamic_scale_range_db(self) -> float:
        """The dynamic range in dB used by the dynamic quantization, default 40.0"""
        return self['fe.quantize_dynamic_scale_range_db']

    @quantize_dynamic_scale_range_db.setter
    def quantize_dynamic_scale_range_db(self, v: float):
        self['fe.quantize_dynamic_scale_range_db'] = float(v)

    @property
    def fft_length(self) -> int:
        """The calculated size required to do an FFT.

        This is dependent on the window_size_ms and sample_rate_hz values
        """
        return self['fe.fft_length']

    def copy(self) -> AudioFeatureGeneratorSettings:
        """Return a deep copy of the current settings"""
        return copy.deepcopy(self)

    def _update_fft_length(self):
        """Recompute ``fe.fft_length`` from the current window size and sample rate."""
        windows_size = int((self.window_size_ms * self.sample_rate_hz) / 1000)
        # The FFT length is the smallest power of 2 that
        # is larger than the window size
        fft_length = 1
        while fft_length < windows_size:
            fft_length <<= 1
        self['fe.fft_length'] = fft_length