class AudioFeatureGeneratorSettings(dict):
    """Settings for the `AudioFeatureGenerator <https://siliconlabs.github.io/mltk/docs/python_api/data_preprocessing/audio_feature_generator.html>`_

    Every setting is exposed as a property, and its value is stored in the
    underlying ``dict`` under the key ``fe.<property name>``
    (e.g. ``fe.sample_rate_hz``).

    **Example Usage**

    .. highlight:: python
    .. code-block:: python

        from mltk.core.preprocess.audio.audio_feature_generator import AudioFeatureGeneratorSettings

        # Define the settings used to convert the audio into a spectrogram
        frontend_settings = AudioFeatureGeneratorSettings()

        frontend_settings.sample_rate_hz = 16000
        frontend_settings.sample_length_ms = 1200
        frontend_settings.window_size_ms = 30
        frontend_settings.window_step_ms = 10
        frontend_settings.filterbank_n_channels = 108
        frontend_settings.filterbank_upper_band_limit = 7500.0
        frontend_settings.filterbank_lower_band_limit = 125.0
        frontend_settings.noise_reduction_enable = True
        frontend_settings.noise_reduction_smoothing_bits = 10
        frontend_settings.noise_reduction_even_smoothing = 0.025
        frontend_settings.noise_reduction_odd_smoothing = 0.06
        frontend_settings.noise_reduction_min_signal_remaining = 0.40
        frontend_settings.quantize_dynamic_scale_enable = True  # Enable dynamic quantization
        frontend_settings.quantize_dynamic_scale_range_db = 40.0

        # If this is used in a model specification file,
        # be sure to add the Audio Feature generator settings to the model parameters.
        # This way, they are included in the generated .tflite model file
        # See https://siliconlabs.github.io/mltk/docs/guides/model_parameters.html
        my_model.model_parameters.update(frontend_settings)

    See the `Audio Feature Generator <https://siliconlabs.github.io/mltk/docs/audio/audio_feature_generator.html>`_ guide for more details.
    """

    def __init__(self, **kwargs):
        """Populate the defaults, then overlay any given values.

        Args:
            **kwargs: Raw dictionary entries (e.g. ``{'fe.sample_rate_hz': 8000}``)
                copied verbatim into the dict AFTER the defaults are set.
                They do not pass through the property setters, so they are
                not validated or type-converted.
        """
        super().__init__()
        self.sample_rate_hz = 16000
        self.sample_length_ms = 1000
        self.window_size_ms = 25
        self.window_step_ms = 10
        self.filterbank_n_channels = 32
        self.filterbank_upper_band_limit = 7500.0
        self.filterbank_lower_band_limit = 125.0
        self.noise_reduction_enable = False
        self.noise_reduction_smoothing_bits = 10
        self.noise_reduction_even_smoothing = 0.025
        self.noise_reduction_odd_smoothing = 0.06
        self.noise_reduction_min_signal_remaining = 0.05
        self.pcan_enable = False
        self.pcan_strength = 0.95
        self.pcan_offset = 80.0
        self.pcan_gain_bits = 21
        self.log_scale_enable = True
        self.log_scale_shift = 6
        self.activity_detection_enable = False
        self.activity_detection_alpha_a = 0.5
        self.activity_detection_alpha_b = 0.8
        self.activity_detection_arm_threshold = 0.75
        self.activity_detection_trip_threshold = 0.8
        self.dc_notch_filter_enable = False
        self.dc_notch_filter_coefficient = 0.95
        self.quantize_dynamic_scale_enable = False
        self.quantize_dynamic_scale_range_db = 40.0

        # Update the dict with the given values
        # AFTER setting the defaults
        super().update(kwargs)

        # The raw update above bypasses the setters, so the derived FFT length
        # may be stale if the sample rate or window size was overridden.
        self._update_fft_length()

    @staticmethod
    def _positive_int(name: str, v, ms_hint: bool = False) -> int:
        """Convert ``v`` to an int and check that it is in the range (0, 10e6].

        Args:
            name: Setting name used in the error message.
            v: The value to convert.
            ms_hint: If true and ``v`` is a float less than 1, the error message
                suggests that the value may have been given in seconds.

        Raises:
            ValueError: If the converted value is out of range.
        """
        s = int(v)
        if s <= 0 or s > 10e6:
            msg = ''
            if ms_hint and isinstance(v, float) and v < 1:
                msg = '. You may need to multiply this value by 1000'
            raise ValueError(f'Invalid {name}, {v}{msg}')
        return s

    @property
    def spectrogram_shape(self) -> Tuple[int, int]:
        """Return the generated spectrogram shape as (height, width) i.e. (n_features, filterbank_n_channels)"""
        window_size_length = int((self.window_size_ms * self.sample_rate_hz) / 1000)
        window_step_length = int((self.window_step_ms * self.sample_rate_hz) / 1000)
        height = (self.sample_length - window_size_length) // window_step_length + 1
        width = self.filterbank_n_channels
        return (height, width)

    @property
    def sample_rate_hz(self) -> int:
        """The sample rate of the audio in Hz, default 16000"""
        # Defaults to 0 as _update_fft_length() may read this before it is set
        return self.get('fe.sample_rate_hz', 0)

    @sample_rate_hz.setter
    def sample_rate_hz(self, v: int):
        self['fe.sample_rate_hz'] = self._positive_int('sample_rate_hz', v)
        self._update_fft_length()

    @property
    def sample_length_ms(self) -> int:
        """The length of an audio sample in milliseconds, default 1000"""
        return self['fe.sample_length_ms']

    @sample_length_ms.setter
    def sample_length_ms(self, v: int):
        self['fe.sample_length_ms'] = self._positive_int('sample_length_ms', v, ms_hint=True)

    @property
    def sample_length(self) -> int:
        """Calculated length of an audio sample in frames

        sample_length = (self.sample_length_ms * self.sample_rate_hz) // 1000
        """
        sample_length = int((self.sample_length_ms * self.sample_rate_hz) / 1000)
        return sample_length

    @property
    def window_size_ms(self) -> int:
        """length of desired time frames in ms, default 25"""
        # Defaults to 0 as _update_fft_length() may read this before it is set
        return self.get('fe.window_size_ms', 0)

    @window_size_ms.setter
    def window_size_ms(self, v: int):
        self['fe.window_size_ms'] = self._positive_int('window_size_ms', v, ms_hint=True)
        self._update_fft_length()

    @property
    def window_step_ms(self) -> int:
        """length of step size for the next frame in ms, default 10"""
        return self['fe.window_step_ms']

    @window_step_ms.setter
    def window_step_ms(self, v: int):
        self['fe.window_step_ms'] = self._positive_int('window_step_ms', v, ms_hint=True)

    @property
    def filterbank_n_channels(self) -> int:
        """the number of filterbank channels to use, default 32"""
        return self['fe.filterbank_n_channels']

    @filterbank_n_channels.setter
    def filterbank_n_channels(self, v: int):
        self['fe.filterbank_n_channels'] = self._positive_int('filterbank_n_channels', v)

    @property
    def filterbank_upper_band_limit(self) -> float:
        """Float, the highest frequency included in the filterbanks, default 7500.0

        NOTE: This should be no more than sample_rate_hz / 2
        """
        return self['fe.filterbank_upper_band_limit']

    @filterbank_upper_band_limit.setter
    def filterbank_upper_band_limit(self, v: float):
        self['fe.filterbank_upper_band_limit'] = float(v)

    @property
    def filterbank_lower_band_limit(self) -> float:
        """the lowest frequency included in the filterbanks, default 125.0"""
        return self['fe.filterbank_lower_band_limit']

    @filterbank_lower_band_limit.setter
    def filterbank_lower_band_limit(self, v: float):
        self['fe.filterbank_lower_band_limit'] = float(v)

    @property
    def noise_reduction_enable(self) -> bool:
        """Enable/disable noise reduction module, default false"""
        return self['fe.noise_reduction_enable']

    @noise_reduction_enable.setter
    def noise_reduction_enable(self, v: bool):
        self['fe.noise_reduction_enable'] = bool(v)

    @property
    def noise_reduction_smoothing_bits(self) -> int:
        """scale up signal by 2^(smoothing_bits) before reduction, default 10"""
        return self['fe.noise_reduction_smoothing_bits']

    @noise_reduction_smoothing_bits.setter
    def noise_reduction_smoothing_bits(self, v: int):
        self['fe.noise_reduction_smoothing_bits'] = int(v)

    @property
    def noise_reduction_even_smoothing(self) -> float:
        """smoothing coefficient for even-numbered channels, default 0.025"""
        return self['fe.noise_reduction_even_smoothing']

    @noise_reduction_even_smoothing.setter
    def noise_reduction_even_smoothing(self, v: float):
        self['fe.noise_reduction_even_smoothing'] = float(v)

    @property
    def noise_reduction_odd_smoothing(self) -> float:
        """smoothing coefficient for odd-numbered channels, default 0.06"""
        return self['fe.noise_reduction_odd_smoothing']

    @noise_reduction_odd_smoothing.setter
    def noise_reduction_odd_smoothing(self, v: float):
        self['fe.noise_reduction_odd_smoothing'] = float(v)

    @property
    def noise_reduction_min_signal_remaining(self) -> float:
        """fraction of signal to preserve in smoothing, default 0.05"""
        return self['fe.noise_reduction_min_signal_remaining']

    @noise_reduction_min_signal_remaining.setter
    def noise_reduction_min_signal_remaining(self, v: float):
        self['fe.noise_reduction_min_signal_remaining'] = float(v)

    @property
    def pcan_enable(self) -> bool:
        """enable PCAN auto gain control, default false"""
        return self['fe.pcan_enable']

    @pcan_enable.setter
    def pcan_enable(self, v: bool):
        self['fe.pcan_enable'] = bool(v)

    @property
    def pcan_strength(self) -> float:
        """gain normalization exponent, default 0.95"""
        return self['fe.pcan_strength']

    @pcan_strength.setter
    def pcan_strength(self, v: float):
        self['fe.pcan_strength'] = float(v)

    @property
    def pcan_offset(self) -> float:
        """positive value added in the normalization denominator, default 80.0"""
        return self['fe.pcan_offset']

    @pcan_offset.setter
    def pcan_offset(self, v: float):
        self['fe.pcan_offset'] = float(v)

    @property
    def pcan_gain_bits(self) -> int:
        """number of fractional bits in the gain, default 21"""
        return self['fe.pcan_gain_bits']

    @pcan_gain_bits.setter
    def pcan_gain_bits(self, v: int):
        self['fe.pcan_gain_bits'] = int(v)

    @property
    def log_scale_enable(self) -> bool:
        """enable logarithmic scaling of filterbanks, default true"""
        return self['fe.log_scale_enable']

    @log_scale_enable.setter
    def log_scale_enable(self, v: bool):
        self['fe.log_scale_enable'] = bool(v)

    @property
    def log_scale_shift(self) -> int:
        """scale filterbanks by 2^(scale_shift), default 6"""
        return self['fe.log_scale_shift']

    @log_scale_shift.setter
    def log_scale_shift(self, v: int):
        self['fe.log_scale_shift'] = int(v)

    @property
    def activity_detection_enable(self) -> bool:
        """Enable the activity detection block.

        This indicates when activity, such as a speech command, is detected in the audio stream,
        default False"""
        return self['fe.activity_detection_enable']

    @activity_detection_enable.setter
    def activity_detection_enable(self, v: bool):
        self['fe.activity_detection_enable'] = bool(v)

    @property
    def activity_detection_alpha_a(self) -> float:
        """Activity detection filter A coefficient

        The activity detection "fast filter" coefficient.
        The filter is a 1-real pole IIR filter: ``computes out = (1-k)*in + k*out``

        Default 0.5"""
        return self['fe.activity_detection_alpha_a']

    @activity_detection_alpha_a.setter
    def activity_detection_alpha_a(self, v: float):
        self['fe.activity_detection_alpha_a'] = float(v)

    @property
    def activity_detection_alpha_b(self) -> float:
        """Activity detection filter B coefficient

        The activity detection "slow filter" coefficient.
        The filter is a 1-real pole IIR filter: ``computes out = (1-k)*in + k*out``

        Default 0.8"""
        return self['fe.activity_detection_alpha_b']

    @activity_detection_alpha_b.setter
    def activity_detection_alpha_b(self, v: float):
        self['fe.activity_detection_alpha_b'] = float(v)

    @property
    def activity_detection_arm_threshold(self) -> float:
        """Threshold for arming the detection block

        The threshold for when there should be considered possible activity in the audio stream

        Default 0.75"""
        return self['fe.activity_detection_arm_threshold']

    @activity_detection_arm_threshold.setter
    def activity_detection_arm_threshold(self, v: float):
        self['fe.activity_detection_arm_threshold'] = float(v)

    @property
    def activity_detection_trip_threshold(self) -> float:
        """Threshold for tripping the detection block

        The threshold for when activity is considered detected in the audio stream

        Default 0.8"""
        return self['fe.activity_detection_trip_threshold']

    @activity_detection_trip_threshold.setter
    def activity_detection_trip_threshold(self, v: float):
        self['fe.activity_detection_trip_threshold'] = float(v)

    @property
    def dc_notch_filter_enable(self) -> bool:
        """Enable the DC notch filter

        This will help negate any DC components in the audio signal

        Default False"""
        return self['fe.dc_notch_filter_enable']

    @dc_notch_filter_enable.setter
    def dc_notch_filter_enable(self, v: bool):
        self['fe.dc_notch_filter_enable'] = bool(v)

    @property
    def dc_notch_filter_coefficient(self) -> float:
        """Coefficient used by DC notch filter

        The DC notch filter coefficient k in Q(16,15) format, ``H(z) = (1 - z^-1)/(1 - k*z^-1)``

        Default 0.95"""
        return self['fe.dc_notch_filter_coefficient']

    @dc_notch_filter_coefficient.setter
    def dc_notch_filter_coefficient(self, v: float):
        self['fe.dc_notch_filter_coefficient'] = float(v)

    @property
    def quantize_dynamic_scale_enable(self) -> bool:
        """Enable dynamic quantization

        Enable dynamic quantization of the generated audio spectrogram.
        With this, the max spectrogram value is mapped to +127,
        and the max spectrogram minus :py:attr:`~quantize_dynamic_scale_range_db` is mapped to -128.
        Anything below max spectrogram minus :py:attr:`~quantize_dynamic_scale_range_db` is mapped to -128.

        Default False"""
        return self['fe.quantize_dynamic_scale_enable']

    @quantize_dynamic_scale_enable.setter
    def quantize_dynamic_scale_enable(self, v: bool):
        self['fe.quantize_dynamic_scale_enable'] = bool(v)

    @property
    def quantize_dynamic_scale_range_db(self) -> float:
        """The dynamic range in dB used by the dynamic quantization, default 40.0"""
        return self['fe.quantize_dynamic_scale_range_db']

    @quantize_dynamic_scale_range_db.setter
    def quantize_dynamic_scale_range_db(self, v: float):
        self['fe.quantize_dynamic_scale_range_db'] = float(v)

    @property
    def fft_length(self) -> int:
        """The calculated size required to do an FFT.

        This is dependent on the window_size_ms and sample_rate_hz values"""
        return self['fe.fft_length']

    def copy(self) -> 'AudioFeatureGeneratorSettings':
        """Return a deep copy of the current settings"""
        return copy.deepcopy(self)

    def _update_fft_length(self):
        """Recalculate ``fe.fft_length`` from window_size_ms and sample_rate_hz."""
        windows_size = int((self.window_size_ms * self.sample_rate_hz) / 1000)

        # The FFT length is the smallest power of 2 that
        # is greater than or equal to the window size
        fft_length = 1
        while fft_length < windows_size:
            fft_length <<= 1
        self['fe.fft_length'] = fft_length
Important: We use cookies only for functional and traffic analytics.
We DO NOT use cookies for any marketing purposes. By using our site you acknowledge you have read and understood our Cookie Policy.