Source code for mltk.core.profile_model


from typing import Union, List
import re
import copy



from mltk.utils.logger import get_logger, make_filelike
from mltk.utils.python import append_exception_msg

from .model import (
    MltkModel,
    MltkModelEvent,
    load_tflite_model,
    load_mltk_model
)
from .tflite_model import TfliteModel
from .profiling_results import ProfilingModelResults, ProfilingLayerResult
from .utils import (get_mltk_logger, ArchiveFileNotFoundError)
from .tflite_micro import TfliteMicro, TfliteMicroLayerError
from .tflite_model_parameters import TfliteModelParameters


def profile_model(
    model:Union[MltkModel, TfliteModel, str],
    image_path:str=None,
    accelerator:str=None,
    baud:int=115200,
    port:str=None,
    use_device:bool=False,
    build:bool=False,
    platform:str=None,
    runtime_buffer_size:int=-1,
    test:bool=False,
    post_process:bool=True,
    return_estimates:bool=False,
    **kwargs
) -> ProfilingModelResults:
    """Profile a model for the given accelerator

    This will profile the given model either in a hardware simulator
    or on a physical device.

    .. seealso::
       * `Model Profiler Guide <https://siliconlabs.github.io/mltk/docs/guides/model_profiler.html>`_
       * `Model Profiler API examples <https://siliconlabs.github.io/mltk/mltk/examples/profile_model.html>`_

    Args:
        model: The model to profile as either a :py:class:`mltk.core.MltkModel` or
            :py:class:`mltk.core.TfliteModel` instance, or the path to a `.tflite` or `.mltk.zip` file
        image_path: Optional path to a pre-built profiler firmware image or executable
        accelerator: The name of the hardware accelerator to profile for.
            If omitted, then use the reference kernels
        baud: Baud rate of the serial connection to the device
        port: Serial port of the physical platform.
            If omitted, attempt to discover it automatically
        use_device: Profile on a locally connected embedded device.
            If omitted, then profile in the simulator
        build: If true, build the MltkModel into a .tflite before profiling
        platform: The name of the embedded platform.
            If omitted, attempt to discover it automatically
        runtime_buffer_size: The size of the tensor arena. This is only used by the simulator
            (i.e. when ``use_device=False``).
            If greater than 0, use the given size; if that size is too small then loading the model will fail.
            If equal to 0, try to use the size built into the model's parameters;
            if that size is not available or is too small, find the optimal size.
            If less than 0, automatically find the optimal tensor arena size,
            ignoring the size built into the model parameters
        test: If true, load the "test" version of the given model
        post_process: Allow for post-processing of the profiling results
            (e.g. uploading to a cloud) if supported by the given MltkModel
        return_estimates: If profiling in the simulator, estimate additional metrics
            such as CPU cycles and energy. Disabling this option can reduce the profiling time

    Returns:
        The results of the model profiling
    """
    try:
        tflite_model = load_tflite_model(model=model, build=build, test=test)
    except ArchiveFileNotFoundError as e:
        append_exception_msg(
            e,
            '\nAlternatively, add the --build option to profile the model without training it first'
        )
        raise

    if use_device:
        # Profile on a physical embedded device
        profiling_model_results = profile_model_on_device(
            tflite_model,
            image_path,
            platform=platform,
            baud=baud,
            accelerator=accelerator,
            port=port
        )
    elif image_path:
        # Profile using a locally executable profiler image
        profiling_model_results = profile_model_in_executable(
            image_path=image_path,
            tflite_model=tflite_model,
            accelerator=accelerator
        )
    else:
        # Profile in the hardware simulator
        profiling_model_results = profile_model_in_simulator(
            tflite_model,
            accelerator=accelerator,
            runtime_buffer_size=runtime_buffer_size,
            return_estimates=return_estimates,
            **kwargs
        )

    profiling_model_results._model_name = (tflite_model.filename or 'my_model.tflite')[:-len('.tflite')] # pylint: disable=protected-access

    if post_process:
        post_process_profiling_results(
            model=model,
            results=profiling_model_results,
            test=test,
        )

    return profiling_model_results
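
# Example usage (an illustrative sketch, not part of this module).
# The model path and the 'mvp' accelerator name below are assumptions;
# substitute a real .tflite path and an accelerator supported by your MLTK install.
#
#   from mltk.core import profile_model
#
#   # Profile in the hardware simulator, also estimating
#   # CPU cycles and energy
#   results = profile_model('my_model.tflite', accelerator='mvp', return_estimates=True)
#   print(results)
#
#   # Profile on a locally connected embedded device instead
#   results = profile_model('my_model.tflite', accelerator='mvp', use_device=True)
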
def profile_model_in_executable(
    image_path:str,
    tflite_model:TfliteModel,
    accelerator:str
) -> ProfilingModelResults:
    """Profile the given model using the given profiler executable

    Args:
        image_path: Path to the profiler executable
        tflite_model: The :py:class:`mltk.core.TfliteModel` instance to profile
        accelerator: Name of the accelerator to use when profiling

    Returns:
        ProfilingModelResults: Results of the model profiling
    """
    # pylint: disable=import-outside-toplevel
    from mltk.utils.shell_cmd import run_shell_cmd

    model_path = tflite_model.path
    _, retval = run_shell_cmd([image_path, '--model', model_path])

    return parse_device_model_profiler_log(
        log_data=retval,
        tflite_model=tflite_model,
        accelerator=accelerator
    )


def profile_model_in_simulator(
    tflite_model:TfliteModel,
    accelerator:str=None,
    runtime_buffer_size:int=-1,
    return_estimates:bool=False,
    **kwargs
) -> ProfilingModelResults:
    """Profile the given TfliteModel in the simulator

    Args:
        tflite_model: The .tflite model to profile
        accelerator: Optional, name of the accelerator to profile for
        runtime_buffer_size: The size of the tensor arena, see :py:func:`~profile_model`
        return_estimates: Estimate additional metrics such as CPU cycles and energy.
            Disabling this option can reduce the profiling time

    Returns:
        ProfilingModelResults: Results of the model profiling
    """
    logger = get_mltk_logger()
    logger.error('Profiling model in simulator ...')

    # Profile the model in the hardware simulator
    # and estimate various metrics if possible
    profiling_results = TfliteMicro.profile_model(
        tflite_model,
        accelerator=accelerator,
        return_estimates=return_estimates,
        runtime_buffer_size=runtime_buffer_size,
        **kwargs
    )

    return profiling_results


def profile_model_on_device(
    tflite_model:TfliteModel,
    image_path:str=None,
    accelerator:str=None,
    port:str=None,
    baud:int=115200,
    platform:str=None,
    timeout:float=90,
) -> ProfilingModelResults:
    """Profile the given TfliteModel on a physical embedded target

    Args:
        tflite_model: The :py:class:`mltk.core.TfliteModel` instance to profile
        image_path: Optional path to a pre-built profiler firmware image
        accelerator: Name of the hardware accelerator
        port: Serial COM port. If omitted, attempt to discover it automatically
        baud: Baud rate of the serial connection
        platform: Name of the embedded platform. If omitted, query the connected device
        timeout: Maximum number of seconds of serial inactivity before the profiler is considered hung

    Returns:
        ProfilingModelResults: Results of the model profiling
    """
    # pylint: disable=import-outside-toplevel
    from mltk.utils import commander
    from mltk.utils import firmware_apps
    from mltk.utils.serial_reader import SerialReader

    logger = get_mltk_logger()

    tflite_model = copy.deepcopy(tflite_model)
    try:
        tflite_model_params = TfliteModelParameters.load_from_tflite_model(tflite_model)
        if 'runtime_memory_size' in tflite_model_params:
            # Ensure the memory size is -1 so it is calculated at runtime
            del tflite_model_params['runtime_memory_size']
            tflite_model_params.add_to_tflite_model(tflite_model)
    except Exception:
        # If the model doesn't have parameters then just ignore the error
        pass

    port = port or 'regex:JLink CDC UART Port'
    platform = platform or commander.query_platform()

    logger.error('Programming ML model to device ...')
    firmware_apps.program_image_with_model(
        name='mltk_model_profiler',
        accelerator=accelerator,
        platform=platform,
        firmware_image_path=image_path,
        tflite_model=tflite_model,
        logger=logger,
        halt=True
    )

    # We want the serial logger to always write to the file
    # but only to the console if verbose logging is enabled
    serial_logger = get_logger('serial_logger', 'DEBUG', parent=logger)
    make_filelike(serial_logger, level='INFO' if logger.verbose else 'DEBUG')

    # Start the serial COM port reader
    logger.error('Profiling ML model on device ...')
    with SerialReader(
        port=port,
        baud=baud,
        outfile=serial_logger,
        start_regex=[
            re.compile(r'.*Starting Model Profiler', re.IGNORECASE),
            re.compile(r'Loading model', re.IGNORECASE)
        ],
        stop_regex=[re.compile(r'.*done.*', re.IGNORECASE)],
        fail_regex=[
            re.compile(r'.*hardfault.*', re.IGNORECASE),
            re.compile(r'.*error.*', re.IGNORECASE),
            re.compile(r'.*failed to alloc memory.*', re.IGNORECASE)
        ]
    ) as serial_reader:
        # Reset the board to start the profiling firmware
        commander.reset_device(platform=platform)

        # Wait up to `timeout` seconds for the profiler to complete
        if not serial_reader.read(activity_timeout=timeout):
            raise TimeoutError(f'Timed-out ({timeout}s) waiting for profiler on device to complete')

        # Check if the profiler failed
        if serial_reader.failed:
            raise RuntimeError(f'Profiler failed on device, err: {serial_reader.error_message}')

        # Retrieve the captured data
        device_log = serial_reader.captured_data

    return parse_device_model_profiler_log(
        device_log,
        tflite_model=tflite_model,
        accelerator=accelerator,
        platform=platform,
    )


def parse_device_model_profiler_log(
    log_data:str,
    tflite_model:TfliteModel,
    accelerator:str,
    platform:str=None
) -> ProfilingModelResults:
    """Parse the log generated by the profiler firmware into a ProfilingModelResults instance"""
    # pylint: disable=protected-access
    lines = [x.strip() for x in log_data.splitlines()]

    runtime_memory_size = 0
    cpu_clock_rate = 0
    layer_error_msgs = {}
    n_layers = 0
    layer_results:List[ProfilingLayerResult] = []

    cpu_clock_re = re.compile(r'CPU clock:\s([\d\.Mk]+)Hz')
    runtime_memory_re = re.compile(r'Tensor runtime memory:\s([\d\.Mk]+)')
    layer_name_re = re.compile(r'Op(\d+)-\S+')
    cpu_cycles_re = re.compile(r'([\d\.kMG]+) CPU cycles')
    acc_cycles_re = re.compile(r'([\d\.kMG]+) Accelerator cycles')
    ops_cycles_re = re.compile(r'([\d\.kMG]+) OPs')
    macs_cycles_re = re.compile(r'([\d\.kMG]+) MACs')
    error_msg_re = TfliteMicroLayerError.WARNING_RE
    time_ms_re = re.compile(r'([\d\.]+) ms')
    custom_stat_re = re.compile(r'\s*([\w\-\.]+)=([\d\-\.]+).*')

    # First parse the summary info and any layer error messages
    for line in lines:
        match = cpu_clock_re.match(line)
        if match:
            cpu_clock_rate = _line_to_int(match.group(1))
            continue

        match = runtime_memory_re.match(line)
        if match:
            runtime_memory_size = _line_to_int(match.group(1))
            continue

        match = error_msg_re.match(line)
        if match:
            layer_error_msgs[int(match.group(1))] = match.group(3)
            continue

        match = layer_name_re.match(line)
        if match:
            n_layers = int(match.group(1)) + 1
            continue

    # Next parse each layer's info
    for current_layer_index in range(n_layers):
        layer_err_msg = None if current_layer_index not in layer_error_msgs else layer_error_msgs[current_layer_index]
        layer = ProfilingLayerResult(
            tflite_layer=tflite_model.layers[current_layer_index],
            error_msg=layer_err_msg
        )
        layer_results.append(layer)

        parse_state = 'find_next_layer'
        for line in lines:
            match = layer_name_re.match(line)
            if match:
                layer_index = int(match.group(1))
                if parse_state == 'find_next_layer' and layer_index == current_layer_index:
                    parse_state = 'parse_metrics'
                elif parse_state == 'parse_metrics':
                    parse_state = 'find_layer_results'
                elif parse_state == 'find_layer_results' and layer_index == current_layer_index:
                    parse_state = 'parse_results'
                elif parse_state == 'parse_results':
                    break
                continue

            if parse_state == 'parse_metrics':
                match = ops_cycles_re.match(line)
                if match:
                    layer['ops'] = _line_to_int(match.group(1))
                    continue

                match = macs_cycles_re.match(line)
                if match:
                    layer['macs'] = _line_to_int(match.group(1))
                    continue

            if parse_state == 'parse_results':
                match = time_ms_re.match(line)
                if match:
                    layer['time'] = float(match.group(1)) / 1e3
                    continue

                match = cpu_cycles_re.match(line)
                if match:
                    layer['cpu_cycles'] = _line_to_int(match.group(1))
                    continue

                match = acc_cycles_re.match(line)
                if match:
                    layer['accelerator_cycles'] = _line_to_int(match.group(1))
                    continue

                match = custom_stat_re.match(line)
                if match:
                    layer[match.group(1)] = _line_to_int(match.group(2))
                    continue

    return ProfilingModelResults(
        model=tflite_model,
        accelerator=accelerator,
        platform=platform,
        cpu_clock_rate=cpu_clock_rate,
        runtime_memory_bytes=runtime_memory_size,
        layers=layer_results,
        is_simulated=False
    )


def post_process_profiling_results(
    model:Union[MltkModel, TfliteModel, str],
    results:ProfilingModelResults,
    test:bool,
):
    """Trigger the AFTER_PROFILE event on the corresponding MltkModel, if available"""
    if isinstance(model, TfliteModel):
        return
    elif isinstance(model, MltkModel):
        mltk_model = model
    elif isinstance(model, str) and model.endswith(('.tflite', '.h5')):
        return
    else:
        mltk_model = load_mltk_model(
            model=model,
            test=test
        )

    mltk_model.trigger_event(
        MltkModelEvent.AFTER_PROFILE,
        results=results
    )


def _line_to_int(line:str) -> int:
    """Convert a metric string with an optional k/M/G suffix into an integer"""
    multiplier = 1
    if 'k' in line:
        multiplier = 1e3
    elif 'M' in line:
        multiplier = 1e6
    elif 'G' in line:
        multiplier = 1e9
    if multiplier > 1:
        line = line[:-1]
    v = float(line)
    return int(v * multiplier)
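
# Quick reference for _line_to_int() (values derived from the implementation above):
#   _line_to_int('115')  -> 115
#   _line_to_int('78k')  -> 78000         # 'k' => 1e3
#   _line_to_int('1.5M') -> 1500000       # 'M' => 1e6
#   _line_to_int('2G')   -> 2000000000    # 'G' => 1e9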