Source code for mltk.utils.hasher
"""Data hashing utilities
See the source code on Github: `mltk/utils/hasher.py <https://github.com/siliconlabs/mltk/blob/master/mltk/utils/hasher.py>`_
"""
from typing import Union
import os
import hashlib
[docs]def generate_hash(*args) -> str:
"""Generate an MD5 hash of the given Python objects"""
md5 = hashlib.md5()
hash_object(*args, hasher=md5)
return md5.hexdigest().lower()
[docs]def hash_file(
path: str,
algorithm: Union[str,'hashlib._Hash'] = 'md5',
include_filename: bool = False,
) -> str:
"""Generate a hash of the given file"""
if not os.path.exists(path):
return None
if isinstance(algorithm, str):
algorithm = algorithm.lower()
if algorithm in ('sha256', 'sha2'):
hasher = hashlib.sha256()
elif algorithm in ('sha128', 'sha1'):
hasher = hashlib.sha1()
elif algorithm == 'md5':
hasher = hashlib.md5()
else:
raise ValueError('Hash algorithm must be md5, sha1, or sha256')
elif hasattr(algorithm, 'update') and hasattr(algorithm, 'hexdigest'):
hasher = algorithm
else:
raise ValueError('"algorithm argument must be the name of a hash algorithm or a hashlib._Hash instance')
with open(path, 'rb') as f:
if include_filename:
hasher.update(path.encode('utf-8'))
for chunk in iter(lambda: f.read(4096), b""):
hasher.update(chunk)
return hasher.hexdigest().lower()
[docs]def hash_object(*objects, hasher=None):
"""Hash the given object(s) and return the hashlib hasher instance
If no hasher argument is provided, then automatically created
a hashlib.md5()
"""
if hasher is None:
hasher = hashlib.md5()
for obj in objects:
if isinstance(obj, dict):
for key, value in obj.items():
hash_object(key, hasher=hasher)
hash_object(value, hasher=hasher)
continue
elif isinstance(obj, (list,tuple)):
for e in obj:
hash_object(e, hasher=hasher)
continue
elif isinstance(obj, (bytes,bytearray)):
hasher.update(obj)
continue
elif isinstance(obj, str):
hasher.update(obj.encode('utf-8'))
continue
else:
hasher.update(f'{obj}'.encode('utf-8'))
return hasher