diff --git a/.gitignore b/.gitignore index e139b6d..bd5bc7a 100644 --- a/.gitignore +++ b/.gitignore @@ -1,9 +1,9 @@ # Test files sha1sum-from-bash.txt -test.tar.sha1 -test.tar +test.* test/ + # Byte-compiled / optimized / DLL files __pycache__/ *.py[cod] diff --git a/hashTar.py b/hashTar.py index 2834817..4df9049 100644 --- a/hashTar.py +++ b/hashTar.py @@ -5,26 +5,62 @@ """ This script calculates hashes for every file inside a tar file and creates a checksum file Usage: -python3 hashTar.py target-tar-file +python3 hashTar.py target-tar-file optional-algorithm +Options for optional-algorithm are (md5 is not available in -- rare -- FIPS compliant Python): +md5 +sha1 (default) +sha256 +sha3_512 + +Check hashlib documentation for other hashes that could be implemented here. """ +ALGORITHMS = ["md5", "sha1", "sha256", "sha3_512"] -def checksum(file_to_hash): - hashresult = hashlib.sha1() +def checksum(file_to_hash, algorithm="sha1"): + if algorithm == "sha1": + hashresult = hashlib.sha1() + elif algorithm == "md5": + hashresult = hashlib.md5() + elif algorithm == "sha256": + hashresult = hashlib.sha256() + elif algorithm == "sha3_512": # contrary to sha512, sha3-512 is not vulnerable to length extension attack + hashresult = hashlib.sha3_512() for chunk in iter(lambda: file_to_hash.read(4096), b''): hashresult.update(chunk) return hashresult.hexdigest() -def hashtar(input_tar_file): +def hashtar(input_tar_file, algorithm="sha1"): + if algorithm not in ALGORITHMS: + print("Please choose a valid algorithm, options are:") + for algo in ALGORITHMS: + print(algo, end=" ") + print() + sys.exit() + # Check if hashlib has the algorithm (e.g FIPS Python without MD5) + assert hasattr(hashlib, algorithm), "Invalid algorithm." with tarfile.open(input_tar_file) as tar_input: - algo = "sha1" # Getting ready for algorithm chooser - outputname = input_tar_file + '.' + algo + outputname = input_tar_file + '.' + algorithm with open(outputname, 'w') as checksums_file: for member in tar_input.getmembers(): if member.isreg(): # skip if not file (folders are members, hashing them fails) with tar_input.extractfile(member) as _file: - checksums_file.write('{} ./{}\n'.format(checksum(_file), member.name)) + checksums_file.write('{} ./{}\n'.format(checksum(_file, algorithm), member.name)) if __name__ == '__main__': - hashtar(sys.argv[1]) + if len(sys.argv) == 2: + hashtar(sys.argv[1]) + elif len(sys.argv) == 3: + hashtar(sys.argv[1], sys.argv[2]) + else: + print("I need a target file and an optional algorithm.") + print() + print("Usage:") + print("python3 hashTar.py target-tar-file optional-algorithm") + print() + print("Options for algorithm are:") + for algo in ALGORITHMS: + print(algo, end=" ") + print() +