-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathhuffman.py
58 lines (40 loc) · 1.76 KB
/
huffman.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
from typing import Dict, List, Tuple
NumSymbol = int
AlphaSymbol = str
Frequency = int
Code = str
Frequencies = List[Frequency]
Distribution = Dict[AlphaSymbol, Frequency]
IntermediateEncoding = Dict[AlphaSymbol, Code]
Encodings = Dict[NumSymbol, Code]
def huffman(frequencies: Frequencies) -> Encodings:
distribution = weights_to_dist_dict(frequencies)
encoding = huffman_recursive(distribution)
return {alpha_to_index(symbol): code for symbol, code in encoding.items()}
def huffman_recursive(distribution: Distribution) -> IntermediateEncoding:
if is_base_case(distribution):
return dict(zip(distribution.keys(), ["1", "0"]))
dist_prime = distribution.copy()
symbol1, symbol2 = lowest_prob_pair(distribution)
prob1, prob2 = dist_prime.pop(symbol1), dist_prime.pop(symbol2)
dist_prime[symbol1 + symbol2] = prob1 + prob2
encoding = huffman_recursive(dist_prime)
symbol_encoding_prefix = encoding.pop(symbol1 + symbol2)
encoding[symbol1], encoding[symbol2] = (
symbol_encoding_prefix + "0",
symbol_encoding_prefix + "1",
)
return encoding
def is_base_case(distribution: Distribution):
return len(distribution) == 2
def lowest_prob_pair(distribution: Distribution) -> Tuple[AlphaSymbol, AlphaSymbol]:
sorted_dist = sorted(distribution.items(), key=lambda item: item[1])
most_probable_symbol = sorted_dist[0][0]
second_most_probable_symbol = sorted_dist[1][0]
return most_probable_symbol, second_most_probable_symbol
def weights_to_dist_dict(weights: List[int]) -> Distribution:
return {index_to_alpha(i): weight for i, weight in enumerate(weights)}
def index_to_alpha(index: int) -> str:
return chr(ord("`") + index)
def alpha_to_index(alpha: str) -> int:
return ord(alpha) - 96