-
Notifications
You must be signed in to change notification settings - Fork 0
/
huffman.py
197 lines (150 loc) · 5.44 KB
/
huffman.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
import heapq
from collections import defaultdict
class Node:
"""Represents a node in the Huffman Tree."""
def __init__(self, char, freq):
"""
Initialize a node with a character and its frequency.
Args:
char (str): The character represented by the node.
freq (int): The frequency of the character.
"""
self.char = char
self.freq = freq
self.left = None
self.right = None
def __lt__(self, other):
"""
Less-than operator for priority queue comparisons.
Args:
other (Node): Another node to compare with.
Returns:
bool: True if this node's frequency is less than the other.
"""
return self.freq < other.freq
def __str__(self):
"""Return a string representation of the node for debugging."""
return f"Node(char={self.char}, freq={self.freq})"
class HuffmanCoding:
"""
Implements Huffman encoding and decoding for lossless compression.
Attributes:
codes (dict): Maps characters to their binary Huffman codes.
reverse_mapping (dict): Maps binary Huffman codes to their characters.
"""
def __init__(self):
"""Initialize the HuffmanCoding class with empty mappings."""
self.codes = {}
self.reverse_mapping = {}
def build_frequency_table(self, text):
"""
Builds a frequency table from the input text.
Args:
text (str): The text to analyze.
Returns:
dict: A dictionary where keys are characters and values are frequencies.
"""
if not text:
raise ValueError("Input text cannot be empty.")
frequency = defaultdict(int)
for char in text:
frequency[char] += 1
return frequency
def build_huffman_tree(self, frequency_table):
"""
Constructs the Huffman Tree from a given frequency table.
Args:
frequency_table (dict): A dictionary where keys are characters and values are their frequencies.
Returns:
Node: The root of the Huffman tree.
"""
heap = [Node(char, freq) for char, freq in frequency_table.items()]
heapq.heapify(heap)
while len(heap) > 1:
node1 = heapq.heappop(heap)
node2 = heapq.heappop(heap)
merged = Node(None, node1.freq + node2.freq)
merged.left = node1
merged.right = node2
heapq.heappush(heap, merged)
return heap[0]
def generate_codes(self, root, current_code=""):
"""
Recursively generates Huffman codes for each character.
Args:
root (Node): The root of the Huffman Tree.
current_code (str): The binary code being generated (used internally).
"""
if root is None:
return
if root.char is not None: # Leaf node
self.codes[root.char] = current_code
self.reverse_mapping[current_code] = root.char
return
self.generate_codes(root.left, current_code + "0")
self.generate_codes(root.right, current_code + "1")
def encode(self, text):
"""
Encodes the input text into a binary string using Huffman codes.
Args:
text (str): The text to encode.
Returns:
str: The encoded binary string.
"""
if not text:
return ""
encoded_text = "".join(self.codes[char] for char in text)
return encoded_text
def decode(self, encoded_text):
"""
Decodes a binary string back into the original text.
Args:
encoded_text (str): The encoded binary string.
Returns:
str: The original text.
"""
current_code = ""
decoded_text = []
for bit in encoded_text:
current_code += bit
if current_code in self.reverse_mapping:
decoded_text.append(self.reverse_mapping[current_code])
current_code = ""
return "".join(decoded_text)
def compress(self, text):
"""
Compresses the input text using Huffman encoding.
Args:
text (str): The text to compress.
Returns:
str: The encoded binary string.
"""
frequency_table = self.build_frequency_table(text)
huffman_tree = self.build_huffman_tree(frequency_table)
self.generate_codes(huffman_tree)
return self.encode(text)
def decompress(self, encoded_text):
"""
Decompresses an encoded binary string back into the original text.
Args:
encoded_text (str): The encoded binary string.
Returns:
str: The original text.
"""
return self.decode(encoded_text)
# Example usage:
if __name__ == "__main__":
text = "this is an example for huffman encoding"
huffman = HuffmanCoding()
# Compress the text.
try:
encoded_text = huffman.compress(text)
print("Encoded Text:", encoded_text)
# Decompress the text.
decoded_text = huffman.decompress(encoded_text)
print("Decoded Text:", decoded_text)
# Check if the original text matches the decoded text.
assert text == decoded_text, "Error: Original and decoded text do not match."
print("Compression and Decompression are successful!")
except ValueError as e:
print(f"Error: {e}")