-
Notifications
You must be signed in to change notification settings - Fork 0
/
T1A1_P1_load_data.py
58 lines (42 loc) · 1.98 KB
/
T1A1_P1_load_data.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
# Muhammet Furkan Yalcin - 101233944
# Soliman Elkhouli - 101244211
# Tsu Chen - 101238818
import csv
import string
def book_category_dictionary(filename: str) -> dict[str, list[dict]]:
    """Return the books listed in a CSV file, grouped by category.

    Each data row is read positionally as the columns title, author,
    rating, publisher, pages, category and language (in that order).
    The header row (recognised by the literal column names "rating" or
    "pages" in the rating/pages columns) and blank rows are skipped.

    The result maps every category to the list of all its books, in the
    order the rows appear in the file.  Each book is a dictionary with the
    keys "title", "author", "language", "rating", "publisher" and "pages".
    "rating" is a float and "pages" is an int; a value of "N/A" in either
    column is stored as the empty string "".

    Precondition: filename names a readable comma-separated text file
    (e.g. .csv or .txt) whose non-blank rows have at least seven columns.

    Raises:
        OSError: the file cannot be opened.
        IndexError: a non-blank row has fewer than seven columns.
        ValueError: a rating or page count is neither numeric nor "N/A".

    Example of the returned shape:
        {"Fiction": [{"title": "...", "author": "...", "language": "...",
                      "rating": 3.3, "publisher": "...", "pages": 288},
                     ...],
         ...}
    """
    book_dictionary: dict[str, list[dict]] = {}

    # newline="" lets the csv module handle line endings and quoted fields
    # itself; the with-block guarantees the file is closed.
    with open(filename, newline="") as infile:
        for row in csv.reader(infile):
            # Skip blank (or whitespace-only) lines instead of crashing on them.
            if not any(field.strip() for field in row):
                continue

            rating_text = row[2].strip()
            pages_text = row[4].strip()

            # The header row carries the column names, not book data.
            if rating_text == "rating" or pages_text == "pages":
                continue

            # Missing values are kept as "" so existing callers see the same
            # placeholder the previous implementation produced.
            rating = "" if rating_text == "N/A" else float(rating_text)
            pages = "" if pages_text == "N/A" else int(pages_text)

            book = {
                "title": row[0],
                "author": row[1],
                "language": row[6],
                "rating": rating,
                "publisher": row[3],
                "pages": pages,
            }
            # Append to the category's list so every book is kept, not just
            # the first one encountered for each category.
            book_dictionary.setdefault(row[5], []).append(book)

    return book_dictionary
# Demo driver: load the sample dataset and report what was found.
# NOTE(review): these statements run on import as well as when the file is
# executed directly; the module-level names are kept as-is in case other
# modules rely on them.
filename = 'google_books_dataset.csv'
word_list = book_category_dictionary(filename)
# The dictionary is keyed by category, so its length is the category count.
print('File', filename, 'contains', len(word_list), 'distinct categories')
print('The books in this file, grouped by category, are:', word_list)