-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathnews_loader.py
81 lines (59 loc) · 2.34 KB
/
news_loader.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
import psycopg2
import db_settings
# PostgreSQL connection settings, copied from the local db_settings module
# into short module-level names used when opening a connection.
dbname, dbhost, dbuser, dbpass = (
    db_settings.DBNAME,
    db_settings.DBHOST,
    db_settings.DBUSER,
    db_settings.DBPASS,
)
def connect():
    """Open a new database connection using the module-level settings.

    The settings are handed to psycopg2 as keyword arguments instead of
    being concatenated into a hand-quoted DSN string, so a value that
    contains a quote, a backslash or a space (typically the password)
    can no longer corrupt the connection string.

    Returns:
        A new psycopg2 connection. The caller is responsible for closing it.
    """
    return psycopg2.connect(
        dbname=dbname,
        user=dbuser,
        host=dbhost,
        password=dbpass,
    )
def load_news(fields=('id', 'title', 'text', 'portal'),
              date_begin='22/09/2018', date_end='22/10/2018'):
    """Load rows from the news table whose date_time lies in a date window.

    Args:
        fields: Column names to select. They are interpolated into the SQL
            text because identifiers cannot be bound as query parameters,
            so they must come from trusted code, never from user input.
        date_begin: Inclusive lower bound compared against date_time.
        date_end: Inclusive upper bound compared against date_time.

    Returns:
        A list of dicts, one per row in ascending id order, mapping each
        requested field name to the value returned for it.
    """
    # Materialise once so any iterable of names works and the same ordering
    # is used for both the SELECT list and the dict keys.
    fields = list(fields)
    query = (
        "SELECT " + ", ".join(fields) + " FROM news "
        "WHERE date_time <= %s AND date_time >= %s "
        "ORDER BY id ASC"
    )
    conn = connect()
    try:
        with conn.cursor() as cur:
            # The date bounds are bound parameters, so the driver quotes them.
            cur.execute(query, (date_end, date_begin))
            return [dict(zip(fields, row)) for row in cur.fetchall()]
    finally:
        # Always release the connection, even if the query fails.
        conn.close()
def load_distinct(field_name, date_begin='22/09/2018', date_end='22/10/2018'):
    """Return the distinct values of one news column within a date window.

    Args:
        field_name: Column (or SQL expression) to select. It is interpolated
            into the SQL text because identifiers cannot be bound as query
            parameters, so it must come from trusted code, never from user
            input.
        date_begin: Inclusive lower bound compared against date_time.
            Defaults to the window that was previously hard-coded.
        date_end: Inclusive upper bound compared against date_time.
            Defaults to the window that was previously hard-coded.

    Returns:
        A list with the first column of every row returned by the query.
    """
    query = (
        "SELECT DISTINCT " + field_name + " FROM news "
        "WHERE date_time <= %s AND date_time >= %s"
    )
    conn = connect()
    try:
        with conn.cursor() as cur:
            cur.execute(query, (date_end, date_begin))
            return [row[0] for row in cur.fetchall()]
    finally:
        # Always release the connection, even if the query fails.
        conn.close()
def _to_sql_param(value):
    """Return value unwrapped to a plain Python object when it offers .item().

    NumPy-style scalars expose .item(); the database driver may not know how
    to adapt them directly, whereas the previous str()-based query building
    accepted them. Values without a callable .item attribute pass through.
    """
    item = getattr(value, "item", None)
    return item() if callable(item) else value


def add_score(ids, field, score):
    """Store a score for a pair of news ids in the similarity table.

    If a row for (ids[0], ids[1]) already exists, its `field` column is
    updated; otherwise a new row is inserted with that column set.

    Args:
        ids: Indexable holding the two news ids; ids[0] is matched against
            news_id1 and ids[1] against news_id2.
        field: Name of the similarity column to write. It is interpolated
            into the SQL text because identifiers cannot be bound as query
            parameters, so it must come from trusted code, never from user
            input.
        score: Value written to the `field` column.

    Returns:
        None. The change is committed before the connection is closed.
    """
    id1 = _to_sql_param(ids[0])
    id2 = _to_sql_param(ids[1])
    score = _to_sql_param(score)

    count_query = (
        "SELECT COUNT(*) FROM similarity "
        "WHERE news_id1 = %s AND news_id2 = %s"
    )
    insert_query = (
        "INSERT INTO similarity (news_id1, news_id2, " + field + ") "
        "VALUES (%s, %s, %s)"
    )
    update_query = (
        "UPDATE similarity SET " + field + " = %s "
        "WHERE news_id1 = %s AND news_id2 = %s"
    )

    conn = connect()
    try:
        with conn.cursor() as cur:
            cur.execute(count_query, (id1, id2))
            # NOTE: check-then-write is not atomic; two concurrent callers
            # could both take the insert branch for the same pair.
            if cur.fetchone()[0] > 0:
                cur.execute(update_query, (score, id1, id2))
            else:
                cur.execute(insert_query, (id1, id2, score))
        conn.commit()
    finally:
        # Always release the connection; closing without a commit discards
        # any uncommitted work if a statement above failed.
        conn.close()