-
Notifications
You must be signed in to change notification settings - Fork 1
/
sync_db.py
79 lines (69 loc) · 1.7 KB
/
sync_db.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
"""
Builds a model for each VC from the pickled cogram data and stores the VCs in a SQL database.
"""
from datetime import datetime
from flask.ext.sqlalchemy import *
import os
from flask import Flask
from models.company import Company,VC,Trigram,Cogram, db
import pickle
import sys
from vc_matcher import *
import datetime
# Create any missing tables for the models imported from models.company.
db.create_all()

# `python sync_db.py drop` drops every table and stops without syncing.
if len(sys.argv) > 1 and sys.argv[1] == "drop":
    db.drop_all()
    sys.exit()

# Build a Flask app configured from the `db_url` environment variable and
# rebind `db` to a SQLAlchemy handle attached to it.
# NOTE(review): this shadows the `db` imported from models.company, which the
# models were declared against - confirm the rebinding is intentional.
database_url = os.environ.get('db_url')
app = Flask(__name__)
app.config['SQLALCHEMY_DATABASE_URI'] = database_url
db = SQLAlchemy(app)
# Load the pickled inputs. Each `vcs` entry is indexed as
# (url, name, companies) by this script; each `co_cograms` entry as
# (company_name, cograms).
# NOTE(security): pickle.load can execute arbitrary code embedded in the file;
# only run this against data files you produced yourself.
with open("data/vctree/topvcs.p", "rb") as vc_file:
    vcs = pickle.load(vc_file)
with open("data/vctree/cograms.p", "rb") as cogram_file:
    co_cograms = pickle.load(cogram_file)

# Start timestamp; the script prints another one when it finishes.
print(datetime.datetime.now())

# VC url -> names of the companies listed for that VC (first field of each
# company entry).
vc_cos = {vc[0]: [co[0] for co in vc[2]] for vc in vcs}

# Company name -> cograms for that company.
cogram_dic = {cogram[0]: cogram[1] for cogram in co_cograms}
# Build one model per VC from positive/negative cogram samples and store the
# VC row in the database.
vc_num = 1  # 1-based progress counter, printed after each stored VC
errors = 0  # sampled portfolio companies that have no entry in cogram_dic

# Snapshot the key order once: cogram_dic is not modified below, so every VC
# draws its negative candidates from the same leading keys.
cogram_companies = list(cogram_dic)

for vc in vcs:
    vc_url = vc[0]
    vc_name = vc[1]
    vc_companies = vc_cos[vc_url]
    half = len(vc_companies) // 2

    # Positive samples: cograms of the first half of this VC's companies.
    # NOTE(review): the sample size mirrors the negative candidate count
    # below - confirm that using only half the portfolio is intended.
    yes_cograms = []
    for company in vc_companies[:half]:
        try:
            yes_cograms.append(cogram_dic[company])
        except KeyError:
            # Company appears in the VC's list but has no cograms on file.
            errors += 1

    # Negative samples: cograms of the first `half` known companies that are
    # not in this VC's list. Slicing cannot overrun when there are fewer
    # known companies than `half`.
    portfolio = set(vc_companies)
    no_cograms = [
        cogram_dic[company]
        for company in cogram_companies[:half]
        if company not in portfolio
    ]

    # NOTE(review): build_model comes from vc_matcher; it is assumed to take
    # (positive, negative) cogram lists - confirm against that module.
    nb_model = build_model(yes_cograms, no_cograms)
    new_VC = VC(name=vc_name, url=vc_url, nb_model=nb_model)

    try:
        db.session.add(new_VC)
        db.session.commit()
    except Exception:
        # Best effort per VC: report it, then roll back so the failed
        # transaction does not block the commits for the remaining VCs.
        db.session.rollback()
        print('failed')
    else:
        print(vc_num)
        vc_num += 1

db.session.close()

# End timestamp and the number of companies skipped for missing cograms.
print(datetime.datetime.now())
print('errors %i' % errors)