-
Notifications
You must be signed in to change notification settings - Fork 0
/
tocgrapher.py
116 lines (91 loc) · 3.56 KB
/
tocgrapher.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
'''
Workflow module for graphing tables of contents (TOCs).
2024.9.16 Matt Briggs
'''
import yaml
import threading
import datetime
import time
import logging
from neo4j import GraphDatabase
import tocharvestor as TH
import tocscanner as TS
import tocformats as TF
import mdbutilities as MU
# Date of the current run; used to stamp log and output file names.
TODAYSDATE = datetime.date.today()
# TOC file paths for the folder currently being processed; rebound by main().
TOCLIST = []
def get_split(innumber):
    '''Split ``innumber`` items into four contiguous index ranges.

    Each range is a ``(start, end)`` tuple meant to be used as slice bounds,
    i.e. ``items[start:end]`` with ``end`` exclusive. The first three ranges
    hold ``innumber // 4`` items each and the last one also takes the
    remainder, so the four slices together cover every index from 0 to
    ``innumber - 1`` exactly once.

    Returns:
        A list of four ``(start, end)`` tuples.
    '''
    size = innumber // 4
    # Consecutive boundaries: each range starts exactly where the previous
    # one ends. Starting at ``end + 1`` would drop one item per boundary
    # because slice ends are exclusive.
    bounds = [0, size, size * 2, size * 3, innumber]
    return list(zip(bounds[:-1], bounds[1:]))
def _open_neo4j_driver():
    '''Create a neo4j driver from the credentials stored in working/fowler.yml.'''
    with open("working/fowler.yml", "r") as stream:
        credentials = yaml.safe_load(stream)
    return GraphDatabase.driver(credentials["domain"], auth=(credentials["username"], credentials["password"]))


def parse_toc_block(index_start, index_end, outtype, outputpath):
    '''Parse one segment of the module-level TOCLIST and emit each graph.

    Args:
        index_start: First TOCLIST index to process (inclusive).
        index_end: Slice end (exclusive), as in TOCLIST[index_start:index_end].
        outtype: "neo4j" to load each graph into the database, "csv" to write
            it to files; any other value only prints a reminder per TOC.
        outputpath: Prefix prepended to the generated output file names.

    Failures while emitting a single TOC are logged and the loop moves on to
    the next TOC.
    '''
    toc_seg = TOCLIST[index_start:index_end]
    size = len(TOCLIST)
    # Opened lazily on the first neo4j TOC and reused for the whole segment,
    # then closed in the finally block so connections are not leaked.
    driver = None
    try:
        for count, t in enumerate(toc_seg):
            position = count + index_start
            print("{} of {} getting {}".format(position, size, t))
            graphed = TS.input_tocfile(t)
            # NOTE(review): hard-coded, machine-specific debug dump that every
            # worker thread overwrites; consider removing or making it configurable.
            MU.write_text(str(graphed), "C:\\git\\feature\\information-retrieval-graph-poc\\working\\data.txt")
            if outtype == "neo4j":
                try:
                    if driver is None:
                        driver = _open_neo4j_driver()
                    TF.create_cypher_graph(driver, graphed)
                except Exception as e:
                    logging.error("Error neo4j for {} : {}\n".format(t, e))
            elif outtype == "csv":
                try:
                    filename = outputpath + "{}-graph-{}.txt".format(TODAYSDATE, position)
                    MU.write_text(str(graphed), filename)
                    # NOTE(review): the original passed `output`, which is never
                    # assigned on the csv path and raised on every call;
                    # `outputpath` is the presumed intent - confirm against
                    # tocformats.create_csv_check.
                    TF.create_csv_check(outputpath, graphed, count, TODAYSDATE)
                except Exception as e:
                    logging.error("Error csv for {} : {} : {}".format(t, e, graphed))
            else:
                print("You need a value for the output type.")
    finally:
        if driver is not None:
            driver.close()
def main():
    '''Build the graph, in the configured output type, from a list of GitHub
    repositories that use the DocFX/Learn.microsoft.com content type.

    Operation: loads the job config from jobtoc.yml, sets up logging, then
    for each configured folder harvests its TOC files, applies the optional
    limit, and parses each TOC file and the content associated with it.
    Fewer than 8 TOCs are parsed in the calling thread; otherwise the list is
    split into four segments that are parsed by four threads. Writes to a
    neo4j database or outputs graph files under the configured output path.

    Config keys read: "type", "output", "folders" (items with a "folder"
    key) and "limit" (0 means no limit).
    '''
    global TOCLIST
    with open("jobtoc.yml", "r") as stream:
        config = yaml.safe_load(stream)
    outtype = config["type"].lower()
    outputpath = config["output"]
    logging.basicConfig(filename="{}{}-logs.log".format(outputpath, TODAYSDATE), level=logging.INFO)
    logging.info("Job run at: {}".format(TODAYSDATE))
    # Normalise once so that both `limit: 0` and `limit: "0"` mean "no limit";
    # comparing against the string "0" alone turned an integer 0 into an
    # empty TOC list.
    limit = int(config["limit"])
    for folder in config["folders"]:
        tocs = TH.get_tocs_from_repo(folder["folder"])
        TOCLIST = tocs[:limit or len(tocs)]
        if len(TOCLIST) < 8:
            # Too few TOCs to be worth splitting across threads.
            parse_toc_block(0, len(TOCLIST), outtype, outputpath)
            continue
        threads = []
        for number, (start, end) in enumerate(get_split(len(TOCLIST))):
            print("Thread: {}".format(number))
            th = threading.Thread(target=parse_toc_block, args=(start, end, outtype, outputpath))
            th.start()
            threads.append(th)
        # Wait for every segment before TOCLIST is rebound for the next folder.
        for th in threads:
            th.join()
    print("Done.")
    logging.info("Finished: {}".format(time.localtime(time.time())))
# Run the job only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()