-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathurlClassification.py
110 lines (85 loc) · 3.18 KB
/
urlClassification.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
import json
# Running page counters: games pages, computers pages, invalid pages.
# The two category counters start at 1 rather than 0; defineCategory divides
# by them and the report written at the end of the script subtracts that
# initial 1 again (presumably a guard against division by zero -- confirm).
totalGames, totalComp, totalInvalid = 1, 1, 0

# Fixed total number of links evaluated; denominator of the prior
# probabilities.
totalLinks = 7000

# Prior probabilities of each category; recomputed on every call to
# defineCategory.
priorProbGames = priorProbComp = 0
def analyzeCategory(priorProbGames, priorProbComp, probIncGames, probIncComp):
    """Pick the category whose prior-times-likelihood score is larger.

    Args:
        priorProbGames: Prior probability of the games category.
        priorProbComp: Prior probability of the computers category.
        probIncGames: Likelihood term for the games category.
        probIncComp: Likelihood term for the computers category.

    Returns:
        "Games" when the games score is strictly larger, "Computers" when
        the computers score is strictly larger, and "Invalid" otherwise
        (for example when both scores are equal).
    """
    scoreGames = priorProbGames * probIncGames
    scoreComp = priorProbComp * probIncComp

    if scoreGames > scoreComp:
        return "Games"
    if scoreGames < scoreComp:
        return "Computers"
    # Neither score wins: treat the page as unclassifiable.
    return "Invalid"
def defineCategory(keyword, minKeywords=7):
    """Classify one page from its keyword counts and update the running totals.

    Args:
        keyword: Mapping for a single page; this function reads
            keyword['games']['total'] and keyword['computers']['total'].
        minKeywords: Minimum number of keyword hits at least one category
            must reach for the page to be classified at all. Defaults to 7.

    Returns:
        One of "Games", "Computers" or "Invalid".

    Side effects:
        Recomputes the module-level priorProbGames / priorProbComp from the
        current counters, then increments exactly one of totalGames,
        totalComp or totalInvalid according to the returned category.
    """
    global totalGames, totalComp, totalInvalid
    global priorProbGames, priorProbComp

    # Keyword hits for each category on the page being analysed.
    gamesHits = keyword['games']['total']
    compHits = keyword['computers']['total']

    # Refresh the priors from the pages classified so far.
    priorProbGames = totalGames / totalLinks
    priorProbComp = totalComp / totalLinks

    if gamesHits < minKeywords and compHits < minKeywords:
        # Too few hits in both categories to say anything.
        category = "Invalid"
    elif gamesHits != 0 and compHits != 0:
        # Both categories are present: compare prior * likelihood.
        category = analyzeCategory(
            priorProbGames,
            priorProbComp,
            gamesHits / totalGames,
            compHits / totalComp,
        )
    elif gamesHits != 0:
        # Only games keywords were found.
        category = "Games"
    elif compHits != 0:
        # Only computers keywords were found.
        category = "Computers"
    else:
        # Both counts are zero; only reachable when minKeywords <= 0.
        category = "Invalid"

    # Update the counter of whichever category was chosen, in one place.
    if category == "Games":
        totalGames += 1
    elif category == "Computers":
        totalComp += 1
    else:
        # Anything that is not a known category is reported as "Invalid".
        category = "Invalid"
        totalInvalid += 1
    return category
# Load the scraped pages. The input is expected to hold two parallel lists:
# data['keywords'] (per-page keyword counts) and data['link'] (page URLs).
# JSON text is UTF-8, so do not rely on the platform default encoding.
with open("datos.json", encoding="utf-8") as file:
    data = json.load(file)

# Classify every page in order, numbering the entries from 1.
listCategory = []
for entryId, (pageKeywords, link) in enumerate(
        zip(data['keywords'], data['link']), start=1):
    category = defineCategory(pageKeywords)
    if category == "Games":
        keywords = pageKeywords['games']
    elif category == "Computers":
        keywords = pageKeywords['computers']
    else:
        # Invalid pages carry no keyword details in the report.
        keywords = {}
    listCategory.append({"id": entryId, "category": category, "link": link, "keywords": keywords})

# The games/computers counters start at 1, so subtract that initial value
# to report the number of pages actually classified.
listTotals = [
    {'name': 'totalGames', 'value': totalGames - 1},
    {'name': 'totalComputer', 'value': totalComp - 1},
    {'name': 'totalInvalid', 'value': totalInvalid},
]

data2 = {"totals": listTotals, "categories": listCategory}
with open('dataCategories.json', 'w', encoding="utf-8") as f:
    json.dump(data2, f, indent=4)