forked from hanel2527/dcinside-crawler
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathgallchangranking.ver.1.2.py
142 lines (127 loc) · 5.09 KB
/
gallchangranking.ver.1.2.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
import requests
from bs4 import BeautifulSoup
import operator
import time
import re
import os
def request(url, timeout=10):
    """Fetch *url* with browser-like request headers and return the response.

    Args:
        url: Address to GET. The ``Host`` header is fixed to
            ``gall.dcinside.com``, so the URL is expected to point there.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` waits indefinitely, which would stall the
            whole crawl on a single dead connection.

    Returns:
        The ``requests.Response`` object returned by ``requests.get``.

    Raises:
        requests.exceptions.RequestException: on connection failure or when
            the timeout expires.
    """
    header = {
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9',
        'Accept-Encoding': 'gzip,deflate',
        'Accept-Language': 'ko-KR,ko;q=0.9,en-US;q=0.8,en;q=0.7',
        'Cache-Control': 'max-age=0',
        'Connection': 'keep-alive',
        'Host': 'gall.dcinside.com',
        'Upgrade-Insecure-Requests': '1',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0;Win64;x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.181 Safari/537.36'
    }
    return requests.get(url, headers=header, timeout=timeout)
def gall_check(gall):
    """Look up the display name of the gallery whose id is *gall*.

    Downloads the gallery's list page, collects its ``<meta name="title">``
    tags and searches their string form for a double quote followed by text
    ending in "갤러리".

    Returns:
        The captured text, or ``None`` when the pattern does not occur.
    """
    response = request("http://gall.dcinside.com/board/lists/?id=%s" % gall)
    document = BeautifulSoup(response.text, "html.parser")
    title_tags = document.find_all("meta", {"name": "title"})
    # The leftmost match is the same one findall() would list first.
    found = re.search("\"(.*갤러리)", str(title_tags))
    return found.group(1) if found else None
def main():
    """Crawl a gallery's list pages and write a ranking of its posters.

    Prompts for a gallery id (asking again until ``gall_check`` recognises
    it) and for an inclusive page range, counts one post per writer cell found
    on those pages, sorts the counts with ``dict_sorter`` and passes them to
    ``file_writer`` together with the number of pages crawled.

    Raises:
        ValueError: if a page number typed at the prompt is not an integer.
    """
    # Loop instead of recursing: calling main() again on a bad id ran a full
    # crawl in the inner call and then resumed here with the bad id.
    while True:
        gall = input("갤러리 id?(ex:mlp): ")
        gall_name = gall_check(gall)  # single lookup; each call is an HTTP request
        if gall_name:
            print(gall_name)
            break
        print("id 잘못 입력한듯")

    init_page = int(input("시작 페이지?: "))
    final_page = int(input("마지막 페이지?: "))
    nick_dic = dict()
    for page in range(init_page, final_page + 1):
        print("\rWorking page={}/{}".format(page, final_page), end="")
        recept = request("http://gall.dcinside.com/board/lists/?id=%s&page=%d" % (gall, page))
        soup = BeautifulSoup(recept.text, "html.parser")
        for writer_cell in soup.find_all('td', {'class': "gall_writer ub-writer"}):
            try:
                nick = writer_cell.attrs['data-nick']
                uid = writer_cell.attrs['data-uid']
                ip = writer_cell.attrs['data-ip']
            except KeyError:
                # Cells lacking the writer attributes are not counted.
                continue
            if nick == "운영자":  # notice posts
                continue
            nick_str = nick + "(" + uid + ip + ")"
            nick_dic[nick_str] = nick_dic.get(nick_str, 0) + 1
    # Finish the "\r" progress line so later output starts on a fresh line.
    print()

    nick_list = dict_sorter(nick_dic)
    # The range above is inclusive, so the number of pages crawled is
    # final - init + 1 (a single page must count as 1, not 0).
    page_num = final_page - init_page + 1
    file_writer(gall, nick_list, page_num)  # save
def dict_sorter(nick_dic):
    """Return the ``(key, value)`` pairs of *nick_dic*, largest value first.

    The pairs are sorted ascending by value and the result is then reversed,
    so pairs with equal values appear in the reverse of their dictionary
    order.
    """
    by_value = operator.itemgetter(1)
    ascending = sorted(nick_dic.items(), key=by_value)
    return ascending[::-1]
def nick_change(nick_list):
    """Interactively merge ranking entries that belong to the same person.

    Prints the ranking, then repeatedly reads a pair of 1-based ranks
    ``"a,b"``. Entry ``a`` becomes ``"<nick a>=<nick b>"`` with the two counts
    added together, and entry ``b`` keeps its nick with a count of 0. Input
    whose first field is ``0`` ends the loop. Malformed, out-of-range or
    identical ranks are rejected and the prompt is shown again.

    Args:
        nick_list: List of ``(nick, count)`` tuples; it is modified in place
            while merging.

    Returns:
        A new list of ``(nick, count)`` tuples produced by ``dict_sorter``.
    """
    print("랭킹\t닉\t글수")
    for rank, entry in enumerate(nick_list, start=1):
        print("%d\t%s\t%s" % (rank, entry[0], entry[1]))
    print("닉변 처리(ex)1위와 10위가 동일닉일 시 1,10 한번에 두개씩만, 종료는 0,0")

    size = len(nick_list)
    while True:
        change = input("닉변?: ")
        # Strip so that "0 ,0" or " 1, 10" behave like their compact forms.
        rankings = [field.strip() for field in change.split(",")]
        if rankings[0] == "0":
            break
        try:
            # Unpacking raises ValueError unless exactly two integers were given.
            keep, drop = [int(field) - 1 for field in rankings]
        except ValueError:
            print("잘못된 입력 (ex: 1,10 / 종료는 0,0)")
            continue
        # Reject ranks outside the list (negative indexes would silently wrap
        # around) and a rank merged with itself (which would zero its count).
        if not (0 <= keep < size and 0 <= drop < size) or keep == drop:
            print("잘못된 입력 (ex: 1,10 / 종료는 0,0)")
            continue

        keep_nick, keep_count = nick_list[keep][0], nick_list[keep][1]
        drop_nick, drop_count = nick_list[drop][0], nick_list[drop][1]
        nick_list[keep] = (keep_nick + "=" + drop_nick, keep_count + drop_count)
        nick_list[drop] = (drop_nick, 0)

    return dict_sorter(dict(nick_list))
def file_writer(gall, nick_list, page_num):
    """Write the ranking report and its editable companion file.

    Two files are created in the current directory, both named after the
    gallery id and the current local time:

    * ``<gall>_gall-<time>.txt``: header lines followed by rank, nick, post
      count and percentage share for each entry.
    * ``edit_<gall>_gall-<time>-<page_num>.txt``: ``nick<TAB>count`` lines.

    Entries whose count is 0 are left out of both files.

    Args:
        gall: Gallery id used in the file names.
        nick_list: Sequence of ``(nick, count)`` pairs in ranking order.
        page_num: Number of pages the counts were collected from.
    """
    timestr = time.strftime("%Y_%m_%d-%H_%M")
    file_name = "%s_gall-%s.txt" % (gall, timestr)
    edit_file_name = "edit_%s_gall-%s-%d.txt" % (gall, timestr, page_num)
    print(file_name)

    # NOTE(review): 49 is presumably the number of posts per list page; confirm.
    total = page_num * 49

    # Context managers close both files even if a write fails part-way.
    # The platform default text encoding is kept on purpose: edit_nick reads
    # the edit file back with the default encoding as well.
    with open(file_name, 'w') as f, open(edit_file_name, "w") as ef:
        f.write("갤창랭킹 made by hanel2527, 마이 리틀 포니 갤러리\n")
        f.write("총 글수: %d\n" % total)
        f.write("랭킹\t닉\t글 수\t갤 지분(%)\n")
        for rank, entry in enumerate(nick_list, start=1):
            nick, count = entry[0], entry[1]
            if count == 0:
                continue
            # A page count of 0 gives total == 0; report a 0% share rather
            # than raising ZeroDivisionError.
            share = count / total * 100 if total else 0.0
            f.write("%d\t%s\t%d\t%.2f\n" % (rank, nick, count, share))
            ef.write("%s\t%d\n" % (nick, count))
def edit_nick():
    """Re-open a saved ``edit_*.txt`` file, merge nicks and write new reports.

    Lists the files in the current directory whose names match
    ``edit_*.txt``, asks which one to use, parses its ``nick<TAB>count``
    lines, runs ``nick_change`` on them and passes the result to
    ``file_writer``. The gallery id and page count are recovered from the
    chosen file name. Returns without doing anything when no matching file
    exists.

    Raises:
        IndexError: if the chosen file name does not contain the
            ``edit_<gall>_gall`` / ``<digits>.txt`` parts.
        ValueError: if a line's count field is not an integer.
    """
    # Raw strings: "\." and "\d" are invalid escapes in a normal literal.
    filename_list = [
        filename for filename in os.listdir()
        if re.match(r"^edit_.*\.txt", filename)
    ]
    if not filename_list:
        print("edit_*.txt 파일이 없음")
        return
    for n, filename in enumerate(filename_list, start=1):
        print(n, filename)

    # Ask again until the answer is one of the numbers printed above.
    while True:
        try:
            num = int(input("맞는 것 번호?: "))
        except ValueError:
            continue
        if 1 <= num <= len(filename_list):
            break
    file_name = filename_list[num - 1]
    print(file_name)

    nick_list = list()
    with open(file_name, "r") as f:  # closed automatically after reading
        for line in f:
            if not line.strip():
                continue  # ignore blank lines
            # Split on the last tab only, so the count is always the final field.
            nick, count = line.rsplit('\t', 1)
            nick_list.append((nick, int(count)))

    nick_list = nick_change(nick_list)
    gall = re.findall(r"^edit_(.*)_gall", file_name)[0]
    page_num = re.findall(r"(\d+)\.txt", file_name)[0]
    file_writer(gall, nick_list, int(page_num))
if __name__ == "__main__":
    # Script entry point: show the banner, then ask whether to crawl ("g").
    print("갤창랭킹 made by hanel2527, mlp갤")
    mode = input("갤창랭킹/편집(g/e): ")
    if mode == "g":
        main()
    # NOTE(review): the source's indentation was lost; with no else-branch the
    # edit step is assumed to run after either choice. Confirm against the
    # original file.
    edit_nick()