-
Notifications
You must be signed in to change notification settings - Fork 0
/
tomo.py
136 lines (114 loc) · 4.08 KB
/
tomo.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
import cabocha
import asyncio
from cabocha.analyzer import CaboChaAnalyzer
import configparser
from configparser import ConfigParser
import json
from pprint import pprint
import re
import requests
import socket
import urllib
import webbrowser
# Base URL of the Jisho word-search API; the keyword to look up is appended to it.
apiURL = 'http://jisho.org/api/v1/search/words?keyword='
def load_config():
    """Read the Twitch credentials from ``tomo.ini``.

    Returns:
        A ``(nickname, channel, token)`` tuple holding the values of the
        options of the same names in the ``[TWITCH]`` section.
    """
    parser = configparser.ConfigParser()
    parser.read('tomo.ini')
    return tuple(
        parser.get('TWITCH', option)
        for option in ('nickname', 'channel', 'token')
    )
def print_tree(tree):
    """Pretty-print the attribute dictionary of every token in *tree*.

    Tokens are visited chunk by chunk, in order.
    """
    every_token = (item for group in tree for item in group)
    for item in every_token:
        pprint(vars(item))
def analyze_msg(message):
    """Parse *message* with a freshly created CaboCha analyzer.

    Returns whatever ``CaboChaAnalyzer.parse`` produces for the message.
    """
    return CaboChaAnalyzer().parse(message)
def parse(tree):
    """Collect the base forms (``genkei``) of every token in *tree*.

    Args:
        tree: An iterable of chunks, each an iterable of tokens exposing a
            ``genkei`` attribute.

    Returns:
        A ``(chunks, tokens)`` tuple. ``chunks`` holds one string per chunk
        (the base forms of its tokens concatenated) and ``tokens`` holds
        every base form individually, both in tree order. A trailing ``"*"``
        element is dropped from each list. Both lists are empty when the
        tree is empty.
    """
    tokens = []
    chunks = []
    for chunk in tree:
        base_forms = [token.genkei for token in chunk]
        tokens.extend(base_forms)
        chunks.append("".join(base_forms))
    # Drop the trailing '*' element. The emptiness guards keep an empty tree
    # from raising IndexError on the [-1] lookup.
    if tokens and tokens[-1] == "*":
        tokens.pop()
    if chunks and chunks[-1] == "*":
        chunks.pop()
    return chunks, tokens
async def ask_jisho(tokens):
    """Query the Jisho API for every word in *tokens*.

    Args:
        tokens: An iterable of strings to look up.

    Returns:
        A ``(data, senses)`` tuple of dictionaries keyed by word. ``data``
        maps each word to the ``'data'`` list of the API response. ``senses``
        maps a word to the result of ``get_senses`` for the response entry
        whose ``'slug'`` equals the word; words without such an entry are
        absent. If several entries match, the last one wins.
    """
    # Imported locally so the block does not depend on ``urllib.parse``
    # having been loaded as a side effect of another import.
    from urllib.parse import quote

    data = {}
    senses = {}
    for word in tokens:
        # Percent-encode the keyword so characters such as '&' or '#' cannot
        # alter the query string. The timeout keeps a stalled server from
        # hanging the bot forever.
        # NOTE: requests is synchronous, so this call blocks the event loop
        # while the request is in flight.
        response = requests.get(apiURL + quote(word, safe=''), timeout=10)
        data[word] = json.loads(response.content.decode('utf-8'))['data']
        for entry in data[word]:
            if entry['slug'] == word:
                # get_senses is a coroutine function and indexes its argument
                # with 'senses', so it must be awaited and handed the whole
                # entry rather than the slug string.
                senses[word] = await get_senses(entry)
    return data, senses
async def open_jisho(message):
    """Open the Jisho.org search page for *message* in a web browser.

    Args:
        message: The full chat message; it is percent-encoded before being
            appended to the search URL.
    """
    # Import the submodule explicitly: a bare ``import urllib`` does not
    # guarantee that ``urllib.parse`` has been loaded.
    from urllib.parse import quote

    webbrowser.open("http://jisho.org/search/" + quote(message))
async def get_senses(word):
    """Return the senses recorded for a Jisho response entry.

    Args:
        word: A mapping with a ``'senses'`` key.
            NOTE(review): presumably one element of the API's ``'data'``
            list; confirm against the caller.

    Returns:
        A new list holding the items of ``word['senses']``.

    Raises:
        KeyError: If *word* has no ``'senses'`` key.
    """
    # Previously the lookup result was discarded and an empty list was
    # always returned.
    return list(word['senses'])
async def listen_for_messages(s):
    """Read IRC lines from socket *s* and look up every chat message.

    Each ``PING`` line is answered with a ``PONG`` carrying the same payload.
    Each ``PRIVMSG`` line has its message opened on Jisho, analyzed, and its
    tokens queried through the Jisho API; intermediate results are
    pretty-printed. Any other non-empty line prints ``parse failed``.

    Args:
        s: A connected socket-like object providing ``recv`` and ``send``.

    Returns:
        None, once the peer closes the connection.
    """
    # NOTE: ``s.recv`` is a blocking call, so this coroutine does not yield
    # to the event loop while it waits for data.
    privmsg = re.compile(
        # The channel group is ``\S+`` rather than a greedy ``.*`` so that a
        # message containing " :" (e.g. "hi :)") is not split in the wrong
        # place.
        r':([^!\s]+)!\S*@\S*\.tmi\.twitch\.tv PRIVMSG #(\S+) :(.*)'
    )
    pending = b""
    while True:
        received = s.recv(2048)
        if not received:
            # An empty read means the connection was closed; stop instead of
            # spinning on it forever.
            return
        pending += received
        # Only complete lines are processed. The unterminated tail stays
        # buffered as bytes so a line (or a multi-byte UTF-8 character) split
        # across two reads is decoded only once it is whole.
        *lines, pending = pending.split(b"\n")
        for raw_line in lines:
            line = raw_line.decode('utf-8', errors='replace').rstrip('\r')
            if not line:
                continue
            # Keep-alive: answer so the server does not drop the connection.
            if line.startswith('PING'):
                s.send(("PONG" + line[4:] + "\r\n").encode('utf-8'))
                continue
            result = privmsg.search(line)
            if not result:
                print("parse failed")
                continue
            # Extract the chat message and turn its tokens into a list.
            message = result.group(3)
            await open_jisho(message)
            tree = analyze_msg(message)
            chunk_list, token_list = parse(tree)
            pprint(chunk_list)
            pprint(token_list)
            # Using the Jisho API, query each word in the list.
            data, _senses = await ask_jisho(token_list)
            pprint(data)
async def main():
    """Connect to Twitch chat over IRC and process incoming messages.

    Credentials come from ``load_config``. After authenticating and joining
    the configured channel, control is handed to ``listen_for_messages``.
    The socket is closed when this coroutine exits, whether normally or
    through an exception.
    """
    # load credentials from the config file
    nickname, channel, token = load_config()
    server = 'irc.chat.twitch.tv'
    port = 6667
    # connect the session and set up the socket to listen for chat messages
    with socket.socket() as s:
        s.connect((server, port))
        # sendall keeps writing until the whole command is out; a plain
        # send may transmit only part of the buffer.
        s.sendall(f"PASS {token}\n".encode('utf-8'))
        s.sendall(f"NICK {nickname}\n".encode('utf-8'))
        s.sendall(f"JOIN {channel}\n".encode('utf-8'))
        await listen_for_messages(s)
if __name__ == "__main__":
    # Create the loop explicitly: asyncio.get_event_loop() is deprecated when
    # no loop is running and fails on recent Python versions. Closing the
    # loop in ``finally`` releases its resources even if main() raises.
    loop = asyncio.new_event_loop()
    try:
        loop.run_until_complete(main())
    finally:
        loop.close()