-
Notifications
You must be signed in to change notification settings - Fork 13
/
Copy pathsxsw.py
35 lines (24 loc) · 779 Bytes
/
sxsw.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
import json
import matplotlib.pyplot as plt
from textstat.textstat import textstat
from nltk import FreqDist
filename = 'bieber-raw-test.json'
READ = 'rb'
TEXT=1

# Load the stopword list, one entry per line. Context managers make sure the
# file handles are closed instead of being left to the garbage collector.
# NOTE(review): stopwords is read here but never applied to the word list
# below in the visible script -- confirm whether filtering was intended.
with open('stopwords', READ) as stopwords_file:
    stopwords = stopwords_file.read().splitlines()

# Parse the tweets file; each entry is indexed by its 'text' key below.
with open(filename, READ) as tweets_file:
    tweets = json.load(tweets_file)

# Count whitespace-separated tokens across the text of every tweet.
words = ' '.join(tweet['text'] for tweet in tweets).split()
fdist = FreqDist(words)

# Split the 20 most common (token, count) pairs into two parallel tuples.
# Unpacking zip(*[]) would raise ValueError, so fall back to empty tuples
# when there are no tokens at all.
top_words = fdist.most_common(20)
symbol, freq = zip(*top_words) if top_words else ((), ())
# Draw the counts of the most common tokens as a black line and save the
# figure to 'frequencies.png'.
fig = plt.figure()
ax = fig.add_subplot(111)
ax.plot(freq,'k',linewidth=2)
# plot() received only y-values, so the points sit at x = 0..len(freq)-1.
# The ticks must use the same positions, otherwise every label is shifted
# one point to the right of the count it belongs to.
ax.set_xticks(range(len(symbol)))
ax.set_xticklabels(symbol,rotation='vertical')
# Show tick marks only on the bottom and left axes and hide the other spines.
ax.xaxis.set_ticks_position('bottom')
ax.yaxis.set_ticks_position('left')
ax.spines['top'].set_color('none')
ax.spines['right'].set_color('none')
plt.tight_layout()
plt.savefig('frequencies.png',dpi=300)