-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathwordCloud.R
37 lines (32 loc) · 909 Bytes
/
wordCloud.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
# One-time dependency setup.
#
# `install.views()` lives in the `ctv` package, so it has to be called through
# the `ctv::` namespace: nothing attaches `ctv` at this point in the script and
# an unqualified call fails with "could not find function".
local({
  # Always make sure `ctv` itself is available.
  if (!requireNamespace("ctv", quietly = TRUE)) {
    install.packages("ctv")
  }

  # Installing a whole CRAN task view is slow and needs network access, so
  # only do it when a package used by this script is actually missing.
  needed <- c("tm", "SnowballC", "wordcloud")
  present <- vapply(needed, requireNamespace, logical(1), quietly = TRUE)
  if (!all(present)) {
    ctv::install.views("NaturalLanguageProcessing")
  }
})
#' Draw a word cloud for the documents in a corpus directory
#'
#' Reads every file in `corpus_dir` as one document, normalises the text
#' (replaces "/" and "@" with a space, lower-cases, removes English stop words,
#' numbers and punctuation, collapses whitespace), sums the term counts over
#' all documents and plots the terms that occur at least `min_freq` times.
#'
#' @param corpus_dir Path of the directory holding the documents. Defaults to
#'   `./corpus/target`, relative to the current working directory.
#' @param min_freq Minimum total frequency a term needs to be drawn.
#' @return Invisibly, a data frame with columns `word` and `freq`, sorted by
#'   decreasing frequency. Called mainly for the side effect of plotting.
worldCloud <- function(corpus_dir = file.path(".", "corpus", "target"),
                       min_freq = 50) {
  stopifnot(
    is.character(corpus_dir), length(corpus_dir) == 1L, !is.na(corpus_dir),
    is.numeric(min_freq), length(min_freq) == 1L, !is.na(min_freq)
  )
  if (!dir.exists(corpus_dir)) {
    stop("corpus directory does not exist: ", corpus_dir, call. = FALSE)
  }

  docs <- tm::Corpus(tm::DirSource(corpus_dir))

  # Plain character functions must be wrapped in content_transformer() so that
  # tm_map() keeps the corpus/document structure intact; assigning gsub()
  # output straight into docs[[j]] or mapping bare tolower() breaks it.
  replace_with_space <- tm::content_transformer(
    function(x, pattern) gsub(pattern, " ", x)
  )
  docs <- tm::tm_map(docs, replace_with_space, "[/@]")
  docs <- tm::tm_map(docs, tm::content_transformer(tolower))
  # Stop words are removed before punctuation so that contractions such as
  # "don't" still match the stop-word list.
  docs <- tm::tm_map(docs, tm::removeWords, tm::stopwords("english"))
  docs <- tm::tm_map(docs, tm::removeNumbers)
  docs <- tm::tm_map(docs, tm::removePunctuation)
  docs <- tm::tm_map(docs, tm::stripWhitespace)

  dtm <- tm::DocumentTermMatrix(docs)
  term_freq <- sort(colSums(as.matrix(dtm)), decreasing = TRUE)
  d <- data.frame(
    word = as.character(names(term_freq)),
    freq = as.numeric(term_freq),
    row.names = NULL,
    stringsAsFactors = FALSE
  )

  if (!any(d$freq >= min_freq)) {
    warning(
      "no term occurs at least ", min_freq, " times; nothing to plot",
      call. = FALSE
    )
    return(invisible(d))
  }

  wordcloud::wordcloud(d$word, d$freq, min.freq = min_freq)
  invisible(d)
}