-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathextract_tweets.jl
154 lines (123 loc) · 4.6 KB
/
extract_tweets.jl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
module TweetExtractor
using HTTP
using JSON
using DataFrames
using CSV
"""
    get_keys(filename::String = ".final_keys")

Return a dictionary of Twitter API keys read from `filename`.

Every line that contains `=` is split at the first `=` into a key and a value;
both are stripped of surrounding whitespace. Lines without `=` are skipped.

# Arguments
- `filename::String`: path of the key file, one `name=value` pair per line.

# Throws
- `ArgumentError`: if `filename` is not an existing file.
"""
function get_keys(filename::String = ".final_keys")
    # `exit` only accepts an integer status and `+` is not defined for strings,
    # so a missing file is reported with a proper exception instead.
    isfile(filename) || throw(ArgumentError("File not found: " * filename))
    api_keys = Dict{String,String}()
    for line in eachline(filename)
        occursin("=", line) || continue
        # limit=2 keeps any further `=` characters (e.g. base64 padding) in the value.
        key, value = split(line, "="; limit=2)
        api_keys[strip(key)] = strip(value)
    end
    return api_keys
end
"""
    make_get_req(api_keys::Dict, url::String, params::Dict)

Return the response of a GET request to `url` with query parameters `params`.

The request carries a bearer token taken from `api_keys["token"]`.

# Arguments
- `api_keys::Dict`: API credentials; must contain the key `"token"`.
- `url::String`: endpoint to query.
- `params::Dict`: query-string parameters.

# Throws
- `KeyError`: if `api_keys` has no `"token"` entry.
- `ErrorException`: if the response status is not 200.
"""
function make_get_req(api_keys::Dict, url::String, params::Dict)
    headers = [
        "Authorization" => "Bearer " * api_keys["token"],
        "User-Agent" => "Twitter-API-sample-code",
    ]
    response = HTTP.request("GET", url, headers; query=params)
    # `exit` takes an integer status and `+` cannot join a string with a number,
    # so the previous check raised a MethodError instead of reporting the status.
    # NOTE(review): HTTP.jl may raise its own exception for error statuses before
    # this check runs -- confirm against the HTTP.jl version in use.
    response.status == 200 || error("Error: ", response.status)
    return response
end
"""
    extract_tweets(write_result_csv::String, next_token=nothing)

Write tweets to `write_result_csv`; return result count & next token.

Requests one page from the Twitter full-archive search endpoint using a fixed
query (English, non-retweet tweets mentioning Ivermectin, Remdesivir or
Hydroxychloroquine within April 2022) and passes the returned tweets on to
`write_unlabeled_tweets`.

# Arguments
- `write_result_csv::String`: filepath to write the CSV to.
- `next_token=nothing`: next token String to use in query, if using; `nothing`
  requests the first page.

# Returns
A tuple `(result_count, new_next_token)`. `new_next_token` is `nothing` when the
response metadata contains no `"next_token"` entry.
"""
function extract_tweets(write_result_csv::String, next_token=nothing)
    api_keys = get_keys()
    # Query parameters and endpoint follow the Twitter API v2 sample code:
    # https://github.com/twitterdev/Twitter-API-v2-sample-code/blob/main/Recent-Tweet-Counts/recent_tweet_counts.py
    # Timestamps are zero-padded so that they are valid RFC 3339 date-times.
    params = Dict{String,String}(
        "query" => "((Ivermectin OR Remdesivir OR Hydroxychloroquine OR ivermectin OR remdesivir OR hydroxychloroquine OR #Ivermectin OR #Remdesivir OR #Hydroxychloroquine OR #ivermectin OR #remdesivir OR #hydroxychloroquine) -is:retweet lang:en)",
        "tweet.fields" => "text",
        "max_results" => "500",
        "start_time" => "2022-04-01T13:00:00.00Z",
        "end_time" => "2022-04-30T13:00:00.00Z",
    )
    # Only send the pagination token when the caller supplied one.
    if !isnothing(next_token)
        params["next_token"] = next_token
    end
    url = "https://api.twitter.com/2/tweets/search/all"

    response = make_get_req(api_keys, url, params)
    parsed = JSON.parse(String(response.body))
    meta = parsed["meta"]
    result_count = meta["result_count"]
    # The metadata has no "next_token" entry on the last page of results.
    new_next_token = get(meta, "next_token", nothing)
    # The "data" field is absent when no tweets matched, so there is nothing to write.
    if haskey(parsed, "data")
        write_unlabeled_tweets(parsed["data"], write_result_csv)
    end
    return result_count, new_next_token
end
"""
    replace_delimiters(tweet_dict::Dict)

Replace every "|" with "/" and every newline with a space in the tweet texts of
`tweet_dict` (ids to tweets). The dictionary is modified in place and returned.
"""
function replace_delimiters(tweet_dict::Dict)
    # "|" is the column separator of the written CSV, so it must not occur in the text.
    sanitize(raw) = replace(replace(raw, "|" => "/"), "\n" => " ")
    for tweet_id in collect(keys(tweet_dict))
        tweet_dict[tweet_id] = sanitize(tweet_dict[tweet_id])
    end
    return tweet_dict
end
"""
    json_to_dict(data_dict)

Return parsed JSON tweet data as a dictionary of id --> tweet text.

# Arguments
- `data_dict`: iterable collection of tweet objects, each indexable by `"id"`
  and `"text"` (for example the array parsed from a JSON response).

# Returns
A `Dict{String,String}` mapping each tweet id to its text.
"""
function json_to_dict(data_dict)
    # The argument is iterated as a collection of tweet objects; the former
    # `::Dict` annotation rejected the array that JSON parsing produces.
    id_to_text_dict = Dict{String,String}()
    for tweet in data_dict
        id_to_text_dict[tweet["id"]] = tweet["text"]
    end
    return id_to_text_dict
end
"""
    write_csv(output_path::String, tweet_dict::AbstractDict)

Append one `id|tweet` line per entry of `tweet_dict` (ids to tweets) to the file
at `output_path`. The file is created if it does not exist.

# Arguments
- `output_path::String`: file to append to.
- `tweet_dict::AbstractDict`: dictionary of ids to tweets.
"""
function write_csv(output_path::String, tweet_dict::AbstractDict)
    # The argument is a dictionary of ids to tweets; the former `::String`
    # annotation made the function uncallable with one.
    # The do-block form closes the file even if a write throws.
    open(output_path, "a") do writer
        for (id, tweet) in tweet_dict
            println(writer, "$id|$tweet")
        end
    end
    return nothing
end
"""
    write_unlabeled_tweets(data_dict, output_csv_path::String)

Write id|tweet_text lines to the CSV at `output_csv_path` given the tweet data
parsed from JSON.

# Arguments
- `data_dict`: tweet data as parsed from the JSON response; it is handed to
  `json_to_dict` unchanged.
- `output_csv_path::String`: file the lines are written to.
"""
function write_unlabeled_tweets(data_dict, output_csv_path::String)
    # No `::Dict` restriction: `extract_tweets` passes the parsed "data" field,
    # which is a JSON array rather than a dictionary.
    tweet_dict = replace_delimiters(json_to_dict(data_dict))
    write_csv(output_csv_path, tweet_dict)
    return nothing
end
end