Commit

Upgrades
otaviodantas committed Oct 16, 2020
1 parent daaf600 commit 7f2e521
Showing 7 changed files with 45 additions and 38 deletions.
.gitignore (1 change: 1 addition & 0 deletions)

@@ -1 +1,2 @@
 ./teste.py
+.pyc*
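
A note on the added pattern (an editorial observation, not part of the commit): .pyc* matches names that begin with ".pyc", whereas compiled Python bytecode files are conventionally ignored with *.pyc or the __pycache__/ directory.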

aux_mod.py (2 changes: 1 addition & 1 deletion)

@@ -7,7 +7,7 @@
 nltk.download("stopwords")
 
 
-def TokenData(data) -> List[str]:
+def TokenData(data: str) -> List[str]:
     token_space = tokenize.WhitespaceTokenizer()
     all_words_token = token_space.tokenize(data)
     return all_words_token
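
For reference, a minimal usage sketch of the newly annotated function (hypothetical input; assumes nltk is installed and that aux_mod.py already imports tokenize from nltk, as the WhitespaceTokenizer call implies):

    from aux_mod import TokenData

    tokens = TokenData("bom dia Brasil")  # splits on whitespace via nltk's WhitespaceTokenizer
    print(tokens)  # ['bom', 'dia', 'Brasil']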

call_stream.py (2 changes: 1 addition & 1 deletion)

@@ -2,7 +2,7 @@
 from stream import MyStreamListener
 
 
-def calling_stream(wordkey, api):
+def calling_stream(wordkey: str, api: tweepy):
     twitter_listener = MyStreamListener(api, wordkey)
     stream = tweepy.Stream(api.auth, twitter_listener)
     stream.filter(track=[wordkey], languages=["pt"])
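
A side note on the new annotation: api: tweepy points at the module rather than a class. tweepy's client type is tweepy.API, so a more precise signature would look like the sketch below (an assumption based on the tweepy 3.x calls in this function, not part of the commit):

    import tweepy
    from stream import MyStreamListener


    def calling_stream(wordkey: str, api: tweepy.API) -> None:
        twitter_listener = MyStreamListener(api, wordkey)
        stream = tweepy.Stream(api.auth, twitter_listener)  # tweepy 3.x streaming interface
        stream.filter(track=[wordkey], languages=["pt"])  # only Portuguese-language tweets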

config.env (1 change: 1 addition & 0 deletions)

@@ -2,6 +2,7 @@
 # WORDKEY is a variables that will be searched
 
 WORDKEY=
+
 CONSUMER_KEY=
 CONSUMER_SECRET_KEY=
 ACESS_TOKEN=

main.py (9 changes: 7 additions & 2 deletions)

@@ -1,10 +1,15 @@
 import os
 from dotenv import load_dotenv
 
-from auth import create_api
+import sys
+
+sys.path.insert(1, "./all_module/call_stream")
+sys.path.insert(1, "./all_module/auth")
+
 from call_stream import calling_stream
+from auth import create_api
 
-load_dotenv('config.env')
+load_dotenv("wordkey.env")
 
 if __name__ == "__main__":
     wordkey = os.getenv("WORDKEY")
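
A minimal sketch of how the entry point now resolves the search term (assumes wordkey.env defines WORDKEY, mirroring the variable declared in config.env):

    import os
    from dotenv import load_dotenv

    load_dotenv("wordkey.env")  # copies WORDKEY from the env file into the process environment
    wordkey = os.getenv("WORDKEY")  # returns None if the variable is absent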

stream.py (1 change: 0 additions & 1 deletion)

@@ -16,7 +16,6 @@ def on_status(self, tweet):
             return
         else:
             messy_data = self.trash.clean(tweet.text)
-            print(messy_data)
             TransformToCSV(messy_data)
 
     def on_error(self, status):
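
For orientation, the listener pattern this file implements (a skeleton of the tweepy 3.x StreamListener interface as used here; the method bodies are placeholders, not the repository's code):

    import tweepy


    class MyStreamListener(tweepy.StreamListener):
        def on_status(self, tweet):
            # called once per matching tweet; clean the text and persist it
            ...

        def on_error(self, status):
            # returning False disconnects the stream, e.g. on HTTP 420 rate limiting
            return False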

twitter_data_cleaner.py (67 changes: 34 additions & 33 deletions)

@@ -7,19 +7,17 @@
 from aux_mod import TokenData
 
 
-class TwitterDataCleaner():
-
+class TwitterDataCleaner:
     def __init__(self, list_stopword: List[str]):
         self.stopword = list_stopword
 
-    def clean(self, data):
+    def clean(self, data: str):
         cleaned_data = self.__remove_stopwords(data)
         cleaned_data = self.__remove_user(cleaned_data)
         cleaned_data = self.__remove_URL(cleaned_data)
         cleaned_data = self.__remove_emoji(cleaned_data)
-        cleaned_data = self.__remove_hashtag(cleaned_data)
         cleaned_data = self.__remove_punct(cleaned_data)
         cleaned_data = self.__remove_hashtag(cleaned_data)
         return cleaned_data
 
     def __remove_stopwords(self, data: str) -> str:

@@ -31,50 +29,53 @@ def __remove_stopwords(self, data: str) -> str:
             if not_accent not in self.stopword:
                 list_clean_data.append(word)
 
-        phrase_complete = ' '.join(list_clean_data)
+        phrase_complete = " ".join(list_clean_data)
         return phrase_complete
 
     def __remove_user(self, data: str) -> str:
-        pattern = re.compile(r'@\w+: |@\w+|@\w+ ')
-        data_without_user = pattern.sub(r'', str(data))
+        pattern = re.compile(r"@\w+: |@\w+|@\w+ ")
+        data_without_user = pattern.sub(r"", str(data))
         return data_without_user
 
     def __remove_URL(self, data: str) -> str:
-        url = re.compile(r'https?://\S+|www\.\S+|https+|tco?')
-        data_without_url = url.sub(r'', data)
+        url = re.compile(r"https?://\S+|www\.\S+|https+|tco?")
+        data_without_url = url.sub(r"", data)
         return data_without_url
 
     def __remove_emoji(self, data: str) -> str:
-        emoji_pattern = re.compile("["
-                                   u"\U0001F600-\U0001F64F"  # emoticons
-                                   u"\U0001F300-\U0001F5FF"  # symbols & pictographs
-                                   u"\U0001F680-\U0001F6FF"  # transport & map symbols
-                                   u"\U0001F1E0-\U0001F1FF"  # flags (iOS)
-                                   u"\U00002500-\U00002BEF"  # chinese char
-                                   u"\U00002702-\U000027B0"
-                                   u"\U00002702-\U000027B0"
-                                   u"\U000024C2-\U0001F251"
-                                   u"\U0001f926-\U0001f937"
-                                   u"\U00010000-\U0010ffff"
-                                   u"\u2640-\u2642"
-                                   u"\u2600-\u2B55"
-                                   u"\u200d"
-                                   u"\u23cf"
-                                   u"\u23e9"
-                                   u"\u231a"
-                                   u"\ufe0f"  # dingbats
-                                   u"\u3030"
-                                   "]+", flags=re.UNICODE)
+        emoji_pattern = re.compile(
+            "["
+            u"\U0001F600-\U0001F64F"  # emoticons
+            u"\U0001F300-\U0001F5FF"  # symbols & pictographs
+            u"\U0001F680-\U0001F6FF"  # transport & map symbols
+            u"\U0001F1E0-\U0001F1FF"  # flags (iOS)
+            u"\U00002500-\U00002BEF"  # chinese char
+            u"\U00002702-\U000027B0"
+            u"\U00002702-\U000027B0"
+            u"\U000024C2-\U0001F251"
+            u"\U0001f926-\U0001f937"
+            u"\U00010000-\U0010ffff"
+            u"\u2640-\u2642"
+            u"\u2600-\u2B55"
+            u"\u200d"
+            u"\u23cf"
+            u"\u23e9"
+            u"\u231a"
+            u"\ufe0f"  # dingbats
+            u"\u3030"
+            "]+",
+            flags=re.UNICODE,
+        )
 
-        data_without_emoji = emoji_pattern.sub(r'', str(data))
+        data_without_emoji = emoji_pattern.sub(r"", str(data))
         return data_without_emoji
 
     def __remove_punct(self, data: str) -> str:
-        table = str.maketrans('', '', string.punctuation)
+        table = str.maketrans("", "", string.punctuation)
         data_without_punct = data.translate(table)
         return data_without_punct
 
     def __remove_hashtag(self, data: str) -> str:
-        pattern = re.compile(r'#\w+')
-        data_without_hashtag = pattern.sub(r'', str(data))
+        pattern = re.compile(r"#\w+")
+        data_without_hashtag = pattern.sub(r"", str(data))
         return data_without_hashtag
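
One behavioral consequence of this hunk worth noting: the duplicate __remove_hashtag call that ran before __remove_punct was the one that actually deleted hashtags, since string.punctuation includes the # character. After this commit the surviving __remove_hashtag call runs on punctuation-free text, so the pattern r"#\w+" no longer matches and hashtag words remain as plain tokens. A hypothetical usage of the cleaner (assumes NLTK's Portuguese stopword list, consistent with the nltk.download("stopwords") call in aux_mod.py):

    from nltk.corpus import stopwords

    from twitter_data_cleaner import TwitterDataCleaner

    cleaner = TwitterDataCleaner(stopwords.words("portuguese"))
    print(cleaner.clean("RT @usuario: veja isso https://t.co/abc #noticia"))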
