diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 2b006b34..efd20006 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -49,7 +49,7 @@ jobs: - uses: actions/checkout@v3 - name: Set up Docker Buildx - uses: docker/setup-buildx-action@v1 + uses: docker/setup-buildx-action@v3 - name: Build docker run: docker build . --tag ${{ env.IMAGE_NAME }} @@ -61,7 +61,7 @@ jobs: - uses: actions/checkout@v3 - name: Set up Docker Buildx - uses: docker/setup-buildx-action@v1 + uses: docker/setup-buildx-action@v3 - name: Build docker run: docker build . --tag ${{ env.IMAGE_NAME }} - name: Run tests in docker diff --git a/README.md b/README.md index c5431af8..53424170 100644 --- a/README.md +++ b/README.md @@ -18,9 +18,9 @@ [![master](https://github.com/TheShellLand/automonisaur/actions/workflows/python37.yml/badge.svg)](https://github.com/TheShellLand/automonisaur/actions/workflows/python37.yml) [![master](https://github.com/TheShellLand/automonisaur/actions/workflows/python36.yml/badge.svg)](https://github.com/TheShellLand/automonisaur/actions/workflows/python36.yml) -[![Downloads](https://pepy.tech/badge/automonisaur)](https://pepy.tech/project/automonisaur) -[![Downloads](https://pepy.tech/badge/automonisaur/month)](https://pepy.tech/project/automonisaur) -[![Downloads](https://pepy.tech/badge/automonisaur/week)](https://pepy.tech/project/automonisaur) +[![Downloads](https://static.pepy.tech/badge/automonisaur)](https://pepy.tech/project/automonisaur) +[![Downloads](https://static.pepy.tech/badge/automonisaur/month)](https://pepy.tech/project/automonisaur) +[![Downloads](https://static.pepy.tech/badge/automonisaur/week)](https://pepy.tech/project/automonisaur) [//]: # ([![codecov](https://codecov.io/gh/TheShellLand/automonisaur/branch/master/graph/badge.svg)](https://codecov.io/gh/TheShellLand/automonisaur)) @@ -35,15 +35,20 @@ Github issues and feature requests welcomed. ### Integrations - airport +- beautifulsoup - elasticsearch +- facebook groups - flask +- google auth api - google people api +- google sheets api - instagram - logging - minio - neo4j - nmap - requests +- scrapy - selenium - sentryio - slack diff --git a/automon/helpers/sleeper.py b/automon/helpers/sleeper.py index 15e631f5..82f6479d 100644 --- a/automon/helpers/sleeper.py +++ b/automon/helpers/sleeper.py @@ -3,7 +3,7 @@ from automon.log import Logging -log = Logging('Sleeper', level=Logging.INFO) +log = Logging(name='Sleeper', level=Logging.INFO) class Sleeper: @@ -14,10 +14,10 @@ def seconds(caller: object or str, seconds: int) -> time.sleep: sleep = seconds if sleep < 2: - log.info(f'[{Sleeper.seconds.__name__}] ' + log.debug(f'[{Sleeper.seconds.__name__}] ' f'[{caller}] sleeping for {sleep} second') else: - log.info(f'[{Sleeper.seconds.__name__}] ' + log.debug(f'[{Sleeper.seconds.__name__}] ' f'[{caller}] sleeping for {sleep} seconds') return time.sleep(sleep) @@ -25,7 +25,7 @@ def seconds(caller: object or str, seconds: int) -> time.sleep: def minute(caller: object or str, sleep: int = 60) -> time.sleep: """Sleep for a minute""" - log.info(f'[{Sleeper.minute.__name__}] ' + log.debug(f'[{Sleeper.minute.__name__}] ' f'[{caller}] sleeping for {sleep} seconds') return time.sleep(sleep) @@ -35,7 +35,7 @@ def within_a_minute(caller, sleep: int = None): sleep = sleep if isinstance(sleep, int) else \ random.choice(range(1, 1 * 60)) - log.info(f'[{Sleeper.within_a_minute.__name__}] ' + log.debug(f'[{Sleeper.within_a_minute.__name__}] ' f'[{caller}] sleeping for {sleep} seconds') return time.sleep(sleep) @@ -44,7 +44,7 @@ def minutes(caller, minutes: int): """Sleep for this many minutes""" sleep = minutes * 60 - log.info(f'[{Sleeper.minutes.__name__}] ' + log.debug(f'[{Sleeper.minutes.__name__}] ' f'[{caller}] sleeping for {sleep} minutes') return time.sleep(sleep) @@ -54,7 +54,7 @@ def hour(caller, hour: int = 1): sleep = hour if not hour else random.choice( range(1, hour * 60 * 60)) - log.info(f'[{Sleeper.hour.__name__}] ' + log.debug(f'[{Sleeper.hour.__name__}] ' f'[{caller}] sleeping for {sleep} seconds') return time.sleep(sleep) @@ -63,7 +63,7 @@ def hours(caller, hours): """Sleep for this many hours""" sleep = hours * 60 * 60 - log.info(f'[{Sleeper.hours.__name__}] ' + log.debug(f'[{Sleeper.hours.__name__}] ' f'[{caller}] sleeping for {hours} hours') return time.sleep(sleep) @@ -73,7 +73,7 @@ def day(caller, hours: int = 24): sleep = hours if not hours else random.choice( range(1, hours * 60 * 60)) - log.info(f'[{Sleeper.day.__name__}] ' + log.debug(f'[{Sleeper.day.__name__}] ' f'[{caller}] sleeping for {sleep} seconds') return time.sleep(sleep) @@ -82,7 +82,7 @@ def daily(caller, hours: int = 24): """Sleep for one day""" sleep = hours if not hours else hours * 60 * 60 - log.info(f'[{Sleeper.daily.__name__}] ' + log.debug(f'[{Sleeper.daily.__name__}] ' f'[{caller}] sleeping for {sleep} seconds') return time.sleep(sleep) @@ -92,6 +92,6 @@ def time_range(caller, seconds: int): """ sleep = seconds if not seconds else random.choice( range(1, seconds)) - log.info(f'[{Sleeper.time_range.__name__}] ' + log.debug(f'[{Sleeper.time_range.__name__}] ' f'[{caller}] sleeping for {sleep} seconds') return time.sleep(sleep) diff --git a/automon/integrations/beautifulsoupWrapper/__init__.py b/automon/integrations/beautifulsoupWrapper/__init__.py new file mode 100644 index 00000000..37058512 --- /dev/null +++ b/automon/integrations/beautifulsoupWrapper/__init__.py @@ -0,0 +1 @@ +from .client import BeautifulSoupClient diff --git a/automon/integrations/beautifulsoupWrapper/client.py b/automon/integrations/beautifulsoupWrapper/client.py new file mode 100644 index 00000000..a520bd54 --- /dev/null +++ b/automon/integrations/beautifulsoupWrapper/client.py @@ -0,0 +1,24 @@ +from bs4 import BeautifulSoup + +from automon.log import Logging + +log = Logging(name='BeautifulSoupClient', level=Logging.DEBUG) + + +class BeautifulSoupClient(object): + + def __init__(self, bs: BeautifulSoup = None): + self.bs = bs + + def read_markup(self, markup: str, features: str = 'lxml'): + """read markup with beautifulsoup""" + try: + self.bs = BeautifulSoup( + markup=markup or self.markup, + features=features + ) + log.info(f'read_markup success ({len(markup)} B)') + except Exception as e: + log.error(f'read_markup failed ({len(markup)} B): {e}') + + return self diff --git a/automon/integrations/facebook/__init__.py b/automon/integrations/facebook/__init__.py new file mode 100644 index 00000000..718b9b6b --- /dev/null +++ b/automon/integrations/facebook/__init__.py @@ -0,0 +1 @@ +from .groups import FacebookGroups diff --git a/automon/integrations/facebook/groups.py b/automon/integrations/facebook/groups.py new file mode 100644 index 00000000..c00cd453 --- /dev/null +++ b/automon/integrations/facebook/groups.py @@ -0,0 +1,345 @@ +import datetime + +from automon.log import Logging +from automon.integrations.seleniumWrapper import SeleniumBrowser + +log = Logging(name='FacebookGroups', level=Logging.DEBUG) + + +class FacebookGroups(object): + xpath_about = [ + '/html/body/div[1]/div/div[1]/div/div[3]/div/div/div/div[1]/div[1]/div[3]/div/div/div/div/div/div/div[1]/div/div/div/div/div[2]/a[1]/div[1]/span', + '/html/body/div[1]/div/div[1]/div/div[3]/div/div/div/div[1]/div[1]/div[3]/div/div/div/div/div/div/div[1]/div/div/div/div/div[1]/div[1]/span', + ] + xpath_popup_close = [ + '/html/body/div[1]/div/div[1]/div/div[5]/div/div/div[1]/div/div[2]/div/div/div/div[1]/div/i', + ] + xpath_content_unavailble = [ + '/html/body/div[1]/div/div[1]/div/div[3]/div/div/div/div[1]/div[1]/div/div/div[1]/div[2]/div[1]/span', + ] + xpath_creation_date = [ + '/html/body/div[1]/div/div[1]/div/div[3]/div/div/div/div[1]/div[1]/div/div[2]/div/div/div[4]/div/div/div/div/div/div[3]/div/div/div/div/div/div[2]/div/div[3]/div/div/div[2]/div/div/span', + '/html/body/div[1]/div/div[1]/div/div[3]/div/div/div/div[1]/div[1]/div[4]/div/div/div/div/div/div[3]/div/div/div/div/div/div[2]/div/div[3]/div/div/div[2]/div/div/span', + ] + xpath_history = [ + '/html/body/div[1]/div/div[1]/div/div[3]/div/div/div/div[1]/div[1]/div[4]/div/div/div/div/div/div[1]/div/div/div/div/div/div[2]/div[4]/div/div/div[2]/div/div[2]/span/span', + ] + xpath_title = [ + '/html/body/div[1]/div/div[1]/div/div[3]/div/div/div/div[1]/div[1]/div[1]/div[2]/div/div/div/div/div[1]/div/div/div/div/div/div[1]/h1/span/a', + ] + xpath_members = [ + '/html/body/div[1]/div/div[1]/div/div[3]/div/div/div/div[1]/div[1]/div/div[2]/div/div/div[4]/div/div/div/div/div/div[3]/div/div/div/div/div/div[2]/div/div[2]/div/div/div[2]/div/div[1]/span', + '/html/body/div[1]/div/div[1]/div/div[3]/div/div/div/div[1]/div[1]/div[4]/div/div/div/div/div/div[3]/div/div/div/div/div/div[2]/div/div[2]/div/div/div[2]/div/div[1]/span', + ] + xpath_posts_today = [ + '/html/body/div[1]/div/div[1]/div/div[3]/div/div/div/div[1]/div[1]/div[4]/div/div/div/div/div/div[3]/div/div/div/div/div/div[2]/div/div[1]/div/div/div[2]/div/div[1]/span', + ] + xpath_posts_monthly = [ + '/html/body/div[1]/div/div[1]/div/div[3]/div/div/div/div[1]/div[1]/div[4]/div/div/div/div/div/div[3]/div/div/div/div/div/div[2]/div/div[1]/div/div/div[2]/div/div[2]/span', + ] + xpath_privacy = [ + '/html/body/div[1]/div/div[1]/div/div[3]/div/div/div/div[1]/div[1]/div[4]/div/div/div/div/div/div[1]/div/div/div/div/div/div[2]/div[2]/div/div/div[2]/div/div[1]/span/span', + ] + xpath_privacy_details = [ + '/html/body/div[1]/div/div[1]/div/div[3]/div/div/div/div[1]/div[1]/div[4]/div/div/div/div/div/div[1]/div/div/div/div/div/div[2]/div[2]/div/div/div[2]/div/div[2]/span/span', + ] + xpath_visible = [ + '/html/body/div[1]/div/div[1]/div/div[3]/div/div/div/div[1]/div[1]/div[4]/div/div/div/div/div/div[1]/div/div/div/div/div/div[2]/div[3]/div/div/div[2]/div/div[2]/span/span', + ] + + def __init__(self, url: str = None): + """Facebook Groups object + + Depends on Selenium""" + self._content_unavailable = None + self._creation_date = None + self._creation_date_timestamp = None + self._history = None + self._members = None + self._members_count = None + self._posts_monthly = None + self._posts_monthly_count = None + self._posts_today = None + self._posts_today_count = None + self._privacy = None + self._privacy_details = None + self._title = None + self._url = url + self._visible = None + + self._browser = None + + def __repr__(self): + return f'{self.__dict__}' + + @property + def content_unavailable(self): + """This content isn't available right now""" + if not self._browser: + self.start() + + if not self._content_unavailable: + try: + xpath_content_unavailble = self._browser.wait_for_xpath(self.xpath_content_unavailble) + self._content_unavailable = self._browser.find_xpath(xpath_content_unavailble).text + except Exception as e: + log.error(f"can't get content message {self.url}: {e}", enable_traceback=False) + + return self._content_unavailable + + @property + def creation_date(self): + if not self._browser: + self.start() + + if not self._creation_date: + try: + xpath_creation_date = self._browser.wait_for_xpath(self.xpath_creation_date) + self._creation_date = self._browser.find_xpath(xpath_creation_date).text + except Exception as e: + log.error(f"can't get creation date {self.url}: {e}", enable_traceback=False) + + return self._creation_date + + @property + def creation_date_timestamp(self): + if self._creation_date: + # TODO: convert date to datetime timestamp + return self._creation_date_timestamp + + @property + def history(self): + if not self._browser: + self.start() + + if not self._history: + try: + xpath_history = self._browser.wait_for_xpath(self.xpath_history) + self._history = self._browser.find_xpath(xpath_history).text + except Exception as e: + log.error(f"can't get history {self.url}: {e}", enable_traceback=False) + + return self._history + + @property + def members(self): + if not self._browser: + self.start() + + if not self._members: + try: + xpath_members = self._browser.wait_for_xpath(self.xpath_members) + self._members = self._browser.find_xpath(xpath_members).text + # TODO: need to clean up string from members and remove bad chars + except Exception as e: + log.error(f"can't get member count {self.url}: {e}", enable_traceback=False) + + return self._members + + @property + def members_count(self): + if not self._browser: + self.start() + + if self._members: + count = [x for x in self._members] + count = [x for x in count if x in [str(x) for x in range(0, 10)]] + if count: + self._members_count = int(''.join(count)) if count else 0 + + return self._members_count + + @property + def posts_monthly(self): + if not self._browser: + self.start() + + if not self._posts_monthly: + try: + xpath_monthly_posts = self._browser.wait_for_xpath(self.xpath_posts_monthly) + self._posts_monthly = self._browser.find_xpath(xpath_monthly_posts).text + except Exception as e: + print(f"can't get monthly posts {self.url}: {e}") + + return self._posts_monthly + + @property + def posts_monthly_count(self): + if not self._browser: + self.start() + + if self._posts_monthly: + count = [x for x in self._posts_monthly] + count = [x for x in count if x in [str(x) for x in range(0, 10)]] + if count: + self._posts_monthly_count = int(''.join(count)) if count else 0 + + return self._posts_monthly_count + + @property + def posts_today(self): + if not self._browser: + self.start() + + if not self._posts_today: + try: + xpath_posts_today = self._browser.wait_for_xpath(self.xpath_posts_today) + self._posts_today = self._browser.find_xpath(xpath_posts_today).text + except Exception as e: + log.error(f"can't get today's posts {self.url}: {e}", enable_traceback=False) + + return self._posts_today + + @property + def posts_today_count(self): + if not self._browser: + self.start() + + if self.posts_today: + count = [x for x in self.posts_today] + count = [x for x in count if x in [str(x) for x in range(0, 10)]] + if count: + self._posts_today_count = int(''.join(count)) if count else 0 + + return self._posts_today_count + + @property + def privacy(self): + if not self._browser: + self.start() + + if not self._privacy: + try: + xpath_privacy = self._browser.wait_for_xpath(self.xpath_privacy) + self._privacy = self._browser.find_xpath(xpath_privacy).text + except Exception as e: + log.error(f"can't get privacy {self.url}: {e}", enable_traceback=False) + + return self._privacy + + @property + def privacy_details(self): + if not self._browser: + self.start() + + if not self._privacy_details: + try: + xpath_privacy_details = self._browser.wait_for_xpath(self.xpath_privacy_details) + self._privacy_details = self._browser.find_xpath(xpath_privacy_details).text + except Exception as e: + log.error(f"can't get privacy details {self.url}: {e}", enable_traceback=False) + + return self._privacy_details + + @property + def title(self) -> str: + if not self._browser: + self.start() + + if not self._title: + try: + xpath_title = self._browser.wait_for_xpath(self.xpath_title) + self._title = self._browser.find_xpath(xpath_title).text + except Exception as e: + log.error(f"can't get title {self.url}: {e}", enable_traceback=False) + + return self._title + + @property + def url(self) -> str: + return self._url + + @property + def visible(self) -> str: + if not self._browser: + self.start() + + if not self._visible: + try: + xpath_visible = self._browser.wait_for_xpath(self.xpath_visible) + self._visible = self._browser.find_xpath(xpath_visible).text + except Exception as e: + log.error(f"can't get visible {self.url}: {e}", enable_traceback=False) + + return self._visible + + def get(self, url: str = None) -> bool: + """get url""" + if not self._browser: + self.start() + + if not url and not self.url: + raise Exception(f"missing url") + + return self._browser.get(url=url or self.url) + + def get_about(self): + url = f'{self.url}/about' + return self.get(url=url) + + def run(self): + """run selenium browser""" + if self._browser: + return self._browser.run() + + def restart(self): + """quit and start new instance of selenium""" + if self._browser: + self.quit() + return self.start() + + def start(self, headless: bool = True): + """start new instance of selenium""" + self._browser = SeleniumBrowser() + + if headless: + self._browser.config.set_webdriver.Chrome().in_headless().set_locale_experimental() + else: + self._browser.config.set_webdriver.Chrome().set_locale_experimental() + + return self._browser.run() + + def stop(self): + """alias to quit""" + return self.quit() + + def to_dict(self): + self.content_unavailable + self.creation_date + self.creation_date_timestamp + self.history + self.members + self.members_count + self.posts_monthly + self.posts_monthly_count + self.posts_today + self.posts_today_count + self.privacy + self.privacy_details + self.title + self.url + self.visible + + return dict( + content_unavailable=self._content_unavailable, + creation_date=self._creation_date, + creation_date_timestamp=self._creation_date_timestamp, + history=self._history, + members=self._members, + members_count=self._members_count, + posts_monthly=self._posts_monthly, + posts_monthly_count=self._posts_monthly_count, + posts_today=self._posts_today, + posts_today_count=self._posts_today_count, + privacy=self._privacy, + privacy_details=self._privacy_details, + title=self._title, + url=self._url, + visible=self._visible, + status=self._browser.status, + ) + + def quit(self): + """quit selenium""" + if self._browser: + return self._browser.quit() diff --git a/automon/integrations/google/__init__.py b/automon/integrations/google/__init__.py index d14fd0e2..63c280a3 100644 --- a/automon/integrations/google/__init__.py +++ b/automon/integrations/google/__init__.py @@ -1,2 +1,3 @@ -from .gmail import GmailClientV1 -from .people import PeopleClient +from .auth import GoogleAuthClient +from .gmail import GoogleGmailClient +from .people import GooglePeopleClient diff --git a/automon/integrations/google/auth/__init__.py b/automon/integrations/google/auth/__init__.py new file mode 100644 index 00000000..a96ebe64 --- /dev/null +++ b/automon/integrations/google/auth/__init__.py @@ -0,0 +1,2 @@ +from .client import GoogleAuthClient +from .config import GoogleAuthConfig diff --git a/automon/integrations/google/auth/client.py b/automon/integrations/google/auth/client.py new file mode 100644 index 00000000..a479679b --- /dev/null +++ b/automon/integrations/google/auth/client.py @@ -0,0 +1,131 @@ +import functools +import googleapiclient.http +import googleapiclient.discovery +import google.auth.transport.requests + +from automon.log import Logging + +from .config import GoogleAuthConfig + +log = Logging(name='GoogleAuthClient', level=Logging.DEBUG) + + +class GoogleAuthClient(object): + """Google Auth client""" + + def __init__( + self, + config: GoogleAuthConfig = None, + serviceName: str = None, + scopes: list = None, + version: str = None, + **kwargs, + ): + + self.config = config or GoogleAuthConfig( + serviceName=serviceName, + scopes=scopes, + version=version, + **kwargs + ) + + def __repr__(self): + return f'{self.__dict__}' + + @classmethod + def execute(cls, func): + return func.execute() + + def _is_connected(func): + @functools.wraps(func) + def wrapped(self, *args, **kwargs): + if self.authenticate(): + return func(self, *args, **kwargs) + + return wrapped + + def authenticate(self) -> bool: + """authenticate with credentials""" + + try: + return self.authenticate_oauth() + except: + pass + + try: + return self.authenticate_service_account() + except: + pass + + return False + + def authenticate_oauth(self) -> bool: + """authenticate web token""" + + creds = self.config.Credentials + refresh_token = creds.refresh_token + + if refresh_token: + try: + creds.refresh(google.auth.transport.requests.Request()) + log.info(f'token refresh success') + return True + except Exception as e: + log.error(msg=f'token refresh failed: {e}', enable_traceback=False) + + else: + # TODO: add google flow() authentication here + log.info(f'flow login success') + return True + + return False + + def authenticate_service_account(self) -> bool: + """authenticate service account""" + if self.config.Credentials: + return True + return False + + def is_connected(self) -> bool: + """Check if authenticated to make requests""" + return self.authenticate() + + def service( + self, + serviceName: str = None, + version: str = None, + http=None, + discoveryServiceUrl=None, + developerKey=None, + model=None, + requestBuilder=None, + credentials=None, + cache_discovery=True, + cache=None, + client_options=None, + adc_cert_path=None, + adc_key_path=None, + num_retries=1, + static_discovery=None, + always_use_jwt_access=False, + **kwargs + ) -> googleapiclient.discovery.build: + return googleapiclient.discovery.build( + serviceName=serviceName or self.config.serviceName, + version=version or self.config.version, + http=http, + discoveryServiceUrl=discoveryServiceUrl, + developerKey=developerKey, + model=model, + requestBuilder=requestBuilder or googleapiclient.http.HttpRequest, + credentials=credentials or self.config.Credentials, + cache_discovery=cache_discovery, + cache=cache, + client_options=client_options, + adc_cert_path=adc_cert_path, + adc_key_path=adc_key_path, + num_retries=num_retries, + static_discovery=static_discovery, + always_use_jwt_access=always_use_jwt_access, + **kwargs, + ) diff --git a/automon/integrations/google/auth/config.py b/automon/integrations/google/auth/config.py new file mode 100644 index 00000000..c160473f --- /dev/null +++ b/automon/integrations/google/auth/config.py @@ -0,0 +1,124 @@ +import os +import json +import base64 + +import google.auth.crypt +import google.oauth2.credentials +import google.oauth2.service_account + +from google.auth.transport.requests import Request +from google_auth_oauthlib.flow import InstalledAppFlow + +from automon.log import Logging +from automon.helpers import environ + +log = Logging(name='GoogleAuthConfig', level=Logging.DEBUG) + + +class GoogleAuthConfig(object): + """Google Auth config""" + + def __init__( + self, + serviceName: str = None, + scopes: list = None, + version: str = None, + ): + self.serviceName = serviceName or 'servicemanagement' + self.scopes = scopes or ['https://www.googleapis.com/auth/cloud-platform.read-only'] + self.version = version or 'v1' + + def __repr__(self): + return f'{self.__dict__}' + + @property + def Credentials(self): + """return Google Credentials object""" + try: + if self.CredentialsFile(): + return self.CredentialsFile() + except: + pass + + try: + if self.CredentialsInfo(): + return self.CredentialsInfo() + except: + pass + + try: + if self.CredentialsServiceAccountFile(): + return self.CredentialsServiceAccountFile() + except: + pass + + try: + if self.CredentialsServiceAccountInfo(): + return self.CredentialsServiceAccountInfo() + except: + pass + + log.error(f'Missing GOOGLE_CREDENTIALS or GOOGLE_CREDENTIALS_BASE64', enable_traceback=False) + + @property + def _GOOGLE_CREDENTIALS(self): + """env var GOOGLE_CREDENTIALS""" + return environ('GOOGLE_CREDENTIALS') + + @property + def _GOOGLE_CREDENTIALS_BASE64(self): + """env var GOOGLE_CREDENTIALS_BASE64""" + return environ('GOOGLE_CREDENTIALS_BASE64') + + def CredentialsFile(self) -> google.oauth2.credentials.Credentials: + """return Credentials object for web auth from file""" + if self._GOOGLE_CREDENTIALS: + if os.path.exists(self._GOOGLE_CREDENTIALS): + return google.oauth2.credentials.Credentials.from_authorized_user_file( + self._GOOGLE_CREDENTIALS + ) + + def CredentialsInfo(self) -> google.oauth2.credentials.Credentials: + """return Credentials object for web auth from dict""" + if self._GOOGLE_CREDENTIALS_BASE64: + return google.oauth2.credentials.Credentials.from_authorized_user_info( + self.base64_to_dict() + ) + + def CredentialsServiceAccountFile(self) -> google.oauth2.service_account.Credentials: + """return Credentials object for service account from file""" + if self._GOOGLE_CREDENTIALS: + if os.path.exists(self._GOOGLE_CREDENTIALS): + return google.oauth2.service_account.Credentials.from_service_account_file( + self._GOOGLE_CREDENTIALS + ) + + def CredentialsServiceAccountInfo(self) -> google.oauth2.service_account.Credentials: + """return Credentials object for service account from dict""" + if self._GOOGLE_CREDENTIALS_BASE64: + return google.oauth2.service_account.Credentials.from_service_account_info( + self.base64_to_dict() + ) + + def base64_to_dict(self, base64_str: str = None) -> dict: + """convert credential json to dict""" + if not base64_str and not self._GOOGLE_CREDENTIALS_BASE64: + raise Exception(f'Missing GOOGLE_CREDENTIALS_BASE6') + + base64_str = base64_str or self._GOOGLE_CREDENTIALS_BASE64 + return json.loads( + base64.b64decode(base64_str) + ) + + def file_to_base64(self, path: str = None): + """convert file to base64""" + if not path and self._GOOGLE_CREDENTIALS: + path = self._GOOGLE_CREDENTIALS + + with open(path, 'rb') as f: + return base64.b64encode(f.read()).decode() + + def is_ready(self): + """return True if configured""" + if self.Credentials: + return True diff --git a/automon/integrations/google/auth/tests/__init__.py b/automon/integrations/google/auth/tests/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/automon/integrations/google/auth/tests/test_config_Credentials.py b/automon/integrations/google/auth/tests/test_config_Credentials.py new file mode 100644 index 00000000..48c2e3c0 --- /dev/null +++ b/automon/integrations/google/auth/tests/test_config_Credentials.py @@ -0,0 +1,14 @@ +import unittest + +from automon.integrations.google.auth import GoogleAuthConfig + + +class MyTestCase(unittest.TestCase): + def test_something(self): + test = GoogleAuthConfig() + if test.Credentials: + self.assertTrue(test.Credentials) + + +if __name__ == '__main__': + unittest.main() diff --git a/automon/integrations/google/auth/tests/test_google_auth.py b/automon/integrations/google/auth/tests/test_google_auth.py new file mode 100644 index 00000000..57d0ab8b --- /dev/null +++ b/automon/integrations/google/auth/tests/test_google_auth.py @@ -0,0 +1,16 @@ +import unittest + +from automon.integrations.google.auth import GoogleAuthClient + + +class MyTestCase(unittest.TestCase): + def test_authenticate(self): + test = GoogleAuthClient() + # scopes = ['https://www.googleapis.com/auth/contacts.readonly'] + # client = AuthClient(serviceName='people', scopes=scopes) + if test.authenticate(): + self.assertTrue(test.authenticate()) + + +if __name__ == '__main__': + unittest.main() diff --git a/automon/integrations/google/gmail/__init__.py b/automon/integrations/google/gmail/__init__.py index 37d7a424..f11c7114 100644 --- a/automon/integrations/google/gmail/__init__.py +++ b/automon/integrations/google/gmail/__init__.py @@ -1,2 +1,2 @@ -from .v1 import GmailClient as GmailClientV1 -from .v1 import GmailConfig as GmailConfigV1 +from .v1 import GoogleGmailClient +from .v1 import GoogleGmailConfig diff --git a/automon/integrations/google/gmail/v1/__init__.py b/automon/integrations/google/gmail/v1/__init__.py index 063d8da6..c26f7cff 100644 --- a/automon/integrations/google/gmail/v1/__init__.py +++ b/automon/integrations/google/gmail/v1/__init__.py @@ -1,2 +1,2 @@ -from .client import GmailClient -from .config import GmailConfig +from .client import GoogleGmailClient +from .config import GoogleGmailConfig diff --git a/automon/integrations/google/gmail/v1/client.py b/automon/integrations/google/gmail/v1/client.py index b9579176..347eb1ba 100644 --- a/automon/integrations/google/gmail/v1/client.py +++ b/automon/integrations/google/gmail/v1/client.py @@ -1,12 +1,12 @@ from automon.integrations.requestsWrapper import RequestsClient -from .config import GmailConfig +from .config import GoogleGmailConfig -class GmailClient: +class GoogleGmailClient: - def __init__(self, api_key: str = None, user: str = None, password: str = None, config: GmailConfig = None): - self.config = config or GmailConfig(user=user, password=password, api_key=api_key) + def __init__(self, api_key: str = None, user: str = None, password: str = None, config: GoogleGmailConfig = None): + self.config = config or GoogleGmailConfig(user=user, password=password, api_key=api_key) self.endpoint = self.config.endpoint self.userId = self.config.userId diff --git a/automon/integrations/google/gmail/v1/config.py b/automon/integrations/google/gmail/v1/config.py index f5413c22..fecff96d 100644 --- a/automon/integrations/google/gmail/v1/config.py +++ b/automon/integrations/google/gmail/v1/config.py @@ -2,10 +2,10 @@ from automon.log import Logging -log = Logging(name='GmailConfig', level=Logging.DEBUG) +log = Logging(name='GoogleGmailConfig', level=Logging.DEBUG) -class GmailConfig: +class GoogleGmailConfig: def __init__(self, endpoint: str = None, api_key: str = None, user: str = None, diff --git a/automon/integrations/google/people/__init__.py b/automon/integrations/google/people/__init__.py index 458c9651..a802b592 100644 --- a/automon/integrations/google/people/__init__.py +++ b/automon/integrations/google/people/__init__.py @@ -1,2 +1,2 @@ -from .client import PeopleClient -from .config import PeopleConfig +from .client import GooglePeopleClient +from .config import GooglePeopleConfig diff --git a/automon/integrations/google/people/client.py b/automon/integrations/google/people/client.py index 0c781a31..a5e9f6fd 100644 --- a/automon/integrations/google/people/client.py +++ b/automon/integrations/google/people/client.py @@ -9,20 +9,20 @@ from automon.log import Logging -from .urls import PeopleUrls -from .config import PeopleConfig +from .urls import GooglePeopleUrls +from .config import GooglePeopleConfig from .results import ConnectionsResults -log = Logging(name='PeopleClient', level=Logging.DEBUG) +log = Logging(name='GooglePeopleClient', level=Logging.DEBUG) -class PeopleClient: +class GooglePeopleClient: def __init__(self, client_id: str = None, client_secret: str = None, - config: PeopleConfig = None): + config: GooglePeopleConfig = None): """Google People API Client""" - self.config = config or PeopleConfig( + self.config = config or GooglePeopleConfig( client_id=client_id, client_secret=client_secret ) @@ -132,10 +132,10 @@ def list_connections( """ if not resourceName: - resourceName = PeopleUrls().resourceName() + resourceName = GooglePeopleUrls().resourceName() if not personFields: - personFields = PeopleUrls().personFields_toStr() + personFields = GooglePeopleUrls().personFields_toStr() return self._list( resourceName=resourceName, diff --git a/automon/integrations/google/people/config.py b/automon/integrations/google/people/config.py index 0cc13a32..96c5048f 100644 --- a/automon/integrations/google/people/config.py +++ b/automon/integrations/google/people/config.py @@ -13,10 +13,10 @@ from automon.log import Logging from automon.helpers import environ -log = Logging(name='PeopleConfig', level=Logging.DEBUG) +log = Logging(name='GooglePeopleConfig', level=Logging.DEBUG) -class PeopleConfig: +class GooglePeopleConfig: def __init__(self, token=None, diff --git a/automon/integrations/google/people/person.py b/automon/integrations/google/people/person.py index c3a6662b..f5b1f362 100644 --- a/automon/integrations/google/people/person.py +++ b/automon/integrations/google/people/person.py @@ -2,7 +2,7 @@ from automon.log import Logging -log = Logging(level=Logging.DEBUG) +log = Logging(name='GooglePeople', level=Logging.DEBUG) class AgeRange(Enum): diff --git a/automon/integrations/google/people/results.py b/automon/integrations/google/people/results.py index f5e18c38..2f573fe8 100644 --- a/automon/integrations/google/people/results.py +++ b/automon/integrations/google/people/results.py @@ -2,7 +2,7 @@ from .person import Person -log = Logging(name='PeopleResults', level=Logging.DEBUG) +log = Logging(name='GooglePeopleResults', level=Logging.DEBUG) class ConnectionsResults: diff --git a/automon/integrations/google/people/tests/__init__.py b/automon/integrations/google/people/tests/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/automon/integrations/google/people/tests/test_google_contacts.py b/automon/integrations/google/people/tests/test_google_contacts.py new file mode 100644 index 00000000..d3755a71 --- /dev/null +++ b/automon/integrations/google/people/tests/test_google_contacts.py @@ -0,0 +1,15 @@ +import unittest + +from automon.integrations.google import GooglePeopleClient + +c = GooglePeopleClient() + + +class TestClient(unittest.TestCase): + def test_list_connections(self): + if c.isConnected(): + self.assertTrue(list(c.list_connection_generator())) + + +if __name__ == '__main__': + unittest.main() diff --git a/automon/integrations/google/people/tests/test_google_contacts_neo4j.py b/automon/integrations/google/people/tests/test_google_contacts_neo4j.py new file mode 100644 index 00000000..9c5ba56a --- /dev/null +++ b/automon/integrations/google/people/tests/test_google_contacts_neo4j.py @@ -0,0 +1,21 @@ +import unittest + +from automon.integrations.google import GooglePeopleClient +from automon.integrations.neo4jWrapper import Neo4jClient + +c = GooglePeopleClient() +n = Neo4jClient() + + +class TestClient(unittest.TestCase): + + def test_create_nodes(self): + if c.isConnected(): + contacts = c.list_connections().contacts + for contact in contacts: + n.merge_dict(contact) + pass + + +if __name__ == '__main__': + unittest.main() diff --git a/automon/integrations/google/people/urls.py b/automon/integrations/google/people/urls.py index 6c05c5ea..fb80ea40 100644 --- a/automon/integrations/google/people/urls.py +++ b/automon/integrations/google/people/urls.py @@ -1,9 +1,9 @@ from automon.log import Logging -log = Logging(name='PeopleUrls', level=Logging.ERROR) +log = Logging(name='GooglePeopleUrls', level=Logging.ERROR) -class PeopleUrls: +class GooglePeopleUrls: PEOPLE_API = 'https://people.googleapis.com' API_VER = 'v1' BASE_URL = f'{PEOPLE_API}/{API_VER}' diff --git a/automon/integrations/google/sheets/__init__.py b/automon/integrations/google/sheets/__init__.py new file mode 100644 index 00000000..0f5f8d55 --- /dev/null +++ b/automon/integrations/google/sheets/__init__.py @@ -0,0 +1,2 @@ +from .client import GoogleSheetsClient +from .config import GoogleSheetsConfig diff --git a/automon/integrations/google/sheets/client.py b/automon/integrations/google/sheets/client.py new file mode 100644 index 00000000..8269ee4d --- /dev/null +++ b/automon/integrations/google/sheets/client.py @@ -0,0 +1,123 @@ +from automon.log import Logging +from automon.integrations.google.auth import GoogleAuthClient + +from .config import GoogleSheetsConfig + +log = Logging(name='GoogleSheetsClient', level=Logging.DEBUG) + + +class Fields: + hyperlink: str = 'sheets/data/rowData/values/hyperlink' + + +class ValueInputOption: + USER_ENTERED: str = 'USER_ENTERED' + RAW: str = 'RAW' + + +class GoogleSheetsClient(GoogleAuthClient): + """Google Sheets client""" + + spreadsheetId: str + worksheet: str + range: str + config: GoogleSheetsConfig + + def __init__( + self, + spreadsheetId: str = None, + worksheet: str = '', + range: str = 'A:Z', + config: GoogleSheetsConfig = None, + **kwargs + ): + super().__init__() + self.config = config or GoogleSheetsConfig( + spreadsheetId=spreadsheetId, + **kwargs + ) + + self.worksheet = worksheet + self.range = range + + self.response = None + + @property + def values(self): + if self.response: + try: + return self.response['values'] + except Exception as e: + pass + + def spreadsheets(self): + """spreadsheet service""" + return self.service().spreadsheets() + + def get( + self, + spreadsheetId: str = None, + ranges: str = None, + includeGridData: bool = False, + fields: Fields or str = None, + **kwargs, + ): + try: + self.response = self.spreadsheets().get( + spreadsheetId=spreadsheetId or self.config.spreadsheetId, + ranges=ranges or self.range, + includeGridData=includeGridData, + fields=fields, + **kwargs, + ).execute() + except Exception as e: + log.error(f'{e}', enable_traceback=False) + + return self + + def get_values( + self, + spreadsheetId: str = None, + range: str = None, + **kwargs, + ): + try: + self.response = self.spreadsheets().values().get( + spreadsheetId=spreadsheetId or self.config.spreadsheetId, + range=range or f'{self.worksheet}!{self.range}', + **kwargs, + ).execute() + except Exception as e: + log.error(f'{e}', enable_traceback=False) + + return self + + def list(self): + # list(pageSize=1).execute() + return + + def update( + self, + spreadsheetId: str = None, + range: str = None, + valueInputOption: ValueInputOption = ValueInputOption.USER_ENTERED, + values: list = None, + ): + try: + + body = { + 'values': values + } + + result = self.spreadsheets().values().update( + spreadsheetId=spreadsheetId or self.config.spreadsheetId, + range=range or self.range, + valueInputOption=valueInputOption, + body=body + ).execute() + + print(f"{result.get('updatedCells')} cells updated.") + return result + except Exception as error: + print(f"An error occurred: {error}") + return error diff --git a/automon/integrations/google/sheets/config.py b/automon/integrations/google/sheets/config.py new file mode 100644 index 00000000..f4f99606 --- /dev/null +++ b/automon/integrations/google/sheets/config.py @@ -0,0 +1,27 @@ +from automon.log import Logging +from automon.helpers.osWrapper import environ +from automon.integrations.google.auth import GoogleAuthConfig + +log = Logging(name='SheetsConfig', level=Logging.DEBUG) + + +class GoogleSheetsConfig(GoogleAuthConfig): + """Google Sheets config""" + + def __init__( + self, + spreadsheetId: str = None, + ): + super().__init__() + + self.serviceName = 'sheets' + self.scopes = [ + 'https://www.googleapis.com/auth/drive', + 'https://www.googleapis.com/auth/drive.file', + 'https://www.googleapis.com/auth/drive.readonly', + 'https://www.googleapis.com/auth/spreadsheets', + 'https://www.googleapis.com/auth/spreadsheets.readonly', + ] + self.version = 'v4' + + self.spreadsheetId = spreadsheetId or environ('GOOGLE_SHEET_ID') diff --git a/automon/integrations/google/sheets/tests/__init__.py b/automon/integrations/google/sheets/tests/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/automon/integrations/google/sheets/tests/test_google_sheets.py b/automon/integrations/google/sheets/tests/test_google_sheets.py new file mode 100644 index 00000000..ec6b08c1 --- /dev/null +++ b/automon/integrations/google/sheets/tests/test_google_sheets.py @@ -0,0 +1,245 @@ +import datetime +import logging +import automon +import tracemalloc + +import pandas as pd +import numpy as np + +from automon import Logging +from automon.integrations.facebook import FacebookGroups +from automon.integrations.google.sheets import GoogleSheetsClient + +logging.getLogger('google_auth_httplib2').setLevel(logging.ERROR) +logging.getLogger('googleapiclient.discovery').setLevel(logging.ERROR) +logging.getLogger('googleapiclient.discovery_cache').setLevel(logging.ERROR) +logging.getLogger('urllib3.connectionpool').setLevel(logging.ERROR) +logging.getLogger('selenium.webdriver.common.service').setLevel(logging.ERROR) +logging.getLogger('selenium.webdriver.remote.remote_connection').setLevel(logging.ERROR) +logging.getLogger('selenium.webdriver.common.selenium_manager').setLevel(logging.ERROR) + +logging.getLogger('SeleniumBrowser').setLevel(logging.CRITICAL) +logging.getLogger('FacebookGroups').setLevel(logging.CRITICAL) +logging.getLogger('ConfigChrome').setLevel(logging.ERROR) +logging.getLogger('RequestsClient').setLevel(logging.INFO) + +tracemalloc.start() + +log = Logging(level=Logging.INFO) + +sheets_client = GoogleSheetsClient( + worksheet='Automated Count WIP', +) + + +def get_facebook_info(url: str): + if not url: + return {} + + group = FacebookGroups( + url=url_cleaner(url=url) + ) + # group.start(headless=False) + group.start(headless=True) + group.get_about() + + return group.to_dict() + + +def url_cleaner(url: str): + if url[-1] == '/': + url = url[:-1] + return url + + +def merge_urls(): + sheets_client.get( + ranges='AUDIT list Shelley!A:Z', + fields="sheets/data/rowData/values/hyperlink", + ) + + data = sheets_client.response['sheets'][0]['data'][0]['rowData'] + # expand nested data + links = [] + for x in data: + if x: + links.append( + x['values'][0]['hyperlink'] + ) + + df_Shelley = pd.DataFrame(data=links, columns=['url']) + + sheets_client.get() + sheets_client.get_values( + range='Automated Count WIP!A:Z' + ) + + sheet_values = sheets_client.values + sheet_columns = sheet_values[0] + sheet_data = sheet_values[1:] + + df = pd.DataFrame(data=sheet_data, columns=sheet_columns) + df = df.dropna(subset=['url']) + + # merge both lists or urls + df = pd.merge(df, df_Shelley, how='outer', on='url') + df = df.drop_duplicates(subset=['url'], keep='first') + return df + + +def batch_processing(index: int, df: pd.DataFrame): + df_results = df['url'].dropna().apply( + lambda url: get_facebook_info(url=url) + ) + df_results = pd.DataFrame(df_results.tolist()) + + df = df.reset_index() + df = df.drop('index', axis=1) + + todays_date = datetime.datetime.now().date() + monthly = f'{todays_date.year}-{todays_date.month}' + + # create columns + df[f'url'] = df_results['url'] + df[f'{monthly}'] = df_results['members_count'] + df[f'last_updated'] = monthly + df[f'title'] = df_results['title'] + df[f'content_unavailable'] = df_results['content_unavailable'] + df[f'creation_date'] = df_results['creation_date'] + df[f'creation_date_timestamp'] = df_results['creation_date_timestamp'] + df[f'history'] = df_results['history'] + df[f'members_count'] = df_results['members_count'] + df[f'posts_monthly_count'] = df_results['posts_monthly_count'] + df[f'posts_today_count'] = df_results['posts_today_count'] + df[f'privacy'] = df_results['privacy'] + df[f'visible'] = df_results['visible'] + + # set dtype to Int32 + df[f'{monthly}'] = df[f'{monthly}'].astype('Int32') + df[f'creation_date_timestamp'] = df[f'creation_date_timestamp'].astype('Int32') + df[f'members_count'] = df[f'members_count'].astype('Int32') + df[f'posts_monthly_count'] = df[f'posts_monthly_count'].astype('Int32') + df[f'posts_today_count'] = df[f'posts_today_count'].astype('Int32') + + # order columns + columns = [ + 'url', + 'title', + 'creation_date', + 'creation_date_timestamp', + 'history', + 'privacy', + 'visible', + 'content_unavailable', + 'last_updated', + 'posts_monthly_count', + 'posts_today_count', + 'members_count', + ] + + # add all other dates + df_columns = df.columns.tolist() + columns.extend( + [x for x in df_columns if x not in columns] + ) + + # finally add today's date + if f'{monthly}' not in columns: + columns.append( + f'{monthly}', + ) + + df = df.loc[:, columns] + df = df.fillna(np.nan).replace([np.nan], [None]) + + sheet_index = index + 2 + + update_columns = sheets_client.update( + range=f'Automated Count WIP!A1:Z', + values=[columns], + ) + + update = sheets_client.update( + range=f'Automated Count WIP!A{sheet_index}:Z', + values=[x for x in df.values.tolist()] + ) + + log.info( + f'{[x for x in df.values.tolist()]}' + ) + + return df + + +def memory_profiler(): + snapshot = tracemalloc.take_snapshot() + top_stats = snapshot.statistics("lineno") + + df_memory_profile = pd.DataFrame([ + dict(size_B=stat.size, count=stat.count, file=stat.traceback._frames[0][0], + file_line=stat.traceback._frames[0][1]) for stat in top_stats + ]) + df_memory_profile.sort_values(by='size_B', ascending=False) + df_memory_profile['size_KB'] = df_memory_profile['size_B'].apply( + lambda B: round(B / 1024) + ) + df_memory_profile['size_MB'] = df_memory_profile['size_KB'].apply( + lambda KB: round(KB / 1024) + ) + cols = df_memory_profile.columns.tolist() + cols.sort() + df_memory_profile = df_memory_profile.loc[:, cols] + + log.debug( + f"total memory used: {df_memory_profile['size_MB'].sum()} MB; " + f'most memory used: ' + f'{df_memory_profile.iloc[0].to_dict()}' + ) + + return df_memory_profile + + +def main(): + if not sheets_client.authenticate(): + return + + # start processing + sheets_client.get_values( + range='Automated Count WIP!A:Z' + ) + + sheet_values = sheets_client.values + sheet_columns = sheet_values[0] + sheet_data = sheet_values[1:] + + df = pd.DataFrame(data=sheet_data, columns=sheet_columns) + df = df.dropna(subset=['url']) + + todays_date = datetime.datetime.now().date() + last_updated = f'{todays_date.year}-{todays_date.month}' + + BATCH_SIZE = 1 + + i = 0 + while i < len(df): + df_batch = df[i:i + BATCH_SIZE] + + # skip if last_updated is in current month + if not df_batch['last_updated'].iloc[0] == last_updated: + + try: + df_batch = batch_processing(index=i, df=df_batch) + df_memory = memory_profiler() + except Exception as e: + df_memory = memory_profiler() + pass + + i += 1 + + pass + + pass + + +if __name__ == '__main__': + main() diff --git a/automon/integrations/google/sheets/tests/test_google_sheets_AUDIT.py b/automon/integrations/google/sheets/tests/test_google_sheets_AUDIT.py new file mode 100644 index 00000000..4160e3c7 --- /dev/null +++ b/automon/integrations/google/sheets/tests/test_google_sheets_AUDIT.py @@ -0,0 +1,59 @@ +import datetime +import pandas as pd + +import unittest + +from automon.integrations.google.sheets import GoogleSheetsClient +from automon.integrations.facebook import FacebookGroups + + +def get_facebook_info(url: str): + group = FacebookGroups() + # group.start(headless=False) + group.start(headless=True) + group.get(url=url) + if not group.privacy_details: + close = group._browser.wait_for(group.xpath_popup_close) + group._browser.action_click(close) + about = group._browser.wait_for(group.xpath_about) + group._browser.action_click(about) + + return group.to_dict() + + +class MyTestCase(unittest.TestCase): + def test_authenticate(self): + spreadsheetId = '1isrvjU0DaRijEztByQuT9u40TaCOCwdaLAXgGmKHap8' + test = GoogleSheetsClient( + spreadsheetId=spreadsheetId, + worksheet='AUDIT list Shelley', + range='AUDIT list Shelley!A:B' + ) + + if not test.authenticate(): + return + + test.get_values( + range='AUDIT list Shelley!A:Z', + ) + test.get( + ranges='AUDIT list Shelley!A:Z', + fields="sheets/data/rowData/values/hyperlink", + ) + + data = test.response['sheets'][0]['data'][0]['rowData'] + # expand nested data + links = [] + for x in data: + if x: + links.append( + x['values'][0]['hyperlink'] + ) + + df = pd.DataFrame(links) + + pass + + +if __name__ == '__main__': + unittest.main() diff --git a/automon/integrations/google/tests/test_google_contacts.py b/automon/integrations/google/tests/test_google_contacts.py index a5c18459..d3755a71 100644 --- a/automon/integrations/google/tests/test_google_contacts.py +++ b/automon/integrations/google/tests/test_google_contacts.py @@ -1,8 +1,8 @@ import unittest -from automon.integrations.google import PeopleClient +from automon.integrations.google import GooglePeopleClient -c = PeopleClient() +c = GooglePeopleClient() class TestClient(unittest.TestCase): diff --git a/automon/integrations/google/tests/test_google_contacts_neo4j.py b/automon/integrations/google/tests/test_google_contacts_neo4j.py index 15cf1c3d..9c5ba56a 100644 --- a/automon/integrations/google/tests/test_google_contacts_neo4j.py +++ b/automon/integrations/google/tests/test_google_contacts_neo4j.py @@ -1,9 +1,9 @@ import unittest -from automon.integrations.google import PeopleClient +from automon.integrations.google import GooglePeopleClient from automon.integrations.neo4jWrapper import Neo4jClient -c = PeopleClient() +c = GooglePeopleClient() n = Neo4jClient() diff --git a/automon/integrations/instagram/client_browser.py b/automon/integrations/instagram/client_browser.py index 04349f6f..8ce2b74f 100644 --- a/automon/integrations/instagram/client_browser.py +++ b/automon/integrations/instagram/client_browser.py @@ -27,10 +27,13 @@ def __init__(self, """Instagram Browser Client""" self.config = config or InstagramConfig(login=login, password=password) self.browser = SeleniumBrowser() - self.browser.set_browser(self.browser.type.chrome()) + + useragent = self.browser.get_random_user_agent() if headless: - self.browser.set_browser(self.browser.type.chrome_headless()) + self.browser.config.set_webdriver.Chrome().in_headless().set_user_agent(useragent) + else: + self.browser.config.set_webdriver.Chrome().in_headless() def __repr__(self): return f'{self.__dict__}' @@ -42,6 +45,7 @@ def wrapped(self, *args, **kwargs): if self.browser.is_running(): return func(self, *args, **kwargs) return False + return wrapped def _is_authenticated(func): @@ -53,22 +57,22 @@ def wrapped(self, *args, **kwargs): return wrapped - def _get_page(self, account): + def get_page(self, account: str): """ Get page """ - log.debug('[_get_page] getting {}'.format(account)) + log.debug(f'[get_page] getting {account}') - page = 'https://instagram.com/{}'.format(account) + page = f'https://instagram.com/{account}' browser = self.authenticated_browser return browser.get(page) - def _get_stories(self, account): + def get_stories(self, account: str): """ Retrieve story """ - story = 'https://www.instagram.com/stories/{}/'.format(account) + story = f'https://www.instagram.com/stories/{account}/' num_of_stories = 0 - log.debug('[get_stories] {}'.format(story)) + log.debug(f'[get_stories] {story}') browser = self.authenticated_browser browser.get(story) @@ -76,7 +80,7 @@ def _get_stories(self, account): prefix='instagram/' + account) if 'Page Not Found' in browser.browser.title: - log.debug('[get_stories] no stories for {}'.format(account)) + log.debug(f'[get_stories] no stories for {account}') return num_of_stories Sleeper.seconds('instagram', 2) @@ -164,22 +168,26 @@ def authenticate(self): self.browser.get(self.urls.login_page) # user - self.browser.wait_for_xpath(self.xpaths.login_user) - self.browser.action_click(self.xpaths.login_user, 'user') + login_user = self.browser.wait_for_xpath(self.xpaths.login_user) + self.browser.action_click(login_user, 'user') self.browser.action_type(self.login) # password login_pass = self.browser.wait_for_xpath(self.xpaths.login_pass) self.browser.action_click(login_pass, 'login') self.browser.action_type(self.config.password, secret=True) + self.browser.action_type(self.browser.keys.ENTER) # login - login_btn = self.browser.wait_for_xpath(self.xpaths.login_btn) - self.browser.action_click(login_btn, 'login button') + # login_btn = self.browser.wait_for_xpath(self.xpaths.login_btn) + # self.browser.action_click(login_btn, 'login button') # check for "save your login info" dialogue - not_now = self.browser.wait_for_xpath(self.xpaths.save_info_not_now) - self.browser.action_click(not_now, 'dont save login info') + not_now = self.browser.wait_for_xpath(self.xpaths.save_info_not_now_div) + self.browser.action_type(self.browser.keys.TAB) + self.browser.action_type(self.browser.keys.TAB) + self.browser.action_type(self.browser.keys.ENTER) + # self.browser.action_click(not_now, 'dont save login info') # check for "notifications" dialogue notifications_not_now = self.browser.wait_for_xpath(self.xpaths.turn_on_notifications_not_now) @@ -195,6 +203,12 @@ def authenticate(self): return False + @_is_running + @_is_authenticated + def get_followers(self, account: str): + url = self.urls.followers(account) + self.browser.get(url) + @_is_running @_is_authenticated def is_authenticated(self): @@ -215,13 +229,3 @@ def urls(self): @property def xpaths(self): return XPaths() - - -def get_page(authenticated_browser, account): - """ Get page - """ - # TODO: need to download page - log.debug('[get_page] getting {}'.format(account)) - page = 'https://instagram.com/{}'.format(account) - browser = authenticated_browser - return browser.get(page) diff --git a/automon/integrations/instagram/config.py b/automon/integrations/instagram/config.py index 53e154c5..7055bb64 100644 --- a/automon/integrations/instagram/config.py +++ b/automon/integrations/instagram/config.py @@ -20,6 +20,4 @@ def is_configured(self): return False def __repr__(self): - if self.is_configured: - return f'ready' - return f'not ready' + return f'{self.login}' diff --git a/automon/integrations/instagram/tests/test_instagram_browser_auth.py b/automon/integrations/instagram/tests/test_instagram_browser_auth.py new file mode 100644 index 00000000..5b8d8ce7 --- /dev/null +++ b/automon/integrations/instagram/tests/test_instagram_browser_auth.py @@ -0,0 +1,21 @@ +import unittest + +from automon.integrations.instagram.client_browser import InstagramBrowserClient + +c = InstagramBrowserClient(headless=False) + + +class InstagramClientTest(unittest.TestCase): + if c.is_running(): + c.browser.get(c.urls.login_page) + + # user + login_user = c.browser.wait_for_xpath(c.xpaths.login_user) + c.browser.action_click(login_user, 'user') + c.browser.action_type(c.login) + + c.browser.quit() + + +if __name__ == '__main__': + unittest.main() diff --git a/automon/integrations/instagram/urls.py b/automon/integrations/instagram/urls.py index 0e8786d0..94081889 100644 --- a/automon/integrations/instagram/urls.py +++ b/automon/integrations/instagram/urls.py @@ -6,3 +6,11 @@ def __repr__(self): @property def login_page(self): return 'https://www.instagram.com/accounts/login/?source=auth_switcher' + + @staticmethod + def followers(account: str): + return f'https://www.instagram.com/{account}/followers/' + + @staticmethod + def following(account: str): + return f'https://www.instagram.com/{account}/following/' diff --git a/automon/integrations/instagram/xpaths.py b/automon/integrations/instagram/xpaths.py index aee13f0a..fded5aa9 100644 --- a/automon/integrations/instagram/xpaths.py +++ b/automon/integrations/instagram/xpaths.py @@ -5,32 +5,74 @@ def __repr__(self): @property def login_user(self): - return '/html/body/div[2]/div/div/div/div[1]/div/div/div/div[1]/section/main/div/div/div[1]/div[2]/form/div/div[1]/div/label/input' + return [ + '/html/body/div[2]/div/div/div[1]/div/div/div/div[1]/section/main/div/div/div[1]/div[2]/form/div/div[1]/div/label/input', + '/html/body/div[2]/div/div/div/div[1]/div/div/div/div[1]/section/main/div/div/div[1]/div[2]/form/div/div[1]/div/label/input', + ] @property def login_pass(self): - return '/html/body/div[2]/div/div/div/div[1]/div/div/div/div[1]/section/main/div/div/div[1]/div[2]/form/div/div[2]/div/label/input' + return [ + '/html/body/div[2]/div/div/div[1]/div/div/div/div[1]/section/main/div/div/div[1]/div[2]/form/div/div[2]/div/label/input', + '/html/body/div[2]/div/div/div/div[1]/div/div/div/div[1]/section/main/div/div/div[1]/div[2]/form/div/div[2]/div/label/input' + ] @property def login_btn(self): - return '/html/body/div[2]/div/div/div/div[1]/div/div/div/div[1]/section/main/div/div/div[1]/div[2]/form/div/div[3]/button' + return [ + '/html/body/div[2]/div/div/div[1]/div/div/div/div[1]/section/main/div/div/div[1]/div[2]/form/div/div[3]/button', + '/html/body/div[2]/div/div/div[1]/div/div/div/div[1]/section/main/div/div/div[1]/div[2]/form/div/div[4]/button', + '/html/body/div[2]/div/div/div/div[1]/div/div/div/div[1]/section/main/div/div/div[1]/div[2]/form/div/div[3]/button' + ] + + @property + def authenticated_paths(self): + authenticated = [] + authenticated.extend(self.profile_picture) + authenticated.extend(self.home) + return authenticated + + @property + def home(self): + return [ + '/html/body/div[2]/div/div/div[2]/div/div/div/div[1]/div[1]/div[1]/div/div/div/div/div[2]/div[1]/div/div/a/div', + ] @property def profile_picture(self): - return '/html/body/div[2]/div/div/div/div[1]/div/div/div/div[1]/section/nav/div[2]/div/div/div[3]/div/div[6]' + return [ + '/html/body/div[2]/div/div/div[2]/div/div/div/div[1]/div[1]/div[2]/section/main/div[1]/section/div[3]/div[1]/div/div/div/div/div/div[1]/div/div/span/img', + '/html/body/div[2]/div/div/div[2]/div/div/div/div[1]/div[1]/div[2]/section/main/div[1]/section/div/div[2]/div/div/div/div/ul/li[3]/div/button/div[1]/span/img', + ] @property def save_info(self): return '/html/body/div[2]/div/div/div/div[1]/div/div/div/div[1]/section/main/div/div/div/section/div/button' + @property + def save_info_not_now_div(self): + return [ + '/html/body/div[2]/div/div/div[3]/div/div/div[1]/div/div[2]/div/div/div/div/div[2]/div/div', + ] + @property def save_info_not_now(self): - return '/html/body/div[2]/div/div/div/div[1]/div/div/div/div[1]/section/main/div/div/div/div/button' + return [ + '/html/body/div[2]/div/div/div[3]/div/div/div[1]/div/div[2]/div/div/div/div/div[2]/div/div/div[3]/button[2]', + '/html/body/div[2]/div/div/div[2]/div/div/div/div[1]/div[1]/div[2]/section/main/div/div/div/div', + '/html/body/div[2]/div/div/div/div[1]/div/div/div/div[1]/section/main/div/div/div/div/button' + ] @property def turn_on_notifications(self): - return '/html/body/div[2]/div/div/div/div[2]/div/div/div[1]/div/div[2]/div/div/div/div/div[2]/div/div/div[3]/button[1]' + return [ + '/html/body/div[2]/div/div/div[3]/div/div/div[1]/div/div[2]/div/div/div/div/div[2]/div/div/div[3]/button[1]', + '/html/body/div[2]/div/div/div/div[2]/div/div/div[1]/div/div[2]/div/div/div/div/div[2]/div/div/div[3]/button[1]' + ] @property def turn_on_notifications_not_now(self): - return '/html/body/div[2]/div/div/div/div[2]/div/div/div[1]/div/div[2]/div/div/div/div/div[2]/div/div/div[3]/button[2]' \ No newline at end of file + return [ + '/html/body/div[2]/div/div/div[3]/div/div/div[1]/div/div[2]/div/div/div/div/div[2]/div/div/div[3]/button[2]', + '/html/body/div[2]/div/div/div/div[2]/div/div/div[1]/div/div[2]/div/div/div/div/div[2]/div/div/div[3]/button[2]' + ] diff --git a/automon/integrations/requestsWrapper/client.py b/automon/integrations/requestsWrapper/client.py index 880ebf22..86c69cff 100644 --- a/automon/integrations/requestsWrapper/client.py +++ b/automon/integrations/requestsWrapper/client.py @@ -28,6 +28,10 @@ def __init__(self, url: str = None, data: dict = None, headers: dict = None, def __repr__(self): return f'{self.__dict__}' + def __len__(self): + if self.content: + len(self.content) + def _log_result(self): if self.results.status_code == 200: msg = f'{self.results.status_code} ' \ @@ -60,9 +64,14 @@ def _params(self, url, data, headers): @property def content(self): - if self.results is not None: + if self.results: return self.results.content + @property + def text(self): + if self.results: + return self.results.text + def delete(self, url: str = None, data: dict = None, diff --git a/automon/integrations/scrapyWrapper/__init__.py b/automon/integrations/scrapyWrapper/__init__.py new file mode 100644 index 00000000..42d0cbab --- /dev/null +++ b/automon/integrations/scrapyWrapper/__init__.py @@ -0,0 +1 @@ +from .client import ScrapyClient diff --git a/automon/integrations/scrapyWrapper/client.py b/automon/integrations/scrapyWrapper/client.py new file mode 100644 index 00000000..634ae6d6 --- /dev/null +++ b/automon/integrations/scrapyWrapper/client.py @@ -0,0 +1,14 @@ +import scrapy + +from automon.log import Logging + +log = Logging(name='ScrapyClient', level=Logging.DEBUG) + + +class ScrapyClient(object): + + def Selector(self, text: str): + return scrapy.selector.Selector(text=text) + + def xpath(self, text: str, xpath: str): + return self.Selector(text=text).xpath(xpath).get() diff --git a/automon/integrations/seleniumWrapper/browser.py b/automon/integrations/seleniumWrapper/browser.py index 6e47a907..481d3b2b 100644 --- a/automon/integrations/seleniumWrapper/browser.py +++ b/automon/integrations/seleniumWrapper/browser.py @@ -2,15 +2,18 @@ import tempfile import functools import selenium +import selenium.webdriver from selenium.webdriver.common.by import By from selenium.webdriver.common.keys import Keys from urllib.parse import urlparse +from bs4 import BeautifulSoup from automon.log import Logging from automon.helpers.dates import Dates from automon.helpers.sleeper import Sleeper from automon.helpers.sanitation import Sanitation +from automon.integrations.requestsWrapper import RequestsClient from .config import SeleniumConfig from .browser_types import SeleniumBrowserType @@ -21,25 +24,19 @@ class SeleniumBrowser(object): config: SeleniumConfig - type: SeleniumBrowserType + webdriver: selenium.webdriver + status: int def __init__(self, config: SeleniumConfig = None): """A selenium wrapper""" - self.config = config or SeleniumConfig() - self.driver = 'not set' or self.type.chrome_headless() - self.window_size = '' - self.status = '' + self._config = config or SeleniumConfig() + self.request = None def __repr__(self): if self.url: - return f'{self.browser.name} {self.status} {self.browser.current_url} {self.window_size}' - return f'{self.browser.name} {self.window_size}' - - @property - def browser(self): - """alias to selenium driver""" - return self.driver + return f'{self.webdriver.name} {self.status} {self.webdriver.current_url} {self.window_size}' + return f'{self.webdriver}' @property def by(self) -> By: @@ -47,9 +44,25 @@ def by(self) -> By: return selenium.webdriver.common.by.By() @property - def get_log(self, log_type: str = 'browser') -> list: + def config(self): + return self._config + + @property + def webdriver(self): + return self.config.webdriver + + @property + def get_log(self) -> list: """Gets the log for a given log type""" - return self.browser.get_log(log_type) + logs = [] + for log_type in self.webdriver.log_types: + logs.append( + { + log_type: self.webdriver.get_log(log_type) + } + ) + + return logs @property def keys(self): @@ -57,21 +70,32 @@ def keys(self): return selenium.webdriver.common.keys.Keys @property - def type(self): - return SeleniumBrowserType(self.config) + def status(self): + if self.request is not None: + try: + return self.request.results.status_code + except: + pass + + # @property + # def type(self) -> SeleniumBrowserType: + # return SeleniumBrowserType(self.config) @property def url(self): - if self.browser.current_url == 'data:,': + if self.webdriver.current_url == 'data:,': return '' - return self.browser.current_url + return self.webdriver.current_url + + @property + def window_size(self): + return self.config.set_webdriver.window_size def _is_running(func) -> functools.wraps: @functools.wraps(func) def wrapped(self, *args, **kwargs): - if self.browser != 'not set': + if self.webdriver is not None: return func(self, *args, **kwargs) - log.error(f'Browser is not set!', enable_traceback=False) return False return wrapped @@ -79,8 +103,8 @@ def wrapped(self, *args, **kwargs): def _screenshot_name(self, prefix=None): """Generate a unique filename""" - title = self.browser.title - url = self.browser.current_url + title = self.webdriver.title + url = self.webdriver.current_url hostname = urlparse(url).hostname hostname_ = Sanitation.ascii_numeric_only(hostname) @@ -113,7 +137,7 @@ def action_type(self, key: str or Keys, secret: bool = False): """perform keyboard command""" try: actions = selenium.webdriver.common.action_chains.ActionChains( - self.browser) + self.webdriver) actions.send_keys(key) actions.perform() @@ -130,36 +154,40 @@ def action_type(self, key: str or Keys, secret: bool = False): def close(self): """close browser""" log.info(f'Browser closed') - self.browser.close() + self.webdriver.close() @_is_running def find_element( self, value: str, - by: By = By.ID, + by: By.ID = By.ID, **kwargs): """find element""" - return self.browser.find_element(value=value, by=by, **kwargs) + element = self.webdriver.find_element(value=value, by=by, **kwargs) + log.debug(f'found element: {self.url} {element.text}') + return element @_is_running def find_xpath(self, value: str, by: By = By.XPATH, **kwargs): """find xpath""" - return self.find_element(value=value, by=by, **kwargs) + xpath = self.find_element(value=value, by=by, **kwargs) + log.debug(f'found xpath: {self.url} {xpath.text}') + return xpath @_is_running def get(self, url: str, **kwargs) -> bool: """get url""" try: - self.browser.get(url, **kwargs) - self.status = 'OK' + self.webdriver.get(url, **kwargs) + self.request = RequestsClient(url=url) - msg = f'GET {self.status} {self.browser.current_url}' + msg = f'GET {self.status} {self.webdriver.current_url}' if kwargs: msg += f', {kwargs}' log.debug(msg) return True except Exception as e: - self.status = f'ERROR {url}' + self.request = RequestsClient(url=url) msg = f'GET {self.status}: {e}' log.error(msg, enable_traceback=False) @@ -171,9 +199,16 @@ def get_page(self, *args, **kwargs): return self.get(*args, **kwargs) @_is_running - def get_page_source(self): + def get_page_source(self) -> str: """get page source""" - return self.driver.page_source + return self.webdriver.page_source + + @_is_running + def get_page_source_beautifulsoup(self, markdup: str = None, features: str = 'lxml') -> BeautifulSoup: + """read page source with beautifulsoup""" + if not markdup: + markdup = self.get_page_source() + return BeautifulSoup(markup=markdup, features=features) def get_random_user_agent(self, filter: list or str = None, case_sensitive: bool = False) -> list: return SeleniumUserAgentBuilder().get_random(filter=filter, case_sensitive=case_sensitive) @@ -181,16 +216,16 @@ def get_random_user_agent(self, filter: list or str = None, case_sensitive: bool @_is_running def get_screenshot_as_png(self, **kwargs): """screenshot as png""" - return self.browser.get_screenshot_as_png(**kwargs) + return self.webdriver.get_screenshot_as_png(**kwargs) @_is_running def get_screenshot_as_base64(self, **kwargs): """screenshot as base64""" - return self.browser.get_screenshot_as_base64(**kwargs) + return self.webdriver.get_screenshot_as_base64(**kwargs) @_is_running def get_user_agent(self): - return self.browser.execute_script("return navigator.userAgent") + return self.webdriver.execute_script("return navigator.userAgent") @_is_running def is_running(self) -> bool: @@ -201,9 +236,9 @@ def is_running(self) -> bool: def quit(self) -> bool: """gracefully quit browser""" try: - self.browser.close() - self.browser.quit() - self.browser.stop_client() + self.webdriver.close() + self.webdriver.quit() + self.webdriver.stop_client() except Exception as error: log.error(f'failed to quit browser. {error}') return False @@ -233,114 +268,66 @@ def save_screenshot( log.info(f'Saving screenshot to: {save}') - return self.browser.save_screenshot(save, **kwargs) - - def set_browser(self, browser: SeleniumBrowserType) -> True: - """set browser driver""" - return self.set_driver(driver=browser) - - def set_driver(self, driver: SeleniumBrowserType) -> True: - """set driver - - setting driver will launch browser - """ - if driver: - self.driver = driver - log.info(f'Launching {self.browser.name}') - return True + return self.webdriver.save_screenshot(save, **kwargs) @_is_running - def set_resolution(self, width=1920, height=1080, device_type=None) -> bool: + def set_window_size(self, width=1920, height=1080, device_type=None) -> bool: """set browser resolution""" - if device_type == 'pixel3': - width = 1080 - height = 2160 - - if device_type == 'web-small' or device_type == '800x600': - width = 800 - height = 600 - - if device_type == 'web-small-2' or device_type == '1024x768': - width = 1024 - height = 768 - - if device_type == 'web-small-3' or device_type == '1280x960': - width = 1280 - height = 960 - - if device_type == 'web-small-4' or device_type == '1280x1024': - width = 1280 - height = 1024 - - if device_type == 'web' or device_type == '1920x1080': - width = 1920 - height = 1080 - - if device_type == 'web-2' or device_type == '1600x1200': - width = 1600 - height = 1200 - - if device_type == 'web-3' or device_type == '1920x1200': - width = 1920 - height = 1200 - - if device_type == 'web-large' or device_type == '2560x1400': - width = 2560 - height = 1400 - - if device_type == 'web-long' or device_type == '1920x3080': - width = 1920 - height = 3080 - - if not width and not height: - width = 1920 - height = 1080 - - self.window_size = width, height - try: - self.browser.set_window_size(width, height) + self.config.set_webdriver.webdriver_wrapper.set_window_size(width=width, height=height, + device_type=device_type) except Exception as error: log.error(f'failed to set resolution. {error}') return False return True + def run(self): + """run browser""" + return self.config.set_webdriver.run() + + def start(self): + """alias to run""" + return self.run() + def wait_for( self, value: str or list, by: By = By.XPATH, - retries: int = 30, + retries: int = 3, **kwargs) -> str or False: """wait for something""" retry = 1 while True: try: if isinstance(value, list): - for each in value: - self.find_element( - by=by, - value=each, - **kwargs) - value = each - log.debug(f'found {by}: {value}') - return value + values = value + for value in values: + try: + self.find_element( + by=by, + value=value, + **kwargs) + log.debug(f'waiting for {by}: {self.url} {value}') + return value + except: + log.error(f'{by} not found: {self.url} {value}', enable_traceback=False) else: self.find_element( by=by, value=value, **kwargs) - log.debug(f'found {by}: {value}') + log.debug(f'waiting for {by}: {self.url} {value}') return value except Exception as error: - log.error(f'waiting for {by}: {value}, {error}', + log.error(f'not found {by}: {self.url} {value}, {error}', enable_traceback=False) - Sleeper.seconds(f'wait for', round(retry / 2)) + Sleeper.seconds(f'wait for', 1) retry += 1 if retry > retries: - log.error(f'max wait reached', enable_traceback=False) + log.error(f'max wait reached: {self.url}', enable_traceback=False) break return False diff --git a/automon/integrations/seleniumWrapper/browser_capabilities.py b/automon/integrations/seleniumWrapper/browser_capabilities.py new file mode 100644 index 00000000..55fea37a --- /dev/null +++ b/automon/integrations/seleniumWrapper/browser_capabilities.py @@ -0,0 +1,20 @@ +from selenium.webdriver.common.desired_capabilities import DesiredCapabilities + +caps = DesiredCapabilities.CHROME + #as per latest docs +caps['goog:loggingPrefs'] = {'performance': 'ALL'} +driver = webdriver.Chrome(desired_capabilities=caps) + + +class SeleniumDesiredCapabilities(DesiredCapabilities): + + def __init__(self): + pass + + @property + def DesiredCapabilities(self): + return DesiredCapabilities + + @property + def CHROME(self): + return self.DesiredCapabilities.CHROME \ No newline at end of file diff --git a/automon/integrations/seleniumWrapper/config.py b/automon/integrations/seleniumWrapper/config.py index c2fb4ef7..0277897f 100644 --- a/automon/integrations/seleniumWrapper/config.py +++ b/automon/integrations/seleniumWrapper/config.py @@ -1,192 +1,32 @@ -import os -import warnings -import selenium import selenium.webdriver from automon.log import Logging -from automon.helpers.osWrapper.environ import environ + +from .config_webdriver import ConfigWebdriver log = Logging(name='SeleniumConfig', level=Logging.INFO) class SeleniumConfig(object): - webdriver: selenium.webdriver - selenium_chromedriver_path: str - - def __init__(self, webdriver=None, chromedriver: str = None): - self.webdriver = webdriver or selenium.webdriver - self.selenium_chromedriver_path = chromedriver or environ('SELENIUM_CHROMEDRIVER_PATH', '') + webdriver_wrapper: ConfigWebdriver - if self.selenium_chromedriver_path: - os.environ['PATH'] = f"{os.getenv('PATH')}:{self.selenium_chromedriver_path}" + def __init__(self): + self._webdriver_wrapper = ConfigWebdriver() def __repr__(self): - return f'{self.__dict__}' - - def chrome(self): - """Chrome with no options - - """ - opt = SeleniumOptions(self.webdriver) - opt.default() - - def chrome_maximized(self): - """Chrome with no options - - """ - opt = SeleniumOptions(self.webdriver) - opt.default() - opt.maximized() - - def chrome_fullscreen(self): - """Chrome with no options - - """ - opt = SeleniumOptions(self.webdriver) - opt.default() - opt.fullscreen() - - def chrome_for_docker(self): - """Chrome best used with docker - - """ - opt = SeleniumOptions(self.webdriver) - opt.default() - opt.nosandbox() - opt.headless() - opt.noinfobars() - opt.noextensions() - opt.nonotifications() - - def chrome_sandboxed(self): - """Chrome with sandbox enabled - - """ - warnings.warn('Docker does not support sandbox option') - warnings.warn('Default shm size is 64m, which will cause chrome driver to crash.', Warning) - - opt = SeleniumOptions(self.webdriver) - opt.default() - - def chrome_nosandbox(self): - """Chrome with sandbox disabled - - """ - warnings.warn('Default shm size is 64m, which will cause chrome driver to crash.', Warning) - - opt = SeleniumOptions(self.webdriver) - opt.default() - opt.nosandbox() - - def chrome_headless_sandboxed(self): - """Headless Chrome with sandbox enabled - - """ - warnings.warn('Docker does not support sandbox option') - warnings.warn('Default shm size is 64m, which will cause chrome driver to crash.', Warning) - - opt = SeleniumOptions(self.webdriver) - opt.default() - opt.headless() - - def chrome_headless_nosandbox(self): - """Headless Chrome with sandbox disabled - - """ - warnings.warn('Default shm size is 64m, which will cause chrome driver to crash.', Warning) - - opt = SeleniumOptions(self.webdriver) - opt.default() - opt.headless() - opt.nosandbox() - - def chrome_headless_nosandbox_unsafe(self): - """Headless Chrome with sandbox disabled with not certificate verification - - """ - warnings.warn('Default shm size is 64m, which will cause chrome driver to crash.', Warning) - - opt = SeleniumOptions(self.webdriver) - opt.default() - opt.headless() - opt.nosandbox() - opt.unsafe() - - def chrome_headless_nosandbox_noshm(self): - """Headless Chrome with sandbox disabled - - """ - opt = SeleniumOptions(self.webdriver) - opt.default() - opt.headless() - opt.nosandbox() - opt.noshm() - - def chrome_headless_nosandbox_bigshm(self): - """Headless Chrome with sandbox disabled - - """ - warnings.warn('Larger shm option is not implemented', Warning) - - opt = SeleniumOptions(self.webdriver) - opt.default() - opt.headless() - opt.nosandbox() - opt.bigshm() - - def chrome_remote(self, host: str = '127.0.0.1', port: str = '4444', executor_path: str = '/wd/hub'): - """Remote Selenium - - """ - log.info( - f'Remote WebDriver Hub URL: http://{host}:{port}{executor_path}/static/resource/hub.html') - - self.webdriver.Remote( - command_executor=f'http://{host}:{port}{executor_path}', - desired_capabilities=selenium.webdriver.common.desired_capabilities.DesiredCapabilities.CHROME - ) - - -class SeleniumOptions: - - def __init__(self, webdriver): - self.webdriver = webdriver or selenium.webdriver - self.options = self.webdriver.ChromeOptions() - - def default(self): - self.options.add_argument('start-maximized') - - def unsafe(self): - warnings.warn('Certificates are not verified', Warning) - self.options.add_argument('--ignore-certificate-errors') - - def nosandbox(self): - self.options.add_argument('--no-sandbox') - - def headless(self): - self.options.add_argument('headless') - - def noshm(self): - warnings.warn('Disabled shm will use disk I/O, and will be slow', Warning) - self.options.add_argument('--disable-dev-shm-usage') - - def bigshm(self): - warnings.warn('Big shm not yet implemented', Warning) - - def noinfobars(self): - self.options.add_argument("--disable-infobars") - - def noextensions(self): - self.options.add_argument("--disable-extensions") - - def maximized(self): - self.options.add_argument("--start-maximized") - - def fullscreen(self): - self.options.add_argument("--start-fullscreen") - - def nonotifications(self): - # Pass the argument 1 to allow and 2 to block - self.options.add_experimental_option( - "prefs", {"profile.default_content_setting_values.notifications": 1} - ) + return f'{self.driver}' + + # @property + # def driver(self): + # return self.webdriver + + @property + def set_webdriver(self): + return self._webdriver_wrapper + + @property + def webdriver(self): + if self.set_webdriver.webdriver: + return self.set_webdriver.webdriver + else: + log.debug('waiting for driver') diff --git a/automon/integrations/seleniumWrapper/config_webdriver.py b/automon/integrations/seleniumWrapper/config_webdriver.py new file mode 100644 index 00000000..e80c973a --- /dev/null +++ b/automon/integrations/seleniumWrapper/config_webdriver.py @@ -0,0 +1,91 @@ +import selenium.webdriver + +from automon.log import Logging + +from .config_webdriver_chrome import ConfigChrome + +log = Logging(name='ConfigWebdriver', level=Logging.INFO) + + +class ConfigWebdriver(object): + webdriver: selenium.webdriver + webdriver_wrapper: any + + Chrome: ConfigChrome + Edge: NotImplemented + Firefox: NotImplemented + + def __init__(self): + self._webdriver_wrapper = None + + self._chrome = ConfigChrome() + self._edge = NotImplemented + self._firefox = NotImplemented + + def __repr__(self): + if self._webdriver_wrapper: + return f'{self._webdriver_wrapper}' + return f'webdriver not configured. try selecting a webdriver' + + @property + def driver(self): + """alias to webdriver + + """ + return self.webdriver + + @property + def webdriver(self): + """selenium webdriver + + """ + return self.webdriver_wrapper.webdriver + + @property + def webdriver_wrapper(self) -> any or ConfigChrome: + """webdriver wrapper + + """ + return self._webdriver_wrapper + + @property + def window_size(self): + """get window size + + """ + if self.webdriver_wrapper: + return self.webdriver_wrapper.window_size + + def Chrome(self): + """selenium Chrome webdriver + + """ + self._webdriver_wrapper = self._chrome + return self._webdriver_wrapper + + def Edge(self): + """selenium Edge webdriver + + """ + return self._edge + + def Firefox(self): + """selenium Firefox webdriver + + """ + return self._firefox + + def run(self): + """run webdriver""" + try: + return self.webdriver_wrapper.run() + except Exception as e: + log.error(f'failed to run: {e}', enable_traceback=False) + + def start(self): + """alias to run""" + return self.run() + + def quit(self): + """quit webdriver""" + return diff --git a/automon/integrations/seleniumWrapper/config_webdriver_chrome.py b/automon/integrations/seleniumWrapper/config_webdriver_chrome.py new file mode 100644 index 00000000..44e319c6 --- /dev/null +++ b/automon/integrations/seleniumWrapper/config_webdriver_chrome.py @@ -0,0 +1,319 @@ +import os +import warnings +import selenium +import selenium.webdriver + +from automon.log import Logging +from automon.helpers.osWrapper.environ import environ + +from .config_window_size import set_window_size + +log = Logging(name='ConfigChrome', level=Logging.INFO) + + +class ConfigChrome(object): + + def __init__(self): + self._webdriver = None + self._chrome_options = selenium.webdriver.ChromeOptions() + self._chromedriver = environ('SELENIUM_CHROMEDRIVER_PATH') + self._ChromeService = None + + self.update_paths() + + self._window_size = set_window_size() + + def __repr__(self): + if self.chromedriver: + return f'Chrome {self.chromedriver}' + return f'Chrome' + + @property + def chrome_options(self): + return self._chrome_options + + @property + def chrome_options_arg(self): + return self.chrome_options.arguments + + @property + def chromedriver(self): + return self._chromedriver + + @property + def ChromeService(self): + return self._ChromeService + + @property + def webdriver(self) -> selenium.webdriver.Chrome: + return self._webdriver + + @property + def window_size(self): + return self._window_size + + def disable_certificate_verification(self): + log.warn('Certificates are not verified') + self.chrome_options.add_argument('--ignore-certificate-errors') + return self + + def disable_extensions(self): + self.chrome_options.add_argument("--disable-extensions") + return self + + def disable_infobars(self): + self.chrome_options.add_argument("--disable-infobars") + return self + + def disable_notifications(self): + """Pass the argument 1 to allow and 2 to block + + """ + self.chrome_options.add_experimental_option( + "prefs", {"profile.default_content_setting_values.notifications": 2} + ) + return self + + def disable_sandbox(self): + self.chrome_options.add_argument('--no-sandbox') + return self + + def disable_shm(self): + log.warn('Disabled shm will use disk I/O, and will be slow') + self.chrome_options.add_argument('--disable-dev-shm-usage') + return self + + def enable_bigshm(self): + log.warn('Big shm not yet implemented') + return self + + def enable_defaults(self): + self.enable_maximized() + return self + + def enable_fullscreen(self): + self.chrome_options.add_argument("--start-fullscreen") + return self + + def enable_headless(self): + self.chrome_options.add_argument('headless') + return self + + def enable_notifications(self): + """Pass the argument 1 to allow and 2 to block + + """ + self.chrome_options.add_experimental_option( + "prefs", {"profile.default_content_setting_values.notifications": 1} + ) + return self + + def enable_maximized(self): + self.chrome_options.add_argument('--start-maximized') + return self + + def enable_translate(self, native_language: str = 'en'): + prefs = { + "translate_whitelists": {"your native language": native_language}, + "translate": {"enabled": "True"} + } + self.chrome_options.add_experimental_option( + name="prefs", + value=prefs, + ) + return self + + def close(self): + """close + + """ + return self.webdriver.close() + + def in_docker(self): + """Chrome best used with docker + + """ + self.enable_defaults() + self.enable_headless() + self.disable_sandbox() + self.disable_infobars() + self.disable_extensions() + self.disable_notifications() + return self + + def in_headless(self): + """alias to headless sandboxed + + """ + return self.in_headless_sandboxed() + + def in_headless_sandboxed(self): + """Headless Chrome with sandbox enabled + + """ + log.warn( + 'Docker does not support sandbox option. ' + 'Default shm size is 64m, which will cause chrome driver to crash.' + ) + + self.enable_defaults() + self.enable_headless() + return self + + def in_headless_sandbox_disabled(self): + """Headless Chrome with sandbox disabled + + """ + log.warn('Default shm size is 64m, which will cause chrome driver to crash.') + + self.enable_defaults() + self.enable_headless() + self.disable_sandbox() + return self + + def in_headless_sandbox_disabled_certificate_unverified(self): + """Headless Chrome with sandbox disabled with no certificate verification + + """ + log.warn('Default shm size is 64m, which will cause chrome driver to crash.') + + self.enable_defaults() + self.enable_headless() + self.disable_sandbox() + self.disable_certificate_verification() + return self + + def in_headless_sandbox_disabled_shm_disabled(self): + """Headless Chrome with sandbox disabled + + """ + self.enable_defaults() + self.enable_headless() + self.disable_sandbox() + self.disable_shm() + return self + + def in_headless_sandbox_disabled_bigshm(self): + """Headless Chrome with sandbox disabled + + """ + log.warn('Larger shm option is not implemented') + + self.enable_defaults() + self.enable_headless() + self.enable_bigshm() + self.disable_sandbox() + return self + + def in_remote_driver(self, host: str = '127.0.0.1', port: str = '4444', executor_path: str = '/wd/hub'): + """Remote Selenium + + """ + log.info( + f'Remote WebDriver Hub URL: http://{host}:{port}{executor_path}/static/resource/hub.html') + + selenium.webdriver.Remote( + command_executor=f'http://{host}:{port}{executor_path}', + desired_capabilities=selenium.webdriver.common.desired_capabilities.DesiredCapabilities.CHROME + ) + + def in_sandbox(self): + """Chrome with sandbox enabled + + """ + log.warn( + 'Docker does not support sandbox option. ' + 'Default shm size is 64m, which will cause chrome driver to crash.' + ) + + self.enable_defaults() + return self + + def in_sandbox_disabled(self): + """Chrome with sandbox disabled + + """ + log.warn('Default shm size is 64m, which will cause chrome driver to crash.') + + self.enable_defaults() + self.disable_sandbox() + return self + + def run(self) -> selenium.webdriver.Chrome: + log.info(f'starting {self}') + try: + if self.chromedriver: + self._ChromeService = selenium.webdriver.ChromeService( + executable_path=self.chromedriver + ) + self._webdriver = selenium.webdriver.Chrome( + service=self._ChromeService, + options=self.chrome_options + ) + return self.webdriver + + self._webdriver = selenium.webdriver.Chrome(options=self.chrome_options) + return self.webdriver + except Exception as e: + log.error(f'Browser not set. {e}', raise_exception=True) + + def set_chromedriver(self, chromedriver: str): + self._chromedriver = chromedriver + self.update_paths() + return self + + def set_locale(self, locale: str = 'en'): + self.chrome_options.add_argument(f"--lang={locale}") + return self + + def set_locale_experimental(self, locale: str = 'en-US'): + self.chrome_options.add_experimental_option( + name='prefs', + value={'intl.accept_languages': locale}) + return self + + def set_user_agent(self, user_agent: str): + self.chrome_options.add_argument(f"user-agent={user_agent}") + return self + + def set_window_size(self, *args, **kwargs): + self._window_size = set_window_size(*args, **kwargs) + width, height = self.window_size + self.webdriver.set_window_size(width=width, height=height) + return self + + def start(self): + """alias to run + + """ + return self.run() + + def stop_client(self): + """stop client + + """ + return self.webdriver.stop_client() + + def update_paths(self): + if self.chromedriver: + if self.chromedriver not in os.getenv('PATH'): + os.environ['PATH'] = f"{os.getenv('PATH')}:{self._chromedriver}" + + def quit(self): + """quit + + """ + return self.webdriver.quit() + + def quit_gracefully(self): + """gracefully quit webdriver + + """ + try: + self.close() + self.quit() + self.stop_client() + except Exception as error: + log.error(f'failed to gracefully quit. {error}') + return False + return True diff --git a/automon/integrations/seleniumWrapper/config_window_size.py b/automon/integrations/seleniumWrapper/config_window_size.py new file mode 100644 index 00000000..fb424e03 --- /dev/null +++ b/automon/integrations/seleniumWrapper/config_window_size.py @@ -0,0 +1,48 @@ +def set_window_size(width=1920, height=1080, device_type=None) -> (int, int): + """set browser resolution""" + + if device_type == 'pixel3': + width = 1080 + height = 2160 + + if device_type == 'web-small' or device_type == '800x600': + width = 800 + height = 600 + + if device_type == 'web-small-2' or device_type == '1024x768': + width = 1024 + height = 768 + + if device_type == 'web-small-3' or device_type == '1280x960': + width = 1280 + height = 960 + + if device_type == 'web-small-4' or device_type == '1280x1024': + width = 1280 + height = 1024 + + if device_type == 'web' or device_type == '1920x1080': + width = 1920 + height = 1080 + + if device_type == 'web-2' or device_type == '1600x1200': + width = 1600 + height = 1200 + + if device_type == 'web-3' or device_type == '1920x1200': + width = 1920 + height = 1200 + + if device_type == 'web-large' or device_type == '2560x1400': + width = 2560 + height = 1400 + + if device_type == 'web-long' or device_type == '1920x3080': + width = 1920 + height = 3080 + + if not width and not height: + width = 1920 + height = 1080 + + return width, height diff --git a/automon/integrations/seleniumWrapper/tests/test_browser.py b/automon/integrations/seleniumWrapper/tests/test_browser.py index edf7395a..a190044c 100644 --- a/automon/integrations/seleniumWrapper/tests/test_browser.py +++ b/automon/integrations/seleniumWrapper/tests/test_browser.py @@ -3,11 +3,11 @@ from automon.integrations.seleniumWrapper.browser import SeleniumBrowser browser = SeleniumBrowser() -browser.set_driver(browser.type.chrome()) +browser.config.set_webdriver.Chrome().enable_defaults() class SeleniumClientTest(unittest.TestCase): - if browser.is_running(): + if browser.run(): def test(self): self.assertFalse(browser.get('http://555.555.555.555')) if browser.get('http://1.1.1.1'): diff --git a/automon/integrations/seleniumWrapper/tests/test_browser_headless.py b/automon/integrations/seleniumWrapper/tests/test_browser_headless.py index 217bbc79..ae5b4d0c 100644 --- a/automon/integrations/seleniumWrapper/tests/test_browser_headless.py +++ b/automon/integrations/seleniumWrapper/tests/test_browser_headless.py @@ -3,12 +3,13 @@ from automon.integrations.seleniumWrapper.browser import SeleniumBrowser browser = SeleniumBrowser() -browser.set_driver(browser.type.chrome_headless()) -browser.set_resolution(device_type='web-large') +browser.config.set_webdriver.Chrome().enable_defaults() class SeleniumClientTest(unittest.TestCase): - if browser.is_running(): + if browser.run(): + browser.set_window_size(device_type='web-large') + def test(self): while True: diff --git a/automon/integrations/seleniumWrapper/tests/test_browser_useragent.py b/automon/integrations/seleniumWrapper/tests/test_browser_useragent.py index b2e7e1eb..957df4cc 100644 --- a/automon/integrations/seleniumWrapper/tests/test_browser_useragent.py +++ b/automon/integrations/seleniumWrapper/tests/test_browser_useragent.py @@ -3,15 +3,15 @@ from automon.integrations.seleniumWrapper.browser import SeleniumBrowser browser = SeleniumBrowser() +browser.config.set_webdriver.Chrome().enable_defaults() agent = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:112.0) Gecko/20100101 Firefox/112.0' -opts = [f"user-agent={agent}"] -browser.set_driver(browser.type.chrome(options=opts)) +browser.config.set_webdriver.webdriver_wrapper.set_user_agent(agent) class SeleniumClientTest(unittest.TestCase): - if browser.is_running(): + if browser.run(): def test_user_agent(self): self.assertEqual(browser.get_user_agent(), agent) diff --git a/automon/integrations/seleniumWrapper/tests/test_new_browser.py b/automon/integrations/seleniumWrapper/tests/test_new_browser.py new file mode 100644 index 00000000..d5a30e90 --- /dev/null +++ b/automon/integrations/seleniumWrapper/tests/test_new_browser.py @@ -0,0 +1,16 @@ +import unittest + +from automon.integrations.seleniumWrapper.browser import SeleniumBrowser + +browser = SeleniumBrowser() +browser.config.set_webdriver.Chrome().enable_defaults() + + +class SeleniumClientTest(unittest.TestCase): + if browser.run(): + def test(self): + browser.quit() + + +if __name__ == '__main__': + unittest.main() diff --git a/requirements.txt b/requirements.txt index 64ea3bb8..1bf2a99c 100644 --- a/requirements.txt +++ b/requirements.txt @@ -49,6 +49,9 @@ selenium>=3.141.0 # sentry.io sentry-sdk>=1.5.1 +# scrapy +Scrapy>=2.6.0 + # slack slackclient>=2.9.3 diff --git a/setup.py b/setup.py index 4dfd8122..f171f051 100644 --- a/setup.py +++ b/setup.py @@ -5,7 +5,7 @@ setuptools.setup( name="automonisaur", - version="0.2.50", + version="0.3.5", author="naisanza", author_email="naisanza@gmail.com", description="Core libraries for automonisaur",