From 74dbb0db7b5a606b7a77a15d578e63d66ff6d543 Mon Sep 17 00:00:00 2001 From: david-dong828 Date: Tue, 2 Apr 2024 21:05:40 -0230 Subject: [PATCH] update from planetscale(mysql) to postgresql --- api/database_handle.py | 10 +++++----- api/getCompniesCareerPage.py | 27 +++++++++++++++++---------- 2 files changed, 22 insertions(+), 15 deletions(-) diff --git a/api/database_handle.py b/api/database_handle.py index 7085e00..cd031c5 100644 --- a/api/database_handle.py +++ b/api/database_handle.py @@ -6,12 +6,12 @@ def get_planetscale_params(file ='vercel_postgres.json'): # To check if in CI env (GITHUB Workflow file) - if os.getenv("POSTGRES_HOST") and os.getenv("POSTGRES_USER") and os.getenv("POSTGRES_PASSWORD"): + if os.getenv("DB_HOST") and os.getenv("DB_USER") and os.getenv("DB_PASSWD"): return { - "host": os.getenv("POSTGRES_HOST"), - "user": os.getenv("POSTGRES_USER"), - "password": os.getenv("POSTGRES_PASSWORD"), - "database": os.getenv("POSTGRES_DATABASE") + "host": os.getenv("DB_HOST"), + "user": os.getenv("DB_USER"), + "password": os.getenv("DB_PASSWD"), + "database": os.getenv("DB_NAME") } else: params = {} diff --git a/api/getCompniesCareerPage.py b/api/getCompniesCareerPage.py index 9544c6a..dd7d482 100644 --- a/api/getCompniesCareerPage.py +++ b/api/getCompniesCareerPage.py @@ -13,7 +13,8 @@ from selenium.webdriver.common.action_chains import ActionChains import random import api.database_handle -import mysql.connector +# import mysql.connector +import psycopg2 class BaseScraper: def __init__(self,company,url): @@ -83,6 +84,7 @@ def indivisual_scrape(self,job_file_id): } json_string = json.dumps(all_items) api.database_handle.saveJsonFileToTable(job_file_id, json_string, self.db, self.cursor) + self.db.commit() self.db.close() return all_items @@ -115,6 +117,7 @@ def indivisual_scrape(self,job_file_id): json_string = json.dumps(all_items) api.database_handle.saveJsonFileToTable(job_file_id, json_string, self.db, self.cursor) + self.db.commit() self.db.close() return all_items @@ -149,6 +152,7 @@ def indivisual_scrape(self,job_file_id): } json_string = json.dumps(all_items) api.database_handle.saveJsonFileToTable(job_file_id, json_string, self.db, self.cursor) + self.db.commit() self.db.close() return all_items @@ -180,6 +184,7 @@ def indivisual_scrape(self,job_file_id): json_string = json.dumps(all_items) api.database_handle.saveJsonFileToTable(job_file_id, json_string, self.db, self.cursor) + self.db.commit() self.db.close() return all_items @@ -208,6 +213,7 @@ def indivisual_scrape(self,job_file_id): } json_string = json.dumps(all_items) api.database_handle.saveJsonFileToTable(job_file_id, json_string, self.db, self.cursor) + self.db.commit() self.db.close() self.close_driver() return all_items @@ -260,7 +266,7 @@ def get_scraper(company, url): return scraper(company,url) return None -def is_job_json_existed_in_mysql(job_file_id,cursor,tableName="NL_TECH_JOBS"): +def is_job_json_existed_in_mysql(job_file_id,cursor,tableName="nl_tech_jobs"): try: sql = f"select json_data from {tableName} where job_id = %s" cursor.execute(sql,(job_file_id,)) @@ -272,21 +278,22 @@ def is_job_json_existed_in_mysql(job_file_id,cursor,tableName="NL_TECH_JOBS"): return json_data else: return None - except mysql.connector.Error as err: + except psycopg2.Error as err: print(f"Error in is_job_json_existed_in_mysql: {err}") return None #################################################### main for testing ################################### def main(): - mysaurl = "https://getmysa.com/pages/careers-ca" - company = "mysa" - scraper = ScraperFactory.get_scraper(company,mysaurl) + # mysaurl = "https://getmysa.com/pages/careers-ca" + # company = "mysa" + # scraper = ScraperFactory.get_scraper(company,mysaurl) + # print(scraper.scrape()) + + verafin_link = "https://nasdaq.wd1.myworkdayjobs.com/en-US/US_External_Career_Site?q=verafin" + company = "verafin" + scraper = ScraperFactory.get_scraper(company, verafin_link) print(scraper.scrape()) - # verafin_link = "https://nasdaq.wd1.myworkdayjobs.com/en-US/US_External_Career_Site?q=verafin" - # jobFile = checkVerafin(verafin_link) - # print(jobFile) - # colab_link = "https://www.colabsoftware.com/careers#openings" # jobfile = checkColab(colab_link) # print(jobfile)