-
Notifications
You must be signed in to change notification settings - Fork 0
/
One_Uci_Game.py
59 lines (36 loc) · 1.76 KB
/
One_Uci_Game.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
### My first task was to try to download the data for a single game from the UCI website. I later realized that
### I wanted to download all the data straight from the NCAA website so I could get everything I needed from one site rather than
### from every single team's website.
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
import pandas as pd #for exporting data into csv
# Box-score page of the single game this script scrapes.
url = 'https://ucirvinesports.com/sports/baseball/stats/2023/tulane/boxscore/11733'
# NOTE(review): this path has no leading '/', so it is resolved relative to the
# current working directory -- confirm that is what is intended.
chrome_path = 'Users/katelynvuong/Downloads/chromedriver'


def collect_play_rows(rows):
    """Split scraped table rows into three aligned columns.

    Rows are kept or dropped as a whole, so the three returned lists always
    have the same length and entry ``i`` of each list comes from the same
    table row. (Filtering each column on its own would shift the columns
    against each other as soon as a single cell is blank.)

    Args:
        rows: Iterable of rows, each a sequence of cell texts (strings).

    Returns:
        A ``(play, tulane_score, uci_score)`` tuple of equal-length lists.
    """
    play = []
    tulane_score = []
    uci_score = []
    for cells in rows:
        # Only three-cell rows belong to the play-by-play tables; rows of any
        # other width come from a different table on the page.
        if len(cells) != 3:
            continue
        # A row whose cells are all blank carries no data at all.
        if not any(text != '' for text in cells):
            continue
        play_text, tulane_text, uci_text = cells
        play.append(play_text)
        tulane_score.append(tulane_text)
        uci_score.append(uci_text)
    return play, tulane_score, uci_score


def main():
    """Scrape the play-by-play tab, write it to a CSV file and print it."""
    chrome_options = Options()
    #chrome_options.add_argument("--headless")
    service = Service(chrome_path)
    driver = webdriver.Chrome(service=service, options=chrome_options)
    try:
        driver.get(url)
        # Click the play-by-play button, located by its unique id.
        play_by_play_button = driver.find_element(By.ID, 'ui-id-2')
        play_by_play_button.click()

        rows = []
        for inning in driver.find_elements(By.TAG_NAME, 'tbody'):
            for row in inning.find_elements(By.TAG_NAME, 'tr'):
                cells = row.find_elements(By.TAG_NAME, 'td')
                # Read cell text only for three-cell rows to avoid needless
                # round trips to the browser for the first table.
                if len(cells) == 3:
                    rows.append([cell.text for cell in cells])
    finally:
        # Always shut the browser down, even when scraping fails part-way.
        driver.quit()

    play, tulane_score, uci_score = collect_play_rows(rows)
    df = pd.DataFrame({'Play': play, 'Tulane Score': tulane_score, 'UCI Score': uci_score})
    df.to_csv('uci_tulane_2172023.csv')
    print(df)


if __name__ == "__main__":
    main()