-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathpa-facilities.py
35 lines (24 loc) · 1.04 KB
/
pa-facilities.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
'''
Goal: Download the HTML behind the big list of drug and alcohol facilities in Pennsylvania.
This is step one of what would need to be a multi-part scraper to get the details, but this is the only step that needs an automated browser.
'''
from playwright.sync_api import sync_playwright
# Settings for the scrape, named so each value and its unit is explicit.
SEARCH_URL = 'https://sais.health.pa.gov/commonpoc/Content/PublicWeb/DAFind.aspx'
OUTPUT_PATH = 'pa-facility-list.html'
# Playwright timeouts are given in milliseconds: 300,000 ms is 5 minutes,
# well above Playwright's 30-second default.
SLOW_PAGE_TIMEOUT_MS = 300_000

# create a playwright object in a context manager
with sync_playwright() as p:
    # create a chromium browser in non-headless mode
    browser = p.chromium.launch(headless=False)
    try:
        # open a new page
        page = browser.new_page()
        # navigate to the search page
        page.goto(SEARCH_URL)
        # click the form submit, allowing up to 5 minutes instead of the default
        page.click('#btnSubmit2', timeout=SLOW_PAGE_TIMEOUT_MS)
        # wait for the next page to appear, with the same generous limit so a
        # slow results page does not fail on the shorter default timeout
        page.wait_for_selector('form#frmFacInfo', timeout=SLOW_PAGE_TIMEOUT_MS)
        # grab the HTML content
        html = page.content()
    finally:
        # close the browser even if one of the steps above raised
        browser.close()

# write the HTML to file; the encoding is explicit so the output does not
# depend on the platform's default text encoding
with open(OUTPUT_PATH, 'w', encoding='utf-8') as outfile:
    outfile.write(html)