Skip to content
This repository has been archived by the owner on Nov 4, 2024. It is now read-only.

feat: api v2 for mdn observatory #522

Open
wants to merge 4 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions httpobs/database/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
periodic_maintenance,
refresh_materialized_views,
select_scan_host_history,
select_scan_most_recent_scan,
select_scan_recent_finished_scans,
select_scan_recent_scan,
select_scan_scanner_statistics,
Expand All @@ -23,6 +24,7 @@
'select_scan_host_history',
'select_scan_recent_finished_scans',
'select_scan_recent_scan',
'select_scan_most_recent_scan',
'select_scan_scanner_statistics',
'select_site_headers',
'select_site_id',
Expand Down
34 changes: 25 additions & 9 deletions httpobs/database/database.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,7 @@ def insert_scan(site_id: int, hidden: bool = False) -> dict:
def insert_test_results(site_id: int, scan_id: int, data: dict) -> dict:
with get_cursor() as cur:
for name, test in data["tests"].items():
test = test.copy() # don't mutate argument
expectation = test.pop('expectation')
passed = test.pop('pass')
result = test.pop('result')
Expand Down Expand Up @@ -327,6 +328,20 @@ def select_scan_recent_scan(site_id: int, recent_in_seconds=API_CACHED_RESULT_TI
return {}


def select_scan_most_recent_scan(site_id: int) -> dict | None:
    """Return the newest row of ``scans`` for a site.

    Rows are ordered by ``start_time`` descending and only the first one is
    fetched.

    Args:
        site_id: Value matched against the ``site_id`` column.

    Returns:
        The row converted to a plain ``dict``, or ``None`` when the query
        matched no rows.
    """
    with get_cursor() as cursor:
        cursor.execute(
            """SELECT * FROM scans
                 WHERE site_id = %s
                 ORDER BY start_time DESC
                 LIMIT 1""",
            (site_id,),
        )

        # Guard clause: nothing matched, so there is no row to fetch.
        if cursor.rowcount <= 0:
            return None

        row = cursor.fetchone()
        return dict(row)


def select_site_headers(hostname: str) -> dict:
# Return the site's headers
with get_cursor() as cur:
Expand All @@ -351,7 +366,7 @@ def select_site_headers(hostname: str) -> dict:
return {}


def select_site_id(hostname: str) -> int:
def select_site_id(hostname: str, create=True) -> int | None:
# See if the site exists already
with get_cursor() as cur:
cur.execute(
Expand All @@ -366,15 +381,16 @@ def select_site_id(hostname: str) -> int:
return cur.fetchone()['id']

# If not, let's create the site
with get_cursor() as cur:
cur.execute(
"""INSERT INTO sites (domain, creation_time)
VALUES (%s, NOW())
RETURNING id""",
(hostname,),
)
if create:
with get_cursor() as cur:
cur.execute(
"""INSERT INTO sites (domain, creation_time)
VALUES (%s, NOW())
RETURNING id""",
(hostname,),
)

return cur.fetchone()['id']
return cur.fetchone()['id']


def select_test_results(scan_id: int) -> dict:
Expand Down
2 changes: 1 addition & 1 deletion httpobs/scanner/scanner.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from httpobs.scanner.utils import sanitize_headers

# Current algorithm version
ALGORITHM_VERSION = 2
ALGORITHM_VERSION = 3


def scan(hostname: str, **kwargs):
Expand Down
6 changes: 6 additions & 0 deletions httpobs/website/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,12 @@
# TODO: Implement API to write public and private headers to the database


@api.route('/')
@add_response_headers()
def main() -> str:
    """Return the greeting string served at the root route ('/')."""
    greeting = 'Welcome to the HTTP Observatory!'
    return greeting


@api.route('/api/v1/analyze', methods=['GET', 'OPTIONS', 'POST'])
@add_response_headers(cors=True)
@sanitized_api_response
Expand Down
134 changes: 134 additions & 0 deletions httpobs/website/api_v2.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,134 @@
import sys
from datetime import datetime, timedelta

from flask import Blueprint, jsonify, request

import httpobs.database as database
import httpobs.scanner as scanner
from httpobs import STATE_FAILED
from httpobs.conf import API_COOLDOWN, DEVELOPMENT_MODE
from httpobs.scanner.grader import get_score_description
from httpobs.website import add_response_headers
from httpobs.website.utils import valid_hostname

api_v2 = Blueprint("api_v2", __name__)


def _prune_history(history: list) -> list:
    """Serialize scan history, dropping entries whose score did not change.

    The first entry is always kept; every later entry is kept only when its
    ``score`` differs from the entry immediately before it.
    """
    return [
        {
            "end_time": entry["end_time"].isoformat(),
            "grade": entry["grade"],
            "id": entry["scan_id"],
            "score": entry["score"],
        }
        for index, entry in enumerate(history)
        # Compare by value (!=). Identity comparison (`is not`) of numbers is
        # implementation-dependent and must not be relied upon.
        if index == 0 or entry.get("score") != history[index - 1].get("score")
    ]


@api_v2.route("/analyze", methods=["GET", "OPTIONS", "POST"])
@add_response_headers(cors=True)
def api_post_scan_hostname():
    """Return the latest scan for ``?host=...``, running a new scan if needed.

    Flow:
      1. Read the ``host`` query argument (lower-cased and stripped) and look
         up its site id without creating it. An ``IOError`` from that lookup
         yields a ``database-down`` error with status 500.
      2. For unknown hosts, validate the hostname (falling back to a
         ``www.`` prefix); invalid input yields a 400 error. Valid hosts get
         a site row created.
      3. On POST, a most-recent scan that ended less than ``API_COOLDOWN``
         seconds ago is returned with status 429; an older one is discarded
         so that a fresh scan runs.
      4. If a scan is still selected, its stored test results are returned;
         otherwise a new scan is inserted, executed and stored.

    Returns:
        ``(response, status_code)`` where the response is JSON containing
        ``scan``, ``tests`` and ``history``, or an ``error``/``text`` dict on
        the early-exit paths.
    """
    status_code = 200
    tests = {}

    host = request.args.get("host", "").lower().strip()
    try:
        site_id = database.select_site_id(host, create=False)
    except IOError:
        return {
            "error": "database-down",
            "text": "Unable to connect to database",
        }, 500

    if site_id is not None:
        # Already known to the database, so no validation is performed.
        hostname = host
    else:
        # Validate once and reuse the result for both checks below.
        validated = valid_hostname(host)
        if validated is None:
            return {
                "error": "invalid-hostname-ip",
                "text": "Cannot scan IP addresses",
            }, 400

        # prepend www. if necessary
        hostname = validated or (valid_hostname("www." + host) if host else False)
        if not hostname:
            return {
                "error": "invalid-hostname",
                "text": f"{host} is an invalid hostname",
            }, 400

        # NOTE(review): the site row is keyed on `host` while the scan below
        # targets `hostname` (which may carry the "www." prefix) — confirm
        # this is intended.
        site_id = database.select_site_id(host, create=True)

    scan = database.select_scan_most_recent_scan(site_id)

    if scan and request.method == "POST":
        # NOTE(review): assumes scan["end_time"] is a naive datetime and is
        # never None for the most recent scan — TODO confirm.
        time_since_scan = datetime.now() - scan["end_time"]
        if time_since_scan < timedelta(seconds=API_COOLDOWN):
            # Too soon to rescan: serve the existing scan, flagged as 429.
            status_code = 429
        else:
            # Cooldown elapsed: drop the old scan so a new one is started.
            scan = None

    if scan:
        scan_id = scan["id"]

        tests = database.select_test_results(scan_id)
        for name, test in tests.items():
            # Strip database bookkeeping fields from the API payload.
            for key in ("id", "scan_id", "site_id", "name"):
                del test[key]
            test["score_description"] = get_score_description(test["result"])
            # Flatten the nested "output" dict into the test itself; on key
            # clashes the test's own top-level values win.
            tests[name] = {**test.pop("output"), **test}

    else:
        hidden = request.form.get("hidden", "false") == "true"

        scan = database.insert_scan(site_id, hidden=hidden)
        scan_id = scan["id"]

        # Get the site's cookies and headers
        # TODO: add API to insert these into the db
        # headers = database.select_site_headers(hostname)

        try:
            result = scanner.scan(hostname)

            # Check for an error *before* touching the success-only keys, so
            # an error-only result is recorded with its message instead of
            # tripping a KeyError and losing it.
            if "error" in result:
                tests = result.get("tests", tests)
                scan = database.update_scan_state(scan_id, STATE_FAILED, error=result["error"])
            else:
                tests = result["tests"]
                scan = database.insert_test_results(
                    site_id,
                    scan_id,
                    result,
                )
        except Exception:
            # If we are unsuccessful, close out the scan in the database.
            # `Exception` (not a bare except) lets SystemExit and
            # KeyboardInterrupt propagate.
            scan = database.update_scan_state(scan_id, STATE_FAILED)

            # Print the exception to stderr if we're in dev
            if DEVELOPMENT_MODE:
                import traceback

                print("Error detected in scan for: " + hostname)
                traceback.print_exc(file=sys.stderr)

    scan["start_time"] = scan["start_time"].isoformat()
    scan["end_time"] = scan["end_time"].isoformat()

    # Prune for when the score doesn't change
    history = _prune_history(database.select_scan_host_history(site_id))

    return (
        jsonify(
            {
                "scan": scan,
                "tests": tests,
                "history": history,
            }
        ),
        status_code,
    )
34 changes: 16 additions & 18 deletions httpobs/website/main.py
Original file line number Diff line number Diff line change
@@ -1,34 +1,32 @@
import sys

from flask import Flask

from httpobs.conf import API_PORT, API_PROPAGATE_EXCEPTIONS, DEVELOPMENT_MODE
from httpobs.website import add_response_headers
from httpobs.website.api import api
from httpobs.website.monitoring import monitoring_api


def __exit_with(msg: str) -> None:
    """Print ``msg`` and terminate the interpreter with exit status 1."""
    print(msg)
    # Equivalent to sys.exit(1), which raises SystemExit with that status.
    raise SystemExit(1)

def create_app():
# Register the application with flask
app = Flask('http-observatory')
app.config['PROPAGATE_EXCEPTIONS'] = API_PROPAGATE_EXCEPTIONS

# Register the application with flask
app = Flask('http-observatory')
app.config['PROPAGATE_EXCEPTIONS'] = API_PROPAGATE_EXCEPTIONS
app.register_blueprint(api)
app.register_blueprint(monitoring_api)
from httpobs.website.api import api
from httpobs.website.api_v2 import api_v2
from httpobs.website.monitoring import monitoring_api

app.register_blueprint(api)
app.register_blueprint(api_v2, url_prefix="/api/v2")
app.register_blueprint(monitoring_api)

@app.route('/')
@add_response_headers()
def main() -> str:
return 'Welcome to the HTTP Observatory!'
return app


def run():
    """Create the application and call its ``run()`` method.

    ``DEVELOPMENT_MODE`` is passed as the ``debug`` flag and ``API_PORT`` as
    the port.
    """
    create_app().run(debug=DEVELOPMENT_MODE, port=API_PORT)


# Script entry point: start the server when this module is executed directly.
if __name__ == '__main__':
    run()

# Module-level `app` kept for backwards compatibility with the uwsgi setup,
# which looks the application object up on this module.
# TODO: move into wsgi.py
app = create_app()