diff --git a/httpobs/database/__init__.py b/httpobs/database/__init__.py
index cffca49..9c0f8b8 100644
--- a/httpobs/database/__init__.py
+++ b/httpobs/database/__init__.py
@@ -5,6 +5,7 @@
     periodic_maintenance,
     refresh_materialized_views,
     select_scan_host_history,
+    select_scan_most_recent_scan,
     select_scan_recent_finished_scans,
     select_scan_recent_scan,
     select_scan_scanner_statistics,
@@ -23,6 +24,7 @@
     'select_scan_host_history',
     'select_scan_recent_finished_scans',
     'select_scan_recent_scan',
+    'select_scan_most_recent_scan',
     'select_scan_scanner_statistics',
     'select_site_headers',
     'select_site_id',
diff --git a/httpobs/database/database.py b/httpobs/database/database.py
index d9f0a65..3f7f03c 100644
--- a/httpobs/database/database.py
+++ b/httpobs/database/database.py
@@ -109,6 +109,7 @@ def insert_scan(site_id: int, hidden: bool = False) -> dict:
 def insert_test_results(site_id: int, scan_id: int, data: dict) -> dict:
     with get_cursor() as cur:
         for name, test in data["tests"].items():
+            test = test.copy()  # don't mutate argument
             expectation = test.pop('expectation')
             passed = test.pop('pass')
             result = test.pop('result')
@@ -327,6 +328,21 @@ def select_scan_recent_scan(site_id: int, recent_in_seconds=API_CACHED_RESULT_TI
     return {}
 
 
+def select_scan_most_recent_scan(site_id: int) -> dict | None:
+    """Return the site's most recently started scan that has an end_time, or None if there is none."""
+    with get_cursor() as cur:
+        cur.execute(
+            """SELECT * FROM scans
+                 WHERE site_id = %s AND end_time IS NOT NULL
+                 ORDER BY start_time DESC
+                 LIMIT 1""",
+            (site_id,),
+        )
+
+        if cur.rowcount > 0:
+            return dict(cur.fetchone())
+
+
 def select_site_headers(hostname: str) -> dict:
     # Return the site's headers
     with get_cursor() as cur:
@@ -351,7 +367,8 @@ def select_site_headers(hostname: str) -> dict:
             return {}
 
 
-def select_site_id(hostname: str) -> int:
+def select_site_id(hostname: str, create=True) -> int | None:
+    """Return the site id for hostname, inserting the site if missing and create is true (else None)."""
     # See if the site exists already
     with get_cursor() as cur:
         cur.execute(
@@ -366,15 +383,16 @@ def select_site_id(hostname: str) -> int:
             return cur.fetchone()['id']
 
     # If not, let's create the site
-    with get_cursor() as cur:
-        cur.execute(
-            """INSERT INTO sites (domain, creation_time)
-                 VALUES (%s, NOW())
-                 RETURNING id""",
-            (hostname,),
-        )
+    if create:
+        with get_cursor() as cur:
+            cur.execute(
+                """INSERT INTO sites (domain, creation_time)
+                     VALUES (%s, NOW())
+                     RETURNING id""",
+                (hostname,),
+            )
 
-        return cur.fetchone()['id']
+            return cur.fetchone()['id']
 
 
 def select_test_results(scan_id: int) -> dict:
@@ -404,6 +422,18 @@ def update_scan_state(scan_id, state: str, error=None) -> dict:
 
             row = dict(cur.fetchone())
 
+    elif state == STATE_FAILED:
+        with get_cursor() as cur:
+            cur.execute(
+                """UPDATE scans
+                     SET (state, end_time) = (%s, NOW())
+                     WHERE id = %s
+                     RETURNING *""",
+                (state, scan_id),
+            )
+
+            row = dict(cur.fetchone())
+
     else:
         with get_cursor() as cur:
             cur.execute(
diff --git a/httpobs/website/api.py b/httpobs/website/api.py
index 9ef3b8f..c062863 100644
--- a/httpobs/website/api.py
+++ b/httpobs/website/api.py
@@ -19,6 +19,12 @@
 # TODO: Implement API to write public and private headers to the database
 
 
+@api.route('/')
+@add_response_headers()
+def main() -> str:
+    return 'Welcome to the HTTP Observatory!'
+
+
 @api.route('/api/v1/analyze', methods=['GET', 'OPTIONS', 'POST'])
 @add_response_headers(cors=True)
 @sanitized_api_response
diff --git a/httpobs/website/api_v2.py b/httpobs/website/api_v2.py
new file mode 100644
index 0000000..4b80b6f
--- /dev/null
+++ b/httpobs/website/api_v2.py
@@ -0,0 +1,144 @@
+import sys
+from datetime import datetime, timedelta
+
+from flask import Blueprint, jsonify, request
+
+import httpobs.database as database
+import httpobs.scanner as scanner
+from httpobs import STATE_FAILED
+from httpobs.conf import API_COOLDOWN, DEVELOPMENT_MODE
+from httpobs.scanner.grader import get_score_description
+from httpobs.website import add_response_headers
+from httpobs.website.utils import valid_hostname
+
+api_v2 = Blueprint("api_v2", __name__)
+
+
+@api_v2.route("/analyze", methods=["GET", "OPTIONS", "POST"])
+@add_response_headers(cors=True)
+def api_post_scan_hostname():
+    """Return the most recent scan for the ``host`` query argument, scanning first when needed.
+
+    A scan is run when the site has no finished scan, or on a POST made once
+    API_COOLDOWN seconds have passed since the last one; a POST inside that
+    window is answered with the most recent scan and a 429 status. The JSON
+    response holds "scan", "tests" and "history"; errors return 400 or 503.
+    """
+    status_code = 200
+    scan = {}
+    tests = {}
+
+    host = request.args.get("host", "").lower().strip()
+    try:
+        site_id = database.select_site_id(host, create=False)
+    except IOError:
+        return {
+            "error": "database-down",
+            "text": "Unable to connect to database",
+        }, 503
+
+    if site_id is not None:
+        hostname = host
+    else:
+        ip = valid_hostname(host) is None
+        if ip:
+            return {
+                "error": "invalid-hostname-ip",
+                "text": "Cannot scan IP addresses",
+            }, 400
+
+        hostname = valid_hostname(host) or (
+            valid_hostname("www." + host) if host else False
+        )  # prepend www. if necessary
+        if not hostname:
+            return {
+                "error": "invalid-hostname",
+                "text": f"{host} is an invalid hostname",
+            }, 400
+
+        site_id: int = database.select_site_id(hostname, create=True)
+    scan = database.select_scan_most_recent_scan(site_id)
+
+    if scan and request.method == "POST":
+        time_since_scan = datetime.now() - scan["end_time"]
+        if time_since_scan < timedelta(seconds=API_COOLDOWN):
+            status_code = 429  # keep going, we'll respond with the most recent scan
+        else:
+            scan = None  # clear the scan, and we'll do another
+
+    if scan:
+        scan_id = scan["id"]
+
+        tests = database.select_test_results(scan_id)
+        for name, test in tests.items():
+            del test["id"]
+            del test["scan_id"]
+            del test["site_id"]
+            del test["name"]
+            test["score_description"] = get_score_description(test["result"])
+            tests[name] = {**test.pop("output"), **test}
+
+    else:
+        # no scan means we're a POST which hasn't been rate limited
+        # or we're a GET for a host which has no scans in the db
+        # either way, we need to perform a scan
+
+        hidden = request.form.get("hidden", "false") == "true"
+
+        scan = database.insert_scan(site_id, hidden=hidden)
+        scan_id = scan["id"]
+
+        # Get the site's cookies and headers
+        # TODO: add API to insert these into the db
+        # headers = database.select_site_headers(hostname)
+
+        try:
+            result = scanner.scan(hostname)
+
+            if "error" in result:
+                scan = database.update_scan_state(scan_id, STATE_FAILED, error=result["error"])
+            else:
+                scan = database.insert_test_results(
+                    site_id,
+                    scan_id,
+                    result,
+                )
+                tests = result["tests"]
+        except Exception:
+            # If we are unsuccessful, close out the scan in the database
+            scan = database.update_scan_state(scan_id, STATE_FAILED)
+
+            # Print the exception to stderr if we're in dev
+            if DEVELOPMENT_MODE:
+                import traceback
+
+                print("Error detected in scan for: " + hostname)
+                traceback.print_exc(file=sys.stderr)
+
+    scan["start_time"] = scan["start_time"].isoformat()
+    scan["end_time"] = scan["end_time"].isoformat()
+
+    history = database.select_scan_host_history(site_id)
+
+    # Prune for when the score doesn't change; thanks to chuck for the elegant list comprehension
+    history = [
+        {
+            "end_time": v["end_time"].isoformat(),
+            "grade": v["grade"],
+            "id": v["scan_id"],
+            "score": v["score"],
+        }
+        for k, v in enumerate(history)
+        if history[k].get('score') != history[k - 1].get('score') or k == 0
+    ]
+
+    return (
+        jsonify(
+            {
+                "scan": scan,
+                "tests": tests,
+                "history": history,
+            }
+        ),
+        status_code,
+    )
diff --git a/httpobs/website/main.py b/httpobs/website/main.py
index b7e6ad7..89c5677 100644
--- a/httpobs/website/main.py
+++ b/httpobs/website/main.py
@@ -1,34 +1,33 @@
-import sys
-
 from flask import Flask
 
 from httpobs.conf import API_PORT, API_PROPAGATE_EXCEPTIONS, DEVELOPMENT_MODE
-from httpobs.website import add_response_headers
-from httpobs.website.api import api
-from httpobs.website.monitoring import monitoring_api
 
-
-def __exit_with(msg: str) -> None:
-    print(msg)
-    sys.exit(1)
 
+def create_app():
+    """Build the Flask application and register the api, api_v2 and monitoring_api blueprints."""
+    # Register the application with flask
+    app = Flask('http-observatory')
+    app.config['PROPAGATE_EXCEPTIONS'] = API_PROPAGATE_EXCEPTIONS
 
-# Register the application with flask
-app = Flask('http-observatory')
-app.config['PROPAGATE_EXCEPTIONS'] = API_PROPAGATE_EXCEPTIONS
-app.register_blueprint(api)
-app.register_blueprint(monitoring_api)
+    from httpobs.website.api import api
+    from httpobs.website.api_v2 import api_v2
+    from httpobs.website.monitoring import monitoring_api
 
+    app.register_blueprint(api)
+    app.register_blueprint(api_v2, url_prefix="/api/v2")
+    app.register_blueprint(monitoring_api)
 
-@app.route('/')
-@add_response_headers()
-def main() -> str:
-    return 'Welcome to the HTTP Observatory!'
+    return app
 
 
 def run():
+    app = create_app()
     app.run(debug=DEVELOPMENT_MODE, port=API_PORT)
 
 
 if __name__ == '__main__':
     run()
+
+# make backwards compatible with uwsgi setup
+# TODO: move into wsgi.py
+app = create_app()