Skip to content
This repository has been archived by the owner on Dec 17, 2021. It is now read-only.

WIP: Creating a Python package #231

Open
wants to merge 12 commits into
base: master
Choose a base branch
from
Open
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -26,3 +26,5 @@ __pycache__
scripts/pulse-results/*.json
.DS_Store
venv
build/
dist/
File renamed without changes.
File renamed without changes.
4 changes: 2 additions & 2 deletions gatherers/censys.py → domain_scan/gatherers/censys.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,8 @@
from google.oauth2 import service_account
import google.api_core.exceptions

from gatherers.gathererabc import Gatherer
from utils import utils
from domain_scan.gatherers.gathererabc import Gatherer
from domain_scan.utils import utils

# Options:
#
Expand Down
2 changes: 1 addition & 1 deletion gatherers/rdns.py → domain_scan/gatherers/rdns.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import re
from typing import Generator, List, Pattern

from gatherers.gathererabc import Gatherer
from domain_scan.gatherers.gathererabc import Gatherer

# Reverse DNS
#
Expand Down
4 changes: 2 additions & 2 deletions gatherers/url.py → domain_scan/gatherers/url.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@

import requests

from gatherers.gathererabc import Gatherer
from utils import utils
from domain_scan.gatherers.gathererabc import Gatherer
from domain_scan.utils import utils


class Gatherer(Gatherer):
Expand Down
File renamed without changes.
2 changes: 1 addition & 1 deletion runner/runner.py → domain_scan/runner/runner.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from utils import utils
from domain_scan.utils import utils


def write_rows(rows, domain, base_domain, scanner, csv_writer, meta=None):
Expand Down
File renamed without changes.
2 changes: 1 addition & 1 deletion scanners/a11y.py → domain_scan/scanners/a11y.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import requests
import yaml

from utils import utils
from domain_scan.utils import utils


workers = 3
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import logging
import os

from utils import utils
from domain_scan.utils import utils

# Check whether a domain is present in a CSV, set in --analytics.

Expand Down
2 changes: 1 addition & 1 deletion scanners/csp.py → domain_scan/scanners/csp.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import logging
import requests
from scanners import utils
from domain_scan.scanners import utils

###
# CSP Scanner - check the presence of CSP headers
Expand Down
Empty file.
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import logging
import json

from utils import utils
from domain_scan.utils import utils

###
# Local Python bridge to the JS bridge to the JS scanner.
Expand Down
File renamed without changes.
2 changes: 1 addition & 1 deletion scanners/pshtt.py → domain_scan/scanners/pshtt.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import re

from pshtt import pshtt
from utils import utils
from domain_scan.utils import utils

###
# Measure a site's HTTP behavior using DHS NCATS' pshtt tool.
Expand Down
2 changes: 1 addition & 1 deletion scanners/sslyze.py → domain_scan/scanners/sslyze.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
from cryptography.hazmat.primitives.serialization import Encoding
from cryptography.hazmat.primitives.asymmetric import ec, dsa, rsa

from utils import utils
from domain_scan.utils import utils

# Number of seconds to wait during sslyze connection check.
# Not much patience here, and very willing to move on.
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import logging

from utils import utils
from domain_scan.utils import utils

# Evaluate third party service usage using Chrome headless.

Expand Down
Empty file added domain_scan/utils/__init__.py
Empty file.
File renamed without changes.
6 changes: 3 additions & 3 deletions gather
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ import requests
import logging
import importlib

from utils import utils
from domain_scan.utils import utils

# some metadata about the scan itself
start_time = utils.local_now()
Expand Down Expand Up @@ -55,14 +55,14 @@ def run(options=None, cache_dir="./cache", results_dir="./results"):

try:
gatherer_module = importlib.import_module(
"gatherers.%s" % source)
"domain_scan.gatherers.%s" % source)
gatherer = gatherer_module.Gatherer(suffixes, options, extra)
except ImportError:
# If it's not a registered module, allow it to be "hot registered"
# as long as the user gave us a flag with that name that can be
# used as the --url option to the URL module.
if options.get(source):
gatherer_module = importlib.import_module("gatherers.url")
gatherer_module = importlib.import_module("domain_scan.gatherers.url")
extra['name'] = source
gatherer = gatherer_module.Gatherer(suffixes, options, extra)
else:
Expand Down
5 changes: 2 additions & 3 deletions lambda/lambda_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import sys
import logging

from utils import utils
from domain_scan.utils import utils

# Central handler for all Lambda events.
def handler(event, context):
Expand All @@ -19,7 +19,7 @@ def handler(event, context):

# Might be acceptable to let this crash the module, in Lambda.
try:
scanner = importlib.import_module("scanners.%s" % name)
scanner = importlib.import_module("domain_scan.scanners.%s" % name)
except ImportError:
exc_type, exc_value, exc_traceback = sys.exc_info()
logging.error("[%s] Scanner not found, or had an error during loading.\n\tERROR: %s\n\t%s" % (name, exc_type, exc_value))
Expand Down Expand Up @@ -49,4 +49,3 @@ def handler(event, context):
# date transform functions in one place, before Amazon's built-in
# JSON serialization prepares the data for transport.
return utils.from_json(utils.json_for(response))

8 changes: 4 additions & 4 deletions scan
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,9 @@ import boto3
import botocore
from concurrent.futures import ThreadPoolExecutor

from scanners.headless.local_bridge import headless_scan
from utils import utils
from runner import runner
from domain_scan.scanners.headless.local_bridge import headless_scan
from domain_scan.utils import utils
from domain_scan.runner import runner


# Default and maximum for local workers (threads) per-scanner.
Expand Down Expand Up @@ -130,7 +130,7 @@ def run(options=None):

for name in options.get("scan").split(","):
try:
scanner = importlib.import_module("scanners.%s" % name)
scanner = importlib.import_module("domain_scan.scanners.%s" % name)
except ImportError:
exc_type, exc_value, exc_traceback = sys.exc_info()
logging.error("[%s] Scanner not found, or had an error during loading.\n\tERROR: %s\n\t%s" % (name, exc_type, exc_value))
Expand Down
2 changes: 2 additions & 0 deletions setup.cfg
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
[bdist_wheel]
universal = true
84 changes: 84 additions & 0 deletions setup.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
"""
Setup module for domain-scan.

Declares the distribution metadata, the runtime and test dependencies, and
installs the ``gather`` and ``scan`` command-line scripts.

Based on:

- https://packaging.python.org/distributing/
- https://github.com/pypa/sampleproject/blob/master/setup.py
- https://github.com/dhs-ncats/pshtt/blob/master/setup.py
"""

from setuptools import setup, find_packages

setup(
    name='domain-scan',

    # Versions should comply with PEP 440. Development releases are spelled
    # ".devN" in normalized form (not "-devN").
    version='0.1.0.dev1',
    description='lightweight scan pipeline for orchestrating third party tools, at scale and (optionally) using serverless infrastructure',

    # Homepage of the organization that maintains the project (18F)
    url='https://18f.gsa.gov',
    # The project's source repository
    download_url='https://github.com/18F/domain-scan',

    # Author details
    author='GSA 18F',
    # NOTE(review): this value looks like an address that was redacted or
    # obfuscated before it reached this file -- confirm the real contact
    # address before publishing.
    author_email='[email protected]',

    # Short license name (SPDX identifier). The full trove classifier
    # belongs in ``classifiers`` below, not here.
    license='CC0-1.0',

    # See https://pypi.python.org/pypi?%3Aaction=list_classifiers
    classifiers=[
        # How mature is this project? Common values are
        # 3 - Alpha
        # 4 - Beta
        # 5 - Production/Stable
        'Development Status :: 4 - Beta',

        # Indicate who your project is intended for
        'Intended Audience :: Developers',

        # License classifier (should agree with "license" above)
        'License :: CC0 1.0 Universal (CC0 1.0) Public Domain Dedication',

        # Specify the Python versions you support here. In particular, ensure
        # that you indicate whether you support Python 2, Python 3 or both.
        # NOTE(review): domain_scan/gatherers/rdns.py imports ``typing``,
        # which is not in the standard library before Python 3.5 and is not
        # listed in install_requires -- confirm that Python 2 and 3.4 are
        # really supported before advertising them.
        'Programming Language :: Python :: 2',
        'Programming Language :: Python :: 2.7',
        'Programming Language :: Python :: 3',
        'Programming Language :: Python :: 3.4',
        'Programming Language :: Python :: 3.5',
        'Programming Language :: Python :: 3.6',
    ],

    # What does your project relate to?
    keywords='https best practices web-crawling domain scanning',

    # Pick up every package (directory with an __init__.py) under the
    # project root.
    packages=find_packages(),

    # Runtime dependencies installed alongside the package
    install_requires=[
        'strict-rfc3339',
        'publicsuffix',
        'boto3',
        'ipython',
        'sslyze>=1.3.4,<1.4.0',
        'cryptography',
        'pyyaml',
        'requests',
        'google-cloud-bigquery',
        'google-auth-oauthlib',
    ],

    # Optional dependency sets, e.g. ``pip install domain-scan[test]``
    extras_require={
        'test': [
            'pytest',
        ],
    },

    # Conveniently allows one to run the CLI scripts
    scripts=[
        'gather',
        'scan',
    ],
)
2 changes: 1 addition & 1 deletion tests/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import sys
import pytest
from .context import utils # noqa
from utils import utils as subutils
from domain_scan.utils import utils as subutils


def get_default_false_values(parser):
Expand Down