Skip to content

Commit

Permalink
release 0.3.0
Browse files Browse the repository at this point in the history
Adds "Programming Language :: Python :: 3.12" to the package classifiers definition
Bump github actions/checkout to v4 (from v3)
Increase HTTPX timeout to 30 seconds from 5 seconds
Improve generate_filepath rendering of paths with user-home ~
Loosen package dependencies to allow upgrades beyond current minor version
Update dev-packages to make sure latest development packages in use
Add 14x new exec end-to-end tests covering all sub-commands and option variations
Improve type handling and use generic `generate_filepath()` function
Hide the --metadata-path option by default since it is an unusual use case and likely to cause confusion
  • Loading branch information
ndejong committed Jan 21, 2024
1 parent 80cca7a commit 6e931be
Show file tree
Hide file tree
Showing 22 changed files with 723 additions and 79 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/build-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,9 @@ jobs:
runs-on: ubuntu-latest
strategy:
matrix:
python-version: ["3.8", "3.9", "3.10", "3.11"]
python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"]
steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4
- uses: NiklasRosenstein/slap@gha/install/v1

- name: Set up Python ${{ matrix.python-version }}
Expand Down
33 changes: 19 additions & 14 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api"

[tool.poetry]
name = "hibp-downloader"
version = "0.2.1"
version = "0.3.0"
description = "Efficiently download HIBP new pwned password data by hash-prefix for a local-copy"
authors = ["Nicholas de Jong <[email protected]>"]
license = "BSD-3-Clause"
Expand All @@ -14,6 +14,11 @@ classifiers = [
"Environment :: Console",
"Intended Audience :: System Administrators",
"License :: OSI Approved :: BSD License",
"Programming Language :: Python :: 3.8",
"Programming Language :: Python :: 3.9",
"Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11",
"Programming Language :: Python :: 3.12"
]
keywords = ["hibp-downloader", "hibp", "haveibeenpwned", "haveibeenpwned-downloader", "sha1", "ntlm"]

Expand All @@ -28,26 +33,26 @@ hibp-downloader = "hibp_downloader.main:entrypoint"

[tool.poetry.dependencies]
python = "^3.8"
"httpx[http2]" = "^0.25" # https://pypi.org/project/httpx/#history
httpcore = ">=0.17.2" # https://pypi.org/project/httpcore/#history
"typer[all]" = "^0.9" # https://pypi.org/project/typer/#history
shellingham = "^1.5" # https://pypi.org/project/shellingham/#history
aiofiles = "^23.0" # https://pypi.org/project/aiofiles/#history
"httpx[http2]" = ">=0.21" # https://pypi.org/project/httpx/#history
httpcore = ">=0.14" # https://pypi.org/project/httpcore/#history
aiofiles = ">=0.8" # https://pypi.org/project/aiofiles/#history
"typer[all]" = ">=0.9.0" # https://pypi.org/project/typer/#history
shellingham = ">=1.3.0" # https://pypi.org/project/shellingham/#history

[tool.poetry.dev-dependencies]
black = "^23.11" # https://pypi.org/project/black/#history
flake8 = "^6.1" # https://pypi.org/project/flake8/#history
isort = "^5.12" # https://pypi.org/project/isort/#history
mypy = "^1.7" # https://pypi.org/project/mypy/#history
pycln = "^2.3" # https://pypi.org/project/pycln/#history
black = "^23.12" # https://pypi.org/project/black/#history
flake8 = "^7.0" # https://pypi.org/project/flake8/#history
isort = "^5.13" # https://pypi.org/project/isort/#history
mypy = "^1.8" # https://pypi.org/project/mypy/#history
pycln = "^2.4" # https://pypi.org/project/pycln/#history
pytest = "^7.4" # https://pypi.org/project/pytest/#history
safety = "^2.4.0b1" # https://pypi.org/project/safety/#history
urllib3 = "~2.0" # https://pypi.org/project/urllib3/#history
safety = "^3.0.1" # https://pypi.org/project/safety/#history
urllib3 = "~2.1" # https://pypi.org/project/urllib3/#history
types-aiofiles = "23.2" # https://pypi.org/project/types-aiofiles/#history
# NB: pip installs in .readthedocs.yml need to be kept up-to-date manually
novella = "^0.2" # https://pypi.org/project/novella/#history
pydoc-markdown = "^4.8" # https://pypi.org/project/pydoc-markdown/#history
mkdocs-material = "^9.4" # https://pypi.org/project/mkdocs-material/#history
mkdocs-material = "^9.5" # https://pypi.org/project/mkdocs-material/#history

[tool.poetry.plugins."slap.plugins.check"]
changelog = "slap.ext.checks.changelog:ChangelogValidationCheckPlugin"
Expand Down
3 changes: 2 additions & 1 deletion src/hibp_downloader/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,11 @@
from .models import AppContext

__title__ = "HIBP Downloader"
__version__ = "0.2.1"
__version__ = "0.3.0"

LOGGER_NAME = "hibp-downloader"
PWNEDPASSWORDS_API_URL = "https://api.pwnedpasswords.com"
HTTPX_TIMEOUT_SECONDS = 30
LOCAL_CACHE_TTL_DEFAULT = 12 * 3600
MULTIPROCESSING_PROCESSES_DEFAULT = int(cpu_count() if cpu_count() else 4) # type: ignore[arg-type]
MULTIPROCESSING_PREFIXES_CHUNK_SIZE = 10
Expand Down
32 changes: 21 additions & 11 deletions src/hibp_downloader/commands/hibp_download.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,9 +75,11 @@ def main(
help="Number of hash-prefixes to consume (asynchronously) per iteration per process",
),
] = MULTIPROCESSING_PREFIXES_CHUNK_SIZE,
force: Annotated[bool, typer.Option(help="Same as setting --local_cache_ttl=0 and --ignore-etag")] = False,
force: Annotated[
bool, typer.Option("--force", help="Same as setting --local_cache_ttl=0 and --ignore-etag")
] = False,
ignore_etag: Annotated[
bool, typer.Option(help="Do not use request etag headers to manage local/remote cached data")
bool, typer.Option("--ignore-etag", help="Do not use request etag headers to manage local/remote cached data")
] = False,
local_cache_ttl: Annotated[
int,
Expand Down Expand Up @@ -115,8 +117,8 @@ def main(
work_queue: Queue = Queue()
worker_args = WorkerArgs(
hash_type=hash_type,
data_path=Path(os.path.join(app_context.data_path, hash_type.value)), # type: ignore[arg-type]
metadata_path=Path(os.path.join(app_context.metadata_path, hash_type.value)), # type: ignore[arg-type]
data_path=app_context.data_path, # type: ignore[arg-type]
metadata_path=app_context.metadata_path, # type: ignore[arg-type]
encoding_type=ENCODING_TYPE,
ignore_etag=ignore_etag,
local_cache_ttl=local_cache_ttl,
Expand Down Expand Up @@ -188,8 +190,8 @@ async def pwnedpasswords_get_store_gather(result_queue: Queue, hash_prefixes: tu
async def pwnedpasswords_get_and_store_async(
prefix: str,
hash_type: HashType,
data_path: str,
metadata_path: str,
data_path: Path,
metadata_path: Path,
encoding_type: str,
ignore_etag: bool,
local_cache_ttl: int,
Expand All @@ -207,9 +209,16 @@ async def pwnedpasswords_get_and_store_async(

# get existing metadata if available
metadata_existing = await load_metadata(
prefix=prefix, metadata_path=metadata_path, data_path=data_path, datafile_suffix=datafile_suffix
metadata_path=metadata_path,
data_path=data_path,
prefix=prefix,
hash_type=hash_type.value,
datafile_suffix=datafile_suffix,
)

if not metadata_existing:
logger.debug(f"No existing metadata, will generate new metadata for {prefix!r}")

etag = None
if metadata_existing.data_source:
if metadata_existing.server_timestamp:
Expand Down Expand Up @@ -239,6 +248,7 @@ async def pwnedpasswords_get_and_store_async(
if binary:
await save_datafile(
data_path=data_path,
hash_type=hash_type,
prefix=prefix,
content=binary,
filename_suffix=datafile_suffix,
Expand All @@ -249,7 +259,7 @@ async def pwnedpasswords_get_and_store_async(
PrefixMetadataDataSource.local_source_ttl_cache,
PrefixMetadataDataSource.unknown_source_status,
):
await save_metadatafile(metadata_path=metadata_path, prefix=prefix, metadata=metadata)
await save_metadatafile(metadata_path=metadata_path, hash_type=hash_type, prefix=prefix, metadata=metadata)

return metadata

Expand Down Expand Up @@ -284,7 +294,7 @@ async def pwnedpasswords_get(
if response.status_code == 304: # HTTP 304 Not Modified status
metadata.data_source = PrefixMetadataDataSource.local_source_etag_match
elif response.status_code == 200:
if response.headers.get("cf-cache-status").upper() == "HIT":
if response.headers.get("cf-cache-status").upper() == "HIT": # Fragile: relies on HIBP hosted via Cloudflare
metadata.data_source = PrefixMetadataDataSource.remote_source_remote_cache
else:
metadata.data_source = PrefixMetadataDataSource.remote_source_origin_source
Expand All @@ -301,7 +311,7 @@ def results_queue_processor(q: Queue):
metadata_items = q.get()
if metadata_items == QUEUE_WORKER_EXIT_SENTINEL:
running_stats.end_trigger()
logger.info(f"Finished in {round(running_stats.run_time/60,1)}min")
logger.info(f"Finished in {round(running_stats.run_time / 60, 1)}min")
break

if metadata_items:
Expand All @@ -319,7 +329,7 @@ def results_queue_processor(q: Queue):
f"~{int(running_stats.bytes_processed_rate_total / APPROX_GZIP_BYTES_PER_HASH)}H/s] "
f"api=[{int(running_stats.request_rate_total)}req/s "
f"{to_mbytes(running_stats.bytes_received_sum, 1)}MB] "
f"runtime={round(running_stats.run_time/60,1)}min"
f"runtime={round(running_stats.run_time / 60, 1)}min"
)


Expand Down
11 changes: 6 additions & 5 deletions src/hibp_downloader/commands/hibp_generate.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ async def pwnedpasswords_datastore_sorted_gather(filename, hash_type, first_hash
output = ""
for k, v in dict(sorted({k: v for d in results for k, v in d.items()}.items())).items():
output = f"{output}\n{v}"
await append_stringfile(filename=filename, content=output)
await append_stringfile(filepath=filename, content=output)


async def pwnedpasswords_datastore_sorted_async(prefix, hash_type):
Expand All @@ -88,12 +88,13 @@ async def pwnedpasswords_datastore_sorted_async(prefix, hash_type):
else:
raise HibpDownloaderException(f"Unsupported ENCODING_TYPE {ENCODING_TYPE}")

source_data = await load_datafile(
data_path=os.path.join(app_context.data_path, hash_type),
datafile_content, datafile_filepath = await load_datafile(
data_path=app_context.data_path,
hash_type=hash_type,
prefix=prefix,
filename_suffix=filename_suffix,
datafile_suffix=filename_suffix,
decompression_type=decompression_mode,
prepend_prefix=True,
)

return {prefix: source_data}
return {prefix: datafile_content, f"{prefix}_datafile": datafile_filepath}
15 changes: 10 additions & 5 deletions src/hibp_downloader/commands/hibp_query.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,8 @@ def main(
typer.Option(
prompt=True,
hide_input=True,
help="Cleartext password string to query the local data store",
help="Cleartext password string to query the --data-path content; prompts for hidden STDIN if not "
"supplied as CLI option.",
envvar="HIBPDL_PASSWORD",
),
],
Expand Down Expand Up @@ -84,18 +85,22 @@ async def pwnedpasswords_query_datastore(password_hashed: str, hash_type: HashTy
}

try:
source_data = await load_datafile(
data_path=os.path.join(app_context.data_path, hash_type), # type: ignore[arg-type]
datafile_content, datafile_filepath = await load_datafile(
data_path=app_context.data_path, # type: ignore[arg-type]
hash_type=hash_type,
prefix=prefix,
filename_suffix=filename_suffix,
datafile_suffix=filename_suffix,
decompression_type=decompression_mode,
prepend_prefix=True,
)
except HibpDownloaderException as e:
result["status"] = str(e)
return stdout_json(result)

for line in source_data.split("\n"):
if datafile_filepath:
result["data_file"] = str(datafile_filepath)

for line in datafile_content.split("\n"):
if password_hashed.upper() in line:
line_parts = line.split(":")
if len(line_parts) >= 2:
Expand Down
5 changes: 4 additions & 1 deletion src/hibp_downloader/lib/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,14 +48,17 @@ def main(
help="Path where metadata is saved; by default both data and metadata are saved in the same --data-path",
envvar="HIBPDL_METADATA_PATH",
show_envvar=False,
hidden=False if app_context.debug else True,
),
] = "",
debug: Annotated[
bool, typer.Option(help="Set logging to debug-level messages", envvar="HIBPDL_DEBUG", show_envvar=False)
bool,
typer.Option("--debug", help="Set logging to debug-level messages", envvar="HIBPDL_DEBUG", show_envvar=False),
] = False,
quiet: Annotated[
bool,
typer.Option(
"--quiet",
help="Set logging to fatal-level messages; overrides --debug option",
envvar="HIBPDL_QUIET",
show_envvar=False,
Expand Down
Loading

0 comments on commit 6e931be

Please sign in to comment.