Skip to content

Commit

Permalink
Merge branch 'main' of github.com:cocrawler/cdx_toolkit
Browse files Browse the repository at this point in the history
  • Loading branch information
Greg Lindahl committed Sep 2, 2024
2 parents 01de1fd + d888e9e commit 91a9bda
Show file tree
Hide file tree
Showing 3 changed files with 91 additions and 9 deletions.
68 changes: 68 additions & 0 deletions .github/workflows/ci.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
# Continuous integration: install the package with its test extras, run the
# coverage test target, and upload the coverage report to Codecov.
name: CI

on:
  # runtime is erratic and up to an hour
  push:
    branches:
      - main
  pull_request:
    branches:
      - main

jobs:
  unit-tests:
    runs-on: ${{ matrix.os }}
    strategy:
      # Let every matrix job finish even when one of them fails.
      fail-fast: false
      # max-parallel: 1
      matrix:
        # Versions are quoted so that 3.10 is not read as the float 3.1.
        python-version: ['3.7', '3.8', '3.9', '3.10', '3.11', '3.12']
        os: [ubuntu-latest]
        EXTRA: [false]  # used to force includes to get included
        include:
          - python-version: '3.12'
            os: ubuntu-latest
            EXTRA: true
            # A matrix value cannot set the job environment by itself; the
            # "Enable debug logging" step below exports it as LOGLEVEL.
            loglevel: DEBUG
          - python-version: '3.11'
            os: macos-latest
            EXTRA: true
          - python-version: '3.12'
            os: macos-latest
            EXTRA: true
          - python-version: '3.7'
            os: windows-latest
            EXTRA: true
          - python-version: '3.12'
            os: windows-latest
            EXTRA: true
          - python-version: '3.7'
            os: ubuntu-20.04  # oldest version on github actions
            EXTRA: true

    steps:
      - name: checkout
        uses: actions/checkout@v4

      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}

      - name: Enable debug logging
        # Runs only for matrix entries that define `loglevel`; all other jobs
        # keep an untouched environment.
        # NOTE(review): assumes the test run honours LOGLEVEL -- confirm.
        if: ${{ matrix.loglevel }}
        shell: bash
        env:
          MATRIX_LOGLEVEL: ${{ matrix.loglevel }}
        run: |
          echo "LOGLEVEL=$MATRIX_LOGLEVEL" >> "$GITHUB_ENV"

      - name: Install setuptools on python 3.12+
        # Two strings are compared as text in workflow expressions, so
        # `'3.7' >= '3.12'` is true and `'3.10' >= '3.12'` is false. Test for
        # "not one of the pre-3.12 versions" instead, which also covers any
        # newer version added to the matrix later.
        if: ${{ !contains(fromJSON('["3.7", "3.8", "3.9", "3.10", "3.11"]'), matrix.python-version) }}
        run: |
          pip install setuptools

      - name: Install cdx_toolkit
        run: pip install .[test]

      - name: Run tests
        run: |
          make test_coverage

      - name: Upload coverage to Codecov
        uses: codecov/codecov-action@v4
        with:
          token: ${{ secrets.CODECOV_TOKEN }}
6 changes: 3 additions & 3 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
# remember: keep requires synchronized with requirements.txt
requires = ['requests', 'warcio']

test_requirements = ['pytest', 'pytest-cov', 'coveralls']
test_requirements = ['pytest', 'pytest-cov']

package_requirements = ['twine', 'setuptools', 'setuptools-scm']

Expand All @@ -37,7 +37,7 @@
author_email='[email protected]',
url='https://github.com/cocrawler/cdx_toolkit',
packages=packages,
python_requires=">=3.6",
python_requires=">=3.7",
extras_require=extras_require,
setup_requires=['setuptools-scm'],
install_requires=requires,
Expand All @@ -59,7 +59,7 @@
'License :: OSI Approved :: Apache Software License',
'Programming Language :: Python',
#'Programming Language :: Python :: 3.5', # setuptools-scm problem
'Programming Language :: Python :: 3.6',
#'Programming Language :: Python :: 3.6', # not offered in github actions
'Programming Language :: Python :: 3.7',
'Programming Language :: Python :: 3.8',
'Programming Language :: Python :: 3.9',
Expand Down
26 changes: 20 additions & 6 deletions tests/test_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,7 @@ def test_multi_cc2(capsys, caplog):
multi_helper(t, capsys, caplog)


@pytest.mark.skip(reason='needs some ratelimit love XXX')
def test_multi_ia(capsys, caplog):
tests = [
[{'service': '--ia', 'mods': '--limit 10', 'cmd': 'iter', 'rest': 'commoncrawl.org/*'},
Expand All @@ -121,7 +122,7 @@ def test_multi_ia(capsys, caplog):
multi_helper(t, capsys, caplog)


def test_multi_rest(capsys, caplog):
def test_multi_misc_notia(capsys, caplog):
tests = [
[{'service': '--source https://web.archive.org/cdx/search/cdx', 'mods': '--limit 10', 'cmd': 'iter', 'rest': 'commoncrawl.org/*'},
{'count': 10, 'linefgrep': 'commoncrawl.org'}],
Expand All @@ -132,17 +133,26 @@ def test_multi_rest(capsys, caplog):

[{'service': '--cc', 'mods': '--limit 10', 'cmd': 'size', 'rest': 'commoncrawl.org/*'},
{'count': 1, 'is_int': True}],
[{'service': '--ia', 'mods': '--limit 10', 'cmd': 'size', 'rest': 'commoncrawl.org/*'},
{'count': 1, 'is_int': True}],
[{'service': '--cc', 'mods': '--limit 10', 'cmd': 'size', 'rest': '--details commoncrawl.org/*'},
{'count': 2}],

[{'service': '', 'mods': '--limit 10', 'cmd': 'iter', 'rest': 'commoncrawl.org/*'},
{'exception': ValueError}],
]

for t in tests:
multi_helper(t, capsys, caplog)


@pytest.mark.skip(reason='needs some ratelimit love XXX')
def test_multi_misc_ia(capsys, caplog):
tests = [
[{'service': '--ia', 'mods': '--limit 10', 'cmd': 'size', 'rest': 'commoncrawl.org/*'},
{'count': 1, 'is_int': True}],
[{'service': '--ia', 'mods': '--limit 10', 'cmd': 'size', 'rest': '--details commoncrawl.org/*'},
{'count': 2}],
[{'service': '--ia', 'mods': '--from 20180101 --to 20180110 --limit 10', 'cmd': 'size', 'rest': '--details commoncrawl.org'},
{'count': 2}],

[{'service': '', 'mods': '--limit 10', 'cmd': 'iter', 'rest': 'commoncrawl.org/*'},
{'exception': ValueError}],
]

for t in tests:
Expand All @@ -163,6 +173,9 @@ def test_warc(tmpdir, caplog):

with tmpdir.as_cwd():
for p in prefixes:
if '--ia' in p or 'archive.org' in p:
# XXX skip
continue
cmdline = p + base
print(cmdline, file=sys.stderr)
args = cmdline.split()
Expand All @@ -182,6 +195,7 @@ def one_ia_corner(tmpdir, cmdline):
main(args=cmdline.split())


@pytest.mark.skip(reason='needs some ratelimit love XXX')
def test_warc_ia_corners(tmpdir, caplog):
'''
To test these more properly, need to add a --exact-warcname and then postprocess.
Expand Down

0 comments on commit 91a9bda

Please sign in to comment.