Skip to content

Commit

Permalink
Merge pull request #16 from NoPlagiarism/domain_constant
Browse files Browse the repository at this point in the history
  • Loading branch information
NoPlagiarism authored Dec 10, 2023
2 parents 405cc57 + 01d73ad commit 44963b2
Show file tree
Hide file tree
Showing 4 changed files with 111 additions and 77 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/parse.yml
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ jobs:
- name: Set up Python 3.9
uses: actions/setup-python@v3
with:
python-version: "3.9"
python-version: "3.12"
- name: Install dependencies
run: |
python -m pip install --upgrade pip
Expand Down
14 changes: 14 additions & 0 deletions parser/consts.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,20 @@ class Retries:
trace_errors = get_bool_from_env("FIL_TRACE_ERRORS", True)


class Regex:
    """Regular-expression source strings for matching host names.

    All attributes are plain pattern strings (not compiled); callers are
    expected to pass them to ``re`` themselves or embed them in larger
    patterns.

    ``DOMAIN_BASE_REGEX`` matches everything *before* the final suffix:
    zero or more dot-terminated subdomain labels followed by one label.
    The suffix-specific constants append the trailing ``.<tld>`` part.

    Label pattern adapted from
    https://stackoverflow.com/questions/7930751/regexp-for-subdomain
    """

    # A single leading label used to be optional ("?"), which allowed at most
    # one subdomain level and therefore failed on hosts such as
    # "wgl.frail.duckdns.org".  Using "*" accepts any number of subdomain
    # labels.  Labels are delimited by a literal dot that is not part of the
    # label character class, so the repetition adds no ambiguous backtracking.
    # NOTE: with search()/findall() this can now yield longer matches than
    # before for inputs that contain multi-level host names.
    DOMAIN_BASE_REGEX = (
        r"(?:[a-zA-Z0-9](?:[-a-zA-Z0-9]{0,61}[a-zA-Z0-9])?\.)*"
        r"(?:[a-zA-Z0-9]{1,2}(?:[-a-zA-Z0-9]{0,252}[a-zA-Z0-9])?)"
    )

    # Expanded: <DOMAIN_BASE_REGEX>\.(?:[a-zA-Z]{2,63})
    DOMAIN = DOMAIN_BASE_REGEX + r"\.(?:[a-zA-Z]{2,63})"

    # Expanded: <DOMAIN_BASE_REGEX>\.onion
    DOMAIN_ONION = DOMAIN_BASE_REGEX + r"\.onion"

    # Expanded: <DOMAIN_BASE_REGEX>\.i2p
    DOMAIN_I2P = DOMAIN_BASE_REGEX + r"\.i2p"

    # Expanded: <DOMAIN_BASE_REGEX>\.loki
    DOMAIN_LOKI = DOMAIN_BASE_REGEX + r"\.loki"


# Folder name for instance data.
# NOTE(review): presumably the directory where instance lists are stored — verify against usage.
INST_FOLDER = "instances"

# Flag obtained from the FIL_LOG_DOMAIN_FROM_HEADERS environment variable via get_bool_from_env.
# NOTE(review): the second argument (True) looks like the fallback default — confirm in get_bool_from_env.
LOG_DOMAIN_FROM_HEADERS = get_bool_from_env("FIL_LOG_DOMAIN_FROM_HEADERS", True)
Expand Down
Loading

0 comments on commit 44963b2

Please sign in to comment.