-
Notifications
You must be signed in to change notification settings - Fork 51
/
pyproject.toml
executable file
·96 lines (90 loc) · 3.15 KB
/
pyproject.toml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
# Distribution metadata read by Poetry when building and publishing the package.
[tool.poetry]
name = "newspaper4k"
version = "0.9.3.1"
description = "Simplified python article discovery & extraction."
license = "MIT"
readme = "README.md"
# NOTE(review): the address between the angle brackets looks like a redaction
# placeholder rather than a real e-mail - confirm before publishing.
authors = ["Andrei Paraschiv <[email protected]>"]
# Project links.
homepage = "https://github.com/AndyTheFactory/newspaper4k"
repository = "https://github.com/AndyTheFactory/newspaper4k"
documentation = "https://newspaper4k.readthedocs.io/en/latest/"
# The shipped package directory is "newspaper", which differs from the
# distribution name above.
packages = [{ include = "newspaper" }]
keywords = [
    "nlp",
    "scraping",
    "newspaper",
    "article",
    "curation",
    "extraction",
]
# Trove classifiers.
classifiers = [
    "Development Status :: 4 - Beta",
    "Programming Language :: Python :: 3",
    "Natural Language :: English",
    "Topic :: Text Processing",
    "Topic :: Text Processing :: Markup :: HTML",
    "Topic :: Software Development :: Libraries :: Python Modules",
    "Intended Audience :: Developers",
    "Intended Audience :: Education",
    "Intended Audience :: Information Technology",
    "Intended Audience :: Science/Research",
    "License :: OSI Approved :: MIT License",
]
# Runtime requirements. Entries marked `optional = true` are not installed by
# default; Poetry only pulls them in through an extra that lists them.
[tool.poetry.dependencies]
python = "^3.8"
# Always-installed dependencies.
beautifulsoup4 = ">=4.9.3"
feedparser = ">=6.0.0"
lxml = ">=4.2.0, <5.2.0"
nltk = ">=3.6.6"
Pillow = ">=4.0.0"
python-dateutil = ">=2.6.1"
PyYAML = ">=5.1"
requests = ">=2.26.0"
tldextract = ">=2.0.1"
typing-extensions = ">=4.10.0"
# Optional numeric stack; the accepted version range depends on the interpreter.
# NOTE(review): neither numpy nor pandas is listed by any entry in
# [tool.poetry.extras], so as written they cannot be selected via an extra -
# confirm whether an extra (or "all") should include them.
numpy = [
    { version = ">=1.25", python = ">=3.11", optional = true },
    { version = "^1.24", python = ">=3.8, <3.11", optional = true },
]
pandas = [
    { version = ">=2.1.0", python = ">=3.11", optional = true },
    { version = ">=1.4", python = ">=3.8, <3.11", optional = true },
]
# Language-specific dependencies (selected through the per-language extras).
indic-nlp-library = { version = ">=0.90", optional = true }
jieba = { version = ">=0.42.1", optional = true }
pythainlp = { version = ">=2.3.2", optional = true }
tinysegmenter = { version = ">=0.4", optional = true }
# Optional integrations (selected through the "cloudflare" and "gnews" extras).
cloudscraper = { version = ">=1.2.0", optional = true }
gnews = { version = ">=0.3.6", optional = true }
# Extras: each key names an installable extra and lists the optional
# dependencies from [tool.poetry.dependencies] that it enables.
[tool.poetry.extras]
# Per-language extras (keys appear to be language codes).
zh = ["jieba"]
th = ["pythainlp"]
ja = ["tinysegmenter"]
# These four extras all map to the same library.
bn = ["indic-nlp-library"]
hi = ["indic-nlp-library"]
np = ["indic-nlp-library"]
ta = ["indic-nlp-library"]
# Integration extras.
cloudflare = ["cloudscraper"]
gnews = ["gnews"]
# Union of every optional dependency referenced by the extras above.
all = [
    "tinysegmenter",
    "pythainlp",
    "jieba",
    "indic-nlp-library",
    "cloudscraper",
    "gnews",
]
# Development-only dependency group: test runner and coverage, linting and
# spell checking, static type checking with its stub packages, and tooling.
[tool.poetry.group.dev.dependencies]
# Testing, linting and commit hooks.
coverage = { version = ">=7.3.2", python = "^3.8" }
pre-commit = { version = ">=3.5.0", python = "^3.8" }
ruff = { version = ">=0.1.2", python = "^3.8" }
# The constraint previously carried a stray trailing space inside the string.
codespell = { version = ">=2.2.6", python = "^3.8" }
pytest = { version = ">=7.0.0", python = "^3.8" }
# Static typing: mypy plus stub packages for the runtime dependencies.
mypy = { version = "^1.8.0", python = "^3.8" }
lxml-stubs = { version = "^0.5.1", python = "^3.8" }
types-pillow = { version = "^10.2.0.20240213", python = "^3.8" }
types-python-dateutil = { version = "^2.8.19.20240106", python = "^3.8" }
types-requests = "^2.27.1"
types-beautifulsoup4 = { version = "^4.12.0.20240106", python = "^3.8" }
virtualenv = { version = ">=20.25.1" }
# Dependency group for building the documentation: Sphinx, an argparse
# extension for it, and the Read the Docs theme.
[tool.poetry.group.docs.dependencies]
sphinx = { version = ">=7.0.0", python = "^3.8" }
# "*" accepts any released version of sphinx-argparse.
sphinx-argparse = { version = "*", python = "^3.8" }
sphinx-rtd-theme = { version = ">=1.3.0", python = "^3.8" }
# Build-system declaration: build front-ends install `requires` and then call
# the `build-backend` entry point to produce the sdist/wheel.
[build-system]
# NOTE(review): poetry-core is requested without any version bound, so the
# newest release is used at build time - consider adding a bound.
requires = ["poetry-core"]
build-backend = "poetry.core.masonry.api"