Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[setup] migrate setup script to pyproject.toml (reland #22539) #35077

Open
wants to merge 5 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
74 changes: 74 additions & 0 deletions .github/workflows/check-vcs-installation.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
# Verifies that `transformers` can be installed with the system pip of the
# runner image in four ways: from a VCS URL, from a VCS URL in editable mode,
# from a local checkout, and from a local checkout in editable mode.
name: Check VCS installation

on:
  push:
    branches:
      - main
      - v*-release
  pull_request:
  workflow_dispatch:

permissions:
  contents: read

concurrency:
  group: "${{ github.workflow }}-${{ github.ref }}"
  cancel-in-progress: ${{ github.event_name == 'pull_request' }}

jobs:
  check-vcs-installation:
    runs-on: ubuntu-20.04  # the oldest Ubuntu LTS version
    timeout-minutes: 30
    steps:
      - name: Setup system pip
        run: |
          sudo apt-get update && sudo apt-get install -y python3-dev python3-pip
          echo '$ which -a python3' && which -a python3 || true
          echo '$ which -a python' && which -a python || true
          echo '$ which -a pip3' && which -a pip3 || true
          echo '$ which -a pip' && which -a pip || true
          echo '$ /usr/bin/python3 --version' && /usr/bin/python3 --version
          echo '$ /usr/bin/python3 -m pip --version' && /usr/bin/python3 -m pip --version

      - name: Print commit information
        # Context values such as the head branch name are attacker-controlled on
        # pull requests. Pass them through the environment instead of expanding
        # `${{ ... }}` inside the script, so they are never parsed as shell code.
        env:
          EVENT_NAME: ${{ github.event_name }}
          BASE_REPOSITORY: ${{ github.repository }}
          HEAD_REPOSITORY: ${{ github.event.pull_request.head.repo.full_name }}
          SHA: ${{ github.sha }}
          BRANCH_NAME: ${{ github.head_ref || github.ref_name }}
        run: |
          if [[ "${EVENT_NAME}" != 'pull_request' ]]; then
            REPOSITORY="${BASE_REPOSITORY}"
          else
            REPOSITORY="${HEAD_REPOSITORY}"  # name of the fork repository
          fi
          echo "REPOSITORY: ${REPOSITORY}"
          echo "SHA: ${SHA}"
          echo "BRANCH_NAME: ${BRANCH_NAME}"
          # Export the URL so the following steps can install from it.
          echo "VCS_URL=https://github.com/${REPOSITORY}@${BRANCH_NAME}" >> "${GITHUB_ENV}"

      - name: Check transformers installation from VCS URL
        run: |
          /usr/bin/python3 -m pip install -vvv "git+${VCS_URL}"
          # Import from /tmp so the installed package is used, not a local directory.
          (cd /tmp && /usr/bin/python3 -c 'import transformers')
          /usr/bin/python3 -m pip uninstall transformers --yes

      - name: Check transformers installation from VCS URL (editable)
        run: |
          /usr/bin/python3 -m pip install -vvv -e "git+${VCS_URL}#egg=transformers"
          (cd /tmp && /usr/bin/python3 -c 'import transformers')
          /usr/bin/python3 -m pip uninstall transformers --yes

      - name: Checkout transformers
        uses: actions/checkout@v4
        with:
          submodules: "recursive"

      - name: Check transformers installation from VCS repo
        run: |
          /usr/bin/python3 -m pip install -vvv .
          (cd /tmp && /usr/bin/python3 -c 'import transformers')
          /usr/bin/python3 -m pip uninstall transformers --yes

      - name: Check transformers installation from VCS repo (editable)
        run: |
          /usr/bin/python3 -m pip install -vvv -e .
          (cd /tmp && /usr/bin/python3 -c 'import transformers')
          /usr/bin/python3 -m pip uninstall transformers --yes
96 changes: 81 additions & 15 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,24 +1,73 @@
[tool.coverage.run]
source = ["transformers"]
omit = [
"*/convert_*",
"*/__main__.py"
]
# Package ######################################################################

[tool.coverage.report]
exclude_lines = [
"pragma: no cover",
"raise",
"except",
"register_parameter"
[build-system]
requires = ["setuptools"]
build-backend = "setuptools.build_meta"

[project]
name = "transformers"
description = "State-of-the-art Machine Learning for JAX, PyTorch and TensorFlow"
readme = "README.md"
requires-python = ">= 3.9" # NOTE: also update the classifiers below
authors = [
{ name = "The Hugging Face team (past and future) with the help of all our contributors (https://github.com/huggingface/transformers/graphs/contributors)", email = "transformers@huggingface.co" },
]
license = { text = "Apache 2.0 License" }
keywords = [
"NLP",
"vision",
"speech",
"deep learning",
"transformer",
"pytorch",
"tensorflow",
"jax",
"BERT",
"GPT-2",
"Wav2Vec2",
"ViT",
]
classifiers = [
"Development Status :: 5 - Production/Stable",
"Intended Audience :: Developers",
"Intended Audience :: Education",
"Intended Audience :: Science/Research",
"License :: OSI Approved :: Apache Software License",
"Operating System :: OS Independent",
"Programming Language :: Python :: 3",
"Programming Language :: Python :: 3.9",
"Programming Language :: Python :: 3.10",
"Topic :: Scientific/Engineering :: Artificial Intelligence",
]
dynamic = ["dependencies", "optional-dependencies", "version"]

[project.scripts]
transformers-cli = "transformers.commands.transformers_cli:main"

[project.urls]
Homepage = "https://huggingface.co"
Repository = "https://github.com/huggingface/transformers"
Documentation = "https://huggingface.co/docs/transformers"
"Bug Report" = "https://github.com/huggingface/transformers/issues"

[tool.setuptools]
include-package-data = true
zip-safe = false

[tool.setuptools.packages.find]
where = ["src"]

[tool.setuptools.package-data]
transformers = ["*.cu", "*.cpp", "*.cuh", "*.h", "*.pyx"]

# Linter tools #################################################################

[tool.ruff]
line-length = 119

[tool.ruff.lint]
# Never enforce `E501` (line length violations).
ignore = ["C901", "E501", "E741", "F402", "F823" ]
ignore = ["C901", "E501", "E741", "F402", "F823"]
select = ["C", "E", "F", "I", "W"]

# Ignore import violations in all `__init__.py` files.
Expand All @@ -44,11 +93,28 @@ skip-magic-trailing-comma = false
# Like Black, automatically detect the appropriate line ending.
line-ending = "auto"

# Testing ######################################################################

[tool.pytest.ini_options]
addopts = "--doctest-glob='**/*.md'"
doctest_optionflags="NUMBER NORMALIZE_WHITESPACE ELLIPSIS"
doctest_optionflags = "NUMBER NORMALIZE_WHITESPACE ELLIPSIS"
markers = [
"flash_attn_test: marks tests related to flash attention (deselect with '-m \"not flash_attn_test\"')",
"bitsandbytes: select (or deselect with `not`) bitsandbytes integration tests",
"generate: marks tests that use the GenerationTesterMixin"
"generate: marks tests that use the GenerationTesterMixin",
]

[tool.coverage.run]
source = ["transformers"]
omit = [
"*/convert_*",
"*/__main__.py",
]

[tool.coverage.report]
exclude_lines = [
"pragma: no cover",
"raise",
"except",
"register_parameter",
]
46 changes: 9 additions & 37 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,21 +72,19 @@
import shutil
from pathlib import Path

from setuptools import Command, find_packages, setup
from setuptools import Command, setup


# Remove stale transformers.egg-info directory to avoid https://github.com/pypa/pip/issues/5466
stale_egg_info = Path(__file__).parent / "transformers.egg-info"
if stale_egg_info.exists():
print(
(
"Warning: {} exists.\n\n"
"If you recently updated transformers to 3.0 or later, this is expected,\n"
"but it may prevent transformers from installing in editable mode.\n\n"
"This directory is automatically generated by Python's packaging tools.\n"
"I will remove it now.\n\n"
"See https://github.com/pypa/pip/issues/5466 for details.\n"
).format(stale_egg_info)
f"Warning: {stale_egg_info} exists.\n\n"
"If you recently updated transformers to 3.0 or later, this is expected,\n"
"but it may prevent transformers from installing in editable mode.\n\n"
"This directory is automatically generated by Python's packaging tools.\n"
"I will remove it now.\n\n"
"See https://github.com/pypa/pip/issues/5466 for details.\n"
)
shutil.rmtree(stale_egg_info)

Expand Down Expand Up @@ -437,36 +435,10 @@ def run(self):

setup(
name="transformers",
version="4.48.0.dev0", # expected format is one of x.y.z.dev0, or x.y.z.rc1 or x.y.z (no to dashes, yes to dots)
author="The Hugging Face team (past and future) with the help of all our contributors (https://github.com/huggingface/transformers/graphs/contributors)",
author_email="transformers@huggingface.co",
description="State-of-the-art Machine Learning for JAX, PyTorch and TensorFlow",
long_description=open("README.md", "r", encoding="utf-8").read(),
long_description_content_type="text/markdown",
keywords="NLP vision speech deep learning transformer pytorch tensorflow jax BERT GPT-2 Wav2Vec2 ViT",
license="Apache 2.0 License",
url="https://github.com/huggingface/transformers",
package_dir={"": "src"},
packages=find_packages("src"),
include_package_data=True,
package_data={"": ["**/*.cu", "**/*.cpp", "**/*.cuh", "**/*.h", "**/*.pyx"]},
zip_safe=False,
# expected format is one of x.y.z.dev0, or x.y.z.rc1 or x.y.z (no to dashes, yes to dots)
version="4.48.0.dev0",
extras_require=extras,
entry_points={"console_scripts": ["transformers-cli=transformers.commands.transformers_cli:main"]},
python_requires=">=3.9.0",
install_requires=list(install_requires),
classifiers=[
"Development Status :: 5 - Production/Stable",
"Intended Audience :: Developers",
"Intended Audience :: Education",
"Intended Audience :: Science/Research",
"License :: OSI Approved :: Apache Software License",
"Operating System :: OS Independent",
"Programming Language :: Python :: 3",
"Programming Language :: Python :: 3.9",
"Programming Language :: Python :: 3.10",
"Topic :: Scientific/Engineering :: Artificial Intelligence",
],
cmdclass={"deps_table_update": DepsTableUpdateCommand},
)

Expand Down
10 changes: 8 additions & 2 deletions tests/models/encoder_decoder/test_modeling_encoder_decoder.py
Original file line number Diff line number Diff line change
Expand Up @@ -179,7 +179,10 @@ def check_encoder_decoder_model_from_pretrained_using_model_paths(
**kwargs,
):
encoder_model, decoder_model = self.get_encoder_decoder_model(config, decoder_config)
with tempfile.TemporaryDirectory() as encoder_tmp_dirname, tempfile.TemporaryDirectory() as decoder_tmp_dirname:
with (
tempfile.TemporaryDirectory() as encoder_tmp_dirname,
tempfile.TemporaryDirectory() as decoder_tmp_dirname,
):
encoder_model.save_pretrained(encoder_tmp_dirname)
decoder_model.save_pretrained(decoder_tmp_dirname)
model_kwargs = {"encoder_hidden_dropout_prob": 0.0}
Expand Down Expand Up @@ -306,7 +309,10 @@ def check_save_and_load_encoder_decoder_model(
out_2 = outputs[0].cpu().numpy()
out_2[np.isnan(out_2)] = 0

with tempfile.TemporaryDirectory() as encoder_tmp_dirname, tempfile.TemporaryDirectory() as decoder_tmp_dirname:
with (
tempfile.TemporaryDirectory() as encoder_tmp_dirname,
tempfile.TemporaryDirectory() as decoder_tmp_dirname,
):
enc_dec_model.encoder.save_pretrained(encoder_tmp_dirname)
enc_dec_model.decoder.save_pretrained(decoder_tmp_dirname)
enc_dec_model = EncoderDecoderModel.from_encoder_decoder_pretrained(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -266,7 +266,10 @@ def check_save_and_load_encoder_decoder_model(
out_2 = outputs[0].cpu().numpy()
out_2[np.isnan(out_2)] = 0

with tempfile.TemporaryDirectory() as encoder_tmp_dirname, tempfile.TemporaryDirectory() as decoder_tmp_dirname:
with (
tempfile.TemporaryDirectory() as encoder_tmp_dirname,
tempfile.TemporaryDirectory() as decoder_tmp_dirname,
):
enc_dec_model.encoder.save_pretrained(encoder_tmp_dirname)
enc_dec_model.decoder.save_pretrained(decoder_tmp_dirname)
SpeechEncoderDecoderModel.from_encoder_decoder_pretrained(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -216,7 +216,10 @@ def check_save_and_load_encoder_decoder_model(
out_2 = outputs[0].cpu().numpy()
out_2[np.isnan(out_2)] = 0

with tempfile.TemporaryDirectory() as encoder_tmp_dirname, tempfile.TemporaryDirectory() as decoder_tmp_dirname:
with (
tempfile.TemporaryDirectory() as encoder_tmp_dirname,
tempfile.TemporaryDirectory() as decoder_tmp_dirname,
):
enc_dec_model.encoder.save_pretrained(encoder_tmp_dirname)
enc_dec_model.decoder.save_pretrained(decoder_tmp_dirname)
VisionEncoderDecoderModel.from_encoder_decoder_pretrained(
Expand Down
7 changes: 4 additions & 3 deletions tests/models/wav2vec2/test_modeling_flax_wav2vec2.py
Original file line number Diff line number Diff line change
Expand Up @@ -623,9 +623,10 @@ def test_wav2vec2_with_lm_pool(self):
self.assertEqual(transcription[0], "bien y qué regalo vas a abrir primero")

# user-managed pool + num_processes should trigger a warning
with CaptureLogger(processing_wav2vec2_with_lm.logger) as cl, multiprocessing.get_context("fork").Pool(
2
) as pool:
with (
CaptureLogger(processing_wav2vec2_with_lm.logger) as cl,
multiprocessing.get_context("fork").Pool(2) as pool,
):
transcription = processor.batch_decode(np.array(logits), pool, num_processes=2).text

self.assertIn("num_process", cl.out)
Expand Down
7 changes: 4 additions & 3 deletions tests/models/wav2vec2/test_modeling_tf_wav2vec2.py
Original file line number Diff line number Diff line change
Expand Up @@ -827,9 +827,10 @@ def test_wav2vec2_with_lm_pool(self):
self.assertEqual(transcription[0], "el libro ha sido escrito por cervantes")

# user-managed pool + num_processes should trigger a warning
with CaptureLogger(processing_wav2vec2_with_lm.logger) as cl, multiprocessing.get_context("fork").Pool(
2
) as pool:
with (
CaptureLogger(processing_wav2vec2_with_lm.logger) as cl,
multiprocessing.get_context("fork").Pool(2) as pool,
):
transcription = processor.batch_decode(logits.numpy(), pool, num_processes=2).text

self.assertIn("num_process", cl.out)
Expand Down
7 changes: 4 additions & 3 deletions tests/models/wav2vec2/test_modeling_wav2vec2.py
Original file line number Diff line number Diff line change
Expand Up @@ -1889,9 +1889,10 @@ def test_wav2vec2_with_lm_pool(self):
self.assertEqual(transcription[0], "habitan aguas poco profundas y rocosas")

# user-managed pool + num_processes should trigger a warning
with CaptureLogger(processing_wav2vec2_with_lm.logger) as cl, multiprocessing.get_context("fork").Pool(
2
) as pool:
with (
CaptureLogger(processing_wav2vec2_with_lm.logger) as cl,
multiprocessing.get_context("fork").Pool(2) as pool,
):
transcription = processor.batch_decode(logits.cpu().numpy(), pool, num_processes=2).text

self.assertIn("num_process", cl.out)
Expand Down
15 changes: 9 additions & 6 deletions utils/download_glue_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,9 +79,11 @@ def format_mrpc(data_dir, path_to_data):
for row in ids_fh:
dev_ids.append(row.strip().split("\t"))

with open(mrpc_train_file, encoding="utf8") as data_fh, open(
os.path.join(mrpc_dir, "train.tsv"), "w", encoding="utf8"
) as train_fh, open(os.path.join(mrpc_dir, "dev.tsv"), "w", encoding="utf8") as dev_fh:
with (
open(mrpc_train_file, encoding="utf8") as data_fh,
open(os.path.join(mrpc_dir, "train.tsv"), "w", encoding="utf8") as train_fh,
open(os.path.join(mrpc_dir, "dev.tsv"), "w", encoding="utf8") as dev_fh,
):
header = data_fh.readline()
train_fh.write(header)
dev_fh.write(header)
Expand All @@ -92,9 +94,10 @@ def format_mrpc(data_dir, path_to_data):
else:
train_fh.write("%s\t%s\t%s\t%s\t%s\n" % (label, id1, id2, s1, s2))

with open(mrpc_test_file, encoding="utf8") as data_fh, open(
os.path.join(mrpc_dir, "test.tsv"), "w", encoding="utf8"
) as test_fh:
with (
open(mrpc_test_file, encoding="utf8") as data_fh,
open(os.path.join(mrpc_dir, "test.tsv"), "w", encoding="utf8") as test_fh,
):
header = data_fh.readline()
test_fh.write("index\t#1 ID\t#2 ID\t#1 String\t#2 String\n")
for idx, row in enumerate(data_fh):
Expand Down