From 016086c421c0686648257dfc4b01752ad4281e46 Mon Sep 17 00:00:00 2001 From: Kyle Edwards Date: Tue, 28 May 2024 16:38:11 -0400 Subject: [PATCH] Add verify-alpha-spec hook (#28) * Add verify-alpha-spec hook * Add to .pre-commit-hooks.yaml * Add alpha spec to requirements.txt too * Fix formatting * Add alpha spec regardless of output type * Check -cu* suffixed packages * Add test for reference * Refactoring * Simplify tag checking * Review feedback * All packages have CUDA suffix * Deduplicate list of packages * Add more alpha spec packages * Alphabetize package list * Change a few tests to CUDA 11 * Alphabetize entry points * Use regex to search for CUDA suffix * s/<=/=3.9" dependencies = [ + "PyYAML", "bashlex", "gitpython", + "packaging", "rich", "tomlkit", ] @@ -45,6 +47,7 @@ test = [ ] [project.scripts] +verify-alpha-spec = "rapids_pre_commit_hooks.alpha_spec:main" verify-conda-yes = "rapids_pre_commit_hooks.shell.verify_conda_yes:main" verify-copyright = "rapids_pre_commit_hooks.copyright:main" verify-pyproject-license = "rapids_pre_commit_hooks.pyproject_license:main" diff --git a/src/rapids_pre_commit_hooks/alpha_spec.py b/src/rapids_pre_commit_hooks/alpha_spec.py new file mode 100644 index 0000000..aa74591 --- /dev/null +++ b/src/rapids_pre_commit_hooks/alpha_spec.py @@ -0,0 +1,222 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
import re
from functools import total_ordering

import yaml
from packaging.requirements import InvalidRequirement, Requirement

from .lint import LintMain

# RAPIDS package names (without any CUDA suffix) whose requirement strings
# are checked for the alpha specifier.
RAPIDS_ALPHA_SPEC_PACKAGES = {
    "cubinlinker",
    "cucim",
    "cudf",
    "cugraph",
    "cugraph-dgl",
    "cugraph-equivariant",
    "cugraph-pyg",
    "cuml",
    "cuproj",
    "cuspatial",
    "cuxfilter",
    "dask-cuda",
    "dask-cudf",
    "distributed-ucxx",
    "librmm",
    "libucx",
    "nx-cugraph",
    "ptxcompiler",
    "pylibcugraph",
    "pylibcugraphops",
    "pylibraft",
    "pylibwholegraph",
    "pynvjitlink",
    "raft-dask",
    "rmm",
    "ucx-py",
    "ucxx",
}

# Members of RAPIDS_ALPHA_SPEC_PACKAGES that are excluded from the
# "<name>-cuXX" suffix matching done by is_rapids_cuda_suffixed_package().
RAPIDS_NON_CUDA_SUFFIXED_PACKAGES = {
    "dask-cuda",
}

RAPIDS_CUDA_SUFFIXED_PACKAGES = (
    RAPIDS_ALPHA_SPEC_PACKAGES - RAPIDS_NON_CUDA_SUFFIXED_PACKAGES
)

# Version specifier that is added in development mode and removed in
# release mode.
ALPHA_SPECIFIER = ">=0.0.0a0"

ALPHA_SPEC_OUTPUT_TYPES = {
    "pyproject",
    "requirements",
}

# "<package>-cuNN" where NN is exactly two digits; the part before the
# suffix is captured as the named group "package".
CUDA_SUFFIX_REGEX = re.compile(r"^(?P<package>.*)-cu[0-9]{2}$")


def node_has_type(node, tag_type):
    """Return True if the YAML node's tag is the core-schema tag `tag_type`.

    `tag_type` is the short name ("str", "seq", "map", ...); it is compared
    against the node's full ``tag:yaml.org,2002:<tag_type>`` tag.
    """
    return node.tag == f"tag:yaml.org,2002:{tag_type}"


def is_rapids_cuda_suffixed_package(name):
    """Return True if `name` is a CUDA-suffixable RAPIDS package plus "-cuNN".

    The suffix is stripped with CUDA_SUFFIX_REGEX and the remaining base name
    is looked up in RAPIDS_CUDA_SUFFIXED_PACKAGES.
    """
    match = CUDA_SUFFIX_REGEX.search(name)
    # One regex match plus a set lookup; no need to test every known package.
    return (
        match is not None
        and match.group("package") in RAPIDS_CUDA_SUFFIXED_PACKAGES
    )


@total_ordering
class _SpecPriority:
    """Sort key for specifier strings that always places the alpha spec last.

    Other specifiers are ordered by their text with the "<", ">" and "="
    characters removed. This is a plain string comparison, not a version
    comparison.
    """

    def __init__(self, spec):
        self.spec = spec

    def __eq__(self, other):
        return self.spec == other.spec

    def __lt__(self, other):
        if self.spec == other.spec:
            return False
        if self.spec == ALPHA_SPECIFIER:
            return False
        if other.spec == ALPHA_SPECIFIER:
            return True
        # Break ties on the full specifier text so that the result does not
        # depend on the iteration order of the input set.
        return (self.sort_str(), self.spec) < (other.sort_str(), other.spec)

    def sort_str(self):
        return "".join(c for c in self.spec if c not in "<>=")


def _create_specifier_string(specifiers):
    """Join specifier strings with commas, alpha specifier last."""
    return ",".join(sorted(specifiers, key=_SpecPriority))


def _format_requirement(req, specifiers):
    """Rebuild a requirement string for `req` using `specifiers`.

    Extras and the environment marker of the parsed requirement are kept so
    that the suggested replacement only changes the version specifiers.
    """
    parts = [req.name]
    if req.extras:
        parts.append(f"[{','.join(sorted(req.extras))}]")
    parts.append(_create_specifier_string(specifiers))
    if req.marker is not None:
        parts.append(f"; {req.marker}")
    return "".join(parts)


def check_package_spec(linter, args, node):
    """Warn when a RAPIDS requirement's alpha spec does not match the mode.

    In "development" mode a RAPIDS package without ALPHA_SPECIFIER gets an
    "add alpha spec" warning; in "release" mode a RAPIDS package with it gets
    a "remove alpha spec" warning. Each warning carries a replacement for the
    node's text. Non-string nodes, unparsable requirement strings, direct URL
    requirements and non-RAPIDS packages are ignored.
    """
    if not node_has_type(node, "str"):
        return

    try:
        req = Requirement(node.value)
    except InvalidRequirement:
        # Not a PEP 508 requirement (for example a conda-style "pkg=1.2"
        # spec); there is nothing this check can say about it.
        return

    if req.url:
        # A direct URL reference cannot be combined with version specifiers,
        # so rewriting it would discard the URL.
        return

    if not (
        req.name in RAPIDS_ALPHA_SPEC_PACKAGES
        or is_rapids_cuda_suffixed_package(req.name)
    ):
        return

    specifiers = {str(s) for s in req.specifier}
    has_alpha_spec = ALPHA_SPECIFIER in specifiers

    if args.mode == "development" and not has_alpha_spec:
        action = "add"
        new_specifiers = specifiers | {ALPHA_SPECIFIER}
    elif args.mode == "release" and has_alpha_spec:
        action = "remove"
        new_specifiers = specifiers - {ALPHA_SPECIFIER}
    else:
        return

    # NOTE(review): the same span is used for the warning and the
    # replacement, so the node's whole text is rewritten -- presumably
    # including any YAML anchor or quoting; confirm this is intended.
    span = (node.start_mark.index, node.end_mark.index)
    linter.add_warning(
        span,
        f"{action} alpha spec for RAPIDS package {req.name}",
    ).add_replacement(span, _format_requirement(req, new_specifiers))


def _iter_values_for_key(node, key):
    """Yield the value node of each `key` entry in a sequence of mappings.

    Yields nothing if `node` is not a sequence; sequence items that are not
    mappings and keys that are not strings are skipped.
    """
    if not node_has_type(node, "seq"):
        return
    for item in node.value:
        if not node_has_type(item, "map"):
            continue
        for key_node, value_node in item.value:
            if node_has_type(key_node, "str") and key_node.value == key:
                yield value_node


def check_packages(linter, args, node):
    """Run check_package_spec() on every item of a sequence node."""
    if node_has_type(node, "seq"):
        for package_spec in node.value:
            check_package_spec(linter, args, package_spec)


def check_common(linter, args, node):
    """Check the "packages" list of every mapping in a sequence node."""
    for packages in _iter_values_for_key(node, "packages"):
        check_packages(linter, args, packages)


def check_matrices(linter, args, node):
    """Check the "packages" list of every mapping in a sequence node."""
    for packages in _iter_values_for_key(node, "packages"):
        check_packages(linter, args, packages)


def check_specific(linter, args, node):
    """Check the "matrices" list of every mapping in a sequence node."""
    for matrices in _iter_values_for_key(node, "matrices"):
        check_matrices(linter, args, matrices)
def check_dependencies(linter, args, node):
    """Check the "common" and "specific" entries of each dependency set.

    `node` is expected to be a mapping whose values are themselves mappings;
    anything else is ignored.
    """
    if not node_has_type(node, "map"):
        return
    for _, dependencies_value in node.value:
        if not node_has_type(dependencies_value, "map"):
            continue
        for dependency_key, dependency_value in dependencies_value.value:
            if not node_has_type(dependency_key, "str"):
                continue
            if dependency_key.value == "common":
                check_common(linter, args, dependency_value)
            elif dependency_key.value == "specific":
                check_specific(linter, args, dependency_value)


def check_root(linter, args, node):
    """Check every "dependencies" entry of the root mapping node."""
    if not node_has_type(node, "map"):
        return
    for root_key, root_value in node.value:
        if node_has_type(root_key, "str") and root_key.value == "dependencies":
            check_dependencies(linter, args, root_value)


def check_alpha_spec(linter, args):
    """Compose the linter's content as YAML and check its root node."""
    root = yaml.compose(linter.content)
    if root is None:
        # yaml.compose() returns None when the stream holds no document
        # (empty or comment-only file); there is nothing to check.
        return
    check_root(linter, args, root)


def main():
    """Command-line entry point for the alpha spec check."""
    m = LintMain()
    m.argparser.description = (
        "Verify that RAPIDS packages in dependencies.yaml do (or do not) have "
        "the alpha spec."
    )
    m.argparser.add_argument(
        "--mode",
        help="mode to use (development has alpha spec, release does not)",
        choices=["development", "release"],
        default="development",
    )
    with m.execute() as ctx:
        ctx.add_check(check_alpha_spec)


if __name__ == "__main__":
    main()

# --- next file in the patch (diff header preserved as comments) ---
# diff --git a/test/rapids_pre_commit_hooks/test_alpha_spec.py b/test/rapids_pre_commit_hooks/test_alpha_spec.py
# new file mode 100644
# index 0000000..425c2b4
# --- /dev/null
# +++ b/test/rapids_pre_commit_hooks/test_alpha_spec.py
# @@ -0,0 +1,349 @@
# Copyright (c) 2024, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from itertools import chain
from textwrap import dedent
from unittest.mock import Mock, call, patch

import pytest
import yaml

from rapids_pre_commit_hooks import alpha_spec, lint


@pytest.mark.parametrize(
    ["name", "is_suffixed"],
    [
        *chain.from_iterable(
            [
                (f"{pkg}-cu11", True),
                (f"{pkg}-cu12", True),
                (f"{pkg}-cuda", False),
            ]
            for pkg in alpha_spec.RAPIDS_CUDA_SUFFIXED_PACKAGES
        ),
        *chain.from_iterable(
            [
                (f"{pkg}-cu11", False),
                (f"{pkg}-cu12", False),
                (f"{pkg}-cuda", False),
            ]
            for pkg in alpha_spec.RAPIDS_NON_CUDA_SUFFIXED_PACKAGES
        ),
    ],
)
def test_is_rapids_cuda_suffixed_package(name, is_suffixed):
    """Only "<suffixable package>-cuNN" names count as CUDA-suffixed."""
    result = alpha_spec.is_rapids_cuda_suffixed_package(name)
    assert result == is_suffixed


@pytest.mark.parametrize(
    ["package", "content", "mode", "replacement"],
    [
        *chain.from_iterable(
            [
                (pkg, pkg, "development", f"{pkg}>=0.0.0a0"),
                (pkg, pkg, "release", None),
                (pkg, f"{pkg}>=0.0.0a0", "development", None),
                (pkg, f"{pkg}>=0.0.0a0", "release", pkg),
            ]
            for pkg in alpha_spec.RAPIDS_ALPHA_SPEC_PACKAGES
        ),
        *chain.from_iterable(
            [
                (
                    f"{pkg}-cu12",
                    f"{pkg}-cu12",
                    "development",
                    f"{pkg}-cu12>=0.0.0a0",
                ),
                (f"{pkg}-cu11", f"{pkg}-cu11", "release", None),
                (f"{pkg}-cu12", f"{pkg}-cu12>=0.0.0a0", "development", None),
                (
                    f"{pkg}-cu11",
                    f"{pkg}-cu11>=0.0.0a0",
                    "release",
                    f"{pkg}-cu11",
                ),
            ]
            for pkg in alpha_spec.RAPIDS_CUDA_SUFFIXED_PACKAGES
        ),
        *chain.from_iterable(
            [
                (f"{pkg}-cu12", f"{pkg}-cu12", "development", None),
                (f"{pkg}-cu12", f"{pkg}-cu12>=0.0.0a0", "release", None),
            ]
            for pkg in alpha_spec.RAPIDS_NON_CUDA_SUFFIXED_PACKAGES
        ),
        (
            "cuml",
            "cuml>=24.04,<24.06",
            "development",
            "cuml>=24.04,<24.06,>=0.0.0a0",
        ),
        (
            "cuml",
            "cuml>=24.04,<24.06,>=0.0.0a0",
            "release",
            "cuml>=24.04,<24.06",
        ),
        (
            "cuml",
            "&cuml cuml>=24.04,<24.06,>=0.0.0a0",
            "release",
            "cuml>=24.04,<24.06",
        ),
        ("packaging", "packaging", "development", None),
    ],
)
def test_check_package_spec(package, content, mode, replacement):
    """check_package_spec() adds the expected warning and replacement."""
    args = Mock(mode=mode)
    linter = lint.Linter("dependencies.yaml", content)
    node = yaml.compose(content)

    alpha_spec.check_package_spec(linter, args, node)

    if replacement is None:
        assert linter.warnings == []
        return

    verb = "add" if mode == "development" else "remove"
    expected_linter = lint.Linter("dependencies.yaml", content)
    expected_warning = expected_linter.add_warning(
        (node.start_mark.index, node.end_mark.index),
        f"{verb} alpha spec for RAPIDS package {package}",
    )
    expected_warning.add_replacement((0, len(content)), replacement)
    assert linter.warnings == expected_linter.warnings


@pytest.mark.parametrize(
    ["content", "indices"],
    [
        (
            dedent(
                """\
                - package_a
                - &package_b package_b
                """
            ),
            [0, 1],
        ),
        (
            "null",
            [],
        ),
    ],
)
def test_check_packages(content, indices):
    """check_packages() visits each item of a sequence and nothing else."""
    with patch(
        "rapids_pre_commit_hooks.alpha_spec.check_package_spec", Mock()
    ) as mock_check_package_spec:
        args = Mock()
        linter = lint.Linter("dependencies.yaml", content)
        root = yaml.compose(content)
        alpha_spec.check_packages(linter, args, root)
        expected_calls = [call(linter, args, root.value[i]) for i in indices]
        assert mock_check_package_spec.mock_calls == expected_calls


@pytest.mark.parametrize(
    ["content", "indices"],
    [
        (
            dedent(
                """\
                - output_types: [pyproject, conda]
                  packages:
                    - package_a
                - output_types: [conda]
                  packages:
                    - package_b
                - packages:
                    - package_c
                  output_types: pyproject
                """
            ),
            [(0, 1), (1, 1), (2, 0)],
        ),
    ],
)
def test_check_common(content, indices):
    """check_common() forwards every "packages" value to check_packages()."""
    with patch(
        "rapids_pre_commit_hooks.alpha_spec.check_packages", Mock()
    ) as mock_check_packages:
        args = Mock()
        linter = lint.Linter("dependencies.yaml", content)
        root = yaml.compose(content)
        alpha_spec.check_common(linter, args, root)
        expected_calls = [
            call(linter, args, root.value[i].value[j][1]) for i, j in indices
        ]
        assert mock_check_packages.mock_calls == expected_calls


@pytest.mark.parametrize(
    ["content", "indices"],
    [
        (
            dedent(
                """\
                - matrix:
                    arch: x86_64
                  packages:
                    - package_a
                - packages:
                    - package_b
                  matrix:
                """
            ),
            [(0, 1), (1, 0)],
        ),
    ],
)
def test_check_matrices(content, indices):
    """check_matrices() forwards every "packages" value to check_packages()."""
    with patch(
        "rapids_pre_commit_hooks.alpha_spec.check_packages", Mock()
    ) as mock_check_packages:
        args = Mock()
        linter = lint.Linter("dependencies.yaml", content)
        root = yaml.compose(content)
        alpha_spec.check_matrices(linter, args, root)
        expected_calls = [
            call(linter, args, root.value[i].value[j][1]) for i, j in indices
        ]
        assert mock_check_packages.mock_calls == expected_calls


@pytest.mark.parametrize(
    ["content", "indices"],
    [
        (
            dedent(
                """\
                - output_types: [pyproject, conda]
                  matrices:
                    - matrix:
                        arch: x86_64
                      packages:
                        - package_a
                - output_types: [conda]
                  matrices:
                    - matrix:
                        arch: x86_64
                      packages:
                        - package_b
                - matrices:
                    - matrix:
                        arch: x86_64
                      packages:
                        - package_c
                  output_types: pyproject
                """
            ),
            [(0, 1), (1, 1), (2, 0)],
        ),
    ],
)
def test_check_specific(content, indices):
    """check_specific() forwards every "matrices" value to check_matrices()."""
    with patch(
        "rapids_pre_commit_hooks.alpha_spec.check_matrices", Mock()
    ) as mock_check_matrices:
        args = Mock()
        linter = lint.Linter("dependencies.yaml", content)
        root = yaml.compose(content)
        alpha_spec.check_specific(linter, args, root)
        expected_calls = [
            call(linter, args, root.value[i].value[j][1]) for i, j in indices
        ]
        assert mock_check_matrices.mock_calls == expected_calls


@pytest.mark.parametrize(
    ["content", "common_indices", "specific_indices"],
    [
        (
            dedent(
                """\
                set_a:
                  common:
                    - output_types: [pyproject]
                      packages:
                        - package_a
                  specific:
                    - output_types: [pyproject]
                      matrices:
                        - matrix:
                            arch: x86_64
                          packages:
                            - package_b
                set_b:
                  specific:
                    - output_types: [pyproject]
                      matrices:
                        - matrix:
                            arch: x86_64
                          packages:
                            - package_c
                  common:
                    - output_types: [pyproject]
                      packages:
                        - package_d
                """
            ),
            [(0, 0), (1, 1)],
            [(0, 1), (1, 0)],
        ),
    ],
)
def test_check_dependencies(content, common_indices, specific_indices):
    """check_dependencies() dispatches "common" and "specific" values."""
    with patch(
        "rapids_pre_commit_hooks.alpha_spec.check_common", Mock()
    ) as mock_check_common:
        with patch(
            "rapids_pre_commit_hooks.alpha_spec.check_specific", Mock()
        ) as mock_check_specific:
            args = Mock()
            linter = lint.Linter("dependencies.yaml", content)
            root = yaml.compose(content)
            alpha_spec.check_dependencies(linter, args, root)
            expected_common = [
                call(linter, args, root.value[i][1].value[j][1])
                for i, j in common_indices
            ]
            expected_specific = [
                call(linter, args, root.value[i][1].value[j][1])
                for i, j in specific_indices
            ]
            assert mock_check_common.mock_calls == expected_common
            assert mock_check_specific.mock_calls == expected_specific


@pytest.mark.parametrize(
    ["content", "indices"],
    [
        (
            dedent(
                """\
                files: {}
                channels: []
                dependencies: {}
                """
            ),
            [2],
        ),
    ],
)
def test_check_root(content, indices):
    """check_root() forwards only the "dependencies" value."""
    with patch(
        "rapids_pre_commit_hooks.alpha_spec.check_dependencies", Mock()
    ) as mock_check_dependencies:
        args = Mock()
        linter = lint.Linter("dependencies.yaml", content)
        root = yaml.compose(content)
        alpha_spec.check_root(linter, args, root)
        expected_calls = [call(linter, args, root.value[i][1]) for i in indices]
        assert mock_check_dependencies.mock_calls == expected_calls


def test_check_alpha_spec():
    """check_alpha_spec() composes the content and passes it to check_root()."""
    CONTENT = "dependencies: []"
    with patch(
        "rapids_pre_commit_hooks.alpha_spec.check_root", Mock()
    ) as mock_check_root:
        with patch("yaml.compose", Mock()) as mock_yaml_compose:
            args = Mock()
            linter = lint.Linter("dependencies.yaml", CONTENT)
            alpha_spec.check_alpha_spec(linter, args)
            mock_yaml_compose.assert_called_once_with(CONTENT)
            mock_check_root.assert_called_once_with(
                linter, args, mock_yaml_compose()
            )