From 6fc643674983e27ec5cc312f2e83468050d1d364 Mon Sep 17 00:00:00 2001 From: Sam Gross Date: Wed, 4 Dec 2024 08:58:22 +0000 Subject: [PATCH 01/48] gh-127572: Fix `test_structmembers` initialization (GH-127577) gh-127572: Fix `test_structmembers` initialization. The 'C' format code expects an `int` as a destination (not a `char`). This led to test failures on big-endian platforms like s390x. Use the 'c' format code, which expects a `char` as the destination (but requires a Python bytes object instead of a str). --- Lib/test/test_capi/test_structmembers.py | 2 +- Modules/_testcapi/structmember.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Lib/test/test_capi/test_structmembers.py b/Lib/test/test_capi/test_structmembers.py index ae9168fc39243f..f14ad9a9a5f512 100644 --- a/Lib/test/test_capi/test_structmembers.py +++ b/Lib/test/test_capi/test_structmembers.py @@ -39,7 +39,7 @@ def _make_test_object(cls): "hi", # T_STRING_INPLACE 12, # T_LONGLONG 13, # T_ULONGLONG - "c", # T_CHAR + b"c", # T_CHAR ) diff --git a/Modules/_testcapi/structmember.c b/Modules/_testcapi/structmember.c index c1861db18c4af2..ef30a5a9944e3c 100644 --- a/Modules/_testcapi/structmember.c +++ b/Modules/_testcapi/structmember.c @@ -60,7 +60,7 @@ test_structmembers_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) "T_FLOAT", "T_DOUBLE", "T_STRING_INPLACE", "T_LONGLONG", "T_ULONGLONG", "T_CHAR", NULL}; - static const char fmt[] = "|bbBhHiIlknfds#LKC"; + static const char fmt[] = "|bbBhHiIlknfds#LKc"; test_structmembers *ob; const char *s = NULL; Py_ssize_t string_len = 0; From ad9d059eb10ef132edd73075fa6d8d96d95b8701 Mon Sep 17 00:00:00 2001 From: sobolevn Date: Wed, 4 Dec 2024 13:01:46 +0300 Subject: [PATCH 02/48] gh-126524: Run `regen-unicodedata` as a part of our CI (#126682) --- .github/workflows/build.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 1f8c468475470c..55effee0e1e393 
100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -120,7 +120,7 @@ jobs: - name: Build CPython run: | make -j4 regen-all - make regen-stdlib-module-names regen-sbom + make regen-stdlib-module-names regen-sbom regen-unicodedata - name: Check for changes run: | git add -u From bc0f2e945993747c8b1a6dd66cbe902fddd5758b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Wed, 4 Dec 2024 14:13:52 +0100 Subject: [PATCH 03/48] gh-123378: Ensure results of `PyUnicode*Error_Get{Start,End}` are clamped (GH-123380) Co-authored-by: Sergey B Kirpichev --- Doc/c-api/exceptions.rst | 20 +- Doc/library/exceptions.rst | 6 + Lib/test/test_capi/test_exceptions.py | 150 +++++++++++ ...-08-27-09-07-56.gh-issue-123378.JJ6n_u.rst | 6 + ...-12-02-16-10-36.gh-issue-123378.Q6YRwe.rst | 6 + Modules/_testcapi/exceptions.c | 167 ++++++++++++ Objects/exceptions.c | 248 ++++++++++-------- 7 files changed, 492 insertions(+), 111 deletions(-) create mode 100644 Misc/NEWS.d/next/C_API/2024-08-27-09-07-56.gh-issue-123378.JJ6n_u.rst create mode 100644 Misc/NEWS.d/next/C_API/2024-12-02-16-10-36.gh-issue-123378.Q6YRwe.rst diff --git a/Doc/c-api/exceptions.rst b/Doc/c-api/exceptions.rst index fc2336d120c259..c1f0bd750361d6 100644 --- a/Doc/c-api/exceptions.rst +++ b/Doc/c-api/exceptions.rst @@ -853,12 +853,23 @@ The following functions are used to create and modify Unicode exceptions from C. *\*start*. *start* must not be ``NULL``. Return ``0`` on success, ``-1`` on failure. + If the :attr:`UnicodeError.object` is an empty sequence, the resulting + *start* is ``0``. Otherwise, it is clipped to ``[0, len(object) - 1]``. + + .. seealso:: :attr:`UnicodeError.start` + .. 
c:function:: int PyUnicodeDecodeError_SetStart(PyObject *exc, Py_ssize_t start) int PyUnicodeEncodeError_SetStart(PyObject *exc, Py_ssize_t start) int PyUnicodeTranslateError_SetStart(PyObject *exc, Py_ssize_t start) - Set the *start* attribute of the given exception object to *start*. Return - ``0`` on success, ``-1`` on failure. + Set the *start* attribute of the given exception object to *start*. + Return ``0`` on success, ``-1`` on failure. + + .. note:: + + While passing a negative *start* does not raise an exception, + the corresponding getters will not consider it as a relative + offset. .. c:function:: int PyUnicodeDecodeError_GetEnd(PyObject *exc, Py_ssize_t *end) int PyUnicodeEncodeError_GetEnd(PyObject *exc, Py_ssize_t *end) @@ -868,6 +879,9 @@ The following functions are used to create and modify Unicode exceptions from C. *\*end*. *end* must not be ``NULL``. Return ``0`` on success, ``-1`` on failure. + If the :attr:`UnicodeError.object` is an empty sequence, the resulting + *end* is ``0``. Otherwise, it is clipped to ``[1, len(object)]``. + .. c:function:: int PyUnicodeDecodeError_SetEnd(PyObject *exc, Py_ssize_t end) int PyUnicodeEncodeError_SetEnd(PyObject *exc, Py_ssize_t end) int PyUnicodeTranslateError_SetEnd(PyObject *exc, Py_ssize_t end) @@ -875,6 +889,8 @@ The following functions are used to create and modify Unicode exceptions from C. Set the *end* attribute of the given exception object to *end*. Return ``0`` on success, ``-1`` on failure. + .. seealso:: :attr:`UnicodeError.end` + .. c:function:: PyObject* PyUnicodeDecodeError_GetReason(PyObject *exc) PyObject* PyUnicodeEncodeError_GetReason(PyObject *exc) PyObject* PyUnicodeTranslateError_GetReason(PyObject *exc) diff --git a/Doc/library/exceptions.rst b/Doc/library/exceptions.rst index b5ba86f1b19223..f72b11e34c5c3d 100644 --- a/Doc/library/exceptions.rst +++ b/Doc/library/exceptions.rst @@ -644,10 +644,16 @@ The following exceptions are the exceptions that are usually raised. 
The first index of invalid data in :attr:`object`. + This value should not be negative as it is interpreted as an + absolute offset but this constraint is not enforced at runtime. + .. attribute:: end The index after the last invalid data in :attr:`object`. + This value should not be negative as it is interpreted as an + absolute offset but this constraint is not enforced at runtime. + .. exception:: UnicodeEncodeError diff --git a/Lib/test/test_capi/test_exceptions.py b/Lib/test/test_capi/test_exceptions.py index b22ddd8ad858d4..666e2f2ab09548 100644 --- a/Lib/test/test_capi/test_exceptions.py +++ b/Lib/test/test_capi/test_exceptions.py @@ -415,6 +415,156 @@ def test_err_formatunraisable(self): # CRASHES formatunraisable(NULL, NULL) +class TestUnicodeTranslateError(UnicodeTranslateError): + # UnicodeTranslateError takes 4 arguments instead of 5, + # so we just make a UnicodeTranslateError class that is + # compatible with the UnicodeError.__init__. + def __init__(self, encoding, *args, **kwargs): + super().__init__(*args, **kwargs) + + +class TestUnicodeError(unittest.TestCase): + + def _check_no_crash(self, exc): + # ensure that the __str__() method does not crash + _ = str(exc) + + def test_unicode_encode_error_get_start(self): + get_start = _testcapi.unicode_encode_get_start + self._test_unicode_error_get_start('x', UnicodeEncodeError, get_start) + + def test_unicode_decode_error_get_start(self): + get_start = _testcapi.unicode_decode_get_start + self._test_unicode_error_get_start(b'x', UnicodeDecodeError, get_start) + + def test_unicode_translate_error_get_start(self): + get_start = _testcapi.unicode_translate_get_start + self._test_unicode_error_get_start('x', TestUnicodeTranslateError, get_start) + + def _test_unicode_error_get_start(self, literal, exc_type, get_start): + for obj_len, start, c_start in [ + # normal cases + (5, 0, 0), + (5, 1, 1), + (5, 2, 2), + # out of range start is clamped to max(0, obj_len - 1) + (0, 0, 0), + (0, 1, 0), + (0, 10, 0), + 
(5, 5, 4), + (5, 10, 4), + # negative values are allowed but clipped in the getter + (0, -1, 0), + (1, -1, 0), + (2, -1, 0), + (2, -2, 0), + ]: + obj = literal * obj_len + with self.subTest(obj, exc_type=exc_type, start=start): + exc = exc_type('utf-8', obj, start, obj_len, 'reason') + self.assertEqual(get_start(exc), c_start) + self._check_no_crash(exc) + + def test_unicode_encode_error_set_start(self): + set_start = _testcapi.unicode_encode_set_start + self._test_unicode_error_set_start('x', UnicodeEncodeError, set_start) + + def test_unicode_decode_error_set_start(self): + set_start = _testcapi.unicode_decode_set_start + self._test_unicode_error_set_start(b'x', UnicodeDecodeError, set_start) + + def test_unicode_translate_error_set_start(self): + set_start = _testcapi.unicode_translate_set_start + self._test_unicode_error_set_start('x', TestUnicodeTranslateError, set_start) + + def _test_unicode_error_set_start(self, literal, exc_type, set_start): + obj_len = 5 + obj = literal * obj_len + for new_start in range(-2 * obj_len, 2 * obj_len): + with self.subTest('C-API', obj=obj, exc_type=exc_type, new_start=new_start): + exc = exc_type('utf-8', obj, 0, obj_len, 'reason') + # arbitrary value is allowed in the C API setter + set_start(exc, new_start) + self.assertEqual(exc.start, new_start) + self._check_no_crash(exc) + + with self.subTest('Py-API', obj=obj, exc_type=exc_type, new_start=new_start): + exc = exc_type('utf-8', obj, 0, obj_len, 'reason') + # arbitrary value is allowed in the attribute setter + exc.start = new_start + self.assertEqual(exc.start, new_start) + self._check_no_crash(exc) + + def test_unicode_encode_error_get_end(self): + get_end = _testcapi.unicode_encode_get_end + self._test_unicode_error_get_end('x', UnicodeEncodeError, get_end) + + def test_unicode_decode_error_get_end(self): + get_end = _testcapi.unicode_decode_get_end + self._test_unicode_error_get_end(b'x', UnicodeDecodeError, get_end) + + def test_unicode_translate_error_get_end(self): 
+ get_end = _testcapi.unicode_translate_get_end + self._test_unicode_error_get_end('x', TestUnicodeTranslateError, get_end) + + def _test_unicode_error_get_end(self, literal, exc_type, get_end): + for obj_len, end, c_end in [ + # normal cases + (5, 0, 1), + (5, 1, 1), + (5, 2, 2), + # out-of-range clipped in [MIN(1, OBJLEN), MAX(MIN(1, OBJLEN), OBJLEN)] + (0, 0, 0), + (0, 1, 0), + (0, 10, 0), + (1, 1, 1), + (1, 2, 1), + (5, 5, 5), + (5, 5, 5), + (5, 10, 5), + # negative values are allowed but clipped in the getter + (0, -1, 0), + (1, -1, 1), + (2, -1, 1), + (2, -2, 1), + ]: + obj = literal * obj_len + with self.subTest(obj, exc_type=exc_type, end=end): + exc = exc_type('utf-8', obj, 0, end, 'reason') + self.assertEqual(get_end(exc), c_end) + self._check_no_crash(exc) + + def test_unicode_encode_error_set_end(self): + set_end = _testcapi.unicode_encode_set_end + self._test_unicode_error_set_end('x', UnicodeEncodeError, set_end) + + def test_unicode_decode_error_set_end(self): + set_end = _testcapi.unicode_decode_set_end + self._test_unicode_error_set_end(b'x', UnicodeDecodeError, set_end) + + def test_unicode_translate_error_set_end(self): + set_end = _testcapi.unicode_translate_set_end + self._test_unicode_error_set_end('x', TestUnicodeTranslateError, set_end) + + def _test_unicode_error_set_end(self, literal, exc_type, set_end): + obj_len = 5 + obj = literal * obj_len + for new_end in range(-2 * obj_len, 2 * obj_len): + with self.subTest('C-API', obj=obj, exc_type=exc_type, new_end=new_end): + exc = exc_type('utf-8', obj, 0, obj_len, 'reason') + # arbitrary value is allowed in the C API setter + set_end(exc, new_end) + self.assertEqual(exc.end, new_end) + self._check_no_crash(exc) + + with self.subTest('Py-API', obj=obj, exc_type=exc_type, new_end=new_end): + exc = exc_type('utf-8', obj, 0, obj_len, 'reason') + # arbitrary value is allowed in the attribute setter + exc.end = new_end + self.assertEqual(exc.end, new_end) + self._check_no_crash(exc) + + class 
Test_PyUnstable_Exc_PrepReraiseStar(ExceptionIsLikeMixin, unittest.TestCase): def setUp(self): diff --git a/Misc/NEWS.d/next/C_API/2024-08-27-09-07-56.gh-issue-123378.JJ6n_u.rst b/Misc/NEWS.d/next/C_API/2024-08-27-09-07-56.gh-issue-123378.JJ6n_u.rst new file mode 100644 index 00000000000000..2cfb8b8a1e245a --- /dev/null +++ b/Misc/NEWS.d/next/C_API/2024-08-27-09-07-56.gh-issue-123378.JJ6n_u.rst @@ -0,0 +1,6 @@ +Ensure that the value of :attr:`UnicodeEncodeError.start ` +retrieved by :c:func:`PyUnicodeEncodeError_GetStart` lie in +``[0, max(0, objlen - 1)]`` where *objlen* is the length of +:attr:`UnicodeEncodeError.object `. Similar +arguments apply to :exc:`UnicodeDecodeError` and :exc:`UnicodeTranslateError` +and their corresponding C interface. Patch by Bénédikt Tran. diff --git a/Misc/NEWS.d/next/C_API/2024-12-02-16-10-36.gh-issue-123378.Q6YRwe.rst b/Misc/NEWS.d/next/C_API/2024-12-02-16-10-36.gh-issue-123378.Q6YRwe.rst new file mode 100644 index 00000000000000..107751579c4d91 --- /dev/null +++ b/Misc/NEWS.d/next/C_API/2024-12-02-16-10-36.gh-issue-123378.Q6YRwe.rst @@ -0,0 +1,6 @@ +Ensure that the value of :attr:`UnicodeEncodeError.end ` +retrieved by :c:func:`PyUnicodeEncodeError_GetEnd` lies in ``[min(1, objlen), +max(min(1, objlen), objlen)]`` where *objlen* is the length of +:attr:`UnicodeEncodeError.object `. Similar arguments +apply to :exc:`UnicodeDecodeError` and :exc:`UnicodeTranslateError` and their +corresponding C interface. Patch by Bénédikt Tran. 
diff --git a/Modules/_testcapi/exceptions.c b/Modules/_testcapi/exceptions.c index 316ef0e7ad7e55..e92d9670e7c792 100644 --- a/Modules/_testcapi/exceptions.c +++ b/Modules/_testcapi/exceptions.c @@ -359,6 +359,161 @@ _testcapi_unstable_exc_prep_reraise_star_impl(PyObject *module, return PyUnstable_Exc_PrepReraiseStar(orig, excs); } +/* Test PyUnicodeEncodeError_GetStart */ +static PyObject * +unicode_encode_get_start(PyObject *Py_UNUSED(module), PyObject *arg) +{ + Py_ssize_t start; + if (PyUnicodeEncodeError_GetStart(arg, &start) < 0) { + return NULL; + } + RETURN_SIZE(start); +} + +/* Test PyUnicodeDecodeError_GetStart */ +static PyObject * +unicode_decode_get_start(PyObject *Py_UNUSED(module), PyObject *arg) +{ + Py_ssize_t start; + if (PyUnicodeDecodeError_GetStart(arg, &start) < 0) { + return NULL; + } + RETURN_SIZE(start); +} + +/* Test PyUnicodeTranslateError_GetStart */ +static PyObject * +unicode_translate_get_start(PyObject *Py_UNUSED(module), PyObject *arg) +{ + Py_ssize_t start; + if (PyUnicodeTranslateError_GetStart(arg, &start) < 0) { + return NULL; + } + RETURN_SIZE(start); +} + +/* Test PyUnicodeEncodeError_SetStart */ +static PyObject * +unicode_encode_set_start(PyObject *Py_UNUSED(module), PyObject *args) +{ + PyObject *exc; + Py_ssize_t start; + if (PyArg_ParseTuple(args, "On", &exc, &start) < 0) { + return NULL; + } + if (PyUnicodeEncodeError_SetStart(exc, start) < 0) { + return NULL; + } + Py_RETURN_NONE; +} + +/* Test PyUnicodeDecodeError_SetStart */ +static PyObject * +unicode_decode_set_start(PyObject *Py_UNUSED(module), PyObject *args) +{ + PyObject *exc; + Py_ssize_t start; + if (PyArg_ParseTuple(args, "On", &exc, &start) < 0) { + return NULL; + } + if (PyUnicodeDecodeError_SetStart(exc, start) < 0) { + return NULL; + } + Py_RETURN_NONE; +} + +/* Test PyUnicodeTranslateError_SetStart */ +static PyObject * +unicode_translate_set_start(PyObject *Py_UNUSED(module), PyObject *args) +{ + PyObject *exc; + Py_ssize_t start; + if 
(PyArg_ParseTuple(args, "On", &exc, &start) < 0) { + return NULL; + } + if (PyUnicodeTranslateError_SetStart(exc, start) < 0) { + return NULL; + } + Py_RETURN_NONE; +} + +/* Test PyUnicodeEncodeError_GetEnd */ +static PyObject * +unicode_encode_get_end(PyObject *Py_UNUSED(module), PyObject *arg) +{ + Py_ssize_t end; + if (PyUnicodeEncodeError_GetEnd(arg, &end) < 0) { + return NULL; + } + RETURN_SIZE(end); +} + +/* Test PyUnicodeDecodeError_GetEnd */ +static PyObject * +unicode_decode_get_end(PyObject *Py_UNUSED(module), PyObject *arg) +{ + Py_ssize_t end; + if (PyUnicodeDecodeError_GetEnd(arg, &end) < 0) { + return NULL; + } + RETURN_SIZE(end); +} + +/* Test PyUnicodeTranslateError_GetEnd */ +static PyObject * +unicode_translate_get_end(PyObject *Py_UNUSED(module), PyObject *arg) +{ + Py_ssize_t end; + if (PyUnicodeTranslateError_GetEnd(arg, &end) < 0) { + return NULL; + } + RETURN_SIZE(end); +} + +/* Test PyUnicodeEncodeError_SetEnd */ +static PyObject * +unicode_encode_set_end(PyObject *Py_UNUSED(module), PyObject *args) +{ + PyObject *exc; + Py_ssize_t end; + if (PyArg_ParseTuple(args, "On", &exc, &end) < 0) { + return NULL; + } + if (PyUnicodeEncodeError_SetEnd(exc, end) < 0) { + return NULL; + } + Py_RETURN_NONE; +} + +/* Test PyUnicodeDecodeError_SetEnd */ +static PyObject * +unicode_decode_set_end(PyObject *Py_UNUSED(module), PyObject *args) +{ + PyObject *exc; + Py_ssize_t end; + if (PyArg_ParseTuple(args, "On", &exc, &end) < 0) { + return NULL; + } + if (PyUnicodeDecodeError_SetEnd(exc, end) < 0) { + return NULL; + } + Py_RETURN_NONE; +} + +/* Test PyUnicodeTranslateError_SetEnd */ +static PyObject * +unicode_translate_set_end(PyObject *Py_UNUSED(module), PyObject *args) +{ + PyObject *exc; + Py_ssize_t end; + if (PyArg_ParseTuple(args, "On", &exc, &end) < 0) { + return NULL; + } + if (PyUnicodeTranslateError_SetEnd(exc, end) < 0) { + return NULL; + } + Py_RETURN_NONE; +} /* * Define the PyRecurdingInfinitelyError_Type @@ -403,6 +558,18 @@ static 
PyMethodDef test_methods[] = { _TESTCAPI_SET_EXCEPTION_METHODDEF _TESTCAPI_TRACEBACK_PRINT_METHODDEF _TESTCAPI_UNSTABLE_EXC_PREP_RERAISE_STAR_METHODDEF + {"unicode_encode_get_start", unicode_encode_get_start, METH_O}, + {"unicode_decode_get_start", unicode_decode_get_start, METH_O}, + {"unicode_translate_get_start", unicode_translate_get_start, METH_O}, + {"unicode_encode_set_start", unicode_encode_set_start, METH_VARARGS}, + {"unicode_decode_set_start", unicode_decode_set_start, METH_VARARGS}, + {"unicode_translate_set_start", unicode_translate_set_start, METH_VARARGS}, + {"unicode_encode_get_end", unicode_encode_get_end, METH_O}, + {"unicode_decode_get_end", unicode_decode_get_end, METH_O}, + {"unicode_translate_get_end", unicode_translate_get_end, METH_O}, + {"unicode_encode_set_end", unicode_encode_set_end, METH_VARARGS}, + {"unicode_decode_set_end", unicode_decode_set_end, METH_VARARGS}, + {"unicode_translate_set_end", unicode_translate_set_end, METH_VARARGS}, {NULL}, }; diff --git a/Objects/exceptions.c b/Objects/exceptions.c index 6fbe0f197eaebf..124b591ee3a13f 100644 --- a/Objects/exceptions.c +++ b/Objects/exceptions.c @@ -2708,6 +2708,46 @@ set_unicodefromstring(PyObject **attr, const char *value) return 0; } +/* + * Adjust the (inclusive) 'start' value of a UnicodeError object. + * + * The 'start' can be negative or not, but when adjusting the value, + * we clip it in [0, max(0, objlen - 1)] but do not interpret it as + * a relative offset. + */ +static inline Py_ssize_t +unicode_error_adjust_start(Py_ssize_t start, Py_ssize_t objlen) +{ + assert(objlen >= 0); + if (start < 0) { + start = 0; + } + if (start >= objlen) { + start = objlen == 0 ? 0 : objlen - 1; + } + return start; +} + +/* + * Adjust the (exclusive) 'end' value of a UnicodeError object. + * + * The 'end' can be negative or not, but when adjusting the value, + * we clip it in [min(1, objlen), max(min(1, objlen), objlen)] but + * do not interpret it as a relative offset. 
+ */ +static inline Py_ssize_t +unicode_error_adjust_end(Py_ssize_t end, Py_ssize_t objlen) +{ + assert(objlen >= 0); + if (end < 1) { + end = 1; + } + if (end > objlen) { + end = objlen; + } + return end; +} + PyObject * PyUnicodeEncodeError_GetEncoding(PyObject *exc) { @@ -2739,38 +2779,31 @@ PyUnicodeTranslateError_GetObject(PyObject *exc) } int -PyUnicodeEncodeError_GetStart(PyObject *exc, Py_ssize_t *start) +PyUnicodeEncodeError_GetStart(PyObject *self, Py_ssize_t *start) { - Py_ssize_t size; - PyObject *obj = get_unicode(((PyUnicodeErrorObject *)exc)->object, - "object"); - if (!obj) + PyUnicodeErrorObject *exc = (PyUnicodeErrorObject *)self; + PyObject *obj = get_unicode(exc->object, "object"); + if (obj == NULL) { return -1; - *start = ((PyUnicodeErrorObject *)exc)->start; - size = PyUnicode_GET_LENGTH(obj); - if (*start<0) - *start = 0; /*XXX check for values <0*/ - if (*start>=size) - *start = size-1; + } + Py_ssize_t size = PyUnicode_GET_LENGTH(obj); Py_DECREF(obj); + *start = unicode_error_adjust_start(exc->start, size); return 0; } int -PyUnicodeDecodeError_GetStart(PyObject *exc, Py_ssize_t *start) +PyUnicodeDecodeError_GetStart(PyObject *self, Py_ssize_t *start) { - Py_ssize_t size; - PyObject *obj = get_string(((PyUnicodeErrorObject *)exc)->object, "object"); - if (!obj) + PyUnicodeErrorObject *exc = (PyUnicodeErrorObject *)self; + PyObject *obj = get_string(exc->object, "object"); + if (obj == NULL) { return -1; - size = PyBytes_GET_SIZE(obj); - *start = ((PyUnicodeErrorObject *)exc)->start; - if (*start<0) - *start = 0; - if (*start>=size) - *start = size-1; + } + Py_ssize_t size = PyBytes_GET_SIZE(obj); Py_DECREF(obj); + *start = unicode_error_adjust_start(exc->start, size); return 0; } @@ -2782,63 +2815,61 @@ PyUnicodeTranslateError_GetStart(PyObject *exc, Py_ssize_t *start) } +static inline int +unicode_error_set_start_impl(PyObject *self, Py_ssize_t start) +{ + ((PyUnicodeErrorObject *)self)->start = start; + return 0; +} + + int 
PyUnicodeEncodeError_SetStart(PyObject *exc, Py_ssize_t start) { - ((PyUnicodeErrorObject *)exc)->start = start; - return 0; + return unicode_error_set_start_impl(exc, start); } int PyUnicodeDecodeError_SetStart(PyObject *exc, Py_ssize_t start) { - ((PyUnicodeErrorObject *)exc)->start = start; - return 0; + return unicode_error_set_start_impl(exc, start); } int PyUnicodeTranslateError_SetStart(PyObject *exc, Py_ssize_t start) { - ((PyUnicodeErrorObject *)exc)->start = start; - return 0; + return unicode_error_set_start_impl(exc, start); } int -PyUnicodeEncodeError_GetEnd(PyObject *exc, Py_ssize_t *end) +PyUnicodeEncodeError_GetEnd(PyObject *self, Py_ssize_t *end) { - Py_ssize_t size; - PyObject *obj = get_unicode(((PyUnicodeErrorObject *)exc)->object, - "object"); - if (!obj) + PyUnicodeErrorObject *exc = (PyUnicodeErrorObject *)self; + PyObject *obj = get_unicode(exc->object, "object"); + if (obj == NULL) { return -1; - *end = ((PyUnicodeErrorObject *)exc)->end; - size = PyUnicode_GET_LENGTH(obj); - if (*end<1) - *end = 1; - if (*end>size) - *end = size; + } + Py_ssize_t size = PyUnicode_GET_LENGTH(obj); Py_DECREF(obj); + *end = unicode_error_adjust_end(exc->end, size); return 0; } int -PyUnicodeDecodeError_GetEnd(PyObject *exc, Py_ssize_t *end) +PyUnicodeDecodeError_GetEnd(PyObject *self, Py_ssize_t *end) { - Py_ssize_t size; - PyObject *obj = get_string(((PyUnicodeErrorObject *)exc)->object, "object"); - if (!obj) + PyUnicodeErrorObject *exc = (PyUnicodeErrorObject *)self; + PyObject *obj = get_string(exc->object, "object"); + if (obj == NULL) { return -1; - size = PyBytes_GET_SIZE(obj); - *end = ((PyUnicodeErrorObject *)exc)->end; - if (*end<1) - *end = 1; - if (*end>size) - *end = size; + } + Py_ssize_t size = PyBytes_GET_SIZE(obj); Py_DECREF(obj); + *end = unicode_error_adjust_end(exc->end, size); return 0; } @@ -2850,27 +2881,32 @@ PyUnicodeTranslateError_GetEnd(PyObject *exc, Py_ssize_t *end) } -int -PyUnicodeEncodeError_SetEnd(PyObject *exc, Py_ssize_t 
end) +static inline int +unicode_error_set_end_impl(PyObject *exc, Py_ssize_t end) { ((PyUnicodeErrorObject *)exc)->end = end; return 0; } +int +PyUnicodeEncodeError_SetEnd(PyObject *exc, Py_ssize_t end) +{ + return unicode_error_set_end_impl(exc, end); +} + + int PyUnicodeDecodeError_SetEnd(PyObject *exc, Py_ssize_t end) { - ((PyUnicodeErrorObject *)exc)->end = end; - return 0; + return unicode_error_set_end_impl(exc, end); } int PyUnicodeTranslateError_SetEnd(PyObject *exc, Py_ssize_t end) { - ((PyUnicodeErrorObject *)exc)->end = end; - return 0; + return unicode_error_set_end_impl(exc, end); } PyObject * @@ -2966,28 +3002,25 @@ static PyMemberDef UnicodeError_members[] = { static int UnicodeEncodeError_init(PyObject *self, PyObject *args, PyObject *kwds) { - PyUnicodeErrorObject *err; - - if (BaseException_init((PyBaseExceptionObject *)self, args, kwds) == -1) + if (BaseException_init((PyBaseExceptionObject *)self, args, kwds) == -1) { return -1; + } - err = (PyUnicodeErrorObject *)self; - - Py_CLEAR(err->encoding); - Py_CLEAR(err->object); - Py_CLEAR(err->reason); + PyObject *encoding = NULL, *object = NULL, *reason = NULL; // borrowed + Py_ssize_t start = -1, end = -1; if (!PyArg_ParseTuple(args, "UUnnU", - &err->encoding, &err->object, - &err->start, &err->end, &err->reason)) { - err->encoding = err->object = err->reason = NULL; + &encoding, &object, &start, &end, &reason)) + { return -1; } - Py_INCREF(err->encoding); - Py_INCREF(err->object); - Py_INCREF(err->reason); - + PyUnicodeErrorObject *exc = (PyUnicodeErrorObject *)self; + Py_XSETREF(exc->encoding, Py_NewRef(encoding)); + Py_XSETREF(exc->object, Py_NewRef(object)); + exc->start = start; + exc->end = end; + Py_XSETREF(exc->reason, Py_NewRef(reason)); return 0; } @@ -3073,44 +3106,42 @@ PyObject *PyExc_UnicodeEncodeError = (PyObject *)&_PyExc_UnicodeEncodeError; static int UnicodeDecodeError_init(PyObject *self, PyObject *args, PyObject *kwds) { - PyUnicodeErrorObject *ude; - - if 
(BaseException_init((PyBaseExceptionObject *)self, args, kwds) == -1) + if (BaseException_init((PyBaseExceptionObject *)self, args, kwds) == -1) { return -1; + } - ude = (PyUnicodeErrorObject *)self; - - Py_CLEAR(ude->encoding); - Py_CLEAR(ude->object); - Py_CLEAR(ude->reason); + PyObject *encoding = NULL, *object = NULL, *reason = NULL; // borrowed + Py_ssize_t start = -1, end = -1; if (!PyArg_ParseTuple(args, "UOnnU", - &ude->encoding, &ude->object, - &ude->start, &ude->end, &ude->reason)) { - ude->encoding = ude->object = ude->reason = NULL; - return -1; + &encoding, &object, &start, &end, &reason)) + { + return -1; } - Py_INCREF(ude->encoding); - Py_INCREF(ude->object); - Py_INCREF(ude->reason); - - if (!PyBytes_Check(ude->object)) { + if (PyBytes_Check(object)) { + Py_INCREF(object); // make 'object' a strong reference + } + else { Py_buffer view; - if (PyObject_GetBuffer(ude->object, &view, PyBUF_SIMPLE) != 0) - goto error; - Py_XSETREF(ude->object, PyBytes_FromStringAndSize(view.buf, view.len)); + if (PyObject_GetBuffer(object, &view, PyBUF_SIMPLE) != 0) { + return -1; + } + // 'object' is borrowed, so we can re-use the variable + object = PyBytes_FromStringAndSize(view.buf, view.len); PyBuffer_Release(&view); - if (!ude->object) - goto error; + if (object == NULL) { + return -1; + } } - return 0; -error: - Py_CLEAR(ude->encoding); - Py_CLEAR(ude->object); - Py_CLEAR(ude->reason); - return -1; + PyUnicodeErrorObject *exc = (PyUnicodeErrorObject *)self; + Py_XSETREF(exc->encoding, Py_NewRef(encoding)); + Py_XSETREF(exc->object, object /* already a strong reference */); + exc->start = start; + exc->end = end; + Py_XSETREF(exc->reason, Py_NewRef(reason)); + return 0; } static PyObject * @@ -3192,25 +3223,24 @@ PyUnicodeDecodeError_Create( */ static int -UnicodeTranslateError_init(PyUnicodeErrorObject *self, PyObject *args, - PyObject *kwds) +UnicodeTranslateError_init(PyObject *self, PyObject *args, PyObject *kwds) { - if 
(BaseException_init((PyBaseExceptionObject *)self, args, kwds) == -1) + if (BaseException_init((PyBaseExceptionObject *)self, args, kwds) == -1) { return -1; + } - Py_CLEAR(self->object); - Py_CLEAR(self->reason); + PyObject *object = NULL, *reason = NULL; // borrowed + Py_ssize_t start = -1, end = -1; - if (!PyArg_ParseTuple(args, "UnnU", - &self->object, - &self->start, &self->end, &self->reason)) { - self->object = self->reason = NULL; + if (!PyArg_ParseTuple(args, "UnnU", &object, &start, &end, &reason)) { return -1; } - Py_INCREF(self->object); - Py_INCREF(self->reason); - + PyUnicodeErrorObject *exc = (PyUnicodeErrorObject *)self; + Py_XSETREF(exc->object, Py_NewRef(object)); + exc->start = start; + exc->end = end; + Py_XSETREF(exc->reason, Py_NewRef(reason)); return 0; } From 6bc3e830a518112a4e242217807681e3908602f4 Mon Sep 17 00:00:00 2001 From: "RUANG (James Roy)" Date: Wed, 4 Dec 2024 21:30:38 +0800 Subject: [PATCH 04/48] gh-127481: Add `EPOLLWAKEUP` to the `select` module (GH-127482) --- Doc/library/select.rst | 6 ++++++ .../Library/2024-12-01-23-18-43.gh-issue-127481.K36AoP.rst | 1 + Modules/selectmodule.c | 4 ++++ 3 files changed, 11 insertions(+) create mode 100644 Misc/NEWS.d/next/Library/2024-12-01-23-18-43.gh-issue-127481.K36AoP.rst diff --git a/Doc/library/select.rst b/Doc/library/select.rst index f23a249f44b485..4fcff9198944a8 100644 --- a/Doc/library/select.rst +++ b/Doc/library/select.rst @@ -317,11 +317,17 @@ Edge and Level Trigger Polling (epoll) Objects +-------------------------+-----------------------------------------------+ | :const:`EPOLLMSG` | Ignored. | +-------------------------+-----------------------------------------------+ + | :const:`EPOLLWAKEUP` | Prevents sleep during event waiting. | + +-------------------------+-----------------------------------------------+ .. versionadded:: 3.6 :const:`EPOLLEXCLUSIVE` was added. It's only supported by Linux Kernel 4.5 or later. + .. versionadded:: next + :const:`EPOLLWAKEUP` was added. 
It's only supported by Linux Kernel 3.5 + or later. + .. method:: epoll.close() Close the control file descriptor of the epoll object. diff --git a/Misc/NEWS.d/next/Library/2024-12-01-23-18-43.gh-issue-127481.K36AoP.rst b/Misc/NEWS.d/next/Library/2024-12-01-23-18-43.gh-issue-127481.K36AoP.rst new file mode 100644 index 00000000000000..8ada0b57ddc257 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-12-01-23-18-43.gh-issue-127481.K36AoP.rst @@ -0,0 +1 @@ +Add the ``EPOLLWAKEUP`` constant to the :mod:`select` module. diff --git a/Modules/selectmodule.c b/Modules/selectmodule.c index 6ced71cca73178..e14e114a6dafd0 100644 --- a/Modules/selectmodule.c +++ b/Modules/selectmodule.c @@ -2715,6 +2715,10 @@ _select_exec(PyObject *m) #ifdef EPOLLMSG ADD_INT(EPOLLMSG); #endif +#ifdef EPOLLWAKEUP + /* Kernel 3.5+ */ + ADD_INT(EPOLLWAKEUP); +#endif #ifdef EPOLL_CLOEXEC ADD_INT(EPOLL_CLOEXEC); From 51cfa569e379f84b3418db0971a71b1ef575a42b Mon Sep 17 00:00:00 2001 From: Beomsoo Kim Date: Thu, 5 Dec 2024 03:30:51 +0900 Subject: [PATCH 05/48] =?UTF-8?q?gh-127552:=20Remove=20comment=20questioni?= =?UTF-8?q?ng=204-digit=20restriction=20for=20=E2=80=98Y=E2=80=99=20in=20d?= =?UTF-8?q?atetime.strptime=20patterns=20(#127590)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The code has required 4 digits for the year since its inclusion in the stdlib in 2002 (over 22 years ago as of this commit). --- Lib/_strptime.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/Lib/_strptime.py b/Lib/_strptime.py index 5f4d2475c0169b..e6e23596db6f99 100644 --- a/Lib/_strptime.py +++ b/Lib/_strptime.py @@ -301,8 +301,6 @@ def __init__(self, locale_time=None): 'V': r"(?P5[0-3]|0[1-9]|[1-4]\d|\d)", # W is set below by using 'U' 'y': r"(?P\d\d)", - #XXX: Does 'Y' need to worry about having less or more than - # 4 digits? 
'Y': r"(?P\d\d\d\d)", 'z': r"(?P[+-]\d\d:?[0-5]\d(:?[0-5]\d(\.\d{1,6})?)?|(?-i:Z))", 'A': self.__seqToRE(self.locale_time.f_weekday, 'A'), From e51da64ac3bc6cd45339864db32d05115af39ead Mon Sep 17 00:00:00 2001 From: Sam Gross Date: Wed, 4 Dec 2024 19:12:15 +0000 Subject: [PATCH 06/48] gh-127536: Add missing locks in listobject.c (GH-127580) We were missing locks around some list operations in the free threading build. --- ...-12-03-21-07-06.gh-issue-127536.3jMMrT.rst | 2 + Objects/listobject.c | 50 +++++++++++++++---- 2 files changed, 42 insertions(+), 10 deletions(-) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2024-12-03-21-07-06.gh-issue-127536.3jMMrT.rst diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2024-12-03-21-07-06.gh-issue-127536.3jMMrT.rst b/Misc/NEWS.d/next/Core_and_Builtins/2024-12-03-21-07-06.gh-issue-127536.3jMMrT.rst new file mode 100644 index 00000000000000..6e2b87fe38146b --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2024-12-03-21-07-06.gh-issue-127536.3jMMrT.rst @@ -0,0 +1,2 @@ +Add missing locks around some list assignment operations in the free +threading build. diff --git a/Objects/listobject.c b/Objects/listobject.c index 8abe9e8933420b..3832295600a0ab 100644 --- a/Objects/listobject.c +++ b/Objects/listobject.c @@ -3,6 +3,7 @@ #include "Python.h" #include "pycore_abstract.h" // _PyIndex_Check() #include "pycore_ceval.h" // _PyEval_GetBuiltin() +#include "pycore_critical_section.h" // _Py_CRITICAL_SECTION_ASSERT_OBJECT_LOCKED() #include "pycore_dict.h" // _PyDictViewObject #include "pycore_freelist.h" // _Py_FREELIST_FREE(), _Py_FREELIST_POP() #include "pycore_pyatomic_ft_wrappers.h" @@ -72,6 +73,11 @@ static void ensure_shared_on_resize(PyListObject *self) { #ifdef Py_GIL_DISABLED + // We can't use _Py_CRITICAL_SECTION_ASSERT_OBJECT_LOCKED here because + // the `CALL_LIST_APPEND` bytecode handler may lock the list without + // a critical section. 
+ assert(Py_REFCNT(self) == 1 || PyMutex_IsLocked(&_PyObject_CAST(self)->ob_mutex)); + // Ensure that the list array is freed using QSBR if we are not the // owning thread. if (!_Py_IsOwnedByCurrentThread((PyObject *)self) && @@ -957,10 +963,12 @@ list_ass_slice(PyListObject *a, Py_ssize_t ilow, Py_ssize_t ihigh, PyObject *v) Py_ssize_t n = PyList_GET_SIZE(a); PyObject *copy = list_slice_lock_held(a, 0, n); if (copy == NULL) { - return -1; + ret = -1; + } + else { + ret = list_ass_slice_lock_held(a, ilow, ihigh, copy); + Py_DECREF(copy); } - ret = list_ass_slice_lock_held(a, ilow, ihigh, copy); - Py_DECREF(copy); Py_END_CRITICAL_SECTION(); } else if (v != NULL && PyList_CheckExact(v)) { @@ -1437,7 +1445,9 @@ PyList_Clear(PyObject *self) PyErr_BadInternalCall(); return -1; } + Py_BEGIN_CRITICAL_SECTION(self); list_clear((PyListObject*)self); + Py_END_CRITICAL_SECTION(); return 0; } @@ -3410,7 +3420,9 @@ list___init___impl(PyListObject *self, PyObject *iterable) /* Empty previous contents */ if (self->ob_item != NULL) { + Py_BEGIN_CRITICAL_SECTION(self); list_clear(self); + Py_END_CRITICAL_SECTION(); } if (iterable != NULL) { if (_list_extend(self, iterable) < 0) { @@ -3583,8 +3595,10 @@ adjust_slice_indexes(PyListObject *lst, } static int -list_ass_subscript(PyObject* _self, PyObject* item, PyObject* value) +list_ass_subscript_lock_held(PyObject *_self, PyObject *item, PyObject *value) { + _Py_CRITICAL_SECTION_ASSERT_OBJECT_LOCKED(_self); + PyListObject *self = (PyListObject *)_self; if (_PyIndex_Check(item)) { Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError); @@ -3592,7 +3606,7 @@ list_ass_subscript(PyObject* _self, PyObject* item, PyObject* value) return -1; if (i < 0) i += PyList_GET_SIZE(self); - return list_ass_item((PyObject *)self, i, value); + return list_ass_item_lock_held(self, i, value); } else if (PySlice_Check(item)) { Py_ssize_t start, stop, step; @@ -3612,7 +3626,7 @@ list_ass_subscript(PyObject* _self, PyObject* item, PyObject* value) step); 
if (step == 1) - return list_ass_slice(self, start, stop, value); + return list_ass_slice_lock_held(self, start, stop, value); if (slicelength <= 0) return 0; @@ -3678,10 +3692,8 @@ list_ass_subscript(PyObject* _self, PyObject* item, PyObject* value) /* protect against a[::-1] = a */ if (self == (PyListObject*)value) { - Py_BEGIN_CRITICAL_SECTION(value); - seq = list_slice_lock_held((PyListObject*)value, 0, + seq = list_slice_lock_held((PyListObject *)value, 0, Py_SIZE(value)); - Py_END_CRITICAL_SECTION(); } else { seq = PySequence_Fast(value, @@ -3695,7 +3707,7 @@ list_ass_subscript(PyObject* _self, PyObject* item, PyObject* value) step); if (step == 1) { - int res = list_ass_slice(self, start, stop, seq); + int res = list_ass_slice_lock_held(self, start, stop, seq); Py_DECREF(seq); return res; } @@ -3751,6 +3763,24 @@ list_ass_subscript(PyObject* _self, PyObject* item, PyObject* value) } } +static int +list_ass_subscript(PyObject *self, PyObject *item, PyObject *value) +{ + int res; +#ifdef Py_GIL_DISABLED + if (PySlice_Check(item) && value != NULL && PyList_CheckExact(value)) { + Py_BEGIN_CRITICAL_SECTION2(self, value); + res = list_ass_subscript_lock_held(self, item, value); + Py_END_CRITICAL_SECTION2(); + return res; + } +#endif + Py_BEGIN_CRITICAL_SECTION(self); + res = list_ass_subscript_lock_held(self, item, value); + Py_END_CRITICAL_SECTION(); + return res; +} + static PyMappingMethods list_as_mapping = { list_length, list_subscript, From 7c5a6f67c726608a05a640e76fc62cfbae986a03 Mon Sep 17 00:00:00 2001 From: Diego Russo Date: Wed, 4 Dec 2024 22:12:06 +0000 Subject: [PATCH 07/48] Enable native AArch64 Ubuntu CI jobs (#127584) Co-authored-by: Brandt Bucher --- .github/workflows/build.yml | 9 +++ .github/workflows/jit.yml | 31 ++------- .github/workflows/reusable-ubuntu.yml | 16 ++--- Tools/jit/ignore-tests-emulated-linux.txt | 85 ----------------------- 4 files changed, 24 insertions(+), 117 deletions(-) delete mode 100644 
Tools/jit/ignore-tests-emulated-linux.txt diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 55effee0e1e393..9b2f19fd6bcf54 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -235,10 +235,19 @@ jobs: free-threading: - false - true + os: + - ubuntu-24.04 + - ubuntu-24.04-aarch64 + is-fork: # only used for the exclusion trick + - ${{ github.repository_owner != 'python' }} + exclude: + - os: ubuntu-24.04-aarch64 + is-fork: true uses: ./.github/workflows/reusable-ubuntu.yml with: config_hash: ${{ needs.check_source.outputs.config_hash }} free-threading: ${{ matrix.free-threading }} + os: ${{ matrix.os }} build_ubuntu_ssltests: name: 'Ubuntu SSL tests with OpenSSL' diff --git a/.github/workflows/jit.yml b/.github/workflows/jit.yml index 4ef543d7369734..ee30cf5786d55b 100644 --- a/.github/workflows/jit.yml +++ b/.github/workflows/jit.yml @@ -28,7 +28,7 @@ concurrency: jobs: interpreter: name: Interpreter (Debug) - runs-on: ubuntu-22.04 + runs-on: ubuntu-24.04 timeout-minutes: 90 steps: - uses: actions/checkout@v4 @@ -78,10 +78,11 @@ jobs: runner: macos-14 - target: x86_64-unknown-linux-gnu/gcc architecture: x86_64 - runner: ubuntu-22.04 + runner: ubuntu-24.04 - target: aarch64-unknown-linux-gnu/gcc architecture: aarch64 - runner: ubuntu-22.04 + # Forks don't have access to our paid AArch64 runners. These jobs are skipped below: + runner: ${{ github.repository_owner == 'python' && 'ubuntu-24.04-aarch64' || 'ubuntu-24.04' }} steps: - uses: actions/checkout@v4 - uses: actions/setup-python@v5 @@ -118,7 +119,8 @@ jobs: ./python.exe -m test --multiprocess 0 --timeout 4500 --verbose2 --verbose3 - name: Native Linux - if: runner.os == 'Linux' && matrix.architecture == 'x86_64' + # Forks don't have access to our paid AArch64 runners. 
Skip those: + if: runner.os == 'Linux' && (matrix.architecture == 'x86_64' || github.repository_owner == 'python') run: | sudo bash -c "$(wget -O - https://apt.llvm.org/llvm.sh)" ./llvm.sh ${{ matrix.llvm }} export PATH="$(llvm-config-${{ matrix.llvm }} --bindir):$PATH" @@ -126,29 +128,10 @@ jobs: make all --jobs 4 ./python -m test --multiprocess 0 --timeout 4500 --verbose2 --verbose3 - - name: Emulated Linux - if: runner.os == 'Linux' && matrix.architecture != 'x86_64' - # The --ignorefile on ./python -m test is used to exclude tests known to fail when running on an emulated Linux. - run: | - sudo bash -c "$(wget -O - https://apt.llvm.org/llvm.sh)" ./llvm.sh ${{ matrix.llvm }} - export PATH="$(llvm-config-${{ matrix.llvm }} --bindir):$PATH" - ./configure --prefix="$(pwd)/../build" - make install --jobs 4 - make clean --jobs 4 - export HOST=${{ matrix.architecture }}-linux-gnu - sudo apt install --yes "gcc-$HOST" qemu-user - export QEMU_LD_PREFIX="/usr/$HOST" - CC="$HOST-gcc" \ - CPP="$HOST-gcc --preprocess" \ - HOSTRUNNER=qemu-${{ matrix.architecture }} \ - ./configure --enable-experimental-jit ${{ matrix.debug && '--with-pydebug' || '' }} --build=x86_64-linux-gnu --host="$HOST" --with-build-python=../build/bin/python3 --with-pkg-config=no ac_cv_buggy_getaddrinfo=no ac_cv_file__dev_ptc=no ac_cv_file__dev_ptmx=yes - make all --jobs 4 - ./python -m test --ignorefile=Tools/jit/ignore-tests-emulated-linux.txt --multiprocess 0 --timeout 4500 --verbose2 --verbose3 - jit-with-disabled-gil: name: Free-Threaded (Debug) needs: interpreter - runs-on: ubuntu-22.04 + runs-on: ubuntu-24.04 strategy: matrix: llvm: diff --git a/.github/workflows/reusable-ubuntu.yml b/.github/workflows/reusable-ubuntu.yml index 642354f8b4f61b..2869202c7910c9 100644 --- a/.github/workflows/reusable-ubuntu.yml +++ b/.github/workflows/reusable-ubuntu.yml @@ -11,16 +11,16 @@ on: required: false type: boolean default: false + os: + description: OS to run the job + required: true + type: string jobs: 
build_ubuntu_reusable: - name: 'build and test' + name: build and test (${{ inputs.os }}) timeout-minutes: 60 - runs-on: ${{ matrix.os }} - strategy: - fail-fast: false - matrix: - os: [ubuntu-24.04, ubuntu-24.04-aarch64] + runs-on: ${{ inputs.os }} env: FORCE_COLOR: 1 OPENSSL_VER: 3.0.15 @@ -42,7 +42,7 @@ jobs: uses: actions/cache@v4 with: path: ./multissl/openssl/${{ env.OPENSSL_VER }} - key: ${{ matrix.os }}-multissl-openssl-${{ env.OPENSSL_VER }} + key: ${{ inputs.os }}-multissl-openssl-${{ env.OPENSSL_VER }} - name: Install OpenSSL if: steps.cache-openssl.outputs.cache-hit != 'true' run: python3 Tools/ssl/multissltests.py --steps=library --base-directory "$MULTISSL_DIR" --openssl "$OPENSSL_VER" --system Linux @@ -84,7 +84,7 @@ jobs: working-directory: ${{ env.CPYTHON_BUILDDIR }} run: make -j - name: Build CPython out-of-tree (for compiler warning check) - if: ${{ !inputs.free-threading}} + if: ${{ !inputs.free-threading }} working-directory: ${{ env.CPYTHON_BUILDDIR }} run: set -o pipefail; make -j --output-sync 2>&1 | tee compiler_output_ubuntu.txt - name: Display build info diff --git a/Tools/jit/ignore-tests-emulated-linux.txt b/Tools/jit/ignore-tests-emulated-linux.txt deleted file mode 100644 index 080a569574470c..00000000000000 --- a/Tools/jit/ignore-tests-emulated-linux.txt +++ /dev/null @@ -1,85 +0,0 @@ -test_multiprocessing_fork -test_strftime_y2k -test.test_asyncio.test_unix_events.TestFork.test_fork_asyncio_run -test.test_asyncio.test_unix_events.TestFork.test_fork_asyncio_subprocess -test.test_asyncio.test_unix_events.TestFork.test_fork_signal_handling -test.test_cmd_line.CmdLineTest.test_no_std_streams -test.test_cmd_line.CmdLineTest.test_no_stdin -test.test_concurrent_futures.test_init.ProcessPoolForkFailingInitializerTest.test_initializer -test.test_concurrent_futures.test_process_pool.ProcessPoolForkProcessPoolExecutorTest.test_ressources_gced_in_workers -test.test_external_inspection.TestGetStackTrace.test_remote_stack_trace 
-test.test_external_inspection.TestGetStackTrace.test_self_trace -test.test_faulthandler.FaultHandlerTests.test_enable_fd -test.test_faulthandler.FaultHandlerTests.test_enable_file -test.test_init.ProcessPoolForkFailingInitializerTest.test_initializer -test.test_logging.ConfigDictTest.test_111615 -test.test_logging.ConfigDictTest.test_config_queue_handler -test.test_logging.ConfigDictTest.test_multiprocessing_queues -test.test_logging.ConfigDictTest.test_config_queue_handler_multiprocessing_context -test.test_os.ForkTests.test_fork_warns_when_non_python_thread_exists -test.test_os.TimerfdTests.test_timerfd_initval -test.test_os.TimerfdTests.test_timerfd_interval -test.test_os.TimerfdTests.test_timerfd_TFD_TIMER_ABSTIME -test.test_pathlib.PathSubclassTest.test_is_mount_root -test.test_pathlib.PathTest.test_is_mount_root -test.test_pathlib.PosixPathTest.test_is_mount_root -test.test_pathlib.test_pathlib.PathSubclassTest.test_is_mount_root -test.test_pathlib.test_pathlib.PathTest.test_is_mount_root -test.test_pathlib.test_pathlib.PosixPathTest.test_is_mount_root -test.test_posix.TestPosixSpawn.test_close_file -test.test_posix.TestPosixSpawnP.test_close_file -test.test_posixpath.PosixPathTest.test_ismount -test.test_signal.StressTest.test_stress_modifying_handlers -test.test_socket.BasicCANTest.testFilter -test.test_socket.BasicCANTest.testLoopback -test.test_socket.LinuxKernelCryptoAPI.test_aead_aes_gcm -test.test_socket.LinuxKernelCryptoAPI.test_aes_cbc -test.test_socket.RecvmsgIntoRFC3542AncillaryUDP6Test.testSecondCmsgTrunc1 -test.test_socket.RecvmsgIntoRFC3542AncillaryUDP6Test.testSecondCmsgTrunc2Int -test.test_socket.RecvmsgIntoRFC3542AncillaryUDP6Test.testSecondCmsgTruncInData -test.test_socket.RecvmsgIntoRFC3542AncillaryUDP6Test.testSecondCmsgTruncLen0Minus1 -test.test_socket.RecvmsgIntoRFC3542AncillaryUDP6Test.testSingleCmsgTruncInData -test.test_socket.RecvmsgIntoRFC3542AncillaryUDP6Test.testSingleCmsgTruncLen0Minus1 
-test.test_socket.RecvmsgIntoRFC3542AncillaryUDPLITE6Test.testSecondCmsgTrunc1 -test.test_socket.RecvmsgIntoRFC3542AncillaryUDPLITE6Test.testSecondCmsgTrunc2Int -test.test_socket.RecvmsgIntoRFC3542AncillaryUDPLITE6Test.testSecondCmsgTruncInData -test.test_socket.RecvmsgIntoRFC3542AncillaryUDPLITE6Test.testSecondCmsgTruncLen0Minus1 -test.test_socket.RecvmsgIntoRFC3542AncillaryUDPLITE6Test.testSingleCmsgTruncInData -test.test_socket.RecvmsgIntoRFC3542AncillaryUDPLITE6Test.testSingleCmsgTruncLen0Minus1 -test.test_socket.RecvmsgIntoSCMRightsStreamTest.testCmsgTruncLen0 -test.test_socket.RecvmsgIntoSCMRightsStreamTest.testCmsgTruncLen0Minus1 -test.test_socket.RecvmsgIntoSCMRightsStreamTest.testCmsgTruncLen0Plus1 -test.test_socket.RecvmsgIntoSCMRightsStreamTest.testCmsgTruncLen1 -test.test_socket.RecvmsgIntoSCMRightsStreamTest.testCmsgTruncLen2Minus1 -test.test_socket.RecvmsgRFC3542AncillaryUDP6Test.testSecondCmsgTrunc1 -test.test_socket.RecvmsgRFC3542AncillaryUDP6Test.testSecondCmsgTrunc2Int -test.test_socket.RecvmsgRFC3542AncillaryUDP6Test.testSecondCmsgTruncInData -test.test_socket.RecvmsgRFC3542AncillaryUDP6Test.testSecondCmsgTruncLen0Minus1 -test.test_socket.RecvmsgRFC3542AncillaryUDP6Test.testSingleCmsgTruncInData -test.test_socket.RecvmsgRFC3542AncillaryUDP6Test.testSingleCmsgTruncLen0Minus1 -test.test_socket.RecvmsgRFC3542AncillaryUDPLITE6Test.testSecondCmsgTrunc1 -test.test_socket.RecvmsgRFC3542AncillaryUDPLITE6Test.testSecondCmsgTrunc2Int -test.test_socket.RecvmsgRFC3542AncillaryUDPLITE6Test.testSecondCmsgTruncInData -test.test_socket.RecvmsgRFC3542AncillaryUDPLITE6Test.testSecondCmsgTruncLen0Minus1 -test.test_socket.RecvmsgRFC3542AncillaryUDPLITE6Test.testSingleCmsgTruncInData -test.test_socket.RecvmsgRFC3542AncillaryUDPLITE6Test.testSingleCmsgTruncLen0Minus1 -test.test_socket.RecvmsgRFC3542AncillaryUDPLITE6Test.testSingleCmsgTruncLen0Minus1 -test.test_socket.RecvmsgSCMRightsStreamTest.testCmsgTruncLen0 
-test.test_socket.RecvmsgSCMRightsStreamTest.testCmsgTruncLen0Minus1 -test.test_socket.RecvmsgSCMRightsStreamTest.testCmsgTruncLen0Plus1 -test.test_socket.RecvmsgSCMRightsStreamTest.testCmsgTruncLen1 -test.test_socket.RecvmsgSCMRightsStreamTest.testCmsgTruncLen2Minus1 -test.test_subprocess.POSIXProcessTestCase.test_exception_bad_args_0 -test.test_subprocess.POSIXProcessTestCase.test_exception_bad_executable -test.test_subprocess.POSIXProcessTestCase.test_vfork_used_when_expected -test.test_subprocess.ProcessTestCase.test_cwd_with_relative_arg -test.test_subprocess.ProcessTestCase.test_cwd_with_relative_executable -test.test_subprocess.ProcessTestCase.test_empty_env -test.test_subprocess.ProcessTestCase.test_file_not_found_includes_filename -test.test_subprocess.ProcessTestCase.test_one_environment_variable -test.test_subprocess.ProcessTestCaseNoPoll.test_cwd_with_relative_arg -test.test_subprocess.ProcessTestCaseNoPoll.test_cwd_with_relative_executable -test.test_subprocess.ProcessTestCaseNoPoll.test_empty_env -test.test_subprocess.ProcessTestCaseNoPoll.test_file_not_found_includes_filename -test.test_subprocess.ProcessTestCaseNoPoll.test_one_environment_variable -test.test_venv.BasicTest.test_zippath_from_non_installed_posix From 94b8f8b40943bf38cf5c454773a3fb8f4ff71e01 Mon Sep 17 00:00:00 2001 From: Brandt Bucher Date: Wed, 4 Dec 2024 15:01:28 -0800 Subject: [PATCH 08/48] GH-126795: Increase the JIT side-exit threshold from 64 to 4096 (GH-127155) --- Include/internal/pycore_backoff.h | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/Include/internal/pycore_backoff.h b/Include/internal/pycore_backoff.h index 3e02728522828e..b5e33fa8b7abc0 100644 --- a/Include/internal/pycore_backoff.h +++ b/Include/internal/pycore_backoff.h @@ -115,10 +115,9 @@ initial_jump_backoff_counter(void) /* Initial exit temperature. 
* Must be larger than ADAPTIVE_COOLDOWN_VALUE, * otherwise when a side exit warms up we may construct - * a new trace before the Tier 1 code has properly re-specialized. - * Backoff sequence 64, 128, 256, 512, 1024, 2048, 4096. */ -#define SIDE_EXIT_INITIAL_VALUE 63 -#define SIDE_EXIT_INITIAL_BACKOFF 6 + * a new trace before the Tier 1 code has properly re-specialized. */ +#define SIDE_EXIT_INITIAL_VALUE 4095 +#define SIDE_EXIT_INITIAL_BACKOFF 12 static inline _Py_BackoffCounter initial_temperature_backoff_counter(void) From 2f1cee8477e22bfc36a704310e4c0f409357e7e9 Mon Sep 17 00:00:00 2001 From: Hood Chatham Date: Thu, 5 Dec 2024 01:25:06 +0100 Subject: [PATCH 09/48] gh-127111: Apply prettier formatter to Emscripten web example (#127551) Cleaned up formatting (and a stray closing tag) of the web example HTML and JS. --- Tools/wasm/emscripten/web_example/python.html | 782 ++++++++++-------- .../emscripten/web_example/python.worker.mjs | 175 ++-- 2 files changed, 511 insertions(+), 446 deletions(-) diff --git a/Tools/wasm/emscripten/web_example/python.html b/Tools/wasm/emscripten/web_example/python.html index fae1e9ad4e8acb..078f86eb764419 100644 --- a/Tools/wasm/emscripten/web_example/python.html +++ b/Tools/wasm/emscripten/web_example/python.html @@ -1,373 +1,433 @@ - + - - - - - - - wasm-python terminal - - - - - - -

Simple REPL for Python WASM

- -
- - - - -
-
-
- The simple REPL provides a limited Python experience in the browser. - - Tools/wasm/README.md contains a list of known limitations and - issues. Networking, subprocesses, and threading are not available. -
- + +
+ + + + +
+
+
+ The simple REPL provides a limited Python experience in the browser. + + Tools/wasm/README.md + + contains a list of known limitations and issues. Networking, + subprocesses, and threading are not available. +
+ diff --git a/Tools/wasm/emscripten/web_example/python.worker.mjs b/Tools/wasm/emscripten/web_example/python.worker.mjs index 42c2e1e08af24b..8043e419966743 100644 --- a/Tools/wasm/emscripten/web_example/python.worker.mjs +++ b/Tools/wasm/emscripten/web_example/python.worker.mjs @@ -1,104 +1,109 @@ import createEmscriptenModule from "./python.mjs"; class StdinBuffer { - constructor() { - this.sab = new SharedArrayBuffer(128 * Int32Array.BYTES_PER_ELEMENT) - this.buffer = new Int32Array(this.sab) - this.readIndex = 1; - this.numberOfCharacters = 0; - this.sentNull = true - } + constructor() { + this.sab = new SharedArrayBuffer(128 * Int32Array.BYTES_PER_ELEMENT); + this.buffer = new Int32Array(this.sab); + this.readIndex = 1; + this.numberOfCharacters = 0; + this.sentNull = true; + } - prompt() { - this.readIndex = 1 - Atomics.store(this.buffer, 0, -1) - postMessage({ - type: 'stdin', - buffer: this.sab - }) - Atomics.wait(this.buffer, 0, -1) - this.numberOfCharacters = this.buffer[0] - } + prompt() { + this.readIndex = 1; + Atomics.store(this.buffer, 0, -1); + postMessage({ + type: "stdin", + buffer: this.sab, + }); + Atomics.wait(this.buffer, 0, -1); + this.numberOfCharacters = this.buffer[0]; + } - stdin = () => { - while (this.numberOfCharacters + 1 === this.readIndex) { - if (!this.sentNull) { - // Must return null once to indicate we're done for now. - this.sentNull = true - return null - } - this.sentNull = false - // Prompt will reset this.readIndex to 1 - this.prompt() - } - const char = this.buffer[this.readIndex] - this.readIndex += 1 - return char + stdin = () => { + while (this.numberOfCharacters + 1 === this.readIndex) { + if (!this.sentNull) { + // Must return null once to indicate we're done for now. 
+ this.sentNull = true; + return null; + } + this.sentNull = false; + // Prompt will reset this.readIndex to 1 + this.prompt(); } + const char = this.buffer[this.readIndex]; + this.readIndex += 1; + return char; + }; } const stdout = (charCode) => { - if (charCode) { - postMessage({ - type: 'stdout', - stdout: charCode, - }) - } else { - console.log(typeof charCode, charCode) - } -} + if (charCode) { + postMessage({ + type: "stdout", + stdout: charCode, + }); + } else { + console.log(typeof charCode, charCode); + } +}; const stderr = (charCode) => { - if (charCode) { - postMessage({ - type: 'stderr', - stderr: charCode, - }) - } else { - console.log(typeof charCode, charCode) - } -} + if (charCode) { + postMessage({ + type: "stderr", + stderr: charCode, + }); + } else { + console.log(typeof charCode, charCode); + } +}; -const stdinBuffer = new StdinBuffer() +const stdinBuffer = new StdinBuffer(); const emscriptenSettings = { - noInitialRun: true, - stdin: stdinBuffer.stdin, - stdout: stdout, - stderr: stderr, - onRuntimeInitialized: () => { - postMessage({type: 'ready', stdinBuffer: stdinBuffer.sab}) - }, - async preRun(Module) { - const versionHex = Module.HEAPU32[Module._Py_Version/4].toString(16); - const versionTuple = versionHex.padStart(8, "0").match(/.{1,2}/g).map((x) => parseInt(x, 16)); - const [major, minor, ..._] = versionTuple; - // Prevent complaints about not finding exec-prefix by making a lib-dynload directory - Module.FS.mkdirTree(`/lib/python${major}.${minor}/lib-dynload/`); - Module.addRunDependency("install-stdlib"); - const resp = await fetch(`python${major}.${minor}.zip`); - const stdlibBuffer = await resp.arrayBuffer(); - Module.FS.writeFile(`/lib/python${major}${minor}.zip`, new Uint8Array(stdlibBuffer), { canOwn: true }); - Module.removeRunDependency("install-stdlib"); - } -} + noInitialRun: true, + stdin: stdinBuffer.stdin, + stdout: stdout, + stderr: stderr, + onRuntimeInitialized: () => { + postMessage({ type: "ready", stdinBuffer: 
stdinBuffer.sab }); + }, + async preRun(Module) { + const versionHex = Module.HEAPU32[Module._Py_Version / 4].toString(16); + const versionTuple = versionHex + .padStart(8, "0") + .match(/.{1,2}/g) + .map((x) => parseInt(x, 16)); + const [major, minor, ..._] = versionTuple; + // Prevent complaints about not finding exec-prefix by making a lib-dynload directory + Module.FS.mkdirTree(`/lib/python${major}.${minor}/lib-dynload/`); + Module.addRunDependency("install-stdlib"); + const resp = await fetch(`python${major}.${minor}.zip`); + const stdlibBuffer = await resp.arrayBuffer(); + Module.FS.writeFile( + `/lib/python${major}${minor}.zip`, + new Uint8Array(stdlibBuffer), + { canOwn: true }, + ); + Module.removeRunDependency("install-stdlib"); + }, +}; const modulePromise = createEmscriptenModule(emscriptenSettings); - onmessage = async (event) => { - if (event.data.type === 'run') { - const Module = await modulePromise; - if (event.data.files) { - for (const [filename, contents] of Object.entries(event.data.files)) { - Module.FS.writeFile(filename, contents) - } - } - const ret = Module.callMain(event.data.args); - postMessage({ - type: 'finished', - returnCode: ret - }) + if (event.data.type === "run") { + const Module = await modulePromise; + if (event.data.files) { + for (const [filename, contents] of Object.entries(event.data.files)) { + Module.FS.writeFile(filename, contents); + } } -} - + const ret = Module.callMain(event.data.args); + postMessage({ + type: "finished", + returnCode: ret, + }); + } +}; From 43634fc1fcc88b35171aa79258f767ba6477f764 Mon Sep 17 00:00:00 2001 From: Hood Chatham Date: Thu, 5 Dec 2024 01:26:25 +0100 Subject: [PATCH 10/48] gh-127146: Emscripten: Skip segfaults in test suite (#127151) Added skips for tests known to cause problems when running on Emscripten. These mostly relate to the limited stack depth on Emscripten. 
--- Lib/test/list_tests.py | 3 ++- Lib/test/mapping_tests.py | 3 ++- Lib/test/support/__init__.py | 3 +++ Lib/test/test_ast/test_ast.py | 5 ++++- Lib/test/test_call.py | 3 ++- Lib/test/test_capi/test_misc.py | 1 + Lib/test/test_class.py | 3 ++- Lib/test/test_compile.py | 2 ++ Lib/test/test_copy.py | 3 +++ Lib/test/test_descr.py | 3 +++ Lib/test/test_dict.py | 1 + Lib/test/test_dictviews.py | 3 ++- Lib/test/test_exception_group.py | 4 +++- Lib/test/test_functools.py | 2 ++ Lib/test/test_isinstance.py | 3 +++ Lib/test/test_json/test_recursion.py | 3 +++ Lib/test/test_pathlib/test_pathlib_abc.py | 4 +++- Lib/test/test_traceback.py | 2 ++ Lib/test/test_xml_etree_c.py | 1 + configure | 1 + configure.ac | 1 + 21 files changed, 46 insertions(+), 8 deletions(-) diff --git a/Lib/test/list_tests.py b/Lib/test/list_tests.py index dbc5ef4f9f2cd5..dbd9f27872962d 100644 --- a/Lib/test/list_tests.py +++ b/Lib/test/list_tests.py @@ -6,7 +6,7 @@ from functools import cmp_to_key from test import seq_tests -from test.support import ALWAYS_EQ, NEVER_EQ, get_c_recursion_limit +from test.support import ALWAYS_EQ, NEVER_EQ, get_c_recursion_limit, skip_emscripten_stack_overflow class CommonTest(seq_tests.CommonTest): @@ -59,6 +59,7 @@ def test_repr(self): self.assertEqual(str(a2), "[0, 1, 2, [...], 3]") self.assertEqual(repr(a2), "[0, 1, 2, [...], 3]") + @skip_emscripten_stack_overflow() def test_repr_deep(self): a = self.type2test([]) for i in range(get_c_recursion_limit() + 1): diff --git a/Lib/test/mapping_tests.py b/Lib/test/mapping_tests.py index ed89a81a6ea685..f249f0021e9c1c 100644 --- a/Lib/test/mapping_tests.py +++ b/Lib/test/mapping_tests.py @@ -1,7 +1,7 @@ # tests common to dict and UserDict import unittest import collections -from test.support import get_c_recursion_limit +from test.support import get_c_recursion_limit, skip_emscripten_stack_overflow class BasicTestMappingProtocol(unittest.TestCase): @@ -622,6 +622,7 @@ def __repr__(self): d = self._full_mapping({1: 
BadRepr()}) self.assertRaises(Exc, repr, d) + @skip_emscripten_stack_overflow() def test_repr_deep(self): d = self._empty_mapping() for i in range(get_c_recursion_limit() + 1): diff --git a/Lib/test/support/__init__.py b/Lib/test/support/__init__.py index 2ad267e3e08f0f..5c738ffaa27713 100644 --- a/Lib/test/support/__init__.py +++ b/Lib/test/support/__init__.py @@ -535,6 +535,9 @@ def skip_android_selinux(name): is_emscripten = sys.platform == "emscripten" is_wasi = sys.platform == "wasi" +def skip_emscripten_stack_overflow(): + return unittest.skipIf(is_emscripten, "Exhausts limited stack on Emscripten") + is_apple_mobile = sys.platform in {"ios", "tvos", "watchos"} is_apple = is_apple_mobile or sys.platform == "darwin" diff --git a/Lib/test/test_ast/test_ast.py b/Lib/test/test_ast/test_ast.py index 67ab8cf6baf657..c268a1f00f938e 100644 --- a/Lib/test/test_ast/test_ast.py +++ b/Lib/test/test_ast/test_ast.py @@ -18,7 +18,7 @@ _testinternalcapi = None from test import support -from test.support import os_helper, script_helper +from test.support import os_helper, script_helper, skip_emscripten_stack_overflow from test.support.ast_helper import ASTTestMixin from test.test_ast.utils import to_tuple from test.test_ast.snippets import ( @@ -745,6 +745,7 @@ def next(self): enum._test_simple_enum(_Precedence, ast._Precedence) @support.cpython_only + @skip_emscripten_stack_overflow() def test_ast_recursion_limit(self): fail_depth = support.exceeds_recursion_limit() crash_depth = 100_000 @@ -1661,6 +1662,7 @@ def test_level_as_none(self): exec(code, ns) self.assertIn('sleep', ns) + @skip_emscripten_stack_overflow() def test_recursion_direct(self): e = ast.UnaryOp(op=ast.Not(), lineno=0, col_offset=0, operand=ast.Constant(1)) e.operand = e @@ -1668,6 +1670,7 @@ def test_recursion_direct(self): with support.infinite_recursion(): compile(ast.Expression(e), "", "eval") + @skip_emscripten_stack_overflow() def test_recursion_indirect(self): e = ast.UnaryOp(op=ast.Not(), lineno=0, 
col_offset=0, operand=ast.Constant(1)) f = ast.UnaryOp(op=ast.Not(), lineno=0, col_offset=0, operand=ast.Constant(1)) diff --git a/Lib/test/test_call.py b/Lib/test/test_call.py index 9d5256b566b8af..78a706436aea0e 100644 --- a/Lib/test/test_call.py +++ b/Lib/test/test_call.py @@ -1,6 +1,6 @@ import unittest from test.support import (cpython_only, is_wasi, requires_limited_api, Py_DEBUG, - set_recursion_limit, skip_on_s390x) + set_recursion_limit, skip_on_s390x, skip_emscripten_stack_overflow) try: import _testcapi except ImportError: @@ -1038,6 +1038,7 @@ class TestRecursion(unittest.TestCase): @skip_on_s390x @unittest.skipIf(is_wasi and Py_DEBUG, "requires deep stack") @unittest.skipIf(_testcapi is None, "requires _testcapi") + @skip_emscripten_stack_overflow() def test_super_deep(self): def recurse(n): diff --git a/Lib/test/test_capi/test_misc.py b/Lib/test/test_capi/test_misc.py index 80e705a37c4c5e..8e0271919cc8a5 100644 --- a/Lib/test/test_capi/test_misc.py +++ b/Lib/test/test_capi/test_misc.py @@ -2137,6 +2137,7 @@ def test_py_config_isoloated_per_interpreter(self): # test fails, assume that the environment in this process may # be altered and suspect. + @requires_subinterpreters @unittest.skipUnless(hasattr(os, "pipe"), "requires os.pipe()") def test_configured_settings(self): """ diff --git a/Lib/test/test_class.py b/Lib/test/test_class.py index 7720cf157fa9ae..e20e59944e9ce9 100644 --- a/Lib/test/test_class.py +++ b/Lib/test/test_class.py @@ -1,7 +1,7 @@ "Test the functionality of Python classes implementing operators." 
import unittest -from test.support import cpython_only, import_helper, script_helper +from test.support import cpython_only, import_helper, script_helper, skip_emscripten_stack_overflow testmeths = [ @@ -554,6 +554,7 @@ class Custom: self.assertFalse(hasattr(o, "__call__")) self.assertFalse(hasattr(c, "__call__")) + @skip_emscripten_stack_overflow() def testSFBug532646(self): # Test for SF bug 532646 diff --git a/Lib/test/test_compile.py b/Lib/test/test_compile.py index f7ea923ef17672..b5cf2ad18fe60b 100644 --- a/Lib/test/test_compile.py +++ b/Lib/test/test_compile.py @@ -121,6 +121,7 @@ def __getitem__(self, key): self.assertEqual(d['z'], 12) @unittest.skipIf(support.is_wasi, "exhausts limited stack on WASI") + @support.skip_emscripten_stack_overflow() def test_extended_arg(self): repeat = int(get_c_recursion_limit() * 0.9) longexpr = 'x = x or ' + '-x' * repeat @@ -709,6 +710,7 @@ def test_yet_more_evil_still_undecodable(self): @support.cpython_only @unittest.skipIf(support.is_wasi, "exhausts limited stack on WASI") + @support.skip_emscripten_stack_overflow() def test_compiler_recursion_limit(self): # Expected limit is Py_C_RECURSION_LIMIT limit = get_c_recursion_limit() diff --git a/Lib/test/test_copy.py b/Lib/test/test_copy.py index 3dec64cc9a2414..d76341417e9bef 100644 --- a/Lib/test/test_copy.py +++ b/Lib/test/test_copy.py @@ -371,6 +371,7 @@ def test_deepcopy_list(self): self.assertIsNot(x, y) self.assertIsNot(x[0], y[0]) + @support.skip_emscripten_stack_overflow() def test_deepcopy_reflexive_list(self): x = [] x.append(x) @@ -398,6 +399,7 @@ def test_deepcopy_tuple_of_immutables(self): y = copy.deepcopy(x) self.assertIs(x, y) + @support.skip_emscripten_stack_overflow() def test_deepcopy_reflexive_tuple(self): x = ([],) x[0].append(x) @@ -415,6 +417,7 @@ def test_deepcopy_dict(self): self.assertIsNot(x, y) self.assertIsNot(x["foo"], y["foo"]) + @support.skip_emscripten_stack_overflow() def test_deepcopy_reflexive_dict(self): x = {} x['foo'] = x diff --git 
a/Lib/test/test_descr.py b/Lib/test/test_descr.py index aa801b9c4f7ad9..168b78a477ee9c 100644 --- a/Lib/test/test_descr.py +++ b/Lib/test/test_descr.py @@ -3663,6 +3663,7 @@ def f(a): return a encoding='latin1', errors='replace') self.assertEqual(ba, b'abc\xbd?') + @support.skip_emscripten_stack_overflow() def test_recursive_call(self): # Testing recursive __call__() by setting to instance of class... class A(object): @@ -3942,6 +3943,7 @@ def __del__(self): # it as a leak. del C.__del__ + @unittest.skipIf(support.is_emscripten, "Seems to works in Pyodide?") def test_slots_trash(self): # Testing slot trash... # Deallocating deeply nested slotted trash caused stack overflows @@ -4864,6 +4866,7 @@ class Thing: # CALL_METHOD_DESCRIPTOR_O deque.append(thing, thing) + @support.skip_emscripten_stack_overflow() def test_repr_as_str(self): # Issue #11603: crash or infinite loop when rebinding __str__ as # __repr__. diff --git a/Lib/test/test_dict.py b/Lib/test/test_dict.py index c94dc2df4f0a7f..86b2f22dee5347 100644 --- a/Lib/test/test_dict.py +++ b/Lib/test/test_dict.py @@ -594,6 +594,7 @@ def __repr__(self): d = {1: BadRepr()} self.assertRaises(Exc, repr, d) + @support.skip_emscripten_stack_overflow() def test_repr_deep(self): d = {} for i in range(get_c_recursion_limit() + 1): diff --git a/Lib/test/test_dictviews.py b/Lib/test/test_dictviews.py index d9881611c19c43..d6bf00eeeb0013 100644 --- a/Lib/test/test_dictviews.py +++ b/Lib/test/test_dictviews.py @@ -2,7 +2,7 @@ import copy import pickle import unittest -from test.support import get_c_recursion_limit +from test.support import get_c_recursion_limit, skip_emscripten_stack_overflow class DictSetTest(unittest.TestCase): @@ -277,6 +277,7 @@ def test_recursive_repr(self): # Again. 
self.assertIsInstance(r, str) + @skip_emscripten_stack_overflow() def test_deeply_nested_repr(self): d = {} for i in range(get_c_recursion_limit()//2 + 100): diff --git a/Lib/test/test_exception_group.py b/Lib/test/test_exception_group.py index b4fc290b1f32b6..53212529c27e28 100644 --- a/Lib/test/test_exception_group.py +++ b/Lib/test/test_exception_group.py @@ -1,7 +1,7 @@ import collections.abc import types import unittest -from test.support import get_c_recursion_limit +from test.support import get_c_recursion_limit, skip_emscripten_stack_overflow class TestExceptionGroupTypeHierarchy(unittest.TestCase): def test_exception_group_types(self): @@ -464,11 +464,13 @@ def make_deep_eg(self): e = ExceptionGroup('eg', [e]) return e + @skip_emscripten_stack_overflow() def test_deep_split(self): e = self.make_deep_eg() with self.assertRaises(RecursionError): e.split(TypeError) + @skip_emscripten_stack_overflow() def test_deep_subgroup(self): e = self.make_deep_eg() with self.assertRaises(RecursionError): diff --git a/Lib/test/test_functools.py b/Lib/test/test_functools.py index 6d60f6941c4c5d..ffd2adb8665b45 100644 --- a/Lib/test/test_functools.py +++ b/Lib/test/test_functools.py @@ -404,6 +404,7 @@ def test_setstate_subclasses(self): self.assertEqual(r, ((1, 2), {})) self.assertIs(type(r[0]), tuple) + @support.skip_emscripten_stack_overflow() def test_recursive_pickle(self): with replaced_module('functools', self.module): f = self.partial(capture) @@ -2054,6 +2055,7 @@ def orig(a, /, b, c=True): ... 
@support.skip_on_s390x @unittest.skipIf(support.is_wasi, "WASI has limited C stack") + @support.skip_emscripten_stack_overflow() def test_lru_recursion(self): @self.module.lru_cache diff --git a/Lib/test/test_isinstance.py b/Lib/test/test_isinstance.py index 95a119ba683e09..abc75c82375d98 100644 --- a/Lib/test/test_isinstance.py +++ b/Lib/test/test_isinstance.py @@ -263,12 +263,14 @@ def test_subclass_tuple(self): self.assertEqual(True, issubclass(int, (int, (float, int)))) self.assertEqual(True, issubclass(str, (str, (Child, str)))) + @support.skip_emscripten_stack_overflow() def test_subclass_recursion_limit(self): # make sure that issubclass raises RecursionError before the C stack is # blown with support.infinite_recursion(): self.assertRaises(RecursionError, blowstack, issubclass, str, str) + @support.skip_emscripten_stack_overflow() def test_isinstance_recursion_limit(self): # make sure that issubclass raises RecursionError before the C stack is # blown @@ -315,6 +317,7 @@ def __bases__(self): self.assertRaises(RecursionError, issubclass, int, X()) self.assertRaises(RecursionError, isinstance, 1, X()) + @support.skip_emscripten_stack_overflow() def test_infinite_recursion_via_bases_tuple(self): """Regression test for bpo-30570.""" class Failure(object): diff --git a/Lib/test/test_json/test_recursion.py b/Lib/test/test_json/test_recursion.py index 290207e9c15b88..663c0643579ac8 100644 --- a/Lib/test/test_json/test_recursion.py +++ b/Lib/test/test_json/test_recursion.py @@ -68,6 +68,7 @@ def default(self, o): self.fail("didn't raise ValueError on default recursion") + @support.skip_emscripten_stack_overflow() def test_highly_nested_objects_decoding(self): # test that loading highly-nested objects doesn't segfault when C # accelerations are used. 
See #12017 @@ -81,6 +82,7 @@ def test_highly_nested_objects_decoding(self): with support.infinite_recursion(): self.loads('[' * 100000 + '1' + ']' * 100000) + @support.skip_emscripten_stack_overflow() def test_highly_nested_objects_encoding(self): # See #12051 l, d = [], {} @@ -93,6 +95,7 @@ def test_highly_nested_objects_encoding(self): with support.infinite_recursion(5000): self.dumps(d) + @support.skip_emscripten_stack_overflow() def test_endless_recursion(self): # See #12051 class EndlessJSONEncoder(self.json.JSONEncoder): diff --git a/Lib/test/test_pathlib/test_pathlib_abc.py b/Lib/test/test_pathlib/test_pathlib_abc.py index af94ac039808f0..5fa2f550cefcf4 100644 --- a/Lib/test/test_pathlib/test_pathlib_abc.py +++ b/Lib/test/test_pathlib/test_pathlib_abc.py @@ -9,7 +9,7 @@ from pathlib._abc import UnsupportedOperation, ParserBase, PurePathBase, PathBase import posixpath -from test.support import is_wasi +from test.support import is_wasi, is_emscripten from test.support.os_helper import TESTFN @@ -2298,6 +2298,7 @@ def _check(path, pattern, case_sensitive, expected): _check(path, "dirb/file*", False, ["dirB/fileB"]) @needs_symlinks + @unittest.skipIf(is_emscripten, "Hangs") def test_glob_recurse_symlinks_common(self): def _check(path, glob, expected): actual = {path for path in path.glob(glob, recurse_symlinks=True) @@ -2393,6 +2394,7 @@ def test_rglob_windows(self): self.assertEqual(set(p.rglob("*\\")), { P(self.base, "dirC/dirD/") }) @needs_symlinks + @unittest.skipIf(is_emscripten, "Hangs") def test_rglob_recurse_symlinks_common(self): def _check(path, glob, expected): actual = {path for path in path.rglob(glob, recurse_symlinks=True) diff --git a/Lib/test/test_traceback.py b/Lib/test/test_traceback.py index ea8d9f2137aca5..31f0a61d6a9d59 100644 --- a/Lib/test/test_traceback.py +++ b/Lib/test/test_traceback.py @@ -2097,6 +2097,7 @@ def deep_eg(self): return e @cpython_only + @support.skip_emscripten_stack_overflow() def 
test_exception_group_deep_recursion_capi(self): from _testcapi import exception_print LIMIT = 75 @@ -2108,6 +2109,7 @@ def test_exception_group_deep_recursion_capi(self): self.assertIn('ExceptionGroup', output) self.assertLessEqual(output.count('ExceptionGroup'), LIMIT) + @support.skip_emscripten_stack_overflow() def test_exception_group_deep_recursion_traceback(self): LIMIT = 75 eg = self.deep_eg() diff --git a/Lib/test/test_xml_etree_c.py b/Lib/test/test_xml_etree_c.py index 3a0fc572f457ff..db19af419bdeab 100644 --- a/Lib/test/test_xml_etree_c.py +++ b/Lib/test/test_xml_etree_c.py @@ -57,6 +57,7 @@ def test_del_attribute(self): del element.attrib self.assertEqual(element.attrib, {'A': 'B', 'C': 'D'}) + @unittest.skipIf(support.is_emscripten, "segfaults") def test_trashcan(self): # If this test fails, it will most likely die via segfault. e = root = cET.Element('root') diff --git a/configure b/configure index 7efda041ae69d4..c6790777793566 100755 --- a/configure +++ b/configure @@ -9436,6 +9436,7 @@ fi as_fn_append LDFLAGS_NODIST " -sFORCE_FILESYSTEM -lidbfs.js -lnodefs.js -lproxyfs.js -lworkerfs.js" as_fn_append LDFLAGS_NODIST " -sEXPORTED_RUNTIME_METHODS=FS,callMain" as_fn_append LDFLAGS_NODIST " -sEXPORTED_FUNCTIONS=_main,_Py_Version" + as_fn_append LDFLAGS_NODIST " -sSTACK_SIZE=5MB" if test "x$enable_wasm_dynamic_linking" = xyes then : diff --git a/configure.ac b/configure.ac index 15f7d07f22473b..9648e438cc7424 100644 --- a/configure.ac +++ b/configure.ac @@ -2334,6 +2334,7 @@ AS_CASE([$ac_sys_system], AS_VAR_APPEND([LDFLAGS_NODIST], [" -sFORCE_FILESYSTEM -lidbfs.js -lnodefs.js -lproxyfs.js -lworkerfs.js"]) AS_VAR_APPEND([LDFLAGS_NODIST], [" -sEXPORTED_RUNTIME_METHODS=FS,callMain"]) AS_VAR_APPEND([LDFLAGS_NODIST], [" -sEXPORTED_FUNCTIONS=_main,_Py_Version"]) + AS_VAR_APPEND([LDFLAGS_NODIST], [" -sSTACK_SIZE=5MB"]) AS_VAR_IF([enable_wasm_dynamic_linking], [yes], [ AS_VAR_APPEND([LINKFORSHARED], [" -sMAIN_MODULE"]) From 87faf0a9c4aa7f8eb5b6b6c8f6e8f5f99b1e3d9b 
Mon Sep 17 00:00:00 2001 From: Hood Chatham Date: Thu, 5 Dec 2024 01:44:50 +0100 Subject: [PATCH 11/48] gh-127503: Emscripten make Python.sh function as proper Python CLI (#127506) Modifies the python.sh script to work on macOS, and adapt to recent emscripten changes. --- Tools/wasm/emscripten/__main__.py | 21 ++++++++++++-- Tools/wasm/emscripten/node_entry.mjs | 43 +++++++++++++++++++--------- configure | 2 +- configure.ac | 2 +- 4 files changed, 51 insertions(+), 17 deletions(-) diff --git a/Tools/wasm/emscripten/__main__.py b/Tools/wasm/emscripten/__main__.py index 9ce8dd6a364ad6..c998ed71309dad 100644 --- a/Tools/wasm/emscripten/__main__.py +++ b/Tools/wasm/emscripten/__main__.py @@ -218,9 +218,26 @@ def configure_emscripten_python(context, working_dir): f"""\ #!/bin/sh + # Macs come with FreeBSD coreutils which doesn't have the -s option + # so feature detect and work around it. + if which grealpath > /dev/null; then + # It has brew installed gnu core utils, use that + REALPATH="grealpath -s" + elif which realpath > /dev/null && realpath --version 2&>1 | grep GNU > /dev/null; then + # realpath points to GNU realpath so use it. + REALPATH="realpath -s" + else + # Shim for macs without GNU coreutils + abs_path () {{ + echo "$(cd "$(dirname "$1")" || exit; pwd)/$(basename "$1")" + }} + REALPATH=abs_path + fi + # We compute our own path, not following symlinks and pass it in so that # node_entry.mjs can set sys.executable correctly. - exec {host_runner} {node_entry} "$(realpath -s $0)" "$@" + # Intentionally allow word splitting on NODEFLAGS. 
+ exec {host_runner} $NODEFLAGS {node_entry} --this-program="$($REALPATH "$0")" "$@" """ ) ) @@ -233,7 +250,7 @@ def configure_emscripten_python(context, working_dir): def make_emscripten_python(context, working_dir): """Run `make` for the emscripten/host build.""" call( - ["make", "--jobs", str(cpu_count()), "commoninstall"], + ["make", "--jobs", str(cpu_count()), "all"], env=updated_env(), quiet=context.quiet, ) diff --git a/Tools/wasm/emscripten/node_entry.mjs b/Tools/wasm/emscripten/node_entry.mjs index cb1c6ff3cba6aa..40ab1515cf28c1 100644 --- a/Tools/wasm/emscripten/node_entry.mjs +++ b/Tools/wasm/emscripten/node_entry.mjs @@ -1,30 +1,47 @@ import EmscriptenModule from "./python.mjs"; -import { dirname } from 'node:path'; -import { fileURLToPath } from 'node:url'; +import fs from "node:fs"; if (process?.versions?.node) { const nodeVersion = Number(process.versions.node.split(".", 1)[0]); if (nodeVersion < 18) { - process.stderr.write( - `Node version must be >= 18, got version ${process.version}\n`, - ); - process.exit(1); + process.stderr.write( + `Node version must be >= 18, got version ${process.version}\n`, + ); + process.exit(1); } } +function rootDirsToMount(Module) { + return fs + .readdirSync("/") + .filter((dir) => !["dev", "lib", "proc"].includes(dir)) + .map((dir) => "/" + dir); +} + +function mountDirectories(Module) { + for (const dir of rootDirsToMount(Module)) { + Module.FS.mkdirTree(dir); + Module.FS.mount(Module.FS.filesystems.NODEFS, { root: dir }, dir); + } +} + +const thisProgram = "--this-program="; +const thisProgramIndex = process.argv.findIndex((x) => + x.startsWith(thisProgram), +); + const settings = { preRun(Module) { - const __dirname = dirname(fileURLToPath(import.meta.url)); - Module.FS.mkdirTree("/lib/"); - Module.FS.mount(Module.FS.filesystems.NODEFS, { root: __dirname + "/lib/" }, "/lib/"); + mountDirectories(Module); + Module.FS.chdir(process.cwd()); + Object.assign(Module.ENV, process.env); }, - // The first three arguments 
are: "node", path to this file, path to - // python.sh. After that come the arguments the user passed to python.sh. - arguments: process.argv.slice(3), // Ensure that sys.executable, sys._base_executable, etc point to python.sh // not to this file. To properly handle symlinks, python.sh needs to compute // its own path. - thisProgram: process.argv[2], + thisProgram: process.argv[thisProgramIndex], + // After python.sh come the arguments thatthe user passed to python.sh. + arguments: process.argv.slice(thisProgramIndex + 1), }; await EmscriptenModule(settings); diff --git a/configure b/configure index c6790777793566..2fa473b9fe32c0 100755 --- a/configure +++ b/configure @@ -9434,7 +9434,7 @@ fi as_fn_append LDFLAGS_NODIST " -sWASM_BIGINT" as_fn_append LDFLAGS_NODIST " -sFORCE_FILESYSTEM -lidbfs.js -lnodefs.js -lproxyfs.js -lworkerfs.js" - as_fn_append LDFLAGS_NODIST " -sEXPORTED_RUNTIME_METHODS=FS,callMain" + as_fn_append LDFLAGS_NODIST " -sEXPORTED_RUNTIME_METHODS=FS,callMain,ENV" as_fn_append LDFLAGS_NODIST " -sEXPORTED_FUNCTIONS=_main,_Py_Version" as_fn_append LDFLAGS_NODIST " -sSTACK_SIZE=5MB" diff --git a/configure.ac b/configure.ac index 9648e438cc7424..8ca8e0f7802742 100644 --- a/configure.ac +++ b/configure.ac @@ -2332,7 +2332,7 @@ AS_CASE([$ac_sys_system], dnl Include file system support AS_VAR_APPEND([LDFLAGS_NODIST], [" -sFORCE_FILESYSTEM -lidbfs.js -lnodefs.js -lproxyfs.js -lworkerfs.js"]) - AS_VAR_APPEND([LDFLAGS_NODIST], [" -sEXPORTED_RUNTIME_METHODS=FS,callMain"]) + AS_VAR_APPEND([LDFLAGS_NODIST], [" -sEXPORTED_RUNTIME_METHODS=FS,callMain,ENV"]) AS_VAR_APPEND([LDFLAGS_NODIST], [" -sEXPORTED_FUNCTIONS=_main,_Py_Version"]) AS_VAR_APPEND([LDFLAGS_NODIST], [" -sSTACK_SIZE=5MB"]) From 6cf77949fba7b44f6885794b2028f091f42f5d6c Mon Sep 17 00:00:00 2001 From: Feodor Fitsner Date: Wed, 4 Dec 2024 19:00:20 -0800 Subject: [PATCH 12/48] gh-127434: Fix iOS `xcrun --sdk` clang/ar scripts to allow arguments with spaces (#127575) Added shell escaping to ensure iOS 
compiler shims can accept arguments with spaces. --- .../2024-12-04-09-52-08.gh-issue-127434.RjkGT_.rst | 1 + iOS/Resources/bin/arm64-apple-ios-ar | 2 +- iOS/Resources/bin/arm64-apple-ios-clang | 2 +- iOS/Resources/bin/arm64-apple-ios-clang++ | 2 +- iOS/Resources/bin/arm64-apple-ios-cpp | 2 +- iOS/Resources/bin/arm64-apple-ios-simulator-ar | 2 +- iOS/Resources/bin/arm64-apple-ios-simulator-clang | 2 +- iOS/Resources/bin/arm64-apple-ios-simulator-clang++ | 2 +- iOS/Resources/bin/arm64-apple-ios-simulator-cpp | 2 +- iOS/Resources/bin/x86_64-apple-ios-simulator-ar | 2 +- iOS/Resources/bin/x86_64-apple-ios-simulator-clang | 2 +- iOS/Resources/bin/x86_64-apple-ios-simulator-clang++ | 2 +- iOS/Resources/bin/x86_64-apple-ios-simulator-cpp | 2 +- 13 files changed, 13 insertions(+), 12 deletions(-) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2024-12-04-09-52-08.gh-issue-127434.RjkGT_.rst diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2024-12-04-09-52-08.gh-issue-127434.RjkGT_.rst b/Misc/NEWS.d/next/Core_and_Builtins/2024-12-04-09-52-08.gh-issue-127434.RjkGT_.rst new file mode 100644 index 00000000000000..08b27a7890bb1c --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2024-12-04-09-52-08.gh-issue-127434.RjkGT_.rst @@ -0,0 +1 @@ +The iOS compiler shims can now accept arguments with spaces. 
diff --git a/iOS/Resources/bin/arm64-apple-ios-ar b/iOS/Resources/bin/arm64-apple-ios-ar index 8122332b9c1de0..3cf3eb218741fa 100755 --- a/iOS/Resources/bin/arm64-apple-ios-ar +++ b/iOS/Resources/bin/arm64-apple-ios-ar @@ -1,2 +1,2 @@ #!/bin/sh -xcrun --sdk iphoneos${IOS_SDK_VERSION} ar $@ +xcrun --sdk iphoneos${IOS_SDK_VERSION} ar "$@" diff --git a/iOS/Resources/bin/arm64-apple-ios-clang b/iOS/Resources/bin/arm64-apple-ios-clang index 4d525751eba798..c39519cd1f8c94 100755 --- a/iOS/Resources/bin/arm64-apple-ios-clang +++ b/iOS/Resources/bin/arm64-apple-ios-clang @@ -1,2 +1,2 @@ #!/bin/sh -xcrun --sdk iphoneos${IOS_SDK_VERSION} clang -target arm64-apple-ios $@ +xcrun --sdk iphoneos${IOS_SDK_VERSION} clang -target arm64-apple-ios "$@" diff --git a/iOS/Resources/bin/arm64-apple-ios-clang++ b/iOS/Resources/bin/arm64-apple-ios-clang++ index f24bec11268f7e..d9b12925f384b9 100755 --- a/iOS/Resources/bin/arm64-apple-ios-clang++ +++ b/iOS/Resources/bin/arm64-apple-ios-clang++ @@ -1,2 +1,2 @@ #!/bin/sh -xcrun --sdk iphoneos${IOS_SDK_VERSION} clang++ -target arm64-apple-ios $@ +xcrun --sdk iphoneos${IOS_SDK_VERSION} clang++ -target arm64-apple-ios "$@" diff --git a/iOS/Resources/bin/arm64-apple-ios-cpp b/iOS/Resources/bin/arm64-apple-ios-cpp index 891bb25bb4318c..24da23d3448ae0 100755 --- a/iOS/Resources/bin/arm64-apple-ios-cpp +++ b/iOS/Resources/bin/arm64-apple-ios-cpp @@ -1,2 +1,2 @@ #!/bin/sh -xcrun --sdk iphoneos${IOS_SDK_VERSION} clang -target arm64-apple-ios -E $@ +xcrun --sdk iphoneos${IOS_SDK_VERSION} clang -target arm64-apple-ios -E "$@" diff --git a/iOS/Resources/bin/arm64-apple-ios-simulator-ar b/iOS/Resources/bin/arm64-apple-ios-simulator-ar index 74ed3bc6df1c2b..b836b6db9025bb 100755 --- a/iOS/Resources/bin/arm64-apple-ios-simulator-ar +++ b/iOS/Resources/bin/arm64-apple-ios-simulator-ar @@ -1,2 +1,2 @@ #!/bin/sh -xcrun --sdk iphonesimulator${IOS_SDK_VERSION} ar $@ +xcrun --sdk iphonesimulator${IOS_SDK_VERSION} ar "$@" diff --git 
a/iOS/Resources/bin/arm64-apple-ios-simulator-clang b/iOS/Resources/bin/arm64-apple-ios-simulator-clang index 32574cad284441..92e8d853d6ebc3 100755 --- a/iOS/Resources/bin/arm64-apple-ios-simulator-clang +++ b/iOS/Resources/bin/arm64-apple-ios-simulator-clang @@ -1,2 +1,2 @@ #!/bin/sh -xcrun --sdk iphonesimulator${IOS_SDK_VERSION} clang -target arm64-apple-ios-simulator $@ +xcrun --sdk iphonesimulator${IOS_SDK_VERSION} clang -target arm64-apple-ios-simulator "$@" diff --git a/iOS/Resources/bin/arm64-apple-ios-simulator-clang++ b/iOS/Resources/bin/arm64-apple-ios-simulator-clang++ index ef37d05b512959..076469cc70cf98 100755 --- a/iOS/Resources/bin/arm64-apple-ios-simulator-clang++ +++ b/iOS/Resources/bin/arm64-apple-ios-simulator-clang++ @@ -1,2 +1,2 @@ #!/bin/sh -xcrun --sdk iphonesimulator${IOS_SDK_VERSION} clang++ -target arm64-apple-ios-simulator $@ +xcrun --sdk iphonesimulator${IOS_SDK_VERSION} clang++ -target arm64-apple-ios-simulator "$@" diff --git a/iOS/Resources/bin/arm64-apple-ios-simulator-cpp b/iOS/Resources/bin/arm64-apple-ios-simulator-cpp index 6aaf6fbe188c32..c57f28cee5bcfe 100755 --- a/iOS/Resources/bin/arm64-apple-ios-simulator-cpp +++ b/iOS/Resources/bin/arm64-apple-ios-simulator-cpp @@ -1,2 +1,2 @@ #!/bin/sh -xcrun --sdk iphonesimulator${IOS_SDK_VERSION} clang -target arm64-apple-ios-simulator -E $@ +xcrun --sdk iphonesimulator${IOS_SDK_VERSION} clang -target arm64-apple-ios-simulator -E "$@" diff --git a/iOS/Resources/bin/x86_64-apple-ios-simulator-ar b/iOS/Resources/bin/x86_64-apple-ios-simulator-ar index 74ed3bc6df1c2b..b836b6db9025bb 100755 --- a/iOS/Resources/bin/x86_64-apple-ios-simulator-ar +++ b/iOS/Resources/bin/x86_64-apple-ios-simulator-ar @@ -1,2 +1,2 @@ #!/bin/sh -xcrun --sdk iphonesimulator${IOS_SDK_VERSION} ar $@ +xcrun --sdk iphonesimulator${IOS_SDK_VERSION} ar "$@" diff --git a/iOS/Resources/bin/x86_64-apple-ios-simulator-clang b/iOS/Resources/bin/x86_64-apple-ios-simulator-clang index bcbe91f6061e16..17cbe0c8a1e213 100755 --- 
a/iOS/Resources/bin/x86_64-apple-ios-simulator-clang +++ b/iOS/Resources/bin/x86_64-apple-ios-simulator-clang @@ -1,2 +1,2 @@ #!/bin/sh -xcrun --sdk iphonesimulator${IOS_SDK_VERSION} clang -target x86_64-apple-ios-simulator $@ +xcrun --sdk iphonesimulator${IOS_SDK_VERSION} clang -target x86_64-apple-ios-simulator "$@" diff --git a/iOS/Resources/bin/x86_64-apple-ios-simulator-clang++ b/iOS/Resources/bin/x86_64-apple-ios-simulator-clang++ index 86f03ea32bc2fd..565d47b24c214b 100755 --- a/iOS/Resources/bin/x86_64-apple-ios-simulator-clang++ +++ b/iOS/Resources/bin/x86_64-apple-ios-simulator-clang++ @@ -1,2 +1,2 @@ #!/bin/sh -xcrun --sdk iphonesimulator${IOS_SDK_VERSION} clang++ -target x86_64-apple-ios-simulator $@ +xcrun --sdk iphonesimulator${IOS_SDK_VERSION} clang++ -target x86_64-apple-ios-simulator "$@" diff --git a/iOS/Resources/bin/x86_64-apple-ios-simulator-cpp b/iOS/Resources/bin/x86_64-apple-ios-simulator-cpp index e6a42d9b85dec7..63fc8e8de2d38d 100755 --- a/iOS/Resources/bin/x86_64-apple-ios-simulator-cpp +++ b/iOS/Resources/bin/x86_64-apple-ios-simulator-cpp @@ -1,2 +1,2 @@ #!/bin/sh -xcrun --sdk iphonesimulator${IOS_SDK_VERSION} clang -target x86_64-apple-ios-simulator -E $@ +xcrun --sdk iphonesimulator${IOS_SDK_VERSION} clang -target x86_64-apple-ios-simulator -E "$@" From 1ef6e8ca3faf2c2b008fb170c7c44c38b86e874a Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Thu, 5 Dec 2024 10:37:14 +0100 Subject: [PATCH 13/48] gh-119182: Complete PyUnicodeWriter documentation (#127607) --- Doc/c-api/unicode.rst | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/Doc/c-api/unicode.rst b/Doc/c-api/unicode.rst index 59bd7661965d93..dcbc8804cd6b89 100644 --- a/Doc/c-api/unicode.rst +++ b/Doc/c-api/unicode.rst @@ -1588,6 +1588,11 @@ object. Create a Unicode writer instance. + *length* must be greater than or equal to ``0``. + + If *length* is greater than ``0``, preallocate an internal buffer of + *length* characters. 
+ Set an exception and return ``NULL`` on error. .. c:function:: PyObject* PyUnicodeWriter_Finish(PyUnicodeWriter *writer) @@ -1596,12 +1601,16 @@ object. Set an exception and return ``NULL`` on error. + The writer instance is invalid after this call. + .. c:function:: void PyUnicodeWriter_Discard(PyUnicodeWriter *writer) Discard the internal Unicode buffer and destroy the writer instance. If *writer* is ``NULL``, no operation is performed. + The writer instance is invalid after this call. + .. c:function:: int PyUnicodeWriter_WriteChar(PyUnicodeWriter *writer, Py_UCS4 ch) Write the single Unicode character *ch* into *writer*. From fcbe6ecdb6ed4dd93b2ee144f89a73af755e2634 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Thu, 5 Dec 2024 10:39:44 +0100 Subject: [PATCH 14/48] gh-93312: Include to get PIDFD_NONBLOCK (#127593) --- .../Library/2024-12-04-11-01-16.gh-issue-93312.9sB-Qw.rst | 2 ++ Modules/posixmodule.c | 3 +++ configure | 6 ++++++ configure.ac | 2 +- pyconfig.h.in | 3 +++ 5 files changed, 15 insertions(+), 1 deletion(-) create mode 100644 Misc/NEWS.d/next/Library/2024-12-04-11-01-16.gh-issue-93312.9sB-Qw.rst diff --git a/Misc/NEWS.d/next/Library/2024-12-04-11-01-16.gh-issue-93312.9sB-Qw.rst b/Misc/NEWS.d/next/Library/2024-12-04-11-01-16.gh-issue-93312.9sB-Qw.rst new file mode 100644 index 00000000000000..e245fa2bdd00b4 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-12-04-11-01-16.gh-issue-93312.9sB-Qw.rst @@ -0,0 +1,2 @@ +Include ```` to get ``os.PIDFD_NONBLOCK`` constant. Patch by +Victor Stinner. 
diff --git a/Modules/posixmodule.c b/Modules/posixmodule.c index 6eb7054b566e3f..2c26fbeac9a1be 100644 --- a/Modules/posixmodule.c +++ b/Modules/posixmodule.c @@ -73,6 +73,9 @@ #ifdef HAVE_SYS_TIME_H # include // futimes() #endif +#ifdef HAVE_SYS_PIDFD_H +# include // PIDFD_NONBLOCK +#endif // SGI apparently needs this forward declaration diff --git a/configure b/configure index 2fa473b9fe32c0..5e9bcb602d884e 100755 --- a/configure +++ b/configure @@ -11178,6 +11178,12 @@ if test "x$ac_cv_header_sys_param_h" = xyes then : printf "%s\n" "#define HAVE_SYS_PARAM_H 1" >>confdefs.h +fi +ac_fn_c_check_header_compile "$LINENO" "sys/pidfd.h" "ac_cv_header_sys_pidfd_h" "$ac_includes_default" +if test "x$ac_cv_header_sys_pidfd_h" = xyes +then : + printf "%s\n" "#define HAVE_SYS_PIDFD_H 1" >>confdefs.h + fi ac_fn_c_check_header_compile "$LINENO" "sys/poll.h" "ac_cv_header_sys_poll_h" "$ac_includes_default" if test "x$ac_cv_header_sys_poll_h" = xyes diff --git a/configure.ac b/configure.ac index 8ca8e0f7802742..bf3685e1b1b209 100644 --- a/configure.ac +++ b/configure.ac @@ -2932,7 +2932,7 @@ AC_CHECK_HEADERS([ \ linux/tipc.h linux/wait.h netdb.h net/ethernet.h netinet/in.h netpacket/packet.h poll.h process.h pthread.h pty.h \ sched.h setjmp.h shadow.h signal.h spawn.h stropts.h sys/audioio.h sys/bsdtty.h sys/devpoll.h \ sys/endian.h sys/epoll.h sys/event.h sys/eventfd.h sys/file.h sys/ioctl.h sys/kern_control.h \ - sys/loadavg.h sys/lock.h sys/memfd.h sys/mkdev.h sys/mman.h sys/modem.h sys/param.h sys/poll.h \ + sys/loadavg.h sys/lock.h sys/memfd.h sys/mkdev.h sys/mman.h sys/modem.h sys/param.h sys/pidfd.h sys/poll.h \ sys/random.h sys/resource.h sys/select.h sys/sendfile.h sys/socket.h sys/soundcard.h sys/stat.h \ sys/statvfs.h sys/sys_domain.h sys/syscall.h sys/sysmacros.h sys/termio.h sys/time.h sys/times.h sys/timerfd.h \ sys/types.h sys/uio.h sys/un.h sys/utsname.h sys/wait.h sys/xattr.h sysexits.h syslog.h \ diff --git a/pyconfig.h.in b/pyconfig.h.in index 
924d86627b0e9b..6a1f1284650b9f 100644 --- a/pyconfig.h.in +++ b/pyconfig.h.in @@ -1388,6 +1388,9 @@ /* Define to 1 if you have the header file. */ #undef HAVE_SYS_PARAM_H +/* Define to 1 if you have the header file. */ +#undef HAVE_SYS_PIDFD_H + /* Define to 1 if you have the header file. */ #undef HAVE_SYS_POLL_H From 67b9a5331ae45aa126877d7f96a1e235600f9c4b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Thu, 5 Dec 2024 16:01:59 +0100 Subject: [PATCH 15/48] gh-127413: allow to show specialized bytecode via `dis` CLI (#127414) --- Doc/library/dis.rst | 8 +++++++- Doc/whatsnew/3.14.rst | 6 ++++++ Lib/dis.py | 5 ++++- .../2024-11-29-14-45-26.gh-issue-127413.z11AUc.rst | 2 ++ 4 files changed, 19 insertions(+), 2 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-11-29-14-45-26.gh-issue-127413.z11AUc.rst diff --git a/Doc/library/dis.rst b/Doc/library/dis.rst index e2926f2440af6d..f8f4188d27b472 100644 --- a/Doc/library/dis.rst +++ b/Doc/library/dis.rst @@ -60,6 +60,8 @@ interpreter. The :option:`-P ` command-line option and the ``show_positions`` argument were added. + The :option:`-S ` command-line option is added. + Example: Given the function :func:`!myfunc`:: def myfunc(alist): @@ -89,7 +91,7 @@ The :mod:`dis` module can be invoked as a script from the command line: .. code-block:: sh - python -m dis [-h] [-C] [-O] [-P] [infile] + python -m dis [-h] [-C] [-O] [-P] [-S] [infile] The following options are accepted: @@ -111,6 +113,10 @@ The following options are accepted: Show positions of instructions in the source code. +.. cmdoption:: -S, --specialized + + Show specialized bytecode. + If :file:`infile` is specified, its disassembled code will be written to stdout. Otherwise, disassembly is performed on compiled source code received from stdin. 
diff --git a/Doc/whatsnew/3.14.rst b/Doc/whatsnew/3.14.rst index 52a6d6e4340194..e83c509a025ab5 100644 --- a/Doc/whatsnew/3.14.rst +++ b/Doc/whatsnew/3.14.rst @@ -348,12 +348,18 @@ dis This feature is also exposed via :option:`dis --show-positions`. (Contributed by Bénédikt Tran in :gh:`123165`.) +* Add the :option:`dis --specialized` command-line option to + show specialized bytecode. + (Contributed by Bénédikt Tran in :gh:`127413`.) + + errno ----- * Add :data:`errno.EHWPOISON` error code. (Contributed by James Roy in :gh:`126585`.) + fractions --------- diff --git a/Lib/dis.py b/Lib/dis.py index 1718e39cceb638..6b3e9ef8399e1c 100644 --- a/Lib/dis.py +++ b/Lib/dis.py @@ -1125,6 +1125,8 @@ def main(): help='show instruction offsets') parser.add_argument('-P', '--show-positions', action='store_true', help='show instruction positions') + parser.add_argument('-S', '--specialized', action='store_true', + help='show specialized bytecode') parser.add_argument('infile', nargs='?', default='-') args = parser.parse_args() if args.infile == '-': @@ -1135,7 +1137,8 @@ def main(): with open(args.infile, 'rb') as infile: source = infile.read() code = compile(source, name, "exec") - dis(code, show_caches=args.show_caches, show_offsets=args.show_offsets, show_positions=args.show_positions) + dis(code, show_caches=args.show_caches, adaptive=args.specialized, + show_offsets=args.show_offsets, show_positions=args.show_positions) if __name__ == "__main__": main() diff --git a/Misc/NEWS.d/next/Library/2024-11-29-14-45-26.gh-issue-127413.z11AUc.rst b/Misc/NEWS.d/next/Library/2024-11-29-14-45-26.gh-issue-127413.z11AUc.rst new file mode 100644 index 00000000000000..2330fb66253265 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-11-29-14-45-26.gh-issue-127413.z11AUc.rst @@ -0,0 +1,2 @@ +Add the :option:`dis --specialized` command-line option to show specialized +bytecode. Patch by Bénédikt Tran. 
From 208b0fb645c0e14b0826c0014e74a0b70c58c9d6 Mon Sep 17 00:00:00 2001 From: Peter Bierma Date: Thu, 5 Dec 2024 11:07:38 -0500 Subject: [PATCH 16/48] gh-122431: Disallow negative values in `readline.append_history_file` (#122469) Co-authored-by: Victor Stinner --- Lib/test/test_readline.py | 8 ++++++++ .../2024-07-30-11-37-40.gh-issue-122431.lAzVtu.rst | 1 + Modules/readline.c | 6 ++++++ 3 files changed, 15 insertions(+) create mode 100644 Misc/NEWS.d/next/Library/2024-07-30-11-37-40.gh-issue-122431.lAzVtu.rst diff --git a/Lib/test/test_readline.py b/Lib/test/test_readline.py index 50e77cbbb6be13..8b8772c66ee654 100644 --- a/Lib/test/test_readline.py +++ b/Lib/test/test_readline.py @@ -114,6 +114,14 @@ def test_write_read_append(self): # write_history_file can create the target readline.write_history_file(hfilename) + # Negative values should be disallowed + with self.assertRaises(ValueError): + readline.append_history_file(-42, hfilename) + + # See gh-122431, using the minimum signed integer value caused a segfault + with self.assertRaises(ValueError): + readline.append_history_file(-2147483648, hfilename) + def test_nonascii_history(self): readline.clear_history() try: diff --git a/Misc/NEWS.d/next/Library/2024-07-30-11-37-40.gh-issue-122431.lAzVtu.rst b/Misc/NEWS.d/next/Library/2024-07-30-11-37-40.gh-issue-122431.lAzVtu.rst new file mode 100644 index 00000000000000..16ad75792aefa2 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-07-30-11-37-40.gh-issue-122431.lAzVtu.rst @@ -0,0 +1 @@ +:func:`readline.append_history_file` now raises a :exc:`ValueError` when given a negative value. 
diff --git a/Modules/readline.c b/Modules/readline.c index 35655c70a4618f..7d1f703f7dbdde 100644 --- a/Modules/readline.c +++ b/Modules/readline.c @@ -351,6 +351,12 @@ readline_append_history_file_impl(PyObject *module, int nelements, PyObject *filename_obj) /*[clinic end generated code: output=5df06fc9da56e4e4 input=784b774db3a4b7c5]*/ { + if (nelements < 0) + { + PyErr_SetString(PyExc_ValueError, "nelements must be positive"); + return NULL; + } + PyObject *filename_bytes; const char *filename; int err; From d958d9f4a1b71c6d30960bf6c53c41046ea94590 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Kul=C3=ADk?= Date: Thu, 5 Dec 2024 19:43:19 +0100 Subject: [PATCH 17/48] GH-126727: Fix test_era_nl_langinfo with Japanese ERAs on Solaris (GH-127327) Fix test_era_nl_langinfo with Japanese ERAs on Solaris --- Lib/test/test__locale.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/Lib/test/test__locale.py b/Lib/test/test__locale.py index 2c751033ebb3e2..cef84fd9580c37 100644 --- a/Lib/test/test__locale.py +++ b/Lib/test/test__locale.py @@ -102,6 +102,11 @@ def accept(loc): # ps_AF doesn't work on Windows: see bpo-38324 (msg361830) del known_numerics['ps_AF'] +if sys.platform == 'sunos5': + # On Solaris, Japanese ERAs start with the year 1927, + # and thus there's less of them. 
+ known_era['ja_JP'] = (5, '+:1:2019/05/01:2019/12/31:令和:%EC元年') + class _LocaleTests(unittest.TestCase): def setUp(self): From 23f2e8f13c4e4a34106cf96fad9329cbfbf8844d Mon Sep 17 00:00:00 2001 From: Hugo van Kemenade <1324225+hugovk@users.noreply.github.com> Date: Thu, 5 Dec 2024 21:10:46 +0200 Subject: [PATCH 18/48] gh-127221: Add colour to unittest output (#127223) Co-authored-by: Kirill Podoprigora --- Doc/conf.py | 7 ++ Doc/library/doctest.rst | 4 + Doc/library/traceback.rst | 4 + Doc/library/unittest.rst | 4 +- Doc/using/cmdline.rst | 8 -- Doc/whatsnew/3.13.rst | 9 -- Doc/whatsnew/3.14.rst | 7 ++ Lib/test/test_unittest/test_async_case.py | 2 + Lib/test/test_unittest/test_program.py | 6 + Lib/test/test_unittest/test_result.py | 16 ++- Lib/test/test_unittest/test_runner.py | 13 +++ Lib/test/test_unittest/test_skipping.py | 3 + Lib/unittest/result.py | 4 +- Lib/unittest/runner.py | 108 +++++++++++------- ...-11-23-00-17-29.gh-issue-127221.OSXdFE.rst | 1 + 15 files changed, 136 insertions(+), 60 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-11-23-00-17-29.gh-issue-127221.OSXdFE.rst diff --git a/Doc/conf.py b/Doc/conf.py index 738c9901eef06f..9cde394cbaed69 100644 --- a/Doc/conf.py +++ b/Doc/conf.py @@ -78,6 +78,13 @@ .. |python_version_literal| replace:: ``Python {version}`` .. |python_x_dot_y_literal| replace:: ``python{version}`` .. |usr_local_bin_python_x_dot_y_literal| replace:: ``/usr/local/bin/python{version}`` + +.. Apparently this how you hack together a formatted link: + (https://www.docutils.org/docs/ref/rst/directives.html#replacement-text) +.. |FORCE_COLOR| replace:: ``FORCE_COLOR`` +.. _FORCE_COLOR: https://force-color.org/ +.. |NO_COLOR| replace:: ``NO_COLOR`` +.. _NO_COLOR: https://no-color.org/ """ # There are two options for replacing |today|. 
Either, you set today to some diff --git a/Doc/library/doctest.rst b/Doc/library/doctest.rst index 6b0282eed49566..106b0a6c95b7be 100644 --- a/Doc/library/doctest.rst +++ b/Doc/library/doctest.rst @@ -136,6 +136,10 @@ examples of doctests in the standard Python test suite and libraries. Especially useful examples can be found in the standard test file :file:`Lib/test/test_doctest/test_doctest.py`. +.. versionadded:: 3.13 + Output is colorized by default and can be + :ref:`controlled using environment variables `. + .. _doctest-simple-testmod: diff --git a/Doc/library/traceback.rst b/Doc/library/traceback.rst index 8f94fc448f2482..4899ed64ebad8d 100644 --- a/Doc/library/traceback.rst +++ b/Doc/library/traceback.rst @@ -44,6 +44,10 @@ The module's API can be divided into two parts: necessary for later formatting without holding references to actual exception and traceback objects. +.. versionadded:: 3.13 + Output is colorized by default and can be + :ref:`controlled using environment variables `. + Module-Level Functions ---------------------- diff --git a/Doc/library/unittest.rst b/Doc/library/unittest.rst index 38bad9405597dd..7f8b710f611002 100644 --- a/Doc/library/unittest.rst +++ b/Doc/library/unittest.rst @@ -46,7 +46,6 @@ test runner a textual interface, or return a special value to indicate the results of executing the tests. - .. seealso:: Module :mod:`doctest` @@ -198,6 +197,9 @@ For a list of all the command-line options:: In earlier versions it was only possible to run individual test methods and not modules or classes. +.. versionadded:: 3.14 + Output is colorized by default and can be + :ref:`controlled using environment variables `. Command-line options ~~~~~~~~~~~~~~~~~~~~ diff --git a/Doc/using/cmdline.rst b/Doc/using/cmdline.rst index 6cf42b27718022..7db2f4820f346a 100644 --- a/Doc/using/cmdline.rst +++ b/Doc/using/cmdline.rst @@ -663,14 +663,6 @@ output. 
To control the color output only in the Python interpreter, the precedence over ``NO_COLOR``, which in turn takes precedence over ``FORCE_COLOR``. -.. Apparently this how you hack together a formatted link: - -.. |FORCE_COLOR| replace:: ``FORCE_COLOR`` -.. _FORCE_COLOR: https://force-color.org/ - -.. |NO_COLOR| replace:: ``NO_COLOR`` -.. _NO_COLOR: https://no-color.org/ - Options you shouldn't use ~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/Doc/whatsnew/3.13.rst b/Doc/whatsnew/3.13.rst index 664b1866172378..9f6d98b9950d19 100644 --- a/Doc/whatsnew/3.13.rst +++ b/Doc/whatsnew/3.13.rst @@ -252,15 +252,6 @@ Improved error messages the canonical |NO_COLOR|_ and |FORCE_COLOR|_ environment variables. (Contributed by Pablo Galindo Salgado in :gh:`112730`.) -.. Apparently this how you hack together a formatted link: - (https://www.docutils.org/docs/ref/rst/directives.html#replacement-text) - -.. |FORCE_COLOR| replace:: ``FORCE_COLOR`` -.. _FORCE_COLOR: https://force-color.org/ - -.. |NO_COLOR| replace:: ``NO_COLOR`` -.. _NO_COLOR: https://no-color.org/ - * A common mistake is to write a script with the same name as a standard library module. When this results in errors, we now display a more helpful error message: diff --git a/Doc/whatsnew/3.14.rst b/Doc/whatsnew/3.14.rst index e83c509a025ab5..db25c037e509b6 100644 --- a/Doc/whatsnew/3.14.rst +++ b/Doc/whatsnew/3.14.rst @@ -616,6 +616,13 @@ unicodedata unittest -------- +* :mod:`unittest` output is now colored by default. + This can be controlled via the :envvar:`PYTHON_COLORS` environment + variable as well as the canonical |NO_COLOR|_ + and |FORCE_COLOR|_ environment variables. + See also :ref:`using-on-controlling-color`. + (Contributed by Hugo van Kemenade in :gh:`127221`.) + * unittest discovery supports :term:`namespace package` as start directory again. It was removed in Python 3.11. (Contributed by Jacob Walls in :gh:`80958`.) 
diff --git a/Lib/test/test_unittest/test_async_case.py b/Lib/test/test_unittest/test_async_case.py index 00ef55bdf9bc83..8ea244bff05c5f 100644 --- a/Lib/test/test_unittest/test_async_case.py +++ b/Lib/test/test_unittest/test_async_case.py @@ -2,6 +2,7 @@ import contextvars import unittest from test import support +from test.support import force_not_colorized support.requires_working_socket(module=True) @@ -252,6 +253,7 @@ async def on_cleanup(self): test.doCleanups() self.assertEqual(events, ['asyncSetUp', 'test', 'asyncTearDown', 'cleanup']) + @force_not_colorized def test_exception_in_tear_clean_up(self): class Test(unittest.IsolatedAsyncioTestCase): async def asyncSetUp(self): diff --git a/Lib/test/test_unittest/test_program.py b/Lib/test/test_unittest/test_program.py index 7241cf59f73d4f..0b46f338ac77e1 100644 --- a/Lib/test/test_unittest/test_program.py +++ b/Lib/test/test_unittest/test_program.py @@ -4,6 +4,7 @@ from test import support import unittest import test.test_unittest +from test.support import force_not_colorized from test.test_unittest.test_result import BufferedWriter @@ -120,6 +121,7 @@ def run(self, test): self.assertEqual(['test.test_unittest', 'test.test_unittest2'], program.testNames) + @force_not_colorized def test_NonExit(self): stream = BufferedWriter() program = unittest.main(exit=False, @@ -135,6 +137,7 @@ def test_NonExit(self): 'expected failures=1, unexpected successes=1)\n') self.assertTrue(out.endswith(expected)) + @force_not_colorized def test_Exit(self): stream = BufferedWriter() with self.assertRaises(SystemExit) as cm: @@ -152,6 +155,7 @@ def test_Exit(self): 'expected failures=1, unexpected successes=1)\n') self.assertTrue(out.endswith(expected)) + @force_not_colorized def test_ExitAsDefault(self): stream = BufferedWriter() with self.assertRaises(SystemExit): @@ -167,6 +171,7 @@ def test_ExitAsDefault(self): 'expected failures=1, unexpected successes=1)\n') self.assertTrue(out.endswith(expected)) + @force_not_colorized def 
test_ExitSkippedSuite(self): stream = BufferedWriter() with self.assertRaises(SystemExit) as cm: @@ -179,6 +184,7 @@ def test_ExitSkippedSuite(self): expected = '\n\nOK (skipped=1)\n' self.assertTrue(out.endswith(expected)) + @force_not_colorized def test_ExitEmptySuite(self): stream = BufferedWriter() with self.assertRaises(SystemExit) as cm: diff --git a/Lib/test/test_unittest/test_result.py b/Lib/test/test_unittest/test_result.py index 4e5ec54e9c892a..746b9fa2677717 100644 --- a/Lib/test/test_unittest/test_result.py +++ b/Lib/test/test_unittest/test_result.py @@ -7,6 +7,7 @@ import traceback import unittest from unittest.util import strclass +from test.support import force_not_colorized from test.test_unittest.support import BufferedWriter @@ -14,7 +15,7 @@ class MockTraceback(object): class TracebackException: def __init__(self, *args, **kwargs): self.capture_locals = kwargs.get('capture_locals', False) - def format(self): + def format(self, **kwargs): result = ['A traceback'] if self.capture_locals: result.append('locals') @@ -205,6 +206,7 @@ def test_1(self): self.assertIs(test_case, test) self.assertIsInstance(formatted_exc, str) + @force_not_colorized def test_addFailure_filter_traceback_frames(self): class Foo(unittest.TestCase): def test_1(self): @@ -231,6 +233,7 @@ def get_exc_info(): self.assertEqual(len(dropped), 1) self.assertIn("raise self.failureException(msg)", dropped[0]) + @force_not_colorized def test_addFailure_filter_traceback_frames_context(self): class Foo(unittest.TestCase): def test_1(self): @@ -260,6 +263,7 @@ def get_exc_info(): self.assertEqual(len(dropped), 1) self.assertIn("raise self.failureException(msg)", dropped[0]) + @force_not_colorized def test_addFailure_filter_traceback_frames_chained_exception_self_loop(self): class Foo(unittest.TestCase): def test_1(self): @@ -285,6 +289,7 @@ def get_exc_info(): formatted_exc = result.failures[0][1] self.assertEqual(formatted_exc.count("Exception: Loop\n"), 1) + @force_not_colorized def 
test_addFailure_filter_traceback_frames_chained_exception_cycle(self): class Foo(unittest.TestCase): def test_1(self): @@ -446,6 +451,7 @@ def testFailFast(self): result.addUnexpectedSuccess(None) self.assertTrue(result.shouldStop) + @force_not_colorized def testFailFastSetByRunner(self): stream = BufferedWriter() runner = unittest.TextTestRunner(stream=stream, failfast=True) @@ -619,6 +625,7 @@ def _run_test(self, test_name, verbosity, tearDownError=None): test.run(result) return stream.getvalue() + @force_not_colorized def testDotsOutput(self): self.assertEqual(self._run_test('testSuccess', 1), '.') self.assertEqual(self._run_test('testSkip', 1), 's') @@ -627,6 +634,7 @@ def testDotsOutput(self): self.assertEqual(self._run_test('testExpectedFailure', 1), 'x') self.assertEqual(self._run_test('testUnexpectedSuccess', 1), 'u') + @force_not_colorized def testLongOutput(self): classname = f'{__name__}.{self.Test.__qualname__}' self.assertEqual(self._run_test('testSuccess', 2), @@ -642,17 +650,21 @@ def testLongOutput(self): self.assertEqual(self._run_test('testUnexpectedSuccess', 2), f'testUnexpectedSuccess ({classname}.testUnexpectedSuccess) ... unexpected success\n') + @force_not_colorized def testDotsOutputSubTestSuccess(self): self.assertEqual(self._run_test('testSubTestSuccess', 1), '.') + @force_not_colorized def testLongOutputSubTestSuccess(self): classname = f'{__name__}.{self.Test.__qualname__}' self.assertEqual(self._run_test('testSubTestSuccess', 2), f'testSubTestSuccess ({classname}.testSubTestSuccess) ... ok\n') + @force_not_colorized def testDotsOutputSubTestMixed(self): self.assertEqual(self._run_test('testSubTestMixed', 1), 'sFE') + @force_not_colorized def testLongOutputSubTestMixed(self): classname = f'{__name__}.{self.Test.__qualname__}' self.assertEqual(self._run_test('testSubTestMixed', 2), @@ -661,6 +673,7 @@ def testLongOutputSubTestMixed(self): f' testSubTestMixed ({classname}.testSubTestMixed) [fail] (c=3) ... 
FAIL\n' f' testSubTestMixed ({classname}.testSubTestMixed) [error] (d=4) ... ERROR\n') + @force_not_colorized def testDotsOutputTearDownFail(self): out = self._run_test('testSuccess', 1, AssertionError('fail')) self.assertEqual(out, 'F') @@ -671,6 +684,7 @@ def testDotsOutputTearDownFail(self): out = self._run_test('testSkip', 1, AssertionError('fail')) self.assertEqual(out, 'sF') + @force_not_colorized def testLongOutputTearDownFail(self): classname = f'{__name__}.{self.Test.__qualname__}' out = self._run_test('testSuccess', 2, AssertionError('fail')) diff --git a/Lib/test/test_unittest/test_runner.py b/Lib/test/test_unittest/test_runner.py index 1b9cef43e3f9c5..1131cd73128866 100644 --- a/Lib/test/test_unittest/test_runner.py +++ b/Lib/test/test_unittest/test_runner.py @@ -4,6 +4,7 @@ import pickle import subprocess from test import support +from test.support import force_not_colorized import unittest from unittest.case import _Outcome @@ -106,6 +107,7 @@ def cleanup2(*args, **kwargs): self.assertTrue(test.doCleanups()) self.assertEqual(cleanups, [(2, (), {}), (1, (1, 2, 3), dict(four='hello', five='goodbye'))]) + @force_not_colorized def testCleanUpWithErrors(self): class TestableTest(unittest.TestCase): def testNothing(self): @@ -416,6 +418,7 @@ def cleanup2(): self.assertIsInstance(e2[1], CustomError) self.assertEqual(str(e2[1]), 'cleanup1') + @force_not_colorized def test_with_errors_addCleanUp(self): ordering = [] class TestableTest(unittest.TestCase): @@ -439,6 +442,7 @@ def tearDownClass(cls): ['setUpClass', 'setUp', 'cleanup_exc', 'tearDownClass', 'cleanup_good']) + @force_not_colorized def test_run_with_errors_addClassCleanUp(self): ordering = [] class TestableTest(unittest.TestCase): @@ -462,6 +466,7 @@ def tearDownClass(cls): ['setUpClass', 'setUp', 'test', 'cleanup_good', 'tearDownClass', 'cleanup_exc']) + @force_not_colorized def test_with_errors_in_addClassCleanup_and_setUps(self): ordering = [] class_blow_up = False @@ -514,6 +519,7 @@ def 
tearDownClass(cls): ['setUpClass', 'setUp', 'tearDownClass', 'cleanup_exc']) + @force_not_colorized def test_with_errors_in_tearDownClass(self): ordering = [] class TestableTest(unittest.TestCase): @@ -590,6 +596,7 @@ def test(self): 'inner setup', 'inner test', 'inner cleanup', 'end outer test', 'outer cleanup']) + @force_not_colorized def test_run_empty_suite_error_message(self): class EmptyTest(unittest.TestCase): pass @@ -663,6 +670,7 @@ class Module(object): self.assertEqual(cleanups, [((1, 2), {'function': 'hello'})]) + @force_not_colorized def test_run_module_cleanUp(self): blowUp = True ordering = [] @@ -802,6 +810,7 @@ def tearDownClass(cls): 'tearDownClass', 'cleanup_good']) self.assertEqual(unittest.case._module_cleanups, []) + @force_not_colorized def test_run_module_cleanUp_when_teardown_exception(self): ordering = [] class Module(object): @@ -963,6 +972,7 @@ def testNothing(self): self.assertEqual(cleanups, [((1, 2), {'function': 3, 'self': 4})]) + @force_not_colorized def test_with_errors_in_addClassCleanup(self): ordering = [] @@ -996,6 +1006,7 @@ def tearDownClass(cls): ['setUpModule', 'setUpClass', 'test', 'tearDownClass', 'cleanup_exc', 'tearDownModule', 'cleanup_good']) + @force_not_colorized def test_with_errors_in_addCleanup(self): ordering = [] class Module(object): @@ -1026,6 +1037,7 @@ def tearDown(self): ['setUpModule', 'setUp', 'test', 'tearDown', 'cleanup_exc', 'tearDownModule', 'cleanup_good']) + @force_not_colorized def test_with_errors_in_addModuleCleanup_and_setUps(self): ordering = [] module_blow_up = False @@ -1318,6 +1330,7 @@ def MockResultClass(*args): expectedresult = (runner.stream, DESCRIPTIONS, VERBOSITY) self.assertEqual(runner._makeResult(), expectedresult) + @force_not_colorized @support.requires_subprocess() def test_warnings(self): """ diff --git a/Lib/test/test_unittest/test_skipping.py b/Lib/test/test_unittest/test_skipping.py index f146dcac18ecc0..f5cb860c60b156 100644 --- a/Lib/test/test_unittest/test_skipping.py 
+++ b/Lib/test/test_unittest/test_skipping.py @@ -1,5 +1,6 @@ import unittest +from test.support import force_not_colorized from test.test_unittest.support import LoggingResult @@ -293,6 +294,7 @@ def test_die(self): self.assertFalse(result.unexpectedSuccesses) self.assertTrue(result.wasSuccessful()) + @force_not_colorized def test_expected_failure_and_fail_in_cleanup(self): class Foo(unittest.TestCase): @unittest.expectedFailure @@ -372,6 +374,7 @@ def test_die(self): self.assertEqual(result.unexpectedSuccesses, [test]) self.assertFalse(result.wasSuccessful()) + @force_not_colorized def test_unexpected_success_and_fail_in_cleanup(self): class Foo(unittest.TestCase): @unittest.expectedFailure diff --git a/Lib/unittest/result.py b/Lib/unittest/result.py index 3ace0a5b7bf2ef..97262735aa8311 100644 --- a/Lib/unittest/result.py +++ b/Lib/unittest/result.py @@ -189,7 +189,9 @@ def _exc_info_to_string(self, err, test): tb_e = traceback.TracebackException( exctype, value, tb, capture_locals=self.tb_locals, compact=True) - msgLines = list(tb_e.format()) + from _colorize import can_colorize + + msgLines = list(tb_e.format(colorize=can_colorize())) if self.buffer: output = sys.stdout.getvalue() diff --git a/Lib/unittest/runner.py b/Lib/unittest/runner.py index 2bcadf0c998bd9..d60c295a1eddf7 100644 --- a/Lib/unittest/runner.py +++ b/Lib/unittest/runner.py @@ -4,6 +4,8 @@ import time import warnings +from _colorize import get_colors + from . 
import result from .case import _SubTest from .signals import registerResult @@ -13,18 +15,18 @@ class _WritelnDecorator(object): """Used to decorate file-like objects with a handy 'writeln' method""" - def __init__(self,stream): + def __init__(self, stream): self.stream = stream def __getattr__(self, attr): if attr in ('stream', '__getstate__'): raise AttributeError(attr) - return getattr(self.stream,attr) + return getattr(self.stream, attr) def writeln(self, arg=None): if arg: self.write(arg) - self.write('\n') # text-mode streams translate to \r\n if needed + self.write('\n') # text-mode streams translate to \r\n if needed class TextTestResult(result.TestResult): @@ -43,6 +45,7 @@ def __init__(self, stream, descriptions, verbosity, *, durations=None): self.showAll = verbosity > 1 self.dots = verbosity == 1 self.descriptions = descriptions + self._ansi = get_colors() self._newline = True self.durations = durations @@ -76,86 +79,102 @@ def _write_status(self, test, status): def addSubTest(self, test, subtest, err): if err is not None: + red, reset = self._ansi.RED, self._ansi.RESET if self.showAll: if issubclass(err[0], subtest.failureException): - self._write_status(subtest, "FAIL") + self._write_status(subtest, f"{red}FAIL{reset}") else: - self._write_status(subtest, "ERROR") + self._write_status(subtest, f"{red}ERROR{reset}") elif self.dots: if issubclass(err[0], subtest.failureException): - self.stream.write('F') + self.stream.write(f"{red}F{reset}") else: - self.stream.write('E') + self.stream.write(f"{red}E{reset}") self.stream.flush() super(TextTestResult, self).addSubTest(test, subtest, err) def addSuccess(self, test): super(TextTestResult, self).addSuccess(test) + green, reset = self._ansi.GREEN, self._ansi.RESET if self.showAll: - self._write_status(test, "ok") + self._write_status(test, f"{green}ok{reset}") elif self.dots: - self.stream.write('.') + self.stream.write(f"{green}.{reset}") self.stream.flush() def addError(self, test, err): 
super(TextTestResult, self).addError(test, err) + red, reset = self._ansi.RED, self._ansi.RESET if self.showAll: - self._write_status(test, "ERROR") + self._write_status(test, f"{red}ERROR{reset}") elif self.dots: - self.stream.write('E') + self.stream.write(f"{red}E{reset}") self.stream.flush() def addFailure(self, test, err): super(TextTestResult, self).addFailure(test, err) + red, reset = self._ansi.RED, self._ansi.RESET if self.showAll: - self._write_status(test, "FAIL") + self._write_status(test, f"{red}FAIL{reset}") elif self.dots: - self.stream.write('F') + self.stream.write(f"{red}F{reset}") self.stream.flush() def addSkip(self, test, reason): super(TextTestResult, self).addSkip(test, reason) + yellow, reset = self._ansi.YELLOW, self._ansi.RESET if self.showAll: - self._write_status(test, "skipped {0!r}".format(reason)) + self._write_status(test, f"{yellow}skipped{reset} {reason!r}") elif self.dots: - self.stream.write("s") + self.stream.write(f"{yellow}s{reset}") self.stream.flush() def addExpectedFailure(self, test, err): super(TextTestResult, self).addExpectedFailure(test, err) + yellow, reset = self._ansi.YELLOW, self._ansi.RESET if self.showAll: - self.stream.writeln("expected failure") + self.stream.writeln(f"{yellow}expected failure{reset}") self.stream.flush() elif self.dots: - self.stream.write("x") + self.stream.write(f"{yellow}x{reset}") self.stream.flush() def addUnexpectedSuccess(self, test): super(TextTestResult, self).addUnexpectedSuccess(test) + red, reset = self._ansi.RED, self._ansi.RESET if self.showAll: - self.stream.writeln("unexpected success") + self.stream.writeln(f"{red}unexpected success{reset}") self.stream.flush() elif self.dots: - self.stream.write("u") + self.stream.write(f"{red}u{reset}") self.stream.flush() def printErrors(self): + bold_red = self._ansi.BOLD_RED + red = self._ansi.RED + reset = self._ansi.RESET if self.dots or self.showAll: self.stream.writeln() self.stream.flush() - self.printErrorList('ERROR', self.errors) 
- self.printErrorList('FAIL', self.failures) - unexpectedSuccesses = getattr(self, 'unexpectedSuccesses', ()) + self.printErrorList(f"{red}ERROR{reset}", self.errors) + self.printErrorList(f"{red}FAIL{reset}", self.failures) + unexpectedSuccesses = getattr(self, "unexpectedSuccesses", ()) if unexpectedSuccesses: self.stream.writeln(self.separator1) for test in unexpectedSuccesses: - self.stream.writeln(f"UNEXPECTED SUCCESS: {self.getDescription(test)}") + self.stream.writeln( + f"{red}UNEXPECTED SUCCESS{bold_red}: " + f"{self.getDescription(test)}{reset}" + ) self.stream.flush() def printErrorList(self, flavour, errors): + bold_red, reset = self._ansi.BOLD_RED, self._ansi.RESET for test, err in errors: self.stream.writeln(self.separator1) - self.stream.writeln("%s: %s" % (flavour,self.getDescription(test))) + self.stream.writeln( + f"{flavour}{bold_red}: {self.getDescription(test)}{reset}" + ) self.stream.writeln(self.separator2) self.stream.writeln("%s" % err) self.stream.flush() @@ -232,7 +251,7 @@ def run(self, test): if self.warnings: # if self.warnings is set, use it to filter all the warnings warnings.simplefilter(self.warnings) - startTime = time.perf_counter() + start_time = time.perf_counter() startTestRun = getattr(result, 'startTestRun', None) if startTestRun is not None: startTestRun() @@ -242,8 +261,8 @@ def run(self, test): stopTestRun = getattr(result, 'stopTestRun', None) if stopTestRun is not None: stopTestRun() - stopTime = time.perf_counter() - timeTaken = stopTime - startTime + stop_time = time.perf_counter() + time_taken = stop_time - start_time result.printErrors() if self.durations is not None: self._printDurations(result) @@ -253,10 +272,10 @@ def run(self, test): run = result.testsRun self.stream.writeln("Ran %d test%s in %.3fs" % - (run, run != 1 and "s" or "", timeTaken)) + (run, run != 1 and "s" or "", time_taken)) self.stream.writeln() - expectedFails = unexpectedSuccesses = skipped = 0 + expected_fails = unexpected_successes = skipped 
= 0 try: results = map(len, (result.expectedFailures, result.unexpectedSuccesses, @@ -264,26 +283,35 @@ def run(self, test): except AttributeError: pass else: - expectedFails, unexpectedSuccesses, skipped = results + expected_fails, unexpected_successes, skipped = results infos = [] + ansi = get_colors() + bold_red = ansi.BOLD_RED + green = ansi.GREEN + red = ansi.RED + reset = ansi.RESET + yellow = ansi.YELLOW + if not result.wasSuccessful(): - self.stream.write("FAILED") + self.stream.write(f"{bold_red}FAILED{reset}") failed, errored = len(result.failures), len(result.errors) if failed: - infos.append("failures=%d" % failed) + infos.append(f"{bold_red}failures={failed}{reset}") if errored: - infos.append("errors=%d" % errored) + infos.append(f"{bold_red}errors={errored}{reset}") elif run == 0 and not skipped: - self.stream.write("NO TESTS RAN") + self.stream.write(f"{yellow}NO TESTS RAN{reset}") else: - self.stream.write("OK") + self.stream.write(f"{green}OK{reset}") if skipped: - infos.append("skipped=%d" % skipped) - if expectedFails: - infos.append("expected failures=%d" % expectedFails) - if unexpectedSuccesses: - infos.append("unexpected successes=%d" % unexpectedSuccesses) + infos.append(f"{yellow}skipped={skipped}{reset}") + if expected_fails: + infos.append(f"{yellow}expected failures={expected_fails}{reset}") + if unexpected_successes: + infos.append( + f"{red}unexpected successes={unexpected_successes}{reset}" + ) if infos: self.stream.writeln(" (%s)" % (", ".join(infos),)) else: diff --git a/Misc/NEWS.d/next/Library/2024-11-23-00-17-29.gh-issue-127221.OSXdFE.rst b/Misc/NEWS.d/next/Library/2024-11-23-00-17-29.gh-issue-127221.OSXdFE.rst new file mode 100644 index 00000000000000..0e4a03caf9f49d --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-11-23-00-17-29.gh-issue-127221.OSXdFE.rst @@ -0,0 +1 @@ +Add colour to :mod:`unittest` output. Patch by Hugo van Kemenade. 
From 657d0e99aa8754372786120d6ec00c9d9970e775 Mon Sep 17 00:00:00 2001 From: Maciej Olko Date: Thu, 5 Dec 2024 21:52:58 +0100 Subject: [PATCH 19/48] [Docs] GDB howto: Fix block type of a cast example (#127621) --- Doc/howto/gdb_helpers.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Doc/howto/gdb_helpers.rst b/Doc/howto/gdb_helpers.rst index 53bbf7ddaa2ab9..98ce813ca4ab02 100644 --- a/Doc/howto/gdb_helpers.rst +++ b/Doc/howto/gdb_helpers.rst @@ -180,7 +180,7 @@ regular machine-level integer:: (gdb) p some_python_integer $4 = 42 -The internal structure can be revealed with a cast to :c:expr:`PyLongObject *`: +The internal structure can be revealed with a cast to :c:expr:`PyLongObject *`:: (gdb) p *(PyLongObject*)some_python_integer $5 = {ob_base = {ob_base = {ob_refcnt = 8, ob_type = 0x3dad39f5e0}, ob_size = 1}, From f4f530804b9d8f089eba0f157ec2144c03b13651 Mon Sep 17 00:00:00 2001 From: Sam Gross Date: Thu, 5 Dec 2024 21:07:31 +0000 Subject: [PATCH 20/48] gh-127582: Make object resurrection thread-safe for free threading. (GH-127612) Objects may be temporarily "resurrected" in destructors when calling finalizers or watcher callbacks. We previously undid the resurrection by decrementing the reference count using `Py_SET_REFCNT`. This was not thread-safe because other threads might be accessing the object (modifying its reference count) if it was exposed by the finalizer, watcher callback, or temporarily accessed by a racy dictionary or list access. This adds internal-only thread-safe functions for temporary object resurrection during destructors. 
--- Include/internal/pycore_object.h | 44 +++++++++++++++++++ ...-12-05-19-25-00.gh-issue-127582.ogUY2a.rst | 2 + Objects/codeobject.c | 7 +-- Objects/dictobject.c | 7 +-- Objects/funcobject.c | 7 +-- Objects/object.c | 40 ++++++++++++++--- 6 files changed, 87 insertions(+), 20 deletions(-) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2024-12-05-19-25-00.gh-issue-127582.ogUY2a.rst diff --git a/Include/internal/pycore_object.h b/Include/internal/pycore_object.h index ce876b093b2522..6b0b464a6fdb96 100644 --- a/Include/internal/pycore_object.h +++ b/Include/internal/pycore_object.h @@ -697,8 +697,52 @@ _PyObject_SetMaybeWeakref(PyObject *op) } } +extern int _PyObject_ResurrectEndSlow(PyObject *op); #endif +// Temporarily resurrects an object during deallocation. The refcount is set +// to one. +static inline void +_PyObject_ResurrectStart(PyObject *op) +{ + assert(Py_REFCNT(op) == 0); +#ifdef Py_REF_DEBUG + _Py_IncRefTotal(_PyThreadState_GET()); +#endif +#ifdef Py_GIL_DISABLED + _Py_atomic_store_uintptr_relaxed(&op->ob_tid, _Py_ThreadId()); + _Py_atomic_store_uint32_relaxed(&op->ob_ref_local, 1); + _Py_atomic_store_ssize_relaxed(&op->ob_ref_shared, 0); +#else + Py_SET_REFCNT(op, 1); +#endif +} + +// Undoes an object resurrection by decrementing the refcount without calling +// _Py_Dealloc(). Returns 0 if the object is dead (the normal case), and +// deallocation should continue. Returns 1 if the object is still alive. 
+static inline int +_PyObject_ResurrectEnd(PyObject *op) +{ +#ifdef Py_REF_DEBUG + _Py_DecRefTotal(_PyThreadState_GET()); +#endif +#ifndef Py_GIL_DISABLED + Py_SET_REFCNT(op, Py_REFCNT(op) - 1); + return Py_REFCNT(op) != 0; +#else + uint32_t local = _Py_atomic_load_uint32_relaxed(&op->ob_ref_local); + Py_ssize_t shared = _Py_atomic_load_ssize_acquire(&op->ob_ref_shared); + if (_Py_IsOwnedByCurrentThread(op) && local == 1 && shared == 0) { + // Fast-path: object has a single refcount and is owned by this thread + _Py_atomic_store_uint32_relaxed(&op->ob_ref_local, 0); + return 0; + } + // Slow-path: object has a shared refcount or is not owned by this thread + return _PyObject_ResurrectEndSlow(op); +#endif +} + /* Tries to incref op and returns 1 if successful or 0 otherwise. */ static inline int _Py_TryIncref(PyObject *op) diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2024-12-05-19-25-00.gh-issue-127582.ogUY2a.rst b/Misc/NEWS.d/next/Core_and_Builtins/2024-12-05-19-25-00.gh-issue-127582.ogUY2a.rst new file mode 100644 index 00000000000000..59491feeb9bcfa --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2024-12-05-19-25-00.gh-issue-127582.ogUY2a.rst @@ -0,0 +1,2 @@ +Fix non-thread-safe object resurrection when calling finalizers and watcher +callbacks in the free threading build. 
diff --git a/Objects/codeobject.c b/Objects/codeobject.c index 148350cc4b9195..eb8de136ee6432 100644 --- a/Objects/codeobject.c +++ b/Objects/codeobject.c @@ -1867,14 +1867,11 @@ free_monitoring_data(_PyCoMonitoringData *data) static void code_dealloc(PyCodeObject *co) { - assert(Py_REFCNT(co) == 0); - Py_SET_REFCNT(co, 1); + _PyObject_ResurrectStart((PyObject *)co); notify_code_watchers(PY_CODE_EVENT_DESTROY, co); - if (Py_REFCNT(co) > 1) { - Py_SET_REFCNT(co, Py_REFCNT(co) - 1); + if (_PyObject_ResurrectEnd((PyObject *)co)) { return; } - Py_SET_REFCNT(co, 0); #ifdef Py_GIL_DISABLED PyObject_GC_UnTrack(co); diff --git a/Objects/dictobject.c b/Objects/dictobject.c index a13d8084d14d66..1c9f86438dadc3 100644 --- a/Objects/dictobject.c +++ b/Objects/dictobject.c @@ -3162,14 +3162,11 @@ dict_dealloc(PyObject *self) { PyDictObject *mp = (PyDictObject *)self; PyInterpreterState *interp = _PyInterpreterState_GET(); - assert(Py_REFCNT(mp) == 0); - Py_SET_REFCNT(mp, 1); + _PyObject_ResurrectStart(self); _PyDict_NotifyEvent(interp, PyDict_EVENT_DEALLOCATED, mp, NULL, NULL); - if (Py_REFCNT(mp) > 1) { - Py_SET_REFCNT(mp, Py_REFCNT(mp) - 1); + if (_PyObject_ResurrectEnd(self)) { return; } - Py_SET_REFCNT(mp, 0); PyDictValues *values = mp->ma_values; PyDictKeysObject *keys = mp->ma_keys; Py_ssize_t i, n; diff --git a/Objects/funcobject.c b/Objects/funcobject.c index 4ba47285f7152f..cca7f01498013e 100644 --- a/Objects/funcobject.c +++ b/Objects/funcobject.c @@ -1092,14 +1092,11 @@ static void func_dealloc(PyObject *self) { PyFunctionObject *op = _PyFunction_CAST(self); - assert(Py_REFCNT(op) == 0); - Py_SET_REFCNT(op, 1); + _PyObject_ResurrectStart(self); handle_func_event(PyFunction_EVENT_DESTROY, op, NULL); - if (Py_REFCNT(op) > 1) { - Py_SET_REFCNT(op, Py_REFCNT(op) - 1); + if (_PyObject_ResurrectEnd(self)) { return; } - Py_SET_REFCNT(op, 0); _PyObject_GC_UNTRACK(op); if (op->func_weakreflist != NULL) { PyObject_ClearWeakRefs((PyObject *) op); diff --git a/Objects/object.c 
b/Objects/object.c index 8868fa29066404..74f47fa4239032 100644 --- a/Objects/object.c +++ b/Objects/object.c @@ -362,8 +362,10 @@ is_dead(PyObject *o) } # endif -void -_Py_DecRefSharedDebug(PyObject *o, const char *filename, int lineno) +// Decrement the shared reference count of an object. Return 1 if the object +// is dead and should be deallocated, 0 otherwise. +static int +_Py_DecRefSharedIsDead(PyObject *o, const char *filename, int lineno) { // Should we queue the object for the owning thread to merge? int should_queue; @@ -404,6 +406,15 @@ _Py_DecRefSharedDebug(PyObject *o, const char *filename, int lineno) } else if (new_shared == _Py_REF_MERGED) { // refcount is zero AND merged + return 1; + } + return 0; +} + +void +_Py_DecRefSharedDebug(PyObject *o, const char *filename, int lineno) +{ + if (_Py_DecRefSharedIsDead(o, filename, lineno)) { _Py_Dealloc(o); } } @@ -472,6 +483,26 @@ _Py_ExplicitMergeRefcount(PyObject *op, Py_ssize_t extra) &shared, new_shared)); return refcnt; } + +// The more complicated "slow" path for undoing the resurrection of an object. +int +_PyObject_ResurrectEndSlow(PyObject *op) +{ + if (_Py_IsImmortal(op)) { + return 1; + } + if (_Py_IsOwnedByCurrentThread(op)) { + // If the object is owned by the current thread, give up ownership and + // merge the refcount. This isn't necessary in all cases, but it + // simplifies the implementation. + Py_ssize_t refcount = _Py_ExplicitMergeRefcount(op, -1); + return refcount != 0; + } + int is_dead = _Py_DecRefSharedIsDead(op, NULL, 0); + return !is_dead; +} + + #endif /* Py_GIL_DISABLED */ @@ -550,7 +581,7 @@ PyObject_CallFinalizerFromDealloc(PyObject *self) } /* Temporarily resurrect the object. */ - Py_SET_REFCNT(self, 1); + _PyObject_ResurrectStart(self); PyObject_CallFinalizer(self); @@ -560,8 +591,7 @@ PyObject_CallFinalizerFromDealloc(PyObject *self) /* Undo the temporary resurrection; can't use DECREF here, it would * cause a recursive call. 
*/ - Py_SET_REFCNT(self, Py_REFCNT(self) - 1); - if (Py_REFCNT(self) == 0) { + if (!_PyObject_ResurrectEnd(self)) { return 0; /* this is the normal path out */ } From 8b3cccf3f9508572d85b0044519f2bd5715dacad Mon Sep 17 00:00:00 2001 From: Barney Gale Date: Thu, 5 Dec 2024 21:39:43 +0000 Subject: [PATCH 21/48] GH-125413: Revert addition of `pathlib.Path.scandir()` method (#127377) Remove documentation for `pathlib.Path.scandir()`, and rename the method to `_scandir()`. In the private pathlib ABCs, make `iterdir()` abstract and call it from `_scandir()`. It's not worthwhile to add this method at the moment - see discussion: https://discuss.python.org/t/ergonomics-of-new-pathlib-path-scandir/71721 Co-authored-by: Steve Dower --- Doc/library/pathlib.rst | 29 ----------- Doc/whatsnew/3.14.rst | 6 --- Lib/pathlib/_abc.py | 15 +++--- Lib/pathlib/_local.py | 4 +- Lib/test/test_pathlib/test_pathlib_abc.py | 48 ++++--------------- Misc/NEWS.d/3.14.0a2.rst | 2 +- ...-11-29-00-15-59.gh-issue-125413.WCN0vv.rst | 3 ++ 7 files changed, 22 insertions(+), 85 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-11-29-00-15-59.gh-issue-125413.WCN0vv.rst diff --git a/Doc/library/pathlib.rst b/Doc/library/pathlib.rst index a42ac1f8bcdf71..4b48880d6d9a18 100644 --- a/Doc/library/pathlib.rst +++ b/Doc/library/pathlib.rst @@ -1289,35 +1289,6 @@ Reading directories raised. -.. method:: Path.scandir() - - When the path points to a directory, return an iterator of - :class:`os.DirEntry` objects corresponding to entries in the directory. The - returned iterator supports the :term:`context manager` protocol. It is - implemented using :func:`os.scandir` and gives the same guarantees. - - Using :meth:`~Path.scandir` instead of :meth:`~Path.iterdir` can - significantly increase the performance of code that also needs file type or - file attribute information, because :class:`os.DirEntry` objects expose - this information if the operating system provides it when scanning a - directory. 
- - The following example displays the names of subdirectories. The - ``entry.is_dir()`` check will generally not make an additional system call:: - - >>> p = Path('docs') - >>> with p.scandir() as entries: - ... for entry in entries: - ... if entry.is_dir(): - ... entry.name - ... - '_templates' - '_build' - '_static' - - .. versionadded:: 3.14 - - .. method:: Path.glob(pattern, *, case_sensitive=None, recurse_symlinks=False) Glob the given relative *pattern* in the directory represented by this path, diff --git a/Doc/whatsnew/3.14.rst b/Doc/whatsnew/3.14.rst index db25c037e509b6..b300e348679438 100644 --- a/Doc/whatsnew/3.14.rst +++ b/Doc/whatsnew/3.14.rst @@ -532,12 +532,6 @@ pathlib (Contributed by Barney Gale in :gh:`73991`.) -* Add :meth:`pathlib.Path.scandir` to scan a directory and return an iterator - of :class:`os.DirEntry` objects. This is exactly equivalent to calling - :func:`os.scandir` on a path object. - - (Contributed by Barney Gale in :gh:`125413`.) - pdb --- diff --git a/Lib/pathlib/_abc.py b/Lib/pathlib/_abc.py index 2b314b6c9a16bf..86617ff2616f33 100644 --- a/Lib/pathlib/_abc.py +++ b/Lib/pathlib/_abc.py @@ -94,7 +94,7 @@ class PathGlobber(_GlobberBase): lexists = operator.methodcaller('exists', follow_symlinks=False) add_slash = operator.methodcaller('joinpath', '') - scandir = operator.methodcaller('scandir') + scandir = operator.methodcaller('_scandir') @staticmethod def concat_path(path, text): @@ -632,13 +632,14 @@ def write_text(self, data, encoding=None, errors=None, newline=None): with self.open(mode='w', encoding=encoding, errors=errors, newline=newline) as f: return f.write(data) - def scandir(self): - """Yield os.DirEntry objects of the directory contents. + def _scandir(self): + """Yield os.DirEntry-like objects of the directory contents. The children are yielded in arbitrary order, and the special entries '.' and '..' are not included. 
""" - raise UnsupportedOperation(self._unsupported_msg('scandir()')) + import contextlib + return contextlib.nullcontext(self.iterdir()) def iterdir(self): """Yield path objects of the directory contents. @@ -646,9 +647,7 @@ def iterdir(self): The children are yielded in arbitrary order, and the special entries '.' and '..' are not included. """ - with self.scandir() as entries: - names = [entry.name for entry in entries] - return map(self.joinpath, names) + raise UnsupportedOperation(self._unsupported_msg('iterdir()')) def _glob_selector(self, parts, case_sensitive, recurse_symlinks): if case_sensitive is None: @@ -698,7 +697,7 @@ def walk(self, top_down=True, on_error=None, follow_symlinks=False): if not top_down: paths.append((path, dirnames, filenames)) try: - with path.scandir() as entries: + with path._scandir() as entries: for entry in entries: name = entry.name try: diff --git a/Lib/pathlib/_local.py b/Lib/pathlib/_local.py index b5d9dc49f58463..bb8a252c0e94e2 100644 --- a/Lib/pathlib/_local.py +++ b/Lib/pathlib/_local.py @@ -634,8 +634,8 @@ def _filter_trailing_slash(self, paths): path_str = path_str[:-1] yield path_str - def scandir(self): - """Yield os.DirEntry objects of the directory contents. + def _scandir(self): + """Yield os.DirEntry-like objects of the directory contents. The children are yielded in arbitrary order, and the special entries '.' and '..' are not included. diff --git a/Lib/test/test_pathlib/test_pathlib_abc.py b/Lib/test/test_pathlib/test_pathlib_abc.py index 5fa2f550cefcf4..7ba3fa823a30b9 100644 --- a/Lib/test/test_pathlib/test_pathlib_abc.py +++ b/Lib/test/test_pathlib/test_pathlib_abc.py @@ -1,5 +1,4 @@ import collections -import contextlib import io import os import errno @@ -1418,24 +1417,6 @@ def close(self): 'st_mode st_ino st_dev st_nlink st_uid st_gid st_size st_atime st_mtime st_ctime') -class DummyDirEntry: - """ - Minimal os.DirEntry-like object. Returned from DummyPath.scandir(). 
- """ - __slots__ = ('name', '_is_symlink', '_is_dir') - - def __init__(self, name, is_symlink, is_dir): - self.name = name - self._is_symlink = is_symlink - self._is_dir = is_dir - - def is_symlink(self): - return self._is_symlink - - def is_dir(self, *, follow_symlinks=True): - return self._is_dir and (follow_symlinks or not self._is_symlink) - - class DummyPath(PathBase): """ Simple implementation of PathBase that keeps files and directories in @@ -1503,25 +1484,14 @@ def open(self, mode='r', buffering=-1, encoding=None, stream = io.TextIOWrapper(stream, encoding=encoding, errors=errors, newline=newline) return stream - @contextlib.contextmanager - def scandir(self): - path = self.resolve() - path_str = str(path) - if path_str in self._files: - raise NotADirectoryError(errno.ENOTDIR, "Not a directory", path_str) - elif path_str in self._directories: - yield iter([path.joinpath(name)._dir_entry for name in self._directories[path_str]]) + def iterdir(self): + path = str(self.resolve()) + if path in self._files: + raise NotADirectoryError(errno.ENOTDIR, "Not a directory", path) + elif path in self._directories: + return iter([self / name for name in self._directories[path]]) else: - raise FileNotFoundError(errno.ENOENT, "File not found", path_str) - - @property - def _dir_entry(self): - path_str = str(self) - is_symlink = path_str in self._symlinks - is_directory = (path_str in self._directories - if not is_symlink - else self._symlinks[path_str][1]) - return DummyDirEntry(self.name, is_symlink, is_directory) + raise FileNotFoundError(errno.ENOENT, "File not found", path) def mkdir(self, mode=0o777, parents=False, exist_ok=False): path = str(self.parent.resolve() / self.name) @@ -2214,9 +2184,9 @@ def test_iterdir_nodir(self): def test_scandir(self): p = self.cls(self.base) - with p.scandir() as entries: + with p._scandir() as entries: self.assertTrue(list(entries)) - with p.scandir() as entries: + with p._scandir() as entries: for entry in entries: child = p / 
entry.name self.assertIsNotNone(entry) diff --git a/Misc/NEWS.d/3.14.0a2.rst b/Misc/NEWS.d/3.14.0a2.rst index 7384ce54cb8914..d82ec98b7a3c87 100644 --- a/Misc/NEWS.d/3.14.0a2.rst +++ b/Misc/NEWS.d/3.14.0a2.rst @@ -597,7 +597,7 @@ TypeError is now raised instead of ValueError for some logical errors. .. nonce: Jat5kq .. section: Library -Add :meth:`pathlib.Path.scandir` method to efficiently fetch directory +Add :meth:`!pathlib.Path.scandir` method to efficiently fetch directory children and their file attributes. This is a trivial wrapper of :func:`os.scandir`. diff --git a/Misc/NEWS.d/next/Library/2024-11-29-00-15-59.gh-issue-125413.WCN0vv.rst b/Misc/NEWS.d/next/Library/2024-11-29-00-15-59.gh-issue-125413.WCN0vv.rst new file mode 100644 index 00000000000000..b56a77b4294ace --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-11-29-00-15-59.gh-issue-125413.WCN0vv.rst @@ -0,0 +1,3 @@ +Revert addition of :meth:`!pathlib.Path.scandir`. This method was added in +3.14.0a2. The optimizations remain for file system paths, but other +subclasses should only have to implement :meth:`pathlib.Path.iterdir`. From 25eee578c8e369b027da6d9d2725f29df6ef1cbd Mon Sep 17 00:00:00 2001 From: Hood Chatham Date: Fri, 6 Dec 2024 03:47:51 +0100 Subject: [PATCH 22/48] gh-127627: Add `posix._emscripten_debugger` function (#127628) Add a posix._emscripten_debugger function to add an emscripten breakpoint. 
--- ...-12-05-10-14-52.gh-issue-127627.fgCHOZ.rst | 2 ++ Modules/clinic/posixmodule.c.h | 28 ++++++++++++++++++- Modules/posixmodule.c | 22 ++++++++++++++- 3 files changed, 50 insertions(+), 2 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-12-05-10-14-52.gh-issue-127627.fgCHOZ.rst diff --git a/Misc/NEWS.d/next/Library/2024-12-05-10-14-52.gh-issue-127627.fgCHOZ.rst b/Misc/NEWS.d/next/Library/2024-12-05-10-14-52.gh-issue-127627.fgCHOZ.rst new file mode 100644 index 00000000000000..48a6c7d30b4a26 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-12-05-10-14-52.gh-issue-127627.fgCHOZ.rst @@ -0,0 +1,2 @@ +Added ``posix._emscripten_debugger()`` to help with debugging the test suite on +the Emscripten target. diff --git a/Modules/clinic/posixmodule.c.h b/Modules/clinic/posixmodule.c.h index cd0c4faeac83d1..554299b8598299 100644 --- a/Modules/clinic/posixmodule.c.h +++ b/Modules/clinic/posixmodule.c.h @@ -12447,6 +12447,28 @@ os__create_environ(PyObject *module, PyObject *Py_UNUSED(ignored)) return os__create_environ_impl(module); } +#if defined(__EMSCRIPTEN__) + +PyDoc_STRVAR(os__emscripten_debugger__doc__, +"_emscripten_debugger($module, /)\n" +"--\n" +"\n" +"Create a breakpoint for the JavaScript debugger. 
Emscripten only."); + +#define OS__EMSCRIPTEN_DEBUGGER_METHODDEF \ + {"_emscripten_debugger", (PyCFunction)os__emscripten_debugger, METH_NOARGS, os__emscripten_debugger__doc__}, + +static PyObject * +os__emscripten_debugger_impl(PyObject *module); + +static PyObject * +os__emscripten_debugger(PyObject *module, PyObject *Py_UNUSED(ignored)) +{ + return os__emscripten_debugger_impl(module); +} + +#endif /* defined(__EMSCRIPTEN__) */ + #ifndef OS_TTYNAME_METHODDEF #define OS_TTYNAME_METHODDEF #endif /* !defined(OS_TTYNAME_METHODDEF) */ @@ -13114,4 +13136,8 @@ os__create_environ(PyObject *module, PyObject *Py_UNUSED(ignored)) #ifndef OS__SUPPORTS_VIRTUAL_TERMINAL_METHODDEF #define OS__SUPPORTS_VIRTUAL_TERMINAL_METHODDEF #endif /* !defined(OS__SUPPORTS_VIRTUAL_TERMINAL_METHODDEF) */ -/*[clinic end generated code: output=7ee14f5e880092f5 input=a9049054013a1b77]*/ + +#ifndef OS__EMSCRIPTEN_DEBUGGER_METHODDEF + #define OS__EMSCRIPTEN_DEBUGGER_METHODDEF +#endif /* !defined(OS__EMSCRIPTEN_DEBUGGER_METHODDEF) */ +/*[clinic end generated code: output=9c2ca1dbf986c62c input=a9049054013a1b77]*/ diff --git a/Modules/posixmodule.c b/Modules/posixmodule.c index 2c26fbeac9a1be..2045c6065b8e7a 100644 --- a/Modules/posixmodule.c +++ b/Modules/posixmodule.c @@ -84,6 +84,9 @@ extern char * _getpty(int *, int, mode_t, int); #endif +#ifdef __EMSCRIPTEN__ +#include "emscripten.h" // emscripten_debugger() +#endif /* * A number of APIs are available on macOS from a certain macOS version. @@ -16845,8 +16848,24 @@ os__create_environ_impl(PyObject *module) } -static PyMethodDef posix_methods[] = { +#ifdef __EMSCRIPTEN__ +/*[clinic input] +os._emscripten_debugger + +Create a breakpoint for the JavaScript debugger. Emscripten only. 
+[clinic start generated code]*/ + +static PyObject * +os__emscripten_debugger_impl(PyObject *module) +/*[clinic end generated code: output=ad47dc3bf0661343 input=d814b1877fb6083a]*/ +{ + emscripten_debugger(); + Py_RETURN_NONE; +} +#endif /* __EMSCRIPTEN__ */ + +static PyMethodDef posix_methods[] = { OS_STAT_METHODDEF OS_ACCESS_METHODDEF OS_TTYNAME_METHODDEF @@ -17060,6 +17079,7 @@ static PyMethodDef posix_methods[] = { OS__INPUTHOOK_METHODDEF OS__IS_INPUTHOOK_INSTALLED_METHODDEF OS__CREATE_ENVIRON_METHODDEF + OS__EMSCRIPTEN_DEBUGGER_METHODDEF {NULL, NULL} /* Sentinel */ }; From e991ac8f2037d78140e417cc9a9486223eb3e786 Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" Date: Thu, 5 Dec 2024 22:33:03 -0600 Subject: [PATCH 23/48] gh-127655: Ensure `_SelectorSocketTransport.writelines` pauses the protocol if needed (#127656) Ensure `_SelectorSocketTransport.writelines` pauses the protocol if it reaches the high water mark as needed. Co-authored-by: Kumar Aditya --- Lib/asyncio/selector_events.py | 1 + Lib/test/test_asyncio/test_selector_events.py | 12 ++++++++++++ .../2024-12-05-21-35-19.gh-issue-127655.xpPoOf.rst | 1 + 3 files changed, 14 insertions(+) create mode 100644 Misc/NEWS.d/next/Security/2024-12-05-21-35-19.gh-issue-127655.xpPoOf.rst diff --git a/Lib/asyncio/selector_events.py b/Lib/asyncio/selector_events.py index f94bf10b4225e7..f1ab9b12d69a5d 100644 --- a/Lib/asyncio/selector_events.py +++ b/Lib/asyncio/selector_events.py @@ -1175,6 +1175,7 @@ def writelines(self, list_of_data): # If the entire buffer couldn't be written, register a write handler if self._buffer: self._loop._add_writer(self._sock_fd, self._write_ready) + self._maybe_pause_protocol() def can_write_eof(self): return True diff --git a/Lib/test/test_asyncio/test_selector_events.py b/Lib/test/test_asyncio/test_selector_events.py index aaeda33dd0c677..efca30f37414f9 100644 --- a/Lib/test/test_asyncio/test_selector_events.py +++ b/Lib/test/test_asyncio/test_selector_events.py @@ -805,6 +805,18 @@ 
def test_writelines_send_partial(self): self.assertTrue(self.sock.send.called) self.assertTrue(self.loop.writers) + def test_writelines_pauses_protocol(self): + data = memoryview(b'data') + self.sock.send.return_value = 2 + self.sock.send.fileno.return_value = 7 + + transport = self.socket_transport() + transport._high_water = 1 + transport.writelines([data]) + self.assertTrue(self.protocol.pause_writing.called) + self.assertTrue(self.sock.send.called) + self.assertTrue(self.loop.writers) + @unittest.skipUnless(selector_events._HAS_SENDMSG, 'no sendmsg') def test_write_sendmsg_full(self): data = memoryview(b'data') diff --git a/Misc/NEWS.d/next/Security/2024-12-05-21-35-19.gh-issue-127655.xpPoOf.rst b/Misc/NEWS.d/next/Security/2024-12-05-21-35-19.gh-issue-127655.xpPoOf.rst new file mode 100644 index 00000000000000..76cfc58121d3bd --- /dev/null +++ b/Misc/NEWS.d/next/Security/2024-12-05-21-35-19.gh-issue-127655.xpPoOf.rst @@ -0,0 +1 @@ +Fixed the :class:`!asyncio.selector_events._SelectorSocketTransport` transport not pausing writes for the protocol when the buffer reaches the high water mark when using :meth:`asyncio.WriteTransport.writelines`. From 8b7c194c7bf7e547e4f6317528f0dcb9344c18c7 Mon Sep 17 00:00:00 2001 From: Sergey B Kirpichev Date: Fri, 6 Dec 2024 13:28:32 +0300 Subject: [PATCH 24/48] gh-120010: Fix invalid (nan+nanj) results in _Py_c_prod() (GH-120287) In some cases, previously computed as (nan+nanj), we could recover meaningful component values in the result, see e.g. 
the C11, Annex G.5.1, routine _Cmultd(): >>> z = 1e300+1j >>> z*(nan+infj) # was (nan+nanj) (-inf+infj) That also fix some complex powers for small integer exponents, computed with optimized algorithm (by squaring): >>> z**5 # was (nan+nanj) Traceback (most recent call last): File "", line 1, in z**5 ~^^~ OverflowError: complex exponentiation --- Lib/test/test_complex.py | 17 ++++++ ...-06-04-08-26-25.gh-issue-120010._z-AWz.rst | 2 + Objects/complexobject.c | 60 +++++++++++++++++-- 3 files changed, 75 insertions(+), 4 deletions(-) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2024-06-04-08-26-25.gh-issue-120010._z-AWz.rst diff --git a/Lib/test/test_complex.py b/Lib/test/test_complex.py index 179556f57e884f..fd002fb00ac338 100644 --- a/Lib/test/test_complex.py +++ b/Lib/test/test_complex.py @@ -299,6 +299,22 @@ def test_mul(self): self.assertRaises(TypeError, operator.mul, 1j, None) self.assertRaises(TypeError, operator.mul, None, 1j) + for z, w, r in [(1e300+1j, complex(INF, INF), complex(NAN, INF)), + (1e300+1j, complex(NAN, INF), complex(-INF, INF)), + (1e300+1j, complex(INF, NAN), complex(INF, INF)), + (complex(INF, 1), complex(NAN, INF), complex(NAN, INF)), + (complex(INF, 1), complex(INF, NAN), complex(INF, NAN)), + (complex(NAN, 1), complex(1, INF), complex(-INF, NAN)), + (complex(1, NAN), complex(1, INF), complex(NAN, INF)), + (complex(1e200, NAN), complex(1e200, NAN), complex(INF, NAN)), + (complex(1e200, NAN), complex(NAN, 1e200), complex(NAN, INF)), + (complex(NAN, 1e200), complex(1e200, NAN), complex(NAN, INF)), + (complex(NAN, 1e200), complex(NAN, 1e200), complex(-INF, NAN)), + (complex(NAN, NAN), complex(NAN, NAN), complex(NAN, NAN))]: + with self.subTest(z=z, w=w, r=r): + self.assertComplexesAreIdentical(z * w, r) + self.assertComplexesAreIdentical(w * z, r) + def test_mod(self): # % is no longer supported on complex numbers with self.assertRaises(TypeError): @@ -340,6 +356,7 @@ def test_pow(self): self.assertAlmostEqual(pow(1j, 200), 1) 
self.assertRaises(ValueError, pow, 1+1j, 1+1j, 1+1j) self.assertRaises(OverflowError, pow, 1e200+1j, 1e200+1j) + self.assertRaises(OverflowError, pow, 1e200+1j, 5) self.assertRaises(TypeError, pow, 1j, None) self.assertRaises(TypeError, pow, None, 1j) self.assertAlmostEqual(pow(1j, 0.5), 0.7071067811865476+0.7071067811865475j) diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2024-06-04-08-26-25.gh-issue-120010._z-AWz.rst b/Misc/NEWS.d/next/Core_and_Builtins/2024-06-04-08-26-25.gh-issue-120010._z-AWz.rst new file mode 100644 index 00000000000000..7954c7f5927397 --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2024-06-04-08-26-25.gh-issue-120010._z-AWz.rst @@ -0,0 +1,2 @@ +Correct invalid corner cases which resulted in ``(nan+nanj)`` output in complex +multiplication, e.g., ``(1e300+1j)*(nan+infj)``. Patch by Sergey B Kirpichev. diff --git a/Objects/complexobject.c b/Objects/complexobject.c index 8fbca3cb02d80a..bf6187efac941f 100644 --- a/Objects/complexobject.c +++ b/Objects/complexobject.c @@ -85,11 +85,63 @@ _Py_c_neg(Py_complex a) } Py_complex -_Py_c_prod(Py_complex a, Py_complex b) +_Py_c_prod(Py_complex z, Py_complex w) { - Py_complex r; - r.real = a.real*b.real - a.imag*b.imag; - r.imag = a.real*b.imag + a.imag*b.real; + double a = z.real, b = z.imag, c = w.real, d = w.imag; + double ac = a*c, bd = b*d, ad = a*d, bc = b*c; + Py_complex r = {ac - bd, ad + bc}; + + /* Recover infinities that computed as nan+nanj. See e.g. the C11, + Annex G.5.1, routine _Cmultd(). */ + if (isnan(r.real) && isnan(r.imag)) { + int recalc = 0; + + if (isinf(a) || isinf(b)) { /* z is infinite */ + /* "Box" the infinity and change nans in the other factor to 0 */ + a = copysign(isinf(a) ? 1.0 : 0.0, a); + b = copysign(isinf(b) ? 
1.0 : 0.0, b); + if (isnan(c)) { + c = copysign(0.0, c); + } + if (isnan(d)) { + d = copysign(0.0, d); + } + recalc = 1; + } + if (isinf(c) || isinf(d)) { /* w is infinite */ + /* "Box" the infinity and change nans in the other factor to 0 */ + c = copysign(isinf(c) ? 1.0 : 0.0, c); + d = copysign(isinf(d) ? 1.0 : 0.0, d); + if (isnan(a)) { + a = copysign(0.0, a); + } + if (isnan(b)) { + b = copysign(0.0, b); + } + recalc = 1; + } + if (!recalc && (isinf(ac) || isinf(bd) || isinf(ad) || isinf(bc))) { + /* Recover infinities from overflow by changing nans to 0 */ + if (isnan(a)) { + a = copysign(0.0, a); + } + if (isnan(b)) { + b = copysign(0.0, b); + } + if (isnan(c)) { + c = copysign(0.0, c); + } + if (isnan(d)) { + d = copysign(0.0, d); + } + recalc = 1; + } + if (recalc) { + r.real = Py_INFINITY*(a*c - b*d); + r.imag = Py_INFINITY*(a*d + b*c); + } + } + return r; } From 023b7d2141467017abc27de864f3f44677768cb3 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Fri, 6 Dec 2024 10:46:59 +0000 Subject: [PATCH 25/48] GH-126491: Lower heap size limit with faster marking (GH-127519) * Faster marking of reachable objects * Changes calculation of work to do and work done. * Merges transitive closure calculations --- InternalDocs/garbage_collector.md | 50 ++++- Lib/test/test_gc.py | 14 +- Objects/dictobject.c | 4 +- Objects/genobject.c | 69 +------ Objects/typeobject.c | 13 ++ Python/gc.c | 301 ++++++++++++++---------------- 6 files changed, 208 insertions(+), 243 deletions(-) diff --git a/InternalDocs/garbage_collector.md b/InternalDocs/garbage_collector.md index 08db080a200ea4..4761f78f3593e3 100644 --- a/InternalDocs/garbage_collector.md +++ b/InternalDocs/garbage_collector.md @@ -199,22 +199,22 @@ unreachable: ```pycon >>> import gc ->>> +>>> >>> class Link: ... def __init__(self, next_link=None): ... self.next_link = next_link -... +... 
>>> link_3 = Link() >>> link_2 = Link(link_3) >>> link_1 = Link(link_2) >>> link_3.next_link = link_1 >>> A = link_1 >>> del link_1, link_2, link_3 ->>> +>>> >>> link_4 = Link() >>> link_4.next_link = link_4 >>> del link_4 ->>> +>>> >>> # Collect the unreachable Link object (and its .__dict__ dict). >>> gc.collect() 2 @@ -459,11 +459,11 @@ specifically in a generation by calling `gc.collect(generation=NUM)`. >>> # Create a reference cycle. >>> x = MyObj() >>> x.self = x ->>> +>>> >>> # Initially the object is in the young generation. >>> gc.get_objects(generation=0) [..., <__main__.MyObj object at 0x7fbcc12a3400>, ...] ->>> +>>> >>> # After a collection of the youngest generation the object >>> # moves to the old generation. >>> gc.collect(generation=0) @@ -515,6 +515,44 @@ increment. All objects directly referred to from those stack frames are added to the working set. Then the above algorithm is repeated, starting from step 2. +Determining how much work to do +------------------------------- + +We need to do a certain amount of work to enusre that garbage is collected, +but doing too much work slows down execution. + +To work out how much work we need to do, consider a heap with `L` live objects +and `G0` garbage objects at the start of a full scavenge and `G1` garbage objects +at the end of the scavenge. We don't want the amount of garbage to grow, `G1 ≤ G0`, and +we don't want too much garbage (say 1/3 of the heap maximum), `G0 ≤ L/2`. +For each full scavenge we must visit all objects, `T == L + G0 + G1`, during which +`G1` garbage objects are created. + +The number of new objects created `N` must be at least the new garbage created, `N ≥ G1`, +assuming that the number of live objects remains roughly constant. +If we set `T == 4*N` we get `T > 4*G1` and `T = L + G0 + G1` => `L + G0 > 3G1` +For a steady state heap (`G0 == G1`) we get `L > 2G0` and the desired garbage ratio. 
+ +In other words, to keep the garbage fraction to 1/3 or less we need to visit +4 times as many objects as are newly created. + +We can do better than this though. Not all new objects will be garbage. +Consider the heap at the end of the scavenge with `L1` live objects and `G1` +garbage. Also, note that `T == M + I` where `M` is the number of objects marked +as reachable and `I` is the number of objects visited in increments. +Everything in `M` is live, so `I ≥ G0` and in practice `I` is closer to `G0 + G1`. + +If we choose the amount of work done such that `2*M + I == 6N` then we can do +less work in most cases, but are still guaranteed to keep up. +Since `I ≳ G0 + G1` (not strictly true, but close enough) +`T == M + I == (6N + I)/2` and `(6N + I)/2 ≳ 4G`, so we can keep up. + +The reason that this improves performance is that `M` is usually much larger +than `I`. If `M == 10I`, then `T ≅ 3N`. + +Finally, instead of using a fixed multiple of 8, we gradually increase it as the +heap grows. This avoids wasting work for small heaps and during startup. + Optimization: reusing fields to save memory =========================================== diff --git a/Lib/test/test_gc.py b/Lib/test/test_gc.py index b5140057a69d36..baf8e95dffdfce 100644 --- a/Lib/test/test_gc.py +++ b/Lib/test/test_gc.py @@ -1161,27 +1161,19 @@ def make_ll(depth): return head head = make_ll(1000) - count = 1000 - - # There will be some objects we aren't counting, - # e.g. the gc stats dicts. This test checks - # that the counts don't grow, so we try to - # correct for the uncounted objects - # This is just an estimate. 
- CORRECTION = 20 enabled = gc.isenabled() gc.enable() olds = [] initial_heap_size = _testinternalcapi.get_tracked_heap_size() - for i in range(20_000): + iterations = max(20_000, initial_heap_size) + for i in range(iterations): newhead = make_ll(20) - count += 20 newhead.surprise = head olds.append(newhead) if len(olds) == 20: new_objects = _testinternalcapi.get_tracked_heap_size() - initial_heap_size - self.assertLess(new_objects, 27_000, f"Heap growing. Reached limit after {i} iterations") + self.assertLess(new_objects, initial_heap_size/2, f"Heap growing. Reached limit after {i} iterations") del olds[:] if not enabled: gc.disable() diff --git a/Objects/dictobject.c b/Objects/dictobject.c index 1c9f86438dadc3..de518b8dc5024b 100644 --- a/Objects/dictobject.c +++ b/Objects/dictobject.c @@ -7064,9 +7064,7 @@ int PyObject_VisitManagedDict(PyObject *obj, visitproc visit, void *arg) { PyTypeObject *tp = Py_TYPE(obj); - if((tp->tp_flags & Py_TPFLAGS_MANAGED_DICT) == 0) { - return 0; - } + assert(tp->tp_flags & Py_TPFLAGS_MANAGED_DICT); if (tp->tp_flags & Py_TPFLAGS_INLINE_VALUES) { PyDictValues *values = _PyObject_InlineValues(obj); if (values->valid) { diff --git a/Objects/genobject.c b/Objects/genobject.c index e87f199c2504ba..33679afecb420f 100644 --- a/Objects/genobject.c +++ b/Objects/genobject.c @@ -882,25 +882,7 @@ PyTypeObject PyGen_Type = { gen_methods, /* tp_methods */ gen_memberlist, /* tp_members */ gen_getsetlist, /* tp_getset */ - 0, /* tp_base */ - 0, /* tp_dict */ - - 0, /* tp_descr_get */ - 0, /* tp_descr_set */ - 0, /* tp_dictoffset */ - 0, /* tp_init */ - 0, /* tp_alloc */ - 0, /* tp_new */ - 0, /* tp_free */ - 0, /* tp_is_gc */ - 0, /* tp_bases */ - 0, /* tp_mro */ - 0, /* tp_cache */ - 0, /* tp_subclasses */ - 0, /* tp_weaklist */ - 0, /* tp_del */ - 0, /* tp_version_tag */ - _PyGen_Finalize, /* tp_finalize */ + .tp_finalize = _PyGen_Finalize, }; static PyObject * @@ -1242,24 +1224,7 @@ PyTypeObject PyCoro_Type = { coro_methods, /* tp_methods */ 
coro_memberlist, /* tp_members */ coro_getsetlist, /* tp_getset */ - 0, /* tp_base */ - 0, /* tp_dict */ - 0, /* tp_descr_get */ - 0, /* tp_descr_set */ - 0, /* tp_dictoffset */ - 0, /* tp_init */ - 0, /* tp_alloc */ - 0, /* tp_new */ - 0, /* tp_free */ - 0, /* tp_is_gc */ - 0, /* tp_bases */ - 0, /* tp_mro */ - 0, /* tp_cache */ - 0, /* tp_subclasses */ - 0, /* tp_weaklist */ - 0, /* tp_del */ - 0, /* tp_version_tag */ - _PyGen_Finalize, /* tp_finalize */ + .tp_finalize = _PyGen_Finalize, }; static void @@ -1464,7 +1429,6 @@ typedef struct _PyAsyncGenWrappedValue { (assert(_PyAsyncGenWrappedValue_CheckExact(op)), \ _Py_CAST(_PyAsyncGenWrappedValue*, (op))) - static int async_gen_traverse(PyObject *self, visitproc visit, void *arg) { @@ -1673,24 +1637,7 @@ PyTypeObject PyAsyncGen_Type = { async_gen_methods, /* tp_methods */ async_gen_memberlist, /* tp_members */ async_gen_getsetlist, /* tp_getset */ - 0, /* tp_base */ - 0, /* tp_dict */ - 0, /* tp_descr_get */ - 0, /* tp_descr_set */ - 0, /* tp_dictoffset */ - 0, /* tp_init */ - 0, /* tp_alloc */ - 0, /* tp_new */ - 0, /* tp_free */ - 0, /* tp_is_gc */ - 0, /* tp_bases */ - 0, /* tp_mro */ - 0, /* tp_cache */ - 0, /* tp_subclasses */ - 0, /* tp_weaklist */ - 0, /* tp_del */ - 0, /* tp_version_tag */ - _PyGen_Finalize, /* tp_finalize */ + .tp_finalize = _PyGen_Finalize, }; @@ -1935,16 +1882,6 @@ PyTypeObject _PyAsyncGenASend_Type = { PyObject_SelfIter, /* tp_iter */ async_gen_asend_iternext, /* tp_iternext */ async_gen_asend_methods, /* tp_methods */ - 0, /* tp_members */ - 0, /* tp_getset */ - 0, /* tp_base */ - 0, /* tp_dict */ - 0, /* tp_descr_get */ - 0, /* tp_descr_set */ - 0, /* tp_dictoffset */ - 0, /* tp_init */ - 0, /* tp_alloc */ - 0, /* tp_new */ .tp_finalize = async_gen_asend_finalize, }; diff --git a/Objects/typeobject.c b/Objects/typeobject.c index 2068d6aa9be52b..cc95b9857e3f2d 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -2355,6 +2355,16 @@ subtype_traverse(PyObject *self, 
visitproc visit, void *arg) return 0; } + +static int +plain_object_traverse(PyObject *self, visitproc visit, void *arg) +{ + PyTypeObject *type = Py_TYPE(self); + assert(type->tp_flags & Py_TPFLAGS_MANAGED_DICT); + Py_VISIT(type); + return PyObject_VisitManagedDict(self, visit, arg); +} + static void clear_slots(PyTypeObject *type, PyObject *self) { @@ -4147,6 +4157,9 @@ type_new_descriptors(const type_new_ctx *ctx, PyTypeObject *type) assert((type->tp_flags & Py_TPFLAGS_MANAGED_DICT) == 0); type->tp_flags |= Py_TPFLAGS_MANAGED_DICT; type->tp_dictoffset = -1; + if (type->tp_basicsize == sizeof(PyObject)) { + type->tp_traverse = plain_object_traverse; + } } type->tp_basicsize = slotoffset; diff --git a/Python/gc.c b/Python/gc.c index 5b9588c8741b97..fd29a48518e71b 100644 --- a/Python/gc.c +++ b/Python/gc.c @@ -1277,18 +1277,13 @@ gc_list_set_space(PyGC_Head *list, int space) * space faster than objects are added to the old space. * * Each young or incremental collection adds a number of - * objects, S (for survivors) to the old space, and - * incremental collectors scan I objects from the old space. - * I > S must be true. We also want I > S * N to be where - * N > 1. Higher values of N mean that the old space is + * new objects (N) to the heap, and incremental collectors + * scan I objects from the old space. + * I > N must be true. We also want I > N * K to be where + * K > 2. Higher values of K mean that the old space is * scanned more rapidly. 
- * The default incremental threshold of 10 translates to - * N == 1.4 (1 + 4/threshold) */ - -/* Divide by 10, so that the default incremental threshold of 10 - * scans objects at 1% of the heap size */ -#define SCAN_RATE_DIVISOR 10 +#define SCAN_RATE_DIVISOR 5 static void add_stats(GCState *gcstate, int gen, struct gc_collection_stats *stats) @@ -1330,69 +1325,76 @@ gc_collect_young(PyThreadState *tstate, validate_spaces(gcstate); } -#ifndef NDEBUG -static inline int -IS_IN_VISITED(PyGC_Head *gc, int visited_space) +typedef struct work_stack { + PyGC_Head *top; + int visited_space; +} WorkStack; + +/* Remove gc from the list it is currently in and push it to the stack */ +static inline void +push_to_stack(PyGC_Head *gc, WorkStack *stack) { - assert(visited_space == 0 || other_space(visited_space) == 0); - return gc_old_space(gc) == visited_space; + PyGC_Head *prev = GC_PREV(gc); + PyGC_Head *next = GC_NEXT(gc); + _PyGCHead_SET_NEXT(prev, next); + _PyGCHead_SET_PREV(next, prev); + _PyGCHead_SET_PREV(gc, stack->top); + stack->top = gc; } -#endif -struct container_and_flag { - PyGC_Head *container; - int visited_space; - intptr_t size; -}; +static inline PyGC_Head * +pop_from_stack(WorkStack *stack) +{ + PyGC_Head *gc = stack->top; + stack->top = _PyGCHead_PREV(gc); + return gc; +} -/* A traversal callback for adding to container) */ -static int -visit_add_to_container(PyObject *op, void *arg) +/* append list `from` to `stack`; `from` becomes an empty list */ +static void +move_list_to_stack(PyGC_Head *from, WorkStack *stack) { - OBJECT_STAT_INC(object_visits); - struct container_and_flag *cf = (struct container_and_flag *)arg; - int visited = cf->visited_space; - assert(visited == get_gc_state()->visited_space); - if (!_Py_IsImmortal(op) && _PyObject_IS_GC(op)) { + if (!gc_list_is_empty(from)) { + PyGC_Head *from_head = GC_NEXT(from); + PyGC_Head *from_tail = GC_PREV(from); + _PyGCHead_SET_PREV(from_head, stack->top); + stack->top = from_tail; + gc_list_init(from); 
+ } +} + +static inline void +move_to_stack(PyObject *op, WorkStack *stack, int visited_space) +{ + assert(op != NULL); + if (_PyObject_IS_GC(op)) { PyGC_Head *gc = AS_GC(op); if (_PyObject_GC_IS_TRACKED(op) && - gc_old_space(gc) != visited) { + gc_old_space(gc) != visited_space) { + assert(!_Py_IsImmortal(op)); gc_flip_old_space(gc); - gc_list_move(gc, cf->container); - cf->size++; + push_to_stack(gc, stack); } } - return 0; } -static intptr_t -expand_region_transitively_reachable(PyGC_Head *container, PyGC_Head *gc, GCState *gcstate) -{ - struct container_and_flag arg = { - .container = container, - .visited_space = gcstate->visited_space, - .size = 0 - }; - assert(GC_NEXT(gc) == container); - while (gc != container) { - /* Survivors will be moved to visited space, so they should - * have been marked as visited */ - assert(IS_IN_VISITED(gc, gcstate->visited_space)); - PyObject *op = FROM_GC(gc); - assert(_PyObject_GC_IS_TRACKED(op)); - if (_Py_IsImmortal(op)) { - PyGC_Head *next = GC_NEXT(gc); - gc_list_move(gc, &get_gc_state()->permanent_generation.head); - gc = next; - continue; - } - traverseproc traverse = Py_TYPE(op)->tp_traverse; - (void) traverse(op, - visit_add_to_container, - &arg); - gc = GC_NEXT(gc); - } - return arg.size; +static void +move_unvisited(PyObject *op, WorkStack *stack, int visited_space) +{ + move_to_stack(op, stack, visited_space); +} + +#define MOVE_UNVISITED(O, T, V) if ((O) != NULL) move_unvisited((O), (T), (V)) + +/* A traversal callback for adding to container */ +static int +visit_add_to_container(PyObject *op, void *arg) +{ + OBJECT_STAT_INC(object_visits); + WorkStack *stack = (WorkStack *)arg; + assert(stack->visited_space == get_gc_state()->visited_space); + move_to_stack(op, stack, stack->visited_space); + return 0; } /* Do bookkeeping for a completed GC cycle */ @@ -1420,54 +1422,62 @@ completed_scavenge(GCState *gcstate) gc_list_set_space(&gcstate->old[not_visited].head, not_visited); } 
assert(gc_list_is_empty(&gcstate->old[visited].head)); - gcstate->work_to_do = 0; gcstate->phase = GC_PHASE_MARK; } -static intptr_t -move_to_reachable(PyObject *op, PyGC_Head *reachable, int visited_space) -{ - if (op != NULL && !_Py_IsImmortal(op) && _PyObject_IS_GC(op)) { - PyGC_Head *gc = AS_GC(op); - if (_PyObject_GC_IS_TRACKED(op) && - gc_old_space(gc) != visited_space) { - gc_flip_old_space(gc); - gc_list_move(gc, reachable); - return 1; +static void +frame_move_unvisited(_PyInterpreterFrame *frame, WorkStack *stack, int visited_space) +{ + _PyStackRef *locals = frame->localsplus; + _PyStackRef *sp = frame->stackpointer; + if (frame->f_locals != NULL) { + move_unvisited(frame->f_locals, stack, visited_space); + } + PyObject *func = PyStackRef_AsPyObjectBorrow(frame->f_funcobj); + move_unvisited(func, stack, visited_space); + while (sp > locals) { + sp--; + _PyStackRef ref = *sp; + if (!PyStackRef_IsNull(ref)) { + PyObject *op = PyStackRef_AsPyObjectBorrow(ref); + if (!_Py_IsImmortal(op)) { + move_unvisited(op, stack, visited_space); + } } } - return 0; } -static intptr_t -mark_all_reachable(PyGC_Head *reachable, PyGC_Head *visited, int visited_space) +static Py_ssize_t +move_all_transitively_reachable(WorkStack *stack, PyGC_Head *visited, int visited_space) { // Transitively traverse all objects from reachable, until empty - struct container_and_flag arg = { - .container = reachable, - .visited_space = visited_space, - .size = 0 - }; - while (!gc_list_is_empty(reachable)) { - PyGC_Head *gc = _PyGCHead_NEXT(reachable); + Py_ssize_t objects_marked = 0; + while (stack->top != NULL) { + PyGC_Head *gc = pop_from_stack(stack); assert(gc_old_space(gc) == visited_space); - gc_list_move(gc, visited); + gc_list_append(gc, visited); + objects_marked++; PyObject *op = FROM_GC(gc); - traverseproc traverse = Py_TYPE(op)->tp_traverse; - (void) traverse(op, - visit_add_to_container, - &arg); + assert(PyObject_IS_GC(op)); + assert(_PyObject_GC_IS_TRACKED(op)); + if 
(_Py_IsImmortal(op)) { + _PyObject_GC_UNTRACK(op); + } + else { + traverseproc traverse = Py_TYPE(op)->tp_traverse; + (void) traverse(op, visit_add_to_container, stack); + } } gc_list_validate_space(visited, visited_space); - return arg.size; + return objects_marked; } static intptr_t mark_stacks(PyInterpreterState *interp, PyGC_Head *visited, int visited_space, bool start) { - PyGC_Head reachable; - gc_list_init(&reachable); - Py_ssize_t objects_marked = 0; + WorkStack stack; + stack.top = NULL; + stack.visited_space = visited_space; // Move all objects on stacks to reachable _PyRuntimeState *runtime = &_PyRuntime; HEAD_LOCK(runtime); @@ -1480,27 +1490,7 @@ mark_stacks(PyInterpreterState *interp, PyGC_Head *visited, int visited_space, b frame = frame->previous; continue; } - _PyStackRef *locals = frame->localsplus; - _PyStackRef *sp = frame->stackpointer; - objects_marked += move_to_reachable(frame->f_locals, &reachable, visited_space); - PyObject *func = PyStackRef_AsPyObjectBorrow(frame->f_funcobj); - objects_marked += move_to_reachable(func, &reachable, visited_space); - while (sp > locals) { - sp--; - if (PyStackRef_IsNull(*sp)) { - continue; - } - PyObject *op = PyStackRef_AsPyObjectBorrow(*sp); - if (!_Py_IsImmortal(op) && _PyObject_IS_GC(op)) { - PyGC_Head *gc = AS_GC(op); - if (_PyObject_GC_IS_TRACKED(op) && - gc_old_space(gc) != visited_space) { - gc_flip_old_space(gc); - objects_marked++; - gc_list_move(gc, &reachable); - } - } - } + frame_move_unvisited(frame, &stack, visited_space); if (!start && frame->visited) { // If this frame has already been visited, then the lower frames // will have already been visited and will not have changed @@ -1513,31 +1503,31 @@ mark_stacks(PyInterpreterState *interp, PyGC_Head *visited, int visited_space, b ts = PyThreadState_Next(ts); HEAD_UNLOCK(runtime); } - objects_marked += mark_all_reachable(&reachable, visited, visited_space); - assert(gc_list_is_empty(&reachable)); + Py_ssize_t objects_marked = 
move_all_transitively_reachable(&stack, visited, visited_space); + assert(stack.top == NULL); return objects_marked; } static intptr_t mark_global_roots(PyInterpreterState *interp, PyGC_Head *visited, int visited_space) { - PyGC_Head reachable; - gc_list_init(&reachable); - Py_ssize_t objects_marked = 0; - objects_marked += move_to_reachable(interp->sysdict, &reachable, visited_space); - objects_marked += move_to_reachable(interp->builtins, &reachable, visited_space); - objects_marked += move_to_reachable(interp->dict, &reachable, visited_space); + WorkStack stack; + stack.top = NULL; + stack.visited_space = visited_space; + MOVE_UNVISITED(interp->sysdict, &stack, visited_space); + MOVE_UNVISITED(interp->builtins, &stack, visited_space); + MOVE_UNVISITED(interp->dict, &stack, visited_space); struct types_state *types = &interp->types; for (int i = 0; i < _Py_MAX_MANAGED_STATIC_BUILTIN_TYPES; i++) { - objects_marked += move_to_reachable(types->builtins.initialized[i].tp_dict, &reachable, visited_space); - objects_marked += move_to_reachable(types->builtins.initialized[i].tp_subclasses, &reachable, visited_space); + MOVE_UNVISITED(types->builtins.initialized[i].tp_dict, &stack, visited_space); + MOVE_UNVISITED(types->builtins.initialized[i].tp_subclasses, &stack, visited_space); } for (int i = 0; i < _Py_MAX_MANAGED_STATIC_EXT_TYPES; i++) { - objects_marked += move_to_reachable(types->for_extensions.initialized[i].tp_dict, &reachable, visited_space); - objects_marked += move_to_reachable(types->for_extensions.initialized[i].tp_subclasses, &reachable, visited_space); + MOVE_UNVISITED(types->for_extensions.initialized[i].tp_dict, &stack, visited_space); + MOVE_UNVISITED(types->for_extensions.initialized[i].tp_subclasses, &stack, visited_space); } - objects_marked += mark_all_reachable(&reachable, visited, visited_space); - assert(gc_list_is_empty(&reachable)); + Py_ssize_t objects_marked = move_all_transitively_reachable(&stack, visited, visited_space); + 
assert(stack.top == NULL); return objects_marked; } @@ -1549,39 +1539,35 @@ mark_at_start(PyThreadState *tstate) PyGC_Head *visited = &gcstate->old[gcstate->visited_space].head; Py_ssize_t objects_marked = mark_global_roots(tstate->interp, visited, gcstate->visited_space); objects_marked += mark_stacks(tstate->interp, visited, gcstate->visited_space, true); - gcstate->work_to_do -= objects_marked; gcstate->phase = GC_PHASE_COLLECT; validate_spaces(gcstate); return objects_marked; } + +/* See InternalDocs/garbage_collector.md for more details. */ +#define MAX_HEAP_PORTION_MULTIPLIER 5 +#define MARKING_PROGRESS_MULTIPLIER 2 + static intptr_t assess_work_to_do(GCState *gcstate) { - /* The amount of work we want to do depends on three things. + /* The amount of work we want to do depends on two things. * 1. The number of new objects created - * 2. The growth in heap size since the last collection - * 3. The heap size (up to the number of new objects, to avoid quadratic effects) - * - * For a steady state heap, the amount of work to do is three times the number - * of new objects added to the heap. This ensures that we stay ahead in the - * worst case of all new objects being garbage. - * - * This could be improved by tracking survival rates, but it is still a - * large improvement on the non-marking approach. + * 2. 
The heap size (up to a multiple of the number of new objects, to avoid quadratic effects) */ intptr_t scale_factor = gcstate->old[0].threshold; if (scale_factor < 2) { scale_factor = 2; } intptr_t new_objects = gcstate->young.count; - intptr_t max_heap_fraction = new_objects*3/2; - intptr_t heap_fraction = gcstate->heap_size / SCAN_RATE_DIVISOR / scale_factor; - if (heap_fraction > max_heap_fraction) { - heap_fraction = max_heap_fraction; + intptr_t max_heap_portion = new_objects * MAX_HEAP_PORTION_MULTIPLIER; + intptr_t heap_portion = gcstate->heap_size / SCAN_RATE_DIVISOR / scale_factor; + if (heap_portion > max_heap_portion) { + heap_portion = max_heap_portion; } gcstate->young.count = 0; - return new_objects + heap_fraction; + return new_objects + heap_portion; } static void @@ -1594,36 +1580,37 @@ gc_collect_increment(PyThreadState *tstate, struct gc_collection_stats *stats) if (gcstate->phase == GC_PHASE_MARK) { Py_ssize_t objects_marked = mark_at_start(tstate); GC_STAT_ADD(1, objects_transitively_reachable, objects_marked); - gcstate->work_to_do -= objects_marked; + gcstate->work_to_do -= objects_marked * MARKING_PROGRESS_MULTIPLIER; validate_spaces(gcstate); return; } PyGC_Head *not_visited = &gcstate->old[gcstate->visited_space^1].head; PyGC_Head *visited = &gcstate->old[gcstate->visited_space].head; - PyGC_Head increment; - gc_list_init(&increment); - int scale_factor = gcstate->old[0].threshold; - if (scale_factor < 2) { - scale_factor = 2; - } intptr_t objects_marked = mark_stacks(tstate->interp, visited, gcstate->visited_space, false); GC_STAT_ADD(1, objects_transitively_reachable, objects_marked); - gcstate->work_to_do -= objects_marked; + gcstate->work_to_do -= objects_marked * MARKING_PROGRESS_MULTIPLIER; gc_list_set_space(&gcstate->young.head, gcstate->visited_space); - gc_list_merge(&gcstate->young.head, &increment); + PyGC_Head increment; + gc_list_init(&increment); + WorkStack working; + working.top = 0; + working.visited_space = 
gcstate->visited_space; + move_list_to_stack(&gcstate->young.head, &working); + Py_ssize_t increment_size = move_all_transitively_reachable(&working, &increment, gcstate->visited_space); gc_list_validate_space(&increment, gcstate->visited_space); - Py_ssize_t increment_size = gc_list_size(&increment); + assert(working.top == NULL); while (increment_size < gcstate->work_to_do) { if (gc_list_is_empty(not_visited)) { break; } PyGC_Head *gc = _PyGCHead_NEXT(not_visited); - gc_list_move(gc, &increment); - increment_size++; - assert(!_Py_IsImmortal(FROM_GC(gc))); gc_set_old_space(gc, gcstate->visited_space); - increment_size += expand_region_transitively_reachable(&increment, gc, gcstate); + push_to_stack(gc, &working); + assert(!_Py_IsImmortal(FROM_GC(gc))); + increment_size += move_all_transitively_reachable(&working, &increment, gcstate->visited_space); + assert(working.top == NULL); } + assert(increment_size == gc_list_size(&increment)); GC_STAT_ADD(1, objects_not_transitively_reachable, increment_size); validate_list(&increment, collecting_clear_unreachable_clear); gc_list_validate_space(&increment, gcstate->visited_space); @@ -1632,7 +1619,6 @@ gc_collect_increment(PyThreadState *tstate, struct gc_collection_stats *stats) gc_collect_region(tstate, &increment, &survivors, stats); gc_list_merge(&survivors, visited); assert(gc_list_is_empty(&increment)); - gcstate->work_to_do += gcstate->heap_size / SCAN_RATE_DIVISOR / scale_factor; gcstate->work_to_do -= increment_size; add_stats(gcstate, 1, stats); @@ -1668,6 +1654,7 @@ gc_collect_full(PyThreadState *tstate, gcstate->old[0].count = 0; gcstate->old[1].count = 0; completed_scavenge(gcstate); + gcstate->work_to_do = -gcstate->young.threshold; _PyGC_ClearAllFreeLists(tstate->interp); validate_spaces(gcstate); add_stats(gcstate, 2, stats); From 77a61c0465c27c1c4ba7cddf4638d9ed75259671 Mon Sep 17 00:00:00 2001 From: Yuki Kobayashi Date: Fri, 6 Dec 2024 23:09:20 +0900 Subject: [PATCH 26/48] gh-101100: amend references 
starting with `!~` in gh-127054 (#127684) --- Doc/tutorial/datastructures.rst | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/Doc/tutorial/datastructures.rst b/Doc/tutorial/datastructures.rst index 263b0c2e2815a1..cbe780e075baf5 100644 --- a/Doc/tutorial/datastructures.rst +++ b/Doc/tutorial/datastructures.rst @@ -142,8 +142,8 @@ Using Lists as Stacks The list methods make it very easy to use a list as a stack, where the last element added is the first element retrieved ("last-in, first-out"). To add an -item to the top of the stack, use :meth:`!~list.append`. To retrieve an item from the -top of the stack, use :meth:`!~list.pop` without an explicit index. For example:: +item to the top of the stack, use :meth:`!append`. To retrieve an item from the +top of the stack, use :meth:`!pop` without an explicit index. For example:: >>> stack = [3, 4, 5] >>> stack.append(6) @@ -340,7 +340,7 @@ The :keyword:`!del` statement ============================= There is a way to remove an item from a list given its index instead of its -value: the :keyword:`del` statement. This differs from the :meth:`!~list.pop` method +value: the :keyword:`del` statement. This differs from the :meth:`!pop` method which returns a value. The :keyword:`!del` statement can also be used to remove slices from a list or clear the entire list (which we did earlier by assignment of an empty list to the slice). For example:: @@ -500,8 +500,8 @@ any immutable type; strings and numbers can always be keys. Tuples can be used as keys if they contain only strings, numbers, or tuples; if a tuple contains any mutable object either directly or indirectly, it cannot be used as a key. You can't use lists as keys, since lists can be modified in place using index -assignments, slice assignments, or methods like :meth:`!~list.append` and -:meth:`!~list.extend`. +assignments, slice assignments, or methods like :meth:`!append` and +:meth:`!extend`. 
It is best to think of a dictionary as a set of *key: value* pairs, with the requirement that the keys are unique (within one dictionary). A pair of From 36c6178d372b075e9c74b786cfb5e47702976b1c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Fri, 6 Dec 2024 15:31:30 +0100 Subject: [PATCH 27/48] gh-126024: fix UBSan failure in `unicodeobject.c:find_first_nonascii` (GH-127566) --- Objects/unicodeobject.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 463da06445984b..33c4747bbef488 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -5083,12 +5083,9 @@ find_first_nonascii(const unsigned char *start, const unsigned char *end) const unsigned char *p2 = _Py_ALIGN_UP(p, SIZEOF_SIZE_T); #if PY_LITTLE_ENDIAN && HAVE_CTZ if (p < p2) { -#if defined(_M_AMD64) || defined(_M_IX86) || defined(__x86_64__) || defined(__i386__) - // x86 and amd64 are little endian and can load unaligned memory. - size_t u = *(const size_t*)p & ASCII_CHAR_MASK; -#else - size_t u = load_unaligned(p, p2 - p) & ASCII_CHAR_MASK; -#endif + size_t u; + memcpy(&u, p, sizeof(size_t)); + u &= ASCII_CHAR_MASK; if (u) { return (ctz(u) - 7) / 8; } From a353455fca1b8f468ff3ffbb4b5e316510b4fd43 Mon Sep 17 00:00:00 2001 From: Sam Gross Date: Fri, 6 Dec 2024 15:48:24 +0000 Subject: [PATCH 28/48] gh-125610: Fix `STORE_ATTR_INSTANCE_VALUE` specialization check (GH-125612) The `STORE_ATTR_INSTANCE_VALUE` opcode doesn't support objects with non-NULL managed dictionaries, so don't specialize to that op in that case. 
--- Python/specialize.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/Python/specialize.c b/Python/specialize.c index ec2cd7025e5054..d3fea717243847 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -947,7 +947,10 @@ specialize_dict_access( return 0; } _PyAttrCache *cache = (_PyAttrCache *)(instr + 1); - if (type->tp_flags & Py_TPFLAGS_INLINE_VALUES && _PyObject_InlineValues(owner)->valid) { + if (type->tp_flags & Py_TPFLAGS_INLINE_VALUES && + _PyObject_InlineValues(owner)->valid && + !(base_op == STORE_ATTR && _PyObject_GetManagedDict(owner) != NULL)) + { PyDictKeysObject *keys = ((PyHeapTypeObject *)type)->ht_cached_keys; assert(PyUnicode_CheckExact(name)); Py_ssize_t index = _PyDictKeys_StringLookup(keys, name); From 12680ec5bd45c85b6daebe0739d30ef45f089efa Mon Sep 17 00:00:00 2001 From: Peter Bierma Date: Fri, 6 Dec 2024 10:58:19 -0500 Subject: [PATCH 29/48] gh-127314: Don't mention the GIL when calling without a thread state on the free-threaded build (#127315) Co-authored-by: Victor Stinner --- Include/internal/pycore_pystate.h | 8 ++++++++ Lib/test/test_capi/test_mem.py | 9 +++++++-- Lib/test/test_capi/test_misc.py | 17 ++++++++++++----- ...24-11-26-22-06-10.gh-issue-127314.SsRrIu.rst | 2 ++ Objects/obmalloc.c | 7 +++++++ 5 files changed, 36 insertions(+), 7 deletions(-) create mode 100644 Misc/NEWS.d/next/C_API/2024-11-26-22-06-10.gh-issue-127314.SsRrIu.rst diff --git a/Include/internal/pycore_pystate.h b/Include/internal/pycore_pystate.h index 54d8803bc0bdb6..1e73e541ef8de0 100644 --- a/Include/internal/pycore_pystate.h +++ b/Include/internal/pycore_pystate.h @@ -190,10 +190,18 @@ static inline void _Py_EnsureFuncTstateNotNULL(const char *func, PyThreadState *tstate) { if (tstate == NULL) { +#ifndef Py_GIL_DISABLED _Py_FatalErrorFunc(func, "the function must be called with the GIL held, " "after Python initialization and before Python finalization, " "but the GIL is released (the current Python thread state is NULL)"); 
+#else + _Py_FatalErrorFunc(func, + "the function must be called with an active thread state, " + "after Python initialization and before Python finalization, " + "but it was called without an active thread state. " + "Are you trying to call the C API inside of a Py_BEGIN_ALLOW_THREADS block?"); +#endif } } diff --git a/Lib/test/test_capi/test_mem.py b/Lib/test/test_capi/test_mem.py index 6ab7b685c2e18b..5035b2b4829bf6 100644 --- a/Lib/test/test_capi/test_mem.py +++ b/Lib/test/test_capi/test_mem.py @@ -68,8 +68,13 @@ def test_api_misuse(self): def check_malloc_without_gil(self, code): out = self.check(code) - expected = ('Fatal Python error: _PyMem_DebugMalloc: ' - 'Python memory allocator called without holding the GIL') + if not support.Py_GIL_DISABLED: + expected = ('Fatal Python error: _PyMem_DebugMalloc: ' + 'Python memory allocator called without holding the GIL') + else: + expected = ('Fatal Python error: _PyMem_DebugMalloc: ' + 'Python memory allocator called without an active thread state. ' + 'Are you trying to call it inside of a Py_BEGIN_ALLOW_THREADS block?') self.assertIn(expected, out) def test_pymem_malloc_without_gil(self): diff --git a/Lib/test/test_capi/test_misc.py b/Lib/test/test_capi/test_misc.py index 8e0271919cc8a5..61512e610f46f2 100644 --- a/Lib/test/test_capi/test_misc.py +++ b/Lib/test/test_capi/test_misc.py @@ -100,11 +100,18 @@ def test_no_FatalError_infinite_loop(self): _rc, out, err = run_result self.assertEqual(out, b'') # This used to cause an infinite loop. 
- msg = ("Fatal Python error: PyThreadState_Get: " - "the function must be called with the GIL held, " - "after Python initialization and before Python finalization, " - "but the GIL is released " - "(the current Python thread state is NULL)").encode() + if not support.Py_GIL_DISABLED: + msg = ("Fatal Python error: PyThreadState_Get: " + "the function must be called with the GIL held, " + "after Python initialization and before Python finalization, " + "but the GIL is released " + "(the current Python thread state is NULL)").encode() + else: + msg = ("Fatal Python error: PyThreadState_Get: " + "the function must be called with an active thread state, " + "after Python initialization and before Python finalization, " + "but it was called without an active thread state. " + "Are you trying to call the C API inside of a Py_BEGIN_ALLOW_THREADS block?").encode() self.assertTrue(err.rstrip().startswith(msg), err) diff --git a/Misc/NEWS.d/next/C_API/2024-11-26-22-06-10.gh-issue-127314.SsRrIu.rst b/Misc/NEWS.d/next/C_API/2024-11-26-22-06-10.gh-issue-127314.SsRrIu.rst new file mode 100644 index 00000000000000..8ea3c4ee2a2c53 --- /dev/null +++ b/Misc/NEWS.d/next/C_API/2024-11-26-22-06-10.gh-issue-127314.SsRrIu.rst @@ -0,0 +1,2 @@ +Improve error message when calling the C API without an active thread state +on the :term:`free-threaded ` build. diff --git a/Objects/obmalloc.c b/Objects/obmalloc.c index 2cc0377f68f990..b103deb01ca712 100644 --- a/Objects/obmalloc.c +++ b/Objects/obmalloc.c @@ -2910,9 +2910,16 @@ static inline void _PyMem_DebugCheckGIL(const char *func) { if (!PyGILState_Check()) { +#ifndef Py_GIL_DISABLED _Py_FatalErrorFunc(func, "Python memory allocator called " "without holding the GIL"); +#else + _Py_FatalErrorFunc(func, + "Python memory allocator called " + "without an active thread state. 
" + "Are you trying to call it inside of a Py_BEGIN_ALLOW_THREADS block?"); +#endif } } From 67b18a18b66b89e253f38895057ef9f6bae92e7b Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Fri, 6 Dec 2024 17:27:12 +0100 Subject: [PATCH 30/48] gh-59705: Add _thread.set_name() function (#127338) On Linux, threading.Thread now sets the thread name to the operating system. * configure now checks if pthread_getname_np() and pthread_setname_np() functions are available. * Add PYTHREAD_NAME_MAXLEN macro. * Add _thread._NAME_MAXLEN constant for test_threading. Co-authored-by: Serhiy Storchaka --- Lib/test/test_threading.py | 60 ++++++++++ Lib/threading.py | 9 ++ ...4-11-27-17-04-38.gh-issue-59705.sAGyvs.rst | 2 + Modules/_threadmodule.c | 108 ++++++++++++++++++ Modules/clinic/_threadmodule.c.h | 104 +++++++++++++++++ configure | 30 +++++ configure.ac | 22 +++- pyconfig.h.in | 9 ++ 8 files changed, 342 insertions(+), 2 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-11-27-17-04-38.gh-issue-59705.sAGyvs.rst create mode 100644 Modules/clinic/_threadmodule.c.h diff --git a/Lib/test/test_threading.py b/Lib/test/test_threading.py index fe225558fc4f0b..d05161f46f1034 100644 --- a/Lib/test/test_threading.py +++ b/Lib/test/test_threading.py @@ -2104,6 +2104,66 @@ def test__all__(self): support.check__all__(self, threading, ('threading', '_thread'), extra=extra, not_exported=not_exported) + @unittest.skipUnless(hasattr(_thread, 'set_name'), "missing _thread.set_name") + @unittest.skipUnless(hasattr(_thread, '_get_name'), "missing _thread._get_name") + def test_set_name(self): + # set_name() limit in bytes + truncate = getattr(_thread, "_NAME_MAXLEN", None) + limit = truncate or 100 + + tests = [ + # test short ASCII name + "CustomName", + + # test short non-ASCII name + "namé€", + + # embedded null character: name is truncated + # at the first null character + "embed\0null", + + # Test long ASCII names (not truncated) + "x" * limit, + + # Test long ASCII names (truncated) 
+ "x" * (limit + 10), + + # Test long non-ASCII name (truncated) + "x" * (limit - 1) + "é€", + ] + if os_helper.FS_NONASCII: + tests.append(f"nonascii:{os_helper.FS_NONASCII}") + if os_helper.TESTFN_UNENCODABLE: + tests.append(os_helper.TESTFN_UNENCODABLE) + + if sys.platform.startswith("solaris"): + encoding = "utf-8" + else: + encoding = sys.getfilesystemencoding() + + def work(): + nonlocal work_name + work_name = _thread._get_name() + + for name in tests: + encoded = name.encode(encoding, "replace") + if b'\0' in encoded: + encoded = encoded.split(b'\0', 1)[0] + if truncate is not None: + encoded = encoded[:truncate] + if sys.platform.startswith("solaris"): + expected = encoded.decode("utf-8", "surrogateescape") + else: + expected = os.fsdecode(encoded) + + with self.subTest(name=name, expected=expected): + work_name = None + thread = threading.Thread(target=work, name=name) + thread.start() + thread.join() + self.assertEqual(work_name, expected, + f"{len(work_name)=} and {len(expected)=}") + class InterruptMainTests(unittest.TestCase): def check_interrupt_main_with_signal_handler(self, signum): diff --git a/Lib/threading.py b/Lib/threading.py index 94ea2f08178369..3abd22a2aa1b72 100644 --- a/Lib/threading.py +++ b/Lib/threading.py @@ -48,6 +48,10 @@ __all__.append('get_native_id') except AttributeError: _HAVE_THREAD_NATIVE_ID = False +try: + _set_name = _thread.set_name +except AttributeError: + _set_name = None ThreadError = _thread.error try: _CRLock = _thread.RLock @@ -1027,6 +1031,11 @@ def _bootstrap_inner(self): self._set_ident() if _HAVE_THREAD_NATIVE_ID: self._set_native_id() + if _set_name is not None and self._name: + try: + _set_name(self._name) + except OSError: + pass self._started.set() with _active_limbo_lock: _active[self._ident] = self diff --git a/Misc/NEWS.d/next/Library/2024-11-27-17-04-38.gh-issue-59705.sAGyvs.rst b/Misc/NEWS.d/next/Library/2024-11-27-17-04-38.gh-issue-59705.sAGyvs.rst new file mode 100644 index 
00000000000000..a8c7b3d00755e6 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-11-27-17-04-38.gh-issue-59705.sAGyvs.rst @@ -0,0 +1,2 @@ +On Linux, :class:`threading.Thread` now sets the thread name to the +operating system. Patch by Victor Stinner. diff --git a/Modules/_threadmodule.c b/Modules/_threadmodule.c index 4a45445e2f62db..35c032fbeaa94f 100644 --- a/Modules/_threadmodule.c +++ b/Modules/_threadmodule.c @@ -17,6 +17,8 @@ # include // SIGINT #endif +#include "clinic/_threadmodule.c.h" + // ThreadError is just an alias to PyExc_RuntimeError #define ThreadError PyExc_RuntimeError @@ -44,6 +46,13 @@ get_thread_state(PyObject *module) return (thread_module_state *)state; } + +/*[clinic input] +module _thread +[clinic start generated code]*/ +/*[clinic end generated code: output=da39a3ee5e6b4b0d input=be8dbe5cc4b16df7]*/ + + // _ThreadHandle type // Handles state transitions according to the following diagram: @@ -2354,6 +2363,96 @@ PyDoc_STRVAR(thread__get_main_thread_ident_doc, Internal only. Return a non-zero integer that uniquely identifies the main thread\n\ of the main interpreter."); + +#ifdef HAVE_PTHREAD_GETNAME_NP +/*[clinic input] +_thread._get_name + +Get the name of the current thread. +[clinic start generated code]*/ + +static PyObject * +_thread__get_name_impl(PyObject *module) +/*[clinic end generated code: output=20026e7ee3da3dd7 input=35cec676833d04c8]*/ +{ + // Linux and macOS are limited to respectively 16 and 64 bytes + char name[100]; + pthread_t thread = pthread_self(); + int rc = pthread_getname_np(thread, name, Py_ARRAY_LENGTH(name)); + if (rc) { + errno = rc; + return PyErr_SetFromErrno(PyExc_OSError); + } + +#ifdef __sun + return PyUnicode_DecodeUTF8(name, strlen(name), "surrogateescape"); +#else + return PyUnicode_DecodeFSDefault(name); +#endif +} +#endif // HAVE_PTHREAD_GETNAME_NP + + +#ifdef HAVE_PTHREAD_SETNAME_NP +/*[clinic input] +_thread.set_name + + name as name_obj: unicode + +Set the name of the current thread. 
+[clinic start generated code]*/ + +static PyObject * +_thread_set_name_impl(PyObject *module, PyObject *name_obj) +/*[clinic end generated code: output=402b0c68e0c0daed input=7e7acd98261be82f]*/ +{ +#ifdef __sun + // Solaris always uses UTF-8 + const char *encoding = "utf-8"; +#else + // Encode the thread name to the filesystem encoding using the "replace" + // error handler + PyInterpreterState *interp = _PyInterpreterState_GET(); + const char *encoding = interp->unicode.fs_codec.encoding; +#endif + PyObject *name_encoded; + name_encoded = PyUnicode_AsEncodedString(name_obj, encoding, "replace"); + if (name_encoded == NULL) { + return NULL; + } + +#ifdef PYTHREAD_NAME_MAXLEN + // Truncate to PYTHREAD_NAME_MAXLEN bytes + the NUL byte if needed + size_t len = PyBytes_GET_SIZE(name_encoded); + if (len > PYTHREAD_NAME_MAXLEN) { + PyObject *truncated; + truncated = PyBytes_FromStringAndSize(PyBytes_AS_STRING(name_encoded), + PYTHREAD_NAME_MAXLEN); + if (truncated == NULL) { + Py_DECREF(name_encoded); + return NULL; + } + Py_SETREF(name_encoded, truncated); + } +#endif + + const char *name = PyBytes_AS_STRING(name_encoded); +#ifdef __APPLE__ + int rc = pthread_setname_np(name); +#else + pthread_t thread = pthread_self(); + int rc = pthread_setname_np(thread, name); +#endif + Py_DECREF(name_encoded); + if (rc) { + errno = rc; + return PyErr_SetFromErrno(PyExc_OSError); + } + Py_RETURN_NONE; +} +#endif // HAVE_PTHREAD_SETNAME_NP + + static PyMethodDef thread_methods[] = { {"start_new_thread", (PyCFunction)thread_PyThread_start_new_thread, METH_VARARGS, start_new_thread_doc}, @@ -2393,6 +2492,8 @@ static PyMethodDef thread_methods[] = { METH_O, thread__make_thread_handle_doc}, {"_get_main_thread_ident", thread__get_main_thread_ident, METH_NOARGS, thread__get_main_thread_ident_doc}, + _THREAD_SET_NAME_METHODDEF + _THREAD__GET_NAME_METHODDEF {NULL, NULL} /* sentinel */ }; @@ -2484,6 +2585,13 @@ thread_module_exec(PyObject *module) llist_init(&state->shutdown_handles); 
+#ifdef PYTHREAD_NAME_MAXLEN + if (PyModule_AddIntConstant(module, "_NAME_MAXLEN", + PYTHREAD_NAME_MAXLEN) < 0) { + return -1; + } +#endif + return 0; } diff --git a/Modules/clinic/_threadmodule.c.h b/Modules/clinic/_threadmodule.c.h new file mode 100644 index 00000000000000..8f0507d40285b3 --- /dev/null +++ b/Modules/clinic/_threadmodule.c.h @@ -0,0 +1,104 @@ +/*[clinic input] +preserve +[clinic start generated code]*/ + +#if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) +# include "pycore_gc.h" // PyGC_Head +# include "pycore_runtime.h" // _Py_ID() +#endif +#include "pycore_modsupport.h" // _PyArg_UnpackKeywords() + +#if defined(HAVE_PTHREAD_GETNAME_NP) + +PyDoc_STRVAR(_thread__get_name__doc__, +"_get_name($module, /)\n" +"--\n" +"\n" +"Get the name of the current thread."); + +#define _THREAD__GET_NAME_METHODDEF \ + {"_get_name", (PyCFunction)_thread__get_name, METH_NOARGS, _thread__get_name__doc__}, + +static PyObject * +_thread__get_name_impl(PyObject *module); + +static PyObject * +_thread__get_name(PyObject *module, PyObject *Py_UNUSED(ignored)) +{ + return _thread__get_name_impl(module); +} + +#endif /* defined(HAVE_PTHREAD_GETNAME_NP) */ + +#if defined(HAVE_PTHREAD_SETNAME_NP) + +PyDoc_STRVAR(_thread_set_name__doc__, +"set_name($module, /, name)\n" +"--\n" +"\n" +"Set the name of the current thread."); + +#define _THREAD_SET_NAME_METHODDEF \ + {"set_name", _PyCFunction_CAST(_thread_set_name), METH_FASTCALL|METH_KEYWORDS, _thread_set_name__doc__}, + +static PyObject * +_thread_set_name_impl(PyObject *module, PyObject *name_obj); + +static PyObject * +_thread_set_name(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) +{ + PyObject *return_value = NULL; + #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) + + #define NUM_KEYWORDS 1 + static struct { + PyGC_Head _this_is_not_used; + PyObject_VAR_HEAD + PyObject *ob_item[NUM_KEYWORDS]; + } _kwtuple = { + .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, 
NUM_KEYWORDS) + .ob_item = { &_Py_ID(name), }, + }; + #undef NUM_KEYWORDS + #define KWTUPLE (&_kwtuple.ob_base.ob_base) + + #else // !Py_BUILD_CORE + # define KWTUPLE NULL + #endif // !Py_BUILD_CORE + + static const char * const _keywords[] = {"name", NULL}; + static _PyArg_Parser _parser = { + .keywords = _keywords, + .fname = "set_name", + .kwtuple = KWTUPLE, + }; + #undef KWTUPLE + PyObject *argsbuf[1]; + PyObject *name_obj; + + args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, + /*minpos*/ 1, /*maxpos*/ 1, /*minkw*/ 0, /*varpos*/ 0, argsbuf); + if (!args) { + goto exit; + } + if (!PyUnicode_Check(args[0])) { + _PyArg_BadArgument("set_name", "argument 'name'", "str", args[0]); + goto exit; + } + name_obj = args[0]; + return_value = _thread_set_name_impl(module, name_obj); + +exit: + return return_value; +} + +#endif /* defined(HAVE_PTHREAD_SETNAME_NP) */ + +#ifndef _THREAD__GET_NAME_METHODDEF + #define _THREAD__GET_NAME_METHODDEF +#endif /* !defined(_THREAD__GET_NAME_METHODDEF) */ + +#ifndef _THREAD_SET_NAME_METHODDEF + #define _THREAD_SET_NAME_METHODDEF +#endif /* !defined(_THREAD_SET_NAME_METHODDEF) */ +/*[clinic end generated code: output=b5cb85aaccc45bf6 input=a9049054013a1b77]*/ diff --git a/configure b/configure index 5e9bcb602d884e..bcbab8dfcff190 100755 --- a/configure +++ b/configure @@ -821,6 +821,7 @@ MODULE_TIME_TRUE MODULE__IO_FALSE MODULE__IO_TRUE MODULE_BUILDTYPE +PYTHREAD_NAME_MAXLEN TEST_MODULES OPENSSL_LDFLAGS OPENSSL_LIBS @@ -18841,6 +18842,18 @@ if test "x$ac_cv_func_pthread_kill" = xyes then : printf "%s\n" "#define HAVE_PTHREAD_KILL 1" >>confdefs.h +fi +ac_fn_c_check_func "$LINENO" "pthread_getname_np" "ac_cv_func_pthread_getname_np" +if test "x$ac_cv_func_pthread_getname_np" = xyes +then : + printf "%s\n" "#define HAVE_PTHREAD_GETNAME_NP 1" >>confdefs.h + +fi +ac_fn_c_check_func "$LINENO" "pthread_setname_np" "ac_cv_func_pthread_setname_np" +if test "x$ac_cv_func_pthread_setname_np" = xyes +then : + printf "%s\n" "#define 
HAVE_PTHREAD_SETNAME_NP 1" >>confdefs.h + fi ac_fn_c_check_func "$LINENO" "ptsname" "ac_cv_func_ptsname" if test "x$ac_cv_func_ptsname" = xyes @@ -29081,6 +29094,23 @@ fi CPPFLAGS=$save_CPPFLAGS +# gh-59705: Maximum length in bytes of a thread name +case "$ac_sys_system" in + Linux*) PYTHREAD_NAME_MAXLEN=15;; # Linux and Android + SunOS*) PYTHREAD_NAME_MAXLEN=31;; + Darwin) PYTHREAD_NAME_MAXLEN=63;; + iOS) PYTHREAD_NAME_MAXLEN=63;; + FreeBSD*) PYTHREAD_NAME_MAXLEN=98;; + *) PYTHREAD_NAME_MAXLEN=;; +esac +if test -n "$PYTHREAD_NAME_MAXLEN"; then + +printf "%s\n" "#define PYTHREAD_NAME_MAXLEN $PYTHREAD_NAME_MAXLEN" >>confdefs.h + +fi + + + # stdlib diff --git a/configure.ac b/configure.ac index bf3685e1b1b209..922a125ea9608e 100644 --- a/configure.ac +++ b/configure.ac @@ -5110,8 +5110,10 @@ AC_CHECK_FUNCS([ \ mknod mknodat mktime mmap mremap nice openat opendir pathconf pause pipe \ pipe2 plock poll posix_fadvise posix_fallocate posix_openpt posix_spawn posix_spawnp \ posix_spawn_file_actions_addclosefrom_np \ - pread preadv preadv2 process_vm_readv pthread_cond_timedwait_relative_np pthread_condattr_setclock pthread_init \ - pthread_kill ptsname ptsname_r pwrite pwritev pwritev2 readlink readlinkat readv realpath renameat \ + pread preadv preadv2 process_vm_readv \ + pthread_cond_timedwait_relative_np pthread_condattr_setclock pthread_init \ + pthread_kill pthread_getname_np pthread_setname_np \ + ptsname ptsname_r pwrite pwritev pwritev2 readlink readlinkat readv realpath renameat \ rtpSpawn sched_get_priority_max sched_rr_get_interval sched_setaffinity \ sched_setparam sched_setscheduler sem_clockwait sem_getvalue sem_open \ sem_timedwait sem_unlink sendfile setegid seteuid setgid sethostname \ @@ -7498,6 +7500,22 @@ AS_VAR_IF([ac_cv_libatomic_needed], [yes], _RESTORE_VAR([CPPFLAGS]) +# gh-59705: Maximum length in bytes of a thread name +case "$ac_sys_system" in + Linux*) PYTHREAD_NAME_MAXLEN=15;; # Linux and Android + SunOS*) PYTHREAD_NAME_MAXLEN=31;; + Darwin) 
PYTHREAD_NAME_MAXLEN=63;; + iOS) PYTHREAD_NAME_MAXLEN=63;; + FreeBSD*) PYTHREAD_NAME_MAXLEN=98;; + *) PYTHREAD_NAME_MAXLEN=;; +esac +if test -n "$PYTHREAD_NAME_MAXLEN"; then + AC_DEFINE_UNQUOTED([PYTHREAD_NAME_MAXLEN], [$PYTHREAD_NAME_MAXLEN], + [Maximum length in bytes of a thread name]) +fi +AC_SUBST([PYTHREAD_NAME_MAXLEN]) + + # stdlib AC_DEFUN([PY_STDLIB_MOD_SET_NA], [ m4_foreach([mod], [$@], [ diff --git a/pyconfig.h.in b/pyconfig.h.in index 6a1f1284650b9f..166c195a8c66fc 100644 --- a/pyconfig.h.in +++ b/pyconfig.h.in @@ -981,6 +981,9 @@ /* Define to 1 if you have the `pthread_getcpuclockid' function. */ #undef HAVE_PTHREAD_GETCPUCLOCKID +/* Define to 1 if you have the `pthread_getname_np' function. */ +#undef HAVE_PTHREAD_GETNAME_NP + /* Define to 1 if you have the header file. */ #undef HAVE_PTHREAD_H @@ -990,6 +993,9 @@ /* Define to 1 if you have the `pthread_kill' function. */ #undef HAVE_PTHREAD_KILL +/* Define to 1 if you have the `pthread_setname_np' function. */ +#undef HAVE_PTHREAD_SETNAME_NP + /* Define to 1 if you have the `pthread_sigmask' function. */ #undef HAVE_PTHREAD_SIGMASK @@ -1650,6 +1656,9 @@ /* Define as the preferred size in bits of long digits */ #undef PYLONG_BITS_IN_DIGIT +/* Maximum length in bytes of a thread name */ +#undef PYTHREAD_NAME_MAXLEN + /* enabled builtin hash modules */ #undef PY_BUILTIN_HASHLIB_HASHES From 89fa7ec74e531870a8f495d5e32ec0b00dbcd32b Mon Sep 17 00:00:00 2001 From: Irit Katriel <1055913+iritkatriel@users.noreply.github.com> Date: Fri, 6 Dec 2024 16:36:06 +0000 Subject: [PATCH 31/48] gh-119786: Add jit.md. Move adaptive.md to a section of interpreter.md. 
(#127175) --- InternalDocs/README.md | 4 +- InternalDocs/adaptive.md | 146 ------------------------ InternalDocs/code_objects.md | 5 + InternalDocs/compiler.md | 10 -- InternalDocs/interpreter.md | 210 ++++++++++++++++++++++++++++++----- InternalDocs/jit.md | 134 ++++++++++++++++++++++ 6 files changed, 322 insertions(+), 187 deletions(-) delete mode 100644 InternalDocs/adaptive.md create mode 100644 InternalDocs/jit.md diff --git a/InternalDocs/README.md b/InternalDocs/README.md index 8cdd06d189f362..794b4f3c6aad42 100644 --- a/InternalDocs/README.md +++ b/InternalDocs/README.md @@ -34,9 +34,9 @@ Runtime Objects Program Execution --- -- [The Interpreter](interpreter.md) +- [The Bytecode Interpreter](interpreter.md) -- [Adaptive Instruction Families](adaptive.md) +- [The JIT](jit.md) - [Garbage Collector Design](garbage_collector.md) diff --git a/InternalDocs/adaptive.md b/InternalDocs/adaptive.md deleted file mode 100644 index 7cfa8e52310460..00000000000000 --- a/InternalDocs/adaptive.md +++ /dev/null @@ -1,146 +0,0 @@ -# Adding or extending a family of adaptive instructions. - -## Families of instructions - -The core part of [PEP 659](https://peps.python.org/pep-0659/) -(specializing adaptive interpreter) is the families of -instructions that perform the adaptive specialization. - -A family of instructions has the following fundamental properties: - -* It corresponds to a single instruction in the code - generated by the bytecode compiler. -* It has a single adaptive instruction that records an execution count and, - at regular intervals, attempts to specialize itself. If not specializing, - it executes the base implementation. -* It has at least one specialized form of the instruction that is tailored - for a particular value or set of values at runtime. -* All members of the family must have the same number of inline cache entries, - to ensure correct execution. 
- Individual family members do not need to use all of the entries, - but must skip over any unused entries when executing. - -The current implementation also requires the following, -although these are not fundamental and may change: - -* All families use one or more inline cache entries, - the first entry is always the counter. -* All instruction names should start with the name of the adaptive - instruction. -* Specialized forms should have names describing their specialization. - -## Example family - -The `LOAD_GLOBAL` instruction (in [Python/bytecodes.c](../Python/bytecodes.c)) -already has an adaptive family that serves as a relatively simple example. - -The `LOAD_GLOBAL` instruction performs adaptive specialization, -calling `_Py_Specialize_LoadGlobal()` when the counter reaches zero. - -There are two specialized instructions in the family, `LOAD_GLOBAL_MODULE` -which is specialized for global variables in the module, and -`LOAD_GLOBAL_BUILTIN` which is specialized for builtin variables. - -## Performance analysis - -The benefit of a specialization can be assessed with the following formula: -`Tbase/Tadaptive`. - -Where `Tbase` is the mean time to execute the base instruction, -and `Tadaptive` is the mean time to execute the specialized and adaptive forms. - -`Tadaptive = (sum(Ti*Ni) + Tmiss*Nmiss)/(sum(Ni)+Nmiss)` - -`Ti` is the time to execute the `i`th instruction in the family and `Ni` is -the number of times that instruction is executed. -`Tmiss` is the time to process a miss, including de-optimzation -and the time to execute the base instruction. - -The ideal situation is where misses are rare and the specialized -forms are much faster than the base instruction. -`LOAD_GLOBAL` is near ideal, `Nmiss/sum(Ni) ≈ 0`. -In which case we have `Tadaptive ≈ sum(Ti*Ni)`. 
-Since we can expect the specialized forms `LOAD_GLOBAL_MODULE` and -`LOAD_GLOBAL_BUILTIN` to be much faster than the adaptive base instruction, -we would expect the specialization of `LOAD_GLOBAL` to be profitable. - -## Design considerations - -While `LOAD_GLOBAL` may be ideal, instructions like `LOAD_ATTR` and -`CALL_FUNCTION` are not. For maximum performance we want to keep `Ti` -low for all specialized instructions and `Nmiss` as low as possible. - -Keeping `Nmiss` low means that there should be specializations for almost -all values seen by the base instruction. Keeping `sum(Ti*Ni)` low means -keeping `Ti` low which means minimizing branches and dependent memory -accesses (pointer chasing). These two objectives may be in conflict, -requiring judgement and experimentation to design the family of instructions. - -The size of the inline cache should as small as possible, -without impairing performance, to reduce the number of -`EXTENDED_ARG` jumps, and to reduce pressure on the CPU's data cache. - -### Gathering data - -Before choosing how to specialize an instruction, it is important to gather -some data. What are the patterns of usage of the base instruction? -Data can best be gathered by instrumenting the interpreter. Since a -specialization function and adaptive instruction are going to be required, -instrumentation can most easily be added in the specialization function. - -### Choice of specializations - -The performance of the specializing adaptive interpreter relies on the -quality of specialization and keeping the overhead of specialization low. - -Specialized instructions must be fast. In order to be fast, -specialized instructions should be tailored for a particular -set of values that allows them to: - -1. Verify that incoming value is part of that set with low overhead. -2. Perform the operation quickly. 
- -This requires that the set of values is chosen such that membership can be -tested quickly and that membership is sufficient to allow the operation to -performed quickly. - -For example, `LOAD_GLOBAL_MODULE` is specialized for `globals()` -dictionaries that have a keys with the expected version. - -This can be tested quickly: - -* `globals->keys->dk_version == expected_version` - -and the operation can be performed quickly: - -* `value = entries[cache->index].me_value;`. - -Because it is impossible to measure the performance of an instruction without -also measuring unrelated factors, the assessment of the quality of a -specialization will require some judgement. - -As a general rule, specialized instructions should be much faster than the -base instruction. - -### Implementation of specialized instructions - -In general, specialized instructions should be implemented in two parts: - -1. A sequence of guards, each of the form - `DEOPT_IF(guard-condition-is-false, BASE_NAME)`. -2. The operation, which should ideally have no branches and - a minimum number of dependent memory accesses. - -In practice, the parts may overlap, as data required for guards -can be re-used in the operation. - -If there are branches in the operation, then consider further specialization -to eliminate the branches. - -### Maintaining stats - -Finally, take care that stats are gather correctly. -After the last `DEOPT_IF` has passed, a hit should be recorded with -`STAT_INC(BASE_INSTRUCTION, hit)`. -After an optimization has been deferred in the adaptive instruction, -that should be recorded with `STAT_INC(BASE_INSTRUCTION, deferred)`. 
diff --git a/InternalDocs/code_objects.md b/InternalDocs/code_objects.md index d4e28c6b238b48..a91a7043c1b8d4 100644 --- a/InternalDocs/code_objects.md +++ b/InternalDocs/code_objects.md @@ -18,6 +18,11 @@ Code objects are typically produced by the bytecode [compiler](compiler.md), although they are often written to disk by one process and read back in by another. The disk version of a code object is serialized using the [marshal](https://docs.python.org/dev/library/marshal.html) protocol. +When a `CodeObject` is created, the function `_PyCode_Quicken()` from +[`Python/specialize.c`](../Python/specialize.c) is called to initialize +the caches of all adaptive instructions. This is required because the +on-disk format is a sequence of bytes, and some of the caches need to be +initialized with 16-bit values. Code objects are nominally immutable. Some fields (including `co_code_adaptive` and fields for runtime diff --git a/InternalDocs/compiler.md b/InternalDocs/compiler.md index 9e99f348acbd8f..c257bfd9faf78f 100644 --- a/InternalDocs/compiler.md +++ b/InternalDocs/compiler.md @@ -595,16 +595,6 @@ Objects * [Exception Handling](exception_handling.md): Describes the exception table -Specializing Adaptive Interpreter -================================= - -Adding a specializing, adaptive interpreter to CPython will bring significant -performance improvements. These documents provide more information: - -* [PEP 659: Specializing Adaptive Interpreter](https://peps.python.org/pep-0659/). 
-* [Adding or extending a family of adaptive instructions](adaptive.md) - - References ========== diff --git a/InternalDocs/interpreter.md b/InternalDocs/interpreter.md index ab149e43471072..fa4a54fdc54fac 100644 --- a/InternalDocs/interpreter.md +++ b/InternalDocs/interpreter.md @@ -1,8 +1,4 @@ -The bytecode interpreter -======================== - -Overview --------- +# The bytecode interpreter This document describes the workings and implementation of the bytecode interpreter, the part of python that executes compiled Python code. Its @@ -47,8 +43,7 @@ simply calls [`_PyEval_EvalFrameDefault()`] to execute the frame. However, as pe `_PyEval_EvalFrameDefault()`. -Instruction decoding --------------------- +## Instruction decoding The first task of the interpreter is to decode the bytecode instructions. Bytecode is stored as an array of 16-bit code units (`_Py_CODEUNIT`). @@ -110,8 +105,7 @@ snippet decode a complete instruction: For various reasons we'll get to later (mostly efficiency, given that `EXTENDED_ARG` is rare) the actual code is different. -Jumps -===== +## Jumps Note that when the `switch` statement is reached, `next_instr` (the "instruction offset") already points to the next instruction. @@ -120,25 +114,26 @@ Thus, jump instructions can be implemented by manipulating `next_instr`: - A jump forward (`JUMP_FORWARD`) sets `next_instr += oparg`. - A jump backward sets `next_instr -= oparg`. -Inline cache entries -==================== +## Inline cache entries Some (specialized or specializable) instructions have an associated "inline cache". The inline cache consists of one or more two-byte entries included in the bytecode array as additional words following the `opcode`/`oparg` pair. The size of the inline cache for a particular instruction is fixed by its `opcode`. 
Moreover, the inline cache size for all instructions in a -[family of specialized/specializable instructions](adaptive.md) +[family of specialized/specializable instructions](#Specialization) (for example, `LOAD_ATTR`, `LOAD_ATTR_SLOT`, `LOAD_ATTR_MODULE`) must all be the same. Cache entries are reserved by the compiler and initialized with zeros. Although they are represented by code units, cache entries do not conform to the `opcode` / `oparg` format. -If an instruction has an inline cache, the layout of its cache is described by -a `struct` definition in (`pycore_code.h`)[../Include/internal/pycore_code.h]. -This allows us to access the cache by casting `next_instr` to a pointer to this `struct`. -The size of such a `struct` must be independent of the machine architecture, word size -and alignment requirements. For a 32-bit field, the `struct` should use `_Py_CODEUNIT field[2]`. +If an instruction has an inline cache, the layout of its cache is described in +the instruction's definition in [`Python/bytecodes.c`](../Python/bytecodes.c). +The structs defined in [`pycore_code.h`](../Include/internal/pycore_code.h) +allow us to access the cache by casting `next_instr` to a pointer to the relevant +`struct`. The size of such a `struct` must be independent of the machine +architecture, word size and alignment requirements. For a 32-bit field, the +`struct` should use `_Py_CODEUNIT field[2]`. The instruction implementation is responsible for advancing `next_instr` past the inline cache. For example, if an instruction's inline cache is four bytes (that is, two code units) in size, @@ -153,8 +148,7 @@ Serializing non-zero cache entries would present a problem because the serializa More information about the use of inline caches can be found in [PEP 659](https://peps.python.org/pep-0659/#ancillary-data). -The evaluation stack --------------------- +## The evaluation stack Most instructions read or write some data in the form of object references (`PyObject *`). 
The CPython bytecode interpreter is a stack machine, meaning that its instructions operate @@ -193,16 +187,14 @@ For example, the following sequence is illegal, because it keeps pushing items o > Do not confuse the evaluation stack with the call stack, which is used to implement calling > and returning from functions. -Error handling --------------- +## Error handling When the implementation of an opcode raises an exception, it jumps to the `exception_unwind` label in [Python/ceval.c](../Python/ceval.c). The exception is then handled as described in the [`exception handling documentation`](exception_handling.md#handling-exceptions). -Python-to-Python calls ----------------------- +## Python-to-Python calls The `_PyEval_EvalFrameDefault()` function is recursive, because sometimes the interpreter calls some C function that calls back into the interpreter. @@ -227,8 +219,7 @@ returns from `_PyEval_EvalFrameDefault()` altogether, to a C caller. A similar check is performed when an unhandled exception occurs. -The call stack --------------- +## The call stack Up through 3.10, the call stack was implemented as a singly-linked list of [frame objects](frames.md). This was expensive because each call would require a @@ -262,8 +253,7 @@ See also the [generators](generators.md) section. -Introducing a new bytecode instruction --------------------------------------- +## Introducing a new bytecode instruction It is occasionally necessary to add a new opcode in order to implement a new feature or change the way that existing features are compiled. @@ -355,6 +344,169 @@ new bytecode properly. Run `make regen-importlib` for updating the bytecode of frozen importlib files. You have to run `make` again after this to recompile the generated C files. +## Specialization + +Bytecode specialization, which was introduced in +[PEP 659](https://peps.python.org/pep-0659/), speeds up program execution by +rewriting instructions based on runtime information. 
This is done by replacing +a generic instruction with a faster version that works for the case that this +program encounters. Each specializable instruction is responsible for rewriting +itself, using its [inline caches](#inline-cache-entries) for +bookkeeping. + +When an adaptive instruction executes, it may attempt to specialize itself, +depending on the argument and the contents of its cache. This is done +by calling one of the `_Py_Specialize_XXX` functions in +[`Python/specialize.c`](../Python/specialize.c). + + +The specialized instructions are responsible for checking that the special-case +assumptions still apply, and de-optimizing back to the generic version if not. + +## Families of instructions + +A *family* of instructions consists of an adaptive instruction along with the +specialized instructions that it can be replaced by. +It has the following fundamental properties: + +* It corresponds to a single instruction in the code + generated by the bytecode compiler. +* It has a single adaptive instruction that records an execution count and, + at regular intervals, attempts to specialize itself. If not specializing, + it executes the base implementation. +* It has at least one specialized form of the instruction that is tailored + for a particular value or set of values at runtime. +* All members of the family must have the same number of inline cache entries, + to ensure correct execution. + Individual family members do not need to use all of the entries, + but must skip over any unused entries when executing. + +The current implementation also requires the following, +although these are not fundamental and may change: + +* All families use one or more inline cache entries, + the first entry is always the counter. +* All instruction names should start with the name of the adaptive + instruction. +* Specialized forms should have names describing their specialization. 
+ +## Example family + +The `LOAD_GLOBAL` instruction (in [Python/bytecodes.c](../Python/bytecodes.c)) +already has an adaptive family that serves as a relatively simple example. + +The `LOAD_GLOBAL` instruction performs adaptive specialization, +calling `_Py_Specialize_LoadGlobal()` when the counter reaches zero. + +There are two specialized instructions in the family, `LOAD_GLOBAL_MODULE` +which is specialized for global variables in the module, and +`LOAD_GLOBAL_BUILTIN` which is specialized for builtin variables. + +## Performance analysis + +The benefit of a specialization can be assessed with the following formula: +`Tbase/Tadaptive`. + +Where `Tbase` is the mean time to execute the base instruction, +and `Tadaptive` is the mean time to execute the specialized and adaptive forms. + +`Tadaptive = (sum(Ti*Ni) + Tmiss*Nmiss)/(sum(Ni)+Nmiss)` + +`Ti` is the time to execute the `i`th instruction in the family and `Ni` is +the number of times that instruction is executed. +`Tmiss` is the time to process a miss, including de-optimzation +and the time to execute the base instruction. + +The ideal situation is where misses are rare and the specialized +forms are much faster than the base instruction. +`LOAD_GLOBAL` is near ideal, `Nmiss/sum(Ni) ≈ 0`. +In which case we have `Tadaptive ≈ sum(Ti*Ni)`. +Since we can expect the specialized forms `LOAD_GLOBAL_MODULE` and +`LOAD_GLOBAL_BUILTIN` to be much faster than the adaptive base instruction, +we would expect the specialization of `LOAD_GLOBAL` to be profitable. + +## Design considerations + +While `LOAD_GLOBAL` may be ideal, instructions like `LOAD_ATTR` and +`CALL_FUNCTION` are not. For maximum performance we want to keep `Ti` +low for all specialized instructions and `Nmiss` as low as possible. + +Keeping `Nmiss` low means that there should be specializations for almost +all values seen by the base instruction. 
Keeping `sum(Ti*Ni)` low means +keeping `Ti` low which means minimizing branches and dependent memory +accesses (pointer chasing). These two objectives may be in conflict, +requiring judgement and experimentation to design the family of instructions. + +The size of the inline cache should as small as possible, +without impairing performance, to reduce the number of +`EXTENDED_ARG` jumps, and to reduce pressure on the CPU's data cache. + +### Gathering data + +Before choosing how to specialize an instruction, it is important to gather +some data. What are the patterns of usage of the base instruction? +Data can best be gathered by instrumenting the interpreter. Since a +specialization function and adaptive instruction are going to be required, +instrumentation can most easily be added in the specialization function. + +### Choice of specializations + +The performance of the specializing adaptive interpreter relies on the +quality of specialization and keeping the overhead of specialization low. + +Specialized instructions must be fast. In order to be fast, +specialized instructions should be tailored for a particular +set of values that allows them to: + +1. Verify that incoming value is part of that set with low overhead. +2. Perform the operation quickly. + +This requires that the set of values is chosen such that membership can be +tested quickly and that membership is sufficient to allow the operation to +performed quickly. + +For example, `LOAD_GLOBAL_MODULE` is specialized for `globals()` +dictionaries that have a keys with the expected version. + +This can be tested quickly: + +* `globals->keys->dk_version == expected_version` + +and the operation can be performed quickly: + +* `value = entries[cache->index].me_value;`. + +Because it is impossible to measure the performance of an instruction without +also measuring unrelated factors, the assessment of the quality of a +specialization will require some judgement. 
+ +As a general rule, specialized instructions should be much faster than the +base instruction. + +### Implementation of specialized instructions + +In general, specialized instructions should be implemented in two parts: + +1. A sequence of guards, each of the form + `DEOPT_IF(guard-condition-is-false, BASE_NAME)`. +2. The operation, which should ideally have no branches and + a minimum number of dependent memory accesses. + +In practice, the parts may overlap, as data required for guards +can be re-used in the operation. + +If there are branches in the operation, then consider further specialization +to eliminate the branches. + +### Maintaining stats + +Finally, take care that stats are gathered correctly. +After the last `DEOPT_IF` has passed, a hit should be recorded with +`STAT_INC(BASE_INSTRUCTION, hit)`. +After an optimization has been deferred in the adaptive instruction, +that should be recorded with `STAT_INC(BASE_INSTRUCTION, deferred)`. + + Additional resources -------------------- diff --git a/InternalDocs/jit.md b/InternalDocs/jit.md new file mode 100644 index 00000000000000..1e9f385d5f87fa --- /dev/null +++ b/InternalDocs/jit.md @@ -0,0 +1,134 @@ +# The JIT + +The [adaptive interpreter](interpreter.md) consists of a main loop that +executes the bytecode instructions generated by the +[bytecode compiler](compiler.md) and their +[specializations](interpreter.md#Specialization). Runtime optimization in +this interpreter can only be done for one instruction at a time. The JIT +is based on a mechanism to replace an entire sequence of bytecode instructions, +and this enables optimizations that span multiple instructions. + +Historically, the adaptive interpreter was referred to as `tier 1` and +the JIT as `tier 2`. You will see remnants of this in the code. 
+ +## The Optimizer and Executors + +The program begins running on the adaptive interpreter, until a `JUMP_BACKWARD` +instruction determines that it is "hot" because the counter in its +[inline cache](interpreter.md#inline-cache-entries) indicates that it +executed more than some threshold number of times (see +[`backoff_counter_triggers`](../Include/internal/pycore_backoff.h)). +It then calls the function `_PyOptimizer_Optimize()` in +[`Python/optimizer.c`](../Python/optimizer.c), passing it the current +[frame](frames.md) and instruction pointer. `_PyOptimizer_Optimize()` +constructs an object of type +[`_PyExecutorObject`](Include/internal/pycore_optimizer.h) which implements +an optimized version of the instruction trace beginning at this jump. + +The optimizer determines where the trace ends, and the executor is set up +to either return to the adaptive interpreter and resume execution, or +transfer control to another executor (see `_PyExitData` in +Include/internal/pycore_optimizer.h). + +The executor is stored on the [`code object`](code_objects.md) of the frame, +in the `co_executors` field which is an array of executors. The start +instruction of the trace (the `JUMP_BACKWARD`) is replaced by an +`ENTER_EXECUTOR` instruction whose `oparg` is equal to the index of the +executor in `co_executors`. + +## The micro-op optimizer + +The optimizer that `_PyOptimizer_Optimize()` runs is configurable via the +`_Py_SetTier2Optimizer()` function (this is used in test via +`_testinternalcapi.set_optimizer()`.) + +The micro-op (abbreviated `uop` to approximate `μop`) optimizer is defined in +[`Python/optimizer.c`](../Python/optimizer.c) as the type `_PyUOpOptimizer_Type`. 
+It translates an instruction trace into a sequence of micro-ops by replacing
+each bytecode by an equivalent sequence of micro-ops (see
+`_PyOpcode_macro_expansion` in
+[pycore_opcode_metadata.h](../Include/internal/pycore_opcode_metadata.h)
+which is generated from [`Python/bytecodes.c`](../Python/bytecodes.c)).
+The micro-op sequence is then optimized by
+`_Py_uop_analyze_and_optimize` in
+[`Python/optimizer_analysis.c`](../Python/optimizer_analysis.c)
+and an instance of `_PyUOpExecutor_Type` is created to contain it.
+
+## The JIT interpreter
+
+After a `JUMP_BACKWARD` instruction invokes the uop optimizer to create a uop
+executor, it transfers control to this executor via the `GOTO_TIER_TWO` macro.
+
+CPython implements two executors. Here we describe the JIT interpreter,
+which is the simpler of them and is therefore useful for debugging and analyzing
+the uops generation and optimization stages. To run it, we configure the
+JIT to run on its interpreter (i.e., python is configured with
+[`--enable-experimental-jit=interpreter`](https://docs.python.org/dev/using/configure.html#cmdoption-enable-experimental-jit)).
+
+When invoked, the executor jumps to the `tier2_dispatch:` label in
+[`Python/ceval.c`](../Python/ceval.c), where there is a loop that
+executes the micro-ops. The body of this loop is a switch statement over
+the uops IDs, resembling the one used in the adaptive interpreter.
+
+The switch implementing the uops is in [`Python/executor_cases.c.h`](../Python/executor_cases.c.h),
+which is generated by the build script
+[`Tools/cases_generator/tier2_generator.py`](../Tools/cases_generator/tier2_generator.py)
+from the bytecode definitions in
+[`Python/bytecodes.c`](../Python/bytecodes.c).
+
+When an `_EXIT_TRACE` or `_DEOPT` uop is reached, the uop interpreter exits
+and execution returns to the adaptive interpreter.
+ +## Invalidating Executors + +In addition to being stored on the code object, each executor is also +inserted into a list of all executors, which is stored in the interpreter +state's `executor_list_head` field. This list is used when it is necessary +to invalidate executors because values they used in their construction may +have changed. + +## The JIT + +When the full jit is enabled (python was configured with +[`--enable-experimental-jit`](https://docs.python.org/dev/using/configure.html#cmdoption-enable-experimental-jit), +the uop executor's `jit_code` field is populated with a pointer to a compiled +C function that implements the executor logic. This function's signature is +defined by `jit_func` in [`pycore_jit.h`](Include/internal/pycore_jit.h). +When the executor is invoked by `ENTER_EXECUTOR`, instead of jumping to +the uop interpreter at `tier2_dispatch`, the executor runs the function +that `jit_code` points to. This function returns the instruction pointer +of the next Tier 1 instruction that needs to execute. + +The generation of the jitted functions uses the copy-and-patch technique +which is described in +[Haoran Xu's article](https://sillycross.github.io/2023/05/12/2023-05-12/). +At its core are statically generated `stencils` for the implementation +of the micro ops, which are completed with runtime information while +the jitted code is constructed for an executor by +[`_PyJIT_Compile`](../Python/jit.c). + +The stencils are generated at build time under the Makefile target `regen-jit` +by the scripts in [`/Tools/jit`](/Tools/jit). This script reads +[`Python/executor_cases.c.h`](../Python/executor_cases.c.h) (which is +generated from [`Python/bytecodes.c`](../Python/bytecodes.c)). For +each opcode, it constructs a `.c` file that contains a function for +implementing this opcode, with some runtime information injected. 
+This is done by replacing `CASE` by the bytecode definition in the +template file [`Tools/jit/template.c`](../Tools/jit/template.c). + +Each of the `.c` files is compiled by LLVM, to produce an object file +that contains a function that executes the opcode. These compiled +functions are used to generate the file +[`jit_stencils.h`](../jit_stencils.h), which contains the functions +that the JIT can use to emit code for each of the bytecodes. + +For Python maintainers this means that changes to the bytecodes and +their implementations do not require changes related to the stencils, +because everything is automatically generated from +[`Python/bytecodes.c`](../Python/bytecodes.c) at build time. + +See Also: + +* [Copy-and-Patch Compilation: A fast compilation algorithm for high-level languages and bytecode](https://arxiv.org/abs/2011.13127) + +* [PyCon 2024: Building a JIT compiler for CPython](https://www.youtube.com/watch?v=kMO3Ju0QCDo) From e59caf67cdb8dae26470f00599ea8dbb00968a73 Mon Sep 17 00:00:00 2001 From: Stan Ulbrych <89152624+StanFromIreland@users.noreply.github.com> Date: Fri, 6 Dec 2024 17:50:58 +0000 Subject: [PATCH 32/48] Fix typo in `Lib/_android_support.py` (#127699) --- Lib/_android_support.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/_android_support.py b/Lib/_android_support.py index 7572745c851847..ae506f6a4b57b8 100644 --- a/Lib/_android_support.py +++ b/Lib/_android_support.py @@ -6,7 +6,7 @@ # The maximum length of a log message in bytes, including the level marker and # tag, is defined as LOGGER_ENTRY_MAX_PAYLOAD at # https://cs.android.com/android/platform/superproject/+/android-14.0.0_r1:system/logging/liblog/include/log/log.h;l=71. -# Messages longer than this will be be truncated by logcat. This limit has already +# Messages longer than this will be truncated by logcat. 
This limit has already # been reduced at least once in the history of Android (from 4076 to 4068 between # API level 23 and 26), so leave some headroom. MAX_BYTES_PER_WRITE = 4000 From 5b6635f772d187d6049a56bfea76855644cd4ca1 Mon Sep 17 00:00:00 2001 From: Barney Gale Date: Fri, 6 Dec 2024 18:10:00 +0000 Subject: [PATCH 33/48] GH-127381: pathlib ABCs: remove `PathBase.rename()` and `replace()` (#127658) These methods are obviated by `PathBase.move()`, which can move directories and supports any `PathBase` object as a target. --- Lib/pathlib/_abc.py | 37 +---------------------- Lib/pathlib/_local.py | 17 +++++++++++ Lib/test/test_pathlib/test_pathlib_abc.py | 2 -- 3 files changed, 18 insertions(+), 38 deletions(-) diff --git a/Lib/pathlib/_abc.py b/Lib/pathlib/_abc.py index 86617ff2616f33..11a11ecc4c8203 100644 --- a/Lib/pathlib/_abc.py +++ b/Lib/pathlib/_abc.py @@ -14,7 +14,7 @@ import functools import operator import posixpath -from errno import EINVAL, EXDEV +from errno import EINVAL from glob import _GlobberBase, _no_recurse_symlinks from stat import S_ISDIR, S_ISLNK, S_ISREG, S_ISSOCK, S_ISBLK, S_ISCHR, S_ISFIFO from pathlib._os import copyfileobj @@ -902,45 +902,10 @@ def copy_into(self, target_dir, *, follow_symlinks=True, dirs_exist_ok=dirs_exist_ok, preserve_metadata=preserve_metadata) - def rename(self, target): - """ - Rename this path to the target path. - - The target path may be absolute or relative. Relative paths are - interpreted relative to the current working directory, *not* the - directory of the Path object. - - Returns the new Path instance pointing to the target path. - """ - raise UnsupportedOperation(self._unsupported_msg('rename()')) - - def replace(self, target): - """ - Rename this path to the target path, overwriting if that path exists. - - The target path may be absolute or relative. Relative paths are - interpreted relative to the current working directory, *not* the - directory of the Path object. 
- - Returns the new Path instance pointing to the target path. - """ - raise UnsupportedOperation(self._unsupported_msg('replace()')) - def move(self, target): """ Recursively move this file or directory tree to the given destination. """ - self._ensure_different_file(target) - try: - return self.replace(target) - except UnsupportedOperation: - pass - except TypeError: - if not isinstance(target, PathBase): - raise - except OSError as err: - if err.errno != EXDEV: - raise target = self.copy(target, follow_symlinks=False, preserve_metadata=True) self._delete() return target diff --git a/Lib/pathlib/_local.py b/Lib/pathlib/_local.py index bb8a252c0e94e2..250bc12956f5bc 100644 --- a/Lib/pathlib/_local.py +++ b/Lib/pathlib/_local.py @@ -4,6 +4,7 @@ import os import posixpath import sys +from errno import EXDEV from glob import _StringGlobber from itertools import chain from _collections_abc import Sequence @@ -876,6 +877,22 @@ def replace(self, target): os.replace(self, target) return self.with_segments(target) + def move(self, target): + """ + Recursively move this file or directory tree to the given destination. + """ + self._ensure_different_file(target) + try: + return self.replace(target) + except TypeError: + if not isinstance(target, PathBase): + raise + except OSError as err: + if err.errno != EXDEV: + raise + # Fall back to copy+delete. 
+ return PathBase.move(self, target) + if hasattr(os, "symlink"): def symlink_to(self, target, target_is_directory=False): """ diff --git a/Lib/test/test_pathlib/test_pathlib_abc.py b/Lib/test/test_pathlib/test_pathlib_abc.py index 7ba3fa823a30b9..00153e3f5e997e 100644 --- a/Lib/test/test_pathlib/test_pathlib_abc.py +++ b/Lib/test/test_pathlib/test_pathlib_abc.py @@ -1376,8 +1376,6 @@ def test_unsupported_operation(self): self.assertRaises(e, p.hardlink_to, 'foo') self.assertRaises(e, p.mkdir) self.assertRaises(e, p.touch) - self.assertRaises(e, p.rename, 'foo') - self.assertRaises(e, p.replace, 'foo') self.assertRaises(e, p.chmod, 0o755) self.assertRaises(e, p.lchmod, 0o755) self.assertRaises(e, p.unlink) From 0fc4063747c96223575f6f5a0562eddf2ed0ed62 Mon Sep 17 00:00:00 2001 From: Brett Cannon Date: Fri, 6 Dec 2024 10:42:05 -0800 Subject: [PATCH 34/48] GH-127652: stop using `--wasi preview2` in `wasi.py` (GH-127704) It's only to use WASI 0.2 code to back preview1 APIs and is considered experimental anyway. --- Tools/wasm/wasi.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/Tools/wasm/wasi.py b/Tools/wasm/wasi.py index ac36d55587a38f..da847c4ff86215 100644 --- a/Tools/wasm/wasi.py +++ b/Tools/wasm/wasi.py @@ -297,8 +297,6 @@ def main(): # build. # Use 16 MiB stack. "--wasm max-wasm-stack=16777216 " - # Use WASI 0.2 primitives. - "--wasi preview2 " # Enable thread support; causes use of preview1. #"--wasm threads=y --wasi threads=y " # Map the checkout to / to load the stdlib from /Lib. From 31c9f3ced293492b38e784c17c4befe425da5dab Mon Sep 17 00:00:00 2001 From: Barney Gale Date: Fri, 6 Dec 2024 21:39:45 +0000 Subject: [PATCH 35/48] GH-127381: pathlib ABCs: remove `PathBase.resolve()` and `absolute()` (#127707) Remove our implementation of POSIX path resolution in `PathBase.resolve()`. This functionality is rather fragile and isn't necessary in most cases. It depends on `PathBase.stat()`, which we're looking to remove. Also remove `PathBase.absolute()`. 
Many legitimate virtual filesystems lack the notion of a 'current directory', so it's wrong to include in the basic interface. --- Lib/pathlib/_abc.py | 64 +- Lib/test/test_pathlib/test_pathlib.py | 586 ++++++++++++++++++- Lib/test/test_pathlib/test_pathlib_abc.py | 680 +--------------------- 3 files changed, 599 insertions(+), 731 deletions(-) diff --git a/Lib/pathlib/_abc.py b/Lib/pathlib/_abc.py index 11a11ecc4c8203..820970fcd5889b 100644 --- a/Lib/pathlib/_abc.py +++ b/Lib/pathlib/_abc.py @@ -13,7 +13,6 @@ import functools import operator -import posixpath from errno import EINVAL from glob import _GlobberBase, _no_recurse_symlinks from stat import S_ISDIR, S_ISLNK, S_ISREG, S_ISSOCK, S_ISBLK, S_ISCHR, S_ISFIFO @@ -115,11 +114,6 @@ class PurePathBase: # The `_raw_paths` slot stores unjoined string paths. This is set in # the `__init__()` method. '_raw_paths', - - # The '_resolving' slot stores a boolean indicating whether the path - # is being processed by `PathBase.resolve()`. This prevents duplicate - # work from occurring when `resolve()` calls `stat()` or `readlink()`. - '_resolving', ) parser = ParserBase() _globber = PathGlobber @@ -130,7 +124,6 @@ def __init__(self, *args): raise TypeError( f"argument should be a str, not {type(arg).__name__!r}") self._raw_paths = list(args) - self._resolving = False def with_segments(self, *pathsegments): """Construct a new path object from any number of path-like objects. 
@@ -339,9 +332,7 @@ def parent(self): path = str(self) parent = self.parser.split(path)[0] if path != parent: - parent = self.with_segments(parent) - parent._resolving = self._resolving - return parent + return self.with_segments(parent) return self @property @@ -424,9 +415,6 @@ class PathBase(PurePathBase): """ __slots__ = () - # Maximum number of symlinks to follow in resolve() - _max_symlinks = 40 - @classmethod def _unsupported_msg(cls, attribute): return f"{cls.__name__}.{attribute} is unsupported" @@ -720,20 +708,6 @@ def walk(self, top_down=True, on_error=None, follow_symlinks=False): yield path, dirnames, filenames paths += [path.joinpath(d) for d in reversed(dirnames)] - def absolute(self): - """Return an absolute version of this path - No normalization or symlink resolution is performed. - - Use resolve() to resolve symlinks and remove '..' segments. - """ - if self.is_absolute(): - return self - elif self.parser is not posixpath: - raise UnsupportedOperation(self._unsupported_msg('absolute()')) - else: - # Treat the root directory as the current working directory. - return self.with_segments('/', *self._raw_paths) - def expanduser(self): """ Return a new path with expanded ~ and ~user constructs (as returned by os.path.expanduser) @@ -745,42 +719,6 @@ def readlink(self): Return the path to which the symbolic link points. """ raise UnsupportedOperation(self._unsupported_msg('readlink()')) - readlink._supported = False - - def resolve(self, strict=False): - """ - Make the path absolute, resolving all symlinks on the way and also - normalizing it. 
- """ - if self._resolving: - return self - elif self.parser is not posixpath: - raise UnsupportedOperation(self._unsupported_msg('resolve()')) - - def raise_error(*args): - raise OSError("Unsupported operation.") - - getcwd = raise_error - if strict or getattr(self.readlink, '_supported', True): - def lstat(path_str): - path = self.with_segments(path_str) - path._resolving = True - return path.stat(follow_symlinks=False) - - def readlink(path_str): - path = self.with_segments(path_str) - path._resolving = True - return str(path.readlink()) - else: - # If the user has *not* overridden the `readlink()` method, then - # symlinks are unsupported and (in non-strict mode) we can improve - # performance by not calling `path.lstat()`. - lstat = readlink = raise_error - - return self.with_segments(posixpath._realpath( - str(self.absolute()), strict, self.parser.sep, - getcwd=getcwd, lstat=lstat, readlink=readlink, - maxlinks=self._max_symlinks)) def symlink_to(self, target, target_is_directory=False): """ diff --git a/Lib/test/test_pathlib/test_pathlib.py b/Lib/test/test_pathlib/test_pathlib.py index 2c48eeeda145d0..8c9049f15d5bf9 100644 --- a/Lib/test/test_pathlib/test_pathlib.py +++ b/Lib/test/test_pathlib/test_pathlib.py @@ -1,3 +1,4 @@ +import collections import contextlib import io import os @@ -21,7 +22,7 @@ from test.support import os_helper from test.support.os_helper import TESTFN, FakePath from test.test_pathlib import test_pathlib_abc -from test.test_pathlib.test_pathlib_abc import needs_posix, needs_windows, needs_symlinks +from test.test_pathlib.test_pathlib_abc import needs_posix, needs_windows try: import fcntl @@ -55,6 +56,13 @@ def new_test(self): self.cls.replace = old_replace return new_test + +_tests_needing_symlinks = set() +def needs_symlinks(fn): + """Decorator that marks a test as requiring a path class that supports symlinks.""" + _tests_needing_symlinks.add(fn.__name__) + return fn + # # Tests for the pure classes. 
# @@ -533,6 +541,9 @@ class PathTest(test_pathlib_abc.DummyPathTest, PurePathTest): can_symlink = os_helper.can_symlink() def setUp(self): + name = self.id().split('.')[-1] + if name in _tests_needing_symlinks and not self.can_symlink: + self.skipTest('requires symlinks') super().setUp() os.chmod(self.parser.join(self.base, 'dirE'), 0) @@ -693,6 +704,34 @@ def test_copy_file_preserve_metadata(self): if hasattr(source_st, 'st_flags'): self.assertEqual(source_st.st_flags, target_st.st_flags) + @needs_symlinks + def test_copy_file_to_existing_symlink(self): + base = self.cls(self.base) + source = base / 'dirB' / 'fileB' + target = base / 'linkA' + real_target = base / 'fileA' + result = source.copy(target) + self.assertEqual(result, target) + self.assertTrue(target.exists()) + self.assertTrue(target.is_symlink()) + self.assertTrue(real_target.exists()) + self.assertFalse(real_target.is_symlink()) + self.assertEqual(source.read_text(), real_target.read_text()) + + @needs_symlinks + def test_copy_file_to_existing_symlink_follow_symlinks_false(self): + base = self.cls(self.base) + source = base / 'dirB' / 'fileB' + target = base / 'linkA' + real_target = base / 'fileA' + result = source.copy(target, follow_symlinks=False) + self.assertEqual(result, target) + self.assertTrue(target.exists()) + self.assertTrue(target.is_symlink()) + self.assertTrue(real_target.exists()) + self.assertFalse(real_target.is_symlink()) + self.assertEqual(source.read_text(), real_target.read_text()) + @os_helper.skip_unless_xattr def test_copy_file_preserve_metadata_xattrs(self): base = self.cls(self.base) @@ -702,6 +741,118 @@ def test_copy_file_preserve_metadata_xattrs(self): source.copy(target, preserve_metadata=True) self.assertEqual(os.getxattr(target, b'user.foo'), b'42') + @needs_symlinks + def test_copy_symlink_follow_symlinks_true(self): + base = self.cls(self.base) + source = base / 'linkA' + target = base / 'copyA' + result = source.copy(target) + self.assertEqual(result, target) + 
self.assertTrue(target.exists()) + self.assertFalse(target.is_symlink()) + self.assertEqual(source.read_text(), target.read_text()) + + @needs_symlinks + def test_copy_symlink_follow_symlinks_false(self): + base = self.cls(self.base) + source = base / 'linkA' + target = base / 'copyA' + result = source.copy(target, follow_symlinks=False) + self.assertEqual(result, target) + self.assertTrue(target.exists()) + self.assertTrue(target.is_symlink()) + self.assertEqual(source.readlink(), target.readlink()) + + @needs_symlinks + def test_copy_symlink_to_itself(self): + base = self.cls(self.base) + source = base / 'linkA' + self.assertRaises(OSError, source.copy, source) + + @needs_symlinks + def test_copy_symlink_to_existing_symlink(self): + base = self.cls(self.base) + source = base / 'copySource' + target = base / 'copyTarget' + source.symlink_to(base / 'fileA') + target.symlink_to(base / 'dirC') + self.assertRaises(OSError, source.copy, target) + self.assertRaises(OSError, source.copy, target, follow_symlinks=False) + + @needs_symlinks + def test_copy_symlink_to_existing_directory_symlink(self): + base = self.cls(self.base) + source = base / 'copySource' + target = base / 'copyTarget' + source.symlink_to(base / 'fileA') + target.symlink_to(base / 'dirC') + self.assertRaises(OSError, source.copy, target) + self.assertRaises(OSError, source.copy, target, follow_symlinks=False) + + @needs_symlinks + def test_copy_directory_symlink_follow_symlinks_false(self): + base = self.cls(self.base) + source = base / 'linkB' + target = base / 'copyA' + result = source.copy(target, follow_symlinks=False) + self.assertEqual(result, target) + self.assertTrue(target.exists()) + self.assertTrue(target.is_symlink()) + self.assertEqual(source.readlink(), target.readlink()) + + @needs_symlinks + def test_copy_directory_symlink_to_itself(self): + base = self.cls(self.base) + source = base / 'linkB' + self.assertRaises(OSError, source.copy, source) + self.assertRaises(OSError, source.copy, 
source, follow_symlinks=False) + + @needs_symlinks + def test_copy_directory_symlink_into_itself(self): + base = self.cls(self.base) + source = base / 'linkB' + target = base / 'linkB' / 'copyB' + self.assertRaises(OSError, source.copy, target) + self.assertRaises(OSError, source.copy, target, follow_symlinks=False) + self.assertFalse(target.exists()) + + @needs_symlinks + def test_copy_directory_symlink_to_existing_symlink(self): + base = self.cls(self.base) + source = base / 'copySource' + target = base / 'copyTarget' + source.symlink_to(base / 'dirC') + target.symlink_to(base / 'fileA') + self.assertRaises(FileExistsError, source.copy, target) + self.assertRaises(FileExistsError, source.copy, target, follow_symlinks=False) + + @needs_symlinks + def test_copy_directory_symlink_to_existing_directory_symlink(self): + base = self.cls(self.base) + source = base / 'copySource' + target = base / 'copyTarget' + source.symlink_to(base / 'dirC' / 'dirD') + target.symlink_to(base / 'dirC') + self.assertRaises(FileExistsError, source.copy, target) + self.assertRaises(FileExistsError, source.copy, target, follow_symlinks=False) + + @needs_symlinks + def test_copy_dangling_symlink(self): + base = self.cls(self.base) + source = base / 'source' + target = base / 'target' + + source.mkdir() + source.joinpath('link').symlink_to('nonexistent') + + self.assertRaises(FileNotFoundError, source.copy, target) + + target2 = base / 'target2' + result = source.copy(target2, follow_symlinks=False) + self.assertEqual(result, target2) + self.assertTrue(target2.joinpath('link').is_symlink()) + self.assertEqual(target2.joinpath('link').readlink(), self.cls('nonexistent')) + @needs_symlinks def test_copy_link_preserve_metadata(self): base = self.cls(self.base) @@ -801,6 +952,54 @@ def test_copy_dir_preserve_metadata_xattrs(self): target_file = target.joinpath('dirD', 'fileD') self.assertEqual(os.getxattr(target_file, b'user.foo'), b'42') + @needs_symlinks + def test_move_file_symlink(self): + 
base = self.cls(self.base) + source = base / 'linkA' + source_readlink = source.readlink() + target = base / 'linkA_moved' + result = source.move(target) + self.assertEqual(result, target) + self.assertFalse(source.exists()) + self.assertTrue(target.is_symlink()) + self.assertEqual(source_readlink, target.readlink()) + + @needs_symlinks + def test_move_file_symlink_to_itself(self): + base = self.cls(self.base) + source = base / 'linkA' + self.assertRaises(OSError, source.move, source) + + @needs_symlinks + def test_move_dir_symlink(self): + base = self.cls(self.base) + source = base / 'linkB' + source_readlink = source.readlink() + target = base / 'linkB_moved' + result = source.move(target) + self.assertEqual(result, target) + self.assertFalse(source.exists()) + self.assertTrue(target.is_symlink()) + self.assertEqual(source_readlink, target.readlink()) + + @needs_symlinks + def test_move_dir_symlink_to_itself(self): + base = self.cls(self.base) + source = base / 'linkB' + self.assertRaises(OSError, source.move, source) + + @needs_symlinks + def test_move_dangling_symlink(self): + base = self.cls(self.base) + source = base / 'brokenLink' + source_readlink = source.readlink() + target = base / 'brokenLink_moved' + result = source.move(target) + self.assertEqual(result, target) + self.assertFalse(source.exists()) + self.assertTrue(target.is_symlink()) + self.assertEqual(source_readlink, target.readlink()) + @patch_replace def test_move_file_other_fs(self): self.test_move_file() @@ -858,9 +1057,41 @@ def test_move_into_other_os(self): def test_move_into_empty_name_other_os(self): self.test_move_into_empty_name() + @needs_symlinks + def test_complex_symlinks_absolute(self): + self._check_complex_symlinks(self.base) + + @needs_symlinks + def test_complex_symlinks_relative(self): + self._check_complex_symlinks('.') + + @needs_symlinks + def test_complex_symlinks_relative_dot_dot(self): + self._check_complex_symlinks(self.parser.join('dirA', '..')) + def 
_check_complex_symlinks(self, link0_target): - super()._check_complex_symlinks(link0_target) + # Test solving a non-looping chain of symlinks (issue #19887). + parser = self.parser P = self.cls(self.base) + P.joinpath('link1').symlink_to(parser.join('link0', 'link0'), target_is_directory=True) + P.joinpath('link2').symlink_to(parser.join('link1', 'link1'), target_is_directory=True) + P.joinpath('link3').symlink_to(parser.join('link2', 'link2'), target_is_directory=True) + P.joinpath('link0').symlink_to(link0_target, target_is_directory=True) + + # Resolve absolute paths. + p = (P / 'link0').resolve() + self.assertEqual(p, P) + self.assertEqualNormCase(str(p), self.base) + p = (P / 'link1').resolve() + self.assertEqual(p, P) + self.assertEqualNormCase(str(p), self.base) + p = (P / 'link2').resolve() + self.assertEqual(p, P) + self.assertEqualNormCase(str(p), self.base) + p = (P / 'link3').resolve() + self.assertEqual(p, P) + self.assertEqualNormCase(str(p), self.base) + # Resolve relative paths. old_path = os.getcwd() os.chdir(self.base) @@ -880,6 +1111,118 @@ def _check_complex_symlinks(self, link0_target): finally: os.chdir(old_path) + def _check_resolve(self, p, expected, strict=True): + q = p.resolve(strict) + self.assertEqual(q, expected) + + # This can be used to check both relative and absolute resolutions. 
+ _check_resolve_relative = _check_resolve_absolute = _check_resolve + + @needs_symlinks + def test_resolve_common(self): + P = self.cls + p = P(self.base, 'foo') + with self.assertRaises(OSError) as cm: + p.resolve(strict=True) + self.assertEqual(cm.exception.errno, errno.ENOENT) + # Non-strict + parser = self.parser + self.assertEqualNormCase(str(p.resolve(strict=False)), + parser.join(self.base, 'foo')) + p = P(self.base, 'foo', 'in', 'spam') + self.assertEqualNormCase(str(p.resolve(strict=False)), + parser.join(self.base, 'foo', 'in', 'spam')) + p = P(self.base, '..', 'foo', 'in', 'spam') + self.assertEqualNormCase(str(p.resolve(strict=False)), + parser.join(parser.dirname(self.base), 'foo', 'in', 'spam')) + # These are all relative symlinks. + p = P(self.base, 'dirB', 'fileB') + self._check_resolve_relative(p, p) + p = P(self.base, 'linkA') + self._check_resolve_relative(p, P(self.base, 'fileA')) + p = P(self.base, 'dirA', 'linkC', 'fileB') + self._check_resolve_relative(p, P(self.base, 'dirB', 'fileB')) + p = P(self.base, 'dirB', 'linkD', 'fileB') + self._check_resolve_relative(p, P(self.base, 'dirB', 'fileB')) + # Non-strict + p = P(self.base, 'dirA', 'linkC', 'fileB', 'foo', 'in', 'spam') + self._check_resolve_relative(p, P(self.base, 'dirB', 'fileB', 'foo', 'in', + 'spam'), False) + p = P(self.base, 'dirA', 'linkC', '..', 'foo', 'in', 'spam') + if self.cls.parser is not posixpath: + # In Windows, if linkY points to dirB, 'dirA\linkY\..' + # resolves to 'dirA' without resolving linkY first. + self._check_resolve_relative(p, P(self.base, 'dirA', 'foo', 'in', + 'spam'), False) + else: + # In Posix, if linkY points to dirB, 'dirA/linkY/..' + # resolves to 'dirB/..' first before resolving to parent of dirB. + self._check_resolve_relative(p, P(self.base, 'foo', 'in', 'spam'), False) + # Now create absolute symlinks. 
+ d = self.tempdir() + P(self.base, 'dirA', 'linkX').symlink_to(d) + P(self.base, str(d), 'linkY').symlink_to(self.parser.join(self.base, 'dirB')) + p = P(self.base, 'dirA', 'linkX', 'linkY', 'fileB') + self._check_resolve_absolute(p, P(self.base, 'dirB', 'fileB')) + # Non-strict + p = P(self.base, 'dirA', 'linkX', 'linkY', 'foo', 'in', 'spam') + self._check_resolve_relative(p, P(self.base, 'dirB', 'foo', 'in', 'spam'), + False) + p = P(self.base, 'dirA', 'linkX', 'linkY', '..', 'foo', 'in', 'spam') + if self.cls.parser is not posixpath: + # In Windows, if linkY points to dirB, 'dirA\linkY\..' + # resolves to 'dirA' without resolving linkY first. + self._check_resolve_relative(p, P(d, 'foo', 'in', 'spam'), False) + else: + # In Posix, if linkY points to dirB, 'dirA/linkY/..' + # resolves to 'dirB/..' first before resolving to parent of dirB. + self._check_resolve_relative(p, P(self.base, 'foo', 'in', 'spam'), False) + + @needs_symlinks + def test_resolve_dot(self): + # See http://web.archive.org/web/20200623062557/https://bitbucket.org/pitrou/pathlib/issues/9/ + parser = self.parser + p = self.cls(self.base) + p.joinpath('0').symlink_to('.', target_is_directory=True) + p.joinpath('1').symlink_to(parser.join('0', '0'), target_is_directory=True) + p.joinpath('2').symlink_to(parser.join('1', '1'), target_is_directory=True) + q = p / '2' + self.assertEqual(q.resolve(strict=True), p) + r = q / '3' / '4' + self.assertRaises(FileNotFoundError, r.resolve, strict=True) + # Non-strict + self.assertEqual(r.resolve(strict=False), p / '3' / '4') + + def _check_symlink_loop(self, *args): + path = self.cls(*args) + with self.assertRaises(OSError) as cm: + path.resolve(strict=True) + self.assertEqual(cm.exception.errno, errno.ELOOP) + + @needs_posix + @needs_symlinks + def test_resolve_loop(self): + # Loops with relative symlinks. 
+ self.cls(self.base, 'linkX').symlink_to('linkX/inside') + self._check_symlink_loop(self.base, 'linkX') + self.cls(self.base, 'linkY').symlink_to('linkY') + self._check_symlink_loop(self.base, 'linkY') + self.cls(self.base, 'linkZ').symlink_to('linkZ/../linkZ') + self._check_symlink_loop(self.base, 'linkZ') + # Non-strict + p = self.cls(self.base, 'linkZ', 'foo') + self.assertEqual(p.resolve(strict=False), p) + # Loops with absolute symlinks. + self.cls(self.base, 'linkU').symlink_to(self.parser.join(self.base, 'linkU/inside')) + self._check_symlink_loop(self.base, 'linkU') + self.cls(self.base, 'linkV').symlink_to(self.parser.join(self.base, 'linkV')) + self._check_symlink_loop(self.base, 'linkV') + self.cls(self.base, 'linkW').symlink_to(self.parser.join(self.base, 'linkW/../linkW')) + self._check_symlink_loop(self.base, 'linkW') + # Non-strict + q = self.cls(self.base, 'linkW', 'foo') + self.assertEqual(q.resolve(strict=False), q) + def test_resolve_nonexist_relative_issue38671(self): p = self.cls('non', 'exist') @@ -890,6 +1233,24 @@ def test_resolve_nonexist_relative_issue38671(self): finally: os.chdir(old_cwd) + @needs_symlinks + def test_readlink(self): + P = self.cls(self.base) + self.assertEqual((P / 'linkA').readlink(), self.cls('fileA')) + self.assertEqual((P / 'brokenLink').readlink(), + self.cls('non-existing')) + self.assertEqual((P / 'linkB').readlink(), self.cls('dirB')) + self.assertEqual((P / 'linkB' / 'linkD').readlink(), self.cls('../dirB')) + with self.assertRaises(OSError): + (P / 'fileA').readlink() + + @unittest.skipIf(hasattr(os, "readlink"), "os.readlink() is present") + def test_readlink_unsupported(self): + P = self.cls(self.base) + p = P / 'fileA' + with self.assertRaises(pathlib.UnsupportedOperation): + q.readlink(p) + @os_helper.skip_unless_working_chmod def test_chmod(self): p = self.cls(self.base) / 'fileA' @@ -991,6 +1352,41 @@ def test_group_no_follow_symlinks(self): self.assertEqual(expected_gid, gid_2) 
self.assertEqual(expected_name, link.group(follow_symlinks=False)) + @needs_symlinks + def test_delete_symlink(self): + tmp = self.cls(self.base, 'delete') + tmp.mkdir() + dir_ = tmp / 'dir' + dir_.mkdir() + link = tmp / 'link' + link.symlink_to(dir_) + link._delete() + self.assertTrue(dir_.exists()) + self.assertFalse(link.exists(follow_symlinks=False)) + + @needs_symlinks + def test_delete_inner_symlink(self): + tmp = self.cls(self.base, 'delete') + tmp.mkdir() + dir1 = tmp / 'dir1' + dir2 = dir1 / 'dir2' + dir3 = tmp / 'dir3' + for d in dir1, dir2, dir3: + d.mkdir() + file1 = tmp / 'file1' + file1.write_text('foo') + link1 = dir1 / 'link1' + link1.symlink_to(dir2) + link2 = dir1 / 'link2' + link2.symlink_to(dir3) + link3 = dir1 / 'link3' + link3.symlink_to(file1) + # make sure symlinks are removed but not followed + dir1._delete() + self.assertFalse(dir1.exists()) + self.assertTrue(dir3.exists()) + self.assertTrue(file1.exists()) + @unittest.skipIf(sys.platform[:6] == 'cygwin', "This test can't be run on Cygwin (issue #1071513).") @os_helper.skip_if_dac_override @@ -1354,6 +1750,12 @@ def test_symlink_to_unsupported(self): with self.assertRaises(pathlib.UnsupportedOperation): q.symlink_to(p) + @needs_symlinks + def test_stat_no_follow_symlinks(self): + p = self.cls(self.base) / 'linkA' + st = p.stat() + self.assertNotEqual(st, p.stat(follow_symlinks=False)) + @needs_symlinks def test_lstat(self): p = self.cls(self.base)/ 'linkA' @@ -1433,6 +1835,15 @@ def test_passing_kwargs_errors(self): with self.assertRaises(TypeError): self.cls(foo="bar") + @needs_symlinks + def test_iterdir_symlink(self): + # __iter__ on a symlink to a directory. 
+ P = self.cls + p = P(self.base, 'linkB') + paths = set(p.iterdir()) + expected = { P(self.base, 'linkB', q) for q in ['fileB', 'linkD'] } + self.assertEqual(paths, expected) + def test_glob_empty_pattern(self): p = self.cls('') with self.assertRaisesRegex(ValueError, 'Unacceptable pattern'): @@ -1493,6 +1904,25 @@ def test_glob_dot(self): self.assertEqual( set(P('.').glob('**/*/*')), {P("dirD/fileD")}) + # See https://github.com/WebAssembly/wasi-filesystem/issues/26 + @unittest.skipIf(is_wasi, "WASI resolution of '..' parts doesn't match POSIX") + def test_glob_dotdot(self): + # ".." is not special in globs. + P = self.cls + p = P(self.base) + self.assertEqual(set(p.glob("..")), { P(self.base, "..") }) + self.assertEqual(set(p.glob("../..")), { P(self.base, "..", "..") }) + self.assertEqual(set(p.glob("dirA/..")), { P(self.base, "dirA", "..") }) + self.assertEqual(set(p.glob("dirA/../file*")), { P(self.base, "dirA/../fileA") }) + self.assertEqual(set(p.glob("dirA/../file*/..")), set()) + self.assertEqual(set(p.glob("../xyzzy")), set()) + if self.cls.parser is posixpath: + self.assertEqual(set(p.glob("xyzzy/..")), set()) + else: + # ".." segments are normalized first on Windows, so this path is stat()able. + self.assertEqual(set(p.glob("xyzzy/..")), { P(self.base, "xyzzy", "..") }) + self.assertEqual(set(p.glob("/".join([".."] * 50))), { P(self.base, *[".."] * 50)}) + def test_glob_inaccessible(self): P = self.cls p = P(self.base, "mydir1", "mydir2") @@ -1508,6 +1938,124 @@ def test_rglob_pathlike(self): self.assertEqual(expect, set(p.rglob(P(pattern)))) self.assertEqual(expect, set(p.rglob(FakePath(pattern)))) + @needs_symlinks + @unittest.skipIf(is_emscripten, "Hangs") + def test_glob_recurse_symlinks_common(self): + def _check(path, glob, expected): + actual = {path for path in path.glob(glob, recurse_symlinks=True) + if path.parts.count("linkD") <= 1} # exclude symlink loop. 
+ self.assertEqual(actual, { P(self.base, q) for q in expected }) + P = self.cls + p = P(self.base) + _check(p, "fileB", []) + _check(p, "dir*/file*", ["dirB/fileB", "dirC/fileC"]) + _check(p, "*A", ["dirA", "fileA", "linkA"]) + _check(p, "*B/*", ["dirB/fileB", "dirB/linkD", "linkB/fileB", "linkB/linkD"]) + _check(p, "*/fileB", ["dirB/fileB", "linkB/fileB"]) + _check(p, "*/", ["dirA/", "dirB/", "dirC/", "dirE/", "linkB/"]) + _check(p, "dir*/*/..", ["dirC/dirD/..", "dirA/linkC/..", "dirB/linkD/.."]) + _check(p, "dir*/**", [ + "dirA/", "dirA/linkC", "dirA/linkC/fileB", "dirA/linkC/linkD", "dirA/linkC/linkD/fileB", + "dirB/", "dirB/fileB", "dirB/linkD", "dirB/linkD/fileB", + "dirC/", "dirC/fileC", "dirC/dirD", "dirC/dirD/fileD", "dirC/novel.txt", + "dirE/"]) + _check(p, "dir*/**/", ["dirA/", "dirA/linkC/", "dirA/linkC/linkD/", "dirB/", "dirB/linkD/", + "dirC/", "dirC/dirD/", "dirE/"]) + _check(p, "dir*/**/..", ["dirA/..", "dirA/linkC/..", "dirB/..", + "dirB/linkD/..", "dirA/linkC/linkD/..", + "dirC/..", "dirC/dirD/..", "dirE/.."]) + _check(p, "dir*/*/**", [ + "dirA/linkC/", "dirA/linkC/linkD", "dirA/linkC/fileB", "dirA/linkC/linkD/fileB", + "dirB/linkD/", "dirB/linkD/fileB", + "dirC/dirD/", "dirC/dirD/fileD"]) + _check(p, "dir*/*/**/", ["dirA/linkC/", "dirA/linkC/linkD/", "dirB/linkD/", "dirC/dirD/"]) + _check(p, "dir*/*/**/..", ["dirA/linkC/..", "dirA/linkC/linkD/..", + "dirB/linkD/..", "dirC/dirD/.."]) + _check(p, "dir*/**/fileC", ["dirC/fileC"]) + _check(p, "dir*/*/../dirD/**/", ["dirC/dirD/../dirD/"]) + _check(p, "*/dirD/**", ["dirC/dirD/", "dirC/dirD/fileD"]) + _check(p, "*/dirD/**/", ["dirC/dirD/"]) + + @needs_symlinks + @unittest.skipIf(is_emscripten, "Hangs") + def test_rglob_recurse_symlinks_common(self): + def _check(path, glob, expected): + actual = {path for path in path.rglob(glob, recurse_symlinks=True) + if path.parts.count("linkD") <= 1} # exclude symlink loop. 
+ self.assertEqual(actual, { P(self.base, q) for q in expected }) + P = self.cls + p = P(self.base) + _check(p, "fileB", ["dirB/fileB", "dirA/linkC/fileB", "linkB/fileB", + "dirA/linkC/linkD/fileB", "dirB/linkD/fileB", "linkB/linkD/fileB"]) + _check(p, "*/fileA", []) + _check(p, "*/fileB", ["dirB/fileB", "dirA/linkC/fileB", "linkB/fileB", + "dirA/linkC/linkD/fileB", "dirB/linkD/fileB", "linkB/linkD/fileB"]) + _check(p, "file*", ["fileA", "dirA/linkC/fileB", "dirB/fileB", + "dirA/linkC/linkD/fileB", "dirB/linkD/fileB", "linkB/linkD/fileB", + "dirC/fileC", "dirC/dirD/fileD", "linkB/fileB"]) + _check(p, "*/", ["dirA/", "dirA/linkC/", "dirA/linkC/linkD/", "dirB/", "dirB/linkD/", + "dirC/", "dirC/dirD/", "dirE/", "linkB/", "linkB/linkD/"]) + _check(p, "", ["", "dirA/", "dirA/linkC/", "dirA/linkC/linkD/", "dirB/", "dirB/linkD/", + "dirC/", "dirE/", "dirC/dirD/", "linkB/", "linkB/linkD/"]) + + p = P(self.base, "dirC") + _check(p, "*", ["dirC/fileC", "dirC/novel.txt", + "dirC/dirD", "dirC/dirD/fileD"]) + _check(p, "file*", ["dirC/fileC", "dirC/dirD/fileD"]) + _check(p, "*/*", ["dirC/dirD/fileD"]) + _check(p, "*/", ["dirC/dirD/"]) + _check(p, "", ["dirC/", "dirC/dirD/"]) + # gh-91616, a re module regression + _check(p, "*.txt", ["dirC/novel.txt"]) + _check(p, "*.*", ["dirC/novel.txt"]) + + @needs_symlinks + def test_rglob_symlink_loop(self): + # Don't get fooled by symlink loops (Issue #26012). 
+ P = self.cls + p = P(self.base) + given = set(p.rglob('*', recurse_symlinks=False)) + expect = {'brokenLink', + 'dirA', 'dirA/linkC', + 'dirB', 'dirB/fileB', 'dirB/linkD', + 'dirC', 'dirC/dirD', 'dirC/dirD/fileD', + 'dirC/fileC', 'dirC/novel.txt', + 'dirE', + 'fileA', + 'linkA', + 'linkB', + 'brokenLinkLoop', + } + self.assertEqual(given, {p / x for x in expect}) + + @needs_symlinks + def test_glob_permissions(self): + # See bpo-38894 + P = self.cls + base = P(self.base) / 'permissions' + base.mkdir() + + for i in range(100): + link = base / f"link{i}" + if i % 2: + link.symlink_to(P(self.base, "dirE", "nonexistent")) + else: + link.symlink_to(P(self.base, "dirC"), target_is_directory=True) + + self.assertEqual(len(set(base.glob("*"))), 100) + self.assertEqual(len(set(base.glob("*/"))), 50) + self.assertEqual(len(set(base.glob("*/fileC"))), 50) + self.assertEqual(len(set(base.glob("*/file*"))), 50) + + @needs_symlinks + def test_glob_long_symlink(self): + # See gh-87695 + base = self.cls(self.base) / 'long_symlink' + base.mkdir() + bad_link = base / 'bad_link' + bad_link.symlink_to("bad" * 200) + self.assertEqual(sorted(base.glob('**/*')), [bad_link]) + @needs_posix def test_absolute_posix(self): P = self.cls @@ -1822,6 +2370,9 @@ class PathWalkTest(test_pathlib_abc.DummyPathWalkTest): can_symlink = PathTest.can_symlink def setUp(self): + name = self.id().split('.')[-1] + if name in _tests_needing_symlinks and not self.can_symlink: + self.skipTest('requires symlinks') super().setUp() sub21_path= self.sub2_path / "SUB21" tmp5_path = sub21_path / "tmp3" @@ -1903,6 +2454,37 @@ def test_walk_above_recursion_limit(self): list(base.walk()) list(base.walk(top_down=False)) + @needs_symlinks + def test_walk_follow_symlinks(self): + walk_it = self.walk_path.walk(follow_symlinks=True) + for root, dirs, files in walk_it: + if root == self.link_path: + self.assertEqual(dirs, []) + self.assertEqual(files, ["tmp4"]) + break + else: + self.fail("Didn't follow symlink with 
follow_symlinks=True") + + @needs_symlinks + def test_walk_symlink_location(self): + # Tests whether symlinks end up in filenames or dirnames depending + # on the `follow_symlinks` argument. + walk_it = self.walk_path.walk(follow_symlinks=False) + for root, dirs, files in walk_it: + if root == self.sub2_path: + self.assertIn("link", files) + break + else: + self.fail("symlink not found") + + walk_it = self.walk_path.walk(follow_symlinks=True) + for root, dirs, files in walk_it: + if root == self.sub2_path: + self.assertIn("link", dirs) + break + else: + self.fail("symlink not found") + @unittest.skipIf(os.name == 'nt', 'test requires a POSIX-compatible system') class PosixPathTest(PathTest, PurePosixPathTest): diff --git a/Lib/test/test_pathlib/test_pathlib_abc.py b/Lib/test/test_pathlib/test_pathlib_abc.py index 00153e3f5e997e..bf9ae6cc8a2433 100644 --- a/Lib/test/test_pathlib/test_pathlib_abc.py +++ b/Lib/test/test_pathlib/test_pathlib_abc.py @@ -8,13 +8,11 @@ from pathlib._abc import UnsupportedOperation, ParserBase, PurePathBase, PathBase import posixpath -from test.support import is_wasi, is_emscripten from test.support.os_helper import TESTFN _tests_needing_posix = set() _tests_needing_windows = set() -_tests_needing_symlinks = set() def needs_posix(fn): @@ -27,11 +25,6 @@ def needs_windows(fn): _tests_needing_windows.add(fn.__name__) return fn -def needs_symlinks(fn): - """Decorator that marks a test as requiring a path class that supports symlinks.""" - _tests_needing_symlinks.add(fn.__name__) - return fn - class UnsupportedOperationTest(unittest.TestCase): def test_is_notimplemented(self): @@ -1369,7 +1362,6 @@ def test_unsupported_operation(self): self.assertRaises(e, p.glob, '*') self.assertRaises(e, p.rglob, '*') self.assertRaises(e, lambda: list(p.walk())) - self.assertRaises(e, p.absolute) self.assertRaises(e, p.expanduser) self.assertRaises(e, p.readlink) self.assertRaises(e, p.symlink_to, 'foo') @@ -1425,7 +1417,6 @@ class DummyPath(PathBase): 
_files = {} _directories = {} - _symlinks = {} def __eq__(self, other): if not isinstance(other, DummyPath): @@ -1439,16 +1430,11 @@ def __repr__(self): return "{}({!r})".format(self.__class__.__name__, self.as_posix()) def stat(self, *, follow_symlinks=True): - if follow_symlinks or self.name in ('', '.', '..'): - path = str(self.resolve(strict=True)) - else: - path = str(self.parent.resolve(strict=True) / self.name) + path = str(self).rstrip('/') if path in self._files: st_mode = stat.S_IFREG elif path in self._directories: st_mode = stat.S_IFDIR - elif path in self._symlinks: - st_mode = stat.S_IFLNK else: raise FileNotFoundError(errno.ENOENT, "Not found", str(self)) return DummyPathStatResult(st_mode, hash(str(self)), 0, 0, 0, 0, 0, 0, 0, 0) @@ -1457,10 +1443,7 @@ def open(self, mode='r', buffering=-1, encoding=None, errors=None, newline=None): if buffering != -1 and not (buffering == 0 and 'b' in mode): raise NotImplementedError - path_obj = self.resolve() - path = str(path_obj) - name = path_obj.name - parent = str(path_obj.parent) + path = str(self) if path in self._directories: raise IsADirectoryError(errno.EISDIR, "Is a directory", path) @@ -1471,6 +1454,7 @@ def open(self, mode='r', buffering=-1, encoding=None, raise FileNotFoundError(errno.ENOENT, "File not found", path) stream = io.BytesIO(self._files[path]) elif mode == 'w': + parent, name = posixpath.split(path) if parent not in self._directories: raise FileNotFoundError(errno.ENOENT, "File not found", parent) stream = DummyPathIO(self._files, path) @@ -1483,7 +1467,7 @@ def open(self, mode='r', buffering=-1, encoding=None, return stream def iterdir(self): - path = str(self.resolve()) + path = str(self).rstrip('/') if path in self._files: raise NotADirectoryError(errno.ENOTDIR, "Not a directory", path) elif path in self._directories: @@ -1492,9 +1476,9 @@ def iterdir(self): raise FileNotFoundError(errno.ENOENT, "File not found", path) def mkdir(self, mode=0o777, parents=False, exist_ok=False): - path 
= str(self.parent.resolve() / self.name) - parent = str(self.parent.resolve()) - if path in self._directories or path in self._symlinks: + path = str(self) + parent = str(self.parent) + if path in self._directories: if exist_ok: return else: @@ -1510,33 +1494,28 @@ def mkdir(self, mode=0o777, parents=False, exist_ok=False): self.mkdir(mode, parents=False, exist_ok=exist_ok) def unlink(self, missing_ok=False): - path_obj = self.parent.resolve(strict=True) / self.name - path = str(path_obj) - name = path_obj.name - parent = str(path_obj.parent) + path = str(self) + name = self.name + parent = str(self.parent) if path in self._directories: raise IsADirectoryError(errno.EISDIR, "Is a directory", path) elif path in self._files: self._directories[parent].remove(name) del self._files[path] - elif path in self._symlinks: - self._directories[parent].remove(name) - del self._symlinks[path] elif not missing_ok: raise FileNotFoundError(errno.ENOENT, "File not found", path) def rmdir(self): - path_obj = self.parent.resolve(strict=True) / self.name - path = str(path_obj) - if path in self._files or path in self._symlinks: + path = str(self) + if path in self._files: raise NotADirectoryError(errno.ENOTDIR, "Not a directory", path) elif path not in self._directories: raise FileNotFoundError(errno.ENOENT, "File not found", path) elif self._directories[path]: raise OSError(errno.ENOTEMPTY, "Directory not empty", path) else: - name = path_obj.name - parent = str(path_obj.parent) + name = self.name + parent = str(self.parent) self._directories[parent].remove(name) del self._directories[path] @@ -1569,9 +1548,6 @@ class DummyPathTest(DummyPurePathTest): def setUp(self): super().setUp() - name = self.id().split('.')[-1] - if name in _tests_needing_symlinks and not self.can_symlink: - self.skipTest('requires symlinks') parser = self.cls.parser p = self.cls(self.base) p.mkdir(parents=True) @@ -1604,7 +1580,6 @@ def tearDown(self): cls = self.cls cls._files.clear() cls._directories.clear() 
- cls._symlinks.clear() def tempdir(self): path = self.cls(self.base).with_name('tmp-dirD') @@ -1730,101 +1705,6 @@ def test_copy_file(self): self.assertTrue(target.exists()) self.assertEqual(source.read_text(), target.read_text()) - @needs_symlinks - def test_copy_symlink_follow_symlinks_true(self): - base = self.cls(self.base) - source = base / 'linkA' - target = base / 'copyA' - result = source.copy(target) - self.assertEqual(result, target) - self.assertTrue(target.exists()) - self.assertFalse(target.is_symlink()) - self.assertEqual(source.read_text(), target.read_text()) - - @needs_symlinks - def test_copy_symlink_follow_symlinks_false(self): - base = self.cls(self.base) - source = base / 'linkA' - target = base / 'copyA' - result = source.copy(target, follow_symlinks=False) - self.assertEqual(result, target) - self.assertTrue(target.exists()) - self.assertTrue(target.is_symlink()) - self.assertEqual(source.readlink(), target.readlink()) - - @needs_symlinks - def test_copy_symlink_to_itself(self): - base = self.cls(self.base) - source = base / 'linkA' - self.assertRaises(OSError, source.copy, source) - - @needs_symlinks - def test_copy_symlink_to_existing_symlink(self): - base = self.cls(self.base) - source = base / 'copySource' - target = base / 'copyTarget' - source.symlink_to(base / 'fileA') - target.symlink_to(base / 'dirC') - self.assertRaises(OSError, source.copy, target) - self.assertRaises(OSError, source.copy, target, follow_symlinks=False) - - @needs_symlinks - def test_copy_symlink_to_existing_directory_symlink(self): - base = self.cls(self.base) - source = base / 'copySource' - target = base / 'copyTarget' - source.symlink_to(base / 'fileA') - target.symlink_to(base / 'dirC') - self.assertRaises(OSError, source.copy, target) - self.assertRaises(OSError, source.copy, target, follow_symlinks=False) - - @needs_symlinks - def test_copy_directory_symlink_follow_symlinks_false(self): - base = self.cls(self.base) - source = base / 'linkB' - target = base 
/ 'copyA' - result = source.copy(target, follow_symlinks=False) - self.assertEqual(result, target) - self.assertTrue(target.exists()) - self.assertTrue(target.is_symlink()) - self.assertEqual(source.readlink(), target.readlink()) - - @needs_symlinks - def test_copy_directory_symlink_to_itself(self): - base = self.cls(self.base) - source = base / 'linkB' - self.assertRaises(OSError, source.copy, source) - self.assertRaises(OSError, source.copy, source, follow_symlinks=False) - - @needs_symlinks - def test_copy_directory_symlink_into_itself(self): - base = self.cls(self.base) - source = base / 'linkB' - target = base / 'linkB' / 'copyB' - self.assertRaises(OSError, source.copy, target) - self.assertRaises(OSError, source.copy, target, follow_symlinks=False) - self.assertFalse(target.exists()) - - @needs_symlinks - def test_copy_directory_symlink_to_existing_symlink(self): - base = self.cls(self.base) - source = base / 'copySource' - target = base / 'copyTarget' - source.symlink_to(base / 'dirC') - target.symlink_to(base / 'fileA') - self.assertRaises(FileExistsError, source.copy, target) - self.assertRaises(FileExistsError, source.copy, target, follow_symlinks=False) - - @needs_symlinks - def test_copy_directory_symlink_to_existing_directory_symlink(self): - base = self.cls(self.base) - source = base / 'copySource' - target = base / 'copyTarget' - source.symlink_to(base / 'dirC' / 'dirD') - target.symlink_to(base / 'dirC') - self.assertRaises(FileExistsError, source.copy, target) - self.assertRaises(FileExistsError, source.copy, target, follow_symlinks=False) - def test_copy_file_to_existing_file(self): base = self.cls(self.base) source = base / 'fileA' @@ -1840,34 +1720,6 @@ def test_copy_file_to_existing_directory(self): target = base / 'dirA' self.assertRaises(OSError, source.copy, target) - @needs_symlinks - def test_copy_file_to_existing_symlink(self): - base = self.cls(self.base) - source = base / 'dirB' / 'fileB' - target = base / 'linkA' - real_target = base 
/ 'fileA' - result = source.copy(target) - self.assertEqual(result, target) - self.assertTrue(target.exists()) - self.assertTrue(target.is_symlink()) - self.assertTrue(real_target.exists()) - self.assertFalse(real_target.is_symlink()) - self.assertEqual(source.read_text(), real_target.read_text()) - - @needs_symlinks - def test_copy_file_to_existing_symlink_follow_symlinks_false(self): - base = self.cls(self.base) - source = base / 'dirB' / 'fileB' - target = base / 'linkA' - real_target = base / 'fileA' - result = source.copy(target, follow_symlinks=False) - self.assertEqual(result, target) - self.assertTrue(target.exists()) - self.assertTrue(target.is_symlink()) - self.assertTrue(real_target.exists()) - self.assertFalse(real_target.is_symlink()) - self.assertEqual(source.read_text(), real_target.read_text()) - def test_copy_file_empty(self): base = self.cls(self.base) source = base / 'empty' @@ -1985,23 +1837,6 @@ def test_copy_dir_into_itself(self): self.assertRaises(OSError, source.copy, target, follow_symlinks=False) self.assertFalse(target.exists()) - @needs_symlinks - def test_copy_dangling_symlink(self): - base = self.cls(self.base) - source = base / 'source' - target = base / 'target' - - source.mkdir() - source.joinpath('link').symlink_to('nonexistent') - - self.assertRaises(FileNotFoundError, source.copy, target) - - target2 = base / 'target2' - result = source.copy(target2, follow_symlinks=False) - self.assertEqual(result, target2) - self.assertTrue(target2.joinpath('link').is_symlink()) - self.assertEqual(target2.joinpath('link').readlink(), self.cls('nonexistent')) - def test_copy_into(self): base = self.cls(self.base) source = base / 'fileA' @@ -2087,54 +1922,6 @@ def test_move_dir_into_itself(self): self.assertTrue(source.exists()) self.assertFalse(target.exists()) - @needs_symlinks - def test_move_file_symlink(self): - base = self.cls(self.base) - source = base / 'linkA' - source_readlink = source.readlink() - target = base / 'linkA_moved' - result 
= source.move(target) - self.assertEqual(result, target) - self.assertFalse(source.exists()) - self.assertTrue(target.is_symlink()) - self.assertEqual(source_readlink, target.readlink()) - - @needs_symlinks - def test_move_file_symlink_to_itself(self): - base = self.cls(self.base) - source = base / 'linkA' - self.assertRaises(OSError, source.move, source) - - @needs_symlinks - def test_move_dir_symlink(self): - base = self.cls(self.base) - source = base / 'linkB' - source_readlink = source.readlink() - target = base / 'linkB_moved' - result = source.move(target) - self.assertEqual(result, target) - self.assertFalse(source.exists()) - self.assertTrue(target.is_symlink()) - self.assertEqual(source_readlink, target.readlink()) - - @needs_symlinks - def test_move_dir_symlink_to_itself(self): - base = self.cls(self.base) - source = base / 'linkB' - self.assertRaises(OSError, source.move, source) - - @needs_symlinks - def test_move_dangling_symlink(self): - base = self.cls(self.base) - source = base / 'brokenLink' - source_readlink = source.readlink() - target = base / 'brokenLink_moved' - result = source.move(target) - self.assertEqual(result, target) - self.assertFalse(source.exists()) - self.assertTrue(target.is_symlink()) - self.assertEqual(source_readlink, target.readlink()) - def test_move_into(self): base = self.cls(self.base) source = base / 'fileA' @@ -2161,15 +1948,6 @@ def test_iterdir(self): expected += ['linkA', 'linkB', 'brokenLink', 'brokenLinkLoop'] self.assertEqual(paths, { P(self.base, q) for q in expected }) - @needs_symlinks - def test_iterdir_symlink(self): - # __iter__ on a symlink to a directory. - P = self.cls - p = P(self.base, 'linkB') - paths = set(p.iterdir()) - expected = { P(self.base, 'linkB', q) for q in ['fileB', 'linkD'] } - self.assertEqual(paths, expected) - def test_iterdir_nodir(self): # __iter__ on something that is not a directory. 
p = self.cls(self.base, 'fileA') @@ -2196,7 +1974,6 @@ def test_scandir(self): if entry.name != 'brokenLinkLoop': self.assertEqual(entry.is_dir(), child.is_dir()) - def test_glob_common(self): def _check(glob, expected): self.assertEqual(set(glob), { P(self.base, q) for q in expected }) @@ -2250,8 +2027,6 @@ def test_glob_empty_pattern(self): P = self.cls p = P(self.base) self.assertEqual(list(p.glob("")), [p]) - self.assertEqual(list(p.glob(".")), [p / "."]) - self.assertEqual(list(p.glob("./")), [p / "./"]) def test_glob_case_sensitive(self): P = self.cls @@ -2265,44 +2040,6 @@ def _check(path, pattern, case_sensitive, expected): _check(path, "dirb/file*", True, []) _check(path, "dirb/file*", False, ["dirB/fileB"]) - @needs_symlinks - @unittest.skipIf(is_emscripten, "Hangs") - def test_glob_recurse_symlinks_common(self): - def _check(path, glob, expected): - actual = {path for path in path.glob(glob, recurse_symlinks=True) - if path.parts.count("linkD") <= 1} # exclude symlink loop. - self.assertEqual(actual, { P(self.base, q) for q in expected }) - P = self.cls - p = P(self.base) - _check(p, "fileB", []) - _check(p, "dir*/file*", ["dirB/fileB", "dirC/fileC"]) - _check(p, "*A", ["dirA", "fileA", "linkA"]) - _check(p, "*B/*", ["dirB/fileB", "dirB/linkD", "linkB/fileB", "linkB/linkD"]) - _check(p, "*/fileB", ["dirB/fileB", "linkB/fileB"]) - _check(p, "*/", ["dirA/", "dirB/", "dirC/", "dirE/", "linkB/"]) - _check(p, "dir*/*/..", ["dirC/dirD/..", "dirA/linkC/..", "dirB/linkD/.."]) - _check(p, "dir*/**", [ - "dirA/", "dirA/linkC", "dirA/linkC/fileB", "dirA/linkC/linkD", "dirA/linkC/linkD/fileB", - "dirB/", "dirB/fileB", "dirB/linkD", "dirB/linkD/fileB", - "dirC/", "dirC/fileC", "dirC/dirD", "dirC/dirD/fileD", "dirC/novel.txt", - "dirE/"]) - _check(p, "dir*/**/", ["dirA/", "dirA/linkC/", "dirA/linkC/linkD/", "dirB/", "dirB/linkD/", - "dirC/", "dirC/dirD/", "dirE/"]) - _check(p, "dir*/**/..", ["dirA/..", "dirA/linkC/..", "dirB/..", - "dirB/linkD/..", 
"dirA/linkC/linkD/..", - "dirC/..", "dirC/dirD/..", "dirE/.."]) - _check(p, "dir*/*/**", [ - "dirA/linkC/", "dirA/linkC/linkD", "dirA/linkC/fileB", "dirA/linkC/linkD/fileB", - "dirB/linkD/", "dirB/linkD/fileB", - "dirC/dirD/", "dirC/dirD/fileD"]) - _check(p, "dir*/*/**/", ["dirA/linkC/", "dirA/linkC/linkD/", "dirB/linkD/", "dirC/dirD/"]) - _check(p, "dir*/*/**/..", ["dirA/linkC/..", "dirA/linkC/linkD/..", - "dirB/linkD/..", "dirC/dirD/.."]) - _check(p, "dir*/**/fileC", ["dirC/fileC"]) - _check(p, "dir*/*/../dirD/**/", ["dirC/dirD/../dirD/"]) - _check(p, "*/dirD/**", ["dirC/dirD/", "dirC/dirD/fileD"]) - _check(p, "*/dirD/**/", ["dirC/dirD/"]) - def test_rglob_recurse_symlinks_false(self): def _check(path, glob, expected): actual = set(path.rglob(glob, recurse_symlinks=False)) @@ -2361,252 +2098,6 @@ def test_rglob_windows(self): self.assertEqual(set(p.rglob("FILEd")), { P(self.base, "dirC/dirD/fileD") }) self.assertEqual(set(p.rglob("*\\")), { P(self.base, "dirC/dirD/") }) - @needs_symlinks - @unittest.skipIf(is_emscripten, "Hangs") - def test_rglob_recurse_symlinks_common(self): - def _check(path, glob, expected): - actual = {path for path in path.rglob(glob, recurse_symlinks=True) - if path.parts.count("linkD") <= 1} # exclude symlink loop. 
- self.assertEqual(actual, { P(self.base, q) for q in expected }) - P = self.cls - p = P(self.base) - _check(p, "fileB", ["dirB/fileB", "dirA/linkC/fileB", "linkB/fileB", - "dirA/linkC/linkD/fileB", "dirB/linkD/fileB", "linkB/linkD/fileB"]) - _check(p, "*/fileA", []) - _check(p, "*/fileB", ["dirB/fileB", "dirA/linkC/fileB", "linkB/fileB", - "dirA/linkC/linkD/fileB", "dirB/linkD/fileB", "linkB/linkD/fileB"]) - _check(p, "file*", ["fileA", "dirA/linkC/fileB", "dirB/fileB", - "dirA/linkC/linkD/fileB", "dirB/linkD/fileB", "linkB/linkD/fileB", - "dirC/fileC", "dirC/dirD/fileD", "linkB/fileB"]) - _check(p, "*/", ["dirA/", "dirA/linkC/", "dirA/linkC/linkD/", "dirB/", "dirB/linkD/", - "dirC/", "dirC/dirD/", "dirE/", "linkB/", "linkB/linkD/"]) - _check(p, "", ["", "dirA/", "dirA/linkC/", "dirA/linkC/linkD/", "dirB/", "dirB/linkD/", - "dirC/", "dirE/", "dirC/dirD/", "linkB/", "linkB/linkD/"]) - - p = P(self.base, "dirC") - _check(p, "*", ["dirC/fileC", "dirC/novel.txt", - "dirC/dirD", "dirC/dirD/fileD"]) - _check(p, "file*", ["dirC/fileC", "dirC/dirD/fileD"]) - _check(p, "*/*", ["dirC/dirD/fileD"]) - _check(p, "*/", ["dirC/dirD/"]) - _check(p, "", ["dirC/", "dirC/dirD/"]) - # gh-91616, a re module regression - _check(p, "*.txt", ["dirC/novel.txt"]) - _check(p, "*.*", ["dirC/novel.txt"]) - - @needs_symlinks - def test_rglob_symlink_loop(self): - # Don't get fooled by symlink loops (Issue #26012). - P = self.cls - p = P(self.base) - given = set(p.rglob('*', recurse_symlinks=False)) - expect = {'brokenLink', - 'dirA', 'dirA/linkC', - 'dirB', 'dirB/fileB', 'dirB/linkD', - 'dirC', 'dirC/dirD', 'dirC/dirD/fileD', - 'dirC/fileC', 'dirC/novel.txt', - 'dirE', - 'fileA', - 'linkA', - 'linkB', - 'brokenLinkLoop', - } - self.assertEqual(given, {p / x for x in expect}) - - # See https://github.com/WebAssembly/wasi-filesystem/issues/26 - @unittest.skipIf(is_wasi, "WASI resolution of '..' parts doesn't match POSIX") - def test_glob_dotdot(self): - # ".." is not special in globs. 
- P = self.cls - p = P(self.base) - self.assertEqual(set(p.glob("..")), { P(self.base, "..") }) - self.assertEqual(set(p.glob("../..")), { P(self.base, "..", "..") }) - self.assertEqual(set(p.glob("dirA/..")), { P(self.base, "dirA", "..") }) - self.assertEqual(set(p.glob("dirA/../file*")), { P(self.base, "dirA/../fileA") }) - self.assertEqual(set(p.glob("dirA/../file*/..")), set()) - self.assertEqual(set(p.glob("../xyzzy")), set()) - if self.cls.parser is posixpath: - self.assertEqual(set(p.glob("xyzzy/..")), set()) - else: - # ".." segments are normalized first on Windows, so this path is stat()able. - self.assertEqual(set(p.glob("xyzzy/..")), { P(self.base, "xyzzy", "..") }) - self.assertEqual(set(p.glob("/".join([".."] * 50))), { P(self.base, *[".."] * 50)}) - - @needs_symlinks - def test_glob_permissions(self): - # See bpo-38894 - P = self.cls - base = P(self.base) / 'permissions' - base.mkdir() - - for i in range(100): - link = base / f"link{i}" - if i % 2: - link.symlink_to(P(self.base, "dirE", "nonexistent")) - else: - link.symlink_to(P(self.base, "dirC"), target_is_directory=True) - - self.assertEqual(len(set(base.glob("*"))), 100) - self.assertEqual(len(set(base.glob("*/"))), 50) - self.assertEqual(len(set(base.glob("*/fileC"))), 50) - self.assertEqual(len(set(base.glob("*/file*"))), 50) - - @needs_symlinks - def test_glob_long_symlink(self): - # See gh-87695 - base = self.cls(self.base) / 'long_symlink' - base.mkdir() - bad_link = base / 'bad_link' - bad_link.symlink_to("bad" * 200) - self.assertEqual(sorted(base.glob('**/*')), [bad_link]) - - @needs_posix - def test_absolute_posix(self): - P = self.cls - # The default implementation uses '/' as the current directory - self.assertEqual(str(P('').absolute()), '/') - self.assertEqual(str(P('a').absolute()), '/a') - self.assertEqual(str(P('a/b').absolute()), '/a/b') - - self.assertEqual(str(P('/').absolute()), '/') - self.assertEqual(str(P('/a').absolute()), '/a') - 
self.assertEqual(str(P('/a/b').absolute()), '/a/b') - - # '//'-prefixed absolute path (supported by POSIX). - self.assertEqual(str(P('//').absolute()), '//') - self.assertEqual(str(P('//a').absolute()), '//a') - self.assertEqual(str(P('//a/b').absolute()), '//a/b') - - @needs_symlinks - def test_readlink(self): - P = self.cls(self.base) - self.assertEqual((P / 'linkA').readlink(), self.cls('fileA')) - self.assertEqual((P / 'brokenLink').readlink(), - self.cls('non-existing')) - self.assertEqual((P / 'linkB').readlink(), self.cls('dirB')) - self.assertEqual((P / 'linkB' / 'linkD').readlink(), self.cls('../dirB')) - with self.assertRaises(OSError): - (P / 'fileA').readlink() - - @unittest.skipIf(hasattr(os, "readlink"), "os.readlink() is present") - def test_readlink_unsupported(self): - P = self.cls(self.base) - p = P / 'fileA' - with self.assertRaises(UnsupportedOperation): - q.readlink(p) - - def _check_resolve(self, p, expected, strict=True): - q = p.resolve(strict) - self.assertEqual(q, expected) - - # This can be used to check both relative and absolute resolutions. - _check_resolve_relative = _check_resolve_absolute = _check_resolve - - @needs_symlinks - def test_resolve_common(self): - P = self.cls - p = P(self.base, 'foo') - with self.assertRaises(OSError) as cm: - p.resolve(strict=True) - self.assertEqual(cm.exception.errno, errno.ENOENT) - # Non-strict - parser = self.parser - self.assertEqualNormCase(str(p.resolve(strict=False)), - parser.join(self.base, 'foo')) - p = P(self.base, 'foo', 'in', 'spam') - self.assertEqualNormCase(str(p.resolve(strict=False)), - parser.join(self.base, 'foo', 'in', 'spam')) - p = P(self.base, '..', 'foo', 'in', 'spam') - self.assertEqualNormCase(str(p.resolve(strict=False)), - parser.join(parser.dirname(self.base), 'foo', 'in', 'spam')) - # These are all relative symlinks. 
- p = P(self.base, 'dirB', 'fileB') - self._check_resolve_relative(p, p) - p = P(self.base, 'linkA') - self._check_resolve_relative(p, P(self.base, 'fileA')) - p = P(self.base, 'dirA', 'linkC', 'fileB') - self._check_resolve_relative(p, P(self.base, 'dirB', 'fileB')) - p = P(self.base, 'dirB', 'linkD', 'fileB') - self._check_resolve_relative(p, P(self.base, 'dirB', 'fileB')) - # Non-strict - p = P(self.base, 'dirA', 'linkC', 'fileB', 'foo', 'in', 'spam') - self._check_resolve_relative(p, P(self.base, 'dirB', 'fileB', 'foo', 'in', - 'spam'), False) - p = P(self.base, 'dirA', 'linkC', '..', 'foo', 'in', 'spam') - if self.cls.parser is not posixpath: - # In Windows, if linkY points to dirB, 'dirA\linkY\..' - # resolves to 'dirA' without resolving linkY first. - self._check_resolve_relative(p, P(self.base, 'dirA', 'foo', 'in', - 'spam'), False) - else: - # In Posix, if linkY points to dirB, 'dirA/linkY/..' - # resolves to 'dirB/..' first before resolving to parent of dirB. - self._check_resolve_relative(p, P(self.base, 'foo', 'in', 'spam'), False) - # Now create absolute symlinks. - d = self.tempdir() - P(self.base, 'dirA', 'linkX').symlink_to(d) - P(self.base, str(d), 'linkY').symlink_to(self.parser.join(self.base, 'dirB')) - p = P(self.base, 'dirA', 'linkX', 'linkY', 'fileB') - self._check_resolve_absolute(p, P(self.base, 'dirB', 'fileB')) - # Non-strict - p = P(self.base, 'dirA', 'linkX', 'linkY', 'foo', 'in', 'spam') - self._check_resolve_relative(p, P(self.base, 'dirB', 'foo', 'in', 'spam'), - False) - p = P(self.base, 'dirA', 'linkX', 'linkY', '..', 'foo', 'in', 'spam') - if self.cls.parser is not posixpath: - # In Windows, if linkY points to dirB, 'dirA\linkY\..' - # resolves to 'dirA' without resolving linkY first. - self._check_resolve_relative(p, P(d, 'foo', 'in', 'spam'), False) - else: - # In Posix, if linkY points to dirB, 'dirA/linkY/..' - # resolves to 'dirB/..' first before resolving to parent of dirB. 
- self._check_resolve_relative(p, P(self.base, 'foo', 'in', 'spam'), False) - - @needs_symlinks - def test_resolve_dot(self): - # See http://web.archive.org/web/20200623062557/https://bitbucket.org/pitrou/pathlib/issues/9/ - parser = self.parser - p = self.cls(self.base) - p.joinpath('0').symlink_to('.', target_is_directory=True) - p.joinpath('1').symlink_to(parser.join('0', '0'), target_is_directory=True) - p.joinpath('2').symlink_to(parser.join('1', '1'), target_is_directory=True) - q = p / '2' - self.assertEqual(q.resolve(strict=True), p) - r = q / '3' / '4' - self.assertRaises(FileNotFoundError, r.resolve, strict=True) - # Non-strict - self.assertEqual(r.resolve(strict=False), p / '3' / '4') - - def _check_symlink_loop(self, *args): - path = self.cls(*args) - with self.assertRaises(OSError) as cm: - path.resolve(strict=True) - self.assertEqual(cm.exception.errno, errno.ELOOP) - - @needs_posix - @needs_symlinks - def test_resolve_loop(self): - # Loops with relative symlinks. - self.cls(self.base, 'linkX').symlink_to('linkX/inside') - self._check_symlink_loop(self.base, 'linkX') - self.cls(self.base, 'linkY').symlink_to('linkY') - self._check_symlink_loop(self.base, 'linkY') - self.cls(self.base, 'linkZ').symlink_to('linkZ/../linkZ') - self._check_symlink_loop(self.base, 'linkZ') - # Non-strict - p = self.cls(self.base, 'linkZ', 'foo') - self.assertEqual(p.resolve(strict=False), p) - # Loops with absolute symlinks. 
- self.cls(self.base, 'linkU').symlink_to(self.parser.join(self.base, 'linkU/inside')) - self._check_symlink_loop(self.base, 'linkU') - self.cls(self.base, 'linkV').symlink_to(self.parser.join(self.base, 'linkV')) - self._check_symlink_loop(self.base, 'linkV') - self.cls(self.base, 'linkW').symlink_to(self.parser.join(self.base, 'linkW/../linkW')) - self._check_symlink_loop(self.base, 'linkW') - # Non-strict - q = self.cls(self.base, 'linkW', 'foo') - self.assertEqual(q.resolve(strict=False), q) - def test_stat(self): statA = self.cls(self.base).joinpath('fileA').stat() statB = self.cls(self.base).joinpath('dirB', 'fileB').stat() @@ -2627,12 +2118,6 @@ def test_stat(self): self.assertEqual(statA.st_dev, statC.st_dev) # other attributes not used by pathlib. - @needs_symlinks - def test_stat_no_follow_symlinks(self): - p = self.cls(self.base) / 'linkA' - st = p.stat() - self.assertNotEqual(st, p.stat(follow_symlinks=False)) - def test_stat_no_follow_symlinks_nosymlink(self): p = self.cls(self.base) / 'fileA' st = p.stat() @@ -2760,41 +2245,6 @@ def test_is_char_device_false(self): self.assertIs((P / 'fileA\udfff').is_char_device(), False) self.assertIs((P / 'fileA\x00').is_char_device(), False) - def _check_complex_symlinks(self, link0_target): - # Test solving a non-looping chain of symlinks (issue #19887). - parser = self.parser - P = self.cls(self.base) - P.joinpath('link1').symlink_to(parser.join('link0', 'link0'), target_is_directory=True) - P.joinpath('link2').symlink_to(parser.join('link1', 'link1'), target_is_directory=True) - P.joinpath('link3').symlink_to(parser.join('link2', 'link2'), target_is_directory=True) - P.joinpath('link0').symlink_to(link0_target, target_is_directory=True) - - # Resolve absolute paths. 
- p = (P / 'link0').resolve() - self.assertEqual(p, P) - self.assertEqualNormCase(str(p), self.base) - p = (P / 'link1').resolve() - self.assertEqual(p, P) - self.assertEqualNormCase(str(p), self.base) - p = (P / 'link2').resolve() - self.assertEqual(p, P) - self.assertEqualNormCase(str(p), self.base) - p = (P / 'link3').resolve() - self.assertEqual(p, P) - self.assertEqualNormCase(str(p), self.base) - - @needs_symlinks - def test_complex_symlinks_absolute(self): - self._check_complex_symlinks(self.base) - - @needs_symlinks - def test_complex_symlinks_relative(self): - self._check_complex_symlinks('.') - - @needs_symlinks - def test_complex_symlinks_relative_dot_dot(self): - self._check_complex_symlinks(self.parser.join('dirA', '..')) - def test_unlink(self): p = self.cls(self.base) / 'fileA' p.unlink() @@ -2838,41 +2288,6 @@ def test_delete_dir(self): self.assertRaises(FileNotFoundError, base.joinpath('dirC', 'fileC').stat) self.assertRaises(FileNotFoundError, base.joinpath('dirC', 'novel.txt').stat) - @needs_symlinks - def test_delete_symlink(self): - tmp = self.cls(self.base, 'delete') - tmp.mkdir() - dir_ = tmp / 'dir' - dir_.mkdir() - link = tmp / 'link' - link.symlink_to(dir_) - link._delete() - self.assertTrue(dir_.exists()) - self.assertFalse(link.exists(follow_symlinks=False)) - - @needs_symlinks - def test_delete_inner_symlink(self): - tmp = self.cls(self.base, 'delete') - tmp.mkdir() - dir1 = tmp / 'dir1' - dir2 = dir1 / 'dir2' - dir3 = tmp / 'dir3' - for d in dir1, dir2, dir3: - d.mkdir() - file1 = tmp / 'file1' - file1.write_text('foo') - link1 = dir1 / 'link1' - link1.symlink_to(dir2) - link2 = dir1 / 'link2' - link2.symlink_to(dir3) - link3 = dir1 / 'link3' - link3.symlink_to(file1) - # make sure symlinks are removed but not followed - dir1._delete() - self.assertFalse(dir1.exists()) - self.assertTrue(dir3.exists()) - self.assertTrue(file1.exists()) - def test_delete_missing(self): tmp = self.cls(self.base, 'delete') tmp.mkdir() @@ -2887,9 +2302,6 @@ 
class DummyPathWalkTest(unittest.TestCase): can_symlink = False def setUp(self): - name = self.id().split('.')[-1] - if name in _tests_needing_symlinks and not self.can_symlink: - self.skipTest('requires symlinks') # Build: # TESTFN/ # TEST1/ a file kid and two directory kids @@ -3002,70 +2414,6 @@ def test_walk_bottom_up(self): raise AssertionError(f"Unexpected path: {path}") self.assertTrue(seen_testfn) - @needs_symlinks - def test_walk_follow_symlinks(self): - walk_it = self.walk_path.walk(follow_symlinks=True) - for root, dirs, files in walk_it: - if root == self.link_path: - self.assertEqual(dirs, []) - self.assertEqual(files, ["tmp4"]) - break - else: - self.fail("Didn't follow symlink with follow_symlinks=True") - - @needs_symlinks - def test_walk_symlink_location(self): - # Tests whether symlinks end up in filenames or dirnames depending - # on the `follow_symlinks` argument. - walk_it = self.walk_path.walk(follow_symlinks=False) - for root, dirs, files in walk_it: - if root == self.sub2_path: - self.assertIn("link", files) - break - else: - self.fail("symlink not found") - - walk_it = self.walk_path.walk(follow_symlinks=True) - for root, dirs, files in walk_it: - if root == self.sub2_path: - self.assertIn("link", dirs) - break - else: - self.fail("symlink not found") - - -class DummyPathWithSymlinks(DummyPath): - __slots__ = () - - # Reduce symlink traversal limit to make tests run faster. 
- _max_symlinks = 20 - - def readlink(self): - path = str(self.parent.resolve() / self.name) - if path in self._symlinks: - return self.with_segments(self._symlinks[path][0]) - elif path in self._files or path in self._directories: - raise OSError(errno.EINVAL, "Not a symlink", path) - else: - raise FileNotFoundError(errno.ENOENT, "File not found", path) - - def symlink_to(self, target, target_is_directory=False): - path = str(self.parent.resolve() / self.name) - parent = str(self.parent.resolve()) - if path in self._symlinks: - raise FileExistsError(errno.EEXIST, "File exists", path) - self._directories[parent].add(self.name) - self._symlinks[path] = str(target), target_is_directory - - -class DummyPathWithSymlinksTest(DummyPathTest): - cls = DummyPathWithSymlinks - can_symlink = True - - -class DummyPathWithSymlinksWalkTest(DummyPathWalkTest): - cls = DummyPathWithSymlinks - can_symlink = True if __name__ == "__main__": From 72dca6c4eda0d63ee35a0aa619ae931ab226bef9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Srinivas=20Reddy=20Thatiparthy=20=28=E0=B0=A4=E0=B0=BE?= =?UTF-8?q?=E0=B0=9F=E0=B0=BF=E0=B0=AA=E0=B0=B0=E0=B1=8D=E0=B0=A4=E0=B0=BF?= =?UTF-8?q?=20=E0=B0=B6=E0=B1=8D=E0=B0=B0=E0=B1=80=E0=B0=A8=E0=B0=BF?= =?UTF-8?q?=E0=B0=B5=E0=B0=BE=E0=B0=B8=E0=B1=8D=20=20=E0=B0=B0=E0=B1=86?= =?UTF-8?q?=E0=B0=A1=E0=B1=8D=E0=B0=A1=E0=B0=BF=29?= Date: Sat, 7 Dec 2024 15:42:45 +0530 Subject: [PATCH 36/48] gh-119786: fix typo in `InternalDocs/garbage_collector.md` (#127687) --- InternalDocs/garbage_collector.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/InternalDocs/garbage_collector.md b/InternalDocs/garbage_collector.md index 4761f78f3593e3..394e4ef075f55e 100644 --- a/InternalDocs/garbage_collector.md +++ b/InternalDocs/garbage_collector.md @@ -518,7 +518,7 @@ Then the above algorithm is repeated, starting from step 2. 
Determining how much work to do ------------------------------- -We need to do a certain amount of work to enusre that garbage is collected, +We need to do a certain amount of work to ensure that garbage is collected, but doing too much work slows down execution. To work out how much work we need to do, consider a heap with `L` live objects From 27d0d2141319d82709eb09ba20065df3e1714fab Mon Sep 17 00:00:00 2001 From: Stan Ulbrych <89152624+StanFromIreland@users.noreply.github.com> Date: Sat, 7 Dec 2024 16:13:49 +0000 Subject: [PATCH 37/48] Give `poplib.POP3.rpop` a proper docstring (#127370) Previously `poplib.POP3.rpop` had a "Not sure what this does" docstring, now it has been fixed. --- Lib/poplib.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/poplib.py b/Lib/poplib.py index 1a1629d175b6d9..beb93a0d57cf93 100644 --- a/Lib/poplib.py +++ b/Lib/poplib.py @@ -309,7 +309,7 @@ def close(self): # optional commands: def rpop(self, user): - """Not sure what this does.""" + """Send RPOP command to access the mailbox with an alternate user.""" return self._shortcmd('RPOP %s' % user) From 79b7cab50a3292a1c01466cf0e69fb7b4e56cfb1 Mon Sep 17 00:00:00 2001 From: Barney Gale Date: Sat, 7 Dec 2024 17:58:42 +0000 Subject: [PATCH 38/48] GH-127090: Fix `urllib.response.addinfourl.url` value for opened `file:` URIs (#127091) The canonical `file:` URL (as generated by `pathname2url()`) is now used as the `url` attribute of the returned `addinfourl` object. The `addinfourl.url` attribute reflects the resolved URL for both `file:` or `http[s]:` URLs now. 
--- Lib/test/test_urllib.py | 11 ++++--- Lib/test/test_urllib2.py | 31 ++++++++----------- Lib/test/test_urllib2net.py | 3 +- Lib/urllib/request.py | 5 +-- ...-11-21-06-03-46.gh-issue-127090.yUYwdh.rst | 3 ++ 5 files changed, 25 insertions(+), 28 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-11-21-06-03-46.gh-issue-127090.yUYwdh.rst diff --git a/Lib/test/test_urllib.py b/Lib/test/test_urllib.py index 00e46990c406ac..042d3b35b77022 100644 --- a/Lib/test/test_urllib.py +++ b/Lib/test/test_urllib.py @@ -156,7 +156,7 @@ def test_headers(self): self.assertIsInstance(self.returned_obj.headers, email.message.Message) def test_url(self): - self.assertEqual(self.returned_obj.url, "file://" + self.quoted_pathname) + self.assertEqual(self.returned_obj.url, "file:" + self.quoted_pathname) def test_status(self): self.assertIsNone(self.returned_obj.status) @@ -165,7 +165,7 @@ def test_info(self): self.assertIsInstance(self.returned_obj.info(), email.message.Message) def test_geturl(self): - self.assertEqual(self.returned_obj.geturl(), "file://" + self.quoted_pathname) + self.assertEqual(self.returned_obj.geturl(), "file:" + self.quoted_pathname) def test_getcode(self): self.assertIsNone(self.returned_obj.getcode()) @@ -471,11 +471,14 @@ def test_missing_localfile(self): def test_file_notexists(self): fd, tmp_file = tempfile.mkstemp() - tmp_fileurl = 'file://localhost/' + tmp_file.replace(os.path.sep, '/') + tmp_file_canon_url = 'file:' + urllib.request.pathname2url(tmp_file) + parsed = urllib.parse.urlsplit(tmp_file_canon_url) + tmp_fileurl = parsed._replace(netloc='localhost').geturl() try: self.assertTrue(os.path.exists(tmp_file)) with urllib.request.urlopen(tmp_fileurl) as fobj: self.assertTrue(fobj) + self.assertEqual(fobj.url, tmp_file_canon_url) finally: os.close(fd) os.unlink(tmp_file) @@ -609,7 +612,7 @@ def tearDown(self): def constructLocalFileUrl(self, filePath): filePath = os.path.abspath(filePath) - return "file://%s" % 
urllib.request.pathname2url(filePath) + return "file:" + urllib.request.pathname2url(filePath) def createNewTempFile(self, data=b""): """Creates a new temporary file containing the specified data, diff --git a/Lib/test/test_urllib2.py b/Lib/test/test_urllib2.py index 99ad11cf0552eb..4a9e653515be5b 100644 --- a/Lib/test/test_urllib2.py +++ b/Lib/test/test_urllib2.py @@ -23,7 +23,7 @@ _proxy_bypass_winreg_override, _proxy_bypass_macosx_sysconf, AbstractDigestAuthHandler) -from urllib.parse import urlparse +from urllib.parse import urlsplit import urllib.error import http.client @@ -717,14 +717,6 @@ def test_processors(self): self.assertIsInstance(args[1], MockResponse) -def sanepathname2url(path): - urlpath = urllib.request.pathname2url(path) - if os.name == "nt" and urlpath.startswith("///"): - urlpath = urlpath[2:] - # XXX don't ask me about the mac... - return urlpath - - class HandlerTests(unittest.TestCase): def test_ftp(self): @@ -818,19 +810,22 @@ def test_file(self): o = h.parent = MockOpener() TESTFN = os_helper.TESTFN - urlpath = sanepathname2url(os.path.abspath(TESTFN)) towrite = b"hello, world\n" + canonurl = 'file:' + urllib.request.pathname2url(os.path.abspath(TESTFN)) + parsed = urlsplit(canonurl) + if parsed.netloc: + raise unittest.SkipTest("non-local working directory") urls = [ - "file://localhost%s" % urlpath, - "file://%s" % urlpath, - "file://%s%s" % (socket.gethostbyname('localhost'), urlpath), + canonurl, + parsed._replace(netloc='localhost').geturl(), + parsed._replace(netloc=socket.gethostbyname('localhost')).geturl(), ] try: localaddr = socket.gethostbyname(socket.gethostname()) except socket.gaierror: localaddr = '' if localaddr: - urls.append("file://%s%s" % (localaddr, urlpath)) + urls.append(parsed._replace(netloc=localaddr).geturl()) for url in urls: f = open(TESTFN, "wb") @@ -855,10 +850,10 @@ def test_file(self): self.assertEqual(headers["Content-type"], "text/plain") self.assertEqual(headers["Content-length"], "13") 
self.assertEqual(headers["Last-modified"], modified) - self.assertEqual(respurl, url) + self.assertEqual(respurl, canonurl) for url in [ - "file://localhost:80%s" % urlpath, + parsed._replace(netloc='localhost:80').geturl(), "file:///file_does_not_exist.txt", "file://not-a-local-host.com//dir/file.txt", "file://%s:80%s/%s" % (socket.gethostbyname('localhost'), @@ -1156,13 +1151,13 @@ def test_full_url_setter(self): r = Request('http://example.com') for url in urls: r.full_url = url - parsed = urlparse(url) + parsed = urlsplit(url) self.assertEqual(r.get_full_url(), url) # full_url setter uses splittag to split into components. # splittag sets the fragment as None while urlparse sets it to '' self.assertEqual(r.fragment or '', parsed.fragment) - self.assertEqual(urlparse(r.get_full_url()).query, parsed.query) + self.assertEqual(urlsplit(r.get_full_url()).query, parsed.query) def test_full_url_deleter(self): r = Request('http://www.example.com') diff --git a/Lib/test/test_urllib2net.py b/Lib/test/test_urllib2net.py index f0874d8d3ce463..b84290a7368c29 100644 --- a/Lib/test/test_urllib2net.py +++ b/Lib/test/test_urllib2net.py @@ -4,7 +4,6 @@ from test.support import os_helper from test.support import socket_helper from test.support import ResourceDenied -from test.test_urllib2 import sanepathname2url import os import socket @@ -151,7 +150,7 @@ def test_file(self): f.write('hi there\n') f.close() urls = [ - 'file:' + sanepathname2url(os.path.abspath(TESTFN)), + 'file:' + urllib.request.pathname2url(os.path.abspath(TESTFN)), ('file:///nonsensename/etc/passwd', None, urllib.error.URLError), ] diff --git a/Lib/urllib/request.py b/Lib/urllib/request.py index 1fcaa89188188d..7ef85431b718ad 100644 --- a/Lib/urllib/request.py +++ b/Lib/urllib/request.py @@ -1488,10 +1488,7 @@ def open_local_file(self, req): host, port = _splitport(host) if not host or \ (not port and _safe_gethostbyname(host) in self.get_names()): - if host: - origurl = 'file://' + host + filename - else: - 
origurl = 'file://' + filename + origurl = 'file:' + pathname2url(localfile) return addinfourl(open(localfile, 'rb'), headers, origurl) except OSError as exp: raise URLError(exp, exp.filename) diff --git a/Misc/NEWS.d/next/Library/2024-11-21-06-03-46.gh-issue-127090.yUYwdh.rst b/Misc/NEWS.d/next/Library/2024-11-21-06-03-46.gh-issue-127090.yUYwdh.rst new file mode 100644 index 00000000000000..8efe563f443774 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-11-21-06-03-46.gh-issue-127090.yUYwdh.rst @@ -0,0 +1,3 @@ +Fix value of :attr:`urllib.response.addinfourl.url` for ``file:`` URLs that +express relative paths and absolute Windows paths. The canonical URL generated +by :func:`urllib.request.pathname2url` is now used. From 70154855cf698560dd9a5e484a649839cd68dc7c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Filipe=20La=C3=ADns=20=F0=9F=87=B5=F0=9F=87=B8?= Date: Sun, 8 Dec 2024 05:57:22 +0000 Subject: [PATCH 39/48] GH-126789: fix some sysconfig data on late site initializations (#127729) --- Lib/sysconfig/__init__.py | 18 +++++++++---- Lib/test/test_sysconfig.py | 25 +++++++++++++++++++ ...-12-07-23-06-44.gh-issue-126789.4dqfV1.rst | 5 ++++ 3 files changed, 43 insertions(+), 5 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-12-07-23-06-44.gh-issue-126789.4dqfV1.rst diff --git a/Lib/sysconfig/__init__.py b/Lib/sysconfig/__init__.py index ad86609016e478..ed7b6a335d01d4 100644 --- a/Lib/sysconfig/__init__.py +++ b/Lib/sysconfig/__init__.py @@ -173,9 +173,7 @@ def joinuser(*args): _PY_VERSION = sys.version.split()[0] _PY_VERSION_SHORT = f'{sys.version_info[0]}.{sys.version_info[1]}' _PY_VERSION_SHORT_NO_DOT = f'{sys.version_info[0]}{sys.version_info[1]}' -_PREFIX = os.path.normpath(sys.prefix) _BASE_PREFIX = os.path.normpath(sys.base_prefix) -_EXEC_PREFIX = os.path.normpath(sys.exec_prefix) _BASE_EXEC_PREFIX = os.path.normpath(sys.base_exec_prefix) # Mutex guarding initialization of _CONFIG_VARS. 
_CONFIG_VARS_LOCK = threading.RLock() @@ -473,8 +471,8 @@ def _init_config_vars(): global _CONFIG_VARS _CONFIG_VARS = {} - prefix = _PREFIX - exec_prefix = _EXEC_PREFIX + prefix = os.path.normpath(sys.prefix) + exec_prefix = os.path.normpath(sys.exec_prefix) base_prefix = _BASE_PREFIX base_exec_prefix = _BASE_EXEC_PREFIX @@ -564,9 +562,19 @@ def get_config_vars(*args): With arguments, return a list of values that result from looking up each argument in the configuration variable dictionary. """ + global _CONFIG_VARS_INITIALIZED # Avoid claiming the lock once initialization is complete. - if not _CONFIG_VARS_INITIALIZED: + if _CONFIG_VARS_INITIALIZED: + # GH-126789: If sys.prefix or sys.exec_prefix were updated, invalidate the cache. + prefix = os.path.normpath(sys.prefix) + exec_prefix = os.path.normpath(sys.exec_prefix) + if _CONFIG_VARS['prefix'] != prefix or _CONFIG_VARS['exec_prefix'] != exec_prefix: + with _CONFIG_VARS_LOCK: + _CONFIG_VARS_INITIALIZED = False + _init_config_vars() + else: + # Initialize the config_vars cache. with _CONFIG_VARS_LOCK: # Test again with the lock held to avoid races. 
Note that # we test _CONFIG_VARS here, not _CONFIG_VARS_INITIALIZED, diff --git a/Lib/test/test_sysconfig.py b/Lib/test/test_sysconfig.py index 0df1a67ea2b720..ce504dc21af85f 100644 --- a/Lib/test/test_sysconfig.py +++ b/Lib/test/test_sysconfig.py @@ -53,6 +53,8 @@ def setUp(self): os.uname = self._get_uname # saving the environment self.name = os.name + self.prefix = sys.prefix + self.exec_prefix = sys.exec_prefix self.platform = sys.platform self.version = sys.version self._framework = sys._framework @@ -77,6 +79,8 @@ def tearDown(self): else: del os.uname os.name = self.name + sys.prefix = self.prefix + sys.exec_prefix = self.exec_prefix sys.platform = self.platform sys.version = self.version sys._framework = self._framework @@ -653,6 +657,27 @@ def test_sysconfigdata_json(self): self.assertEqual(system_config_vars, json_config_vars) + def test_sysconfig_config_vars_no_prefix_cache(self): + sys.prefix = 'prefix-AAA' + sys.exec_prefix = 'exec-prefix-AAA' + + config_vars = sysconfig.get_config_vars() + + self.assertEqual(config_vars['prefix'], sys.prefix) + self.assertEqual(config_vars['base'], sys.prefix) + self.assertEqual(config_vars['exec_prefix'], sys.exec_prefix) + self.assertEqual(config_vars['platbase'], sys.exec_prefix) + + sys.prefix = 'prefix-BBB' + sys.exec_prefix = 'exec-prefix-BBB' + + config_vars = sysconfig.get_config_vars() + + self.assertEqual(config_vars['prefix'], sys.prefix) + self.assertEqual(config_vars['base'], sys.prefix) + self.assertEqual(config_vars['exec_prefix'], sys.exec_prefix) + self.assertEqual(config_vars['platbase'], sys.exec_prefix) + class MakefileTests(unittest.TestCase): diff --git a/Misc/NEWS.d/next/Library/2024-12-07-23-06-44.gh-issue-126789.4dqfV1.rst b/Misc/NEWS.d/next/Library/2024-12-07-23-06-44.gh-issue-126789.4dqfV1.rst new file mode 100644 index 00000000000000..417e9ac986f27a --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-12-07-23-06-44.gh-issue-126789.4dqfV1.rst @@ -0,0 +1,5 @@ +Fixed 
:func:`sysconfig.get_config_vars`, :func:`sysconfig.get_paths`, and +siblings, returning outdated cached data if the value of :data:`sys.prefix` +or :data:`sys.exec_prefix` changes. Overwriting :data:`sys.prefix` or +:data:`sys.exec_prefix` still is discouraged, as that might break other +parts of the code. From 1503fc8f88d4903e61f76a78a30bcd581b0ee0cd Mon Sep 17 00:00:00 2001 From: Apostol Fet <90645107+ApostolFet@users.noreply.github.com> Date: Sun, 8 Dec 2024 13:05:15 +0300 Subject: [PATCH 40/48] gh-127610: Added validation for more than one var-positional and var-keyword parameters in inspect.Signature (GH-127657) --- Lib/inspect.py | 8 ++++++++ Lib/test/test_inspect/test_inspect.py | 11 +++++++++++ Misc/ACKS | 1 + .../2024-12-06-17-28-55.gh-issue-127610.ctv_NP.rst | 3 +++ 4 files changed, 23 insertions(+) create mode 100644 Misc/NEWS.d/next/Library/2024-12-06-17-28-55.gh-issue-127610.ctv_NP.rst diff --git a/Lib/inspect.py b/Lib/inspect.py index e3f74e9f047eaf..b7d8271f8a471f 100644 --- a/Lib/inspect.py +++ b/Lib/inspect.py @@ -2943,11 +2943,19 @@ def __init__(self, parameters=None, *, return_annotation=_empty, params = OrderedDict() top_kind = _POSITIONAL_ONLY seen_default = False + seen_var_parameters = set() for param in parameters: kind = param.kind name = param.name + if kind in (_VAR_POSITIONAL, _VAR_KEYWORD): + if kind in seen_var_parameters: + msg = f'more than one {kind.description} parameter' + raise ValueError(msg) + + seen_var_parameters.add(kind) + if kind < top_kind: msg = ( 'wrong parameter order: {} parameter before {} ' diff --git a/Lib/test/test_inspect/test_inspect.py b/Lib/test/test_inspect/test_inspect.py index a92627a4d60f68..1ecf18bf49fa7e 100644 --- a/Lib/test/test_inspect/test_inspect.py +++ b/Lib/test/test_inspect/test_inspect.py @@ -2992,6 +2992,17 @@ def test2(pod=42, /): with self.assertRaisesRegex(ValueError, 'follows default argument'): S((pkd, pk)) + second_args = args.replace(name="second_args") + with 
self.assertRaisesRegex(ValueError, 'more than one variadic positional parameter'): + S((args, second_args)) + + with self.assertRaisesRegex(ValueError, 'more than one variadic positional parameter'): + S((args, ko, second_args)) + + second_kwargs = kwargs.replace(name="second_kwargs") + with self.assertRaisesRegex(ValueError, 'more than one variadic keyword parameter'): + S((kwargs, second_kwargs)) + def test_signature_object_pickle(self): def foo(a, b, *, c:1={}, **kw) -> {42:'ham'}: pass foo_partial = functools.partial(foo, a=1) diff --git a/Misc/ACKS b/Misc/ACKS index 913f7c8ecf5f1e..086930666822ad 100644 --- a/Misc/ACKS +++ b/Misc/ACKS @@ -24,6 +24,7 @@ Eitan Adler Anton Afanasyev Ali Afshar Nitika Agarwal +Maxim Ageev Anjani Agrawal Pablo S. Blum de Aguiar Jim Ahlstrom diff --git a/Misc/NEWS.d/next/Library/2024-12-06-17-28-55.gh-issue-127610.ctv_NP.rst b/Misc/NEWS.d/next/Library/2024-12-06-17-28-55.gh-issue-127610.ctv_NP.rst new file mode 100644 index 00000000000000..58769029d79977 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-12-06-17-28-55.gh-issue-127610.ctv_NP.rst @@ -0,0 +1,3 @@ +Added validation for more than one var-positional or +var-keyword parameters in :class:`inspect.Signature`. +Patch by Maxim Ageev. 
From 8fa5ecec01337215bc7baa62c9c16488ecd854fb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sun, 8 Dec 2024 14:47:22 +0100 Subject: [PATCH 41/48] gh-123378: fix post-merge typos in comments and NEWS (#127739) --- .../C_API/2024-08-27-09-07-56.gh-issue-123378.JJ6n_u.rst | 2 +- Objects/exceptions.c | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/Misc/NEWS.d/next/C_API/2024-08-27-09-07-56.gh-issue-123378.JJ6n_u.rst b/Misc/NEWS.d/next/C_API/2024-08-27-09-07-56.gh-issue-123378.JJ6n_u.rst index 2cfb8b8a1e245a..7254a04f61843d 100644 --- a/Misc/NEWS.d/next/C_API/2024-08-27-09-07-56.gh-issue-123378.JJ6n_u.rst +++ b/Misc/NEWS.d/next/C_API/2024-08-27-09-07-56.gh-issue-123378.JJ6n_u.rst @@ -1,5 +1,5 @@ Ensure that the value of :attr:`UnicodeEncodeError.start ` -retrieved by :c:func:`PyUnicodeEncodeError_GetStart` lie in +retrieved by :c:func:`PyUnicodeEncodeError_GetStart` lies in ``[0, max(0, objlen - 1)]`` where *objlen* is the length of :attr:`UnicodeEncodeError.object `. Similar arguments apply to :exc:`UnicodeDecodeError` and :exc:`UnicodeTranslateError` diff --git a/Objects/exceptions.c b/Objects/exceptions.c index 124b591ee3a13f..287cbc25305964 100644 --- a/Objects/exceptions.c +++ b/Objects/exceptions.c @@ -2712,7 +2712,7 @@ set_unicodefromstring(PyObject **attr, const char *value) * Adjust the (inclusive) 'start' value of a UnicodeError object. * * The 'start' can be negative or not, but when adjusting the value, - * we clip it in [0, max(0, objlen - 1)] but do not intepret it as + * we clip it in [0, max(0, objlen - 1)] and do not interpret it as * a relative offset. */ static inline Py_ssize_t @@ -2732,8 +2732,8 @@ unicode_error_adjust_start(Py_ssize_t start, Py_ssize_t objlen) * Adjust the (exclusive) 'end' value of a UnicodeError object. 
* * The 'end' can be negative or not, but when adjusting the value, - * we clip it in [min(1, objlen), max(min(1, objlen), objlen)] but - * do not intepret it as a relative offset. + * we clip it in [min(1, objlen), max(min(1, objlen), objlen)] and + * do not interpret it as a relative offset. */ static inline Py_ssize_t unicode_error_adjust_end(Py_ssize_t end, Py_ssize_t objlen) From 3b78409878c39d5afa344f7284b57104f7e765c8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sun, 8 Dec 2024 18:31:10 +0100 Subject: [PATCH 42/48] gh-87138: convert SHA-3 object type to heap type (GH-127670) --- Modules/sha3module.c | 39 +++++++++++++++++++++++++++++---------- 1 file changed, 29 insertions(+), 10 deletions(-) diff --git a/Modules/sha3module.c b/Modules/sha3module.c index ca839dc55e0519..b13e6a9de10114 100644 --- a/Modules/sha3module.c +++ b/Modules/sha3module.c @@ -71,13 +71,13 @@ typedef struct { static SHA3object * newSHA3object(PyTypeObject *type) { - SHA3object *newobj; - newobj = (SHA3object *)PyObject_New(SHA3object, type); + SHA3object *newobj = PyObject_GC_New(SHA3object, type); if (newobj == NULL) { return NULL; } HASHLIB_INIT_MUTEX(newobj); + PyObject_GC_Track(newobj); return newobj; } @@ -166,15 +166,32 @@ py_sha3_new_impl(PyTypeObject *type, PyObject *data, int usedforsecurity) /* Internal methods for a hash object */ +static int +SHA3_clear(SHA3object *self) +{ + if (self->hash_state != NULL) { + Hacl_Hash_SHA3_free(self->hash_state); + self->hash_state = NULL; + } + return 0; +} + static void SHA3_dealloc(SHA3object *self) { - Hacl_Hash_SHA3_free(self->hash_state); PyTypeObject *tp = Py_TYPE(self); - PyObject_Free(self); + PyObject_GC_UnTrack(self); + (void)SHA3_clear(self); + tp->tp_free(self); Py_DECREF(tp); } +static int +SHA3_traverse(PyObject *self, visitproc visit, void *arg) +{ + Py_VISIT(Py_TYPE(self)); + return 0; +} /* External methods for a hash object */ @@ -335,6 +352,7 @@ 
static PyObject * SHA3_get_capacity_bits(SHA3object *self, void *closure) { uint32_t rate = Hacl_Hash_SHA3_block_len(self->hash_state) * 8; + assert(rate <= 1600); int capacity = 1600 - rate; return PyLong_FromLong(capacity); } @@ -366,12 +384,14 @@ static PyGetSetDef SHA3_getseters[] = { #define SHA3_TYPE_SLOTS(type_slots_obj, type_doc, type_methods, type_getseters) \ static PyType_Slot type_slots_obj[] = { \ + {Py_tp_clear, SHA3_clear}, \ {Py_tp_dealloc, SHA3_dealloc}, \ + {Py_tp_traverse, SHA3_traverse}, \ {Py_tp_doc, (char*)type_doc}, \ {Py_tp_methods, type_methods}, \ {Py_tp_getset, type_getseters}, \ {Py_tp_new, py_sha3_new}, \ - {0,0} \ + {0, NULL} \ } // Using _PyType_GetModuleState() on these types is safe since they @@ -380,7 +400,8 @@ static PyGetSetDef SHA3_getseters[] = { static PyType_Spec type_spec_obj = { \ .name = "_sha3." type_name, \ .basicsize = sizeof(SHA3object), \ - .flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_IMMUTABLETYPE, \ + .flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_IMMUTABLETYPE \ + | Py_TPFLAGS_HAVE_GC, \ .slots = type_slots \ } @@ -444,9 +465,7 @@ _SHAKE_digest(SHA3object *self, unsigned long digestlen, int hex) result = PyBytes_FromStringAndSize((const char *)digest, digestlen); } - if (digest != NULL) { - PyMem_Free(digest); - } + PyMem_Free(digest); return result; } @@ -563,7 +582,7 @@ _sha3_clear(PyObject *module) static void _sha3_free(void *module) { - _sha3_clear((PyObject *)module); + (void)_sha3_clear((PyObject *)module); } static int From 2367759212f609b8ddf3218003b3ccd8e72849ae Mon Sep 17 00:00:00 2001 From: Stan Ulbrych <89152624+StanFromIreland@users.noreply.github.com> Date: Sun, 8 Dec 2024 18:01:55 +0000 Subject: [PATCH 43/48] [doc] Fix typos in `interpreter_definition.md` (#127742) --- Tools/cases_generator/interpreter_definition.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Tools/cases_generator/interpreter_definition.md b/Tools/cases_generator/interpreter_definition.md index 
203286834e3e3f..d50c420307852f 100644 --- a/Tools/cases_generator/interpreter_definition.md +++ b/Tools/cases_generator/interpreter_definition.md @@ -309,7 +309,7 @@ This might become (if it was an instruction): ### More examples -For explanations see "Generating the interpreter" below.) +For explanations see "Generating the interpreter" below. ```C op ( CHECK_HAS_INSTANCE_VALUES, (owner -- owner) ) { PyDictOrValues dorv = *_PyObject_DictOrValuesPointer(owner); @@ -371,7 +371,7 @@ For explanations see "Generating the interpreter" below.) A _family_ maps a specializable instruction to its specializations. -Example: These opcodes all share the same instruction format): +Example: These opcodes all share the same instruction format: ```C family(load_attr) = { LOAD_ATTR, LOAD_ATTR_INSTANCE_VALUE, LOAD_SLOT }; ``` @@ -393,7 +393,7 @@ which can be easily inserted. What is more complex is ensuring the correct stack and not generating excess pops and pushes. For example, in `CHECK_HAS_INSTANCE_VALUES`, `owner` occurs in the input, so it cannot be -redefined. Thus it doesn't need to written and can be read without adjusting the stack pointer. +redefined. Thus it doesn't need to be written and can be read without adjusting the stack pointer. The C code generated for `CHECK_HAS_INSTANCE_VALUES` would look something like: ```C From 7f8ec523021427a5c1ab3ce0cdd6e4bb909f1dc5 Mon Sep 17 00:00:00 2001 From: Barney Gale Date: Sun, 8 Dec 2024 18:45:09 +0000 Subject: [PATCH 44/48] GH-127381: pathlib ABCs: remove `PathBase.unlink()` and `rmdir()` (#127736) Virtual filesystems don't always make a distinction between deleting files and empty directories, and sometimes support deleting non-empty directories in a single operation. Here we remove `PathBase.unlink()` and `rmdir()`, leaving `_delete()` as the sole deletion method, now made abstract. I hope to drop the underscore prefix later on. 
--- Lib/pathlib/_abc.py | 43 +++-------------- Lib/pathlib/_local.py | 16 +++++-- Lib/test/test_pathlib/test_pathlib.py | 19 ++++++++ Lib/test/test_pathlib/test_pathlib_abc.py | 56 +++++------------------ 4 files changed, 48 insertions(+), 86 deletions(-) diff --git a/Lib/pathlib/_abc.py b/Lib/pathlib/_abc.py index 820970fcd5889b..309eab2ff855c3 100644 --- a/Lib/pathlib/_abc.py +++ b/Lib/pathlib/_abc.py @@ -840,6 +840,12 @@ def copy_into(self, target_dir, *, follow_symlinks=True, dirs_exist_ok=dirs_exist_ok, preserve_metadata=preserve_metadata) + def _delete(self): + """ + Delete this file or directory (including all sub-directories). + """ + raise UnsupportedOperation(self._unsupported_msg('_delete()')) + def move(self, target): """ Recursively move this file or directory tree to the given destination. @@ -874,43 +880,6 @@ def lchmod(self, mode): """ self.chmod(mode, follow_symlinks=False) - def unlink(self, missing_ok=False): - """ - Remove this file or link. - If the path is a directory, use rmdir() instead. - """ - raise UnsupportedOperation(self._unsupported_msg('unlink()')) - - def rmdir(self): - """ - Remove this directory. The directory must be empty. - """ - raise UnsupportedOperation(self._unsupported_msg('rmdir()')) - - def _delete(self): - """ - Delete this file or directory (including all sub-directories). - """ - if self.is_symlink() or self.is_junction(): - self.unlink() - elif self.is_dir(): - self._rmtree() - else: - self.unlink() - - def _rmtree(self): - def on_error(err): - raise err - results = self.walk( - on_error=on_error, - top_down=False, # So we rmdir() empty directories. - follow_symlinks=False) - for dirpath, _, filenames in results: - for filename in filenames: - filepath = dirpath / filename - filepath.unlink() - dirpath.rmdir() - def owner(self, *, follow_symlinks=True): """ Return the login name of the file owner. 
diff --git a/Lib/pathlib/_local.py b/Lib/pathlib/_local.py index 250bc12956f5bc..f87069ce70a2de 100644 --- a/Lib/pathlib/_local.py +++ b/Lib/pathlib/_local.py @@ -846,10 +846,18 @@ def rmdir(self): """ os.rmdir(self) - def _rmtree(self): - # Lazy import to improve module import time - import shutil - shutil.rmtree(self) + def _delete(self): + """ + Delete this file or directory (including all sub-directories). + """ + if self.is_symlink() or self.is_junction(): + self.unlink() + elif self.is_dir(): + # Lazy import to improve module import time + import shutil + shutil.rmtree(self) + else: + self.unlink() def rename(self, target): """ diff --git a/Lib/test/test_pathlib/test_pathlib.py b/Lib/test/test_pathlib/test_pathlib.py index 8c9049f15d5bf9..ce0f4748c860b1 100644 --- a/Lib/test/test_pathlib/test_pathlib.py +++ b/Lib/test/test_pathlib/test_pathlib.py @@ -1352,6 +1352,25 @@ def test_group_no_follow_symlinks(self): self.assertEqual(expected_gid, gid_2) self.assertEqual(expected_name, link.group(follow_symlinks=False)) + def test_unlink(self): + p = self.cls(self.base) / 'fileA' + p.unlink() + self.assertFileNotFound(p.stat) + self.assertFileNotFound(p.unlink) + + def test_unlink_missing_ok(self): + p = self.cls(self.base) / 'fileAAA' + self.assertFileNotFound(p.unlink) + p.unlink(missing_ok=True) + + def test_rmdir(self): + p = self.cls(self.base) / 'dirA' + for q in p.iterdir(): + q.unlink() + p.rmdir() + self.assertFileNotFound(p.stat) + self.assertFileNotFound(p.unlink) + @needs_symlinks def test_delete_symlink(self): tmp = self.cls(self.base, 'delete') diff --git a/Lib/test/test_pathlib/test_pathlib_abc.py b/Lib/test/test_pathlib/test_pathlib_abc.py index bf9ae6cc8a2433..675abf30a9f13c 100644 --- a/Lib/test/test_pathlib/test_pathlib_abc.py +++ b/Lib/test/test_pathlib/test_pathlib_abc.py @@ -1370,8 +1370,6 @@ def test_unsupported_operation(self): self.assertRaises(e, p.touch) self.assertRaises(e, p.chmod, 0o755) self.assertRaises(e, p.lchmod, 0o755) - 
self.assertRaises(e, p.unlink) - self.assertRaises(e, p.rmdir) self.assertRaises(e, p.owner) self.assertRaises(e, p.group) self.assertRaises(e, p.as_uri) @@ -1493,31 +1491,18 @@ def mkdir(self, mode=0o777, parents=False, exist_ok=False): self.parent.mkdir(parents=True, exist_ok=True) self.mkdir(mode, parents=False, exist_ok=exist_ok) - def unlink(self, missing_ok=False): - path = str(self) - name = self.name - parent = str(self.parent) - if path in self._directories: - raise IsADirectoryError(errno.EISDIR, "Is a directory", path) - elif path in self._files: - self._directories[parent].remove(name) - del self._files[path] - elif not missing_ok: - raise FileNotFoundError(errno.ENOENT, "File not found", path) - - def rmdir(self): + def _delete(self): path = str(self) if path in self._files: - raise NotADirectoryError(errno.ENOTDIR, "Not a directory", path) - elif path not in self._directories: - raise FileNotFoundError(errno.ENOENT, "File not found", path) - elif self._directories[path]: - raise OSError(errno.ENOTEMPTY, "Directory not empty", path) - else: - name = self.name - parent = str(self.parent) - self._directories[parent].remove(name) + del self._files[path] + elif path in self._directories: + for name in list(self._directories[path]): + self.joinpath(name)._delete() del self._directories[path] + else: + raise FileNotFoundError(errno.ENOENT, "File not found", path) + parent = str(self.parent) + self._directories[parent].remove(self.name) class DummyPathTest(DummyPurePathTest): @@ -2245,30 +2230,11 @@ def test_is_char_device_false(self): self.assertIs((P / 'fileA\udfff').is_char_device(), False) self.assertIs((P / 'fileA\x00').is_char_device(), False) - def test_unlink(self): - p = self.cls(self.base) / 'fileA' - p.unlink() - self.assertFileNotFound(p.stat) - self.assertFileNotFound(p.unlink) - - def test_unlink_missing_ok(self): - p = self.cls(self.base) / 'fileAAA' - self.assertFileNotFound(p.unlink) - p.unlink(missing_ok=True) - - def test_rmdir(self): - p = 
self.cls(self.base) / 'dirA' - for q in p.iterdir(): - q.unlink() - p.rmdir() - self.assertFileNotFound(p.stat) - self.assertFileNotFound(p.unlink) - def test_delete_file(self): p = self.cls(self.base) / 'fileA' p._delete() self.assertFileNotFound(p.stat) - self.assertFileNotFound(p.unlink) + self.assertFileNotFound(p._delete) def test_delete_dir(self): base = self.cls(self.base) @@ -2347,7 +2313,7 @@ def setUp(self): def tearDown(self): base = self.cls(self.base) - base._rmtree() + base._delete() def test_walk_topdown(self): walker = self.walk_path.walk() From a03efb533a58fd13fb0cc7f4a5c02c8406a407bd Mon Sep 17 00:00:00 2001 From: Stephen Morton Date: Sun, 8 Dec 2024 10:46:34 -0800 Subject: [PATCH 45/48] gh-127734: improve signature of `urllib.request.HTTPPasswordMgrWithPriorAuth.__init__` (#127735) improve signature of urllib.request.HTTPPasswordMgrWithPriorAuth.__init__ --- Lib/urllib/request.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Lib/urllib/request.py b/Lib/urllib/request.py index 7ef85431b718ad..c5a6a18a32bba1 100644 --- a/Lib/urllib/request.py +++ b/Lib/urllib/request.py @@ -876,9 +876,9 @@ def find_user_password(self, realm, authuri): class HTTPPasswordMgrWithPriorAuth(HTTPPasswordMgrWithDefaultRealm): - def __init__(self, *args, **kwargs): + def __init__(self): self.authenticated = {} - super().__init__(*args, **kwargs) + super().__init__() def add_password(self, realm, uri, user, passwd, is_authenticated=False): self.update_authenticated(uri, is_authenticated) From be07edf511365ce554c0535b535bb5726266a17a Mon Sep 17 00:00:00 2001 From: Hood Chatham Date: Mon, 9 Dec 2024 02:34:28 +0100 Subject: [PATCH 46/48] gh-127111: Emscripten Move link flags from `LDFLAGS_NODIST` to `LINKFORSHARED` (#127666) Corrects the usage of linking flags to avoid compilation errors related to the use of `-sEXPORTED_FUNCTIONS` when linking shared libraries. 
--- configure | 10 +++++----- configure.ac | 10 +++++----- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/configure b/configure index bcbab8dfcff190..57be576e3cae99 100755 --- a/configure +++ b/configure @@ -9430,14 +9430,14 @@ else $as_nop wasm_debug=no fi - as_fn_append LDFLAGS_NODIST " -sALLOW_MEMORY_GROWTH -sINITIAL_MEMORY=20971520" + as_fn_append LINKFORSHARED " -sALLOW_MEMORY_GROWTH -sINITIAL_MEMORY=20971520" as_fn_append LDFLAGS_NODIST " -sWASM_BIGINT" - as_fn_append LDFLAGS_NODIST " -sFORCE_FILESYSTEM -lidbfs.js -lnodefs.js -lproxyfs.js -lworkerfs.js" - as_fn_append LDFLAGS_NODIST " -sEXPORTED_RUNTIME_METHODS=FS,callMain,ENV" - as_fn_append LDFLAGS_NODIST " -sEXPORTED_FUNCTIONS=_main,_Py_Version" - as_fn_append LDFLAGS_NODIST " -sSTACK_SIZE=5MB" + as_fn_append LINKFORSHARED " -sFORCE_FILESYSTEM -lidbfs.js -lnodefs.js -lproxyfs.js -lworkerfs.js" + as_fn_append LINKFORSHARED " -sEXPORTED_RUNTIME_METHODS=FS,callMain,ENV" + as_fn_append LINKFORSHARED " -sEXPORTED_FUNCTIONS=_main,_Py_Version" + as_fn_append LINKFORSHARED " -sSTACK_SIZE=5MB" if test "x$enable_wasm_dynamic_linking" = xyes then : diff --git a/configure.ac b/configure.ac index 922a125ea9608e..bd0221481c5341 100644 --- a/configure.ac +++ b/configure.ac @@ -2325,16 +2325,16 @@ AS_CASE([$ac_sys_system], AS_VAR_IF([Py_DEBUG], [yes], [wasm_debug=yes], [wasm_debug=no]) dnl Start with 20 MB and allow to grow - AS_VAR_APPEND([LDFLAGS_NODIST], [" -sALLOW_MEMORY_GROWTH -sINITIAL_MEMORY=20971520"]) + AS_VAR_APPEND([LINKFORSHARED], [" -sALLOW_MEMORY_GROWTH -sINITIAL_MEMORY=20971520"]) dnl map int64_t and uint64_t to JS bigint AS_VAR_APPEND([LDFLAGS_NODIST], [" -sWASM_BIGINT"]) dnl Include file system support - AS_VAR_APPEND([LDFLAGS_NODIST], [" -sFORCE_FILESYSTEM -lidbfs.js -lnodefs.js -lproxyfs.js -lworkerfs.js"]) - AS_VAR_APPEND([LDFLAGS_NODIST], [" -sEXPORTED_RUNTIME_METHODS=FS,callMain,ENV"]) - AS_VAR_APPEND([LDFLAGS_NODIST], [" -sEXPORTED_FUNCTIONS=_main,_Py_Version"]) - 
AS_VAR_APPEND([LDFLAGS_NODIST], [" -sSTACK_SIZE=5MB"]) + AS_VAR_APPEND([LINKFORSHARED], [" -sFORCE_FILESYSTEM -lidbfs.js -lnodefs.js -lproxyfs.js -lworkerfs.js"]) + AS_VAR_APPEND([LINKFORSHARED], [" -sEXPORTED_RUNTIME_METHODS=FS,callMain,ENV"]) + AS_VAR_APPEND([LINKFORSHARED], [" -sEXPORTED_FUNCTIONS=_main,_Py_Version"]) + AS_VAR_APPEND([LINKFORSHARED], [" -sSTACK_SIZE=5MB"]) AS_VAR_IF([enable_wasm_dynamic_linking], [yes], [ AS_VAR_APPEND([LINKFORSHARED], [" -sMAIN_MODULE"]) From 5876063d06ec55b10793f34bfe516c10f608665c Mon Sep 17 00:00:00 2001 From: Hood Chatham Date: Mon, 9 Dec 2024 03:01:37 +0100 Subject: [PATCH 47/48] gh-127503 Don't propagate native PATH to Emscripten Python (#127633) Modifies the handling of PATH to ensure that native executables aren't picked up when running under node. --- Tools/wasm/emscripten/node_entry.mjs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Tools/wasm/emscripten/node_entry.mjs b/Tools/wasm/emscripten/node_entry.mjs index 40ab1515cf28c1..98b8f572a7e762 100644 --- a/Tools/wasm/emscripten/node_entry.mjs +++ b/Tools/wasm/emscripten/node_entry.mjs @@ -35,11 +35,12 @@ const settings = { mountDirectories(Module); Module.FS.chdir(process.cwd()); Object.assign(Module.ENV, process.env); + delete Module.ENV.PATH; }, // Ensure that sys.executable, sys._base_executable, etc point to python.sh // not to this file. To properly handle symlinks, python.sh needs to compute // its own path. - thisProgram: process.argv[thisProgramIndex], + thisProgram: process.argv[thisProgramIndex].slice(thisProgram.length), // After python.sh come the arguments thatthe user passed to python.sh. arguments: process.argv.slice(thisProgramIndex + 1), }; From d8d12b37b5e5acb354db84b07dab8de64a6b9475 Mon Sep 17 00:00:00 2001 From: Hood Chatham Date: Mon, 9 Dec 2024 03:03:11 +0100 Subject: [PATCH 48/48] gh-127503: Fix realpath handling in emscripten cli (#127632) Corrects the handling of realpath on Linux. 
--- Tools/wasm/emscripten/__main__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Tools/wasm/emscripten/__main__.py b/Tools/wasm/emscripten/__main__.py index c998ed71309dad..6843b6fdeceb8c 100644 --- a/Tools/wasm/emscripten/__main__.py +++ b/Tools/wasm/emscripten/__main__.py @@ -223,7 +223,7 @@ def configure_emscripten_python(context, working_dir): if which grealpath > /dev/null; then # It has brew installed gnu core utils, use that REALPATH="grealpath -s" - elif which realpath > /dev/null && realpath --version 2&>1 | grep GNU > /dev/null; then + elif which realpath > /dev/null && realpath --version > /dev/null 2> /dev/null && realpath --version | grep GNU > /dev/null; then # realpath points to GNU realpath so use it. REALPATH="realpath -s" else