diff --git a/.travis.yml b/.travis.yml
index 48dc6dd..df2c56a 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -2,8 +2,11 @@ env:
   global:
       # directory containing the project source
       - REPO_DIR=.
+      - BUILD_DEPENDS="wheel==0.31.1"
       # pip dependencies to _test_ project
-      - TEST_DEPENDS="tox"
+      - TEST_DEPENDS="tox wheel==0.31.1"
+      # this is to prevent the latest wheel==0.32.0 from being installed in the venv
+      - VIRTUALENV_NO_DOWNLOAD=1
       - PLAT=x86_64
       - UNICODE_WIDTH=32
       - TWINE_USERNAME="anthrotype"
diff --git a/multibuild b/multibuild
index 4e7a939..f612817 160000
--- a/multibuild
+++ b/multibuild
@@ -1 +1 @@
-Subproject commit 4e7a9396e9a50731bb83fc0d16bb98fb0c4032d7
+Subproject commit f6128176d90792ba572921c447325130666cb950
diff --git a/pyproject.toml b/pyproject.toml
index d5646b9..0f98166 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,7 +1,7 @@
 [build-system]
 requires = [
     "setuptools",
-    "wheel",
+    "wheel == 0.31.1",
     "cython >= 0.28.5",
 ]
 build-backend = "setuptools.build_meta"
diff --git a/setup.py b/setup.py
index 3013935..b03a59e 100644
--- a/setup.py
+++ b/setup.py
@@ -98,7 +98,7 @@ def run(self):
     else []
 )
 
-cython_modules = ["parser", "_compat"]
+cython_modules = ["parser", "util", "writer", "_test"]
 
 extensions = [
     Extension(
         "openstep_plist." + mod,
diff --git a/src/openstep_plist/__init__.py b/src/openstep_plist/__init__.py
index c933239..89538ba 100644
--- a/src/openstep_plist/__init__.py
+++ b/src/openstep_plist/__init__.py
@@ -1,4 +1,5 @@
 from .parser import load, loads, ParseError
+from .writer import dump, dumps
 
 try:
     from ._version import version as __version__
@@ -6,4 +7,4 @@
     __version__ = "0.0.0+unknown"
 
 
-__all__ = ["load", "loads", "ParseError"]
+__all__ = ["load", "loads", "dump", "dumps", "ParseError"]
diff --git a/src/openstep_plist/__main__.py b/src/openstep_plist/__main__.py
index 5767ed2..c415071 100755
--- a/src/openstep_plist/__main__.py
+++ b/src/openstep_plist/__main__.py
@@ -1,10 +1,10 @@
 #!/usr/bin/env python
-
+from __future__ import absolute_import, unicode_literals
 import openstep_plist
 import json
-import base64
 import binascii
-# from collections import OrderedDict
+from functools import partial
+from io import open
 
 
 class BytesEncoder(json.JSONEncoder):
@@ -29,6 +29,8 @@ def main(args=None):
     method = args[0]
     if method == "-a":
         parse = openstep_plist.load
+        dump = partial(openstep_plist.dump, indent=0)
+
     elif method == "-g":
 
         def parse(fp, dict_type=dict):
@@ -37,19 +39,22 @@ def parse(fp, dict_type=dict):
             s = fp.read()
             p = Parser(current_type=dict_type)
             return p.parse(s)
 
+        from glyphsLib.writer import dump
+
     else:
         sys.exit("error: unknown option: %s" % method)
 
     infile = args[1]
 
     with open(infile, "r", encoding="utf-8") as fp:
-        # data = parse(fp, dict_type=OrderedDict)
         data = parse(fp)
 
     if len(args) > 2:
         outfile = args[2]
 
         with open(outfile, "w", encoding="utf-8") as fp:
-            json.dump(data, fp, cls=BytesEncoder, sort_keys=True, indent="  ")
+            # json.dump(data, fp, cls=BytesEncoder, sort_keys=True, indent="  ")
+            dump(data, fp)
 
 
 if __name__ == "__main__":
diff --git a/src/openstep_plist/_compat.pxd b/src/openstep_plist/_compat.pxd
deleted file mode 100644
index dbda7cd..0000000
--- a/src/openstep_plist/_compat.pxd
+++ /dev/null
@@ -1,7 +0,0 @@
-#cython: language_level=3
-
-
-cdef unicode tounicode(s, encoding=*, errors=*)
-
-
-cdef tostr(s, encoding=*, errors=*)
diff --git a/src/openstep_plist/_compat.pyx b/src/openstep_plist/_compat.pyx
deleted file mode 100644
index 3af9778..0000000
--- a/src/openstep_plist/_compat.pyx
+++ /dev/null
@@ -1,24 +0,0 @@
-#cython: language_level=3
-#distutils: define_macros=CYTHON_TRACE_NOGIL=1
-
-from cpython.version cimport PY_MAJOR_VERSION
-
-
-cdef inline unicode tounicode(s, encoding="ascii", errors="strict"):
-    if type(s) is unicode:
-        return s
-    elif PY_MAJOR_VERSION < 3 and isinstance(s, bytes):
-        return (<bytes>s).decode(encoding, errors=errors)
-    elif isinstance(s, unicode):
-        return unicode(s)
-    else:
-        raise TypeError(f"Could not convert to unicode: {s!r}")
-
-
-cdef inline object tostr(s, encoding="ascii", errors="strict"):
-    if isinstance(s, bytes):
-        return s if PY_MAJOR_VERSION < 3 else s.decode(encoding, errors=errors)
-    elif isinstance(s, unicode):
-        return s.encode(encoding, errors=errors) if PY_MAJOR_VERSION < 3 else s
-    else:
-        raise TypeError(f"Could not convert to str: {s!r}")
diff --git a/tests/cdef_wrappers.pyx b/src/openstep_plist/_test.pyx
similarity index 92%
rename from tests/cdef_wrappers.pyx
rename to src/openstep_plist/_test.pyx
index 13f4c63..9cd364f 100644
--- a/tests/cdef_wrappers.pyx
+++ b/src/openstep_plist/_test.pyx
@@ -1,16 +1,19 @@
 #cython: language_level=3
 #distutils: define_macros=CYTHON_TRACE_NOGIL=1
 
-from openstep_plist.parser cimport (
+from .parser cimport (
     ParseInfo,
     line_number_strings as _line_number_strings,
-    is_valid_unquoted_string_char as _is_valid_unquoted_string_char,
     advance_to_non_space as _advance_to_non_space,
     get_slashed_char as _get_slashed_char,
     parse_unquoted_plist_string as _parse_unquoted_plist_string,
     parse_plist_string as _parse_plist_string,
 )
-from openstep_plist._compat cimport tounicode
+from .util cimport (
+    PY_NARROW_UNICODE,
+    tounicode,
+    is_valid_unquoted_string_char as _is_valid_unquoted_string_char,
+)
 from cpython.unicode cimport (
     PyUnicode_FromUnicode, PyUnicode_AS_UNICODE, PyUnicode_GET_SIZE,
 )
@@ -28,7 +31,7 @@ cdef class ParseContext:
         string,
         Py_ssize_t offset=0,
         dict_type=dict,
-        bint use_numbers=False
+        bint use_numbers=True
     ):
         cdef ParseContext self = ParseContext.__new__(cls)
         self.s = tounicode(string)
@@ -45,6 +48,10 @@ cdef class ParseContext:
         return self
 
 
+def is_narrow_unicode():
+    return PY_NARROW_UNICODE
+
+
 def is_valid_unquoted_string_char(Py_UNICODE c):
     return _is_valid_unquoted_string_char(c)
diff --git a/src/openstep_plist/parser.pxd b/src/openstep_plist/parser.pxd
index 9f799f6..f03f9fc 100644
--- a/src/openstep_plist/parser.pxd
+++ b/src/openstep_plist/parser.pxd
@@ -4,11 +4,6 @@ from libc.stdint cimport uint32_t
 from cpython cimport array
 
 
-cdef extern from "<ctype.h>":
-    int isxdigit(int c)
-    int isdigit(int c)
-
-
 ctypedef struct ParseInfo:
     const Py_UNICODE *begin
     const Py_UNICODE *curr
@@ -24,9 +19,6 @@ cdef class ParseError(Exception):
 cdef uint32_t line_number_strings(ParseInfo *pi)
 
 
-cdef bint is_valid_unquoted_string_char(Py_UNICODE x)
-
-
 cdef bint advance_to_non_space(ParseInfo *pi)
diff --git a/src/openstep_plist/parser.pyx b/src/openstep_plist/parser.pyx
index aab039a..1d3507d 100644
--- a/src/openstep_plist/parser.pyx
+++ b/src/openstep_plist/parser.pyx
@@ -4,13 +4,24 @@ from cpython.unicode cimport (
     PyUnicode_FromUnicode, PyUnicode_AS_UNICODE, PyUnicode_GET_SIZE,
 )
-from libc.stdint cimport uint8_t, uint32_t
+from libc.stdint cimport uint8_t, uint16_t, uint32_t
 from cpython cimport array
 from cpython.version cimport PY_MAJOR_VERSION
 import array
 cimport cython
 
-from ._compat cimport tounicode, tostr
+from .util cimport (
+    tounicode,
+    tostr,
+    unicode_array_template,
+    is_valid_unquoted_string_char,
+    isdigit,
+    isxdigit,
+    PY_NARROW_UNICODE,
+    is_high_surrogate,
+    is_low_surrogate,
+    unicode_scalar_from_surrogates,
+)
 
 
 cdef uint32_t line_number_strings(ParseInfo *pi):
@@ -28,20 +39,6 @@ cdef uint32_t line_number_strings(ParseInfo *pi):
     return count
 
 
-cdef inline bint is_valid_unquoted_string_char(Py_UNICODE x):
-    return (
-        (x >= c'a' and x <= c'z') or
-        (x >= c'A' and x <= c'Z') or
-        (x >= c'0' and x <= c'9') or
-        x == c'_' or
-        x == c'$' or
-        x == c'/' or
-        x == c':' or
-        x == c'.' or
-        x == c'-'
-    )
-
-
 cdef bint advance_to_non_space(ParseInfo *pi):
     """Returns true if the advance found something that's not whitespace
     before the end of the buffer, false otherwise.
@@ -182,16 +179,12 @@ cdef Py_UNICODE get_slashed_char(ParseInfo *pi):
     return ch
 
 
-# must convert array type code to native str type else when using
-# unicode literals on py27 one gets 'TypeError: must be char, not unicode'
-cdef array.array unicode_array_template = array.array(tostr('u'), [])
-
-
 cdef unicode parse_quoted_plist_string(ParseInfo *pi, Py_UNICODE quote):
     cdef array.array string = array.clone(unicode_array_template, 0, zero=False)
     cdef const Py_UNICODE *start_mark = pi.curr
     cdef const Py_UNICODE *mark = pi.curr
-    cdef Py_UNICODE ch
+    cdef const Py_UNICODE *tmp
+    cdef Py_UNICODE ch, ch2
     while pi.curr < pi.end:
         ch = pi.curr[0]
         if ch == quote:
@@ -200,6 +193,24 @@ cdef unicode parse_quoted_plist_string(ParseInfo *pi, Py_UNICODE quote):
                 array.extend_buffer(string, <char*>mark, pi.curr - mark)
             pi.curr += 1
             ch = get_slashed_char(pi)
+            # If we are NOT on a "narrow" python 2 build, then we need to parse
+            # two successive \UXXXX escape sequences as one surrogate pair
+            # representing a "supplementary" Unicode scalar value.
+            # If we are on a "narrow" build, then the two code units already
+            # represent a single codepoint internally.
+            if (
+                not PY_NARROW_UNICODE and is_high_surrogate(ch)
+                and pi.curr < pi.end and pi.curr[0] == c"\\"
+            ):
+                tmp = pi.curr
+                pi.curr += 1
+                ch2 = get_slashed_char(pi)
+                if is_low_surrogate(ch2):
+                    ch = unicode_scalar_from_surrogates(high=ch, low=ch2)
+                else:
+                    # XXX maybe we should raise here instead of letting this
+                    # lone high surrogate (not followed by a low) pass through?
+                    pi.curr = tmp
             string.append(ch)
             mark = pi.curr
         else:
@@ -519,7 +530,7 @@ cdef object parse_plist_object(ParseInfo *pi, bint required=True):
         )
 
 
-def loads(string, dict_type=dict, bint use_numbers=False):
+def loads(string, dict_type=dict, bint use_numbers=True):
     cdef unicode s = tounicode(string)
     cdef Py_ssize_t length = PyUnicode_GET_SIZE(s)
     cdef Py_UNICODE* buf = PyUnicode_AS_UNICODE(s)
@@ -555,5 +566,5 @@
     return result
 
 
-def load(fp, dict_type=dict, use_numbers=False):
+def load(fp, dict_type=dict, use_numbers=True):
     return loads(fp.read(), dict_type=dict_type, use_numbers=use_numbers)
diff --git a/src/openstep_plist/util.pxd b/src/openstep_plist/util.pxd
new file mode 100644
index 0000000..0588598
--- /dev/null
+++ b/src/openstep_plist/util.pxd
@@ -0,0 +1,40 @@
+#cython: language_level=3
+
+from cpython cimport array
+from libc.stdint cimport uint16_t, uint32_t
+
+
+cdef extern from "<ctype.h>":
+    int isxdigit(int c)
+    int isdigit(int c)
+    int isprint(int c)
+
+
+cdef unicode tounicode(s, encoding=*, errors=*)
+
+
+cdef tostr(s, encoding=*, errors=*)
+
+
+cdef array.array unicode_array_template
+
+
+cdef bint is_valid_unquoted_string_char(Py_UNICODE x)
+
+
+cdef bint PY_NARROW_UNICODE
+
+
+cdef bint is_high_surrogate(uint32_t ch)
+
+
+cdef bint is_low_surrogate(uint32_t ch)
+
+
+cdef uint32_t unicode_scalar_from_surrogates(uint16_t high, uint16_t low)
+
+
+cdef uint16_t high_surrogate_from_unicode_scalar(uint32_t scalar)
+
+
+cdef uint16_t low_surrogate_from_unicode_scalar(uint32_t scalar)
diff --git a/src/openstep_plist/util.pyx b/src/openstep_plist/util.pyx
new file mode 100644
index 0000000..ca95773
--- /dev/null
+++ b/src/openstep_plist/util.pyx
@@ -0,0 +1,70 @@
+#cython: language_level=3
+#distutils: define_macros=CYTHON_TRACE_NOGIL=1
+
+from cpython.version cimport PY_MAJOR_VERSION
+from cpython cimport array
+from libc.stdint cimport uint16_t, uint32_t
+import array
+import sys
+
+
+cdef inline unicode tounicode(s, encoding="ascii", errors="strict"):
+    if type(s) is unicode:
+        return s
+    elif PY_MAJOR_VERSION < 3 and isinstance(s, bytes):
+        return (<bytes>s).decode(encoding, errors=errors)
+    elif isinstance(s, unicode):
+        return unicode(s)
+    else:
+        raise TypeError(f"Could not convert to unicode: {s!r}")
+
+
+cdef inline object tostr(s, encoding="ascii", errors="strict"):
+    if isinstance(s, bytes):
+        return s if PY_MAJOR_VERSION < 3 else s.decode(encoding, errors=errors)
+    elif isinstance(s, unicode):
+        return s.encode(encoding, errors=errors) if PY_MAJOR_VERSION < 3 else s
+    else:
+        raise TypeError(f"Could not convert to str: {s!r}")
+
+
+# must convert array type code to native str type else when using
+# unicode literals on py27 one gets 'TypeError: must be char, not unicode'
+cdef array.array unicode_array_template = array.array(tostr('u'), [])
+
+
+cdef inline bint is_valid_unquoted_string_char(Py_UNICODE x):
+    return (
+        (x >= c'a' and x <= c'z') or
+        (x >= c'A' and x <= c'Z') or
+        (x >= c'0' and x <= c'9') or
+        x == c'_' or
+        x == c'$' or
+        x == c'/' or
+        x == c':' or
+        x == c'.' or
+        x == c'-'
+    )
+
+
+cdef bint PY_NARROW_UNICODE = sizeof(Py_UNICODE) != 4
+
+
+cdef inline bint is_high_surrogate(uint32_t ch):
+    return ch >= 0xD800 and ch <= 0xDBFF
+
+
+cdef inline bint is_low_surrogate(uint32_t ch):
+    return ch >= 0xDC00 and ch <= 0xDFFF
+
+
+cdef inline uint32_t unicode_scalar_from_surrogates(uint16_t high, uint16_t low):
+    return (high - 0xD800) * 0x400 + low - 0xDC00 + 0x10000
+
+
+cdef inline uint16_t high_surrogate_from_unicode_scalar(uint32_t scalar):
+    return ((scalar - 0x10000) // 0x400) + 0xD800
+
+
+cdef inline uint16_t low_surrogate_from_unicode_scalar(uint32_t scalar):
+    return (scalar - 0x10000) % 0x400 + 0xDC00
diff --git a/src/openstep_plist/writer.pyx b/src/openstep_plist/writer.pyx
new file mode 100644
index 0000000..eb01f74
--- /dev/null
+++ b/src/openstep_plist/writer.pyx
@@ -0,0 +1,574 @@
+#cython: language_level=3
+#distutils: define_macros=CYTHON_TRACE_NOGIL=1
+
+import array
+from collections import OrderedDict
+from cpython cimport array
+from cpython.unicode cimport (
+    PyUnicode_FromUnicode,
+    PyUnicode_AS_UNICODE,
+    PyUnicode_AS_DATA,
+    PyUnicode_GET_SIZE,
+    PyUnicode_AsUTF8String,
+)
+from cpython.bytes cimport PyBytes_GET_SIZE
+from cpython.object cimport Py_SIZE
+from libc.stdint cimport uint16_t
+cimport cython
+
+from .util cimport (
+    tounicode,
+    unicode_array_template,
+    is_valid_unquoted_string_char,
+    isprint,
+    PY_NARROW_UNICODE,
+    high_surrogate_from_unicode_scalar,
+    low_surrogate_from_unicode_scalar,
+)
+
+
+cdef Py_UNICODE *HEX_MAP = [
+    c'0', c'1', c'2', c'3', c'4', c'5', c'6', c'7',
+    c'8', c'9', c'A', c'B', c'C', c'D', c'E', c'F',
+]
+
+cdef Py_UNICODE *ARRAY_SEP_NO_INDENT = [c',', c' ']
+cdef Py_UNICODE *DICT_KEY_VALUE_SEP = [c' ', c'=', c' ']
+cdef Py_UNICODE *DICT_ITEM_SEP_NO_INDENT = [c';', c' ']
+
+
+cdef inline bint is_valid_unquoted_string(const Py_UNICODE *a, Py_ssize_t length):
+    # if string starts with digit or with a '-', always write it within quotes
+    # to distinguish it from an actual (signed) integer or float number, which
+    # are always written without quotes
+    cdef Py_UNICODE ch = a[0]
+    if c'0' <= ch <= c'9' or ch == c'-':
+        return False
+
+    cdef Py_ssize_t i
+    for i in range(length):
+        if not is_valid_unquoted_string_char(a[i]):
+            return False
+    return True
+
+
+cdef inline void escape_unicode(uint16_t ch, Py_UNICODE *dest):
+    # caller must ensure 'dest' has room for 6 more Py_UNICODE
+    dest[0] = c'\\'
+    dest[1] = c'U'
+    dest[5] = (ch & 15) + 55 if (ch & 15) > 9 else (ch & 15) + 48
+    ch >>= 4
+    dest[4] = (ch & 15) + 55 if (ch & 15) > 9 else (ch & 15) + 48
+    ch >>= 4
+    dest[3] = (ch & 15) + 55 if (ch & 15) > 9 else (ch & 15) + 48
+    ch >>= 4
+    dest[2] = (ch & 15) + 55 if (ch & 15) > 9 else (ch & 15) + 48
+
+
+@cython.final
+cdef class Writer:
+
+    cdef public array.array dest
+    cdef bint unicode_escape
+    cdef int float_precision
+    cdef unicode indent
+    cdef int current_indent_level
+
+    def __cinit__(
+        self, bint unicode_escape=True, int float_precision=6, indent=None
+    ):
+        self.dest = array.clone(unicode_array_template, 0, zero=False)
+        self.unicode_escape = unicode_escape
+        self.float_precision = float_precision
+
+        if indent is not None:
+            if isinstance(indent, basestring):
+                self.indent = tounicode(indent)
+            else:
+                self.indent = ' ' * indent
+        else:
+            self.indent = None
+        self.current_indent_level = 0
+
+    def getvalue(self):
+        return self._getvalue()
+
+    def dump(self, file):
+        cdef unicode s = self._getvalue()
+        # figure out whether file object expects bytes or unicodes
+        try:
+            file.write(b"")
+        except TypeError:
+            file.write("")  # this better not fail...
+            # file already accepts unicodes; use it directly
+            file.write(s)
+        else:
+            # file expects bytes; always encode as UTF-8
+            file.write(PyUnicode_AsUTF8String(s))
+
+    def write(self, object obj):
+        return self.write_object(obj)
+
+    cdef inline unicode _getvalue(self):
+        cdef array.array dest = self.dest
+        return PyUnicode_FromUnicode(dest.data.as_pyunicodes, Py_SIZE(dest))
+
+    cdef Py_ssize_t write_object(self, object obj) except -1:
+        if obj is None:
+            return self.write_string("(nil)")
+        if isinstance(obj, unicode):
+            return self.write_string(obj)
+        elif isinstance(obj, bool):
+            self.dest.append("1" if obj else "0")
+            return 1
+        elif isinstance(obj, float):
+            return self.write_short_float_repr(obj)
+        elif isinstance(obj, (int, long)):
+            return self.write_unquoted_string(unicode(obj))
+        elif isinstance(obj, list):
+            return self.write_array_from_list(obj)
+        elif isinstance(obj, tuple):
+            return self.write_array_from_tuple(obj)
+        elif isinstance(obj, OrderedDict):
+            return self.write_ordered_dict(obj)
+        elif isinstance(obj, dict):
+            return self.write_dict(obj)
+        elif isinstance(obj, bytes):
+            return self.write_data(obj)
+        else:
+            raise TypeError(
+                f"Object of type {type(obj).__name__} is not PLIST serializable"
+            )
+
+    cdef Py_ssize_t write_quoted_string(
+        self, const Py_UNICODE *s, Py_ssize_t length
+    ) except -1:
+
+        cdef:
+            array.array dest = self.dest
+            bint unicode_escape = self.unicode_escape
+            const Py_UNICODE *curr = s
+            const Py_UNICODE *end = &s[length]
+            Py_UNICODE *ptr
+            unsigned long ch
+            Py_ssize_t base_length = Py_SIZE(dest)
+            Py_ssize_t new_length = 0
+
+        while curr < end:
+            ch = curr[0]
+            if ch == c'\t':
+                new_length += 1
+            elif (
+                ch == c'\n' or ch == c'\\' or ch == c'"' or ch == c'\a'
+                or ch == c'\b' or ch == c'\v' or ch == c'\f' or ch == c'\r'
+            ):
+                new_length += 2
+            else:
+                if ch < 128:
+                    if isprint(ch) or ch == c' ':
+                        new_length += 1
+                    else:
+                        new_length += 4
+                elif unicode_escape:
+                    if ch > 0xFFFF and not PY_NARROW_UNICODE:
+                        new_length += 12
+                    else:
+                        new_length += 6
+                else:
+                    new_length += 1
+            curr += 1
+
+        array.resize_smart(dest, base_length + new_length + 2)
+        ptr = dest.data.as_pyunicodes + base_length
+        ptr[0] = '"'
+        ptr += 1
+
+        curr = s
+        while curr < end:
+            ch = curr[0]
+            if ch == c'\t':
+                ptr[0] = ch
+                ptr += 1
+            elif ch == c'\n':
+                ptr[0] = c'\\'; ptr[1] = c'n'; ptr += 2
+            elif ch == c'\a':
+                ptr[0] = c'\\'; ptr[1] = c'a'; ptr += 2
+            elif ch == c'\b':
+                ptr[0] = c'\\'; ptr[1] = c'b'; ptr += 2
+            elif ch == c'\v':
+                ptr[0] = c'\\'; ptr[1] = c'v'; ptr += 2
+            elif ch == c'\f':
+                ptr[0] = c'\\'; ptr[1] = c'f'; ptr += 2
+            elif ch == c'\\':
+                ptr[0] = c'\\'; ptr[1] = c'\\'; ptr += 2
+            elif ch == c'"':
+                ptr[0] = c'\\'; ptr[1] = c'"'; ptr += 2
+            elif ch == c'\r':
+                ptr[0] = c'\\'; ptr[1] = c'r'; ptr += 2
+            else:
+                if ch < 128:
+                    if isprint(ch) or ch == c' ':
+                        ptr[0] = ch
+                        ptr += 1
+                    else:
+                        ptr[0] = c'\\'
+                        ptr += 1
+                        ptr[2] = (ch & 7) + c'0'
+                        ch >>= 3
+                        ptr[1] = (ch & 7) + c'0'
+                        ch >>= 3
+                        ptr[0] = (ch & 7) + c'0'
+                        ptr += 3
+                elif unicode_escape:
+                    if ch > 0xFFFF and not PY_NARROW_UNICODE:
+                        escape_unicode(high_surrogate_from_unicode_scalar(ch), ptr)
+                        ptr += 6
+                        escape_unicode(low_surrogate_from_unicode_scalar(ch), ptr)
+                        ptr += 6
+                    else:
+                        escape_unicode(ch, ptr)
+                        ptr += 6
+                else:
+                    ptr[0] = ch
+                    ptr += 1
+
+            curr += 1
+
+        ptr[0] = c'"'
+
+        return new_length + 2
+
+    cdef inline Py_ssize_t write_unquoted_string(self, unicode string) except -1:
+        cdef:
+            const char *s = PyUnicode_AS_DATA(string)
+            Py_ssize_t length = PyUnicode_GET_SIZE(string)
+            array.array dest = self.dest
+
+        array.extend_buffer(dest, <char*>s, length)
+        return length
+
+
+    cdef Py_ssize_t write_string(self, unicode string) except -1:
+        cdef:
+            Py_UNICODE *s = PyUnicode_AS_UNICODE(string)
+            Py_ssize_t length = PyUnicode_GET_SIZE(string)
+            array.array dest = self.dest
+
+        if length > 0 and is_valid_unquoted_string(s, length):
+            array.extend_buffer(dest, <char*>s, length)
+            return length
+        else:
+            return self.write_quoted_string(s, length)
+
+    cdef Py_ssize_t write_short_float_repr(self, object py_float) except -1:
+        cdef:
+            array.array dest = self.dest
+            unicode string = f"{py_float:.{self.float_precision}f}"
+            const Py_UNICODE *s = PyUnicode_AS_UNICODE(string)
+            Py_ssize_t length = PyUnicode_GET_SIZE(string)
+            Py_UNICODE ch
+
+        # read digits backwards, skipping all the '0's until either a
+        # non-'0' or '.' is found
+        while length > 0:
+            ch = s[length-1]
+            if ch == c'.':
+                length -= 1  # skip the trailing dot
+                break
+            elif ch != c'0':
+                break
+            length -= 1
+
+        array.extend_buffer(dest, <char*>s, length)
+        return length
+
+    cdef Py_ssize_t write_data(self, bytes data) except -1:
+        cdef:
+            array.array dest = self.dest
+            const unsigned char *src = data
+            Py_UNICODE *ptr
+            Py_ssize_t length = PyBytes_GET_SIZE(data)
+            Py_ssize_t extra_length, i, j
+
+        # the number includes the opening '<' and closing '>', and the
+        # interleaving spaces between each group of 4 bytes; each byte
+        # is encoded with two hexadecimal digits
+        extra_length = 2 + 2*length + ((length - 1)//4 if length > 4 else 0)
+
+        j = Py_SIZE(dest)
+        array.resize_smart(dest, j + extra_length)
+        ptr = dest.data.as_pyunicodes
+
+        ptr[j] = c'<'
+        j += 1
+        for i in range(length):
+            ptr[j] = HEX_MAP[(src[i] >> 4) & 0x0F]
+            j += 1
+            ptr[j] = HEX_MAP[src[i] & 0x0F]
+            if (i & 3) == 3 and i < length - 1:
+                # if we've just finished a 32-bit int, print a space
+                j += 1
+                ptr[j] = c' '
+            j += 1
+        ptr[j] = c'>'
+
+        return extra_length
+
+    # XXX The two write_array_* methods are identical apart from the type of
+    # the 'seq' (one is list, the other is tuple). I tried using a fused type
+    # 'list_or_tuple' to avoid duplication but I couldn't make it work...
+
+    @cython.boundscheck(False)
+    @cython.wraparound(False)
+    cdef Py_ssize_t write_array_from_list(self, list seq) except -1:
+        cdef:
+            Py_ssize_t length = len(seq)
+            Py_ssize_t last
+            Py_ssize_t count
+            Py_ssize_t i
+            array.array dest = self.dest
+            unicode indent, newline_indent
+            const char *indent_chars = NULL
+            Py_ssize_t indent_length = 0
+
+        if length == 0:
+            dest.extend("()")
+            return 2
+
+        dest.append('(')
+        count = 1
+
+        indent = self.indent
+        if indent is not None:
+            self.current_indent_level += 1
+            newline_indent = '\n' + self.current_indent_level * indent
+            indent_length = PyUnicode_GET_SIZE(newline_indent)
+            indent_chars = PyUnicode_AS_DATA(newline_indent)
+            array.extend_buffer(dest, <char*>indent_chars, indent_length)
+            count += indent_length
+
+        last = length - 1
+        for i in range(length):
+            count += self.write_object(seq[i])
+            if i != last:
+                if indent is None:
+                    array.extend_buffer(dest, <char*>ARRAY_SEP_NO_INDENT, 2)
+                    count += 2
+                else:
+                    dest.append(',')
+                    array.extend_buffer(dest, <char*>indent_chars, indent_length)
+                    count += 1 + indent_length
+
+        if indent is not None:
+            self.current_indent_level -= 1
+            newline_indent = '\n' + self.current_indent_level * indent
+            indent_length = PyUnicode_GET_SIZE(newline_indent)
+            indent_chars = PyUnicode_AS_DATA(newline_indent)
+            array.extend_buffer(dest, <char*>indent_chars, indent_length)
+            count += indent_length
+
+        dest.append(')')
+        count += 1
+
+        return count
+
+    @cython.boundscheck(False)
+    @cython.wraparound(False)
+    cdef Py_ssize_t write_array_from_tuple(self, tuple seq) except -1:
+        cdef:
+            Py_ssize_t length = len(seq)
+            Py_ssize_t last
+            Py_ssize_t count
+            Py_ssize_t i
+            array.array dest = self.dest
+            unicode indent, newline_indent
+            const char *indent_chars = NULL
+            Py_ssize_t indent_length = 0
+
+        if length == 0:
+            dest.extend("()")
+            return 2
+
+        dest.append('(')
+        count = 1
+
+        indent = self.indent
+        if indent is not None:
+            self.current_indent_level += 1
+            newline_indent = '\n' + self.current_indent_level * indent
+            indent_length = PyUnicode_GET_SIZE(newline_indent)
+            indent_chars = PyUnicode_AS_DATA(newline_indent)
+            array.extend_buffer(dest, <char*>indent_chars, indent_length)
+            count += indent_length
+
+        last = length - 1
+        for i in range(length):
+            count += self.write_object(seq[i])
+            if i != last:
+                if indent is None:
+                    array.extend_buffer(dest, <char*>ARRAY_SEP_NO_INDENT, 2)
+                    count += 2
+                else:
+                    dest.append(',')
+                    array.extend_buffer(dest, <char*>indent_chars, indent_length)
+                    count += 1 + indent_length
+
+        if indent is not None:
+            self.current_indent_level -= 1
+            newline_indent = '\n' + self.current_indent_level * indent
+            indent_length = PyUnicode_GET_SIZE(newline_indent)
+            indent_chars = PyUnicode_AS_DATA(newline_indent)
+            array.extend_buffer(dest, <char*>indent_chars, indent_length)
+            count += indent_length
+
+        dest.append(')')
+        count += 1
+
+        return count
+
+    cdef Py_ssize_t write_dict(self, dict d) except -1:
+        cdef:
+            unicode indent
+            unicode newline_indent
+            const char *indent_chars = NULL
+            Py_ssize_t indent_length = 0
+            array.array dest = self.dest
+            Py_ssize_t last, count, i
+
+        if not d:
+            dest.extend("{}")
+            return 2
+
+        dest.append('{')
+        count = 1
+
+        indent = self.indent
+        if indent is not None:
+            self.current_indent_level += 1
+            newline_indent = '\n' + self.current_indent_level * indent
+            indent_length = PyUnicode_GET_SIZE(newline_indent)
+            indent_chars = PyUnicode_AS_DATA(newline_indent)
+            array.extend_buffer(dest, <char*>indent_chars, indent_length)
+            count += indent_length
+
+        last = len(d) - 1
+        for i, (key, value) in enumerate(sorted(d.items())):
+            if not isinstance(key, unicode):
+                key = unicode(key)
+            count += self.write_string(key)
+
+            array.extend_buffer(dest, <char*>DICT_KEY_VALUE_SEP, 3)
+            count += 3
+
+            count += self.write_object(value)
+
+            if i != last:
+                if indent is None:
+                    array.extend_buffer(dest, <char*>DICT_ITEM_SEP_NO_INDENT, 2)
+                    count += 2
+                else:
+                    dest.append(';')
+                    array.extend_buffer(dest, <char*>indent_chars, indent_length)
+                    count += 1 + indent_length
+            else:
+                dest.append(';')
+                count += 1
+
+        if indent is not None:
+            self.current_indent_level -= 1
+            newline_indent = '\n' + self.current_indent_level * indent
+            indent_length = PyUnicode_GET_SIZE(newline_indent)
+            indent_chars = PyUnicode_AS_DATA(newline_indent)
+            array.extend_buffer(dest, <char*>indent_chars, indent_length)
+            count += indent_length
+
+        dest.append('}')
+        count += 1
+
+        return count
+
+    cdef Py_ssize_t write_ordered_dict(self, object d) except -1:
+        # This is the same as the write_dict method but doesn't sort the items.
+        # Also, in `write_dict`, the type of `d` is `dict` so it uses optimized
+        # C dict methods, whereas here it is a generic `object`, as OrderedDict
+        # does not have a C API (as far as I know).
+        cdef:
+            unicode indent
+            unicode newline_indent
+            const char *indent_chars = NULL
+            Py_ssize_t indent_length = 0
+            array.array dest = self.dest
+            Py_ssize_t last, count, i
+
+        if not d:
+            dest.extend("{}")
+            return 2
+
+        dest.append('{')
+        count = 1
+
+        indent = self.indent
+        if indent is not None:
+            self.current_indent_level += 1
+            newline_indent = '\n' + self.current_indent_level * indent
+            indent_length = PyUnicode_GET_SIZE(newline_indent)
+            indent_chars = PyUnicode_AS_DATA(newline_indent)
+            array.extend_buffer(dest, <char*>indent_chars, indent_length)
+            count += indent_length
+
+        last = len(d) - 1
+        # we don't sort OrderedDict
+        for i, (key, value) in enumerate(d.items()):
+            if not isinstance(key, unicode):
+                key = unicode(key)
+            count += self.write_string(key)
+
+            array.extend_buffer(dest, <char*>DICT_KEY_VALUE_SEP, 3)
+            count += 3
+
+            count += self.write_object(value)
+
+            if i != last:
+                if indent is None:
+                    array.extend_buffer(dest, <char*>DICT_ITEM_SEP_NO_INDENT, 2)
+                    count += 2
+                else:
+                    dest.append(';')
+                    array.extend_buffer(dest, <char*>indent_chars, indent_length)
+                    count += 1 + indent_length
+            else:
+                dest.append(';')
+                count += 1
+
+        if indent is not None:
+            self.current_indent_level -= 1
+            newline_indent = '\n' + self.current_indent_level * indent
+            indent_length = PyUnicode_GET_SIZE(newline_indent)
+            indent_chars = PyUnicode_AS_DATA(newline_indent)
+            array.extend_buffer(dest, <char*>indent_chars, indent_length)
+            count += indent_length
+
+        dest.append('}')
+        count += 1
+
+        return count
+
+
+def dumps(obj, bint unicode_escape=True, int float_precision=6, indent=None):
+    w = Writer(
+        unicode_escape=unicode_escape,
+        float_precision=float_precision,
+        indent=indent,
+    )
+    w.write(obj)
+    return w.getvalue()
+
+
+def dump(obj, fp, bint unicode_escape=True, int float_precision=6, indent=None):
+    w = Writer(
+        unicode_escape=unicode_escape,
+        float_precision=float_precision,
+        indent=indent,
+    )
+    w.write(obj)
+    w.dump(fp)
diff --git a/tests/setup.py b/tests/setup.py
deleted file mode 100644
index 5f5c81e..0000000
--- a/tests/setup.py
+++ /dev/null
@@ -1,19 +0,0 @@
-import sys
-import os
-
-sys.path.insert(0, os.path.dirname(os.path.dirname(__file__)))
-
-from setup import cython_build_ext, include_dirs
-from setuptools import setup, Extension
-
-
-setup(
-    ext_modules=[
-        Extension(
-            "tests.cdef_wrappers",
-            sources=["tests/cdef_wrappers.pyx"],
-            include_dirs=include_dirs,
-        )
-    ],
-    cmdclass={"build_ext": cython_build_ext},
-)
diff --git a/tests/test_parser.py b/tests/test_parser.py
index 1403e5d..6679370 100644
--- a/tests/test_parser.py
+++ b/tests/test_parser.py
@@ -1,8 +1,9 @@
+# -*- coding: utf-8 -*-
 from __future__ import absolute_import, unicode_literals
 import sys
 from io import StringIO, BytesIO
 from collections import OrderedDict
-from .cdef_wrappers import (
+from openstep_plist._test import (
     line_number_strings,
     is_valid_unquoted_string_char,
     advance_to_non_space,
@@ -117,9 +118,15 @@ def test_get_slashed_char(string, expected):
     [
         ("a", "a"),
         ("abc;", "abc"),  # trailing chars left in buffer
-        ("1", "1"),
-        ("123456789", "123456789"),
-        ("1.23456789", "1.23456789"),
+        ("1", 1),
+        ("-1", -1),
+        ("123456789", 123456789),
+        ("1.23456789", 1.23456789),
+        ("-12.3456789", -12.3456789),
+        ("1z", "1z"),  # non-numbers parsed as strings
+        ("-9y", "-9y"),
+        ("-", "-"),
+        ("x123", "x123"),
     ],
 )
 def test_parse_unquoted_plist_string(string, expected):
@@ -133,7 +140,19 @@ def test_parse_unquoted_plist_string_EOF():
 
 @pytest.mark.parametrize(
     "string, expected",
-    [("a", "a"), ('"a"', "a"), ("'a'", "a"), ('"a\\012b"', ("a\nb"))],
+    [
+        ("a", "a"),
+        ('"a"', "a"),
+        ("'a'", "a"),
+        ('"a\\012b"', ("a\nb")),
+        # surrogate pair gets decoded as a single scalar value
+        ('"\\UD83D\\UDCA9"', "\U0001F4A9"),  # '💩'
+        # surrogates that don't go in pairs are simply passed through
+        ('"\\UD83D"', "\ud83d"),
+        ('"\\UD83D\\012"', "\ud83d\n"),
+        ('"\\UDCA9"', "\udca9"),
+        ('"\\UDCA9\\012"', "\udca9\n"),
+    ],
 )
 def test_parse_plist_string(string, expected):
     assert parse_plist_string(string) == expected
@@ -153,11 +172,11 @@ def test_parse_plist_string_invalid_char():
 
 
 def test_parse_plist_array():
-    assert openstep_plist.loads("(1)") == ["1"]
-    assert openstep_plist.loads("(1,)") == ["1"]
-    assert openstep_plist.loads("(\t1 \r\n, 2.2, c,\n)") == ["1", "2.2", "c"]
+    assert openstep_plist.loads("(1)") == [1]
+    assert openstep_plist.loads("(1,)") == [1]
+    assert openstep_plist.loads("(\t1 \r\n, 2.2, c,\n)") == [1, 2.2, "c"]
     assert openstep_plist.loads("('1', '2')") == ["1", "2"]
-    assert openstep_plist.loads("(\n1,\n\"'2'\"\n)") == ["1", "'2'"]
+    assert openstep_plist.loads("(\n1,\n\"'2'\"\n)") == [1, "'2'"]
 
 
 @pytest.mark.parametrize("string, lineno", [("(a ", 1), ("(a,\nb,\r\nc", 3)])
@@ -186,12 +205,12 @@ def test_parse_plist_dict_empty():
 @pytest.mark.parametrize(
     "string, expected",
     [
-        ("{a=1;}", {"a": "1"}),
+        ("{a=1;}", {"a": 1}),
         ('{"a"="1";}', {"a": "1"}),
        ("{'a'='1';}", {"a": "1"}),
-        ("{\na = 1;\n}", {"a": "1"}),
-        ("{\na\n=\n1;\n}", {"a": "1"}),
-        ("{a=1;b;}", {"a": "1", "b": "b"}),
+        ("{\na = 1;\n}", {"a": 1}),
+        ("{\na\n=\n1;\n}", {"a": 1}),
+        ("{a=1;b;}", {"a": 1, "b": "b"}),
     ],
 )
 def test_parse_plist_dict(string, expected):
@@ -260,20 +279,20 @@ def test_parse_plist_object_invalid():
 
 def test_parse_string_resources():
     assert openstep_plist.loads("a=1;\n'b' = 2.4;\n'c' = \"hello world\";") == {
-        "a": "1",
-        "b": "2.4",
+        "a": 1,
+        "b": 2.4,
         "c": "hello world",
     }
 
 
 def test_load():
     fp = StringIO("{a=1;}")
-    assert openstep_plist.load(fp) == {"a": "1"}
+    assert openstep_plist.load(fp) == {"a": 1}
 
 
 def test_load_from_bytes():
     if sys.version_info.major < 3:
-        assert openstep_plist.loads(b"{a=1;}") == {"a": "1"}
+        assert openstep_plist.loads(b"{a=1;}") == {"a": 1}
     else:
         with pytest.raises(TypeError, match="Could not convert to unicode"):
             openstep_plist.loads(b"{a=1;}")
@@ -282,14 +301,14 @@ def test_load_from_bytes():
 @pytest.mark.parametrize(
     "string, expected",
     [
-        ("{a = 2;}", {"a": 2}),
-        ("{a = {b = -2;};}", {"a": {"b": -2}}),
-        ("{a = (1.5, -23.9999);}", {"a": [1.5, -23.9999]}),
+        ("{a = 2;}", {"a": "2"}),
+        ("{a = {b = -2;};}", {"a": {"b": "-2"}}),
+        ("{a = (1.5, -23.9999);}", {"a": ["1.5", "-23.9999"]}),
         ("{a = x123; b = -c; minus = -;}", {"a": "x123", "b": "-c", "minus": "-"}),
     ],
 )
-def test_loads_use_numbers(string, expected):
-    assert openstep_plist.loads(string, use_numbers=True) == expected
+def test_loads_no_use_numbers(string, expected):
+    assert openstep_plist.loads(string, use_numbers=False) == expected
 
 
 def test_loads_dict_type():
diff --git a/tests/test_writer.py b/tests/test_writer.py
new file mode 100644
index 0000000..466e5d8
--- /dev/null
+++ b/tests/test_writer.py
@@ -0,0 +1,228 @@
+# -*- coding: utf-8 -*-
+from __future__ import absolute_import, unicode_literals
+import openstep_plist
+from openstep_plist.writer import Writer
+from openstep_plist._test import is_narrow_unicode
+from io import StringIO, BytesIO
+from collections import OrderedDict
+import pytest
+
+
+class TestWriter(object):
+    def test_simple(self):
+        w = Writer()
+        assert w.write("abc") == 3
+        assert w.getvalue() == "abc"
+
+        f = StringIO()
+        w.dump(f)
+        assert f.getvalue() == "abc"
+
+    def test_None(self):
+        w = Writer()
+        w.write(None)
+        assert w.getvalue() == '"(nil)"'
+
+    def test_unquoted_string(self):
+        w = Writer()
+        assert w.write(".appVersion") == 11
+        assert w.getvalue() == ".appVersion"
+
+    @pytest.mark.parametrize(
+        "string, expected",
+        [
+            ("", '""'),
+            ("\t", '"\t"'),
+            ("\n\a\b\v\f\r", '"\\n\\a\\b\\v\\f\\r"'),
+            ("\\", '"\\\\"'),
+            ('"', '"\\""'),
+            ("\0\1\2\3\4\5\6", '"\\000\\001\\002\\003\\004\\005\\006"'),
+            ("\x0E\x0F\x10\x11\x12\x13", '"\\016\\017\\020\\021\\022\\023"'),
+            ("\x14\x15\x16\x17\x18\x19", '"\\024\\025\\026\\027\\030\\031"'),
+            ("\x1a\x1b\x1c\x1d\x1e\x1f\x7f", '"\\032\\033\\034\\035\\036\\037\\177"'),
+            ("\x80\x81\x9E\x9F\xA0", '"\\U0080\\U0081\\U009E\\U009F\\U00A0"'),
+            ("\U0001F4A9", '"\\UD83D\\UDCA9"'),  # '💩'
+            # if string starts with digit or '-', always quote it to distinguish
+            # from (signed) int or float number (always unquoted)
+            ("1", '"1"'),
+            ("1.1", '"1.1"'),
+            ("-23", '"-23"'),
+            ("1zzz", '"1zzz"'),  # ... even if it's not actually a number
+            ("-23yyy", '"-23yyy"'),
+            ("-", '"-"'),
+            ("-a-", '"-a-"'),
+        ],
+    )
+    def test_quoted_string(self, string, expected):
+        w = Writer()
+        w.write(string)
+        assert w.getvalue() == expected
+
+    def test_quoted_string_no_unicode_escape(self):
+        w = Writer(unicode_escape=False)
+        assert w.write("\u0410") == 3
+        assert w.getvalue() == '"\u0410"'
+
+        w = Writer(unicode_escape=False)
+        assert w.write("\U0001F4A9") == (4 if is_narrow_unicode() else 3)
+        assert w.getvalue() == '"\U0001F4A9"'
+
+    @pytest.mark.parametrize(
+        "integer, expected",
+        [
+            (0, "0"),
+            (1, "1"),
+            (123, "123"),
+            (0x7fffffffffffffff, "9223372036854775807"),
+            (0x7fffffffffffffff + 1, "9223372036854775808"),
+        ],
+    )
+    def test_int(self, integer, expected):
+        w = Writer()
+        w.write(integer)
+        assert w.getvalue() == expected
+
+    @pytest.mark.parametrize(
+        "flt, expected",
+        [
+            (0.0, "0"),
+            (1.0, "1"),
+            (123.456, "123.456"),
+            (0.01, "0.01"),
+            (0.001, "0.001"),
+            (0.0001, "0.0001"),
+            (0.00001, "0.00001"),
+            (0.000001, "0.000001"),
+            (0.0000001, "0"),  # default precision is 6
+        ],
+    )
+    def test_float(self, flt, expected):
+        w = Writer()
+        w.write(flt)
+        assert w.getvalue() == expected
+
+    def test_float_precision(self):
+        w = Writer(float_precision=3)
+        w.write(0.0001)
+        assert w.getvalue() == "0"
+
+        w = Writer(float_precision=0)
+        w.write(0.999)
+        assert w.getvalue() == "1"
+
+    @pytest.mark.parametrize(
+        "data, expected",
+        [
+            (b"\x00", "<00>"),
+            (b"\x00\x01", "<0001>"),
+            (b"\x00\x01\x02", "<000102>"),
+            (b"\x00\x01\x02\x03", "<00010203>"),
+            (b"\x00\x01\x02\x03\x04", "<00010203 04>"),
+            (b"\x00\x01\x02\x03\x04\x05", "<00010203 0405>"),
+            (b"\x00\x01\x02\x03\x04\x05\x06", "<00010203 040506>"),
+            (b"\x00\x01\x02\x03\x04\x05\x06\x07", "<00010203 04050607>"),
+            (b"\x00\x01\x02\x03\x04\x05\x06\x07\x08", "<00010203 04050607 08>"),
+            (b"\x09\x0A\x0B\x0C\x0D\x0E\x0F\x10\x11", "<090A0B0C 0D0E0F10 11>"),
+        ],
+        ids=lambda p: p.decode() if isinstance(p, bytes) else p,
+    )
+    def test_data(self, data, expected):
+        w = Writer()
+        assert w.write(data) == len(expected)
+        assert w.getvalue() == expected
+
+    def test_bool(self):
+        w = Writer()
+        assert w.write(True) == 1
+        assert w.getvalue() == "1"
+
+        w = Writer()
+        assert w.write(False) == 1
+        assert w.getvalue() == "0"
+
+    @pytest.mark.parametrize(
+        "array, expected_no_indent, expected_indent",
+        [
+            ([], "()", "()"),
+            ((), "()", "()"),
+            ([1], "(1)", "(\n  1\n)"),
+            ([1, 2], "(1, 2)", "(\n  1,\n  2\n)"),
+            ([1.2, 3.4, 5.6], "(1.2, 3.4, 5.6)", "(\n  1.2,\n  3.4,\n  5.6\n)"),
+            (
+                (1, "a", ("b", 2)),
+                "(1, a, (b, 2))",
+                "(\n  1,\n  a,\n  (\n    b,\n    2\n  )\n)",
+            ),
+            ([b"a", b"b"], "(<61>, <62>)", "(\n  <61>,\n  <62>\n)"),
+            (
+                [{"a": "b"}, {"c": "d"}],
+                "({a = b;}, {c = d;})",
+                "(\n  {\n    a = b;\n  },\n  {\n    c = d;\n  }\n)",
+            ),
+        ],
+    )
+    def test_array(self, array, expected_no_indent, expected_indent):
+        w = Writer()
+        assert w.write(array) == len(expected_no_indent)
+        assert w.getvalue() == expected_no_indent
+
+        w = Writer(indent=2)
+        assert w.write(array) == len(expected_indent)
+        assert w.getvalue() == expected_indent
+
+    @pytest.mark.parametrize(
+        "dictionary, expected_no_indent, expected_indent",
+        [
+            ({}, "{}", "{}"),
+            (OrderedDict(), "{}", "{}"),
+            ({"a": "b"}, "{a = b;}", "{\n  a = b;\n}"),
+            ({1: "c"}, '{"1" = c;}', '{\n  "1" = c;\n}'),
+            (
+                {"hello world": 12, "abc": [34, 56.8]},
+                '{abc = (34, 56.8); "hello world" = 12;}',
+                '{\n  abc = (\n    34,\n    56.8\n  );\n  "hello world" = 12;\n}',
+            ),
+            (
+                OrderedDict([("z", 2), ("a", 1), (12, "c")]),
+                '{z = 2; a = 1; "12" = c;}',
+                '{\n  z = 2;\n  a = 1;\n  "12" = c;\n}',
+            ),
+        ],
+    )
+    def test_dictionary(self, dictionary, expected_no_indent, expected_indent):
+        w = Writer()
+        assert w.write(dictionary) == len(expected_no_indent)
+        assert w.getvalue() == expected_no_indent
+
+        w = Writer(indent="  ")
+        assert w.write(dictionary) == len(expected_indent)
+        assert w.getvalue() == expected_indent
+
+    def test_type_error(self):
+        obj = object()
+        w = Writer()
+        with pytest.raises(TypeError, match="not PLIST serializable"):
+            w.write(obj)
+
+
+def test_dumps():
+    assert openstep_plist.dumps(
+        {"a": 1, "b": 2.9999999, "c d": [33, 44], "e": (b"fghilmno", b"pqrstuvz")}
+    ) == (
+        '{a = 1; b = 3; "c d" = (33, 44); '
+        "e = (<66676869 6C6D6E6F>, <70717273 7475767A>);}"
+    )
+
+
+def test_dump():
+    plist = [1, b"2", {3: (4, "5", "\U0001F4A9")}]
+
+    fp = StringIO()
+    openstep_plist.dump(plist, fp)
+    assert fp.getvalue() == '(1, <32>, {"3" = (4, "5", "\\UD83D\\UDCA9");})'
+
+    fp = BytesIO()
+    openstep_plist.dump(plist, fp, unicode_escape=False)
+    assert fp.getvalue() == b'(1, <32>, {"3" = (4, "5", "\xf0\x9f\x92\xa9");})'
+
+    with pytest.raises(AttributeError):
+        openstep_plist.dump(plist, object())
diff --git a/tox.ini b/tox.ini
index 059f6a1..b5b7409 100644
--- a/tox.ini
+++ b/tox.ini
@@ -22,7 +22,6 @@ setenv =
     cov: CYTHON_TRACE=1
 commands =
     cov: python setup.py build_ext -i
-    python tests/setup.py build_ext -i
    nocov: pytest {posargs}
     cov: coverage run --parallel -m pytest {posargs}