Merge pull request #3 from fonttools/writer

Implement writer
fonttools · Sep 30, 2018 · d26e5b7 · d26e5b7
2 parents be47c99 + 82917e3
commit d26e5b7
Show file tree

Hide file tree

Showing 18 changed files with 1,018 additions and 119 deletions.
diff --git a/.travis.yml b/.travis.yml
@@ -2,8 +2,11 @@ env:
   global:
     # directory containing the project source
     - REPO_DIR=.
+    - BUILD_DEPENDS="wheel==0.31.1"
     # pip dependencies to _test_ project
-    - TEST_DEPENDS="tox"
+    - TEST_DEPENDS="tox wheel==0.31.1"
+    # this is to prevent the latest wheel==0.32.0 from being installed in the venv
+    - VIRTUALENV_NO_DOWNLOAD=1
     - PLAT=x86_64
     - UNICODE_WIDTH=32
     - TWINE_USERNAME="anthrotype"

diff --git a/multibuild b/multibuild
diff --git a/pyproject.toml b/pyproject.toml
@@ -1,7 +1,7 @@
 [build-system]
 requires = [
     "setuptools",
-    "wheel",
+    "wheel == 0.31.1",
     "cython >= 0.28.5",
 ]
 build-backend = "setuptools.build_meta"
diff --git a/setup.py b/setup.py
@@ -98,7 +98,7 @@ def run(self):
     else []
 )
 
-cython_modules = ["parser", "_compat"]
+cython_modules = ["parser", "util", "writer", "_test"]
 extensions = [
     Extension(
         "openstep_plist." + mod,

diff --git a/src/openstep_plist/__init__.py b/src/openstep_plist/__init__.py
@@ -1,9 +1,10 @@
 from .parser import load, loads, ParseError
+from .writer import dump, dumps
 
 try:
     from ._version import version as __version__
 except ImportError:
     __version__ = "0.0.0+unknown"
 
 
-__all__ = ["load", "loads", "ParseError"]
+__all__ = ["load", "loads", "dump", "dumps", "ParseError"]
diff --git a/src/openstep_plist/__main__.py b/src/openstep_plist/__main__.py
@@ -1,10 +1,10 @@
 #!/usr/bin/env python
-
+from __future__ import absolute_import, unicode_literals
 import openstep_plist
 import json
-import base64
 import binascii
-# from collections import OrderedDict
+from functools import partial
+from io import open
 
 
 class BytesEncoder(json.JSONEncoder):
@@ -29,6 +29,8 @@ def main(args=None):
     method = args[0]
     if method == "-a":
         parse = openstep_plist.load
+        dump = partial(openstep_plist.dump, indent=0)
+
     elif method == "-g":
 
         def parse(fp, dict_type=dict):
@@ -37,19 +39,22 @@ def parse(fp, dict_type=dict):
             s = fp.read()
             p = Parser(current_type=dict_type)
             return p.parse(s)
+
+        from glyphsLib.writer import dump
+
     else:
         sys.exit("error: unknown option: %s" % method)
 
     infile = args[1]
 
     with open(infile, "r", encoding="utf-8") as fp:
-        # data = parse(fp, dict_type=OrderedDict)
         data = parse(fp)
 
     if len(args) > 2:
         outfile = args[2]
         with open(outfile, "w", encoding="utf-8") as fp:
-            json.dump(data, fp, cls=BytesEncoder, sort_keys=True, indent="  ")
+            # json.dump(data, fp, cls=BytesEncoder, sort_keys=True, indent="  ")
+            dump(data, fp)
 
 
 if __name__ == "__main__":

diff --git a/src/openstep_plist/_compat.pxd b/src/openstep_plist/_compat.pxd
diff --git a/src/openstep_plist/_compat.pyx b/src/openstep_plist/_compat.pyx
diff --git a/tests/cdef_wrappers.pyx → src/openstep_plist/_test.pyx b/tests/cdef_wrappers.pyx → src/openstep_plist/_test.pyx
@@ -1,16 +1,19 @@
 #cython: language_level=3
 #distutils: define_macros=CYTHON_TRACE_NOGIL=1
 
-from openstep_plist.parser cimport (
+from .parser cimport (
     ParseInfo,
     line_number_strings as _line_number_strings,
-    is_valid_unquoted_string_char as _is_valid_unquoted_string_char,
     advance_to_non_space as _advance_to_non_space,
     get_slashed_char as _get_slashed_char,
     parse_unquoted_plist_string as _parse_unquoted_plist_string,
     parse_plist_string as _parse_plist_string,
 )
-from openstep_plist._compat cimport tounicode
+from .util cimport (
+    PY_NARROW_UNICODE,
+    tounicode,
+    is_valid_unquoted_string_char as _is_valid_unquoted_string_char,
+)
 from cpython.unicode cimport (
     PyUnicode_FromUnicode, PyUnicode_AS_UNICODE, PyUnicode_GET_SIZE,
 )
@@ -28,7 +31,7 @@ cdef class ParseContext:
             string,
             Py_ssize_t offset=0,
             dict_type=dict,
-            bint use_numbers=False
+            bint use_numbers=True
     ):
         cdef ParseContext self = ParseContext.__new__(cls)
         self.s = tounicode(string)
@@ -45,6 +48,10 @@ cdef class ParseContext:
         return self
 
 
+def is_narrow_unicode():
+    return PY_NARROW_UNICODE
+
+
 def is_valid_unquoted_string_char(Py_UNICODE c):
     return _is_valid_unquoted_string_char(c)
 

diff --git a/src/openstep_plist/parser.pxd b/src/openstep_plist/parser.pxd
@@ -4,11 +4,6 @@ from libc.stdint cimport uint32_t
 from cpython cimport array
 
 
-cdef extern from "<ctype.h>":
-    int isxdigit(int c)
-    int isdigit(int c)
-
-
 ctypedef struct ParseInfo:
     const Py_UNICODE *begin
     const Py_UNICODE *curr
@@ -24,9 +19,6 @@ cdef class ParseError(Exception):
 cdef uint32_t line_number_strings(ParseInfo *pi)
 
 
-cdef bint is_valid_unquoted_string_char(Py_UNICODE x)
-
-
 cdef bint advance_to_non_space(ParseInfo *pi)
 
 

diff --git a/src/openstep_plist/parser.pyx b/src/openstep_plist/parser.pyx
@@ -4,13 +4,24 @@
 from cpython.unicode cimport (
     PyUnicode_FromUnicode, PyUnicode_AS_UNICODE, PyUnicode_GET_SIZE,
 )
-from libc.stdint cimport uint8_t, uint32_t
+from libc.stdint cimport uint8_t, uint16_t, uint32_t
 from cpython cimport array
 from cpython.version cimport PY_MAJOR_VERSION
 import array
 cimport cython
 
-from ._compat cimport tounicode, tostr
+from .util cimport (
+    tounicode,
+    tostr,
+    unicode_array_template,
+    is_valid_unquoted_string_char,
+    isdigit,
+    isxdigit,
+    PY_NARROW_UNICODE,
+    is_high_surrogate,
+    is_low_surrogate,
+    unicode_scalar_from_surrogates,
+)
 
 
 cdef uint32_t line_number_strings(ParseInfo *pi):
@@ -28,20 +39,6 @@ cdef uint32_t line_number_strings(ParseInfo *pi):
     return count
 
 
-cdef inline bint is_valid_unquoted_string_char(Py_UNICODE x):
-    return (
-        (x >= c'a' and x <= c'z') or
-        (x >= c'A' and x <= c'Z') or
-        (x >= c'0' and x <= c'9') or
-        x == c'_' or
-        x == c'$' or
-        x == c'/' or
-        x == c':' or
-        x == c'.' or
-        x == c'-'
-    )
-
-
 cdef bint advance_to_non_space(ParseInfo *pi):
     """Returns true if the advance found something that's not whitespace
     before the end of the buffer, false otherwise.
@@ -182,16 +179,12 @@ cdef Py_UNICODE get_slashed_char(ParseInfo *pi):
     return ch
 
 
-# must convert array type code to native str type else when using
-# unicode literals on py27 one gets 'TypeError: must be char, not unicode'
-cdef array.array unicode_array_template = array.array(tostr('u'), [])
-
-
 cdef unicode parse_quoted_plist_string(ParseInfo *pi, Py_UNICODE quote):
     cdef array.array string = array.clone(unicode_array_template, 0, zero=False)
     cdef const Py_UNICODE *start_mark = pi.curr
     cdef const Py_UNICODE *mark = pi.curr
-    cdef Py_UNICODE ch
+    cdef const Py_UNICODE *tmp
+    cdef Py_UNICODE ch, ch2
     while pi.curr < pi.end:
         ch = pi.curr[0]
         if ch == quote:
@@ -200,6 +193,24 @@ cdef unicode parse_quoted_plist_string(ParseInfo *pi, Py_UNICODE quote):
             array.extend_buffer(string, <char*>mark, pi.curr - mark)
             pi.curr += 1
             ch = get_slashed_char(pi)
+            # If we are NOT on a "narrow" python 2 build, then we need to parse
+            # two successive \UXXXX escape sequences as one surrogate pair
+            # representing a "supplementary" Unicode scalar value.
+            # If we are on a "narrow" build, then the two code units already
+            # represent a single codepoint internally.
+            if (
+                not PY_NARROW_UNICODE and is_high_surrogate(ch)
+                and pi.curr < pi.end and pi.curr[0] == c"\\"
+            ):
+                tmp = pi.curr
+                pi.curr += 1
+                ch2 = get_slashed_char(pi)
+                if is_low_surrogate(ch2):
+                    ch = unicode_scalar_from_surrogates(high=ch, low=ch2)
+                else:
+                    # XXX maybe we should raise here instead of letting this
+                    # lone high surrogate (not followed by a low) pass through?
+                    pi.curr = tmp
             string.append(ch)
             mark = pi.curr
         else:
@@ -519,7 +530,7 @@ cdef object parse_plist_object(ParseInfo *pi, bint required=True):
             )
 
 
-def loads(string, dict_type=dict, bint use_numbers=False):
+def loads(string, dict_type=dict, bint use_numbers=True):
     cdef unicode s = tounicode(string)
     cdef Py_ssize_t length = PyUnicode_GET_SIZE(s)
     cdef Py_UNICODE* buf = PyUnicode_AS_UNICODE(s)
@@ -555,5 +566,5 @@ def loads(string, dict_type=dict, bint use_numbers=False):
     return result
 
 
-def load(fp, dict_type=dict, use_numbers=False):
+def load(fp, dict_type=dict, use_numbers=True):
     return loads(fp.read(), dict_type=dict_type, use_numbers=use_numbers)
diff --git a/src/openstep_plist/util.pxd b/src/openstep_plist/util.pxd
@@ -0,0 +1,40 @@
+#cython: language_level=3
+
+from cpython cimport array
+from libc.stdint cimport uint16_t, uint32_t
+
+
+cdef extern from "<ctype.h>":
+    int isxdigit(int c)
+    int isdigit(int c)
+    int isprint(int c)
+
+
+cdef unicode tounicode(s, encoding=*, errors=*)
+
+
+cdef tostr(s, encoding=*, errors=*)
+
+
+cdef array.array unicode_array_template
+
+
+cdef bint is_valid_unquoted_string_char(Py_UNICODE x)
+
+
+cdef bint PY_NARROW_UNICODE
+
+
+cdef bint is_high_surrogate(uint32_t ch)
+
+
+cdef bint is_low_surrogate(uint32_t ch)
+
+
+cdef uint32_t unicode_scalar_from_surrogates(uint16_t high, uint16_t low)
+
+
+cdef uint16_t high_surrogate_from_unicode_scalar(uint32_t scalar)
+
+
+cdef uint16_t low_surrogate_from_unicode_scalar(uint32_t scalar)