From 62e18faa10e2e11bd294a1315c39d92144161a7e Mon Sep 17 00:00:00 2001
From: tr1cks
Date: Wed, 15 Mar 2017 19:17:42 +0500
Subject: [PATCH] Fix bug with IDL encoding support. Previously there was no
option to specify the encoding of IDL files; parsing always used the
OS-specific encoding. That worked only for ASCII IDL files (without
national-language comments) or when the OS encoding happened to
match the IDL encoding.
---
thriftpy/parser/__init__.py | 4 ++--
thriftpy/parser/parser.py | 13 +++++++++----
2 files changed, 11 insertions(+), 6 deletions(-)
diff --git a/thriftpy/parser/__init__.py b/thriftpy/parser/__init__.py
index 7aa7e2e..1db1f50 100644
--- a/thriftpy/parser/__init__.py
+++ b/thriftpy/parser/__init__.py
@@ -15,7 +15,7 @@
from .parser import parse, parse_fp
-def load(path, module_name=None, include_dirs=None, include_dir=None):
+def load(path, module_name=None, include_dirs=None, include_dir=None, encoding=None):
"""Load thrift file as a module.
The module loaded and objects inside may only be pickled if module_name
@@ -27,7 +27,7 @@ def load(path, module_name=None, include_dirs=None, include_dir=None):
"""
real_module = bool(module_name)
thrift = parse(path, module_name, include_dirs=include_dirs,
- include_dir=include_dir)
+ include_dir=include_dir, encoding=encoding)
if real_module:
sys.modules[module_name] = thrift
diff --git a/thriftpy/parser/parser.py b/thriftpy/parser/parser.py
index f65320a..5fe6ef0 100644
--- a/thriftpy/parser/parser.py
+++ b/thriftpy/parser/parser.py
@@ -6,6 +6,7 @@
"""
from __future__ import absolute_import
+from io import open
import collections
import os
@@ -55,7 +56,7 @@ def p_include(p):
for include_dir in replace_include_dirs:
path = os.path.join(include_dir, p[2])
if os.path.exists(path):
- child = parse(path)
+ child = parse(path, encoding=thrift.__thrift_encoding__)
setattr(thrift, child.__name__, child)
_add_thrift_meta('includes', child)
return
@@ -482,7 +483,7 @@ def p_type_annotation(p):
def parse(path, module_name=None, include_dirs=None, include_dir=None,
- lexer=None, parser=None, enable_cache=True):
+ lexer=None, parser=None, enable_cache=True, encoding=None):
"""Parse a single thrift file to module object, e.g.::
>>> from thriftpy.parser.parser import parse
@@ -503,6 +504,9 @@ def parse(path, module_name=None, include_dirs=None, include_dir=None,
:param enable_cache: if this is set to be `True`, parsed module will be
cached, this is enabled by default. If `module_name`
is provided, use it as cache key, else use the `path`.
+ :param encoding: name of the encoding used to decode the thrift file when
+ reading it from a local path (the file is opened in text mode). The default
+ encoding is platform dependent, but any encoding supported by Python can be passed.
"""
if os.name == 'nt' and sys.version_info < (3, 2):
os.path.samefile = lambda f1, f2: os.stat(f1) == os.stat(f2)
@@ -537,10 +541,10 @@ def parse(path, module_name=None, include_dirs=None, include_dir=None,
url_scheme = urlparse(path).scheme
if url_scheme == 'file':
- with open(urlparse(path).netloc + urlparse(path).path) as fh:
+ with open(urlparse(path).netloc + urlparse(path).path, encoding=encoding) as fh:
data = fh.read()
elif url_scheme == '':
- with open(path) as fh:
+ with open(path, encoding=encoding) as fh:
data = fh.read()
elif url_scheme in ('http', 'https'):
data = urlopen(path).read()
@@ -559,6 +563,7 @@ def parse(path, module_name=None, include_dirs=None, include_dir=None,
thrift = types.ModuleType(module_name)
setattr(thrift, '__thrift_file__', path)
+ setattr(thrift, '__thrift_encoding__', encoding)
thrift_stack.append(thrift)
lexer.lineno = 1
parser.parse(data)