From 62e18faa10e2e11bd294a1315c39d92144161a7e Mon Sep 17 00:00:00 2001 From: tr1cks Date: Wed, 15 Mar 2017 19:17:42 +0500 Subject: [PATCH] Fix bug with IDL encoding support. Until now there was no option to specify the encoding of IDL files; parsing always used the OS-specific default encoding. That worked only for IDL files in pure ASCII (without national-language comments), or if you were lucky enough that the OS encoding matched the IDL encoding. --- thriftpy/parser/__init__.py | 4 ++-- thriftpy/parser/parser.py | 13 +++++++++---- 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/thriftpy/parser/__init__.py b/thriftpy/parser/__init__.py index 7aa7e2e..1db1f50 100644 --- a/thriftpy/parser/__init__.py +++ b/thriftpy/parser/__init__.py @@ -15,7 +15,7 @@ from .parser import parse, parse_fp -def load(path, module_name=None, include_dirs=None, include_dir=None): +def load(path, module_name=None, include_dirs=None, include_dir=None, encoding=None): """Load thrift file as a module. The module loaded and objects inside may only be pickled if module_name @@ -27,7 +27,7 @@ def load(path, module_name=None, include_dirs=None, include_dir=None): """ real_module = bool(module_name) thrift = parse(path, module_name, include_dirs=include_dirs, - include_dir=include_dir) + include_dir=include_dir, encoding=encoding) if real_module: sys.modules[module_name] = thrift diff --git a/thriftpy/parser/parser.py b/thriftpy/parser/parser.py index f65320a..5fe6ef0 100644 --- a/thriftpy/parser/parser.py +++ b/thriftpy/parser/parser.py @@ -6,6 +6,7 @@ """ from __future__ import absolute_import +from io import open import collections import os @@ -55,7 +56,7 @@ def p_include(p): for include_dir in replace_include_dirs: path = os.path.join(include_dir, p[2]) if os.path.exists(path): - child = parse(path) + child = parse(path, encoding=thrift.__thrift_encoding__) setattr(thrift, child.__name__, child) _add_thrift_meta('includes', child) return @@ -482,7 +483,7 @@ def p_type_annotation(p): def 
parse(path, module_name=None, include_dirs=None, include_dir=None, - lexer=None, parser=None, enable_cache=True): + lexer=None, parser=None, enable_cache=True, encoding=None): """Parse a single thrift file to module object, e.g.:: >>> from thriftpy.parser.parser import parse @@ -503,6 +504,9 @@ def parse(path, module_name=None, include_dirs=None, include_dir=None, :param enable_cache: if this is set to be `True`, parsed module will be cached, this is enabled by default. If `module_name` is provided, use it as cache key, else use the `path`. + :param encoding: encoding is the name of the encoding used to decode or encode the file. + This should only be used in text mode. The default encoding is platform dependent, + but any encoding supported by Python can be passed. """ if os.name == 'nt' and sys.version_info < (3, 2): os.path.samefile = lambda f1, f2: os.stat(f1) == os.stat(f2) @@ -537,10 +541,10 @@ def parse(path, module_name=None, include_dirs=None, include_dir=None, url_scheme = urlparse(path).scheme if url_scheme == 'file': - with open(urlparse(path).netloc + urlparse(path).path) as fh: + with open(urlparse(path).netloc + urlparse(path).path, encoding=encoding) as fh: data = fh.read() elif url_scheme == '': - with open(path) as fh: + with open(path, encoding=encoding) as fh: data = fh.read() elif url_scheme in ('http', 'https'): data = urlopen(path).read() @@ -559,6 +563,7 @@ def parse(path, module_name=None, include_dirs=None, include_dir=None, thrift = types.ModuleType(module_name) setattr(thrift, '__thrift_file__', path) + setattr(thrift, '__thrift_encoding__', encoding) thrift_stack.append(thrift) lexer.lineno = 1 parser.parse(data)