This repository has been archived by the owner on Sep 23, 2022. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 0
/
jsonc.py
255 lines (214 loc) · 8.21 KB
/
jsonc.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
"""
JSON for config files, with added support for comments and expandable include keywords.
"""
import copy
import json
import os
# Container types that the walker functions in this module descend into.
RECURRENT_OBJECT_TYPES = (dict, list)
# Key that imports another JSON file. It is matched as a prefix (the walkers
# use ``startswith``), so one object may hold several includes, e.g.
# "_include_json1", "_include_json2", ...
INCLUDE_KEY = '_include_json'
# Backtracking to find the last comma may hurt performance on long runs of blanks.
def _remove_last_comma(str_list, before_index):
i = before_index - 1
while str_list[i].isspace() or not str_list[i]:
i -= 1
# This is the first none space char before before_index
if str_list[i] == ',':
str_list[i] = ''
def _dispose_comments(json_str: str):
"""Clear C/JS-style comments like // and /**/ in json_str.
Args:
json_str (str): json string to clean up comment
Returns:
str: json_str without comments
"""
result_str = list(json_str)
escaped = False
normal = True
sl_comment = False
ml_comment = False
quoted = False
a_step_from_comment = False
a_step_from_comment_away = False
former_index = None
for index, char in enumerate(json_str):
if escaped: # We have just met a '\'
escaped = False
continue
if a_step_from_comment: # We have just met a '/'
if char != '/' and char != '*':
a_step_from_comment = False
normal = True
continue
if a_step_from_comment_away: # We have just met a '*'
if char != '/':
a_step_from_comment_away = False
if char == '"':
if normal and not escaped:
# We are now in a string
quoted = True
normal = False
elif quoted and not escaped:
# We are now out of a string
quoted = False
normal = True
elif char == '\\':
# '\' should not take effect in comment
if normal or quoted:
escaped = True
elif char == '/':
if a_step_from_comment:
# Now we are in single line comment
a_step_from_comment = False
sl_comment = True
normal = False
former_index = index - 1
elif a_step_from_comment_away:
# Now we are out of comment
a_step_from_comment_away = False
normal = True
ml_comment = False
for i in range(former_index, index + 1):
result_str[i] = ""
elif normal:
# Now we are just one step away from comment
a_step_from_comment = True
normal = False
elif char == '*':
if a_step_from_comment:
# We are now in multi-line comment
a_step_from_comment = False
ml_comment = True
normal = False
former_index = index - 1
elif ml_comment:
a_step_from_comment_away = True
elif char == '\n':
if sl_comment:
sl_comment = False
normal = True
for i in range(former_index, index + 1):
result_str[i] = ""
elif char == ']' or char == '}':
if normal:
_remove_last_comma(result_str, index)
# To remove single line comment which is the last line of json
if sl_comment:
sl_comment = False
normal = True
for i in range(former_index, len(json_str)):
result_str[i] = ""
# Show respect to original input if we are in python2
return ("" if isinstance(json_str, str) else u"").join(result_str)
def _json_walker(json_obj, **kwargs):
    """Expand sub jsonc files in jsonc object

    Walks ``json_obj`` recursively. For every dict key that starts with
    ``INCLUDE_KEY`` the referenced file is read, parsed with
    ``_json_include`` and merged into the dict holding the key via
    ``dict.update``. Nested dicts and lists are walked the same way.

    Args:
        json_obj (dict or list): json object loaded from files
        **kwargs: ``base_path`` (str) is the directory that relative include
            paths are resolved against; all kwargs are forwarded to nested
            ``_json_walker`` and ``_json_include`` calls.
    """
    # Included dicts are collected first and merged once the walk is done,
    # so the dict is not modified while it is being iterated.
    to_update = []
    if isinstance(json_obj, dict):
        base_path = kwargs.get('base_path', '')
        for k, v in json_obj.items():
            # support multiple include suffix, e.g. "include_key1", "include_key2"...
            if k.startswith(INCLUDE_KEY):
                if v.startswith('/'):
                    file_path = v
                else:
                    file_path = os.path.join(base_path, v)
                # Read and close the file before parsing, so a chain of nested
                # includes does not keep every file handle open.
                with open(file_path, 'r') as fp:
                    text = fp.read()
                    include_dir = os.path.dirname(fp.name)
                # Use a per-include copy of kwargs: overwriting
                # kwargs['base_path'] in place would make sibling includes and
                # nested containers resolve relative paths against the
                # directory of the previously included file.
                include_kwargs = dict(kwargs, base_path=include_dir)
                to_update.append(
                    _json_include(text, inside_include=True, **include_kwargs)
                )
            elif isinstance(v, RECURRENT_OBJECT_TYPES):
                _json_walker(v, **kwargs)
    elif isinstance(json_obj, list):
        for item in json_obj:
            if isinstance(item, RECURRENT_OBJECT_TYPES):
                _json_walker(item, **kwargs)
    for included in to_update:
        json_obj.update(included)
def _update_walker(d: dict, u: dict):
"""Similar to dict update in python, but apply recursively
TODO this is rather a quick implementation for keep_top_values, consider to optimize it @p2
Args:
d (dict): dict to be updated
u (dict): dict that apply to d
Returns:
dict: updated dict d
"""
if isinstance(u, dict):
assert isinstance(d, dict), 'Two dicts in _update should be the same type'
for k, v in u.items():
if isinstance(v, dict):
d[k] = _update_walker(d.get(k, {}), v)
elif isinstance(v, list):
d[k] = _update_walker(d.get(k, []), v)
else:
d[k] = v
if isinstance(u, list):
assert isinstance(d, list), 'Two objects in _update should be the same type'
for i, j in zip(d, u):
_update_walker(i, j)
return d
def _json_include(text: str, inside_include=False, keep_top_values=True, base_path='', **kwargs):
    """Build jsonc object from text

    Comments are stripped from ``text``, the result is parsed with
    ``json.loads`` and every include key found in it is expanded in place.

    Args:
        text (str): loaded text from jsonc file
        inside_include (bool, optional): True when this call parses an
            included file rather than the top-level one; the parsed value
            must then be a dict. Defaults to False.
        keep_top_values (bool, optional): when True, values written in
            ``text`` itself are restored after the includes were merged, so
            an included file cannot override them. Defaults to True.
        base_path (str): base path for relative includes sub jsonc.
            Defaults to empty string.
        **kwargs: forwarded to ``json.loads``.

    Returns:
        dict: loaded jsonc dict
    """
    parsed = json.loads(_dispose_comments(text), **kwargs)

    # Snapshot the file's own values before any included file can touch them.
    snapshot = copy.deepcopy(parsed) if keep_top_values else {}

    if inside_include:
        assert isinstance(parsed, dict),\
            'The JSON file being included should always be a dict rather than a list'

    # Pull in the values that come from included files.
    _json_walker(parsed, base_path=base_path)

    if keep_top_values:
        # Write the file's own values back over whatever the includes set.
        _update_walker(parsed, snapshot)

    return parsed
def _remove_include_key(json_obj):
    """Remove the INCLUDE_KEY in the loaded json object

    Every dict key starting with ``INCLUDE_KEY`` is deleted, at any nesting
    depth. The object is modified in place and nothing is returned.

    Args:
        json_obj (dict or list): jsonc object to be modified
    """
    if isinstance(json_obj, dict):
        include_keys = []
        for key, value in json_obj.items():
            if key.startswith(INCLUDE_KEY):
                # Deletion is deferred: the dict is being iterated right now.
                include_keys.append(key)
                continue
            if isinstance(value, RECURRENT_OBJECT_TYPES):
                _remove_include_key(value)
        for key in include_keys:
            del json_obj[key]
    elif isinstance(json_obj, list):
        for element in json_obj:
            if isinstance(element, RECURRENT_OBJECT_TYPES):
                _remove_include_key(element)
# Below are thin wrapper functions around the standard json module.
# Note that not all of the original kwargs have been tested.
def loads(text, remove_include_key=False, base_path='', **kwargs):
    """Parse jsonc text, expanding the files it includes.

    Args:
        text (str): jsonc text to parse.
        remove_include_key (bool, optional): when True, the include keys are
            deleted from the result after expansion. Defaults to False.
        base_path (str): directory that relative include paths are resolved
            against. Defaults to empty string.
        **kwargs: forwarded to ``_json_include`` (and from there to
            ``json.loads``).

    Returns:
        The parsed object with includes expanded.
    """
    result = _json_include(text, keep_top_values=True, base_path=base_path, **kwargs)
    if not remove_include_key:
        return result
    _remove_include_key(result)
    return result
def load(fp, remove_include_key=False, **kwargs):
    """Parse jsonc from a file-like object, expanding the files it includes.

    Relative include paths are resolved against the directory of ``fp.name``
    when the object has a path-like ``name``. Objects without one (for
    example ``io.StringIO``) or with a non-path name (for example a file
    opened from an integer descriptor) fall back to an empty base path
    instead of raising.

    Args:
        fp: object with a ``read()`` method returning the jsonc text.
        remove_include_key (bool, optional): when True, the include keys are
            deleted from the result after expansion. Defaults to False.
        **kwargs: forwarded to ``loads``.

    Returns:
        The parsed object with includes expanded.
    """
    source_name = getattr(fp, 'name', None)
    if isinstance(source_name, (str, bytes, os.PathLike)):
        base_path = os.path.dirname(source_name)
    else:
        base_path = ''
    return loads(fp.read(), remove_include_key=remove_include_key, base_path=base_path, **kwargs)
def dumps(obj, **kwargs):
    """Serialize ``obj`` to a JSON string by delegating to ``json.dumps``.

    Args:
        obj: object to serialize.
        **kwargs: forwarded to ``json.dumps`` unchanged.

    Returns:
        str: the JSON text produced by ``json.dumps``.
    """
    serialized = json.dumps(obj, **kwargs)
    return serialized
def dump(obj, fp, **kwargs):
    """Serialize ``obj`` as JSON into ``fp`` by delegating to ``json.dump``.

    Args:
        obj: object to serialize.
        fp: object with a ``write()`` method that receives the JSON text.
        **kwargs: forwarded to ``json.dump`` unchanged.
    """
    json.dump(obj, fp=fp, **kwargs)