-
Notifications
You must be signed in to change notification settings - Fork 23
/
scenes.py
177 lines (136 loc) · 5.72 KB
/
scenes.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
"""This module encapsulates all scene-related functions.
Scene-related functions are used to extract, identify and label scenes
in the foundational layer of UCCA.
"""
import re
from ucca import layer0, layer1
def extract_possible_scenes(passage):
    """Collect the units of a Passage which are candidates for being scenes.

    Every non-remote Participant of every scene in the foundational layer is
    examined:

    1. If the participant is itself a scene, or it has exactly one Center and
       at least one Elaborator, the participant itself is a candidate.
    2. Otherwise, each of its Centers is a candidate.

    Independently of the two cases above, every Elaborator of the participant
    whose length is greater than one is a candidate as well (one-terminal
    elaborators are less likely to be elaborating scenes).

    Args:
        passage: the core.Passage object to extract candidate scenes from

    Returns:
        a list of core.Node objects which are candidates for scenes, in the
        order they were encountered

    """
    foundational_layer = passage.layer(layer1.LAYER_ID)
    candidates = []
    for node in foundational_layer.all:
        is_scene_node = (node.tag == layer1.NodeTags.Foundational
                         and node.is_scene())
        if not is_scene_node:
            continue
        for edge in node:
            is_local_participant = (edge.tag == layer1.EdgeTags.Participant
                                    and not edge.attrib.get('remote'))
            if not is_local_participant:
                continue
            participant = edge.child
            if participant.is_scene() or (len(participant.centers) == 1
                                          and participant.elaborators):
                candidates.append(participant)
            else:
                # zero or several centers: each center stands on its own
                candidates.extend(participant.centers)
            # added regardless of which branch was taken above
            candidates.extend(elaborator
                              for elaborator in participant.elaborators
                              if len(elaborator) > 1)
    return candidates
def extract_non_scenes_AGCE(passage):
    """Collect the participant/center/elaborator/ground FoundationalNodes.

    A node is returned when its tag is layer1.NodeTags.Foundational and its
    ftag is one of Participant, Center, Elaborator or Ground.

    NOTE(review): despite the "non_scenes" in the name, this function does
    not call is_scene() on the nodes -- confirm whether callers are expected
    to filter scenes out themselves.

    Args:
        passage: core.Passage object to extract FNodes from

    Returns:
        a list of layer1.FoundationalNode objects, or an empty list

    """
    foundational_layer = passage.layer(layer1.LAYER_ID)
    wanted_ftags = (
        layer1.EdgeTags.Participant,
        layer1.EdgeTags.Center,
        layer1.EdgeTags.Elaborator,
        layer1.EdgeTags.Ground,
    )
    selected = []
    for node in foundational_layer.all:
        if (node.tag == layer1.NodeTags.Foundational
                and node.ftag in wanted_ftags):
            selected.append(node)
    return selected
def extract_head(fnode):
    """Find the head of the given FoundationalNode.

    The search walks down from fnode one step at a time:

    1. While the current node is a scene, step into its Process (or its
       State when the Process is missing/falsy).
    2. While the current non-scene node has exactly one Center, step into
       that Center.
    3. Once neither applies, the current node is the head if all of its
       outgoing edges are Terminal or Punctuation edges (this includes a
       node with no outgoing edges at all); otherwise there is no head.

    Args:
        fnode: the FoundationalNode to start the search from

    Returns:
        A core.FoundationalNode object of the head, or None if there is no head

    """
    node = fnode
    while True:
        if node.is_scene():
            node = node.process or node.state
        elif len(node.centers) == 1:
            node = node.centers[0]
        else:
            break
    # The walk stopped: accept the node only if it holds nothing but
    # terminals/punctuation.
    for edge in node:
        if edge.tag not in (layer1.EdgeTags.Terminal,
                            layer1.EdgeTags.Punctuation):
            return None
    return node
def is_noun(fnode):
    """Return whether the first Terminal of the fnode is tagged as a noun.

    Args:
        fnode: layer1.FoundationalNode object; its first Terminal child (if
            any) is expected to have a 'postag' entry in its extra attribute

    Returns:
        True if the node has at least one Terminal and the postag of the
        first one starts with 'NN' (the PTB noun tags NN, NNS, NNP, NNPS);
        False otherwise, including when the node has no Terminals.

    """
    terminals = fnode.terminals
    if not terminals:
        return False
    # A prefix test is used instead of the former r'NN.+' pattern: that
    # pattern required at least one character after "NN" and so rejected
    # the plain singular-noun tag "NN".
    return terminals[0].extra['postag'].startswith('NN')
def filter_noun_heads(heads):
    """Extract only the possible noun heads from the given FNodes.

    Only the first Terminal of each head is inspected. Heads without any
    Terminal (implicit processes/centers) are skipped.

    Args:
        heads: iterable of fnodes; the first Terminal of each (when present)
            is expected to have the attribute 'postag' in extra.

    Returns:
        a set of all tokens (Terminal text) for which the postag of at least
        one occurrence starts with 'NN' (PTB noun tags NN, NNS, NNP, NNPS).

    """
    nouns = set()
    for head in heads:
        terminals = head.terminals
        if not terminals:  # implicit processes/centers have no Terminals
            continue
        first = terminals[0]
        # Prefix test instead of the former r'NN.+' pattern, which required
        # a character after "NN" and so dropped the plain "NN" tag.
        if first.extra['postag'].startswith('NN'):
            nouns.add(first.text)
    return nouns
def extract_all_nouns(passages):
    """Extract all nouns in passages and their location.

    Extracted nouns are either single words with PTB POS tag of a noun,
    or phrases where all words are tagged as nouns. The 'postag' value of a
    Terminal is split on whitespace and every resulting tag must start with
    'NN' (NN, NNS, NNP, NNPS). A Terminal whose postag is empty is not
    treated as a noun.

    Args:
        passages: sequence of core.Passage object to extract nouns from.
            All Terminals should have the attribute 'postag' in extra data.

    Returns:
        dictionary of noun: [Terminal1, Terminal2 ...] where they appear.

    """
    nouns = {}
    for passage in passages:
        for term in passage.layer(layer0.LAYER_ID).all:
            tags = term.extra['postag'].split()
            # `tags and ...`: all() of an empty sequence is True, which
            # would wrongly record untagged terminals as nouns.
            # Prefix test instead of the former r'NN.+' pattern, which
            # required a character after "NN" and so rejected plain "NN".
            if tags and all(tag.startswith('NN') for tag in tags):
                # append in place rather than rebuilding the list each time
                nouns.setdefault(term.text, []).append(term)
    return nouns
def is_scene_evoking(terminal):
    """Tell whether the given Terminal is a scene-evoker.

    Starting from the Terminal's first parent, which must be a Foundational
    node, the function climbs through fparent links for as long as the
    current node is a Center. The Terminal is scene-evoking when the climb
    reaches a node whose ftag is Process or State. The climb fails as soon
    as a node on the way has more than one Center (no single head), has any
    other ftag, or there is no further parent.

    Args:
        terminal: a layer0.Terminal object to check

    Returns:
        True iff the terminal given is a head of a scene (scene-evoker).

    """
    parents = terminal.parents
    if not parents:
        return False
    node = parents[0]
    if node.tag != layer1.NodeTags.Foundational:
        return False
    while node:
        if len(node.centers) > 1:  # multiple centers, can't have a head
            return False
        role = node.ftag
        if role in (layer1.EdgeTags.Process, layer1.EdgeTags.State):
            return True
        if role == layer1.EdgeTags.Center:
            node = node.fparent  # keep climbing through the Center chain
            continue
        return False
    return False