Skip to content

Commit 9bec41b

Browse files
committed
refactor write.Conllu so it has the iter_comment_lines() method
and can be used as a base class or helper module for other writers
1 parent c854a37 commit 9bec41b

1 file changed

Lines changed: 29 additions & 24 deletions

File tree

udapi/block/write/conllu.py

Lines changed: 29 additions & 24 deletions
Original file line number | Diff line number | Diff line change
@@ -11,18 +11,8 @@ def __init__(self, print_sent_id=True, print_text=True, print_empty_trees=True,
1111
self.print_text = print_text
1212
self.print_empty_trees = print_empty_trees
1313

14-
def process_tree(self, tree): # pylint: disable=too-many-branches
15-
empty_nodes = tree.empty_nodes
16-
if empty_nodes:
17-
nodes = sorted(tree._descendants + empty_nodes)
18-
else:
19-
nodes = tree._descendants
20-
21-
# Empty sentences are not allowed in CoNLL-U, so with print_empty_trees==0
22-
# we need to skip the whole tree (including possible comments).
23-
if not nodes and not self.print_empty_trees:
24-
return
25-
14+
def iter_comment_lines(self, tree):
15+
"""Yield comment lines (without leading #) for the current tree."""
2616
# If tree.comment contains placeholders $NEWDOC,...$TEXT, replace them with the actual
2717
# value of the attribute and make note on which line (i_*) they were present.
2818
comment_lines = tree.comment.splitlines()
@@ -65,51 +55,66 @@ def process_tree(self, tree): # pylint: disable=too-many-branches
6555
printed_i = -1
6656
if comment_lines and comment_lines[0].startswith(' global.columns'):
6757
printed_i += 1
68-
print('#' + comment_lines[printed_i])
58+
yield comment_lines[printed_i]
6959
if self.print_sent_id:
7060
if tree.newdoc:
7161
if i_newdoc == -1:
72-
print('# newdoc' + (' id = ' + tree.newdoc if tree.newdoc is not True else ''))
62+
yield ' newdoc' + (' id = ' + tree.newdoc if tree.newdoc is not True else '')
7363
else:
7464
while printed_i < i_newdoc:
7565
printed_i += 1
7666
if comment_lines[printed_i]:
77-
print('#' + comment_lines[printed_i])
67+
yield comment_lines[printed_i]
7868
ge = tree.document.meta.get('global.Entity')
7969
if ge:
8070
if i_global_entity == -1:
81-
print('# global.Entity = ' + ge)
71+
yield ' global.Entity = ' + ge
8272
else:
8373
while printed_i < i_global_entity:
8474
printed_i += 1
8575
if comment_lines[printed_i]:
86-
print('#' + comment_lines[printed_i])
76+
yield comment_lines[printed_i]
8777
if tree.newpar:
8878
if i_newpar == -1:
89-
print('# newpar' + (' id = ' + tree.newpar if tree.newpar is not True else ''))
79+
yield ' newpar' + (' id = ' + tree.newpar if tree.newpar is not True else '')
9080
else:
9181
while printed_i < i_newpar:
9282
printed_i += 1
9383
if comment_lines[printed_i]:
94-
print('#' + comment_lines[printed_i])
84+
yield comment_lines[printed_i]
9585
if i_sent_id == -1:
96-
print('# sent_id = ' + tree.sent_id)
86+
yield ' sent_id = ' + tree.sent_id
9787
else:
9888
while printed_i < i_sent_id:
9989
printed_i += 1
10090
if comment_lines[printed_i]:
101-
print('#' + comment_lines[printed_i])
91+
yield comment_lines[printed_i]
10292
if self.print_text and i_text == -1:
103-
print('# text = ' + (tree.compute_text() if tree.text is None else tree.text.replace('\n', '').replace('\r', '').rstrip()))
93+
yield ' text = ' + (tree.compute_text() if tree.text is None else tree.text.replace('\n', '').replace('\r', '').rstrip())
10494

10595
for c_line in comment_lines[printed_i + 1:]:
10696
if c_line:
107-
print('#' + c_line)
97+
yield c_line
10898

10999
# Special-purpose json_* comments should always be at the end of the comment block.
110100
if tree.json:
111101
for key, value in sorted(tree.json.items()):
112-
print(f"# json_{key} = {json.dumps(value, ensure_ascii=False, sort_keys=True)}")
102+
yield f" json_{key} = {json.dumps(value, ensure_ascii=False, sort_keys=True)}"
103+
104+
def process_tree(self, tree): # pylint: disable=too-many-branches
105+
empty_nodes = tree.empty_nodes
106+
if empty_nodes:
107+
nodes = sorted(tree._descendants + empty_nodes)
108+
else:
109+
nodes = tree._descendants
110+
111+
# Empty sentences are not allowed in CoNLL-U, so with print_empty_trees==0
112+
# we need to skip the whole tree (including possible comments).
113+
if not nodes and not self.print_empty_trees:
114+
return
115+
116+
for line in self.iter_comment_lines(tree):
117+
print('#' + line)
113118

114119
last_mwt_id = 0
115120
for node in nodes:

0 commit comments

Comments (0)