diff --git a/.gitignore b/.gitignore index acf8f8a..7c498b8 100644 --- a/.gitignore +++ b/.gitignore @@ -1,12 +1,11 @@ -*.idea +/.idea/ +/.venv/ +/dist/ + **/build/ *.egg-info/ *.pyc *.script -*.venv -dist -output.zip -*.sketch .DS_Store setup.py .vscode diff --git a/reqif/helpers/lxml.py b/reqif/helpers/lxml.py index d3333ad..5944138 100644 --- a/reqif/helpers/lxml.py +++ b/reqif/helpers/lxml.py @@ -3,12 +3,12 @@ from itertools import chain from lxml import etree -from lxml.etree import _Comment, tostring +from lxml.etree import Comment, tostring from lxml.html import fragment_fromstring def lxml_dump_node(node): - return lxml_stringify_node(node) + return lxml_stringify_node(node, root_node=False) # This code is taken from Python 3.7. The addition is escaping of the tab @@ -25,6 +25,8 @@ def lxml_escape_for_html(string: str) -> str: string = string.replace(">", ">") string = string.replace('"', """) string = string.replace("'", "'") + # Non-breaking space character. + string = string.replace("\xa0", " ") # Invisible tab character string = string.replace("\t", " ") return string @@ -120,16 +122,53 @@ def _lxml_stringify_reqif_ns_node(node): return string -def lxml_stringify_node(node): +def lxml_stringify_node(node, root_node=True): + """ + Stringify a given LXML node. + + :param node: + :param root_node: Needed to track whether a node is the first among the + nodes being stringified. Some ReqIF producers do not use a + global xmlns="http://www.w3.org/1999/xhtml" namespace. + Instead, they assign this namespace only to the very first + node inside the ATTRIBUTE-VALUE-XHTML/THE-VALUE tag, for + example:
... + Tracking this root node ensures that the xmlns attribute + is assigned only to the first node and not to all + subsequent nodes. + :return: + """ + output = "" + + # Some ReqIF producers add comments but these comment nodes + # cannot be handled using etree.QName(node).localname like used further + # below. Handling them separately with this dedicated branch. + # A user report that helped to discover this case: + # https://github.com/strictdoc-project/reqif/issues/205 + if lxml_is_comment_node(node): + assert node.text is not None + output = f"" + if node.tail is not None: + output += lxml_escape_for_html(node.tail) + return output + nskey = None + nsvalue = None if len(node.nsmap) > 0: - nskey = next(iter(node.nsmap.keys())) - output = "" + nskey, nsvalue = next(iter(node.nsmap.items())) + node_no_ns_tag = etree.QName(node).localname - tag = f"{nskey}:{node_no_ns_tag}" if node.tag[0] == "{" else node.tag + tag = ( + f"{nskey}:{node_no_ns_tag}" + if node.tag[0] == "{" and nskey is not None + else node_no_ns_tag + ) output += f"<{tag}" for attribute, attribute_value in node.attrib.items(): output += f' {attribute}="{lxml_escape_for_html(attribute_value)}"' + if nsvalue is not None and root_node: + output += f' xmlns="{nsvalue}"' + # is surprisingly a tag that must have a closing tag even if it # is empty. If self-closed, it breaks all the following markup. if ( @@ -141,7 +180,7 @@ def lxml_stringify_node(node): if node.text is not None: output += lxml_escape_for_html(node.text) for child in node.getchildren(): - output += lxml_stringify_node(child) + output += lxml_stringify_node(child, root_node=False) output += f"" else: output += "/>" @@ -221,5 +260,4 @@ def lxml_strip_namespace_from_xml(root_xml, full=False): def lxml_is_comment_node(xml_node): - # FIXME: Accessing a "_"-marked Comment class of lxml is not great. - return isinstance(xml_node, _Comment) + return xml_node.tag is Comment diff --git a/tests/integration/reqif/_other/01_tinymce_comments/sample.reqif b/tests/integration/reqif/_other/01_tinymce_comments/sample.reqif new file mode 100644 index 0000000..ae12e89 --- /dev/null +++ b/tests/integration/reqif/_other/01_tinymce_comments/sample.reqif @@ -0,0 +1,91 @@ + + + + + 2023-01-01T00:00:00.000Z + Test Tool + 1.0 + Test + Minimal ReqIF + + + + + + + + + + + + + + DT_String + + + + + + + + + DT_String + + + + + + + + + _6cf52001-6b78-428a-bfcd-429ba136d53a + + + + + _0cad5cb5-6b09-4b60-bb66-fed63d56e806 + +
+

The ###Platform shall  provide cover art within 2s of audio select

+
+
+ + + _d0bf9a76-226d-4856-9387-e64040705955 + + + + + _4d6b67beb8b9fa04468c6c2d5ae99f484d02c5d8 + + + _009812fe2a185feb11f17bd36dcd508a736fe8c8 + + +
+
+
+ + + + + + AD_SpecName + + + + + ST_Spec + + + + + _73d9cdc2-1c71-4d27-92ab-1df19c9f6ac3 + + + + + +
+
+
diff --git a/tests/integration/reqif/_other/01_tinymce_comments/test.itest b/tests/integration/reqif/_other/01_tinymce_comments/test.itest new file mode 100644 index 0000000..aa3061a --- /dev/null +++ b/tests/integration/reqif/_other/01_tinymce_comments/test.itest @@ -0,0 +1,3 @@ +RUN: mkdir -p %S/output +RUN: %reqif passthrough %S/sample.reqif %S/output/sample.reqif +RUN: %diff %S/sample.reqif %S/output/sample.reqif