From e21e09bc0e5c2239e6080d041ea16391f71d8b50 Mon Sep 17 00:00:00 2001 From: Juanje Mendoza Date: Wed, 7 Jan 2026 15:24:12 +0100 Subject: [PATCH 1/4] test 862 with a little change in remove_comments --- src/somef/somef_cli.py | 1 + src/somef/test/test_JSON_export.py | 50 +++++++++++++++--------------- src/somef/utils/markdown_utils.py | 4 ++- 3 files changed, 29 insertions(+), 26 deletions(-) diff --git a/src/somef/somef_cli.py b/src/somef/somef_cli.py index c6bc02e1..4b3d54a4 100644 --- a/src/somef/somef_cli.py +++ b/src/somef/somef_cli.py @@ -156,6 +156,7 @@ def cli_get_data(threshold, ignore_classifiers, repo_url=None, doc_src=None, loc repository_metadata) logging.info("--> create excerpts") excerpts = create_excerpts.create_excerpts(string_list) + logging.info("--> extract text excerpts headers") excerpts_headers = mardown_parser.extract_text_excerpts_header(readme_unfiltered_text) header_parents = mardown_parser.extract_headers_parents(readme_unfiltered_text) score_dict = supervised_classification.run_classifiers(excerpts, file_paths) diff --git a/src/somef/test/test_JSON_export.py b/src/somef/test/test_JSON_export.py index 7cc72e2d..edd9b7ae 100644 --- a/src/somef/test/test_JSON_export.py +++ b/src/somef/test/test_JSON_export.py @@ -407,36 +407,36 @@ def test_issue_830(self): # except Exception as e: # print(f"Failed to delete {cls.json_file}: {e}") - # def test_issue_862(self): - # """Checks if this repository does not gets stuck when labeling headers""" - # somef_cli.run_cli(threshold=0.8, - # ignore_classifiers=False, - # repo_url=None, - # local_repo=test_data_repositories + "componentInstaller", - # doc_src=None, - # in_file=None, - # output=test_data_path + "test_issue_862.json", - # graph_out=None, - # graph_format="turtle", - # codemeta_out=None, - # pretty=True, - # missing=False, - # readme_only=False) + def test_issue_862(self): + """Checks if this repository does not gets stuck when labeling headers""" + somef_cli.run_cli(threshold=0.8, + ignore_classifiers=False, + repo_url=None, + local_repo=test_data_repositories + "componentInstaller", + doc_src=None, + in_file=None, + output=test_data_path + "test_issue_862.json", + graph_out=None, + graph_format="turtle", + codemeta_out=None, + pretty=True, + missing=False, + readme_only=False) - # text_file = open(test_data_path + "test_issue_862.json", "r") - # data = text_file.read() - # text_file.close() - # json_content = json.loads(data) + text_file = open(test_data_path + "test_issue_862.json", "r") + data = text_file.read() + text_file.close() + json_content = json.loads(data) - # assert "description" in json_content, "Missing 'description' property" + assert "description" in json_content, "Missing 'description' property" - # assert len(json_content["description"]) > 0, "Description list is empty" + assert len(json_content["description"]) > 0, "Description list is empty" - # first_desc = json_content["description"][0]["result"] - # assert "value" in first_desc, "Missing 'value' in description result" - # assert first_desc["value"], "Description 'value' is empty" + first_desc = json_content["description"][0]["result"] + assert "value" in first_desc, "Missing 'value' in description result" + assert first_desc["value"], "Description 'value' is empty" - # os.remove(test_data_path + "test_issue_862.json") + os.remove(test_data_path + "test_issue_862.json") def test_issue_859(self): """Checks whether a repository without content works fine. Must have just some results from the API.""" diff --git a/src/somef/utils/markdown_utils.py b/src/somef/utils/markdown_utils.py index e898d7d5..0ce5479c 100644 --- a/src/somef/utils/markdown_utils.py +++ b/src/somef/utils/markdown_utils.py @@ -63,6 +63,8 @@ def remove_comments(html_text): ------- Markdown with no HTML comments """ - comment_pattern = r'' + # comment_pattern = r'' + comment_pattern = r'' - comment_pattern = r'