Skip to content

Commit 2e1ec7f

Browse files
author
Ezra Peisach
committed
Merge branch 'DAOTHER-10246' into develop
2 parents df096d0 + 2e1a66d commit 2e1ec7f

1 file changed

Lines changed: 23 additions & 11 deletions

File tree

scripts/dump_db_to_cif.py

Lines changed: 23 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -32,7 +32,6 @@
3232

3333
try:
3434
import gemmi
35-
from gemmi.cif import Style, WriteOptions
3635
except ImportError as e:
3736
sys.stderr.write("Error: gemmi library is required but not installed.\n")
3837
sys.stderr.write("Please install it with: pip install gemmi\n")
@@ -336,9 +335,10 @@ def format_cif_loop_value(value: Any, allow_multiline: bool = False) -> str:
336335
pass
337336

338337
# For multiline text in loops, use semicolon format to preserve newlines
338+
# Need newline after close
339339
if allow_multiline and "\n" in str_value:
340340
escaped = escape_non_ascii(str_value, preserve_newlines=True)
341-
return f"\n;{escaped}\n;"
341+
return f"\n;{escaped}\n;\n"
342342

343343
# For single-line values, escape and flatten
344344
escaped = escape_non_ascii(str_value, preserve_newlines=False)
@@ -633,12 +633,7 @@ def _export_messages_to_cif(self, messages: List[MessageInfo], file_path: str,
633633
# Ensure output directory exists
634634
os.makedirs(os.path.dirname(file_path), exist_ok=True)
635635

636-
# Write CIF file - use as_string() to avoid alignment issues with write_file()
637-
# gemmi's write_file() with align_loops causes spacing issues that break parsing
638-
opts = WriteOptions(Style.Pdbx)
639-
cif_content = doc.as_string(opts)
640-
with open(file_path, 'w') as f:
641-
f.write(cif_content)
636+
doc.write_file(file_path)
642637

643638
log_event("file_exported", deposition_id=deposition_id, file_path=file_path,
644639
content_type=content_type, messages=len(messages))
@@ -659,7 +654,16 @@ def _export_messages_to_cif(self, messages: List[MessageInfo], file_path: str,
659654
return False
660655

661656
def _add_message_info_category(self, block: gemmi.cif.Block, messages: List[MessageInfo]):
662-
"""Add _pdbx_deposition_message_info category to CIF block"""
657+
"""Add _pdbx_deposition_message_info category to CIF block
658+
659+
Note: This implementation manually distinguishes between pairs (single item) and
660+
loops (multiple items). A better approach for future refactoring would be to use
661+
gemmi's block.find_or_add() which handles this automatically:
662+
table = block.find_or_add(prefix, tags)
663+
table.append_row(values)
664+
This would let gemmi decide whether to write as pairs or loops, and handle quoting
665+
automatically. See: https://gemmi.readthedocs.io/en/latest/cif.html#pairs-and-loops
666+
"""
663667
# Define the columns we want to include
664668
columns = [
665669
"ordinal_id",
@@ -698,7 +702,11 @@ def _add_message_info_category(self, block: gemmi.cif.Block, messages: List[Mess
698702
block.set_pair(f"_pdbx_deposition_message_info.{col}", formatted_value)
699703

700704
def _add_file_reference_category(self, block: gemmi.cif.Block, file_refs: List[MessageFileReference]):
701-
"""Add _pdbx_deposition_message_file_reference category to CIF block"""
705+
"""Add _pdbx_deposition_message_file_reference category to CIF block
706+
707+
Note: See _add_message_info_category() for recommended refactoring approach using
708+
gemmi's find_or_add() to avoid manual pair/loop branching.
709+
"""
702710
columns = [
703711
"ordinal_id",
704712
"message_id",
@@ -728,7 +736,11 @@ def _add_file_reference_category(self, block: gemmi.cif.Block, file_refs: List[M
728736
block.set_pair(f"_pdbx_deposition_message_file_reference.{col}", formatted_value)
729737

730738
def _add_status_category(self, block: gemmi.cif.Block, statuses: List[MessageStatus]):
731-
"""Add _pdbx_deposition_message_status category to CIF block"""
739+
"""Add _pdbx_deposition_message_status category to CIF block
740+
741+
Note: See _add_message_info_category() for recommended refactoring approach using
742+
gemmi's find_or_add() to avoid manual pair/loop branching.
743+
"""
732744
columns = [
733745
"message_id",
734746
"deposition_data_set_id",

0 commit comments

Comments
 (0)