From 024d2b5c3fb7a71fd2560da4b1ac8275cf201ff7 Mon Sep 17 00:00:00 2001 From: "marcin p. joachimiak" <4625870+realmarcin@users.noreply.github.com> Date: Mon, 1 Jun 2026 11:26:05 -0700 Subject: [PATCH 1/2] Propose 39 literature-backed ENVIRONMENT + METABOLISM traits Adds candidate trait records for coverage gaps in two categories, each backed by >=2 distinct verified literature citations and enforced in CI. Schema: - Add PROPOSED value to MappingStatusEnum (candidate traits from literature research; must carry >=2 distinct citations). Validation: - New scripts/audit_proposals.py enforces >=2 distinct, well-formed (PMID/DOI/URL) citations per PROPOSED record, counted across definition_source + evidence[].reference. Wired into the `qc` justfile target (and thus CI). Emits reports/proposal_citation_audit.tsv. - tests/test_audit_proposals.py locks the rule. - Relax tests/test_seed.py to allow traitmech: identifiers and PROPOSED status. ENVIRONMENT proposals (18, traitmech:000001-000018): pressure/piezophily, radiation (ionizing/UV), desiccation/xerophily, and heavy-metal/metalloid tolerance (Cd/Zn/Co/Hg/As/Cu) families. METABOLISM proposals (21, traitmech:000019-000039): six autotrophic carbon-fixation pathways (+carbon_fixation head), product-specific fermentations, DNRA, dissimilatory iron reduction, manganese oxidation, anaerobic oxidation of methane, oxygenic/anoxygenic photosynthesis, proteorhodopsin phototrophy, plus intermediate axis classes (phototrophy, photosynthesis, dissimilatory_metal_reduction). DNRA, AOM, and metal reduction are parented to the existing METPO:1000802 anaerobic respiration. Reports: reports/environment_trait_proposals.md, reports/metabolism_trait_proposals.md. Verification: validate-strict 0 errors over 396 files; audit-proposals 39/39 PROPOSED passing; pytest 70 passed; minted IDs contiguous 000001-000039. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- data/traits/environment/arsenic_tolerant.yaml | 34 ++++ data/traits/environment/cadmium_tolerant.yaml | 29 ++++ data/traits/environment/cobalt_tolerant.yaml | 29 ++++ data/traits/environment/copper_tolerant.yaml | 34 ++++ .../environment/desiccation_tolerant.yaml | 36 ++++ .../ionizing_radiation_tolerant.yaml | 34 ++++ data/traits/environment/mercury_tolerant.yaml | 35 ++++ data/traits/environment/metal_tolerant.yaml | 40 +++++ .../environment/obligately_piezophilic.yaml | 33 ++++ data/traits/environment/piezophilic.yaml | 37 ++++ data/traits/environment/piezotolerant.yaml | 34 ++++ data/traits/environment/pressure_delta.yaml | 28 +++ data/traits/environment/pressure_optimum.yaml | 28 +++ data/traits/environment/pressure_range.yaml | 28 +++ data/traits/environment/radiotolerant.yaml | 37 ++++ .../environment/uv_radiation_tolerant.yaml | 34 ++++ data/traits/environment/xerophilic.yaml | 34 ++++ data/traits/environment/zinc_tolerant.yaml | 29 ++++ .../anaerobic_oxidation_of_methane.yaml | 34 ++++ .../metabolism/anoxygenic_photosynthesis.yaml | 30 ++++ .../calvin_benson_bassham_cycle.yaml | 34 ++++ data/traits/metabolism/carbon_fixation.yaml | 35 ++++ ...arboxylate_four_hydroxybutyrate_cycle.yaml | 30 ++++ .../dissimilatory_iron_reduction.yaml | 37 ++++ .../dissimilatory_metal_reduction.yaml | 33 ++++ ...ilatory_nitrate_reduction_to_ammonium.yaml | 33 ++++ .../metabolism/ethanol_fermentation.yaml | 29 ++++ .../metabolism/lactic_acid_fermentation.yaml | 31 ++++ .../metabolism/manganese_oxidation.yaml | 29 ++++ .../metabolism/mixed_acid_fermentation.yaml | 25 +++ .../metabolism/oxygenic_photosynthesis.yaml | 28 +++ data/traits/metabolism/photosynthesis.yaml | 27 +++ data/traits/metabolism/phototrophy.yaml | 32 ++++ .../propionic_acid_fermentation.yaml | 30 ++++ .../proteorhodopsin_phototrophy.yaml | 32 ++++ .../metabolism/reductive_tca_cycle.yaml | 36 ++++ .../three_hydroxypropionate_bicycle.yaml | 30 ++++ 
...propionate_four_hydroxybutyrate_cycle.yaml | 30 ++++ .../metabolism/wood_ljungdahl_pathway.yaml | 31 ++++ justfile | 11 +- reports/environment_trait_proposals.md | 129 ++++++++++++++ reports/metabolism_trait_proposals.md | 140 +++++++++++++++ reports/proposal_citation_audit.tsv | 40 +++++ scripts/audit_proposals.py | 159 ++++++++++++++++++ src/traitmech/schema/traitmech.yaml | 6 + tests/test_audit_proposals.py | 103 ++++++++++++ tests/test_seed.py | 9 +- 47 files changed, 1842 insertions(+), 4 deletions(-) create mode 100644 data/traits/environment/arsenic_tolerant.yaml create mode 100644 data/traits/environment/cadmium_tolerant.yaml create mode 100644 data/traits/environment/cobalt_tolerant.yaml create mode 100644 data/traits/environment/copper_tolerant.yaml create mode 100644 data/traits/environment/desiccation_tolerant.yaml create mode 100644 data/traits/environment/ionizing_radiation_tolerant.yaml create mode 100644 data/traits/environment/mercury_tolerant.yaml create mode 100644 data/traits/environment/metal_tolerant.yaml create mode 100644 data/traits/environment/obligately_piezophilic.yaml create mode 100644 data/traits/environment/piezophilic.yaml create mode 100644 data/traits/environment/piezotolerant.yaml create mode 100644 data/traits/environment/pressure_delta.yaml create mode 100644 data/traits/environment/pressure_optimum.yaml create mode 100644 data/traits/environment/pressure_range.yaml create mode 100644 data/traits/environment/radiotolerant.yaml create mode 100644 data/traits/environment/uv_radiation_tolerant.yaml create mode 100644 data/traits/environment/xerophilic.yaml create mode 100644 data/traits/environment/zinc_tolerant.yaml create mode 100644 data/traits/metabolism/anaerobic_oxidation_of_methane.yaml create mode 100644 data/traits/metabolism/anoxygenic_photosynthesis.yaml create mode 100644 data/traits/metabolism/calvin_benson_bassham_cycle.yaml create mode 100644 data/traits/metabolism/carbon_fixation.yaml create mode 100644 
data/traits/metabolism/dicarboxylate_four_hydroxybutyrate_cycle.yaml create mode 100644 data/traits/metabolism/dissimilatory_iron_reduction.yaml create mode 100644 data/traits/metabolism/dissimilatory_metal_reduction.yaml create mode 100644 data/traits/metabolism/dissimilatory_nitrate_reduction_to_ammonium.yaml create mode 100644 data/traits/metabolism/ethanol_fermentation.yaml create mode 100644 data/traits/metabolism/lactic_acid_fermentation.yaml create mode 100644 data/traits/metabolism/manganese_oxidation.yaml create mode 100644 data/traits/metabolism/mixed_acid_fermentation.yaml create mode 100644 data/traits/metabolism/oxygenic_photosynthesis.yaml create mode 100644 data/traits/metabolism/photosynthesis.yaml create mode 100644 data/traits/metabolism/phototrophy.yaml create mode 100644 data/traits/metabolism/propionic_acid_fermentation.yaml create mode 100644 data/traits/metabolism/proteorhodopsin_phototrophy.yaml create mode 100644 data/traits/metabolism/reductive_tca_cycle.yaml create mode 100644 data/traits/metabolism/three_hydroxypropionate_bicycle.yaml create mode 100644 data/traits/metabolism/three_hydroxypropionate_four_hydroxybutyrate_cycle.yaml create mode 100644 data/traits/metabolism/wood_ljungdahl_pathway.yaml create mode 100644 reports/environment_trait_proposals.md create mode 100644 reports/metabolism_trait_proposals.md create mode 100644 reports/proposal_citation_audit.tsv create mode 100644 scripts/audit_proposals.py create mode 100644 tests/test_audit_proposals.py diff --git a/data/traits/environment/arsenic_tolerant.yaml b/data/traits/environment/arsenic_tolerant.yaml new file mode 100644 index 00000000..2158236e --- /dev/null +++ b/data/traits/environment/arsenic_tolerant.yaml @@ -0,0 +1,34 @@ +identifier: traitmech:000017 +label: arsenic tolerant +definition: A metalloid tolerance in which an organism grows in the presence of + elevated arsenic (arsenite/arsenate) concentrations, typically via the ars operon, + whose ArsB pump extrudes 
arsenite from the cytoplasm. +definition_source: DOI:10.3389/fmicb.2018.02473 +trait_category: ENVIRONMENT +term_kind: CLASS +mapping_status: PROPOSED +parent_traits: +- traitmech:000012 +synonyms: +- synonym_text: arsenic resistant + synonym_type: RELATED_SYNONYM + source: DOI:10.3389/fmicb.2018.02473 +evidence: +- reference: DOI:10.3389/fmicb.2018.02473 + snippet: ArsB is an integral membrane protein able to extrude arsenite from the + cell cytoplasm, thus diminishing arsenite accumulation + notes: Review supports the ars operon as a near-ubiquitous arsenic-resistance + determinant, "more common than genes for tryptophan biosynthesis". +- reference: DOI:10.3389/fmicb.2020.00047 + snippet: C. metallidurans BS1 conferred resistance to Zn2+ displaying a MIC of 20 + mM, Cd2+ (2.5 mM), Co2+ (20mM), Ni2+ (8 mM), As3+ (3.5 mM), Cu2+ (5 mM), Au3+ (1 + uM) and Pb2+ (1.7 mM) + notes: 'Organism example: Cupriavidus metallidurans BS1 tolerates arsenite (As3+) + to a MIC of 3.5 mM.' +curation_history: +- timestamp: '2026-05-28T00:00:00Z' + curator: claude + action: PROPOSED_FROM_RESEARCH + changes: Proposed candidate ENVIRONMENT trait (arsenic/metalloid tolerance) from + literature research; sub-variant of metal tolerant. + llm_assisted: true diff --git a/data/traits/environment/cadmium_tolerant.yaml b/data/traits/environment/cadmium_tolerant.yaml new file mode 100644 index 00000000..dc234c9c --- /dev/null +++ b/data/traits/environment/cadmium_tolerant.yaml @@ -0,0 +1,29 @@ +identifier: traitmech:000013 +label: cadmium tolerant +definition: A metal tolerance in which an organism grows in the presence of elevated + cadmium (Cd2+) concentrations, typically via cation-efflux resistance systems such + as the czc determinant. 
+definition_source: PMID:12829273 +trait_category: ENVIRONMENT +term_kind: CLASS +mapping_status: PROPOSED +parent_traits: +- traitmech:000012 +evidence: +- reference: DOI:10.1111/j.1365-2958.2009.06792.x + snippet: CzcP exports transition metals Zn(2+), Cd(2+) and Co(2+) + notes: The cobalt-zinc-cadmium (czc) efflux system of Cupriavidus metallidurans + confers cadmium resistance. +- reference: DOI:10.3389/fmicb.2020.00047 + snippet: C. metallidurans BS1 conferred resistance to Zn2+ displaying a MIC of 20 + mM, Cd2+ (2.5 mM), Co2+ (20mM), Ni2+ (8 mM), As3+ (3.5 mM), Cu2+ (5 mM), Au3+ (1 + uM) and Pb2+ (1.7 mM) + notes: 'Organism example: Cupriavidus metallidurans BS1 tolerates cadmium to a MIC + of 2.5 mM.' +curation_history: +- timestamp: '2026-05-28T00:00:00Z' + curator: claude + action: PROPOSED_FROM_RESEARCH + changes: Proposed candidate ENVIRONMENT trait (cadmium tolerance) from literature + research; metal-specific sub-variant of metal tolerant. + llm_assisted: true diff --git a/data/traits/environment/cobalt_tolerant.yaml b/data/traits/environment/cobalt_tolerant.yaml new file mode 100644 index 00000000..7cbd839e --- /dev/null +++ b/data/traits/environment/cobalt_tolerant.yaml @@ -0,0 +1,29 @@ +identifier: traitmech:000015 +label: cobalt tolerant +definition: A metal tolerance in which an organism grows in the presence of elevated + cobalt (Co2+) concentrations, typically via cation-efflux resistance systems such + as the czc and cnr determinants. +definition_source: PMID:12829273 +trait_category: ENVIRONMENT +term_kind: CLASS +mapping_status: PROPOSED +parent_traits: +- traitmech:000012 +evidence: +- reference: DOI:10.1111/j.1365-2958.2009.06792.x + snippet: CzcP exports transition metals Zn(2+), Cd(2+) and Co(2+) + notes: The cobalt-zinc-cadmium (czc) efflux system of Cupriavidus metallidurans + confers cobalt resistance. +- reference: DOI:10.3389/fmicb.2020.00047 + snippet: C. 
metallidurans BS1 conferred resistance to Zn2+ displaying a MIC of 20 + mM, Cd2+ (2.5 mM), Co2+ (20mM), Ni2+ (8 mM), As3+ (3.5 mM), Cu2+ (5 mM), Au3+ (1 + uM) and Pb2+ (1.7 mM) + notes: 'Organism example: Cupriavidus metallidurans BS1 tolerates cobalt to a MIC + of 20 mM.' +curation_history: +- timestamp: '2026-05-28T00:00:00Z' + curator: claude + action: PROPOSED_FROM_RESEARCH + changes: Proposed candidate ENVIRONMENT trait (cobalt tolerance) from literature + research; metal-specific sub-variant of metal tolerant. + llm_assisted: true diff --git a/data/traits/environment/copper_tolerant.yaml b/data/traits/environment/copper_tolerant.yaml new file mode 100644 index 00000000..109abd0a --- /dev/null +++ b/data/traits/environment/copper_tolerant.yaml @@ -0,0 +1,34 @@ +identifier: traitmech:000018 +label: copper tolerant +definition: A metal tolerance in which an organism grows in the presence of elevated + copper (Cu2+/Cu+) concentrations, typically via the cue, cus, pco, and cop systems + and ATPase-driven cytoplasmic copper efflux. +definition_source: DOI:10.1007/s10565-013-9262-1 +trait_category: ENVIRONMENT +term_kind: CLASS +mapping_status: PROPOSED +parent_traits: +- traitmech:000012 +synonyms: +- synonym_text: copper resistant + synonym_type: RELATED_SYNONYM + source: DOI:10.1007/s10565-013-9262-1 +evidence: +- reference: DOI:10.1007/s10565-013-9262-1 + snippet: ATPase-driven copper efflux seems to be the main mechanism responsible + for cytoplasmic copper detoxification in until now studied bacteria + notes: Review supports active efflux via the cue, cus, pco, and cop systems as the + basis of bacterial copper tolerance. +- reference: DOI:10.3389/fmicb.2020.00047 + snippet: C. metallidurans BS1 conferred resistance to Zn2+ displaying a MIC of 20 + mM, Cd2+ (2.5 mM), Co2+ (20mM), Ni2+ (8 mM), As3+ (3.5 mM), Cu2+ (5 mM), Au3+ (1 + uM) and Pb2+ (1.7 mM) + notes: 'Organism example: Cupriavidus metallidurans BS1 tolerates copper (Cu2+) to + a MIC of 5 mM.' 
+curation_history: +- timestamp: '2026-05-28T00:00:00Z' + curator: claude + action: PROPOSED_FROM_RESEARCH + changes: Proposed candidate ENVIRONMENT trait (copper tolerance) from literature + research; metal-specific sub-variant of metal tolerant. + llm_assisted: true diff --git a/data/traits/environment/desiccation_tolerant.yaml b/data/traits/environment/desiccation_tolerant.yaml new file mode 100644 index 00000000..255e0de6 --- /dev/null +++ b/data/traits/environment/desiccation_tolerant.yaml @@ -0,0 +1,36 @@ +identifier: traitmech:000010 +label: desiccation tolerant +definition: An environmental tolerance in which an organism survives extreme water + loss and resumes growth after rehydration (anhydrobiosis), protecting cellular + macromolecules during drying. +definition_source: DOI:10.3390/microorganisms10020432 +trait_category: ENVIRONMENT +term_kind: CLASS +mapping_status: PROPOSED +parent_traits: +- METPO:1000059 +synonyms: +- synonym_text: anhydrobiotic + synonym_type: RELATED_SYNONYM + source: DOI:10.3390/microorganisms10020432 +evidence: +- reference: DOI:10.3390/microorganisms10020432 + snippet: Anhydrobiosis (gr. life without water) is predominantly described as the + ability of some organisms to lose all or almost all water and enter a state of + suspension where the metabolism comes to a reversible standstill + notes: Bacterial anhydrobiosis review supports desiccation tolerance as reversible + survival of near-complete water loss. +- reference: DOI:10.3390/genes14091803 + snippet: Deinococcus radiodurans, a Gram-positive extremophilic bacterium, is a + remarkable example of such an organism, showcasing an impressive resistance to a + wide array of stressors, including ionizing radiation, desiccation, UV radiation, + and oxidizing agents + notes: 'Organism example: Deinococcus radiodurans is desiccation-tolerant, sharing + DNA-repair machinery with its radiation tolerance.' 
+curation_history: +- timestamp: '2026-05-28T00:00:00Z' + curator: claude + action: PROPOSED_FROM_RESEARCH + changes: Proposed candidate ENVIRONMENT trait (desiccation tolerance / anhydrobiosis) + from literature research to fill the water-availability coverage gap. + llm_assisted: true diff --git a/data/traits/environment/ionizing_radiation_tolerant.yaml b/data/traits/environment/ionizing_radiation_tolerant.yaml new file mode 100644 index 00000000..de7d55f3 --- /dev/null +++ b/data/traits/environment/ionizing_radiation_tolerant.yaml @@ -0,0 +1,34 @@ +identifier: traitmech:000008 +label: ionizing radiation tolerant +definition: An environmental tolerance in which an organism survives high doses of + ionizing radiation (e.g. gamma rays), typically via efficient repair of DNA + double-strand breaks and protection of the proteome from oxidative damage. +definition_source: DOI:10.3390/genes14091803 +trait_category: ENVIRONMENT +term_kind: CLASS +mapping_status: PROPOSED +parent_traits: +- traitmech:000007 +synonyms: +- synonym_text: gamma radiation resistant + synonym_type: NARROW_SYNONYM + source: DOI:10.3390/genes14091803 +evidence: +- reference: DOI:10.3390/genes14091803 + snippet: Deinococcus radiodurans R1 demonstrates a significantly higher radiation + resistance with D10 values exceeding 12 kGy for gamma radiation and 700 J/m2 for + UV-C radiation + notes: 'Organism example: Deinococcus radiodurans tolerates gamma (ionizing) + radiation D10 doses exceeding 12 kGy.' +- reference: DOI:10.1101/cshperspect.a012765 + snippet: A strong correlation has been shown between intracellular Mn/Fe concentration + ratios and bacterial resistance to radiation + notes: Mechanism support — manganese-mediated oxidative-damage protection underlies + survival of lethal ionizing-radiation doses. 
+curation_history: +- timestamp: '2026-05-28T00:00:00Z' + curator: claude + action: PROPOSED_FROM_RESEARCH + changes: Proposed candidate ENVIRONMENT trait (ionizing/gamma radiation tolerance) + from literature research; sub-variant of radiotolerant. + llm_assisted: true diff --git a/data/traits/environment/mercury_tolerant.yaml b/data/traits/environment/mercury_tolerant.yaml new file mode 100644 index 00000000..149eb958 --- /dev/null +++ b/data/traits/environment/mercury_tolerant.yaml @@ -0,0 +1,35 @@ +identifier: traitmech:000016 +label: mercury tolerant +definition: A metal tolerance in which an organism grows in the presence of toxic + inorganic or organic mercury compounds, typically via the mer operon, whose + mercuric reductase (MerA) reduces reactive Hg(II) to volatile Hg(0). +definition_source: DOI:10.1016/S0168-6445(03)00046-9 +trait_category: ENVIRONMENT +term_kind: CLASS +mapping_status: PROPOSED +parent_traits: +- traitmech:000012 +synonyms: +- synonym_text: mercury resistant + synonym_type: RELATED_SYNONYM + source: DOI:10.1016/S0168-6445(03)00046-9 +evidence: +- reference: DOI:10.1016/S0168-6445(03)00046-9 + snippet: Bacterial resistance to inorganic and organic mercury compounds (HgR) is + one of the most widely observed phenotypes in eubacteria + notes: Review supports mercury resistance as a widespread bacterial phenotype + mediated by MerA, "that reduces reactive ionic Hg(II) to volatile, relatively + inert, monoatomic Hg(0) vapor". +- reference: PMID:12829273 + snippet: CBA efflux pumps driven by proteins of the resistance-nodulation-cell + division superfamily, P-type ATPases, cation diffusion facilitator and chromate + proteins + notes: Heavy-metal resistance review situates mercury detoxification within the + broader prokaryotic metal-resistance machinery. 
+curation_history: +- timestamp: '2026-05-28T00:00:00Z' + curator: claude + action: PROPOSED_FROM_RESEARCH + changes: Proposed candidate ENVIRONMENT trait (mercury tolerance) from literature + research; metal-specific sub-variant of metal tolerant. + llm_assisted: true diff --git a/data/traits/environment/metal_tolerant.yaml b/data/traits/environment/metal_tolerant.yaml new file mode 100644 index 00000000..5b748f46 --- /dev/null +++ b/data/traits/environment/metal_tolerant.yaml @@ -0,0 +1,40 @@ +identifier: traitmech:000012 +label: metal tolerant +definition: An environmental tolerance in which an organism grows in the presence of + elevated concentrations of toxic heavy-metal or metalloid ions, typically via + efflux-based resistance determinants (RND-family CBA pumps, P-type ATPases, and + cation diffusion facilitators). +definition_source: PMID:12829273 +trait_category: ENVIRONMENT +term_kind: CLASS +mapping_status: PROPOSED +parent_traits: +- METPO:1000059 +synonyms: +- synonym_text: metallophilic + synonym_type: RELATED_SYNONYM + source: DOI:10.3389/fmicb.2020.00047 +- synonym_text: heavy metal resistant + synonym_type: RELATED_SYNONYM + source: PMID:12829273 +evidence: +- reference: PMID:12829273 + snippet: CBA efflux pumps driven by proteins of the resistance-nodulation-cell + division superfamily, P-type ATPases, cation diffusion facilitator and chromate + proteins + notes: Review of efflux-mediated heavy-metal resistance supports active metal + export as the dominant prokaryotic tolerance mechanism. +- reference: DOI:10.3389/fmicb.2020.00047 + snippet: This metallophilic strain BS1, harbors numerous gene clusters encoding + metal-resistance determinants enabling detoxification of transition metal ions + and complexes + notes: 'Organism example: Cupriavidus metallidurans is the model metallophilic + bacterium tolerating many toxic metals via dedicated resistance gene clusters.' 
+curation_history: +- timestamp: '2026-05-28T00:00:00Z' + curator: claude + action: PROPOSED_FROM_RESEARCH + changes: Proposed candidate ENVIRONMENT trait (general heavy-metal/metalloid + tolerance) from literature research to fill the metal-tolerance coverage gap. + Parent of the metal-specific sub-variants. + llm_assisted: true diff --git a/data/traits/environment/obligately_piezophilic.yaml b/data/traits/environment/obligately_piezophilic.yaml new file mode 100644 index 00000000..a9819073 --- /dev/null +++ b/data/traits/environment/obligately_piezophilic.yaml @@ -0,0 +1,33 @@ +identifier: traitmech:000002 +label: obligately piezophilic +definition: A pressure growth preference in which an organism requires elevated + hydrostatic pressure for growth and is unable to grow at atmospheric pressure + (0.1 MPa). +definition_source: DOI:10.1038/srep27289 +trait_category: ENVIRONMENT +term_kind: CLASS +mapping_status: PROPOSED +parent_traits: +- traitmech:000001 +synonyms: +- synonym_text: obligate piezophile + synonym_type: EXACT_SYNONYM + source: DOI:10.1038/srep27289 +evidence: +- reference: DOI:10.1038/srep27289 + snippet: High hydrostatic pressure adaptive strategies in an obligate piezophile + Pyrococcus yayanosii + notes: 'Organism example: Pyrococcus yayanosii is an obligate piezophile that + requires high hydrostatic pressure for growth.' +- reference: DOI:10.1099/ijsem.0.001671 + snippet: growth range of 80-140 MPa (optimum, 120 MPa) at 6 degrees C + notes: 'Organism example: Colwellia marinimaniae MTCD1 grows only at high pressure + (80-140 MPa) and does not grow near atmospheric pressure.' +curation_history: +- timestamp: '2026-05-28T00:00:00Z' + curator: claude + action: PROPOSED_FROM_RESEARCH + changes: Proposed candidate ENVIRONMENT trait (obligate high-pressure requirement) + from literature research; sub-variant of piezophilic. Backed by two + obligate-piezophile organism examples. 
+ llm_assisted: true diff --git a/data/traits/environment/piezophilic.yaml b/data/traits/environment/piezophilic.yaml new file mode 100644 index 00000000..3fb30a1c --- /dev/null +++ b/data/traits/environment/piezophilic.yaml @@ -0,0 +1,37 @@ +identifier: traitmech:000001 +label: piezophilic +definition: An environmental growth preference in which an organism grows optimally + at hydrostatic pressures substantially above atmospheric pressure (0.1 MPa), + characteristic of deep-sea and deep-subsurface microorganisms. +definition_source: DOI:10.3389/fmolb.2022.1058381 +trait_category: ENVIRONMENT +term_kind: CLASS +mapping_status: PROPOSED +parent_traits: +- METPO:1000059 +synonyms: +- synonym_text: barophilic + synonym_type: RELATED_SYNONYM + source: DOI:10.3389/fmolb.2022.1058381 +- synonym_text: piezophile + synonym_type: EXACT_SYNONYM + source: DOI:10.3389/fmolb.2022.1058381 +evidence: +- reference: DOI:10.3389/fmolb.2022.1058381 + snippet: Microorganisms adapted to HHP are usually known as piezophiles, referring + to their preference for high pressure + notes: Membrane-lipid adaptation review supports the definition of piezophiles as + high-hydrostatic-pressure-adapted organisms, with adaptation involving unsaturated + and branched-chain fatty acids. +- reference: DOI:10.1099/ijsem.0.001671 + snippet: growth range of 80-140 MPa (optimum, 120 MPa) at 6 degrees C + notes: 'Organism example: Colwellia marinimaniae strain MTCD1, the most piezophilic + organism described, grows optimally at 120 MPa.' +curation_history: +- timestamp: '2026-05-28T00:00:00Z' + curator: claude + action: PROPOSED_FROM_RESEARCH + changes: Proposed candidate ENVIRONMENT trait (high-hydrostatic-pressure growth + preference) from literature research to fill the pressure-tolerance coverage gap. + Backed by a piezophile adaptation review and a deep-sea organism example. 
+ llm_assisted: true diff --git a/data/traits/environment/piezotolerant.yaml b/data/traits/environment/piezotolerant.yaml new file mode 100644 index 00000000..75442c0b --- /dev/null +++ b/data/traits/environment/piezotolerant.yaml @@ -0,0 +1,34 @@ +identifier: traitmech:000003 +label: piezotolerant +definition: A pressure growth preference in which an organism can grow under elevated + hydrostatic pressure but grows at similar or faster rates at atmospheric pressure + (0.1 MPa). +definition_source: DOI:10.3389/fmolb.2022.1058381 +trait_category: ENVIRONMENT +term_kind: CLASS +mapping_status: PROPOSED +parent_traits: +- METPO:1000059 +synonyms: +- synonym_text: barotolerant + synonym_type: RELATED_SYNONYM + source: DOI:10.3389/fmolb.2022.1058381 +evidence: +- reference: DOI:10.3389/fmolb.2022.1058381 + snippet: Microorganisms adapted to HHP are usually known as piezophiles, referring + to their preference for high pressure + notes: Pressure-adaptation review distinguishes piezotolerant organisms, which + withstand high hydrostatic pressure but grow at similar or faster rates at + atmospheric pressure, from obligate piezophiles. +- reference: DOI:10.1099/ijsem.0.001671 + snippet: growth range of 80-140 MPa (optimum, 120 MPa) at 6 degrees C + notes: Provides the contrasting obligate-piezophile reference point against which + piezotolerant (atmospheric-capable) growth is defined. +curation_history: +- timestamp: '2026-05-28T00:00:00Z' + curator: claude + action: PROPOSED_FROM_RESEARCH + changes: Proposed candidate ENVIRONMENT trait (pressure-tolerant growth) from + literature research to complete the pressure-preference axis alongside + piezophilic and obligately piezophilic. 
+ llm_assisted: true diff --git a/data/traits/environment/pressure_delta.yaml b/data/traits/environment/pressure_delta.yaml new file mode 100644 index 00000000..285c2275 --- /dev/null +++ b/data/traits/environment/pressure_delta.yaml @@ -0,0 +1,28 @@ +identifier: traitmech:000006 +label: pressure delta +definition: A pressure phenotype with numerical limits expressing the breadth + (maximum minus minimum) of hydrostatic pressure supporting growth of an organism. +definition_source: DOI:10.3389/fmolb.2022.1058381 +trait_category: ENVIRONMENT +term_kind: CLASS +mapping_status: PROPOSED +parent_traits: +- METPO:1000059 +evidence: +- reference: DOI:10.3389/fmolb.2022.1058381 + snippet: Microorganisms adapted to HHP are usually known as piezophiles, referring + to their preference for high pressure + notes: Pressure-adaptation review supports the breadth of the pressure-tolerance + span as a derived descriptor of pressure-adaptation flexibility. +- reference: DOI:10.1099/ijsem.0.001671 + snippet: growth range of 80-140 MPa (optimum, 120 MPa) at 6 degrees C + notes: The 80-140 MPa span of Colwellia marinimaniae MTCD1 (delta = 60 MPa) + illustrates the breadth this phenotype records. +curation_history: +- timestamp: '2026-05-28T00:00:00Z' + curator: claude + action: PROPOSED_FROM_RESEARCH + changes: Proposed quantitative companion trait (pressure-tolerance breadth) + mirroring the existing temperature_delta / nacl_delta pattern, to fill the + pressure coverage gap. + llm_assisted: true diff --git a/data/traits/environment/pressure_optimum.yaml b/data/traits/environment/pressure_optimum.yaml new file mode 100644 index 00000000..523080ab --- /dev/null +++ b/data/traits/environment/pressure_optimum.yaml @@ -0,0 +1,28 @@ +identifier: traitmech:000004 +label: pressure optimum +definition: A pressure phenotype with numerical limits giving the hydrostatic + pressure at which an organism grows fastest. 
+definition_source: DOI:10.1099/ijsem.0.001671 +trait_category: ENVIRONMENT +term_kind: CLASS +mapping_status: PROPOSED +parent_traits: +- METPO:1000059 +evidence: +- reference: DOI:10.1099/ijsem.0.001671 + snippet: growth range of 80-140 MPa (optimum, 120 MPa) at 6 degrees C + notes: Colwellia marinimaniae MTCD1 illustrates a measurable pressure optimum + (120 MPa), the quantitative value this phenotype records. +- reference: DOI:10.3389/fmolb.2022.1058381 + snippet: Microorganisms adapted to HHP are usually known as piezophiles, referring + to their preference for high pressure + notes: Pressure-adaptation review supports an organism-specific optimal growth + pressure as the defining quantity for piezophile classification. +curation_history: +- timestamp: '2026-05-28T00:00:00Z' + curator: claude + action: PROPOSED_FROM_RESEARCH + changes: Proposed quantitative companion trait (optimal growth pressure) mirroring + the existing temperature_optimum / nacl_optimum pattern, to fill the pressure + coverage gap. + llm_assisted: true diff --git a/data/traits/environment/pressure_range.yaml b/data/traits/environment/pressure_range.yaml new file mode 100644 index 00000000..b75f38f6 --- /dev/null +++ b/data/traits/environment/pressure_range.yaml @@ -0,0 +1,28 @@ +identifier: traitmech:000005 +label: pressure range +definition: A pressure phenotype with numerical limits that bounds the minimum and + maximum hydrostatic pressures supporting growth of an organism. +definition_source: DOI:10.1099/ijsem.0.001671 +trait_category: ENVIRONMENT +term_kind: CLASS +mapping_status: PROPOSED +parent_traits: +- METPO:1000059 +evidence: +- reference: DOI:10.1099/ijsem.0.001671 + snippet: growth range of 80-140 MPa (optimum, 120 MPa) at 6 degrees C + notes: Colwellia marinimaniae MTCD1 illustrates a bounded growth-supporting + pressure span (80-140 MPa), the quantity this phenotype records. 
+- reference: DOI:10.3389/fmolb.2022.1058381 + snippet: Microorganisms adapted to HHP are usually known as piezophiles, referring + to their preference for high pressure + notes: Pressure-adaptation review supports the bounded span of growth-permissive + hydrostatic pressure as a defining quantitative descriptor. +curation_history: +- timestamp: '2026-05-28T00:00:00Z' + curator: claude + action: PROPOSED_FROM_RESEARCH + changes: Proposed quantitative companion trait (growth-supporting pressure span) + mirroring the existing temperature_range / nacl_range pattern, to fill the + pressure coverage gap. + llm_assisted: true diff --git a/data/traits/environment/radiotolerant.yaml b/data/traits/environment/radiotolerant.yaml new file mode 100644 index 00000000..9ecd5c3e --- /dev/null +++ b/data/traits/environment/radiotolerant.yaml @@ -0,0 +1,37 @@ +identifier: traitmech:000007 +label: radiotolerant +definition: An environmental tolerance in which an organism survives doses of + ionizing and/or ultraviolet radiation that are lethal to most microorganisms, + typically via efficient DNA repair and oxidative-damage protection. +definition_source: DOI:10.1101/cshperspect.a012765 +trait_category: ENVIRONMENT +term_kind: CLASS +mapping_status: PROPOSED +parent_traits: +- METPO:1000059 +synonyms: +- synonym_text: radioresistant + synonym_type: RELATED_SYNONYM + source: DOI:10.1101/cshperspect.a012765 +evidence: +- reference: DOI:10.1101/cshperspect.a012765 + snippet: A strong correlation has been shown between intracellular Mn/Fe concentration + ratios and bacterial resistance to radiation + notes: Review of extreme radiation resistance supports manganese-antioxidant + protection of the proteome as a core radiotolerance mechanism in Deinococcus + radiodurans, "a champion of extreme radiation resistance". 
+- reference: DOI:10.3390/genes14091803 + snippet: Deinococcus radiodurans, a Gram-positive extremophilic bacterium, is a + remarkable example of such an organism, showcasing an impressive resistance to a + wide array of stressors, including ionizing radiation, desiccation, UV radiation, + and oxidizing agents + notes: 'Organism example: Deinococcus radiodurans tolerates ionizing radiation, + UV radiation, and desiccation.' +curation_history: +- timestamp: '2026-05-28T00:00:00Z' + curator: claude + action: PROPOSED_FROM_RESEARCH + changes: Proposed candidate ENVIRONMENT trait (general radiation tolerance) from + literature research to fill the radiation-tolerance coverage gap. Parent of the + ionizing- and UV-specific sub-variants. + llm_assisted: true diff --git a/data/traits/environment/uv_radiation_tolerant.yaml b/data/traits/environment/uv_radiation_tolerant.yaml new file mode 100644 index 00000000..3fe8689e --- /dev/null +++ b/data/traits/environment/uv_radiation_tolerant.yaml @@ -0,0 +1,34 @@ +identifier: traitmech:000009 +label: UV radiation tolerant +definition: An environmental tolerance in which an organism survives high doses of + ultraviolet radiation, typically via photoreactivation and nucleotide-excision + repair of cyclobutane pyrimidine dimers and 6-4 photoproducts. +definition_source: DOI:10.3390/genes14091803 +trait_category: ENVIRONMENT +term_kind: CLASS +mapping_status: PROPOSED +parent_traits: +- traitmech:000007 +synonyms: +- synonym_text: UV resistant + synonym_type: RELATED_SYNONYM + source: DOI:10.3390/genes14091803 +evidence: +- reference: DOI:10.3390/genes14091803 + snippet: Deinococcus radiodurans R1 demonstrates a significantly higher radiation + resistance with D10 values exceeding 12 kGy for gamma radiation and 700 J/m2 for + UV-C radiation + notes: 'Organism example: Deinococcus radiodurans tolerates UV-C radiation D10 + doses of 700 J/m2.' 
+- reference: DOI:10.1101/cshperspect.a012765 + snippet: The bacterium Deinococcus radiodurans is a champion of extreme radiation + resistance + notes: Review support — Deinococcus radiodurans is the reference organism for + extreme UV and ionizing radiation resistance. +curation_history: +- timestamp: '2026-05-28T00:00:00Z' + curator: claude + action: PROPOSED_FROM_RESEARCH + changes: Proposed candidate ENVIRONMENT trait (ultraviolet radiation tolerance) + from literature research; sub-variant of radiotolerant. + llm_assisted: true diff --git a/data/traits/environment/xerophilic.yaml b/data/traits/environment/xerophilic.yaml new file mode 100644 index 00000000..cf266306 --- /dev/null +++ b/data/traits/environment/xerophilic.yaml @@ -0,0 +1,34 @@ +identifier: traitmech:000011 +label: xerophilic +definition: An environmental growth preference in which an organism grows at low + water activity (low aw), such as in desiccated, high-sugar, or high-solute + substrates. +definition_source: DOI:10.1098/rstb.2004.1502 +trait_category: ENVIRONMENT +term_kind: CLASS +mapping_status: PROPOSED +parent_traits: +- METPO:1000059 +synonyms: +- synonym_text: xerotolerant + synonym_type: RELATED_SYNONYM + source: DOI:10.1098/rstb.2004.1502 +evidence: +- reference: DOI:10.1098/rstb.2004.1502 + snippet: some of which are capable of growth at a water activity (aw) of 0.61, the + lowest aw value for growth recorded to date + notes: Low-water-activity review supports growth at very low aw as the defining + feature of xerophiles. +- reference: DOI:10.3390/microorganisms10020432 + snippet: Anhydrobiosis (gr. life without water) is predominantly described as the + ability of some organisms to lose all or almost all water and enter a state of + suspension where the metabolism comes to a reversible standstill + notes: Anhydrobiosis review supports low-water-activity adaptation as the + physiological context distinguishing xerophilic growth from desiccation survival. 
+curation_history: +- timestamp: '2026-05-28T00:00:00Z' + curator: claude + action: PROPOSED_FROM_RESEARCH + changes: Proposed candidate ENVIRONMENT trait (low-water-activity growth preference) + from literature research to fill the water-availability coverage gap. + llm_assisted: true diff --git a/data/traits/environment/zinc_tolerant.yaml b/data/traits/environment/zinc_tolerant.yaml new file mode 100644 index 00000000..e3b7c035 --- /dev/null +++ b/data/traits/environment/zinc_tolerant.yaml @@ -0,0 +1,29 @@ +identifier: traitmech:000014 +label: zinc tolerant +definition: A metal tolerance in which an organism grows in the presence of elevated + zinc (Zn2+) concentrations, typically via cation-efflux resistance systems such as + the czc determinant. +definition_source: PMID:12829273 +trait_category: ENVIRONMENT +term_kind: CLASS +mapping_status: PROPOSED +parent_traits: +- traitmech:000012 +evidence: +- reference: DOI:10.1111/j.1365-2958.2009.06792.x + snippet: CzcP exports transition metals Zn(2+), Cd(2+) and Co(2+) + notes: The cobalt-zinc-cadmium (czc) efflux system of Cupriavidus metallidurans + confers zinc resistance. +- reference: DOI:10.3389/fmicb.2020.00047 + snippet: C. metallidurans BS1 conferred resistance to Zn2+ displaying a MIC of 20 + mM, Cd2+ (2.5 mM), Co2+ (20mM), Ni2+ (8 mM), As3+ (3.5 mM), Cu2+ (5 mM), Au3+ (1 + uM) and Pb2+ (1.7 mM) + notes: 'Organism example: Cupriavidus metallidurans BS1 tolerates zinc to a MIC of + 20 mM.' +curation_history: +- timestamp: '2026-05-28T00:00:00Z' + curator: claude + action: PROPOSED_FROM_RESEARCH + changes: Proposed candidate ENVIRONMENT trait (zinc tolerance) from literature + research; metal-specific sub-variant of metal tolerant. 
+ llm_assisted: true diff --git a/data/traits/metabolism/anaerobic_oxidation_of_methane.yaml b/data/traits/metabolism/anaerobic_oxidation_of_methane.yaml new file mode 100644 index 00000000..ac7c9942 --- /dev/null +++ b/data/traits/metabolism/anaerobic_oxidation_of_methane.yaml @@ -0,0 +1,34 @@ +identifier: traitmech:000033 +label: anaerobic oxidation of methane +definition: A metabolism in which methane is oxidized under anoxic conditions, + classically coupled to sulfate reduction and mediated by consortia of anaerobic + methanotrophic archaea (ANME) and sulfate-reducing bacteria. It is a major sink + for methane in marine sediments. +definition_source: DOI:10.1038/35036572 +trait_category: METABOLISM +term_kind: CLASS +mapping_status: PROPOSED +parent_traits: +- METPO:1000802 +synonyms: +- synonym_text: AOM + synonym_type: EXACT_SYNONYM + source: DOI:10.1038/35036572 +- synonym_text: anaerobic methanotrophy + synonym_type: RELATED_SYNONYM + source: DOI:10.3389/fmars.2025.1609892 +evidence: +- reference: DOI:10.1038/35036572 + notes: Boetius et al. described the marine microbial consortium of ANME archaea + and sulfate-reducing bacteria mediating anaerobic oxidation of methane. +- reference: DOI:10.3389/fmars.2025.1609892 + notes: Review of AOM in marine sediments supports sulfate- and metal-coupled + anaerobic methane oxidation as a major methane sink. +curation_history: +- timestamp: '2026-05-30T00:00:00Z' + curator: claude + action: PROPOSED_FROM_RESEARCH + changes: Proposed candidate METABOLISM trait (anaerobic oxidation of methane) from + literature research; complements the existing methanogenesis class (the reverse + process). 
+ llm_assisted: true diff --git a/data/traits/metabolism/anoxygenic_photosynthesis.yaml b/data/traits/metabolism/anoxygenic_photosynthesis.yaml new file mode 100644 index 00000000..20361d2b --- /dev/null +++ b/data/traits/metabolism/anoxygenic_photosynthesis.yaml @@ -0,0 +1,30 @@ +identifier: traitmech:000035 +label: anoxygenic photosynthesis +definition: A phototrophic metabolism that uses light energy with a single + photosystem and bacteriochlorophyll, using electron donors other than water + (e.g. H2S, H2, Fe(II), organics) and therefore not evolving oxygen. Characteristic + of purple and green sulfur bacteria, Chloroflexi, and heliobacteria. +definition_source: DOI:10.1016/j.tim.2006.09.001 +trait_category: METABOLISM +term_kind: CLASS +mapping_status: PROPOSED +parent_traits: +- traitmech:000038 +synonyms: +- synonym_text: bacterial photosynthesis + synonym_type: RELATED_SYNONYM + source: DOI:10.1016/j.tim.2006.09.001 +evidence: +- reference: DOI:10.1016/j.tim.2006.09.001 + notes: Bryant & Frigaard describe anoxygenic photosynthesis across five prokaryotic + phyla using bacteriochlorophyll and a single photosystem without O2 evolution. +- reference: DOI:10.3389/fmicb.2024.1417714 + notes: Review of anoxygenic photosynthesis in green sulfur bacteria supports + sulfide as electron donor and the absence of oxygen production. +curation_history: +- timestamp: '2026-05-30T00:00:00Z' + curator: claude + action: PROPOSED_FROM_RESEARCH + changes: Proposed candidate METABOLISM trait (anoxygenic photosynthesis) from + literature research to fill the phototrophy gap. 
+ llm_assisted: true diff --git a/data/traits/metabolism/calvin_benson_bassham_cycle.yaml b/data/traits/metabolism/calvin_benson_bassham_cycle.yaml new file mode 100644 index 00000000..64dda9a1 --- /dev/null +++ b/data/traits/metabolism/calvin_benson_bassham_cycle.yaml @@ -0,0 +1,34 @@ +identifier: traitmech:000020 +label: Calvin-Benson-Bassham cycle +definition: An autotrophic carbon-fixation pathway (the reductive pentose phosphate + cycle) that fixes CO2 using ribulose-1,5-bisphosphate carboxylase/oxygenase + (RuBisCO). It is the most widespread CO2-fixation pathway, used by plants, algae, + cyanobacteria, and many proteobacteria. +definition_source: DOI:10.1128/AEM.02473-10 +trait_category: METABOLISM +term_kind: CLASS +mapping_status: PROPOSED +parent_traits: +- traitmech:000019 +synonyms: +- synonym_text: Calvin cycle + synonym_type: EXACT_SYNONYM + source: DOI:10.1128/AEM.02473-10 +- synonym_text: reductive pentose phosphate cycle + synonym_type: RELATED_SYNONYM + source: DOI:10.1128/AEM.02473-10 +evidence: +- reference: DOI:10.1128/AEM.02473-10 + notes: Berg review identifies the Calvin-Benson-Bassham (reductive pentose + phosphate) cycle as the reference autotrophic pathway against which the other + five are distinguished. +- reference: DOI:10.1146/annurev-marine-120709-142712 + notes: Hügler & Sievert support the Calvin cycle as the most common/widespread + CO2-fixation pathway, including among marine cyanobacteria and proteobacteria. +curation_history: +- timestamp: '2026-05-30T00:00:00Z' + curator: claude + action: PROPOSED_FROM_RESEARCH + changes: Proposed candidate METABOLISM trait (Calvin-Benson-Bassham carbon-fixation + cycle); sub-variant of carbon fixation. 
+  llm_assisted: true diff --git a/data/traits/metabolism/carbon_fixation.yaml b/data/traits/metabolism/carbon_fixation.yaml new file mode 100644 index 00000000..54412450 --- /dev/null +++ b/data/traits/metabolism/carbon_fixation.yaml @@ -0,0 +1,35 @@ +identifier: traitmech:000019 +label: carbon fixation +definition: A metabolic process in which an organism assimilates inorganic carbon +  (CO2 or bicarbonate) into organic compounds (autotrophy). Six distinct natural +  autotrophic carbon-fixation pathways are currently recognized. +definition_source: DOI:10.1128/AEM.02473-10 +trait_category: METABOLISM +term_kind: CLASS +mapping_status: PROPOSED +parent_traits: +- METPO:1000060 +synonyms: +- synonym_text: CO2 fixation +  synonym_type: RELATED_SYNONYM +  source: DOI:10.1128/AEM.02473-10 +- synonym_text: autotrophic carbon assimilation +  synonym_type: RELATED_SYNONYM +  source: DOI:10.1146/annurev-marine-120709-142712 +evidence: +- reference: DOI:10.1128/AEM.02473-10 +  notes: Berg review of the distribution of autotrophic CO2-fixation pathways +    establishes that, besides the Calvin-Benson-Bassham cycle, five further +    autotrophic carbon-fixation pathways are known; parent of the six pathway +    sub-variants proposed here. +- reference: DOI:10.1146/annurev-marine-120709-142712 +  notes: Hügler & Sievert, "Beyond the Calvin cycle", supports multiple autotrophic +    carbon-fixation pathways operating among ocean microorganisms. +curation_history: +- timestamp: '2026-05-30T00:00:00Z' +  curator: claude +  action: PROPOSED_FROM_RESEARCH +  changes: Proposed candidate METABOLISM trait (autotrophic carbon fixation) from +    literature research to fill the carbon-fixation-pathway coverage gap. Parent of +    the six pathway-specific sub-variants.
+  llm_assisted: true diff --git a/data/traits/metabolism/dicarboxylate_four_hydroxybutyrate_cycle.yaml b/data/traits/metabolism/dicarboxylate_four_hydroxybutyrate_cycle.yaml new file mode 100644 index 00000000..35e6e175 --- /dev/null +++ b/data/traits/metabolism/dicarboxylate_four_hydroxybutyrate_cycle.yaml @@ -0,0 +1,30 @@ +identifier: traitmech:000025 +label: dicarboxylate/4-hydroxybutyrate cycle +definition: An autotrophic carbon-fixation pathway that fixes one molecule of CO2 +  and one of bicarbonate per turn via a dicarboxylate stage and a +  4-hydroxybutyrate stage. It operates in anaerobic and microaerophilic +  Crenarchaeota such as Ignicoccus and Thermoproteales. +definition_source: DOI:10.1128/AEM.02473-10 +trait_category: METABOLISM +term_kind: CLASS +mapping_status: PROPOSED +parent_traits: +- traitmech:000019 +synonyms: +- synonym_text: DC/4HB cycle +  synonym_type: RELATED_SYNONYM +  source: DOI:10.1128/AEM.02473-10 +evidence: +- reference: DOI:10.1128/AEM.02473-10 +  notes: Berg review describes the dicarboxylate/4-hydroxybutyrate cycle as the +    anaerobic archaeal counterpart of the 3HP/4HB cycle. +- reference: DOI:10.1126/science.1149976 +  notes: Berg et al.'s archaeal carbon-fixation work establishes the 4-hydroxybutyrate +    chemistry shared by this cycle in Crenarchaeota. +curation_history: +- timestamp: '2026-05-30T00:00:00Z' +  curator: claude +  action: PROPOSED_FROM_RESEARCH +  changes: Proposed candidate METABOLISM trait (dicarboxylate/4-hydroxybutyrate +    cycle); archaeal sub-variant of carbon fixation.
+ llm_assisted: true diff --git a/data/traits/metabolism/dissimilatory_iron_reduction.yaml b/data/traits/metabolism/dissimilatory_iron_reduction.yaml new file mode 100644 index 00000000..73c30369 --- /dev/null +++ b/data/traits/metabolism/dissimilatory_iron_reduction.yaml @@ -0,0 +1,37 @@ +identifier: traitmech:000031 +label: dissimilatory iron reduction +definition: An anaerobic respiratory metabolism in which an organism conserves + energy for growth by coupling the oxidation of organic matter or hydrogen to the + reduction of Fe(III) as a terminal electron acceptor. Characteristic of Geobacter + and Shewanella, often via extracellular electron transfer. +definition_source: DOI:10.1128/mr.55.2.259-287.1991 +trait_category: METABOLISM +term_kind: CLASS +mapping_status: PROPOSED +parent_traits: +- traitmech:000039 +synonyms: +- synonym_text: ferric iron respiration + synonym_type: RELATED_SYNONYM + source: DOI:10.1128/mr.55.2.259-287.1991 +- synonym_text: dissimilatory Fe(III) reduction + synonym_type: EXACT_SYNONYM + source: DOI:10.1128/mr.55.2.259-287.1991 +evidence: +- reference: DOI:10.1128/mr.55.2.259-287.1991 + snippet: The oxidation of organic matter coupled to the reduction of Fe(III) or + Mn(IV) is one of the most important biogeochemical reactions in aquatic + sediments, soils, and groundwater + notes: Lovley review establishes dissimilatory Fe(III)/Mn(IV) reduction as + energy-conserving anaerobic respiration coupling organic-matter oxidation to + metal reduction. +- reference: PMID:7826009 + notes: Nealson & Saffarini, "Iron and manganese in anaerobic respiration", supports + Fe(III) and Mn(IV) as terminal electron acceptors competitive with nitrate. +curation_history: +- timestamp: '2026-05-30T00:00:00Z' + curator: claude + action: PROPOSED_FROM_RESEARCH + changes: Proposed candidate METABOLISM trait (dissimilatory iron reduction) from + literature research to fill the metal-redox metabolism gap. 
+ llm_assisted: true diff --git a/data/traits/metabolism/dissimilatory_metal_reduction.yaml b/data/traits/metabolism/dissimilatory_metal_reduction.yaml new file mode 100644 index 00000000..e3ae5241 --- /dev/null +++ b/data/traits/metabolism/dissimilatory_metal_reduction.yaml @@ -0,0 +1,33 @@ +identifier: traitmech:000039 +label: dissimilatory metal reduction +definition: An anaerobic respiratory metabolism in which an organism conserves + energy for growth by coupling the oxidation of organic matter or hydrogen to the + reduction of a metal (e.g. Fe(III), Mn(IV)) as a terminal electron acceptor. +definition_source: DOI:10.1128/mr.55.2.259-287.1991 +trait_category: METABOLISM +term_kind: CLASS +mapping_status: PROPOSED +parent_traits: +- METPO:1000802 +synonyms: +- synonym_text: dissimilatory metal-ion reduction + synonym_type: RELATED_SYNONYM + source: DOI:10.1128/mr.55.2.259-287.1991 +evidence: +- reference: DOI:10.1128/mr.55.2.259-287.1991 + snippet: The oxidation of organic matter coupled to the reduction of Fe(III) or + Mn(IV) is one of the most important biogeochemical reactions in aquatic + sediments, soils, and groundwater + notes: Lovley review establishes dissimilatory metal (Fe(III)/Mn(IV)) reduction as + energy-conserving anaerobic respiration; parent of the metal-specific reduction + sub-variants. +- reference: PMID:7826009 + notes: Nealson & Saffarini, "Iron and manganese in anaerobic respiration", supports + metals as terminal electron acceptors in anaerobic respiration. +curation_history: +- timestamp: '2026-05-31T00:00:00Z' + curator: claude + action: PROPOSED_FROM_RESEARCH + changes: Minted intermediate axis class (dissimilatory metal reduction) under + anaerobic respiration (METPO:1000802) to parent dissimilatory iron reduction. 
+ llm_assisted: true diff --git a/data/traits/metabolism/dissimilatory_nitrate_reduction_to_ammonium.yaml b/data/traits/metabolism/dissimilatory_nitrate_reduction_to_ammonium.yaml new file mode 100644 index 00000000..12406d01 --- /dev/null +++ b/data/traits/metabolism/dissimilatory_nitrate_reduction_to_ammonium.yaml @@ -0,0 +1,33 @@ +identifier: traitmech:000030 +label: dissimilatory nitrate reduction to ammonium +definition: An anaerobic respiratory metabolism in which nitrate is reduced via + nitrite to ammonium (rather than to N2), conserving fixed nitrogen within the + ecosystem. It is favored over denitrification under nitrate-limited, + high-electron-donor conditions. +definition_source: DOI:10.1126/science.1254070 +trait_category: METABOLISM +term_kind: CLASS +mapping_status: PROPOSED +parent_traits: +- METPO:1000802 +synonyms: +- synonym_text: DNRA + synonym_type: EXACT_SYNONYM + source: DOI:10.1126/science.1254070 +- synonym_text: nitrate ammonification + synonym_type: RELATED_SYNONYM + source: DOI:10.1007/s11157-025-09719-5 +evidence: +- reference: DOI:10.1126/science.1254070 + notes: Kraft et al. show the donor-to-acceptor ratio governs whether nitrate + respiration ends in ammonium (DNRA) or N2 (denitrification). +- reference: DOI:10.1007/s11157-025-09719-5 + notes: Review of DNRA vs denitrification supports DNRA's competitive advantage and + nitrogen-retaining role under nitrate-limited conditions. +curation_history: +- timestamp: '2026-05-30T00:00:00Z' + curator: claude + action: PROPOSED_FROM_RESEARCH + changes: Proposed candidate METABOLISM trait (dissimilatory nitrate reduction to + ammonium) from literature research to fill the nitrogen-cycling gap. 
+ llm_assisted: true diff --git a/data/traits/metabolism/ethanol_fermentation.yaml b/data/traits/metabolism/ethanol_fermentation.yaml new file mode 100644 index 00000000..83516dfa --- /dev/null +++ b/data/traits/metabolism/ethanol_fermentation.yaml @@ -0,0 +1,29 @@ +identifier: traitmech:000028 +label: ethanol fermentation +definition: A fermentation in which pyruvate is decarboxylated to acetaldehyde + (releasing CO2) and then reduced by NADH to ethanol, regenerating NAD+ for + glycolysis. Characteristic of yeasts and the bacterium Zymomonas mobilis. +definition_source: DOI:10.3389/fmicb.2021.703525 +trait_category: METABOLISM +term_kind: CLASS +mapping_status: PROPOSED +parent_traits: +- METPO:1002005 +synonyms: +- synonym_text: alcoholic fermentation + synonym_type: RELATED_SYNONYM + source: DOI:10.3390/molecules31020333 +evidence: +- reference: DOI:10.3390/molecules31020333 + notes: Review of classical fermentations describes the alcoholic (ethanol) pathway + in which pyruvate is decarboxylated and reduced to ethanol. +- reference: DOI:10.3389/fmicb.2021.703525 + notes: Review of fermentative energy conservation supports ethanol as an + NADH-reoxidizing fermentation end product. +curation_history: +- timestamp: '2026-05-30T00:00:00Z' + curator: claude + action: PROPOSED_FROM_RESEARCH + changes: Proposed candidate METABOLISM trait (ethanol/alcoholic fermentation); + product-specific sub-variant of the existing Fermentation class (METPO:1002005). + llm_assisted: true diff --git a/data/traits/metabolism/lactic_acid_fermentation.yaml b/data/traits/metabolism/lactic_acid_fermentation.yaml new file mode 100644 index 00000000..110d7e3b --- /dev/null +++ b/data/traits/metabolism/lactic_acid_fermentation.yaml @@ -0,0 +1,31 @@ +identifier: traitmech:000026 +label: lactic acid fermentation +definition: A fermentation in which sugars are converted mainly to lactate, with + ATP generated by substrate-level phosphorylation. 
Homolactic fermentation yields + ~2 lactate per glucose via glycolysis; heterolactic fermentation also yields + ethanol/acetate and CO2. Characteristic of lactic acid bacteria (e.g. + Lactobacillus, Lactococcus). +definition_source: DOI:10.3389/fmicb.2021.703525 +trait_category: METABOLISM +term_kind: CLASS +mapping_status: PROPOSED +parent_traits: +- METPO:1002005 +synonyms: +- synonym_text: lactate fermentation + synonym_type: RELATED_SYNONYM + source: DOI:10.3389/fmicb.2021.703525 +evidence: +- reference: DOI:10.3389/fmicb.2021.703525 + notes: Review of energy conservation in anaerobic fermentations supports lactate + as a fermentation end product generated with substrate-level phosphorylation. +- reference: DOI:10.3390/molecules31020333 + notes: Review of classical food fermentations describes the lactic-acid pathway + and lactic acid bacteria as its agents. +curation_history: +- timestamp: '2026-05-30T00:00:00Z' + curator: claude + action: PROPOSED_FROM_RESEARCH + changes: Proposed candidate METABOLISM trait (lactic acid fermentation); + product-specific sub-variant of the existing Fermentation class (METPO:1002005). + llm_assisted: true diff --git a/data/traits/metabolism/manganese_oxidation.yaml b/data/traits/metabolism/manganese_oxidation.yaml new file mode 100644 index 00000000..bdb6b54d --- /dev/null +++ b/data/traits/metabolism/manganese_oxidation.yaml @@ -0,0 +1,29 @@ +identifier: traitmech:000032 +label: manganese oxidation +definition: A metabolism in which bacteria oxidize soluble Mn(II) to insoluble + Mn(III/IV) oxides, typically catalyzed by multicopper oxidases. Characteristic of + organisms such as Bacillus sp. SG-1, Leptothrix, and Pseudomonas putida. 
+definition_source: DOI:10.1016/j.tim.2005.07.009 +trait_category: METABOLISM +term_kind: CLASS +mapping_status: PROPOSED +parent_traits: +- METPO:1000060 +synonyms: +- synonym_text: Mn(II) oxidation + synonym_type: EXACT_SYNONYM + source: DOI:10.1016/j.tim.2005.07.009 +evidence: +- reference: DOI:10.1016/j.tim.2005.07.009 + notes: Tebo et al., "Geomicrobiology of manganese(II) oxidation", supports + bacterial Mn(II) oxidation to Mn oxides via a multicopper-oxidase mechanism. +- reference: DOI:10.1146/annurev.earth.32.101802.120213 + notes: Tebo et al., "Biogenic manganese oxides", supports the formation and + properties of bacterially produced Mn(III/IV) oxides. +curation_history: +- timestamp: '2026-05-30T00:00:00Z' + curator: claude + action: PROPOSED_FROM_RESEARCH + changes: Proposed candidate METABOLISM trait (manganese oxidation) from literature + research to fill the metal-redox metabolism gap. + llm_assisted: true diff --git a/data/traits/metabolism/mixed_acid_fermentation.yaml b/data/traits/metabolism/mixed_acid_fermentation.yaml new file mode 100644 index 00000000..15c5e3a2 --- /dev/null +++ b/data/traits/metabolism/mixed_acid_fermentation.yaml @@ -0,0 +1,25 @@ +identifier: traitmech:000027 +label: mixed-acid fermentation +definition: A fermentation in which sugars are converted via the glycolytic pathway + to a mixture of acids (lactic, acetic, formic, succinic) plus ethanol, CO2 and + H2. Characteristic of enteric bacteria such as Escherichia coli. +definition_source: DOI:10.3389/fmicb.2021.703525 +trait_category: METABOLISM +term_kind: CLASS +mapping_status: PROPOSED +parent_traits: +- METPO:1002005 +evidence: +- reference: DOI:10.3389/fmicb.2021.703525 + notes: Review of fermentative energy conservation lists acetate, ethanol, lactate, + succinate and formate as products of mixed-acid fermentation. 
+- reference: DOI:10.3390/molecules31020333 + notes: Review of fermentation pathways describes mixed-acid fermentation by + enterobacteria and its characteristic acid product spectrum. +curation_history: +- timestamp: '2026-05-30T00:00:00Z' + curator: claude + action: PROPOSED_FROM_RESEARCH + changes: Proposed candidate METABOLISM trait (mixed-acid fermentation); + product-specific sub-variant of the existing Fermentation class (METPO:1002005). + llm_assisted: true diff --git a/data/traits/metabolism/oxygenic_photosynthesis.yaml b/data/traits/metabolism/oxygenic_photosynthesis.yaml new file mode 100644 index 00000000..8e450d9a --- /dev/null +++ b/data/traits/metabolism/oxygenic_photosynthesis.yaml @@ -0,0 +1,28 @@ +identifier: traitmech:000034 +label: oxygenic photosynthesis +definition: A phototrophic metabolism that uses light energy to fix CO2, oxidizing + water as the electron donor and releasing molecular oxygen. It uses two linked + photosystems and chlorophyll, and is characteristic of cyanobacteria (and plant + chloroplasts). +definition_source: DOI:10.1016/j.tim.2006.09.001 +trait_category: METABOLISM +term_kind: CLASS +mapping_status: PROPOSED +parent_traits: +- traitmech:000038 +evidence: +- reference: DOI:10.1016/j.tim.2006.09.001 + notes: Bryant & Frigaard, "Prokaryotic photosynthesis and phototrophy illuminated", + contrasts oxygenic photosynthesis (water-splitting, O2-evolving) in cyanobacteria + with anoxygenic phototrophy. +- reference: DOI:10.1146/annurev-earth-060313-054810 + notes: Fischer et al., "Evolution of Oxygenic Photosynthesis", supports + water-oxidizing, oxygen-evolving photosynthesis as a distinct, cyanobacterial + innovation. +curation_history: +- timestamp: '2026-05-30T00:00:00Z' + curator: claude + action: PROPOSED_FROM_RESEARCH + changes: Proposed candidate METABOLISM trait (oxygenic photosynthesis) from + literature research to fill the phototrophy gap. 
+ llm_assisted: true diff --git a/data/traits/metabolism/photosynthesis.yaml b/data/traits/metabolism/photosynthesis.yaml new file mode 100644 index 00000000..02ddc1bb --- /dev/null +++ b/data/traits/metabolism/photosynthesis.yaml @@ -0,0 +1,27 @@ +identifier: traitmech:000038 +label: photosynthesis +definition: A phototrophic metabolism that uses light energy and chlorophyll- or + bacteriochlorophyll-based photochemical reaction centers to drive electron flow, + fixing CO2 and/or generating reducing power. Subdivided into oxygenic and + anoxygenic photosynthesis. +definition_source: DOI:10.1016/j.tim.2006.09.001 +trait_category: METABOLISM +term_kind: CLASS +mapping_status: PROPOSED +parent_traits: +- traitmech:000037 +evidence: +- reference: DOI:10.1016/j.tim.2006.09.001 + notes: Bryant & Frigaard treat prokaryotic photosynthesis (reaction-center based) + as encompassing both oxygenic and anoxygenic forms across five phyla. +- reference: DOI:10.1146/annurev-earth-060313-054810 + notes: Fischer et al., "Evolution of Oxygenic Photosynthesis", supports + reaction-center photosynthesis as the chlorophyll-based, CO2-fixing branch of + phototrophy distinct from rhodopsin-based light capture. +curation_history: +- timestamp: '2026-05-31T00:00:00Z' + curator: claude + action: PROPOSED_FROM_RESEARCH + changes: Minted intermediate axis class (photosynthesis) under phototrophy to + parent the oxygenic- and anoxygenic-photosynthesis traits. + llm_assisted: true diff --git a/data/traits/metabolism/phototrophy.yaml b/data/traits/metabolism/phototrophy.yaml new file mode 100644 index 00000000..a4c4dfa2 --- /dev/null +++ b/data/traits/metabolism/phototrophy.yaml @@ -0,0 +1,32 @@ +identifier: traitmech:000037 +label: phototrophy +definition: A metabolism in which an organism captures light as its energy source. + It encompasses chlorophyll-based photosynthesis (with photochemical reaction + centers) and retinal-based (rhodopsin) light-driven ion pumping. 
+definition_source: DOI:10.1016/j.tim.2006.09.001 +trait_category: METABOLISM +term_kind: CLASS +mapping_status: PROPOSED +parent_traits: +- METPO:1000060 +synonyms: +- synonym_text: phototrophic metabolism + synonym_type: RELATED_SYNONYM + source: DOI:10.1016/j.tim.2006.09.001 +evidence: +- reference: DOI:10.1016/j.tim.2006.09.001 + notes: Bryant & Frigaard, "Prokaryotic photosynthesis and phototrophy illuminated", + frames phototrophy as the broad use of light for energy, spanning chlorophyll- + and rhodopsin-based mechanisms; parent of the photosynthesis and proteorhodopsin + phototrophy sub-variants. +- reference: DOI:10.1126/science.289.5486.1902 + notes: Béjà et al. established retinal-based proteorhodopsin phototrophy as a + light-energy capture mechanism distinct from chlorophyll-based photosynthesis. +curation_history: +- timestamp: '2026-05-31T00:00:00Z' + curator: claude + action: PROPOSED_FROM_RESEARCH + changes: Minted intermediate axis class (phototrophy) to parent the photosynthesis + and proteorhodopsin-phototrophy traits, replacing their direct attachment to + METPO:1000060 (metabolism). + llm_assisted: true diff --git a/data/traits/metabolism/propionic_acid_fermentation.yaml b/data/traits/metabolism/propionic_acid_fermentation.yaml new file mode 100644 index 00000000..72678895 --- /dev/null +++ b/data/traits/metabolism/propionic_acid_fermentation.yaml @@ -0,0 +1,30 @@ +identifier: traitmech:000029 +label: propionic acid fermentation +definition: A fermentation that produces propionate (with acetate and CO2) from + sugars or lactate, typically via the Wood-Werkman (methylmalonyl-CoA) pathway. + Characteristic of propionibacteria (e.g. Propionibacterium freudenreichii). 
+definition_source: DOI:10.3390/molecules31020333 +trait_category: METABOLISM +term_kind: CLASS +mapping_status: PROPOSED +parent_traits: +- METPO:1002005 +synonyms: +- synonym_text: propionate fermentation + synonym_type: RELATED_SYNONYM + source: DOI:10.3390/molecules31020333 +evidence: +- reference: DOI:10.3390/molecules31020333 + notes: Review of classical fermentations describes propionic acid fermentation + (acetic acid, propionic acid, CO2) and propionibacteria as its agents, including + the Wood-Werkman route. +- reference: DOI:10.3389/fmicb.2021.703525 + notes: Review of fermentative energy conservation supports propionate formation as + a redox-balancing, energy-conserving fermentation route. +curation_history: +- timestamp: '2026-05-30T00:00:00Z' + curator: claude + action: PROPOSED_FROM_RESEARCH + changes: Proposed candidate METABOLISM trait (propionic acid fermentation); + product-specific sub-variant of the existing Fermentation class (METPO:1002005). + llm_assisted: true diff --git a/data/traits/metabolism/proteorhodopsin_phototrophy.yaml b/data/traits/metabolism/proteorhodopsin_phototrophy.yaml new file mode 100644 index 00000000..b33eda4b --- /dev/null +++ b/data/traits/metabolism/proteorhodopsin_phototrophy.yaml @@ -0,0 +1,32 @@ +identifier: traitmech:000036 +label: proteorhodopsin phototrophy +definition: A light-harvesting metabolism in which a retinal-containing membrane + protein (proteorhodopsin) acts as a light-driven proton pump, generating proton + motive force without chlorophyll-based reaction centers. Widespread among marine + bacterioplankton. +definition_source: DOI:10.1126/science.289.5486.1902 +trait_category: METABOLISM +term_kind: CLASS +mapping_status: PROPOSED +parent_traits: +- traitmech:000037 +synonyms: +- synonym_text: rhodopsin-based phototrophy + synonym_type: RELATED_SYNONYM + source: DOI:10.1038/35081051 +evidence: +- reference: DOI:10.1126/science.289.5486.1902 + notes: Béjà et al. 
identified proteorhodopsin, a retinal-binding light-driven + proton pump in an uncultivated marine bacterium, as evidence for a new type of + phototrophy in the sea. +- reference: DOI:10.1038/35081051 + notes: Béjà et al., "Proteorhodopsin phototrophy in the ocean", supports + proteorhodopsin as a widespread, spectrally tuned light-energy capture system in + marine bacteria. +curation_history: +- timestamp: '2026-05-30T00:00:00Z' + curator: claude + action: PROPOSED_FROM_RESEARCH + changes: Proposed candidate METABOLISM trait (proteorhodopsin phototrophy) from + literature research to fill the light-driven-energy gap. + llm_assisted: true diff --git a/data/traits/metabolism/reductive_tca_cycle.yaml b/data/traits/metabolism/reductive_tca_cycle.yaml new file mode 100644 index 00000000..77386134 --- /dev/null +++ b/data/traits/metabolism/reductive_tca_cycle.yaml @@ -0,0 +1,36 @@ +identifier: traitmech:000021 +label: reductive tricarboxylic acid cycle +definition: An autotrophic carbon-fixation pathway (reductive citric acid / + Arnon-Buchanan cycle) that runs the tricarboxylic acid cycle in reverse to fix + CO2. It operates in anaerobic and microaerophilic bacteria such as green sulfur + bacteria (Chlorobium) and Aquificales. +definition_source: DOI:10.1128/AEM.02473-10 +trait_category: METABOLISM +term_kind: CLASS +mapping_status: PROPOSED +parent_traits: +- traitmech:000019 +synonyms: +- synonym_text: reductive citric acid cycle + synonym_type: EXACT_SYNONYM + source: DOI:10.1128/AEM.02473-10 +- synonym_text: rTCA cycle + synonym_type: RELATED_SYNONYM + source: DOI:10.1128/AEM.02473-10 +- synonym_text: Arnon-Buchanan cycle + synonym_type: RELATED_SYNONYM + source: DOI:10.1146/annurev-marine-120709-142712 +evidence: +- reference: DOI:10.1128/AEM.02473-10 + notes: Berg review describes the reductive citric acid cycle as functional in + anaerobic/microaerophilic autotrophs. 
+- reference: DOI:10.1146/annurev-marine-120709-142712 + notes: Hügler & Sievert document the rTCA cycle in chemolithoautotrophs and green + sulfur bacteria in marine systems. +curation_history: +- timestamp: '2026-05-30T00:00:00Z' + curator: claude + action: PROPOSED_FROM_RESEARCH + changes: Proposed candidate METABOLISM trait (reductive TCA carbon-fixation cycle); + sub-variant of carbon fixation. + llm_assisted: true diff --git a/data/traits/metabolism/three_hydroxypropionate_bicycle.yaml b/data/traits/metabolism/three_hydroxypropionate_bicycle.yaml new file mode 100644 index 00000000..38fc1194 --- /dev/null +++ b/data/traits/metabolism/three_hydroxypropionate_bicycle.yaml @@ -0,0 +1,30 @@ +identifier: traitmech:000023 +label: 3-hydroxypropionate bicycle +definition: An autotrophic carbon-fixation pathway in which two molecules of + bicarbonate are fixed via 3-hydroxypropionate and converted to glyoxylate and + pyruvate. It is characteristic of the filamentous anoxygenic phototroph + Chloroflexus aurantiacus. +definition_source: DOI:10.1128/AEM.02473-10 +trait_category: METABOLISM +term_kind: CLASS +mapping_status: PROPOSED +parent_traits: +- traitmech:000019 +synonyms: +- synonym_text: 3-hydroxypropionate cycle + synonym_type: RELATED_SYNONYM + source: DOI:10.1128/AEM.02473-10 +evidence: +- reference: DOI:10.1128/AEM.02473-10 + notes: Berg review describes the 3-hydroxypropionate bicycle and its association + with Chloroflexus. +- reference: DOI:10.1146/annurev-marine-120709-142712 + notes: Hügler & Sievert include the 3-hydroxypropionate pathway among autotrophic + carbon-fixation strategies. +curation_history: +- timestamp: '2026-05-30T00:00:00Z' + curator: claude + action: PROPOSED_FROM_RESEARCH + changes: Proposed candidate METABOLISM trait (3-hydroxypropionate bicycle); + sub-variant of carbon fixation. 
+ llm_assisted: true diff --git a/data/traits/metabolism/three_hydroxypropionate_four_hydroxybutyrate_cycle.yaml b/data/traits/metabolism/three_hydroxypropionate_four_hydroxybutyrate_cycle.yaml new file mode 100644 index 00000000..d0714e08 --- /dev/null +++ b/data/traits/metabolism/three_hydroxypropionate_four_hydroxybutyrate_cycle.yaml @@ -0,0 +1,30 @@ +identifier: traitmech:000024 +label: 3-hydroxypropionate/4-hydroxybutyrate cycle +definition: An autotrophic carbon-fixation pathway that fixes two molecules of + bicarbonate per turn via 3-hydroxypropionate and 4-hydroxybutyrate intermediates. + It operates in aerobic and microaerophilic Crenarchaeota such as Sulfolobus and + Metallosphaera. +definition_source: DOI:10.1126/science.1149976 +trait_category: METABOLISM +term_kind: CLASS +mapping_status: PROPOSED +parent_traits: +- traitmech:000019 +synonyms: +- synonym_text: 3HP/4HB cycle + synonym_type: RELATED_SYNONYM + source: DOI:10.1126/science.1149976 +evidence: +- reference: DOI:10.1126/science.1149976 + notes: Berg et al. described the 3-hydroxypropionate/4-hydroxybutyrate autotrophic + CO2-assimilation pathway in Archaea (Sulfolobales). +- reference: DOI:10.1128/AEM.02473-10 + notes: Berg review situates the 3HP/4HB cycle among the six recognized autotrophic + carbon-fixation pathways. +curation_history: +- timestamp: '2026-05-30T00:00:00Z' + curator: claude + action: PROPOSED_FROM_RESEARCH + changes: Proposed candidate METABOLISM trait (3-hydroxypropionate/4-hydroxybutyrate + cycle); archaeal sub-variant of carbon fixation. 
+ llm_assisted: true diff --git a/data/traits/metabolism/wood_ljungdahl_pathway.yaml b/data/traits/metabolism/wood_ljungdahl_pathway.yaml new file mode 100644 index 00000000..e6ff9ed7 --- /dev/null +++ b/data/traits/metabolism/wood_ljungdahl_pathway.yaml @@ -0,0 +1,31 @@ +identifier: traitmech:000022 +label: Wood-Ljungdahl pathway +definition: An autotrophic carbon-fixation pathway (the reductive acetyl-CoA + pathway) in which two molecules of CO2 are reduced and combined into acetyl-CoA. + It is energetically efficient and used by acetogenic bacteria, methanogenic + archaea, and some sulfate-reducing bacteria. +definition_source: DOI:10.1016/j.bbapap.2008.08.012 +trait_category: METABOLISM +term_kind: CLASS +mapping_status: PROPOSED +parent_traits: +- traitmech:000019 +synonyms: +- synonym_text: reductive acetyl-CoA pathway + synonym_type: EXACT_SYNONYM + source: DOI:10.1016/j.bbapap.2008.08.012 +evidence: +- reference: DOI:10.1016/j.bbapap.2008.08.012 + notes: Ragsdale & Pierce, "Acetogenesis and the Wood-Ljungdahl pathway of CO2 + fixation", is the reference treatment of this reductive acetyl-CoA pathway. +- reference: DOI:10.1128/AEM.02473-10 + notes: Berg review places the reductive acetyl-CoA (Wood-Ljungdahl) pathway among + the recognized autotrophic carbon-fixation pathways. +curation_history: +- timestamp: '2026-05-30T00:00:00Z' + curator: claude + action: PROPOSED_FROM_RESEARCH + changes: Proposed candidate METABOLISM trait (Wood-Ljungdahl / reductive acetyl-CoA + carbon-fixation pathway); sub-variant of carbon fixation. Distinct from the + existing acetogenesis metabolism class (which produces acetate via this pathway). 
+ llm_assisted: true diff --git a/justfile b/justfile index a5db8d6b..104c7c96 100644 --- a/justfile +++ b/justfile @@ -42,6 +42,12 @@ audit-schema: audit-writers *args: uv run python scripts/audit_writers.py {{args}} +# Enforce the citation bar on PROPOSED candidate traits: each must carry +# >= 2 distinct literature citations (across definition_source + evidence). +# Emits reports/proposal_citation_audit.tsv; exits 1 on any short or malformed record. +audit-proposals *args: + uv run python scripts/audit_proposals.py {{args}} + # Verify a METPO ROBOT-template proposal cohort under proposals/. # Runs column-count, header, parent integrity, subset tag, and scope-A/C # coverage checks. See .claude/skills/metpo-proposal/SKILL.md. @@ -162,5 +168,6 @@ lint: check: lint test # Composite QC: strict closed-schema validation + schema-quality probes + -# writers audit. Mirrors the qc target in MediaIngredientMech / CultureMech. -qc: validate-strict audit-schema audit-writers +# writers audit + proposal citation bar. Mirrors the qc target in +# MediaIngredientMech / CultureMech. +qc: validate-strict audit-schema audit-writers audit-proposals diff --git a/reports/environment_trait_proposals.md b/reports/environment_trait_proposals.md new file mode 100644 index 00000000..f4457dbf --- /dev/null +++ b/reports/environment_trait_proposals.md @@ -0,0 +1,129 @@ +# Candidate missing ENVIRONMENT traits — literature-backed proposal + +**Date:** 2026-05-28 · **Curator:** claude (LLM-assisted) · **Status of all entries:** `PROPOSED` + +## Why these traits + +The ENVIRONMENT category (103 reviewed traits) is densely built out along four axes — +pH (27), temperature (28), salinity/NaCl (26), and oxygen (12) — but a survey of +`data/traits/environment/` found **no coverage** of several well-established environmental +microbial trait dimensions.
This proposal adds **18 candidate traits** across the four +genuinely-absent axes below, each backed by **≥ 2 distinct literature citations** spread +across `definition_source` and `evidence[].reference`. + +Each candidate is authored as a normal `TraitRecord` YAML in `data/traits/environment/` +with `mapping_status: PROPOSED`, so it flows through the existing closed-mode LinkML +validation. The ≥ 2-citation bar is enforced for every PROPOSED record by +`scripts/audit_proposals.py` (wired into `just qc` / CI → +`reports/proposal_citation_audit.tsv`). + +### Identifiers +All 18 are new to METPO (verified: 0 hits for piezo/pressure/radiation/desiccation/xerophil/ +metal/mercury/cadmium/arsenic/copper/zinc in `data/raw/metpo.owl`), so they are minted under +the reserved synthetic prefix `traitmech:000001`–`traitmech:000018` per +`.claude/skills/manage-identifiers/SKILL.md`. Qualitative traits parent to `METPO:1000059` +(*phenotype*) — the same upper class `oxygen preference` uses — and the metal-specific and +radiation-specific sub-variants parent to their family head (`traitmech:000012` / +`traitmech:000007`) to form small hierarchies. + +### Already in METPO (not proposed here) +`oligotrophic` (METPO:1000654) and `copiotrophic` (METPO:1000642) **already exist in METPO** +but are not yet seeded into the corpus. They are intentionally excluded from this proposal — +they are a *seeding* gap, not a vocabulary gap — and should be imported via `seed_from_metpo`. 
+ +--- + +## Proposed traits + +### Pressure / piezophily (deep-sea, deep-subsurface) +| ID | Label | Parent | Citations | +|----|-------|--------|-----------| +| traitmech:000001 | piezophilic | METPO:1000059 | DOI:10.3389/fmolb.2022.1058381; DOI:10.1099/ijsem.0.001671 | +| traitmech:000002 | obligately piezophilic | traitmech:000001 | DOI:10.1038/srep27289; DOI:10.1099/ijsem.0.001671 | +| traitmech:000003 | piezotolerant | METPO:1000059 | DOI:10.3389/fmolb.2022.1058381; DOI:10.1099/ijsem.0.001671 | +| traitmech:000004 | pressure optimum | METPO:1000059 | DOI:10.1099/ijsem.0.001671; DOI:10.3389/fmolb.2022.1058381 | +| traitmech:000005 | pressure range | METPO:1000059 | DOI:10.1099/ijsem.0.001671; DOI:10.3389/fmolb.2022.1058381 | +| traitmech:000006 | pressure delta | METPO:1000059 | DOI:10.3389/fmolb.2022.1058381; DOI:10.1099/ijsem.0.001671 | + +Key evidence: *"Microorganisms adapted to HHP are usually known as piezophiles, referring to +their preference for high pressure"* (DOI:10.3389/fmolb.2022.1058381). Organism example — +*Colwellia marinimaniae* MTCD1, the most piezophilic organism described: *"growth range of +80–140 MPa (optimum, 120 MPa) at 6 °C"* (DOI:10.1099/ijsem.0.001671). Obligate example — +*Pyrococcus yayanosii* (DOI:10.1038/srep27289). 
+ +### Radiation tolerance +| ID | Label | Parent | Citations | +|----|-------|--------|-----------| +| traitmech:000007 | radiotolerant | METPO:1000059 | DOI:10.1101/cshperspect.a012765; DOI:10.3390/genes14091803 | +| traitmech:000008 | ionizing radiation tolerant | traitmech:000007 | DOI:10.3390/genes14091803; DOI:10.1101/cshperspect.a012765 | +| traitmech:000009 | UV radiation tolerant | traitmech:000007 | DOI:10.3390/genes14091803; DOI:10.1101/cshperspect.a012765 | + +Key evidence: *"Deinococcus radiodurans … showcasing an impressive resistance to a wide array +of stressors, including ionizing radiation, desiccation, UV radiation, and oxidizing agents"* +and *"D10 values exceeding 12 kGy for gamma radiation and 700 J/m2 for UV-C radiation"* +(DOI:10.3390/genes14091803); manganese-antioxidant mechanism (DOI:10.1101/cshperspect.a012765). + +### Desiccation / water activity +| ID | Label | Parent | Citations | +|----|-------|--------|-----------| +| traitmech:000010 | desiccation tolerant | METPO:1000059 | DOI:10.3390/microorganisms10020432; DOI:10.3390/genes14091803 | +| traitmech:000011 | xerophilic | METPO:1000059 | DOI:10.1098/rstb.2004.1502; DOI:10.3390/microorganisms10020432 | + +Key evidence: anhydrobiosis = *"the ability of some organisms to lose all or almost all water +and enter a state of suspension where the metabolism comes to a reversible standstill"* +(DOI:10.3390/microorganisms10020432); xerophile growth *"at a water activity (aw) of 0.61, the +lowest aw value for growth recorded to date"* (DOI:10.1098/rstb.2004.1502). 
+ +### Heavy-metal / metalloid tolerance +| ID | Label | Parent | Citations | +|----|-------|--------|-----------| +| traitmech:000012 | metal tolerant | METPO:1000059 | PMID:12829273; DOI:10.3389/fmicb.2020.00047 | +| traitmech:000013 | cadmium tolerant | traitmech:000012 | PMID:12829273; DOI:10.1111/j.1365-2958.2009.06792.x; DOI:10.3389/fmicb.2020.00047 | +| traitmech:000014 | zinc tolerant | traitmech:000012 | PMID:12829273; DOI:10.1111/j.1365-2958.2009.06792.x; DOI:10.3389/fmicb.2020.00047 | +| traitmech:000015 | cobalt tolerant | traitmech:000012 | PMID:12829273; DOI:10.1111/j.1365-2958.2009.06792.x; DOI:10.3389/fmicb.2020.00047 | +| traitmech:000016 | mercury tolerant | traitmech:000012 | DOI:10.1016/S0168-6445(03)00046-9; PMID:12829273 | +| traitmech:000017 | arsenic tolerant | traitmech:000012 | DOI:10.3389/fmicb.2018.02473; DOI:10.3389/fmicb.2020.00047 | +| traitmech:000018 | copper tolerant | traitmech:000012 | DOI:10.1007/s10565-013-9262-1; DOI:10.3389/fmicb.2020.00047 | + +Key evidence: efflux-mediated resistance via *"CBA efflux pumps … P-type ATPases, cation +diffusion facilitator and chromate proteins"* (PMID:12829273); model metallophile +*Cupriavidus metallidurans* BS1 — *"resistance to Zn2+ … MIC of 20 mM, Cd2+ (2.5 mM), Co2+ +(20mM), Ni2+ (8 mM), As3+ (3.5 mM), Cu2+ (5 mM), Au3+ (1 µM) and Pb2+ (1.7 mM)"* +(DOI:10.3389/fmicb.2020.00047); mercury *mer*/MerA (DOI:10.1016/S0168-6445(03)00046-9); +arsenic *ars*/ArsB (DOI:10.3389/fmicb.2018.02473); copper ATPase efflux +(DOI:10.1007/s10565-013-9262-1); czc cobalt-zinc-cadmium efflux +(DOI:10.1111/j.1365-2958.2009.06792.x). + +--- + +## Citation index +| Reference | Work | +|-----------|------| +| DOI:10.3389/fmolb.2022.1058381 (PMID:36685280) | Microbial membrane lipid adaptations to high hydrostatic pressure | +| DOI:10.1099/ijsem.0.001671 (PMID:27902293) | *Colwellia marinimaniae* sp.
nov., hyperpiezophile, Challenger Deep | +| DOI:10.1038/srep27289 (PMID:27250364) | HHP adaptive strategies in obligate piezophile *Pyrococcus yayanosii* | +| DOI:10.1101/cshperspect.a012765 (PMID:23818498) | Biology of Extreme Radiation Resistance: *Deinococcus radiodurans* | +| DOI:10.3390/genes14091803 (PMID:37761943) | NER/Rec-dependent UV-radiation resistance in *Deinococcus* | +| DOI:10.3390/microorganisms10020432 (PMID:35208886) | Introduction to Bacterial Anhydrobiosis | +| DOI:10.1098/rstb.2004.1502 (PMID:15306390) | Grant, "Life at low water activity" | +| PMID:12829273 | Nies, "Efflux-mediated heavy metal resistance in prokaryotes" | +| DOI:10.3389/fmicb.2020.00047 (PMID:32117100) | *Cupriavidus metallidurans* BS1 genome, metal MICs | +| DOI:10.1016/S0168-6445(03)00046-9 (PMID:12829275) | Barkay et al., "Bacterial mercury resistance from atoms to ecosystems" | +| DOI:10.3389/fmicb.2018.02473 (PMID:30405552) | Distribution of Arsenic Resistance Genes in Prokaryotes | +| DOI:10.1007/s10565-013-9262-1 (PMID:24072389) | Molecular basis of active copper resistance in Gram-negative bacteria | +| DOI:10.1111/j.1365-2958.2009.06792.x (PMID:19602147) | CzcP efflux (cobalt-zinc-cadmium) in *C. metallidurans* CH34 | + +## How this is validated +- **Schema:** `MappingStatusEnum` gained a `PROPOSED` value (`src/traitmech/schema/traitmech.yaml`). +- **Citation bar:** `scripts/audit_proposals.py` requires ≥ 2 distinct, well-formed citations + (PMID/DOI/URL) per PROPOSED record, counted across `definition_source` ∪ `evidence[].reference`. + Wired into `just qc` and the `qc` CI workflow; report at `reports/proposal_citation_audit.tsv`. +- **Tests:** `tests/test_audit_proposals.py` locks the rule (pass ≥2, fail single/placeholder/malformed). +- **Result:** `just validate-strict` → 0 errors over 375 files; `audit-proposals` → 18/18 passing. + +## Follow-ups (out of scope here) +- Add evidence-backed `causal_graphs` once a candidate is promoted `PROPOSED` → `REVIEWED`. 
+- Mint intermediate METPO axis classes (e.g. "pressure preference", "radiation tolerance") and + re-parent, rather than parenting directly to `METPO:1000059` (phenotype). +- Upstream the `traitmech:` IDs into METPO via the `metpo-proposal` skill. +- Seed the existing METPO `oligotrophic` / `copiotrophic` classes into the corpus. diff --git a/reports/metabolism_trait_proposals.md b/reports/metabolism_trait_proposals.md new file mode 100644 index 00000000..ff5da366 --- /dev/null +++ b/reports/metabolism_trait_proposals.md @@ -0,0 +1,140 @@ +# Candidate missing METABOLISM traits — literature-backed proposal + +**Date:** 2026-05-30 (updated 2026-05-31) · **Curator:** claude (LLM-assisted) · **Status of all entries:** `PROPOSED` + +## Why these traits + +The METABOLISM category (108 records) is dominated by **composition-primitive predicates** +(`uses_as_*`, `produces`, `ferments`, `does_not_*`, `has_*_observation`, …) and a small set of +high-level process classes (`respiration`, `fermentation`, `methanogenesis`, `acetogenesis`, +`syntrophy`, `oxidative_phosphorylation`, …). What it lacks is the layer of **named, pathway-level +metabolic capability classes** that microbiologists routinely use to describe organisms. This +proposal adds **21 candidate traits** (18 metabolic-capability traits + 3 intermediate axis +classes) across the clearest gaps, each backed by **≥ 2 distinct, verified literature citations** +(`definition_source` + `evidence`), enforced by `scripts/audit_proposals.py` in `just qc` / CI. + +Candidates are authored as `TraitRecord` YAMLs in `data/traits/metabolism/` with +`mapping_status: PROPOSED`, minted `traitmech:000019`–`traitmech:000039` (continuing the +environment round 000001–000018). They flow through the existing closed-mode LinkML validation +and the citation audit with no schema change (the `PROPOSED` state already exists). + +### Identifiers / parents +METPO pre-check (`data/raw/metpo.owl`) confirmed all are absent from METPO. 
The hierarchy uses +intermediate axis classes rather than attaching capabilities directly to `METPO:1000060`: +- **Carbon fixation:** the six pathways parent to a `carbon_fixation` head (`traitmech:000019` → + `METPO:1000060` *metabolism*). +- **Fermentation:** the four product-specific fermentations parent to the existing + **`METPO:1002005` Fermentation** class. +- **Phototrophy:** a new `phototrophy` head (`traitmech:000037` → `METPO:1000060`) with a + `photosynthesis` child (`traitmech:000038`); oxygenic/anoxygenic photosynthesis parent to + `photosynthesis`, and proteorhodopsin phototrophy parents to `phototrophy`. +- **Anaerobic respiration:** DNRA and AOM parent to the existing **`METPO:1000802` + Anaerobic respiration** class; a new `dissimilatory_metal_reduction` axis + (`traitmech:000039` → `METPO:1000802`) parents dissimilatory iron reduction. +- **Manganese oxidation** remains under `METPO:1000060` — it is metal *oxidation* + (chemolithotrophy), not reduction or anaerobic respiration; a future "metal oxidation" / + "chemolithotrophy" axis could parent it. + +### Intermediate axis classes (minted 2026-05-31) +| ID | Label | Parent | Citations | +|----|-------|--------|-----------| +| traitmech:000037 | phototrophy | METPO:1000060 | DOI:10.1016/j.tim.2006.09.001; DOI:10.1126/science.289.5486.1902 | +| traitmech:000038 | photosynthesis | traitmech:000037 | DOI:10.1016/j.tim.2006.09.001; DOI:10.1146/annurev-earth-060313-054810 | +| traitmech:000039 | dissimilatory metal reduction | METPO:1000802 | DOI:10.1128/mr.55.2.259-287.1991; PMID:7826009 | + +### Already in the corpus / METPO (not proposed) +`acetogenesis`, `methanogenesis`, `fermentation`, `syntrophy`, `respiration`, +`oxidative_phosphorylation`, and `substrate_level_phosphorylation` already exist as classes and were +excluded. `nitrification`, `anammox`, and `ammonia/nitrite oxidation` exist in METPO (not yet seeded) +and are noted as a seeding gap rather than proposed here. 
+ +--- + +## Proposed traits + +### Autotrophic carbon-fixation pathways (the marquee gap — corpus had trophic modes but no pathways) +| ID | Label | Parent | Citations | +|----|-------|--------|-----------| +| traitmech:000019 | carbon fixation | METPO:1000060 | DOI:10.1128/AEM.02473-10; DOI:10.1146/annurev-marine-120709-142712 | +| traitmech:000020 | Calvin-Benson-Bassham cycle | traitmech:000019 | DOI:10.1128/AEM.02473-10; DOI:10.1146/annurev-marine-120709-142712 | +| traitmech:000021 | reductive tricarboxylic acid cycle | traitmech:000019 | DOI:10.1128/AEM.02473-10; DOI:10.1146/annurev-marine-120709-142712 | +| traitmech:000022 | Wood-Ljungdahl pathway | traitmech:000019 | DOI:10.1016/j.bbapap.2008.08.012; DOI:10.1128/AEM.02473-10 | +| traitmech:000023 | 3-hydroxypropionate bicycle | traitmech:000019 | DOI:10.1128/AEM.02473-10; DOI:10.1146/annurev-marine-120709-142712 | +| traitmech:000024 | 3-hydroxypropionate/4-hydroxybutyrate cycle | traitmech:000019 | DOI:10.1126/science.1149976; DOI:10.1128/AEM.02473-10 | +| traitmech:000025 | dicarboxylate/4-hydroxybutyrate cycle | traitmech:000019 | DOI:10.1128/AEM.02473-10; DOI:10.1126/science.1149976 | + +Berg (AEM 2011) establishes that, beyond the Calvin cycle, five further autotrophic CO2-fixation +pathways are recognized; Hügler & Sievert (2011) cover their marine distribution; Ragsdale & Pierce +(2008) is the reference for Wood-Ljungdahl; Berg et al. (Science 2007) described the archaeal +3-hydroxypropionate/4-hydroxybutyrate cycle, the first of the two archaeal 4-hydroxybutyrate pathways.
+ +### Product-specific fermentations (children of the existing Fermentation class) +| ID | Label | Parent | Citations | +|----|-------|--------|-----------| +| traitmech:000026 | lactic acid fermentation | METPO:1002005 | DOI:10.3389/fmicb.2021.703525; DOI:10.3390/molecules31020333 | +| traitmech:000027 | mixed-acid fermentation | METPO:1002005 | DOI:10.3389/fmicb.2021.703525; DOI:10.3390/molecules31020333 | +| traitmech:000028 | ethanol fermentation | METPO:1002005 | DOI:10.3390/molecules31020333; DOI:10.3389/fmicb.2021.703525 | +| traitmech:000029 | propionic acid fermentation | METPO:1002005 | DOI:10.3390/molecules31020333; DOI:10.3389/fmicb.2021.703525 | + +### Element-cycling and energy metabolisms +| ID | Label | Parent | Citations | +|----|-------|--------|-----------| +| traitmech:000030 | dissimilatory nitrate reduction to ammonium (DNRA) | METPO:1000802 | DOI:10.1126/science.1254070; DOI:10.1007/s11157-025-09719-5 | +| traitmech:000031 | dissimilatory iron reduction | traitmech:000039 | DOI:10.1128/mr.55.2.259-287.1991; PMID:7826009 | +| traitmech:000032 | manganese oxidation | METPO:1000060 | DOI:10.1016/j.tim.2005.07.009; DOI:10.1146/annurev.earth.32.101802.120213 | +| traitmech:000033 | anaerobic oxidation of methane (AOM) | METPO:1000802 | DOI:10.1038/35036572; DOI:10.3389/fmars.2025.1609892 | + +DNRA: Kraft et al. (Science 2014) show the donor/acceptor ratio governs ammonium vs. N2. +Fe reduction: Lovley (1991) — *"The oxidation of organic matter coupled to the reduction of Fe(III) +or Mn(IV) is one of the most important biogeochemical reactions…"* AOM: Boetius et al. (2000) +described the ANME–sulfate-reducer consortium. 
+ +### Phototrophy +| ID | Label | Parent | Citations | +|----|-------|--------|-----------| +| traitmech:000034 | oxygenic photosynthesis | traitmech:000038 | DOI:10.1016/j.tim.2006.09.001; DOI:10.1146/annurev-earth-060313-054810 | +| traitmech:000035 | anoxygenic photosynthesis | traitmech:000038 | DOI:10.1016/j.tim.2006.09.001; DOI:10.3389/fmicb.2024.1417714 | +| traitmech:000036 | proteorhodopsin phototrophy | traitmech:000037 | DOI:10.1126/science.289.5486.1902; DOI:10.1038/35081051 | + +--- + +## Citation index (all DOIs/PMIDs verified) +| Reference | Work | +|-----------|------| +| DOI:10.1128/AEM.02473-10 (PMID:21183637) | Berg, "Ecological aspects of the distribution of different autotrophic CO2 fixation pathways" (AEM 2011) | +| DOI:10.1146/annurev-marine-120709-142712 (PMID:21329208) | Hügler & Sievert, "Beyond the Calvin cycle: autotrophic carbon fixation in the ocean" (2011) | +| DOI:10.1016/j.bbapap.2008.08.012 (PMID:18801467) | Ragsdale & Pierce, "Acetogenesis and the Wood-Ljungdahl pathway of CO2 fixation" (2008) | +| DOI:10.1126/science.1149976 (PMID:18079405) | Berg et al., "A 3-hydroxypropionate/4-hydroxybutyrate autotrophic CO2 assimilation pathway in Archaea" (Science 2007) | +| DOI:10.3389/fmicb.2021.703525 | "Energy Conservation in Fermentations of Anaerobic Bacteria" (Front. Microbiol. 2021) | +| DOI:10.3390/molecules31020333 | "Classical Food Fermentations… Alcoholic, Acetic, Butyric, Lactic and Propionic Pathways" (Molecules) | +| DOI:10.1126/science.1254070 (PMID:25104387) | Kraft et al., "The environmental controls that govern the end product of bacterial nitrate respiration" (Science 2014) | +| DOI:10.1007/s11157-025-09719-5 | Review: DNRA's competitive advantage over denitrification under nitrate-limited conditions (2025) | +| DOI:10.1128/mr.55.2.259-287.1991 (PMID:1886521) | Lovley, "Dissimilatory Fe(III) and Mn(IV) reduction" (Microbiol. Rev. 
1991) | +| PMID:7826009 | Nealson & Saffarini, "Iron and manganese in anaerobic respiration" (Annu. Rev. Microbiol. 1994) | +| DOI:10.1016/j.tim.2005.07.009 | Tebo et al., "Geomicrobiology of manganese(II) oxidation" (Trends Microbiol. 2005) | +| DOI:10.1146/annurev.earth.32.101802.120213 | Tebo et al., "Biogenic manganese oxides" (Annu. Rev. Earth Planet. Sci. 2004) | +| DOI:10.1038/35036572 (PMID:11034209) | Boetius et al., "A marine microbial consortium apparently mediating AOM" (Nature 2000) | +| DOI:10.3389/fmars.2025.1609892 | Review: anaerobic oxidation of methane in marine sediments (2025) | +| DOI:10.1016/j.tim.2006.09.001 (PMID:16997562) | Bryant & Frigaard, "Prokaryotic photosynthesis and phototrophy illuminated" (2006) | +| DOI:10.1146/annurev-earth-060313-054810 | Fischer, Hemp & Johnson, "Evolution of Oxygenic Photosynthesis" (2016) | +| DOI:10.3389/fmicb.2024.1417714 | Review: anoxygenic photosynthesis in green sulfur bacteria (2024) | +| DOI:10.1126/science.289.5486.1902 (PMID:10976071) | Béjà et al., "Bacterial Rhodopsin: Evidence for a New Type of Phototrophy in the Sea" (Science 2000) | +| DOI:10.1038/35081051 | Béjà et al., "Proteorhodopsin phototrophy in the ocean" (Nature 2001) | + +## Validation +- Reuses the `PROPOSED` lifecycle state and `scripts/audit_proposals.py` (≥2 distinct citations) added in + the environment round — **no schema change** this time. +- `just validate-strict` → 0 errors over **396** files; `audit-proposals` → **39/39** PROPOSED records + passing (18 environment + 21 metabolism); `pytest` → 70 passed; minted IDs contiguous 000001–000039; + all `traitmech:` parent references resolve to existing records. + +## Follow-ups (out of scope here) +- Add evidence-backed `causal_graphs` (and CHEBI/GO/KEGG groundings) when a candidate is promoted + PROPOSED → REVIEWED. 
+- Intermediate axis classes for the dissimilatory/respiratory and phototrophy traits were minted + on 2026-05-31 (`phototrophy`, `photosynthesis`, `dissimilatory_metal_reduction`); a remaining + candidate axis is "metal oxidation" / "chemolithotrophy" to parent `manganese_oxidation`. +- Further gaps worth a future round: nitrogen fixation/diazotrophy, denitrification, sulfate reduction + / sulfur oxidation, methanotrophy (aerobic), hydrogen oxidation/production, cellulolysis/chitinolysis, + ureolysis, and the trophic-mode classes (chemolithoautotrophy, photoheterotrophy, mixotrophy). +- Seed the existing METPO `nitrification` / `anammox` / `oligotrophic` / `copiotrophic` classes. diff --git a/reports/proposal_citation_audit.tsv b/reports/proposal_citation_audit.tsv new file mode 100644 index 00000000..3607f3cf --- /dev/null +++ b/reports/proposal_citation_audit.tsv @@ -0,0 +1,40 @@ +path identifier n_citations citations malformed passes +/Users/marcin/Documents/VIMSS/ontology/KG-Hub/KG-Microbe/TraitMech/data/traits/environment/arsenic_tolerant.yaml traitmech:000017 2 DOI:10.3389/fmicb.2018.02473; DOI:10.3389/fmicb.2020.00047 yes +/Users/marcin/Documents/VIMSS/ontology/KG-Hub/KG-Microbe/TraitMech/data/traits/environment/cadmium_tolerant.yaml traitmech:000013 3 PMID:12829273; DOI:10.1111/j.1365-2958.2009.06792.x; DOI:10.3389/fmicb.2020.00047 yes +/Users/marcin/Documents/VIMSS/ontology/KG-Hub/KG-Microbe/TraitMech/data/traits/environment/cobalt_tolerant.yaml traitmech:000015 3 PMID:12829273; DOI:10.1111/j.1365-2958.2009.06792.x; DOI:10.3389/fmicb.2020.00047 yes +/Users/marcin/Documents/VIMSS/ontology/KG-Hub/KG-Microbe/TraitMech/data/traits/environment/copper_tolerant.yaml traitmech:000018 2 DOI:10.1007/s10565-013-9262-1; DOI:10.3389/fmicb.2020.00047 yes +/Users/marcin/Documents/VIMSS/ontology/KG-Hub/KG-Microbe/TraitMech/data/traits/environment/desiccation_tolerant.yaml traitmech:000010 2 DOI:10.3390/microorganisms10020432; DOI:10.3390/genes14091803 yes 
+/Users/marcin/Documents/VIMSS/ontology/KG-Hub/KG-Microbe/TraitMech/data/traits/environment/ionizing_radiation_tolerant.yaml traitmech:000008 2 DOI:10.3390/genes14091803; DOI:10.1101/cshperspect.a012765 yes +/Users/marcin/Documents/VIMSS/ontology/KG-Hub/KG-Microbe/TraitMech/data/traits/environment/mercury_tolerant.yaml traitmech:000016 2 DOI:10.1016/S0168-6445(03)00046-9; PMID:12829273 yes +/Users/marcin/Documents/VIMSS/ontology/KG-Hub/KG-Microbe/TraitMech/data/traits/environment/metal_tolerant.yaml traitmech:000012 2 PMID:12829273; DOI:10.3389/fmicb.2020.00047 yes +/Users/marcin/Documents/VIMSS/ontology/KG-Hub/KG-Microbe/TraitMech/data/traits/environment/obligately_piezophilic.yaml traitmech:000002 2 DOI:10.1038/srep27289; DOI:10.1099/ijsem.0.001671 yes +/Users/marcin/Documents/VIMSS/ontology/KG-Hub/KG-Microbe/TraitMech/data/traits/environment/piezophilic.yaml traitmech:000001 2 DOI:10.3389/fmolb.2022.1058381; DOI:10.1099/ijsem.0.001671 yes +/Users/marcin/Documents/VIMSS/ontology/KG-Hub/KG-Microbe/TraitMech/data/traits/environment/piezotolerant.yaml traitmech:000003 2 DOI:10.3389/fmolb.2022.1058381; DOI:10.1099/ijsem.0.001671 yes +/Users/marcin/Documents/VIMSS/ontology/KG-Hub/KG-Microbe/TraitMech/data/traits/environment/pressure_delta.yaml traitmech:000006 2 DOI:10.3389/fmolb.2022.1058381; DOI:10.1099/ijsem.0.001671 yes +/Users/marcin/Documents/VIMSS/ontology/KG-Hub/KG-Microbe/TraitMech/data/traits/environment/pressure_optimum.yaml traitmech:000004 2 DOI:10.1099/ijsem.0.001671; DOI:10.3389/fmolb.2022.1058381 yes +/Users/marcin/Documents/VIMSS/ontology/KG-Hub/KG-Microbe/TraitMech/data/traits/environment/pressure_range.yaml traitmech:000005 2 DOI:10.1099/ijsem.0.001671; DOI:10.3389/fmolb.2022.1058381 yes +/Users/marcin/Documents/VIMSS/ontology/KG-Hub/KG-Microbe/TraitMech/data/traits/environment/radiotolerant.yaml traitmech:000007 2 DOI:10.1101/cshperspect.a012765; DOI:10.3390/genes14091803 yes 
+/Users/marcin/Documents/VIMSS/ontology/KG-Hub/KG-Microbe/TraitMech/data/traits/environment/uv_radiation_tolerant.yaml traitmech:000009 2 DOI:10.3390/genes14091803; DOI:10.1101/cshperspect.a012765 yes +/Users/marcin/Documents/VIMSS/ontology/KG-Hub/KG-Microbe/TraitMech/data/traits/environment/xerophilic.yaml traitmech:000011 2 DOI:10.1098/rstb.2004.1502; DOI:10.3390/microorganisms10020432 yes +/Users/marcin/Documents/VIMSS/ontology/KG-Hub/KG-Microbe/TraitMech/data/traits/environment/zinc_tolerant.yaml traitmech:000014 3 PMID:12829273; DOI:10.1111/j.1365-2958.2009.06792.x; DOI:10.3389/fmicb.2020.00047 yes +/Users/marcin/Documents/VIMSS/ontology/KG-Hub/KG-Microbe/TraitMech/data/traits/metabolism/anaerobic_oxidation_of_methane.yaml traitmech:000033 2 DOI:10.1038/35036572; DOI:10.3389/fmars.2025.1609892 yes +/Users/marcin/Documents/VIMSS/ontology/KG-Hub/KG-Microbe/TraitMech/data/traits/metabolism/anoxygenic_photosynthesis.yaml traitmech:000035 2 DOI:10.1016/j.tim.2006.09.001; DOI:10.3389/fmicb.2024.1417714 yes +/Users/marcin/Documents/VIMSS/ontology/KG-Hub/KG-Microbe/TraitMech/data/traits/metabolism/calvin_benson_bassham_cycle.yaml traitmech:000020 2 DOI:10.1128/AEM.02473-10; DOI:10.1146/annurev-marine-120709-142712 yes +/Users/marcin/Documents/VIMSS/ontology/KG-Hub/KG-Microbe/TraitMech/data/traits/metabolism/carbon_fixation.yaml traitmech:000019 2 DOI:10.1128/AEM.02473-10; DOI:10.1146/annurev-marine-120709-142712 yes +/Users/marcin/Documents/VIMSS/ontology/KG-Hub/KG-Microbe/TraitMech/data/traits/metabolism/dicarboxylate_four_hydroxybutyrate_cycle.yaml traitmech:000025 2 DOI:10.1126/science.1149976; DOI:10.1128/AEM.02473-10 yes +/Users/marcin/Documents/VIMSS/ontology/KG-Hub/KG-Microbe/TraitMech/data/traits/metabolism/dissimilatory_iron_reduction.yaml traitmech:000031 2 DOI:10.1128/mr.55.2.259-287.1991; PMID:7826009 yes +/Users/marcin/Documents/VIMSS/ontology/KG-Hub/KG-Microbe/TraitMech/data/traits/metabolism/dissimilatory_metal_reduction.yaml traitmech:000039 2 
DOI:10.1128/mr.55.2.259-287.1991; PMID:7826009 yes +/Users/marcin/Documents/VIMSS/ontology/KG-Hub/KG-Microbe/TraitMech/data/traits/metabolism/dissimilatory_nitrate_reduction_to_ammonium.yaml traitmech:000030 2 DOI:10.1126/science.1254070; DOI:10.1007/s11157-025-09719-5 yes +/Users/marcin/Documents/VIMSS/ontology/KG-Hub/KG-Microbe/TraitMech/data/traits/metabolism/ethanol_fermentation.yaml traitmech:000028 2 DOI:10.3389/fmicb.2021.703525; DOI:10.3390/molecules31020333 yes +/Users/marcin/Documents/VIMSS/ontology/KG-Hub/KG-Microbe/TraitMech/data/traits/metabolism/lactic_acid_fermentation.yaml traitmech:000026 2 DOI:10.3389/fmicb.2021.703525; DOI:10.3390/molecules31020333 yes +/Users/marcin/Documents/VIMSS/ontology/KG-Hub/KG-Microbe/TraitMech/data/traits/metabolism/manganese_oxidation.yaml traitmech:000032 2 DOI:10.1016/j.tim.2005.07.009; DOI:10.1146/annurev.earth.32.101802.120213 yes +/Users/marcin/Documents/VIMSS/ontology/KG-Hub/KG-Microbe/TraitMech/data/traits/metabolism/mixed_acid_fermentation.yaml traitmech:000027 2 DOI:10.3389/fmicb.2021.703525; DOI:10.3390/molecules31020333 yes +/Users/marcin/Documents/VIMSS/ontology/KG-Hub/KG-Microbe/TraitMech/data/traits/metabolism/oxygenic_photosynthesis.yaml traitmech:000034 2 DOI:10.1016/j.tim.2006.09.001; DOI:10.1146/annurev-earth-060313-054810 yes +/Users/marcin/Documents/VIMSS/ontology/KG-Hub/KG-Microbe/TraitMech/data/traits/metabolism/photosynthesis.yaml traitmech:000038 2 DOI:10.1016/j.tim.2006.09.001; DOI:10.1146/annurev-earth-060313-054810 yes +/Users/marcin/Documents/VIMSS/ontology/KG-Hub/KG-Microbe/TraitMech/data/traits/metabolism/phototrophy.yaml traitmech:000037 2 DOI:10.1016/j.tim.2006.09.001; DOI:10.1126/science.289.5486.1902 yes +/Users/marcin/Documents/VIMSS/ontology/KG-Hub/KG-Microbe/TraitMech/data/traits/metabolism/propionic_acid_fermentation.yaml traitmech:000029 2 DOI:10.3390/molecules31020333; DOI:10.3389/fmicb.2021.703525 yes 
+/Users/marcin/Documents/VIMSS/ontology/KG-Hub/KG-Microbe/TraitMech/data/traits/metabolism/proteorhodopsin_phototrophy.yaml traitmech:000036 2 DOI:10.1126/science.289.5486.1902; DOI:10.1038/35081051 yes +/Users/marcin/Documents/VIMSS/ontology/KG-Hub/KG-Microbe/TraitMech/data/traits/metabolism/reductive_tca_cycle.yaml traitmech:000021 2 DOI:10.1128/AEM.02473-10; DOI:10.1146/annurev-marine-120709-142712 yes +/Users/marcin/Documents/VIMSS/ontology/KG-Hub/KG-Microbe/TraitMech/data/traits/metabolism/three_hydroxypropionate_bicycle.yaml traitmech:000023 2 DOI:10.1128/AEM.02473-10; DOI:10.1146/annurev-marine-120709-142712 yes +/Users/marcin/Documents/VIMSS/ontology/KG-Hub/KG-Microbe/TraitMech/data/traits/metabolism/three_hydroxypropionate_four_hydroxybutyrate_cycle.yaml traitmech:000024 2 DOI:10.1126/science.1149976; DOI:10.1128/AEM.02473-10 yes +/Users/marcin/Documents/VIMSS/ontology/KG-Hub/KG-Microbe/TraitMech/data/traits/metabolism/wood_ljungdahl_pathway.yaml traitmech:000022 2 DOI:10.1016/j.bbapap.2008.08.012; DOI:10.1128/AEM.02473-10 yes diff --git a/scripts/audit_proposals.py b/scripts/audit_proposals.py new file mode 100644 index 00000000..e121ec05 --- /dev/null +++ b/scripts/audit_proposals.py @@ -0,0 +1,159 @@ +#!/usr/bin/env python3 +"""Citation-bar audit for PROPOSED TraitRecords. + +Candidate traits proposed from literature research (mapping_status: PROPOSED) +must each be backed by at least two *distinct* literature citations, counted +across `definition_source` and every `evidence[].reference`. LinkML/JSON-Schema +validation cannot express a count spread over two different slots, so this +cross-field rule lives here, alongside the other `qc` audit probes +(audit_schema.py, audit_writers.py). + +A PROPOSED record FAILS if either: + - it carries fewer than MIN_CITATIONS distinct, non-placeholder citations, or + - any of its citations is not a recognizable reference (PMID:, DOI:, or URL). 
+ +SEEDED / REVIEWED / DEPRECATED records are not checked here — they inherit +provenance from METPO and are governed by the schema's own rules. + +Usage: + python scripts/audit_proposals.py [PATH ...] + python scripts/audit_proposals.py --out reports/proposal_citation_audit.tsv + +Paths may be files or directories; directories are walked for *.yaml. +Default scope when no paths given: data/traits/. Exits non-zero on any failure. +""" + +from __future__ import annotations + +import argparse +import csv +import re +import sys +from pathlib import Path +from typing import Iterable + +import yaml + +_REPO_ROOT = Path(__file__).resolve().parents[1] +DEFAULT_ROOTS = [_REPO_ROOT / "data" / "traits"] +DEFAULT_OUT = _REPO_ROOT / "reports" / "proposal_citation_audit.tsv" + +#: Minimum number of distinct literature citations a PROPOSED record must carry. +MIN_CITATIONS = 2 + +#: A citation that has not actually been supplied yet. +_PLACEHOLDER = re.compile(r"^TODO\b", re.IGNORECASE) + +#: A recognizable literature reference: PMID:, DOI:, or a bare URL. +_REFERENCE_SHAPE = re.compile(r"^(PMID:\d+|DOI:\S+|https?://\S+)$", re.IGNORECASE) + + +def is_placeholder(ref: str) -> bool: + """True for empty / TODO-style citations that don't count toward the bar.""" + return not ref or not ref.strip() or bool(_PLACEHOLDER.match(ref.strip())) + + +def is_valid_reference(ref: str) -> bool: + """True if `ref` looks like a PMID, DOI, or URL citation.""" + return bool(_REFERENCE_SHAPE.match(ref.strip())) if ref else False + + +def distinct_citations(record: dict) -> list[str]: + """Distinct, non-placeholder citations across definition_source + evidence. + + Order-preserving (definition_source first, then evidence order) so the + audit TSV is stable. 
+ """ + seen: set[str] = set() + out: list[str] = [] + candidates: list[str] = [] + ds = record.get("definition_source") + if isinstance(ds, str): + candidates.append(ds) + for item in record.get("evidence") or []: + ref = (item or {}).get("reference") + if isinstance(ref, str): + candidates.append(ref) + for ref in candidates: + key = ref.strip() + if is_placeholder(key) or key in seen: + continue + seen.add(key) + out.append(key) + return out + + +def audit_record(record: dict, path: str = "") -> dict | None: + """Audit a single record. Returns a result row for PROPOSED records, else None. + + The row's `passes` is "yes" only when the record meets MIN_CITATIONS and + every counted citation is a well-formed reference. + """ + if (record.get("mapping_status") or "").upper() != "PROPOSED": + return None + cites = distinct_citations(record) + malformed = [c for c in cites if not is_valid_reference(c)] + passes = len(cites) >= MIN_CITATIONS and not malformed + return { + "path": path, + "identifier": record.get("identifier", ""), + "n_citations": len(cites), + "citations": "; ".join(cites), + "malformed": "; ".join(malformed), + "passes": "yes" if passes else "no", + } + + +def iter_yaml_files(roots: Iterable[Path]) -> Iterable[Path]: + for root in roots: + if root.is_file() and root.suffix == ".yaml": + yield root + elif root.is_dir(): + yield from sorted(root.rglob("*.yaml")) + + +def main(argv: list[str] | None = None) -> int: + ap = argparse.ArgumentParser(description=__doc__) + ap.add_argument("paths", nargs="*", type=Path, help="files or dirs (default: data/traits/)") + ap.add_argument("--out", type=Path, default=DEFAULT_OUT, help="TSV output path") + args = ap.parse_args(argv) + + roots = args.paths or DEFAULT_ROOTS + rows: list[dict] = [] + for path in iter_yaml_files(roots): + try: + record = yaml.safe_load(path.read_text()) + except yaml.YAMLError as exc: # malformed YAML is the strict validator's job + print(f"WARN: skipping unparseable {path}: {exc}", 
file=sys.stderr) + continue + if not isinstance(record, dict): + continue + row = audit_record(record, str(path)) + if row is not None: + rows.append(row) + + fields = ["path", "identifier", "n_citations", "citations", "malformed", "passes"] + args.out.parent.mkdir(parents=True, exist_ok=True) + with args.out.open("w", newline="") as fh: + w = csv.DictWriter(fh, fieldnames=fields, delimiter="\t", lineterminator="\n") + w.writeheader() + for row in rows: + w.writerow(row) + + failures = [r for r in rows if r["passes"] == "no"] + print("", file=sys.stderr) + print(f"=== proposal citation audit ({len(rows)} PROPOSED records) ===", file=sys.stderr) + print(f" minimum distinct citations: {MIN_CITATIONS}", file=sys.stderr) + print(f" passing: {len(rows) - len(failures)} / {len(rows)}", file=sys.stderr) + print(f" failing: {len(failures)}", file=sys.stderr) + print(f" TSV: {args.out}", file=sys.stderr) + for r in failures: + reason = "malformed reference(s): " + r["malformed"] if r["malformed"] \ + else f"only {r['n_citations']} distinct citation(s)" + print(f" FAIL {r['identifier']} ({r['path']}): {reason}", file=sys.stderr) + + return 1 if failures else 0 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/src/traitmech/schema/traitmech.yaml b/src/traitmech/schema/traitmech.yaml index 5020f8f0..f37c069e 100644 --- a/src/traitmech/schema/traitmech.yaml +++ b/src/traitmech/schema/traitmech.yaml @@ -412,6 +412,12 @@ enums: permissible_values: SEEDED: description: Imported verbatim from metpo.owl; not yet curator-reviewed. + PROPOSED: + description: >- + Candidate trait proposed from literature research to fill a coverage + gap; not yet curator-reviewed. Must carry at least two distinct + literature citations (across definition_source and evidence) — this + is enforced by scripts/audit_proposals.py in the `qc` target. REVIEWED: description: Curator has signed off on label / definition / parents. 
DEPRECATED: diff --git a/tests/test_audit_proposals.py b/tests/test_audit_proposals.py new file mode 100644 index 00000000..317a977b --- /dev/null +++ b/tests/test_audit_proposals.py @@ -0,0 +1,103 @@ +"""Unit tests for scripts/audit_proposals.py. + +Locks in the >=2-distinct-citation rule for PROPOSED TraitRecords: +- only PROPOSED records are checked (SEEDED/REVIEWED/DEPRECATED return None); +- citations are counted across definition_source + evidence[].reference; +- duplicates and TODO placeholders don't count; +- malformed references (not PMID/DOI/URL) fail the record. +""" + +from __future__ import annotations + +import sys +from pathlib import Path + +REPO_ROOT = Path(__file__).resolve().parent.parent +sys.path.insert(0, str(REPO_ROOT / "scripts")) + +from audit_proposals import ( # noqa: E402 + MIN_CITATIONS, + audit_record, + distinct_citations, + is_placeholder, + is_valid_reference, +) + + +def _proposed(definition_source=None, refs=()): + rec = {"identifier": "traitmech:000999", "mapping_status": "PROPOSED"} + if definition_source is not None: + rec["definition_source"] = definition_source + rec["evidence"] = [{"reference": r} for r in refs] + return rec + + +# ---------------------------------------------------------------- helpers + + +def test_is_placeholder(): + assert is_placeholder("TODO:add_citation") + assert is_placeholder("") + assert is_placeholder(" ") + assert not is_placeholder("PMID:12345678") + + +def test_is_valid_reference(): + assert is_valid_reference("PMID:12829275") + assert is_valid_reference("DOI:10.1099/ijsem.0.001671") + assert is_valid_reference("https://example.org/paper") + assert not is_valid_reference("just some text") + assert not is_valid_reference("PMID:") + + +def test_distinct_citations_dedupes_and_drops_placeholders(): + rec = _proposed( + definition_source="DOI:10.1/x", + refs=["DOI:10.1/x", "PMID:111", "TODO:add_citation", ""], + ) + # DOI:10.1/x appears twice (def_source + evidence) -> counted once; + # placeholder 
+ empty dropped. + assert distinct_citations(rec) == ["DOI:10.1/x", "PMID:111"] + + +# ---------------------------------------------------------------- audit_record + + +def test_audit_skips_non_proposed(): + for status in ("SEEDED", "REVIEWED", "DEPRECATED"): + rec = _proposed(definition_source="DOI:10.1/x", refs=["PMID:1"]) + rec["mapping_status"] = status + assert audit_record(rec) is None + + +def test_audit_passes_with_two_distinct_citations(): + rec = _proposed(definition_source="DOI:10.1/x", refs=["PMID:222"]) + row = audit_record(rec) + assert row is not None + assert row["n_citations"] == 2 + assert row["passes"] == "yes" + + +def test_audit_fails_with_single_citation(): + rec = _proposed(definition_source="DOI:10.1/x", refs=["DOI:10.1/x"]) + row = audit_record(rec) + assert row["n_citations"] == 1 + assert row["passes"] == "no" + + +def test_audit_fails_when_only_placeholders(): + rec = _proposed(definition_source="TODO:add_citation", refs=["TODO:later"]) + row = audit_record(rec) + assert row["n_citations"] == 0 + assert row["passes"] == "no" + + +def test_audit_fails_on_malformed_reference(): + rec = _proposed(definition_source="DOI:10.1/x", refs=["not a real ref"]) + row = audit_record(rec) + assert row["malformed"] == "not a real ref" + assert row["passes"] == "no" + + +def test_min_citations_is_two(): + assert MIN_CITATIONS == 2 diff --git a/tests/test_seed.py b/tests/test_seed.py index c6704e78..c7a351bf 100644 --- a/tests/test_seed.py +++ b/tests/test_seed.py @@ -37,8 +37,13 @@ def test_every_trait_yaml_has_required_fields(): assert isinstance(doc, dict), f"{p}: not a dict" for required in ("identifier", "label", "trait_category", "term_kind", "mapping_status"): assert required in doc, f"{p}: missing {required!r}" - assert doc["identifier"].startswith("METPO:"), f"{p}: identifier not a METPO CURIE" - assert doc["mapping_status"] in {"SEEDED", "REVIEWED", "DEPRECATED"}, ( + # Seeded records use the METPO CURIE directly; curator-minted records + # 
(e.g. PROPOSED candidate traits) may use the reserved `traitmech:` + # prefix — see .claude/skills/manage-identifiers/SKILL.md. + assert doc["identifier"].startswith(("METPO:", "traitmech:")), ( + f"{p}: identifier {doc['identifier']!r} is neither a METPO nor a traitmech CURIE" + ) + assert doc["mapping_status"] in {"SEEDED", "PROPOSED", "REVIEWED", "DEPRECATED"}, ( f"{p}: status={doc['mapping_status']!r}" ) From ba47cdc36de6f76800512eada2484ac17f04921e Mon Sep 17 00:00:00 2001 From: "marcin p. joachimiak" <4625870+realmarcin@users.noreply.github.com> Date: Mon, 1 Jun 2026 11:39:33 -0700 Subject: [PATCH 2/2] audit_proposals: emit repo-relative paths in TSV (Copilot review) The proposal citation audit wrote str(path) where path is anchored at _REPO_ROOT (absolute), so the committed reports/proposal_citation_audit.tsv contained one contributor's absolute filesystem paths and would produce a spurious diff on every CI/contributor run. Relativize to _REPO_ROOT before writing, and regenerate the TSV. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- reports/proposal_citation_audit.tsv | 78 ++++++++++++++--------------- scripts/audit_proposals.py | 8 ++- 2 files changed, 46 insertions(+), 40 deletions(-) diff --git a/reports/proposal_citation_audit.tsv b/reports/proposal_citation_audit.tsv index 3607f3cf..b45e58ed 100644 --- a/reports/proposal_citation_audit.tsv +++ b/reports/proposal_citation_audit.tsv @@ -1,40 +1,40 @@ path identifier n_citations citations malformed passes -/Users/marcin/Documents/VIMSS/ontology/KG-Hub/KG-Microbe/TraitMech/data/traits/environment/arsenic_tolerant.yaml traitmech:000017 2 DOI:10.3389/fmicb.2018.02473; DOI:10.3389/fmicb.2020.00047 yes -/Users/marcin/Documents/VIMSS/ontology/KG-Hub/KG-Microbe/TraitMech/data/traits/environment/cadmium_tolerant.yaml traitmech:000013 3 PMID:12829273; DOI:10.1111/j.1365-2958.2009.06792.x; DOI:10.3389/fmicb.2020.00047 yes -/Users/marcin/Documents/VIMSS/ontology/KG-Hub/KG-Microbe/TraitMech/data/traits/environment/cobalt_tolerant.yaml traitmech:000015 3 PMID:12829273; DOI:10.1111/j.1365-2958.2009.06792.x; DOI:10.3389/fmicb.2020.00047 yes -/Users/marcin/Documents/VIMSS/ontology/KG-Hub/KG-Microbe/TraitMech/data/traits/environment/copper_tolerant.yaml traitmech:000018 2 DOI:10.1007/s10565-013-9262-1; DOI:10.3389/fmicb.2020.00047 yes -/Users/marcin/Documents/VIMSS/ontology/KG-Hub/KG-Microbe/TraitMech/data/traits/environment/desiccation_tolerant.yaml traitmech:000010 2 DOI:10.3390/microorganisms10020432; DOI:10.3390/genes14091803 yes -/Users/marcin/Documents/VIMSS/ontology/KG-Hub/KG-Microbe/TraitMech/data/traits/environment/ionizing_radiation_tolerant.yaml traitmech:000008 2 DOI:10.3390/genes14091803; DOI:10.1101/cshperspect.a012765 yes -/Users/marcin/Documents/VIMSS/ontology/KG-Hub/KG-Microbe/TraitMech/data/traits/environment/mercury_tolerant.yaml traitmech:000016 2 DOI:10.1016/S0168-6445(03)00046-9; PMID:12829273 yes 
-/Users/marcin/Documents/VIMSS/ontology/KG-Hub/KG-Microbe/TraitMech/data/traits/environment/metal_tolerant.yaml traitmech:000012 2 PMID:12829273; DOI:10.3389/fmicb.2020.00047 yes -/Users/marcin/Documents/VIMSS/ontology/KG-Hub/KG-Microbe/TraitMech/data/traits/environment/obligately_piezophilic.yaml traitmech:000002 2 DOI:10.1038/srep27289; DOI:10.1099/ijsem.0.001671 yes -/Users/marcin/Documents/VIMSS/ontology/KG-Hub/KG-Microbe/TraitMech/data/traits/environment/piezophilic.yaml traitmech:000001 2 DOI:10.3389/fmolb.2022.1058381; DOI:10.1099/ijsem.0.001671 yes -/Users/marcin/Documents/VIMSS/ontology/KG-Hub/KG-Microbe/TraitMech/data/traits/environment/piezotolerant.yaml traitmech:000003 2 DOI:10.3389/fmolb.2022.1058381; DOI:10.1099/ijsem.0.001671 yes -/Users/marcin/Documents/VIMSS/ontology/KG-Hub/KG-Microbe/TraitMech/data/traits/environment/pressure_delta.yaml traitmech:000006 2 DOI:10.3389/fmolb.2022.1058381; DOI:10.1099/ijsem.0.001671 yes -/Users/marcin/Documents/VIMSS/ontology/KG-Hub/KG-Microbe/TraitMech/data/traits/environment/pressure_optimum.yaml traitmech:000004 2 DOI:10.1099/ijsem.0.001671; DOI:10.3389/fmolb.2022.1058381 yes -/Users/marcin/Documents/VIMSS/ontology/KG-Hub/KG-Microbe/TraitMech/data/traits/environment/pressure_range.yaml traitmech:000005 2 DOI:10.1099/ijsem.0.001671; DOI:10.3389/fmolb.2022.1058381 yes -/Users/marcin/Documents/VIMSS/ontology/KG-Hub/KG-Microbe/TraitMech/data/traits/environment/radiotolerant.yaml traitmech:000007 2 DOI:10.1101/cshperspect.a012765; DOI:10.3390/genes14091803 yes -/Users/marcin/Documents/VIMSS/ontology/KG-Hub/KG-Microbe/TraitMech/data/traits/environment/uv_radiation_tolerant.yaml traitmech:000009 2 DOI:10.3390/genes14091803; DOI:10.1101/cshperspect.a012765 yes -/Users/marcin/Documents/VIMSS/ontology/KG-Hub/KG-Microbe/TraitMech/data/traits/environment/xerophilic.yaml traitmech:000011 2 DOI:10.1098/rstb.2004.1502; DOI:10.3390/microorganisms10020432 yes 
-/Users/marcin/Documents/VIMSS/ontology/KG-Hub/KG-Microbe/TraitMech/data/traits/environment/zinc_tolerant.yaml traitmech:000014 3 PMID:12829273; DOI:10.1111/j.1365-2958.2009.06792.x; DOI:10.3389/fmicb.2020.00047 yes -/Users/marcin/Documents/VIMSS/ontology/KG-Hub/KG-Microbe/TraitMech/data/traits/metabolism/anaerobic_oxidation_of_methane.yaml traitmech:000033 2 DOI:10.1038/35036572; DOI:10.3389/fmars.2025.1609892 yes -/Users/marcin/Documents/VIMSS/ontology/KG-Hub/KG-Microbe/TraitMech/data/traits/metabolism/anoxygenic_photosynthesis.yaml traitmech:000035 2 DOI:10.1016/j.tim.2006.09.001; DOI:10.3389/fmicb.2024.1417714 yes -/Users/marcin/Documents/VIMSS/ontology/KG-Hub/KG-Microbe/TraitMech/data/traits/metabolism/calvin_benson_bassham_cycle.yaml traitmech:000020 2 DOI:10.1128/AEM.02473-10; DOI:10.1146/annurev-marine-120709-142712 yes -/Users/marcin/Documents/VIMSS/ontology/KG-Hub/KG-Microbe/TraitMech/data/traits/metabolism/carbon_fixation.yaml traitmech:000019 2 DOI:10.1128/AEM.02473-10; DOI:10.1146/annurev-marine-120709-142712 yes -/Users/marcin/Documents/VIMSS/ontology/KG-Hub/KG-Microbe/TraitMech/data/traits/metabolism/dicarboxylate_four_hydroxybutyrate_cycle.yaml traitmech:000025 2 DOI:10.1126/science.1149976; DOI:10.1128/AEM.02473-10 yes -/Users/marcin/Documents/VIMSS/ontology/KG-Hub/KG-Microbe/TraitMech/data/traits/metabolism/dissimilatory_iron_reduction.yaml traitmech:000031 2 DOI:10.1128/mr.55.2.259-287.1991; PMID:7826009 yes -/Users/marcin/Documents/VIMSS/ontology/KG-Hub/KG-Microbe/TraitMech/data/traits/metabolism/dissimilatory_metal_reduction.yaml traitmech:000039 2 DOI:10.1128/mr.55.2.259-287.1991; PMID:7826009 yes -/Users/marcin/Documents/VIMSS/ontology/KG-Hub/KG-Microbe/TraitMech/data/traits/metabolism/dissimilatory_nitrate_reduction_to_ammonium.yaml traitmech:000030 2 DOI:10.1126/science.1254070; DOI:10.1007/s11157-025-09719-5 yes -/Users/marcin/Documents/VIMSS/ontology/KG-Hub/KG-Microbe/TraitMech/data/traits/metabolism/ethanol_fermentation.yaml 
traitmech:000028 2 DOI:10.3389/fmicb.2021.703525; DOI:10.3390/molecules31020333 yes -/Users/marcin/Documents/VIMSS/ontology/KG-Hub/KG-Microbe/TraitMech/data/traits/metabolism/lactic_acid_fermentation.yaml traitmech:000026 2 DOI:10.3389/fmicb.2021.703525; DOI:10.3390/molecules31020333 yes -/Users/marcin/Documents/VIMSS/ontology/KG-Hub/KG-Microbe/TraitMech/data/traits/metabolism/manganese_oxidation.yaml traitmech:000032 2 DOI:10.1016/j.tim.2005.07.009; DOI:10.1146/annurev.earth.32.101802.120213 yes -/Users/marcin/Documents/VIMSS/ontology/KG-Hub/KG-Microbe/TraitMech/data/traits/metabolism/mixed_acid_fermentation.yaml traitmech:000027 2 DOI:10.3389/fmicb.2021.703525; DOI:10.3390/molecules31020333 yes -/Users/marcin/Documents/VIMSS/ontology/KG-Hub/KG-Microbe/TraitMech/data/traits/metabolism/oxygenic_photosynthesis.yaml traitmech:000034 2 DOI:10.1016/j.tim.2006.09.001; DOI:10.1146/annurev-earth-060313-054810 yes -/Users/marcin/Documents/VIMSS/ontology/KG-Hub/KG-Microbe/TraitMech/data/traits/metabolism/photosynthesis.yaml traitmech:000038 2 DOI:10.1016/j.tim.2006.09.001; DOI:10.1146/annurev-earth-060313-054810 yes -/Users/marcin/Documents/VIMSS/ontology/KG-Hub/KG-Microbe/TraitMech/data/traits/metabolism/phototrophy.yaml traitmech:000037 2 DOI:10.1016/j.tim.2006.09.001; DOI:10.1126/science.289.5486.1902 yes -/Users/marcin/Documents/VIMSS/ontology/KG-Hub/KG-Microbe/TraitMech/data/traits/metabolism/propionic_acid_fermentation.yaml traitmech:000029 2 DOI:10.3390/molecules31020333; DOI:10.3389/fmicb.2021.703525 yes -/Users/marcin/Documents/VIMSS/ontology/KG-Hub/KG-Microbe/TraitMech/data/traits/metabolism/proteorhodopsin_phototrophy.yaml traitmech:000036 2 DOI:10.1126/science.289.5486.1902; DOI:10.1038/35081051 yes -/Users/marcin/Documents/VIMSS/ontology/KG-Hub/KG-Microbe/TraitMech/data/traits/metabolism/reductive_tca_cycle.yaml traitmech:000021 2 DOI:10.1128/AEM.02473-10; DOI:10.1146/annurev-marine-120709-142712 yes 
-/Users/marcin/Documents/VIMSS/ontology/KG-Hub/KG-Microbe/TraitMech/data/traits/metabolism/three_hydroxypropionate_bicycle.yaml traitmech:000023 2 DOI:10.1128/AEM.02473-10; DOI:10.1146/annurev-marine-120709-142712 yes -/Users/marcin/Documents/VIMSS/ontology/KG-Hub/KG-Microbe/TraitMech/data/traits/metabolism/three_hydroxypropionate_four_hydroxybutyrate_cycle.yaml traitmech:000024 2 DOI:10.1126/science.1149976; DOI:10.1128/AEM.02473-10 yes -/Users/marcin/Documents/VIMSS/ontology/KG-Hub/KG-Microbe/TraitMech/data/traits/metabolism/wood_ljungdahl_pathway.yaml traitmech:000022 2 DOI:10.1016/j.bbapap.2008.08.012; DOI:10.1128/AEM.02473-10 yes +data/traits/environment/arsenic_tolerant.yaml traitmech:000017 2 DOI:10.3389/fmicb.2018.02473; DOI:10.3389/fmicb.2020.00047 yes +data/traits/environment/cadmium_tolerant.yaml traitmech:000013 3 PMID:12829273; DOI:10.1111/j.1365-2958.2009.06792.x; DOI:10.3389/fmicb.2020.00047 yes +data/traits/environment/cobalt_tolerant.yaml traitmech:000015 3 PMID:12829273; DOI:10.1111/j.1365-2958.2009.06792.x; DOI:10.3389/fmicb.2020.00047 yes +data/traits/environment/copper_tolerant.yaml traitmech:000018 2 DOI:10.1007/s10565-013-9262-1; DOI:10.3389/fmicb.2020.00047 yes +data/traits/environment/desiccation_tolerant.yaml traitmech:000010 2 DOI:10.3390/microorganisms10020432; DOI:10.3390/genes14091803 yes +data/traits/environment/ionizing_radiation_tolerant.yaml traitmech:000008 2 DOI:10.3390/genes14091803; DOI:10.1101/cshperspect.a012765 yes +data/traits/environment/mercury_tolerant.yaml traitmech:000016 2 DOI:10.1016/S0168-6445(03)00046-9; PMID:12829273 yes +data/traits/environment/metal_tolerant.yaml traitmech:000012 2 PMID:12829273; DOI:10.3389/fmicb.2020.00047 yes +data/traits/environment/obligately_piezophilic.yaml traitmech:000002 2 DOI:10.1038/srep27289; DOI:10.1099/ijsem.0.001671 yes +data/traits/environment/piezophilic.yaml traitmech:000001 2 DOI:10.3389/fmolb.2022.1058381; DOI:10.1099/ijsem.0.001671 yes 
+data/traits/environment/piezotolerant.yaml traitmech:000003 2 DOI:10.3389/fmolb.2022.1058381; DOI:10.1099/ijsem.0.001671 yes +data/traits/environment/pressure_delta.yaml traitmech:000006 2 DOI:10.3389/fmolb.2022.1058381; DOI:10.1099/ijsem.0.001671 yes +data/traits/environment/pressure_optimum.yaml traitmech:000004 2 DOI:10.1099/ijsem.0.001671; DOI:10.3389/fmolb.2022.1058381 yes +data/traits/environment/pressure_range.yaml traitmech:000005 2 DOI:10.1099/ijsem.0.001671; DOI:10.3389/fmolb.2022.1058381 yes +data/traits/environment/radiotolerant.yaml traitmech:000007 2 DOI:10.1101/cshperspect.a012765; DOI:10.3390/genes14091803 yes +data/traits/environment/uv_radiation_tolerant.yaml traitmech:000009 2 DOI:10.3390/genes14091803; DOI:10.1101/cshperspect.a012765 yes +data/traits/environment/xerophilic.yaml traitmech:000011 2 DOI:10.1098/rstb.2004.1502; DOI:10.3390/microorganisms10020432 yes +data/traits/environment/zinc_tolerant.yaml traitmech:000014 3 PMID:12829273; DOI:10.1111/j.1365-2958.2009.06792.x; DOI:10.3389/fmicb.2020.00047 yes +data/traits/metabolism/anaerobic_oxidation_of_methane.yaml traitmech:000033 2 DOI:10.1038/35036572; DOI:10.3389/fmars.2025.1609892 yes +data/traits/metabolism/anoxygenic_photosynthesis.yaml traitmech:000035 2 DOI:10.1016/j.tim.2006.09.001; DOI:10.3389/fmicb.2024.1417714 yes +data/traits/metabolism/calvin_benson_bassham_cycle.yaml traitmech:000020 2 DOI:10.1128/AEM.02473-10; DOI:10.1146/annurev-marine-120709-142712 yes +data/traits/metabolism/carbon_fixation.yaml traitmech:000019 2 DOI:10.1128/AEM.02473-10; DOI:10.1146/annurev-marine-120709-142712 yes +data/traits/metabolism/dicarboxylate_four_hydroxybutyrate_cycle.yaml traitmech:000025 2 DOI:10.1126/science.1149976; DOI:10.1128/AEM.02473-10 yes +data/traits/metabolism/dissimilatory_iron_reduction.yaml traitmech:000031 2 DOI:10.1128/mr.55.2.259-287.1991; PMID:7826009 yes +data/traits/metabolism/dissimilatory_metal_reduction.yaml traitmech:000039 2 DOI:10.1128/mr.55.2.259-287.1991; 
PMID:7826009 yes +data/traits/metabolism/dissimilatory_nitrate_reduction_to_ammonium.yaml traitmech:000030 2 DOI:10.1126/science.1254070; DOI:10.1007/s11157-025-09719-5 yes +data/traits/metabolism/ethanol_fermentation.yaml traitmech:000028 2 DOI:10.3389/fmicb.2021.703525; DOI:10.3390/molecules31020333 yes +data/traits/metabolism/lactic_acid_fermentation.yaml traitmech:000026 2 DOI:10.3389/fmicb.2021.703525; DOI:10.3390/molecules31020333 yes +data/traits/metabolism/manganese_oxidation.yaml traitmech:000032 2 DOI:10.1016/j.tim.2005.07.009; DOI:10.1146/annurev.earth.32.101802.120213 yes +data/traits/metabolism/mixed_acid_fermentation.yaml traitmech:000027 2 DOI:10.3389/fmicb.2021.703525; DOI:10.3390/molecules31020333 yes +data/traits/metabolism/oxygenic_photosynthesis.yaml traitmech:000034 2 DOI:10.1016/j.tim.2006.09.001; DOI:10.1146/annurev-earth-060313-054810 yes +data/traits/metabolism/photosynthesis.yaml traitmech:000038 2 DOI:10.1016/j.tim.2006.09.001; DOI:10.1146/annurev-earth-060313-054810 yes +data/traits/metabolism/phototrophy.yaml traitmech:000037 2 DOI:10.1016/j.tim.2006.09.001; DOI:10.1126/science.289.5486.1902 yes +data/traits/metabolism/propionic_acid_fermentation.yaml traitmech:000029 2 DOI:10.3390/molecules31020333; DOI:10.3389/fmicb.2021.703525 yes +data/traits/metabolism/proteorhodopsin_phototrophy.yaml traitmech:000036 2 DOI:10.1126/science.289.5486.1902; DOI:10.1038/35081051 yes +data/traits/metabolism/reductive_tca_cycle.yaml traitmech:000021 2 DOI:10.1128/AEM.02473-10; DOI:10.1146/annurev-marine-120709-142712 yes +data/traits/metabolism/three_hydroxypropionate_bicycle.yaml traitmech:000023 2 DOI:10.1128/AEM.02473-10; DOI:10.1146/annurev-marine-120709-142712 yes +data/traits/metabolism/three_hydroxypropionate_four_hydroxybutyrate_cycle.yaml traitmech:000024 2 DOI:10.1126/science.1149976; DOI:10.1128/AEM.02473-10 yes +data/traits/metabolism/wood_ljungdahl_pathway.yaml traitmech:000022 2 DOI:10.1016/j.bbapap.2008.08.012; DOI:10.1128/AEM.02473-10 yes 
diff --git a/scripts/audit_proposals.py b/scripts/audit_proposals.py index e121ec05..8968227b 100644 --- a/scripts/audit_proposals.py +++ b/scripts/audit_proposals.py @@ -128,7 +128,13 @@ def main(argv: list[str] | None = None) -> int: continue if not isinstance(record, dict): continue - row = audit_record(record, str(path)) + # Emit repository-relative paths so the TSV is reproducible across + # contributors and CI (paths under DEFAULT_ROOTS are absolute). + try: + display_path = path.resolve().relative_to(_REPO_ROOT) + except ValueError: + display_path = path + row = audit_record(record, str(display_path)) if row is not None: rows.append(row)