From 0d752f6905fc79714b8224cfe5ac9505e4748555 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joaqu=C3=ADn=20T=C3=A1rraga=20Gim=C3=A9nez?= Date: Tue, 19 Dec 2023 14:19:00 +0100 Subject: [PATCH 01/24] models: add PGS data models, #TASK-5407, #TASK-5387 --- .../biodata/models/core/pgs/Cohort.java | 60 ++++++++ .../models/core/pgs/CommonPolygenicScore.java | 128 ++++++++++++++++++ .../biodata/models/core/pgs/EfoTrait.java | 84 ++++++++++++ .../models/core/pgs/PerformanceMetrics.java | 125 +++++++++++++++++ .../models/core/pgs/PolygenicScore.java | 97 +++++++++++++ .../core/pgs/VariantPolygenicScore.java | 100 ++++++++++++++ 6 files changed, 594 insertions(+) create mode 100644 biodata-models/src/main/java/org/opencb/biodata/models/core/pgs/Cohort.java create mode 100644 biodata-models/src/main/java/org/opencb/biodata/models/core/pgs/CommonPolygenicScore.java create mode 100644 biodata-models/src/main/java/org/opencb/biodata/models/core/pgs/EfoTrait.java create mode 100644 biodata-models/src/main/java/org/opencb/biodata/models/core/pgs/PerformanceMetrics.java create mode 100644 biodata-models/src/main/java/org/opencb/biodata/models/core/pgs/PolygenicScore.java create mode 100644 biodata-models/src/main/java/org/opencb/biodata/models/core/pgs/VariantPolygenicScore.java diff --git a/biodata-models/src/main/java/org/opencb/biodata/models/core/pgs/Cohort.java b/biodata-models/src/main/java/org/opencb/biodata/models/core/pgs/Cohort.java new file mode 100644 index 00000000..ee74b25a --- /dev/null +++ b/biodata-models/src/main/java/org/opencb/biodata/models/core/pgs/Cohort.java @@ -0,0 +1,60 @@ +/* + * + * + */ + +package org.opencb.biodata.models.core.pgs; + +public class Cohort { + private String id; + private String name; + + public Cohort() { + } + + public Cohort(String id, String name) { + this.id = id; + this.name = name; + } + + @Override + public String toString() { + final StringBuilder sb = new StringBuilder("Cohort{"); + sb.append("id='").append(id).append('\''); + 
sb.append(", name='").append(name).append('\''); + sb.append('}'); + return sb.toString(); + } + + public String getId() { + return id; + } + + public Cohort setId(String id) { + this.id = id; + return this; + } + + public String getName() { + return name; + } + + public Cohort setName(String name) { + this.name = name; + return this; + } +} diff --git a/biodata-models/src/main/java/org/opencb/biodata/models/core/pgs/CommonPolygenicScore.java b/biodata-models/src/main/java/org/opencb/biodata/models/core/pgs/CommonPolygenicScore.java new file mode 100644 index 00000000..6cd2b015 --- /dev/null +++ b/biodata-models/src/main/java/org/opencb/biodata/models/core/pgs/CommonPolygenicScore.java @@ -0,0 +1,128 @@ +/* + * + * + */ + +package org.opencb.biodata.models.core.pgs; + +import java.util.ArrayList; +import java.util.List; + +public class CommonPolygenicScore { + private String id; + private String name; + private String source; + private String version; + private List pubmedIds; + private List efoTraits; + private List cohorts; + private List performanceMetrics; + + public CommonPolygenicScore() { + pubmedIds = new ArrayList<>(); + efoTraits = new ArrayList<>(); + cohorts = new ArrayList<>(); + performanceMetrics = new ArrayList<>(); + } + + @Override + public String toString() { + final StringBuilder sb = new StringBuilder("CommonPolygenicScore{"); + sb.append("id='").append(id).append('\''); + sb.append(", name='").append(name).append('\''); + sb.append(", source='").append(source).append('\''); + sb.append(", version='").append(version).append('\''); + sb.append(", pubmedIds=").append(pubmedIds); + sb.append(", efoTraits=").append(efoTraits); + sb.append(", cohorts=").append(cohorts); + sb.append(", performanceMetrics=").append(performanceMetrics); + sb.append('}'); + return sb.toString(); + } + + public String getId() { + return id; + } + + public CommonPolygenicScore setId(String id) { + this.id = id; + return this; + } + + public String getName() { + return 
name; + } + + public CommonPolygenicScore setName(String name) { + this.name = name; + return this; + } + + public String getSource() { + return source; + } + + public CommonPolygenicScore setSource(String source) { + this.source = source; + return this; + } + + public String getVersion() { + return version; + } + + public CommonPolygenicScore setVersion(String version) { + this.version = version; + return this; + } + + public List getPubmedIds() { + return pubmedIds; + } + + public CommonPolygenicScore setPubmedIds(List pubmedIds) { + this.pubmedIds = pubmedIds; + return this; + } + + public List getEfoTraits() { + return efoTraits; + } + + public CommonPolygenicScore setEfoTraits(List efoTraits) { + this.efoTraits = efoTraits; + return this; + } + + public List getCohorts() { + return cohorts; + } + + public CommonPolygenicScore setCohorts(List cohorts) { + this.cohorts = cohorts; + return this; + } + + public List getPerformanceMetrics() { + return performanceMetrics; + } + + public CommonPolygenicScore setPerformanceMetrics(List performanceMetrics) { + this.performanceMetrics = performanceMetrics; + return this; + } +} diff --git a/biodata-models/src/main/java/org/opencb/biodata/models/core/pgs/EfoTrait.java b/biodata-models/src/main/java/org/opencb/biodata/models/core/pgs/EfoTrait.java new file mode 100644 index 00000000..1b0e316e --- /dev/null +++ b/biodata-models/src/main/java/org/opencb/biodata/models/core/pgs/EfoTrait.java @@ -0,0 +1,84 @@ +/* + * + * + */ + +package org.opencb.biodata.models.core.pgs; + +public class EfoTrait { + private String id; + private String label; + private String description; + private String url; + + public EfoTrait() { + } + + public EfoTrait(String id, String label, String description, String url) { + this.id = id; + this.label = label; + this.description = description; + this.url = url; + } + + @Override + public String toString() { + final StringBuilder sb = new StringBuilder("EfoTrait{"); + 
sb.append("id='").append(id).append('\''); + sb.append(", label='").append(label).append('\''); + sb.append(", description='").append(description).append('\''); + sb.append(", url='").append(url).append('\''); + sb.append('}'); + return sb.toString(); + } + + public String getId() { + return id; + } + + public EfoTrait setId(String id) { + this.id = id; + return this; + } + + public String getLabel() { + return label; + } + + public EfoTrait setLabel(String label) { + this.label = label; + return this; + } + + public String getDescription() { + return description; + } + + public EfoTrait setDescription(String description) { + this.description = description; + return this; + } + + public String getUrl() { + return url; + } + + public EfoTrait setUrl(String url) { + this.url = url; + return this; + } +} diff --git a/biodata-models/src/main/java/org/opencb/biodata/models/core/pgs/PerformanceMetrics.java b/biodata-models/src/main/java/org/opencb/biodata/models/core/pgs/PerformanceMetrics.java new file mode 100644 index 00000000..6d9b263c --- /dev/null +++ b/biodata-models/src/main/java/org/opencb/biodata/models/core/pgs/PerformanceMetrics.java @@ -0,0 +1,125 @@ +/* + * + * + */ + +package org.opencb.biodata.models.core.pgs; + +public class PerformanceMetrics { + private String id; + // Hazard ratio (HR) + private String hazardRatio; + // Odds ratio (OR) + private String oddsRatio; + private String beta; + // Area Under the Receiver-Operating Characteristic Curve (AUROC) + private String auroc; + // Concordance Statistic (C-index) + private String cIndex; + private String otherMetrics; + + public PerformanceMetrics() { + } + + public PerformanceMetrics(String id, String hazardRatio, String oddsRatio, String beta, String auroc, String cIndex, + String otherMetrics) { + this.id = id; + this.hazardRatio = hazardRatio; + this.oddsRatio = oddsRatio; + this.beta = beta; + this.auroc = auroc; + this.cIndex = cIndex; + this.otherMetrics = otherMetrics; + } + + @Override + 
public String toString() { + final StringBuilder sb = new StringBuilder("PerformanceMetrics{"); + sb.append("id='").append(id).append('\''); + sb.append(", hazardRatio=").append(hazardRatio); + sb.append(", oddsRatio=").append(oddsRatio); + sb.append(", beta=").append(beta); + sb.append(", auroc=").append(auroc); + sb.append(", cIndex=").append(cIndex); + sb.append(", otherMetrics='").append(otherMetrics).append('\''); + sb.append('}'); + return sb.toString(); + } + + public String getId() { + return id; + } + + public PerformanceMetrics setId(String id) { + this.id = id; + return this; + } + + public String getHazardRatio() { + return hazardRatio; + } + + public PerformanceMetrics setHazardRatio(String hazardRatio) { + this.hazardRatio = hazardRatio; + return this; + } + + public String getOddsRatio() { + return oddsRatio; + } + + public PerformanceMetrics setOddsRatio(String oddsRatio) { + this.oddsRatio = oddsRatio; + return this; + } + + public String getBeta() { + return beta; + } + + public PerformanceMetrics setBeta(String beta) { + this.beta = beta; + return this; + } + + public String getAuroc() { + return auroc; + } + + public PerformanceMetrics setAuroc(String auroc) { + this.auroc = auroc; + return this; + } + + public String getcIndex() { + return cIndex; + } + + public PerformanceMetrics setcIndex(String cIndex) { + this.cIndex = cIndex; + return this; + } + + public String getOtherMetrics() { + return otherMetrics; + } + + public PerformanceMetrics setOtherMetrics(String otherMetrics) { + this.otherMetrics = otherMetrics; + return this; + } +} diff --git a/biodata-models/src/main/java/org/opencb/biodata/models/core/pgs/PolygenicScore.java b/biodata-models/src/main/java/org/opencb/biodata/models/core/pgs/PolygenicScore.java new file mode 100644 index 00000000..f116c106 --- /dev/null +++ b/biodata-models/src/main/java/org/opencb/biodata/models/core/pgs/PolygenicScore.java @@ -0,0 +1,97 @@ +/* + * + * + */ + +package org.opencb.biodata.models.core.pgs; 
+ +public class PolygenicScore { + + private String id; + private double effectWeight; + private double alleleFrequencyEffect; + private double Or; + private String locusName; + + public PolygenicScore() { + } + + public PolygenicScore(String id, double effectWeight, double alleleFrequencyEffect, double or, String locusName) { + this.id = id; + this.effectWeight = effectWeight; + this.alleleFrequencyEffect = alleleFrequencyEffect; + Or = or; + this.locusName = locusName; + } + + @Override + public String toString() { + final StringBuilder sb = new StringBuilder("PolygenicScore{"); + sb.append("id='").append(id).append('\''); + sb.append(", effectWeight=").append(effectWeight); + sb.append(", alleleFrequencyEffect=").append(alleleFrequencyEffect); + sb.append(", Or=").append(Or); + sb.append(", locusName='").append(locusName).append('\''); + sb.append('}'); + return sb.toString(); + } + + public String getId() { + return id; + } + + public PolygenicScore setId(String id) { + this.id = id; + return this; + } + + public double getEffectWeight() { + return effectWeight; + } + + public PolygenicScore setEffectWeight(double effectWeight) { + this.effectWeight = effectWeight; + return this; + } + + public double getAlleleFrequencyEffect() { + return alleleFrequencyEffect; + } + + public PolygenicScore setAlleleFrequencyEffect(double alleleFrequencyEffect) { + this.alleleFrequencyEffect = alleleFrequencyEffect; + return this; + } + + public double getOr() { + return Or; + } + + public PolygenicScore setOr(double or) { + Or = or; + return this; + } + + public String getLocusName() { + return locusName; + } + + public PolygenicScore setLocusName(String locusName) { + this.locusName = locusName; + return this; + } +} diff --git a/biodata-models/src/main/java/org/opencb/biodata/models/core/pgs/VariantPolygenicScore.java b/biodata-models/src/main/java/org/opencb/biodata/models/core/pgs/VariantPolygenicScore.java new file mode 100644 index 00000000..aff34488 --- /dev/null +++ 
b/biodata-models/src/main/java/org/opencb/biodata/models/core/pgs/VariantPolygenicScore.java @@ -0,0 +1,100 @@ +/* + * + * + */ + +package org.opencb.biodata.models.core.pgs; + +import java.util.List; + +public class VariantPolygenicScore { + + private String chromosome; + private int position; + private String effectAllele; + private String otherAllele; + private List polygenicScores; + + public VariantPolygenicScore() { + } + + public VariantPolygenicScore(String chromosome, int position, String effectAllele, String otherAllele, + List polygenicScores) { + this.chromosome = chromosome; + this.position = position; + this.effectAllele = effectAllele; + this.otherAllele = otherAllele; + this.polygenicScores = polygenicScores; + } + + @Override + public String toString() { + final StringBuilder sb = new StringBuilder("VariantPolygenicScore{"); + sb.append("chromosome='").append(chromosome).append('\''); + sb.append(", position=").append(position); + sb.append(", effectAllele='").append(effectAllele).append('\''); + sb.append(", otherAllele='").append(otherAllele).append('\''); + sb.append(", polygenicScores=").append(polygenicScores); + sb.append('}'); + return sb.toString(); + } + + public String getChromosome() { + return chromosome; + } + + public VariantPolygenicScore setChromosome(String chromosome) { + this.chromosome = chromosome; + return this; + } + + public int getPosition() { + return position; + } + + public VariantPolygenicScore setPosition(int position) { + this.position = position; + return this; + } + + public String getEffectAllele() { + return effectAllele; + } + + public VariantPolygenicScore setEffectAllele(String effectAllele) { + this.effectAllele = effectAllele; + return this; + } + + public String getOtherAllele() { + return otherAllele; + } + + public VariantPolygenicScore setOtherAllele(String otherAllele) { + this.otherAllele = otherAllele; + return this; + } + + public List getPolygenicScores() { + return polygenicScores; + } + + public 
VariantPolygenicScore setPolygenicScores(List polygenicScores) { + this.polygenicScores = polygenicScores; + return this; + } +} From 9b2b410a5adf52bc8f514e8acf6209ad40048e6f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joaqu=C3=ADn=20T=C3=A1rraga=20Gim=C3=A9nez?= Date: Thu, 21 Dec 2023 16:54:29 +0100 Subject: [PATCH 02/24] models: add protein substitution prediction (e.g., for AlphaMissense), #TASK-5419, #TASK-5388 --- .../core/ProteinSubstitutionPrediction.java | 114 ++++++++++++++++++ .../models/core/ProteinSubstitutionScore.java | 73 +++++++++++ 2 files changed, 187 insertions(+) create mode 100644 biodata-models/src/main/java/org/opencb/biodata/models/core/ProteinSubstitutionPrediction.java create mode 100644 biodata-models/src/main/java/org/opencb/biodata/models/core/ProteinSubstitutionScore.java diff --git a/biodata-models/src/main/java/org/opencb/biodata/models/core/ProteinSubstitutionPrediction.java b/biodata-models/src/main/java/org/opencb/biodata/models/core/ProteinSubstitutionPrediction.java new file mode 100644 index 00000000..7b6ae392 --- /dev/null +++ b/biodata-models/src/main/java/org/opencb/biodata/models/core/ProteinSubstitutionPrediction.java @@ -0,0 +1,114 @@ +/* + * + * + */ + +package org.opencb.biodata.models.core; + +import java.util.ArrayList; +import java.util.List; + +public class ProteinSubstitutionPrediction { + + private String transcriptId; + private String uniprotId; + private int position; + private String aaReference; + private String source; + private List scores; + + public ProteinSubstitutionPrediction() { + this.scores = new ArrayList<>(); + } + + public ProteinSubstitutionPrediction(String transcriptId, String uniprotId, int position, String aaReference, String source, + List scores) { + this.transcriptId = transcriptId; + this.uniprotId = uniprotId; + this.position = position; + this.aaReference = aaReference; + this.source = source; + this.scores = scores; + } + + @Override + public String toString() { + final StringBuilder sb = 
new StringBuilder("ProteinSubstitutionPrediction{"); + sb.append("transcriptId='").append(transcriptId).append('\''); + sb.append(", uniprotId='").append(uniprotId).append('\''); + sb.append(", position=").append(position); + sb.append(", aaReference='").append(aaReference).append('\''); + sb.append(", source='").append(source).append('\''); + sb.append(", scores=").append(scores); + sb.append('}'); + return sb.toString(); + } + + public String getTranscriptId() { + return transcriptId; + } + + public ProteinSubstitutionPrediction setTranscriptId(String transcriptId) { + this.transcriptId = transcriptId; + return this; + } + + public String getUniprotId() { + return uniprotId; + } + + public ProteinSubstitutionPrediction setUniprotId(String uniprotId) { + this.uniprotId = uniprotId; + return this; + } + + public int getPosition() { + return position; + } + + public ProteinSubstitutionPrediction setPosition(int position) { + this.position = position; + return this; + } + + public String getAaReference() { + return aaReference; + } + + public ProteinSubstitutionPrediction setAaReference(String aaReference) { + this.aaReference = aaReference; + return this; + } + + public String getSource() { + return source; + } + + public ProteinSubstitutionPrediction setSource(String source) { + this.source = source; + return this; + } + + public List getScores() { + return scores; + } + + public ProteinSubstitutionPrediction setScores(List scores) { + this.scores = scores; + return this; + } +} diff --git a/biodata-models/src/main/java/org/opencb/biodata/models/core/ProteinSubstitutionScore.java b/biodata-models/src/main/java/org/opencb/biodata/models/core/ProteinSubstitutionScore.java new file mode 100644 index 00000000..fc04065f --- /dev/null +++ b/biodata-models/src/main/java/org/opencb/biodata/models/core/ProteinSubstitutionScore.java @@ -0,0 +1,73 @@ +/* + * + * + */ + +package org.opencb.biodata.models.core; + +public class ProteinSubstitutionScore { + + private String 
aaAlternate; + private double score; + private String effect; + + public ProteinSubstitutionScore() { + } + + public ProteinSubstitutionScore(String aaAlternate, double score, String effect) { + this.aaAlternate = aaAlternate; + this.score = score; + this.effect = effect; + } + + @Override + public String toString() { + final StringBuilder sb = new StringBuilder("ProteinSubstitutionScore{"); + sb.append("aaAlternate='").append(aaAlternate).append('\''); + sb.append(", score=").append(score); + sb.append(", effect='").append(effect).append('\''); + sb.append('}'); + return sb.toString(); + } + + public String getAaAlternate() { + return aaAlternate; + } + + public ProteinSubstitutionScore setAaAlternate(String aaAlternate) { + this.aaAlternate = aaAlternate; + return this; + } + + public double getScore() { + return score; + } + + public ProteinSubstitutionScore setScore(double score) { + this.score = score; + return this; + } + + public String getEffect() { + return effect; + } + + public ProteinSubstitutionScore setEffect(String effect) { + this.effect = effect; + return this; + } +} From ea903254bf9cfc9b49debfe06a692326fe919aa0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joaqu=C3=ADn=20T=C3=A1rraga=20Gim=C3=A9nez?= Date: Fri, 22 Dec 2023 09:40:05 +0100 Subject: [PATCH 03/24] models: simplified PGS data models, #TASK-5407, #TASK-5387 --- .../EfoTrait.java => PubmedReference.java} | 54 +++++--- .../models/core/pgs/CommonPolygenicScore.java | 66 +++++---- .../models/core/pgs/PerformanceMetrics.java | 125 ------------------ .../core/pgs/{Cohort.java => PgsCohort.java} | 24 +++- .../models/core/pgs/PolygenicScore.java | 56 ++------ 5 files changed, 104 insertions(+), 221 deletions(-) rename biodata-models/src/main/java/org/opencb/biodata/models/core/{pgs/EfoTrait.java => PubmedReference.java} (52%) delete mode 100644 biodata-models/src/main/java/org/opencb/biodata/models/core/pgs/PerformanceMetrics.java rename 
biodata-models/src/main/java/org/opencb/biodata/models/core/pgs/{Cohort.java => PgsCohort.java} (67%) diff --git a/biodata-models/src/main/java/org/opencb/biodata/models/core/pgs/EfoTrait.java b/biodata-models/src/main/java/org/opencb/biodata/models/core/PubmedReference.java similarity index 52% rename from biodata-models/src/main/java/org/opencb/biodata/models/core/pgs/EfoTrait.java rename to biodata-models/src/main/java/org/opencb/biodata/models/core/PubmedReference.java index 1b0e316e..89f66826 100644 --- a/biodata-models/src/main/java/org/opencb/biodata/models/core/pgs/EfoTrait.java +++ b/biodata-models/src/main/java/org/opencb/biodata/models/core/PubmedReference.java @@ -17,30 +17,33 @@ * */ -package org.opencb.biodata.models.core.pgs; +package org.opencb.biodata.models.core; -public class EfoTrait { +public class PubmedReference { private String id; - private String label; - private String description; + private String title; + private String jounal; + private String date; private String url; - public EfoTrait() { + public PubmedReference() { } - public EfoTrait(String id, String label, String description, String url) { + public PubmedReference(String id, String title, String jounal, String date, String url) { this.id = id; - this.label = label; - this.description = description; + this.title = title; + this.jounal = jounal; + this.date = date; this.url = url; } @Override public String toString() { - final StringBuilder sb = new StringBuilder("EfoTrait{"); + final StringBuilder sb = new StringBuilder("PubmedReference{"); sb.append("id='").append(id).append('\''); - sb.append(", label='").append(label).append('\''); - sb.append(", description='").append(description).append('\''); + sb.append(", title='").append(title).append('\''); + sb.append(", jounal='").append(jounal).append('\''); + sb.append(", date='").append(date).append('\''); sb.append(", url='").append(url).append('\''); sb.append('}'); return sb.toString(); @@ -50,26 +53,35 @@ public String getId() 
{ return id; } - public EfoTrait setId(String id) { + public PubmedReference setId(String id) { this.id = id; return this; } - public String getLabel() { - return label; + public String getTitle() { + return title; } - public EfoTrait setLabel(String label) { - this.label = label; + public PubmedReference setTitle(String title) { + this.title = title; return this; } - public String getDescription() { - return description; + public String getJounal() { + return jounal; } - public EfoTrait setDescription(String description) { - this.description = description; + public PubmedReference setJounal(String jounal) { + this.jounal = jounal; + return this; + } + + public String getDate() { + return date; + } + + public PubmedReference setDate(String date) { + this.date = date; return this; } @@ -77,7 +89,7 @@ public String getUrl() { return url; } - public EfoTrait setUrl(String url) { + public PubmedReference setUrl(String url) { this.url = url; return this; } diff --git a/biodata-models/src/main/java/org/opencb/biodata/models/core/pgs/CommonPolygenicScore.java b/biodata-models/src/main/java/org/opencb/biodata/models/core/pgs/CommonPolygenicScore.java index 6cd2b015..daa52cd6 100644 --- a/biodata-models/src/main/java/org/opencb/biodata/models/core/pgs/CommonPolygenicScore.java +++ b/biodata-models/src/main/java/org/opencb/biodata/models/core/pgs/CommonPolygenicScore.java @@ -19,24 +19,40 @@ package org.opencb.biodata.models.core.pgs; +import org.opencb.biodata.models.core.OntologyTermAnnotation; +import org.opencb.biodata.models.core.PubmedReference; + import java.util.ArrayList; import java.util.List; +import java.util.Map; public class CommonPolygenicScore { private String id; private String name; private String source; private String version; - private List pubmedIds; - private List efoTraits; - private List cohorts; - private List performanceMetrics; + private List pubmedRefs; + private List traits; + private List cohorts; + private List> values; public 
CommonPolygenicScore() { - pubmedIds = new ArrayList<>(); - efoTraits = new ArrayList<>(); - cohorts = new ArrayList<>(); - performanceMetrics = new ArrayList<>(); + this.pubmedRefs = new ArrayList<>(); + this.traits = new ArrayList<>(); + this.cohorts = new ArrayList<>(); + this.values = new ArrayList<>(); + } + + public CommonPolygenicScore(String id, String name, String source, String version, List pubmedRefs, + List traits, List cohorts, List> values) { + this.id = id; + this.name = name; + this.source = source; + this.version = version; + this.pubmedRefs = pubmedRefs; + this.traits = traits; + this.cohorts = cohorts; + this.values = values; } @Override @@ -46,10 +62,10 @@ public String toString() { sb.append(", name='").append(name).append('\''); sb.append(", source='").append(source).append('\''); sb.append(", version='").append(version).append('\''); - sb.append(", pubmedIds=").append(pubmedIds); - sb.append(", efoTraits=").append(efoTraits); + sb.append(", pubmedRefs=").append(pubmedRefs); + sb.append(", traits=").append(traits); sb.append(", cohorts=").append(cohorts); - sb.append(", performanceMetrics=").append(performanceMetrics); + sb.append(", values=").append(values); sb.append('}'); return sb.toString(); } @@ -90,39 +106,39 @@ public CommonPolygenicScore setVersion(String version) { return this; } - public List getPubmedIds() { - return pubmedIds; + public List getPubmedRefs() { + return pubmedRefs; } - public CommonPolygenicScore setPubmedIds(List pubmedIds) { - this.pubmedIds = pubmedIds; + public CommonPolygenicScore setPubmedRefs(List pubmedRefs) { + this.pubmedRefs = pubmedRefs; return this; } - public List getEfoTraits() { - return efoTraits; + public List getTraits() { + return traits; } - public CommonPolygenicScore setEfoTraits(List efoTraits) { - this.efoTraits = efoTraits; + public CommonPolygenicScore setTraits(List traits) { + this.traits = traits; return this; } - public List getCohorts() { + public List getCohorts() { return cohorts; } 
- public CommonPolygenicScore setCohorts(List cohorts) { + public CommonPolygenicScore setCohorts(List cohorts) { this.cohorts = cohorts; return this; } - public List getPerformanceMetrics() { - return performanceMetrics; + public List> getValues() { + return values; } - public CommonPolygenicScore setPerformanceMetrics(List performanceMetrics) { - this.performanceMetrics = performanceMetrics; + public CommonPolygenicScore setValues(List> values) { + this.values = values; return this; } } diff --git a/biodata-models/src/main/java/org/opencb/biodata/models/core/pgs/PerformanceMetrics.java b/biodata-models/src/main/java/org/opencb/biodata/models/core/pgs/PerformanceMetrics.java deleted file mode 100644 index 6d9b263c..00000000 --- a/biodata-models/src/main/java/org/opencb/biodata/models/core/pgs/PerformanceMetrics.java +++ /dev/null @@ -1,125 +0,0 @@ -/* - * - * - */ - -package org.opencb.biodata.models.core.pgs; - -public class PerformanceMetrics { - private String id; - // Hazard ratio (HR) - private String hazardRatio; - // Odds ratio (OR) - private String oddsRatio; - private String beta; - // Area Under the Receiver-Operating Characteristic Curve (AUROC) - private String auroc; - // Concordance Statistic (C-index) - private String cIndex; - private String otherMetrics; - - public PerformanceMetrics() { - } - - public PerformanceMetrics(String id, String hazardRatio, String oddsRatio, String beta, String auroc, String cIndex, - String otherMetrics) { - this.id = id; - this.hazardRatio = hazardRatio; - this.oddsRatio = oddsRatio; - this.beta = beta; - this.auroc = auroc; - this.cIndex = cIndex; - this.otherMetrics = otherMetrics; - } - - @Override - public String toString() { - final StringBuilder sb = new StringBuilder("PerformanceMetrics{"); - sb.append("id='").append(id).append('\''); - sb.append(", hazardRatio=").append(hazardRatio); - sb.append(", oddsRatio=").append(oddsRatio); - sb.append(", beta=").append(beta); - sb.append(", auroc=").append(auroc); - 
sb.append(", cIndex=").append(cIndex); - sb.append(", otherMetrics='").append(otherMetrics).append('\''); - sb.append('}'); - return sb.toString(); - } - - public String getId() { - return id; - } - - public PerformanceMetrics setId(String id) { - this.id = id; - return this; - } - - public String getHazardRatio() { - return hazardRatio; - } - - public PerformanceMetrics setHazardRatio(String hazardRatio) { - this.hazardRatio = hazardRatio; - return this; - } - - public String getOddsRatio() { - return oddsRatio; - } - - public PerformanceMetrics setOddsRatio(String oddsRatio) { - this.oddsRatio = oddsRatio; - return this; - } - - public String getBeta() { - return beta; - } - - public PerformanceMetrics setBeta(String beta) { - this.beta = beta; - return this; - } - - public String getAuroc() { - return auroc; - } - - public PerformanceMetrics setAuroc(String auroc) { - this.auroc = auroc; - return this; - } - - public String getcIndex() { - return cIndex; - } - - public PerformanceMetrics setcIndex(String cIndex) { - this.cIndex = cIndex; - return this; - } - - public String getOtherMetrics() { - return otherMetrics; - } - - public PerformanceMetrics setOtherMetrics(String otherMetrics) { - this.otherMetrics = otherMetrics; - return this; - } -} diff --git a/biodata-models/src/main/java/org/opencb/biodata/models/core/pgs/Cohort.java b/biodata-models/src/main/java/org/opencb/biodata/models/core/pgs/PgsCohort.java similarity index 67% rename from biodata-models/src/main/java/org/opencb/biodata/models/core/pgs/Cohort.java rename to biodata-models/src/main/java/org/opencb/biodata/models/core/pgs/PgsCohort.java index ee74b25a..c5d44e7d 100644 --- a/biodata-models/src/main/java/org/opencb/biodata/models/core/pgs/Cohort.java +++ b/biodata-models/src/main/java/org/opencb/biodata/models/core/pgs/PgsCohort.java @@ -19,23 +19,26 @@ package org.opencb.biodata.models.core.pgs; -public class Cohort { +public class PgsCohort { private String id; private String name; + private 
String description; - public Cohort() { + public PgsCohort() { } - public Cohort(String id, String name) { + public PgsCohort(String id, String name, String description) { this.id = id; this.name = name; + this.description = description; } @Override public String toString() { - final StringBuilder sb = new StringBuilder("Cohort{"); + final StringBuilder sb = new StringBuilder("PgsCohort{"); sb.append("id='").append(id).append('\''); sb.append(", name='").append(name).append('\''); + sb.append(", description='").append(description).append('\''); sb.append('}'); return sb.toString(); } @@ -44,7 +47,7 @@ public String getId() { return id; } - public Cohort setId(String id) { + public PgsCohort setId(String id) { this.id = id; return this; } @@ -53,8 +56,17 @@ public String getName() { return name; } - public Cohort setName(String name) { + public PgsCohort setName(String name) { this.name = name; return this; } + + public String getDescription() { + return description; + } + + public PgsCohort setDescription(String description) { + this.description = description; + return this; + } } diff --git a/biodata-models/src/main/java/org/opencb/biodata/models/core/pgs/PolygenicScore.java b/biodata-models/src/main/java/org/opencb/biodata/models/core/pgs/PolygenicScore.java index f116c106..6a314345 100644 --- a/biodata-models/src/main/java/org/opencb/biodata/models/core/pgs/PolygenicScore.java +++ b/biodata-models/src/main/java/org/opencb/biodata/models/core/pgs/PolygenicScore.java @@ -19,33 +19,28 @@ package org.opencb.biodata.models.core.pgs; +import java.util.HashMap; +import java.util.Map; + public class PolygenicScore { private String id; - private double effectWeight; - private double alleleFrequencyEffect; - private double Or; - private String locusName; + private Map values; public PolygenicScore() { + this.values = new HashMap<>(); } - public PolygenicScore(String id, double effectWeight, double alleleFrequencyEffect, double or, String locusName) { + public 
PolygenicScore(String id, Map values) { this.id = id; - this.effectWeight = effectWeight; - this.alleleFrequencyEffect = alleleFrequencyEffect; - Or = or; - this.locusName = locusName; + this.values = values; } @Override public String toString() { final StringBuilder sb = new StringBuilder("PolygenicScore{"); sb.append("id='").append(id).append('\''); - sb.append(", effectWeight=").append(effectWeight); - sb.append(", alleleFrequencyEffect=").append(alleleFrequencyEffect); - sb.append(", Or=").append(Or); - sb.append(", locusName='").append(locusName).append('\''); + sb.append(", values=").append(values); sb.append('}'); return sb.toString(); } @@ -59,39 +54,12 @@ public PolygenicScore setId(String id) { return this; } - public double getEffectWeight() { - return effectWeight; - } - - public PolygenicScore setEffectWeight(double effectWeight) { - this.effectWeight = effectWeight; - return this; - } - - public double getAlleleFrequencyEffect() { - return alleleFrequencyEffect; - } - - public PolygenicScore setAlleleFrequencyEffect(double alleleFrequencyEffect) { - this.alleleFrequencyEffect = alleleFrequencyEffect; - return this; - } - - public double getOr() { - return Or; - } - - public PolygenicScore setOr(double or) { - Or = or; - return this; - } - - public String getLocusName() { - return locusName; + public Map getValues() { + return values; } - public PolygenicScore setLocusName(String locusName) { - this.locusName = locusName; + public PolygenicScore setValues(Map values) { + this.values = values; return this; } } From aaa5f546b3a4ad269ba4e5e01009ff1b2d10e9be Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joaqu=C3=ADn=20T=C3=A1rraga=20Gim=C3=A9nez?= Date: Fri, 22 Dec 2023 14:28:36 +0100 Subject: [PATCH 04/24] models: add PGS to variant annotation, #TASK-5411, #TASK-5387 --- .../src/main/avro/variantAnnotation.avdl | 25 +++++ .../biodata/models/core/PubmedReference.java | 96 ------------------- .../models/core/pgs/CommonPolygenicScore.java | 2 +- 3 files changed, 
26 insertions(+), 97 deletions(-) delete mode 100644 biodata-models/src/main/java/org/opencb/biodata/models/core/PubmedReference.java diff --git a/biodata-models/src/main/avro/variantAnnotation.avdl b/biodata-models/src/main/avro/variantAnnotation.avdl index 240f9043..6f1fa3e6 100644 --- a/biodata-models/src/main/avro/variantAnnotation.avdl +++ b/biodata-models/src/main/avro/variantAnnotation.avdl @@ -274,6 +274,30 @@ protocol VariantAnnotations { union { null, array } studies; } + record PubmedReference { + union { null, string } id; + union { null, string } title; + union { null, string } journal; + union { null, string } date; + union { null, string } url; + } + + record VariantPolygenicScore { + union { null, string } effectAllele; + union { null, string } otherAllele; + union { null, map } values; + } + + record PolygenicScoreAnnotation { + union { null, string } id; + union { null, string } name; + union { null, string } source; + union { null, array } traits; + union { null, array } pubmedReferences; + union { null, map } values; + union { null, array } variants; + } + record CancerHotspotAlternateAnnotation { union { null, string } aminoacidAlternate; union { null, int } count; @@ -332,6 +356,7 @@ protocol VariantAnnotations { union { null, array } gwas; union { null, array } cancerHotspots; union { null, array } functionalScore; + union { null, array } polygenicScores; union { null, array } cytoband; union { null, array } repeat; union { null, array } drugs; diff --git a/biodata-models/src/main/java/org/opencb/biodata/models/core/PubmedReference.java b/biodata-models/src/main/java/org/opencb/biodata/models/core/PubmedReference.java deleted file mode 100644 index 89f66826..00000000 --- a/biodata-models/src/main/java/org/opencb/biodata/models/core/PubmedReference.java +++ /dev/null @@ -1,96 +0,0 @@ -/* - * - * - */ - -package org.opencb.biodata.models.core; - -public class PubmedReference { - private String id; - private String title; - private String 
jounal; - private String date; - private String url; - - public PubmedReference() { - } - - public PubmedReference(String id, String title, String jounal, String date, String url) { - this.id = id; - this.title = title; - this.jounal = jounal; - this.date = date; - this.url = url; - } - - @Override - public String toString() { - final StringBuilder sb = new StringBuilder("PubmedReference{"); - sb.append("id='").append(id).append('\''); - sb.append(", title='").append(title).append('\''); - sb.append(", jounal='").append(jounal).append('\''); - sb.append(", date='").append(date).append('\''); - sb.append(", url='").append(url).append('\''); - sb.append('}'); - return sb.toString(); - } - - public String getId() { - return id; - } - - public PubmedReference setId(String id) { - this.id = id; - return this; - } - - public String getTitle() { - return title; - } - - public PubmedReference setTitle(String title) { - this.title = title; - return this; - } - - public String getJounal() { - return jounal; - } - - public PubmedReference setJounal(String jounal) { - this.jounal = jounal; - return this; - } - - public String getDate() { - return date; - } - - public PubmedReference setDate(String date) { - this.date = date; - return this; - } - - public String getUrl() { - return url; - } - - public PubmedReference setUrl(String url) { - this.url = url; - return this; - } -} diff --git a/biodata-models/src/main/java/org/opencb/biodata/models/core/pgs/CommonPolygenicScore.java b/biodata-models/src/main/java/org/opencb/biodata/models/core/pgs/CommonPolygenicScore.java index daa52cd6..1283548d 100644 --- a/biodata-models/src/main/java/org/opencb/biodata/models/core/pgs/CommonPolygenicScore.java +++ b/biodata-models/src/main/java/org/opencb/biodata/models/core/pgs/CommonPolygenicScore.java @@ -20,7 +20,7 @@ package org.opencb.biodata.models.core.pgs; import org.opencb.biodata.models.core.OntologyTermAnnotation; -import org.opencb.biodata.models.core.PubmedReference; +import 
org.opencb.biodata.models.variant.avro.PubmedReference; import java.util.ArrayList; import java.util.List; From 0f44e8feb163a84a3e0e0b692101cdbf1ea4d9e9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joaqu=C3=ADn=20T=C3=A1rraga=20Gim=C3=A9nez?= Date: Wed, 3 Jan 2024 16:23:17 +0100 Subject: [PATCH 05/24] models: improve PGS models for variant annotation, #TASK-5411, #TASK-5387 --- biodata-models/src/main/avro/variantAnnotation.avdl | 7 ++++--- .../biodata/models/core/pgs/CommonPolygenicScore.java | 10 +++++----- .../opencb/biodata/models/core/pgs/PolygenicScore.java | 8 ++++---- 3 files changed, 13 insertions(+), 12 deletions(-) diff --git a/biodata-models/src/main/avro/variantAnnotation.avdl b/biodata-models/src/main/avro/variantAnnotation.avdl index 6f1fa3e6..fb80f328 100644 --- a/biodata-models/src/main/avro/variantAnnotation.avdl +++ b/biodata-models/src/main/avro/variantAnnotation.avdl @@ -282,7 +282,7 @@ protocol VariantAnnotations { union { null, string } url; } - record VariantPolygenicScore { + record PolygenicScoreVariant { union { null, string } effectAllele; union { null, string } otherAllele; union { null, map } values; @@ -292,10 +292,11 @@ protocol VariantAnnotations { union { null, string } id; union { null, string } name; union { null, string } source; + union { null, string } version; union { null, array } traits; union { null, array } pubmedReferences; - union { null, map } values; - union { null, array } variants; + union { null, array> } values; + union { null, array } variants; } record CancerHotspotAlternateAnnotation { diff --git a/biodata-models/src/main/java/org/opencb/biodata/models/core/pgs/CommonPolygenicScore.java b/biodata-models/src/main/java/org/opencb/biodata/models/core/pgs/CommonPolygenicScore.java index 1283548d..740770a3 100644 --- a/biodata-models/src/main/java/org/opencb/biodata/models/core/pgs/CommonPolygenicScore.java +++ b/biodata-models/src/main/java/org/opencb/biodata/models/core/pgs/CommonPolygenicScore.java @@ -19,7 +19,7 @@ 
package org.opencb.biodata.models.core.pgs; -import org.opencb.biodata.models.core.OntologyTermAnnotation; +import org.opencb.biodata.models.variant.avro.OntologyTermAnnotation; import org.opencb.biodata.models.variant.avro.PubmedReference; import java.util.ArrayList; @@ -34,7 +34,7 @@ public class CommonPolygenicScore { private List pubmedRefs; private List traits; private List cohorts; - private List> values; + private List> values; public CommonPolygenicScore() { this.pubmedRefs = new ArrayList<>(); @@ -44,7 +44,7 @@ public CommonPolygenicScore() { } public CommonPolygenicScore(String id, String name, String source, String version, List pubmedRefs, - List traits, List cohorts, List> values) { + List traits, List cohorts, List> values) { this.id = id; this.name = name; this.source = source; @@ -133,11 +133,11 @@ public CommonPolygenicScore setCohorts(List cohorts) { return this; } - public List> getValues() { + public List> getValues() { return values; } - public CommonPolygenicScore setValues(List> values) { + public CommonPolygenicScore setValues(List> values) { this.values = values; return this; } diff --git a/biodata-models/src/main/java/org/opencb/biodata/models/core/pgs/PolygenicScore.java b/biodata-models/src/main/java/org/opencb/biodata/models/core/pgs/PolygenicScore.java index 6a314345..18148434 100644 --- a/biodata-models/src/main/java/org/opencb/biodata/models/core/pgs/PolygenicScore.java +++ b/biodata-models/src/main/java/org/opencb/biodata/models/core/pgs/PolygenicScore.java @@ -25,13 +25,13 @@ public class PolygenicScore { private String id; - private Map values; + private Map values; public PolygenicScore() { this.values = new HashMap<>(); } - public PolygenicScore(String id, Map values) { + public PolygenicScore(String id, Map values) { this.id = id; this.values = values; } @@ -54,11 +54,11 @@ public PolygenicScore setId(String id) { return this; } - public Map getValues() { + public Map getValues() { return values; } - public PolygenicScore 
setValues(Map values) { + public PolygenicScore setValues(Map values) { this.values = values; return this; } From f7007bfd5cc1a016869fbd26ef5f38c95d1b440a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joaqu=C3=ADn=20T=C3=A1rraga=20Gim=C3=A9nez?= Date: Fri, 5 Jan 2024 08:53:30 +0100 Subject: [PATCH 06/24] models: update data models for AlphaMissense and Revel, #TASK-5419, #TASK-5388 --- .../core/ProteinSubstitutionPrediction.java | 75 +++++++++++++++---- ...> ProteinSubstitutionPredictionScore.java} | 28 +++++-- 2 files changed, 82 insertions(+), 21 deletions(-) rename biodata-models/src/main/java/org/opencb/biodata/models/core/{ProteinSubstitutionScore.java => ProteinSubstitutionPredictionScore.java} (63%) diff --git a/biodata-models/src/main/java/org/opencb/biodata/models/core/ProteinSubstitutionPrediction.java b/biodata-models/src/main/java/org/opencb/biodata/models/core/ProteinSubstitutionPrediction.java index 7b6ae392..99523452 100644 --- a/biodata-models/src/main/java/org/opencb/biodata/models/core/ProteinSubstitutionPrediction.java +++ b/biodata-models/src/main/java/org/opencb/biodata/models/core/ProteinSubstitutionPrediction.java @@ -24,40 +24,80 @@ public class ProteinSubstitutionPrediction { + private String chromosome; + private int position; + private String reference; private String transcriptId; private String uniprotId; - private int position; + private int aaPosition; private String aaReference; private String source; - private List scores; + private String version; + private List scores; public ProteinSubstitutionPrediction() { this.scores = new ArrayList<>(); } - public ProteinSubstitutionPrediction(String transcriptId, String uniprotId, int position, String aaReference, String source, - List scores) { + public ProteinSubstitutionPrediction(String chromosome, int position, String reference, String transcriptId, String uniprotId, + int aaPosition, String aaReference, String source, String version, + List scores) { + this.chromosome = chromosome; + 
this.position = position; + this.reference = reference; this.transcriptId = transcriptId; this.uniprotId = uniprotId; - this.position = position; + this.aaPosition = aaPosition; this.aaReference = aaReference; this.source = source; + this.version = version; this.scores = scores; } @Override public String toString() { final StringBuilder sb = new StringBuilder("ProteinSubstitutionPrediction{"); - sb.append("transcriptId='").append(transcriptId).append('\''); - sb.append(", uniprotId='").append(uniprotId).append('\''); + sb.append("chromosome='").append(chromosome).append('\''); sb.append(", position=").append(position); + sb.append(", reference='").append(reference).append('\''); + sb.append(", transcriptId='").append(transcriptId).append('\''); + sb.append(", uniprotId='").append(uniprotId).append('\''); + sb.append(", aaPosition=").append(aaPosition); sb.append(", aaReference='").append(aaReference).append('\''); sb.append(", source='").append(source).append('\''); + sb.append(", version='").append(version).append('\''); sb.append(", scores=").append(scores); sb.append('}'); return sb.toString(); } + public String getChromosome() { + return chromosome; + } + + public ProteinSubstitutionPrediction setChromosome(String chromosome) { + this.chromosome = chromosome; + return this; + } + + public int getPosition() { + return position; + } + + public ProteinSubstitutionPrediction setPosition(int position) { + this.position = position; + return this; + } + + public String getReference() { + return reference; + } + + public ProteinSubstitutionPrediction setReference(String reference) { + this.reference = reference; + return this; + } + public String getTranscriptId() { return transcriptId; } @@ -76,12 +116,12 @@ public ProteinSubstitutionPrediction setUniprotId(String uniprotId) { return this; } - public int getPosition() { - return position; + public int getAaPosition() { + return aaPosition; } - public ProteinSubstitutionPrediction setPosition(int position) { - 
this.position = position; + public ProteinSubstitutionPrediction setAaPosition(int aaPosition) { + this.aaPosition = aaPosition; return this; } @@ -103,11 +143,20 @@ public ProteinSubstitutionPrediction setSource(String source) { return this; } - public List getScores() { + public String getVersion() { + return version; + } + + public ProteinSubstitutionPrediction setVersion(String version) { + this.version = version; + return this; + } + + public List getScores() { return scores; } - public ProteinSubstitutionPrediction setScores(List scores) { + public ProteinSubstitutionPrediction setScores(List scores) { this.scores = scores; return this; } diff --git a/biodata-models/src/main/java/org/opencb/biodata/models/core/ProteinSubstitutionScore.java b/biodata-models/src/main/java/org/opencb/biodata/models/core/ProteinSubstitutionPredictionScore.java similarity index 63% rename from biodata-models/src/main/java/org/opencb/biodata/models/core/ProteinSubstitutionScore.java rename to biodata-models/src/main/java/org/opencb/biodata/models/core/ProteinSubstitutionPredictionScore.java index fc04065f..671ac040 100644 --- a/biodata-models/src/main/java/org/opencb/biodata/models/core/ProteinSubstitutionScore.java +++ b/biodata-models/src/main/java/org/opencb/biodata/models/core/ProteinSubstitutionPredictionScore.java @@ -19,16 +19,18 @@ package org.opencb.biodata.models.core; -public class ProteinSubstitutionScore { +public class ProteinSubstitutionPredictionScore { + private String alternate; private String aaAlternate; private double score; private String effect; - public ProteinSubstitutionScore() { + public ProteinSubstitutionPredictionScore() { } - public ProteinSubstitutionScore(String aaAlternate, double score, String effect) { + public ProteinSubstitutionPredictionScore(String alternate, String aaAlternate, double score, String effect) { + this.alternate = alternate; this.aaAlternate = aaAlternate; this.score = score; this.effect = effect; @@ -36,19 +38,29 @@ public 
ProteinSubstitutionScore(String aaAlternate, double score, String effect) @Override public String toString() { - final StringBuilder sb = new StringBuilder("ProteinSubstitutionScore{"); - sb.append("aaAlternate='").append(aaAlternate).append('\''); + final StringBuilder sb = new StringBuilder("ProteinSubstitutionPredictionScore{"); + sb.append("alternate='").append(alternate).append('\''); + sb.append(", aaAlternate='").append(aaAlternate).append('\''); sb.append(", score=").append(score); sb.append(", effect='").append(effect).append('\''); sb.append('}'); return sb.toString(); } + public String getAlternate() { + return alternate; + } + + public ProteinSubstitutionPredictionScore setAlternate(String alternate) { + this.alternate = alternate; + return this; + } + public String getAaAlternate() { return aaAlternate; } - public ProteinSubstitutionScore setAaAlternate(String aaAlternate) { + public ProteinSubstitutionPredictionScore setAaAlternate(String aaAlternate) { this.aaAlternate = aaAlternate; return this; } @@ -57,7 +69,7 @@ public double getScore() { return score; } - public ProteinSubstitutionScore setScore(double score) { + public ProteinSubstitutionPredictionScore setScore(double score) { this.score = score; return this; } @@ -66,7 +78,7 @@ public String getEffect() { return effect; } - public ProteinSubstitutionScore setEffect(String effect) { + public ProteinSubstitutionPredictionScore setEffect(String effect) { this.effect = effect; return this; } From 8babad9b9995b6f29f7209d3afd66d782b1e467b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joaqu=C3=ADn=20T=C3=A1rraga=20Gim=C3=A9nez?= Date: Thu, 28 Mar 2024 16:30:44 +0100 Subject: [PATCH 07/24] formats: implement parser for file miRNA.dat, #TASK-5954, #TASK-5564 --- .../feature/mirbase/MirBaseParser.java | 118 ++++++++++++++++++ .../mirbase/MirBaseParserCallback.java | 7 ++ .../feature/mirbase/MirBaseParserTest.java | 105 ++++++++++++++++ .../src/test/resources/miRNA.small.dat.gz | Bin 0 -> 31665 bytes 
.../opencb/biodata/models/core/MiRnaGene.java | 2 +- 5 files changed, 231 insertions(+), 1 deletion(-) create mode 100644 biodata-formats/src/main/java/org/opencb/biodata/formats/feature/mirbase/MirBaseParser.java create mode 100644 biodata-formats/src/main/java/org/opencb/biodata/formats/feature/mirbase/MirBaseParserCallback.java create mode 100644 biodata-formats/src/test/java/org/opencb/biodata/formats/feature/mirbase/MirBaseParserTest.java create mode 100644 biodata-formats/src/test/resources/miRNA.small.dat.gz diff --git a/biodata-formats/src/main/java/org/opencb/biodata/formats/feature/mirbase/MirBaseParser.java b/biodata-formats/src/main/java/org/opencb/biodata/formats/feature/mirbase/MirBaseParser.java new file mode 100644 index 00000000..e360f978 --- /dev/null +++ b/biodata-formats/src/main/java/org/opencb/biodata/formats/feature/mirbase/MirBaseParser.java @@ -0,0 +1,118 @@ +package org.opencb.biodata.formats.feature.mirbase; + +import org.opencb.biodata.models.core.MiRnaGene; +import org.opencb.biodata.models.core.MiRnaMature; +import org.opencb.commons.utils.FileUtils; + +import java.io.BufferedReader; +import java.io.IOException; +import java.io.InputStreamReader; +import java.nio.file.Path; + +public class MirBaseParser { + + private static final String ID_LABEL = "ID"; + private static final String AC_LABEL = "AC"; + private static final String DE_LABEL = "DE"; + private static final String FT_LABEL = "FT"; + private static final String SQ_LABEL = "SQ"; + private static final String END_OF_ITEM_LABEL = "XX"; + private static final String END_OF_RECORD_LABEL = "//"; + + private static final String MIRNA_LABEL = "miRNA"; + + private MirBaseParser() { + throw new IllegalStateException("Utility class"); + } + + public static void parse(Path miRnaDatFile, String species, MirBaseParserCallback callback) throws IOException { + try (BufferedReader datReader = new BufferedReader(new InputStreamReader(FileUtils.newInputStream(miRnaDatFile)))) { + String 
miRBaseAccession = null; + String miRBaseID = null; + MiRnaGene miRnaGene = null; + String line; + while ((line = datReader.readLine()) != null) { + String[] split = line.split("\\s+"); + switch (split[0]) { + case ID_LABEL: { + miRBaseID = split[1]; + break; + } + case AC_LABEL: { + miRBaseAccession = split[1].split(";")[0]; + break; + } + case DE_LABEL: { + if (line.contains(species)) { + miRnaGene = new MiRnaGene(); + miRnaGene.setId(miRBaseID) + .setAccession(miRBaseAccession); + } + break; + } + case FT_LABEL: { + if (miRnaGene != null && MIRNA_LABEL.equalsIgnoreCase(split[1])) { + processMiRnaMature(line, miRnaGene, datReader); + } + break; + } + case SQ_LABEL: { + if (miRnaGene != null) { + StringBuilder seq = new StringBuilder(); + // Read until END_OF_RECORD_LABEL + while (!(line = datReader.readLine()).equals(END_OF_RECORD_LABEL)) { + split = line.split("\\s+"); + for (int i = 1; i < split.length - 1; i++) { + seq.append(split[i]); + } + } + miRnaGene.setSequence(seq.toString()); + + // Update mature sequences + for (MiRnaMature mature : miRnaGene.getMatures()) { + if (mature.getStart() > 0 && mature.getEnd() > 0) { + mature.setSequence(miRnaGene.getSequence().substring(mature.getStart() - 1, mature.getEnd())); + } + } + + // Callback + callback.processMiRnaGene(miRnaGene); + miRnaGene = null; + } + break; + } + default: { + // Do nothing + break; + } + } + } + } + } + + private static void processMiRnaMature(String headerLine, MiRnaGene miRnaGene, BufferedReader datReader) throws IOException { + // Create MiRNA mature from header line, + // e.g: FT miRNA 6..27 + MiRnaMature miRnaMature = new MiRnaMature(); + String[] split = headerLine.split("\\s+"); + String[] pos = split[2].split("\\.\\."); + miRnaMature.setStart(Integer.parseInt(pos[0])); + miRnaMature.setEnd(Integer.parseInt(pos[1])); + + String line; + while (!(line = datReader.readLine()).equals(END_OF_ITEM_LABEL)) { + split = line.split("\\s+"); + if (split[0].equalsIgnoreCase(FT_LABEL) && 
split[1].equalsIgnoreCase(MIRNA_LABEL)) { + processMiRnaMature(line, miRnaGene, datReader); + break; + } else { + if (line.contains("accession=")) { + miRnaMature.setAccession(line.split("accession=")[1].replace("\"", "")); + } else if (line.contains("product=")) { + miRnaMature.setId(line.split("product=")[1].replace("\"", "")); + } + } + } + miRnaGene.getMatures().add(miRnaMature); + } +} diff --git a/biodata-formats/src/main/java/org/opencb/biodata/formats/feature/mirbase/MirBaseParserCallback.java b/biodata-formats/src/main/java/org/opencb/biodata/formats/feature/mirbase/MirBaseParserCallback.java new file mode 100644 index 00000000..3909d007 --- /dev/null +++ b/biodata-formats/src/main/java/org/opencb/biodata/formats/feature/mirbase/MirBaseParserCallback.java @@ -0,0 +1,7 @@ +package org.opencb.biodata.formats.feature.mirbase; + +import org.opencb.biodata.models.core.MiRnaGene; + +public interface MirBaseParserCallback { + boolean processMiRnaGene(MiRnaGene miRnaGene); +} diff --git a/biodata-formats/src/test/java/org/opencb/biodata/formats/feature/mirbase/MirBaseParserTest.java b/biodata-formats/src/test/java/org/opencb/biodata/formats/feature/mirbase/MirBaseParserTest.java new file mode 100644 index 00000000..162e5d5b --- /dev/null +++ b/biodata-formats/src/test/java/org/opencb/biodata/formats/feature/mirbase/MirBaseParserTest.java @@ -0,0 +1,105 @@ +package org.opencb.biodata.formats.feature.mirbase; + +import org.junit.Assert; +import org.junit.Test; +import org.opencb.biodata.models.core.MiRnaGene; +import org.opencb.biodata.models.core.MiRnaMature; + +import java.io.IOException; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.ArrayList; +import java.util.List; + +public class MirBaseParserTest { + + + + // Implementation of the MirBaseParserCallback function + public class MyCallback implements MirBaseParserCallback { + private String msg; + private List miRnaGenes; + + public MyCallback(String msg) { + this.msg = msg; + 
this.miRnaGenes = new ArrayList<>(); + } + + @Override + public boolean processMiRnaGene(MiRnaGene miRnaGene) { + System.out.println(msg); + System.out.println(miRnaGene.toString()); + miRnaGenes.add(miRnaGene); + return true; + } + + public List getMiRnaGenes() { + return miRnaGenes; + } + + public MiRnaGene getMiRnaGene(String accession) { + for (MiRnaGene miRnaGene : miRnaGenes) { + if (accession.equals(miRnaGene.getAccession())) { + return miRnaGene; + } + } + return null; + } + + public int getCounter() { + return miRnaGenes.size(); + } + } + + @Test + public void testMirBaseParser() throws IOException { + Path datFile = Paths.get(getClass().getResource("/miRNA.small.dat.gz").getPath()); + + MyCallback callback = new MyCallback(">>> Testing message"); + + MirBaseParser.parse(datFile, "Homo sapiens", callback); + Assert.assertEquals(50, callback.getCounter()); + + MiRnaGene mi0000060 = callback.getMiRnaGene("MI0000060"); + Assert.assertEquals("hsa-let-7a-1", mi0000060.getId()); + Assert.assertEquals("ugggaUGAGGUAGUAGGUUGUAUAGUUuuagggucacacccaccacugggagauaaCUAUACAAUCUACUGUCUUUCcua".toUpperCase(), mi0000060.getSequence().toUpperCase()); + int found = 0; + for (MiRnaMature mature : mi0000060.getMatures()) { + if ("MIMAT0000062".equals(mature.getAccession())) { + found++; + Assert.assertEquals("hsa-let-7a-5p", mature.getId()); + Assert.assertEquals("UGAGGUAGUAGGUUGUAUAGUU".toUpperCase(), mature.getSequence().toUpperCase()); + Assert.assertEquals(6, mature.getStart()); + Assert.assertEquals(27, mature.getEnd()); + } else if ("MIMAT0004481".equals(mature.getAccession())) { + found++; + Assert.assertEquals("hsa-let-7a-3p", mature.getId()); + Assert.assertEquals("CUAUACAAUCUACUGUCUUUC".toUpperCase(), mature.getSequence().toUpperCase()); + Assert.assertEquals(57, mature.getStart()); + Assert.assertEquals(77, mature.getEnd()); + } + } + Assert.assertEquals(2, found); + + MiRnaGene mi0000077 = callback.getMiRnaGene("MI0000077"); + Assert.assertEquals("hsa-mir-21", 
mi0000077.getId()); + Assert.assertEquals("ugucgggUAGCUUAUCAGACUGAUGUUGAcuguugaaucucauggCAACACCAGUCGAUGGGCUGUcugaca".toUpperCase(), mi0000077.getSequence().toUpperCase()); + found = 0; + for (MiRnaMature mature : mi0000077.getMatures()) { + if ("MIMAT0000076".equals(mature.getAccession())) { + found++; + Assert.assertEquals("hsa-miR-21-5p", mature.getId()); + Assert.assertEquals("UAGCUUAUCAGACUGAUGUUGA".toUpperCase(), mature.getSequence().toUpperCase()); + Assert.assertEquals(8, mature.getStart()); + Assert.assertEquals(29, mature.getEnd()); + } else if ("MIMAT0004494".equals(mature.getAccession())) { + found++; + Assert.assertEquals("hsa-miR-21-3p", mature.getId()); + Assert.assertEquals("CAACACCAGUCGAUGGGCUGU".toUpperCase(), mature.getSequence().toUpperCase()); + Assert.assertEquals(46, mature.getStart()); + Assert.assertEquals(66, mature.getEnd()); + } + } + Assert.assertEquals(2, found); + } +} \ No newline at end of file diff --git a/biodata-formats/src/test/resources/miRNA.small.dat.gz b/biodata-formats/src/test/resources/miRNA.small.dat.gz new file mode 100644 index 0000000000000000000000000000000000000000..3d0fec10c7bb99921fde55678dfa4cdcaae4fba9 GIT binary patch literal 31665 zcmV)1K+V4&iwFn+bOmMr18r$iPC+hnZDDL|E@WYJ0PLMxciT3$$KUfQu<~*`YgsZu z0=&el556R>6FZYS?n&EO^8ix}trMB_NJ83t`?L23DN`aPkr^qz2x?a=TNF(IB=Ez& z{m)JqV{tO-jgo0EVC8EvRoOs|2M6r({ow&SetmwxqKF-R=)d^=_ZNr9aLC1(Loa@S zzdCshzdBY)mXB}L^TBBm1ggc_t^RHmYsjtW1o^?24@*&?Dgp$J5u9mGGZqm4qjYd z!O<@dGp4RJTy{2P@kmW3EdR`=!|WTjVZ#0zPH))VIKNicqlAr(ZdkPH@b-F~PuTxLyOM-m;;^Z& zD*H=)!*+Zcjz&XuJ7FKL_SoCxuVe%lTwd?R>{{WbWBK$Z8JlrSvO%80rR#|qLg=F7 z{^I2y>+zT!4f7FPzV|A0f?mk$S1;;U??<7CimN{zs{#81dSAsi?Cl==MSX)lIX!`n zhk?0)9%e`I>*E_eh67G7;P?9L7o9N}v3D?3Z_BnE=eKuys;0v{QzL3m^7mab9^NL| z6n-%r!2gGzhcO*NV>4^e{g{ zH})1T^sR#a+hZR_(6}+aJN$c!199=z3>w^8@Ac$b>0A8pG-1CqJZ0w!Zob|mR`3u_ zlK*IU9EMp6&(qgLjV8?Hzx8HAsRnq%QBT}f$Jm^zGP;xDit#la;0CWakFW=v0DHi% z_JF_G1Cj6@F3KnF9X#j^#%z#4JF{3#rsD+f$`8XVhQI#j9{V^_@fY@HHjF3VG6k26 
zgEvkllVYqUw+gx|%d=jb53tpE)q8N+@krCpS^hPtwAGvpd*Y57xL@`TT#6p!7naPJ z1L)0Beup?YTd<&E&?aFXj0{FxKD8P4;s@sM3y z(X)A-zzu$J4)?vDPT9LloO2ai-W5C&AJpVOdYHi|4B6ppoRae5nfX+`>Bq@Ul1+wR z6E+x5V(15$S)t1+gM2tn;a1Sogk29a>aNLf+Ir@FucqwdbgbhkJr+ejz)carJrP9E z7u+#@adL?_{hPxJSiIiA0_6q=W(n$>k3RkL^5W?5<7-&f9C<4Dxhs4Z?kmq12aL{e zjO&|OwctWN=NhM_jKw$O{5GH9F#JyUYV+X1vS!9y4aXBU&CTgdTX7MBZ^9JEy#HZ3 zn~vcdunr{yT#@iL`{}o<%YV?yg%8@4-J1Jla+k!z&)=Ze%<1q9<(JJ9HcqOyDttak z%$Mn6rsH9nCUnouzDmz#ak`x0RrE$`{I!_3)Ig7>2m&cwH>_#YGjD#u$0_;7s;~3h zOlSMQYBhq%Eg4VXz!z_DP;c?ZuN=#Re%}ktXN#|W700FpKfk;vms`FPo}vqRwAqdbGr`4{hb|AzT7ALH_r;UL4KaeRp@ z%N`R_;559~e7>{MNZ$@Kh4U1wA?IfwVB*B5^<{G(`A)wd^45LCr4x1OBX8q^sB;k{s|2h3qt2h4NKihuBe&Z||dVx8(VWlG0!3hz|P zQmydcRBL>TKWGiVRq$5BH)5uWLsfj=iRB4(8pDC`RUPB8_yq<>s}%lJs^F{edmY2O z)vqXevA_S~Y{tvOtS4q4HuAD)-v>z|`ZlCci}BPC;2LDzIv zTw3e(zjc#9KYZR##~r09MIxO6?r<1cvZ`Pus(7uXNfk&|y3&aBW- z^?8bz3Rf`9;2b5-nNw9}=CUV%uZu;?<^C%ew4NV2Kk+DvRzvo4*g7=8_Y>MWSiH7) zZSi_3UQ74DOu{w1ej}q(zVCDC_k34Gb8t0J8MDLFl*D9V(6SvLK8f63EYUq zQ$sCp1=@HmT$r;VF!zhAQ}Gn>eSQrYeJeqm&W${At4FileDz$#n$c%hH*ozGJjd2d zCTIwNC--x1RuMS*jwUZ!qTp1SzD>q3u;yfN4cTgeZ46M3Fby276!1AAJNy^P8n`=!uQ2=$l06(-yh=V#(-Z+5&IoUj&jIrl@;M^D zN&)TR1Pu+Wy&^Hk=eNUgkIz1A1+*>FIb44N1Y}mN zSp&=>KjV=OSLeHMb)t54HDoqd=gc}i%_qHIVTpxXKq01FDt)7;3|C=TK9l$h^g6KF zm7c^mBe>;@%Rj4`Y7Ks*#JmNe;OuQ$RGUjJdpudLAPYU{L z$oq`^1&bWJdmEb2ZfLL@BDf&fuOC@X#wd3cmH;4{E^;|`7k@-V`s+79l@iH^j+>v5PRUC-febq{AS z=r4sMGDz-lEzPE|KPu-exZJr16mi@&n>W1X{O)ErQpKr~k;?KE9yFL}e!4ijd}U}1 z*7ASg3g2f87pZb|lLLibx9xW$)w9C#;>17UK9Ms0pabMKXL_h18A^URcuZax+C#9mnXZ~St9 z7*w4zQR7a@r=61Lk|)EbosxIIQOR`#_)LpEs01`W*z4c!CZAA+Aohx@K4ec5w8;YXVyJ_=O{ zI?<7AO-J5VV^w}!fVuSg{XldIU}Dig z2k6X%tpM#Qj8#bu_963;sa9h|3Yl3kg1o%I68}tWi%v}#I$C-a+Hq4jJA4_=VvulM zd^zA>P{%)6e%Jy3{C4;!E&f^jdrbJZbBI)f#N9-sMN-^NL|TH2&)Ym$K|GQk}g_l$d;41j1X)Z}+>aMH(=ikO=8 zL_DIPON??(3mC*xta~-VpkIF20S1G1Fz8ztd~Ptfb2wE4%H71NMXKCooT{Pa&f!!f zB=n3U_(pxI-2x!n`yV}jt2pIv5T}AJjoNAiWHU~A>v2l#&Djg(TH6}X(?WLGJWdh( 
zprmz~DZ(B`$zvE788b~eV~TK!LM8QToKob5jZ?C%rD$5i&`K4f%l7+h zYqN4^9bz8Cb~t6rojp68vbMD+1%uYMW^HRr-H!zv1&)t!#C`XHY)$A{>i|dFsBblA zOxYA0g{EChDOnz12F*^C#}QL% z2z-OVJ0K)X`k0MI6B(iZQJ01$^jOA46q}B}E9fjNKWro;*+@oFJ2DC_8Cf#2WMs+6 zlF?%zBkOfEB*?J@Y~aex1)_B;bc$xO4uNRl!+0HK*zX5XClrjjH0qI}U>mQaG-fi7 zV9+HPl*>|ssX`J}I%2x4qJ%|GwebxacEc24g%&e3k)iR6p@l3qS;EC@7J`N(NO*}b zh;)g>NRUb8m>AY(YEwlil64r2$`6}hFl_WXa;F^(MivI28w~CoPFb&`^*UOw;{r~3 z91Uw?zF-W_K*`_n5R0T4vX~WgF;!%+jxtu*gaI84<0-A%u=3*d#Ci64IKE8=Mg35P zm15wMD{aO)F`Gd**{R$mqFaa9s_0=noE?i+(jwlaQJY7%c~~n!&fTzoGc^899yO+p zy1_!~Xi>^VF)G}GzD^7mqfmb;NssnAOe56{e?{dlQwkR|_PX-Y?yFx?K^OScq>fUU zn8822K1Qta3hVw&;glaXTJhXQa9gkDgyRjTw&3>6z|C6mo)mUkE1tFDSu5TGb_x!& zT_Bxcqv@pGta$C?!i4icRy=4_&?!fjw=ZD`je5dXJl9z91X=O;f>e>~E#wYRq#H)K znd0Y#Y7rIYqcj4n141~+OmROl(Ban%4VITyDTR(-rj${nL8Xj0IHk-3l0j-{Si>uF z>o7R0h2B1QfUlP-q7mQeTIkOc26qmptQF5%@vIeZ0jDIUVg~-^QZdOX+TMQ%4{Fff z2qm^#J$ZxialCzzIkDyH$xYU@!f>`m9%p5?fLlqv^p;uEiYR`vrYRE@r%N{|dOFef zY5brBP%*{OnMxd(2e39(xE3(^5JRbp{7HF7dDm+$x00(h-aD;z*Y8 z8#VYpJwdjFhZ)sv&8c7|&Bw5OptBYi3x$e`owq4J+!5Amk8{=wq>Xrr671ph$bV z%&DNaRBl7FLLJ-)h#LVz43HzF#DDKcqmsx<_295ZFbRhvgc7h)ES|#r)`pfXz8V7R zgjaspKJS#b)7pSjTavUS$t+1)lC&h*fh2duv{%xibVICL?QTwKfp;#l);>v8cn?@) zt&`LOZ=cNzje4YP)@uf`LSsP^ka?>!97y~;t#+lK5yTm0*0n(JV0+=fiT4~vsCLb$G9 z$#!*@2`-q#R1~#kR0z(vV$n*6Zd^@;bx{^K#cWr^LjXM_M3WfXD1=M!s7RH83Z*El zCx^g^G7OrTJlTfEExa0paK&oDg=Q9PC%7Q2;DTjA%Yv2#Eel!}d@L;J`7-Eva&r~% zIw#KtL5FC#hg4hWMuP1#g2j_oTktE&s{nq-ls7RWLAO>OSP}T)Thdri=#VWykSd%g z%Am!1ct&^u>z<&=Fl|&@SWbdzOb}^wyw<*&Nboz<7I-_g1!2_|ED2f?v?ORr(30R| zAVKNMeG#}}PlT>`AQHrAy$;Z{4WWn%>Qu8KHYmcyJ7foMS2g0th;X@;MA~qvA}e}s zDL&75aaCDhg(M-g@f(vZ&?JhWd61mH6zYdGS&qjH3y~wS3Lc|Gk(fgKFwq1Z&T(uk zjxD&bDGy||YD6;@wo{D|RyD$6;iJMrH}IoSM8(ygj@5v{`hKY58wLdU3sB*Not{9) z!@%4?53?iq_3@1!!vUv&PnY`Z7o9N}v3D?3Z>y`nHp?S`9=4}CjWL|srm(ogD;LnR zX}Go*#K&K8S0eEFo{jypv41v;q;(d_JI6hW_NC+Zd}ni}z^LCBPA35DfJa*u!`LM9 zb%=BP(P9`MjxP=Jm9c@%=c^_`ug%*CswGt1MZGXta}ATB4r|_#Tq;2z{TAZ}O&p<- 
zUyUgpgQP7ewO8X$vtg-|Us@+WJPUb=ze|2qtX9Qn&f93GiXp5jh9$p8MSeR+N7f){ z4T8()=+hBS0yv+0ju#!U(J<>tc(UoR*YB?`U;laf`u%HM=^W1G{Q(b9f{qrQ_QbIb zA&Gh7sF{`Kw6&Jzhc2sKC;I(Bc4Ep7{Iv}QidG_GMOHHYSTQNJef}mU zrFKB42NH7VZh>+;)X>|8ki+)N%~gwp$YpLuECfQC2&ESh{fn4FO0<(|N=Ycf)2Pfx zNB-X5^Ps0c0)EQfqx~p!ov;^$p5s5lB5)g1=khz%5TLOG5t&&pU@nt44WUiQk{V8CKh>Ma-(b0#-FVf{g8qw9|| z+lG?EMk?#>g^Tg?n=!JXnvKKID=ITWYfxDY^oW-5R5$fe z88&VFs;N4o-kVnhX98nx{*8%@4zESt6lX;;!; zP^qOR?rmqmf}=Mzf?Gs%MSWb%NuVqSR9K+uY3S5tGkH|a$TdtErLz&332%{h7iB`| zqC77e^xjiLJWY}T1faRG-#WS`x>@#9D8}(+R-j-EAD>=^d0^m4T@QqonLg2V=l`PuVhn3q9 z2$&&Ht1zzbsMOKER+;?K@&Mbb70bg)@iVc^uStl82?_~9u zc*918*{HDHjtXm+8hTZF4I^YP8AT5mGu9y;avQbzw;VGj`~9GcY^ldP#L#V{u7A6j zG0&^i;jdKD@aC;-ROUjN%4bD(D3;{MG8p&~=`ztjx+v67UO=?Bkvp+oho8KOiWHlQ z(|g@JIH>C~o&D2u281*k)TXw#3YuX=h=yD(GUTWz^@zK|5kLVeyPM3y#1>9%7Afum@2;a zq*d{?>qonOwChK^ezfc7i%9LJv7#31^p$^^tvDB`#WUW!O~Z?Dq5dXLT?CUoyK@u9C^yf{XX1|lw-%mor{cyNOj_T95$oU(j z(BB&g9yatrdn3UMvG0WuNTfV~DS#+!K$RD98;et=9?Iq;KUP5pP}U#rZK-y^Xpf82 zL9~@7%4Ul?j_3{}-<6^}yD!BKn73EV-c`G-hH2S*TFYL$tiGztN|TQ72BB*h$`MfY z>gV+~&(?r8<$1!>5=TY8$?^@`E?||tzQ4n}Qic)NvBiy*xX9u5bDZEbu;K2*lk~M` zN_(b;;MfPlKX-pL&A@^KO?Y+>#bdTVpS#0jl5kZU9x(0oI(#MrmoIeTxIqJ4OcFCN z>o99GP>3^8Z9x&EgE?wKk5km!Ax{n`)WGdBF8Adk(l2lLVO)jBWv>poX*f#I7$!vS zX(95*zLe`o8y#t*BW-&3Z%psP{?YcrndKc!n31Avh6lK!$6~|^IfX3ufGC^Mx{UU4 zT}E#TY3=EvY#gz9<1}O$?RxXZYn3#ioGA>f#a)?5$YsOg&P6mYqoO1fw#yQ+6+_AB z7;Zy$iu!8v)jLZCN=(DUxvtEidQT#0)Y zU#F=e)6XRVaLKp~sS#WrljIoOCF_*eLO9f(_%{Gbvdxaom8+gMNp-4C>Rf^vz7D}*RmMvXig7-XW@ z9)WIj>0+}AHx)K!b3}PjoWY=u956bp$U92 zjpSZ>GU{j-)2|bB%vPxeQB~webg4vT#=qpV@ zk>_L+|JCtGw$+Jt#z17L9!NTZShST32^&n%7VjgBUPh-Fp&K124k#}{8IOdiP+SB` z;~rZfva(6hxFD$-PB^IyLTbEJdv5-IsTyV8lrv1K#@A9c8viu@Y5ddpr}6LGT#caNDP(;beQJ(^TC~t< zO7)1mx1ety6>Ev1-;WTjL_sEMjbX~0NQswZC*riCq!3a)2!#rDX)69&l;K<$rB3^< z*bqjtClA*YqgU9?d-L~7^{DqzoncZvzLx6IQaxH-TB}QIb!n~PGmuk*C(Eg>{o4A` zatw23Ibo?Fd5)_tPM*7W^iE%Bq5aG^EXvlmX}*FJgcHKb^?Y@Q7%QOmL^OsJi06!F zTK9TRUjkoolSbIX1yA!kc&;nmuJTwqXahHRd?VU}nK5Q?-8aSEU6yafm&2KC;{1~s 
z`<>~5z88MH&eJ5>@K`kR!{yKKPX%1~uKDbQj=D*2Z-KzsZJw=!YS?CxeWvyRBG?bV zeU91R7t1(5VE}cTx$IxITFsx$%Ta@+bt{+ve!R&L@qiRoBrhfzc5@fQ5(;Mqzu`Ck zEjF#btuena%KoT1Hrw-31sm*D>FO~thKY3fTBPf- zL%eo2KW>QE&Svdw*3M=bYhXDqf*)U9&+pQr?61+}x}6+(A?@kxPV!Rac;@1SH^3Xz?TUVGKe<4Hu&B zbre0M?1C>vB_MyV@Ldo64b#2zweH=~(chEfC!MCP)3kM(_6|QeHfHKLj&V2e|xyvh1#cFao9#w})VlN>TK2B7H+p)A?`=BFD4O9D-^3*Uj zOexRTN_iTlG)!rj(lB)#Fm>)Y&rS13({NmzFI-p$4w?BfD(~4chqJ~Ms)5r0)CfeJ zLiulUfU&;Y+_#4y2P-2jLsqrW`%HIK{YiwrU8_B03;kkKC1*Z zXDE2A5eTMyDTQN#@>8V?%2NVVI1Hcy4Nw}OG(bIWfa(-4FfRPFA1&7kjhW@zfqSS` zlq`00%!1>N2#V=7?k6nCtTM&D#$HC)+RCu~R$NFS6v%LbC%p&^2H_;UkJ=JU$dh5N z7+LN3KTPZrK_o?2fQ$lu(fm%Bu*eIljflUdG&3^o2N>eei|#rSmLDqDkwCSMxW=$` zB-HE3v8^MIJ1W@ny)bY>dG@b!e#5{XJmb-V!Gd=Ii{OU6egOjy7iIw?%$~!q&lg1w z2fT*W;(hVyW3gs%#eRiL^-I%QEQC1)>l*f_IVUlkr?yr4cb~ykb`5PoxxksHCd|~Z zzInD5t#Vn~rRa6M^1PtbR;1vcP#0~b= zSm;6AVI>v<<463*vYzyw(|4}0_PCMt7|aL_s~T1{to9Wq|J5*Gg+H2x>&-$pYzmIa z)hpeULSDcCbPJZMT}{C**9@NFnlpz7D!z?#TGeqnq^9y1RlznSztuHJ%J}*+R zb@|<0%yT$6W7n1B8GJV?fQja?8TW6D=c`iFYr|J@EmL+t`W#hQ-5pM!!4#DpN>H;~ z;o>Jlm9$3f+_}S*jDu-QJ~-+sutV5%FqsC8O+2&S#Phoq*HJ8siZw)3_6tF>po$Mv zFtrc?;FwN_=o%4eQZ03KtX5Z}a=ucbFRb%LG7QA2BJNz&TBXk$N?v_06`Ag>{$Ltv zG#ys{A$Zc|AHH)WJqD=LqQ4qRHIg1Dl72A!7v_(qV}|a`bZp~r`dRKM{VdD?93Gt% zOi`_20Jr(TZKEnsxWmK9f+;F4Jgf*?Ls<{DDM?KbOBHYBwA8cgmUe1E`dJunMHLi~ z!6=-H68>mg8^f@u%zY$uw(x+0oP*?AEH6N%o>dewK8uKr{+3C>UMi{JF!*GXzh%Ji}42kdC){^6^$WU|k2S@6uD?`2Gy~8<+>9Cu@{` z0`A6+@DNR*JwHjtpF3QAG?+p_e)^0*Pbrzu;wKSjKwp&Gw(;!gY|1>dgkGLXCo^QVY=c7Q2JKd^W*7nxcX@D$h7B! 
zMv{s2o*8}G^GJJ<+~`cWcq>ZLqe46t3Mx>99+(fET@jTWO6boDir1^IfKkQtQH#iV zl%a^8IMHB~h`Okv#F3(3J1^Bnr&E5YqCekN(%&%c`9N#WYx>jlr|D1ApQgWWPk+x1 zJ9K}vZ8x0R*5Q(DLqNZAlr&scG?+qoewUIIp<&L$pMFg9J&9G-z3 zOrc;ursO@*QCafdq>4nQ*BAblkZ`;xd5>*Xs@8)pr4rSHe_Jy{0G}@ zW#$%K6qlqLLKI6!<%iJcCxhxZg~JuJfgDi`9xJGLWzi@u)DWL3gxo%sq!ilxD4s7u z_KJu$g=ue@3laYw9UACH7XbI`=&Lt{hw11CT1Q{=pXNW!f13X^|9yM@`@{9l?6V(D z!wPK|raLRJ4<=(vbpXb}(ukajDe3?m43^aJXp9mh#C`G+Tgp?E5O}}?SSG-VePIEB zjjEL1ZcXw;di5}=l{|g8jZ4vg5hXm3#}7ZC9o>SwdI^&V2cTFFV+DXBifyfnrP>$XTL&O?lmIxa3_vhOlI1%|6xAH3IZkt&<~XJ~?mOkUSEg}J zZagoTnQrL29k~`Ctg_9v_7%XBh>9t1P@#dtgbdwY^86E^0yWdP zKl|5}dHHR)vzjflq=@lr;SRZuM&{g50wIxn>OiXvkgm{(kG#b}=vn_7!aTY#Y4CM$#8T6Kph@qUe4(d@+) z{3lQrfb|-6XD+f;#y0#8NO8le28UCuQEmAr*=F_+SXbaQIIhj_d9f(A43{2QGveqY zETjM=ABs)1Si-QEb8<&SRjj!Qfu3BJZ?jyk)wPJ~`$n;k*6zMN3*6vg(sg0=@GB;~ zDi-YWy#S{_WMvZh@71p-utvezU0sOJ@q=@i)PJRa3pm3I=^-oq3Z(U7B!0>~x%2=igFPT>xQnQtr9Wm`2)0E6YaY5B&f69h9S zq<%a5S=e=*z$>rZ3s@=A7z?*tW-xJMYy}b2AIJ;PtE&aiR~+f;MIPVZ7I0#(%TBy4 zR&kCfAW!WbA>TSKa$-c$ohXBQB3&zRYG)}7PtCxXnSu8(4m+-I`$1WM28evYq1j6~ z>A(MtooBGP=j;kj9grSw@FlE-@8C@0j~sq}`x-A4Ub5Hkkf2v^m%Xnr@qG;Q90CG7 zoGXkoDM1m}F}O(^fB+qhvSpUX5f-Oe$Jrg&Xer;RKZ9#q&fdoxSm{rL04%W3d{{!! 
zwJq0joQicfe6vX562M*fmVe}{BIk66W`7p-b9A=c!m@hP1YDT4Z&M7BY2G^}sD5~N zHUnzI-ij7((pyqH-lkQrFW$^NLtMqOTX4HY3%FC^-@C>A2KxphzFHOQIkPS6)G_Rt zV;Y^Cu%nXbdxqm%el>lpZ<3tfvUgY=Xj6zA4^{@i1DryxzzWOp9{f4`l&&)dkJDMs z;Vn`BZH%YR$|vtVU>5%VSH8MiWI0^^v%jCvS8NwR7CAQlCMDD9DvJP`-@X7)j`J0K z8v|y3RV?9`zsuIn7b(!ocAKJQ`zqsfIxvN$pFeVZYF-u5V#9vJdmhHUC<3InT^MbSgkOQ&D=zhHT8TyjtYrMn3zWos6osLDB@-2KB6tiFyFw1xa1LzHT=coQ=NHZ2J-w<&#+B+q#e6+ z`t6cGV1WgpZaGG-Eblj|a4NicQ#mL& zm@hIYuvTGjUH8&MQLaDuISkYdJn&mFKZ?R*pD?~~8Gj<1M1Id5-!3wuk+hlx-^9ho zc$IQkc&$_01{k(!-3^*U8g`6#(+vW@9MWI21wITg%c0xg13n#aBM83u2Tt$BpYR-R zvQPLJ|4P>Tn=3ksf4YgAenL4e(2KUks(J|Isz4(2;^vXw8x@Mc(57fG!zu|)F7DxS zG(Rq{!CeMzQ*>qqmK!yPCMM!!eGa#Ll`ijbSa0b0&tsSlwrKHf@m%2#7BLJqoWQz> zmNC!aFQRRhi}oDGmv@V5d3}C)E<7k{go~>Q_;z=pn`sIhK(|13g!`s>KFW-YXP@{G8z5I)d*KfgiuuTUI@ynmzzj^t=KQrZr zFW-K6|MK6jU%mya-7&lXqthG5lvcaMZ2OK$iwA}T7D%$;IWrkp;r8E3=Pz8bAAWmQ zRurJyKZ3a)#_75=lF4{RLp%{KC3yqH2dYatpIiPYGxFiu0S)VA(x|8tcNTQgCTn{# z?6aL+e?Ip|blP!(hi&IZr>C=>mF90C%Xgb~MUvt0t zJ4QxUo4?xp)#k4@f3^9m&0lT)GHw29^S96ZHAAp}Yu@=>#Wg(2x3}#1OT2{30)E{N=-630%G(d`y4zwM(b&R>;O;k!H=4n%1*5Wh#SN)M zn{xc#y~(cF-`4>Jn4JfH?HI87oCns#VJ|9k&oMkT95rM!{VNNzHz?z9=yHh!yQ3>o$K zUsjp;1(PN|hGufgRmWX!H20-#FQhOrx2a>}qe-8LSkJ$pZ3HjhQrY-Y^jvA6d7N9^4%o%R zrQaTrUA%9L+X4%CM6%;|zQyg!d_|{L_iYxp_u3KO!ulmGHevm8Cam8CH?t$u^1uLX ze9Og-I2gtFo_4d#^Q$YSN;zN*8UHH=u_)g^FGXUnumOwu*Qc=z}8)ylKD>T zncE`1!zj2Rj@dpnJMeK675jkhPtk+*SEadDl?R^Dym-4D_Q&OIKlf&6Zd6A~PW1u_@KpKFK z34j!Y|2P0h&2QS{`PDt1J<=$)VLM<_mg_)VtbzX+DdDWLbRDCo{wgcrbb&F%)E2`N zmy+U7oQmo0H9jePz4@4;8GTW(i!#b=!P6~F2GIf^&|f)>J53W8}a_IO;~S+An9>y1h)vOHM%$8>p0MP+qjQBx%jsQjGNHTr6*VL%Z=IhNEY ztEx%uIP|eH5qpGXGAOZ#@`}(yOL;{}oikKs~h-H@AbSOrL8h5Zt z9EOa8`P>>_hL^x&I3?bEJ22j>RA-NJQ;)C&ZYFe7+e#?*o7wi>e(7WioF|VnlSC}| zI4XV%VLTDqxOA(TxGpxCwN zbAN<zLff)lkM20HrI3q!iW8v!Mhg~SX-K)T?_7wa3p!oL5C@>#+?8Y%tzmCw= z)PK~}|7at0`%1=W>VH(!f7^WDq5eAE?p3eqY_N08A>i8w_#Ram+w+IB=_ z3Z$l6e%%NF4|@O{t4m^oPlLZL$94>>?7=zA1HcuiLkN$@HZR!u3tTGFRr)7^+;boR 
zLYRLScNtRDTVP03I6hn6AXZ#ppSN3_+ez;cR*;y}G)tpFNiFaB2DkT6KpEDTySojd z#oIUmmQCq9fVgtZb9n{Kgq3XB`DHm$KuUNwZ()Ie#|Q>XC?w;=K5Gk9!Sp|4Rgw!{ zSGegbnJWCz1B?Ju^#7R-nE;ENmZ9Z6vSxITE5cHWP*uXoa6VW#7|<}{H#p|(H+IEP zS`moC1=gS8f3bM5VI8+X22B3{yB>Ym$`{M_#iZ;M-3Ho;Y^!jHMC_pfOkwV1Tag1*2`|Wh4!JAFggR?9_}$ zmCmCj7CO&mJ$M*?WA3t#*VS@Yo-QnzSUG?iLuI_{7#?g9izBMI(=sd6^Hv%51E&C( zo^J7qKc`8zTsNieo?`LbupiFfymdvU_H%}pA-|zJ z4;3UMcw9EfHwVfMb>?$>oFZQ^2>xZ~CpLJzVe%}lIa-2mTra1yaQ3OQSe|l=0p7L( z-wWI;KwUkuZ%|ksYsD2<9v#Dd$OhtM>?E9Hln=`B!9Qs3bhjAra1ri~(edy?lZSml zppVMJ@%F~}Cf28jza`%K22tM zdYzFUDuX8q>0_7OCF_?qOpD=P(`dekKLH1Lr@m#(d@C4Wi+Gk}xfSK6EA|FB=rx?u z8aWhr>W}y%K;0D@XL$k!*>ftBvxO6X@viLB?=k!W2xS}h=nFJ19~w)J@vxDh&C{=A zo^}kMhgLyL>I2$~wpewm4t~U}*gn+$tN5Fisx?ZMu z4HeD=G}4E76B^zK;ZgWvXM$*HAL@*9CWz($RldlXVB@tpgX<3fE}N=KSjfw~;AwuB zuIpO{JwQuzG>CbiapmNNYdCp)Th_Ap7Ve60xFPRN>aB23{8_l`uQ)lBfE9WyMs;+` zsK`L+vIv_l3$MK?+Y+$C@J3RMCGSP$vPcT@HlUv(iAb)Ba`mHvybvfGi|I%3cjft2 zQIVyzRU&is(G$YBtj;$=#VQE9E(^c>aJZnXZ|-wh_}XEi9TrEjdHOW$&!<^-&kg;+ z^~;OBYLViHzfiCp9DG%*#iwXt{i}aQ5`2CeqjzKp zRFaGJa)XdtKBudAyU1?JKE8VS{0eTP?RL&wKRmSqC!E=VOE!o5K&drtRBNO9c#LX= z*XR)$)s5Iv*ZfIXBM%0b3Wx&2ZT&d#@7%lAj~#Pe$URgi(lOSp>hpscJ)^3L`^Iov zz@O0Qu{t%=oI`EVB14bq zp)xNF%ZVqXV^)^S5b0v_Rhd92ZWf91<9M)C0v=`k(5t24OOIr*FD}(Ll_pSM;c&WO z!I8(MY8z;<4Rnks-L;dB0Ht+MsYdCeM(M{LR9fS!jQu6KC8#PubP41s4p9`Sk?~ZS<9LyXt}OJy=D( zQ6dooi(5$Y`aWVd9e+OeMx>=#K{&!72&RnLbY|+x$!4>L2j!)~`<|#x;B?y8AVw#) z#~lk~drC}8WGp9gJd>YO5#@;fsHiJ*R8gCvf=7a;gQ6qDU%uJp|8Q?!nx*9bzRJmT zeL~Iun*Wbv4@6_I#^7VZ;72PPF~Iv9@|_>ro?l;x5}08hI?D>nbiiLjXIKFTu=6*G zs&Slfgo8Pp5-5B9$xb+*`y(ncBan>h(}WIXMb+u9GudjYjCDi~+;%~@1onSYF+CbAdQnpnefp~Bzk1D+>M-h8WBU?V45S`=IEy79Kkk+PQ%gm-BH_j z$DD=}Uhn+SvF~IePWZep5l2?=7DpV!`0~K`8sW_c<7{6?)6nbgpbdNy};7$^aOtSK~^N_jadIau0bn7lyx&J@^# zsSS+M70BKy&$>=(Kz#=KgiJANU#i&sl>&sN5hk>rRYq9oDUGncs@&x@J94dOZG>qf zOdDa^2-8NGHo~+Krj0OdgngkA=9MN^kM4IU>(q-Q4q}9b^SNV;GQviLR3Z9)3nPrm zy^koZ^Eiz#GDawc9E~uC!m7gB2-_`qiL!TSQK33F?RlXc#zeVyF2(ThkRrmcDpCQ* 
zd~@*z_qnREEel>s;|!}}N^6Ycei}w*iK;!cp{D3}>lJz(Bg`_sAg!XW3OrsjZ1AW} zup>1vjxQ$a7(u&hMI8ZXD`MU?Xlu|u9?(_@06ZeluAy1vBwI#bWST5vPX`>RP>wx< zn9)5vm@R=*CQU+cX8yn=1ZO_?#zn>rk0QffPDjxDQV?Wm9Ftht>rX-ElroI_3nclB z5X=%u%A9s7#5RVgHA$(*q{5VlKP=C$E{#J;3zVmz=*IAuA_c)R%MX>TtWH7bD-KUr zM~95*-HKy0CiHHF-mUn4jVQfap^YeQL_Ibm>e232NUAe*DcSF$lhm%&s{uGtmAbF+6&M4JWGtULI|8dg3khhe;i#>KM=Kse57>g+VM{JW(fz|T-YYksQ??gnH zVW=7vVQn5d>=n3Ol(zQw7edNJrdIxQ|hF4$&j#sUW{4?M1k10VpPjk+G zt#(u5jfp}yTrM2VEeW`z%1wzJmy|4xW`J8a<@Q9i(bT(7@%DqYyG!ww6d>o!vFy{p z4rYO4mA1uuQ5Joh!x{yDekzZJL%mbekR@IB!1{3+SWmOSi8i>j!F5aq*W-?L+t2wn z-}#|M`ko;&`z>1?ALoNfQ6I3Q+$M+@3tz%D`jiTnd@a-lixL&pWY+*GR2WixMhe2) zjKQ6d-js61YgnW4B4d3?sVPi4!x2nr(CSURiC}rITE1>pQFQF^nXEurEuc8wVg+sN zLGW#PVr3ynpN-mtGx@n^I$Q$fOhU#yk3aQK?}+<-LfApUS65cWR*x7 zkz1IP@xj3{!RGU_DOxOXWxBKB`w! z+GeT{&F;pAEvzw{tTCfyjhR8)8jHvxOHd;LrU~H=2X1VT zmBsOx@{Fr+Mm(HzH1GIW18nEz+g)qSks5ePYs?E&)>vTdv&I~)fv3%}X2f5;?C-h8 zm+`&m*$(%#FZSE`a|7PV@wkGM78RCha&L$B`R>g0!-r!NwH5XFt*B$zi_>KywP|?N zrs1P46R8eB)O&H?;a;2>ggZYBsmEzK&G#LvUS_=k-1eFER^y06n2P@B;ydARqsp07 zt*^2aJ4bI)xLtUjZtwBT@vJS|JWx&?8YW{#U{b5{#OqY*Z0`9f!{^A_s>0`jR?=L{ z%8UxMGHFMSGTnrp9~Xgiq~}NS5(UM~ajM5dS`(-^L5X!`52`&sEcgk{C^|XrV=xukEHKEVS*WZMVl}yFJ=%J+3kKKzr79fv33VDVX;DIhpDU$3AC=}`l#Gyi=mc{6{sSzja#7JRwNF=4OFirCbt`sR2{eMrMw{a~#3+nXQ-iOd=t$!b&toUh5?~ zjb|GehM11PJ8-itZc>~2u6s1*A5DX^0 zqf@jzzgi7C5=H~&V}Natd}#`W-z{HhdDrs85vGB&*EG1+hQU!721f|US}01hpVvF6IvMQgmk0P5%xOShqChql+ zIv^?oIQ?D$ouEEolz@&unShSJE1=`ZXxFdq@&tq%5i;OU>GJFs@-#XQnqDi|GoiFQ5u1rh_=Kd9sOp?V3?*TZNrm=3iQfxQ6;Td~LeZRp zKv4{oUSFEh=PUB~g7QOUu&1X4)xCwHgX88@4JaB=juKE5fxeFeP-8Gn;3fa{0`DQXygA)(uXd%1=&Vam7y7qI zIu=^lY_nBQ?o4>(EWhZG~>4cpZgTM&wu{N|87c6(nRwwR&+= zwxD`E5hki2iNMDyc{@QmLYy3FNWf4t%yZBy@VdYf?(?j9>g<%g)q2At_pCjY{n^J> zjJ6wr`7|O=pQetsed2Z^i=Bsy(kev?Jf~4}%A^1bvtb7A z`mH(Hbtq`gFz%KjZ-rYLtv|}(;~2V&g6C-Vfi)L#sv$*+=Uu-w7N}C3>iCz}&ET_* zy#W@c(hS^Jz7-ohYBNv=SZMjVF9!7asdV#3gKMyei z@JRQr{d3zCu!N9>ZSW5hepvhV_BXifN9&f0rCj^ z8(0czaj`i%_^w6ETXw_Y!NU-h6!@RY9bE&NF<4#c0>Ph)cB(&bd|v`KAgq!big=M{ 
zEP%K^?;q+Zi~)0Q=4DXKYBhg0Z%mKyU~;$>sWcNAFJ)OM?1B3?CTSRhZJH-_lOdTx zM*w1RE}mokLs6+o?wG+8hkqOih#~VBN15+k&4vG9Rn+o23Yae>)WTHm-

y+nLo84UG@hJK* z>g9!<)jPEIil{3!Zo+*c>Y~9t>$eVJ>I-4&@PQSYp%;`fJ{MVW4a@yKyLgVPYo7Ca zcKJ&5{*Jw?T(>W~VaBs6H0e^6?jh!O_tbXHnJvIYXX)rHoktQi)(NUP27P!%W`?#E zw5@Oqwt_l$=ZN+zL#z3|ZR$8eNGY^>)#06>aYSkCh($vZ7W+Xdr4(9Z`8;C_xNKN6 z6QjD)2qU-_Jk9UYbvpvBiY_~4H4Qd2lYsS};iZoj@W5o7Y!xE}|3F_^$7hAs41OyT zX`acTI?|d(3{#7M|`=-*mVp~d{eq%1)~tRR#O=nM@uRN_FaKnS$=r)_sPW5L&uVos5Wg3pMzK4rMV814nxjJ#7XH5%S zt;i~cu8i7RZ78zZM$1}7F$gSGijr1T=L_?J65(v-qbFQ~=|8%JnsRa>jyz*-H#SRGTE%{7K+Fwu=MrLp>?O;xv9g3f8hGh4gy z%mt-miHg%`r=rS5!DLhv?xu>`AwumCRWA5YQxzKI4GF~2FW>E&s&@IIQj=L7&%CdA z2sEhe)PvZmkIWo=nx#XI!7S2x7TPR2YP0Bw^(;cW`MzToiKN2ngSJuR8rA2eNz_w!4DY`aD{1X}>SWMGoO)3vD8CV-(oY*se zZq^nK{%DcdooJ|`U6G*BUyKcbX|lxA*Z^V08=)B?pdU^J&W%tX6(S^)EVBp}Xpz3S zV=VbZEOP0D z$E9HAu+r0>$7JC{D+GxL0Uvi%-5Co8U7)x#=V3wotjda z*lMT{i!OG-M~F;VPAxKF`n6Nm*%ch-RmD3Z7K|C<3^Q7aG6)D$U$_V*q5?FWE5D>4y!<%c7(UG@nMLxUQ8H27%n(ctrB#ZybbMt95jf9dvKHO}hb7Be`7iEHKr zcn_GCx^$T3|0D{U4JjHB2F>>!2$eCZeZ`esr}}&}g#Jqfp#g<~_CshL%UP|2invz% zTB^&oyf2#)`ulJOa%2vHmlmLyGj20-xH?NXeJ$v#zrRUe?AeD5jQQL&h66M_w$6yi z$tfy9H@eh8O@UmLxOYaBiTAiw=kx7uL~;s?j0#cwnJ7`jOJa5(ipy{kq#=1%dV|}R z1^$`Ph^s)dQlFvCM@}1;6eU@347KFw$fFX{h_jSM7)fOt5l|^4WTUD*=th?{_tt?{ zZbZU`-G`d{>p*MP)U2sllWEp`M6B7uB-4OKeB=;$hQxNcxLX+2b>2Q6q8<*|%bc$H=C zhF76b9{J7~=5=5&R6*r;$gdWWA}*o_@CmxuIC{}g(EuZ>(4p4ZWhANl1buFdTyGSe zb)a2F(=gQm7p*3ILZ&K>kO&s4f23vW>WMY)q z@uQxKhLj>0aD$|u5N#+%esK@O)cm;gpWUIq?iw1+v!(Q;!P$^kr#^>UzDk$(IKDS@ zv*t0}yIb6H+)AI_BEAC-iWp6tJcqxCwplLPa~NOVEvn+e&o9qKv|1WrXLMR;iL!lYlVCjy3Qz=f0s6SMVsU;OA5i7oTE|}Io1@x z`~`X62r)%i96wkvzlm!DeK=SX%QW^WIEw}~Zytg-k1(j*pTTN92VF}9~~@p(+vx>52?v&jbgIlxfD2B({U%P zBzp^ejjLrgJZlttHDxx%UQwH{Q7t6w^K2q!;VwGyQYyzuk%64_S242` z>xLk9Qjzkcn?FG%J@Gg$f(;d&8TL|$+F0TB9@gyHuiYnVBO27Kc@Wlo8Y-b0KQ(@8 z{QPe56PWC$a_YxY2VhUuOUJwu^yf==qZJNuH$o#jKeTtVO%eBJh0ptHD_BnBh;Jv& zVj9+tHjDWAu;MJxrf7;JS`_LmwMZu%ihfqk7V!BpOA2aDXjRIg7ELul+>s!@$w=># zq%Oe(AV(~5G#1T-Rd18R6S|J>4+O!+*un(n@Z4LvNxuySyXLt37|gjC)vswWxG4~` zce`IevDogn_7o++utnU>r;tMYQKY(vpD+>;{-LB|E>u+#;$!s0MvBVyA_19->ybbt 
zMLiy9yB`O)zbHWu6z@}05e;f6)=mu6kAh6v1OFV+b7{}xs4;# zyYTHjc58-P?44YfE>eod`nhrxQp!@zlW^sD=j!mJYr4);a^>Ev>oK7UkCZC?T0gu| znAx^ER9UpBGAgMus;H7OS#TF$?B=tm!pib|YpI$ND;ZVDD#+`4s31~X*18%jRbv|a zq-tb?8Y?wcGL4lQD>YU=W~}THn1>14@hf>nQpW!SAGHDp9A}~}j6L?^IY%KxcsAa` zh0bv5fXnqdM;~L38jP1Q(p6qhzg^*c(XE$vbWhBn!JK_c^ST;>6i%cKa?!||Z?ZeM zSMJwIOr6g0R$cLGR{8}VmK-Ogz&^F%HM4EU#9oK{RL6DH@VAK?ejw7s-ua;&X%d<; zjKrR)%@Qv_p(%05*|*5G^3xreacS4QSG-(ikA8%CD2pyI|_D_XBkz!W%x!Ul%04YB`-1G zjFa>e{RU$=%CoO!ztI!{n*=p9G1&M^12`1QPBhjOzA?h22p=*jbohogDYQwUO$u#N z9HmL2;rcPcwNb%!Zy2{*!L|8K*w^-LVD!NG_#(WgiCgLbXUkALDQ0tBkNjCRMNQnkD(`vZ|&32mY#-vucJ3n;R zx^qc0hoffTbc&TYUqdosQtB;bItsAyk zrpaOpTqa7PFZh<-71zsjvw*KG{d+h}_$4LubjYeo#5e= zu+m@nh1Jj7DPi*T4kZ^)dIZC3?2y{f2$q?m$WF5!&Ax!xaRC`)aB|8jJ9|SFoLa>b zxT_DrsB9os4`jk=$fclXf(3DeXqAXsz*q*-`Q#Y25Fw6Hgsgm3DxO4V3AKVpQBoQE z0^g!CRKXn`0Bi2oyK;4;f<`cnU>d;=i(q!eSG{|VLGuueU>d`E3OA{sMv( zBN41PG=h1up%DnSUj)k2O6rFpFw2Dr&MRZG>b~!e$sf2?a@ib(Znxgb^D9s79qs$9 zV<3ukRl}p%pFJfxV2uronHk{5gJJ6(T8)rT1tA;BIEG)zA;BE#cQbKZQ>tMODoTTs z=I&1&0(B%_dTPCO0MM{;#7@kcSe+>mC)<{=)6T&;VL8V|jHO#Tw2{bueZP~7bC1z- zr2W0ag)k%n#`b7%c9PvyHF2N^R1`;M23=Cp4<5E?1}i zMRWTLPY%OaIr5V!OpV9bF$z=Ol+yXuL8bGD7X+YgO$V}jD|DNU7)8GiWFty9d zPjdDN9y&Z+w03=8;XxG>crZ5?EuD=X9_2`V`XN$@f%syx3cCMqhUqDW($B*c?Ld4oK;A2((Z-6zvX4eC|& zAl@L)axAx6Mbi~~1M}=_IHfhNb1+~35r2e51BT#v0o>2dpNnigIPn+n$}asL!!KY4 z+{UeJyYlR=q7-lPIkQ5T16{{>*v(^le(3o|wZ13WX7-OFh4EN(9N71~Sb(ubNeM8u zM<3yg{@I<<`w|63GGLV5;Mk&sgP6?aWDieib8s7{@0iWqq?@}q!fV_Rqld>ff~5*9aG0Cf8lEY*r_(C*`7&GX z1fiR!L14{-(Cplp?exy06w&uJU2D2NPP!I1c<@-$U1IBr{<>5`azkUKM zz=<~tc5*iQeU)oK%glOl2~T>c zHnY6i%zBY$sE3g*Ia`4_w9XPvnj3Vb_L*Cc7umb!P+(eqP_o@s468OwC~saNihKlQ zd<&5AqPWhpPn(bT>?PVlF<3=8dkNh;1K0)|vPvra0Dqq4FzM%U(tuUf(g)L3&+tog zFTP1>?j#>fzM>6wlyb(zL7ds(oB7QQQ15OT|Bprr?lqX9)9t;2=H2i#oSDJHMwo?) 
z5P>bnqhz|X6UD{-g9DZk@HG(NlII^cNI+NX`|Z(_JxvOAm%ntd0T;GWvHKeOGg1=`J&4TI2f%bVeS0b~CQ(AL!!I)(lG zC0hU|!PWaG@WHRpdP>MvS;sg^DSLTdcIG7vOM!R63A;`+x|`F@N#iyUZZwh2S;pPLE-H2GIWZ1y`B0SI z{}tv*esc%Jgz)%|V}+eJh>!7ljn6h-&x`CPfoJw55-!+%z_-7YLnW=la&WHhafYGH z)tht^!2=KIfx`jTV-B}6%_JY0UO>j6fL zsj6cd56fBB=Q&|=>xsPsN*JDa!bqXUnYsTtQ8%@l{;K}AO}c`6G=~WqyDe8^1+at? zSo|7TEVBldpd|oB?w?ykyt~7K@k6q5>A*Qv!=W3|OoOlwgjCSsX7kUAr?q@65w!uE)5FqR%2* z#A|kS*?;PdfxRqYMXXBIslI!YT}hfSPhWZi{JBpYtvYn^l+daNDV`i!b$rQB7p)A( zm#X6ZXl3#8-vMxi2iNawPnH=Pi|4gJBK2~U_+M)T3c6>ntKoQ&UPciRjvl2|kHm_s`!EP=L! z3V9lRT*fD5wH+$-v*26NT)Jf|e$TFCfj@ zFor#YhsC_>D|k?Om2h0d9pLA~nInx;osvIy#Z?543Wda(_b`8YqxjQTX|;GFF2QYR zwU~Y7Bhj^mVFZy3BOqOKCS7C{j8IU4aspKNH<7X}6mAe<$bo#7{7JIsDAgjeT9Pkt zm5G<{srXYJmmv7lQu1dom_PT1C}{rF{Hgg<^QY#|?~Ok-TYe908H{GjhxPCpz&1#> zdLUam^SS3~wrqZziY)^bTUyP;5sx-wy^1Z(aEC1^5rn*(=*5f#T?)x-@?J9hC&HcA zNIEnlDoq}V0NJ9$mV}j9PA6e?F`X}Y`Bs-LulM>keI;9l1KIL{BITMbHCt-7)NHBQ z@_S>;rf>7Q{C5a{UN_%s{+yma!%^{l$e#}j=+sy68%sw&oK4&~)nnn$P{p6V;7>zc zT%}vDky8Mle2V}|DOJFcBAvM)WgdV|xw9_PsibSOqeD%uP1;hYKaG+vl%a-hsQ7b_ zZ!@q}{An43`BVEgHGgXU)cmRW^HK1p=Fa2g&W9Cm8ZdW2zbDKvbAO!Q^C@ztWvG%+ z>}IaN*UI&`jc!rDLe%VQaHnv5a>1R24EhwpkCet=)Gko@ss(c@k0_TBol(Ltm~-SS zushsoS0Adl(+ic{X%6E~`-wp36Q?(yYcpL8?vJkO@fD| zcB)f6pf}8(&%F`(swlQJ;yk}*qKQ3SqKRcHpoA9ZswkV=+Kg=n`f*GsNdO<1(4{6( zA##Qxf)q@onP4#xv0F9jumdGd^`X)krp%1BtYJ{%XeiN8 zqM<}X$rFVVNB%PaN}R^q1Es$@^SM1dtrV$bcv`7**!%-;I6YCJ`w{1PEL39ckwt8z zzwULFAa#iBRw@*m;BH+~L?SJgVXzm5i;=4&il~wX33^Z*n4(lHsR^T>g|t9~)8#K} z3zt;lRv(UVmDt0m#MM-ysYFwWrjjR0C9eEu0F}6nw+E6Xp3kjOS{LTQhIQP7kxmc8y-(TsK!@^>H1tHGH%EsD6#vBry^P)VW4hClyE^2Oa`rUbd@BO z6`&+XyW5^!N19_|xGt04I=p*qSifsT&hOcYAuq-|P4N4H2Q{i!G}fxZtEUPt*a zML+hxf^LdjRTV;=BIhVk!ySqmPbETGqlQKejT#y?z9LAV=IHQ*BjkwTAr{_2Q}ctd z+b&1L5ySm3t|~9eY5F&8f{sRIa(@>(xX8LsFauwdVoV|d9i)j){t60JMF&WM|6k=r zQNi-^^eX5OTQ`M_a#eI>;LCRu&{0OHD*YRV5;_Koqq!` zVn7syN^v2qsP|Xos}?#hT^&rYNAZQ|5f~ll@T(7%&=Dx1W1!Ia-g44Mgd92(`!SG_ z&ctS#jC3Y;or&$r0y2KT5|A(d8L%njH{L3$`Psg2S+-w&;}@rE`_rat`|WgX&TctC 
z!aZJ^x7jvYRA-c~qh)a;^0H(0a=j206RGCwGTkz~!lJe3F|4cb^-*IA+prIrNc3Jg z^4Wk^uK5OTl102s|3~ZQjNXb?wmDyKqCCBm9Tde6H(9=67jG_BHZ_7p5AP4ODd&!g zU`ARTE;Qs%8Z6XFqMkVurcwJn;X*L2j7dWW!yp_UZRH;(T!;vd;+D_Ik#TafleVTfzr3wJ1FcYYw7_dL$|r(wB_U zy$pg5*#^~zO0reTTv`Jq)PpBNwoe=xwKI)rWPI|@v=+vhzUK&x3l78?-q=j3aJW}0 zII#C@c=Rvw&~O2q;Ak?XRASnJ-|(LSU$pYKTVmQ6k4$;nwB07(VtfzQZ{ea)Amw96 z!XHL_!AOD|xA40Dvj|hGII#+{2IgF05Hr;sRCKHN6?DZ6!-{E?kwR+!*+4OEXj@~3 z#te-a8Z({%W+>cGAusnQxQ?9Pb%B^h`Qh+NAND|{WoTQ&iG~vmCmK$k0G!m@W~L#%AHW8^;9?r( zzXQMp53b+WTVr^m-=_Iy%WzT0{?zN(W@n2G4uoMUU|Nzh^KG83=-2Mw7|&OtJs2eX z)ri!E z4%ym`6OSSywSq@1B90@qKLj%oZYA>CiQq&c8+oW0eoWzA-TE2O0v6CHD21M?!F}0F zQ9r{h)8N%q=PIdgppLPr)8IALX{ysy$DRV!smRSN{~bbZX7l~G02}m}4Buoj4_w>J z8(sz%32e9)4xH$>2_`?dYs^;38PghF${EY;FuzyM7zK){$^zpei4C{IC~Y2-+pszU zWxtaLiHgu-^o-15!>IG}^mcRDDCeHyc@#Yv_VV2>o0NI;BNFHBfpW&C&YRb4qS-{V z345w+VwL|6VH2zQ{xEFfk1Di)Oa5?`9&=D~{i9g&t;6kx4c|?K@jJ|JnA5F0>h`fo zA+y_16A<^VsGI^z2uD!On2F*~R0=Ah@}sEi!z8Y#N-YRC3knD#)+qWi@TJJFH|-LI zM)*wziW+0v8cv=foIKI>xrbAp&iXvAq&mzG)te!Ux6OBc_(pY-!V!BW_Mm8%$;5Gn z%lP1#PUu|d;|xkcdWbWeQJi7-yB6f~?Y8A{yw38CK`OY{A*wefLi3l3H&S^5lEjfA zE>Wv&csH1(5J~Q|?eUk^ZWW;_0&i~)C(xW+C5eU!q@chzdmXc>Cj2gBbRi($Bd}L9+ zz^eJhZHH}$d>*Qk#AW7jA$Cc4A)FFIU#D5VfY1d|r4zQ12x*?b;CCn+R0QL~brZWI zO80|O-p?MWDJMU)bbc|;OUSm{^kdR7vw&-BH@e~p*l)c`b| z@#TIDJ($PAJNW9Y64_z%9s6_*{XrLAi{DN2%r~G#ab~(-Wb%v8%AQpVXuH$%(igvKrV^UFu z4=dv+zt@=gD#be*2RVskD7h!v+nxDvl0FRJIrF(Ss_bJpVms=PwY*KByHV-t)^Nc& zbJ}9wyOMqObaH*RO5m9whw4exRVDhPUJ%|891BVZDOJB-t%T2_+&IMwT;s<$ob_I8I zeN%Sl9YEdX+Y|P3nR1wq);P`onitzYZ{y|77JKk(@tOZOU2%4WYd-rI{x`ZoO1WY0 zVD^7^8{gg{%+Lx_cM=EYzwu{;DQN$FmcwGd#^vN{o53F}>-7Ovf4+|Lm{y2%gO{c= z01X|}biFB-FwC16{ssrG9FVt^IXY*KZJk<{H?wRa0cS53dcNlYvdiK4C&#OL9pmYL z;2+}~j#u-;rudlNa~J~znsgI0pn|`DEY=zR;)-wh$CO>*M{B-Hp}V57pI5*L7d;od zZ~fbT&LrwbOHROyGOQ&hVedtIlq!q211yF43dSQog)!RZ8Huaj!9uy70hXno5Ft|e zr!32&d(wi1Z!iB&(ns?v7#y?RSzY080JMNFjZ@RNt(oaNHV&@0->Kypj&IeE&iW?F z`7PcA?~6@=56=}$M(GXQ81VR9#h>A!f${!10~XEjwkff+y3ODYlgyUFWQVB2S9gHT 
zn=L#0`w4x;cEOi$AT<64{`4IAK8oO~0IFU8BItj{Y0h4tjQS$`xJmIlU&QS9bj#Um+0Lsx{_{^dFF5-TFa_Mjh`|>> zBY)-V0^sO(_#lRVUn7X+TllgyzJJG;A7L1`#H}yV@(L7rhTro~Set+=-_gU2WBnQ* z(FFT*4$t&k4sY2nFlGHx(5dq~{)unWWttW2Re66ukJo=9o_)ZPjuv=g@9?o%;avAT z&prb}$8ZTQp@rueEZC$apXDFnwuiapOlZkxqOo6o!6#{rmc;pvozb2&&)ABu;ATzX zW0k$dw@aijj>a(jSeqEj^g8GHy}aG4Oz>(tB^v_FqEpi`On?OQVQ-~nSf=ZkN1q0-rI-Wgqd=o~;H1=5+mf--SV90j$jKSS< zAs($PUgHwO-u&~Vw!hAb`C2 z2Io__13$s@v02Jn46ULqS@vWPswPwCGz?)bHH@$^m~FSP<^LpbMcOBquYbn)C*KPU z<0o{mz=wXKQYsI8%L$u@GzA%?&O#-2P?;<#Iw=1`whTw*198_t$8izfq&G!;!agtH zfrqtPG?%VX4;+T%8fJ~pDGW8S(e^c)UkU}0jxs9Zwp zVd;VzfTv6>AYt)_n|bE-=`(otd>)Jl3UJ&3uJ=YKr!&q}UF;JQx6C@H(Q2q@neB~x zs%e8Ia!@>+Jlm8OAbs2viAYjgXdIzD17P%V1p6$gR9jHmg3=b0wxF~Hr7b9JL1_!>u~|@7WgewH7L;o? z57}oyjWbWkGLbgQS2IsMFi%DW%~%hs*w%FUotP&Um?!oa^JKu@QKOUJ+B|Wa8$6WO zY=wP!QBfhS6O161)=47DhLhwk!#$sZqIswoL4wwa2=77Tr0AF@WRE0Rh+O1RQC2i* z36Bit3EUm?grax2Op8*QCp{v+VUKx28-09WF4U13C>k0yG#(Q)woOirMTfv5(~xB| z%?=iM!fR|+AGM$egC^+!J~L}3YR#KvDNOS}jc}Mn4L1}o2b1*miTF3F%S_VGH@jY7 zSc)SN;s~Es&;^EgURK!zbYk$t-{gmM|G-QiMwSb>kSh~_44^%Jf; zU}GVC=FmtE-4?B&qhocwOog{&QQ+i9XC@sS6fFfk`gw~Hmm65#(LW(YkMFYO{h4S7 z-@=Rvk1>qxew%$(VbKsGD2kYT|qHJIhasf{T1!-q^_t(Zbi7~JCU)ri0Ia!>^e zqcas-mTlSfeX?_jMQc<}RRmYb!l)_U4Apq8`6`xILVOAS9{y*6ZIn7$a*8D7y85Y2uiM-z}BeI z+ukcpJxZYuJe1CX(Adj3WTlC z^0FPyyc5yw!t&z|Ngi}vC-BOwg%>anrWB@pnZeYF!L=LqZyaNo>@LL#u4rfCMIPVZ zBJRB|PxfsA3t&couF)ZKW9L-0mWiJ9FPCB zca=I3`g&LCaJx#~uz&3!&<=tlauE1>S830#QlQ-@2Xmj)J`zW49RUaS_(*)WKH%%; z?x5-Mu;yxIdbrn<@ny^cq>#eD00gz!?@qSN3!|D@)bKZi-7jxTH3Y|3biO{$0#j_<7N$5WN2KGoE>FkN0h@OUzITJ=&hQbm*;e> zEX&?!WvNB;9dnPBg$+JME9;RNGaYN}X&W;-ic9nMQS-Kr;?hxEUz@kXfNMGHrTcFf5Ena?PY07`?$!B%9=ovLYUq&TjT<(rQ=AAh5o|52oKJ{1Emf za3q3~Q7RG3Nm0NQ7ZFmt6k&|0Fa&3m3s2gMjF%Z1-Cc}AqyV-&LdxDv; z!aaJ~DX#$??uh@4O22WES|@WLq3mun!l6}Tv?1_y41uT3(Ot8`vmM(m7t(*m*T}cORJKn1xfAkSosv+@ z6;ab9vAV+O-+MSQo=1!FtX{o*e#L%)70I_w-4JdbJSSJPy=HsO_L}WA+fS96?K`sO zLD)`03JnoILu}GV{&w?_Z;&7j>_0rdpHB0;pnrcf{Xbmiq1nqx>HlOH{hN&<2LbH? 
z_%;0dZTP431k93_B@FN_(LM#%lp?q!;#82H6UF01qVPdCOPa)T(Ewn71^og3O@#md M4^)D matures; public MiRnaGene() { - + matures = new ArrayList<>(); } public MiRnaGene(String accession, String id, String status, String sequence, List matures) { From babb593cc3994e0b29a3f8ade2e0fbb05e2d22fc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joaqu=C3=ADn=20T=C3=A1rraga=20Gim=C3=A9nez?= Date: Thu, 25 Apr 2024 18:43:49 +0200 Subject: [PATCH 08/24] models: update PharmGKB classes according to the data model changes, #TASK-5775, #TASK-5564 --- .../models/pharma/guideline/Literature.java | 20 +++++++++++++++++++ .../guideline/PharmaDosingGuideline.java | 10 ++++++++++ 2 files changed, 30 insertions(+) diff --git a/biodata-models/src/main/java/org/opencb/biodata/models/pharma/guideline/Literature.java b/biodata-models/src/main/java/org/opencb/biodata/models/pharma/guideline/Literature.java index e657d414..81eeb651 100644 --- a/biodata-models/src/main/java/org/opencb/biodata/models/pharma/guideline/Literature.java +++ b/biodata-models/src/main/java/org/opencb/biodata/models/pharma/guideline/Literature.java @@ -8,7 +8,9 @@ public class Literature { private String _sameAs; private List crossReferences; private String objCls; + private String pubDate; private List terms; + private String type; public float getId() { return id; @@ -55,6 +57,15 @@ public Literature setObjCls(String objCls) { return this; } + public String getPubDate() { + return pubDate; + } + + public Literature setPubDate(String pubDate) { + this.pubDate = pubDate; + return this; + } + public List getTerms() { return terms; } @@ -63,6 +74,15 @@ public Literature setTerms(List terms) { this.terms = terms; return this; } + + public String getType() { + return type; + } + + public Literature setType(String type) { + this.type = type; + return this; + } } diff --git a/biodata-models/src/main/java/org/opencb/biodata/models/pharma/guideline/PharmaDosingGuideline.java 
b/biodata-models/src/main/java/org/opencb/biodata/models/pharma/guideline/PharmaDosingGuideline.java index 0f2e39ef..11e1b9f0 100644 --- a/biodata-models/src/main/java/org/opencb/biodata/models/pharma/guideline/PharmaDosingGuideline.java +++ b/biodata-models/src/main/java/org/opencb/biodata/models/pharma/guideline/PharmaDosingGuideline.java @@ -15,6 +15,7 @@ public class PharmaDosingGuideline { private boolean hasTestingInfo; private List history; private List literature; + private boolean otherPrescribingGuidance; private boolean pediatric; private PediatricMarkdown pediatricMarkdown; private boolean recommendation; @@ -136,6 +137,15 @@ public PharmaDosingGuideline setLiterature(List literature) { return this; } + public boolean isOtherPrescribingGuidance() { + return otherPrescribingGuidance; + } + + public PharmaDosingGuideline setOtherPrescribingGuidance(boolean otherPrescribingGuidance) { + this.otherPrescribingGuidance = otherPrescribingGuidance; + return this; + } + public boolean isPediatric() { return pediatric; } From 382831ab8868c915a2798222d885c4b41a2f511a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joaqu=C3=ADn=20T=C3=A1rraga=20Gim=C3=A9nez?= Date: Mon, 2 Jun 2025 09:31:52 +0200 Subject: [PATCH 09/24] formats: add parser for UniProt 2025-02, #TASK-5576, #TASK-5564 --- .../protein/uniprot/UniProtParser.java | 37 +- .../uniprot/v202502jaxb/CitationType.java | 527 +++++ .../uniprot/v202502jaxb/CofactorType.java | 134 ++ .../uniprot/v202502jaxb/CommentType.java | 1791 +++++++++++++++++ .../uniprot/v202502jaxb/ConsortiumType.java | 68 + .../uniprot/v202502jaxb/DbReferenceType.java | 192 ++ .../protein/uniprot/v202502jaxb/Entry.java | 625 ++++++ .../uniprot/v202502jaxb/EventType.java | 76 + .../uniprot/v202502jaxb/EvidenceType.java | 153 ++ .../v202502jaxb/EvidencedStringType.java | 101 + .../uniprot/v202502jaxb/FeatureType.java | 369 ++++ .../uniprot/v202502jaxb/GeneLocationType.java | 152 ++ .../uniprot/v202502jaxb/GeneNameType.java | 140 ++ 
.../protein/uniprot/v202502jaxb/GeneType.java | 79 + .../uniprot/v202502jaxb/ImportedFromType.java | 71 + .../uniprot/v202502jaxb/InteractantType.java | 145 ++ .../uniprot/v202502jaxb/IsoformType.java | 370 ++++ .../uniprot/v202502jaxb/KeywordType.java | 128 ++ .../uniprot/v202502jaxb/LigandPartType.java | 152 ++ .../uniprot/v202502jaxb/LigandType.java | 152 ++ .../uniprot/v202502jaxb/LocationType.java | 153 ++ .../uniprot/v202502jaxb/MoleculeType.java | 96 + .../uniprot/v202502jaxb/NameListType.java | 82 + .../uniprot/v202502jaxb/ObjectFactory.java | 524 +++++ .../uniprot/v202502jaxb/OrganismNameType.java | 106 + .../uniprot/v202502jaxb/OrganismType.java | 241 +++ .../uniprot/v202502jaxb/PersonType.java | 65 + .../PhysiologicalReactionType.java | 140 ++ .../uniprot/v202502jaxb/PositionType.java | 143 ++ .../uniprot/v202502jaxb/PropertyType.java | 92 + .../v202502jaxb/ProteinExistenceType.java | 78 + .../uniprot/v202502jaxb/ProteinType.java | 1109 ++++++++++ .../uniprot/v202502jaxb/ReactionType.java | 139 ++ .../uniprot/v202502jaxb/ReferenceType.java | 193 ++ .../uniprot/v202502jaxb/SequenceType.java | 242 +++ .../uniprot/v202502jaxb/SourceDataType.java | 461 +++++ .../uniprot/v202502jaxb/SourceType.java | 98 + .../uniprot/v202502jaxb/StatusType.java | 107 + .../v202502jaxb/SubcellularLocationType.java | 142 ++ .../protein/uniprot/v202502jaxb/Uniprot.java | 105 + .../uniprot/v202502jaxb/package-info.java | 9 + .../protein/uniprot/UniProtParserTest.java | 22 + .../resources/uniprot-202502/uniprot-test.xml | 118 ++ 43 files changed, 9903 insertions(+), 24 deletions(-) create mode 100644 biodata-formats/src/main/java/org/opencb/biodata/formats/protein/uniprot/v202502jaxb/CitationType.java create mode 100644 biodata-formats/src/main/java/org/opencb/biodata/formats/protein/uniprot/v202502jaxb/CofactorType.java create mode 100644 biodata-formats/src/main/java/org/opencb/biodata/formats/protein/uniprot/v202502jaxb/CommentType.java create mode 100644 
biodata-formats/src/main/java/org/opencb/biodata/formats/protein/uniprot/v202502jaxb/ConsortiumType.java create mode 100644 biodata-formats/src/main/java/org/opencb/biodata/formats/protein/uniprot/v202502jaxb/DbReferenceType.java create mode 100644 biodata-formats/src/main/java/org/opencb/biodata/formats/protein/uniprot/v202502jaxb/Entry.java create mode 100644 biodata-formats/src/main/java/org/opencb/biodata/formats/protein/uniprot/v202502jaxb/EventType.java create mode 100644 biodata-formats/src/main/java/org/opencb/biodata/formats/protein/uniprot/v202502jaxb/EvidenceType.java create mode 100644 biodata-formats/src/main/java/org/opencb/biodata/formats/protein/uniprot/v202502jaxb/EvidencedStringType.java create mode 100644 biodata-formats/src/main/java/org/opencb/biodata/formats/protein/uniprot/v202502jaxb/FeatureType.java create mode 100644 biodata-formats/src/main/java/org/opencb/biodata/formats/protein/uniprot/v202502jaxb/GeneLocationType.java create mode 100644 biodata-formats/src/main/java/org/opencb/biodata/formats/protein/uniprot/v202502jaxb/GeneNameType.java create mode 100644 biodata-formats/src/main/java/org/opencb/biodata/formats/protein/uniprot/v202502jaxb/GeneType.java create mode 100644 biodata-formats/src/main/java/org/opencb/biodata/formats/protein/uniprot/v202502jaxb/ImportedFromType.java create mode 100644 biodata-formats/src/main/java/org/opencb/biodata/formats/protein/uniprot/v202502jaxb/InteractantType.java create mode 100644 biodata-formats/src/main/java/org/opencb/biodata/formats/protein/uniprot/v202502jaxb/IsoformType.java create mode 100644 biodata-formats/src/main/java/org/opencb/biodata/formats/protein/uniprot/v202502jaxb/KeywordType.java create mode 100644 biodata-formats/src/main/java/org/opencb/biodata/formats/protein/uniprot/v202502jaxb/LigandPartType.java create mode 100644 biodata-formats/src/main/java/org/opencb/biodata/formats/protein/uniprot/v202502jaxb/LigandType.java create mode 100644 
biodata-formats/src/main/java/org/opencb/biodata/formats/protein/uniprot/v202502jaxb/LocationType.java create mode 100644 biodata-formats/src/main/java/org/opencb/biodata/formats/protein/uniprot/v202502jaxb/MoleculeType.java create mode 100644 biodata-formats/src/main/java/org/opencb/biodata/formats/protein/uniprot/v202502jaxb/NameListType.java create mode 100644 biodata-formats/src/main/java/org/opencb/biodata/formats/protein/uniprot/v202502jaxb/ObjectFactory.java create mode 100644 biodata-formats/src/main/java/org/opencb/biodata/formats/protein/uniprot/v202502jaxb/OrganismNameType.java create mode 100644 biodata-formats/src/main/java/org/opencb/biodata/formats/protein/uniprot/v202502jaxb/OrganismType.java create mode 100644 biodata-formats/src/main/java/org/opencb/biodata/formats/protein/uniprot/v202502jaxb/PersonType.java create mode 100644 biodata-formats/src/main/java/org/opencb/biodata/formats/protein/uniprot/v202502jaxb/PhysiologicalReactionType.java create mode 100644 biodata-formats/src/main/java/org/opencb/biodata/formats/protein/uniprot/v202502jaxb/PositionType.java create mode 100644 biodata-formats/src/main/java/org/opencb/biodata/formats/protein/uniprot/v202502jaxb/PropertyType.java create mode 100644 biodata-formats/src/main/java/org/opencb/biodata/formats/protein/uniprot/v202502jaxb/ProteinExistenceType.java create mode 100644 biodata-formats/src/main/java/org/opencb/biodata/formats/protein/uniprot/v202502jaxb/ProteinType.java create mode 100644 biodata-formats/src/main/java/org/opencb/biodata/formats/protein/uniprot/v202502jaxb/ReactionType.java create mode 100644 biodata-formats/src/main/java/org/opencb/biodata/formats/protein/uniprot/v202502jaxb/ReferenceType.java create mode 100644 biodata-formats/src/main/java/org/opencb/biodata/formats/protein/uniprot/v202502jaxb/SequenceType.java create mode 100644 biodata-formats/src/main/java/org/opencb/biodata/formats/protein/uniprot/v202502jaxb/SourceDataType.java create mode 100644 
biodata-formats/src/main/java/org/opencb/biodata/formats/protein/uniprot/v202502jaxb/SourceType.java create mode 100644 biodata-formats/src/main/java/org/opencb/biodata/formats/protein/uniprot/v202502jaxb/StatusType.java create mode 100644 biodata-formats/src/main/java/org/opencb/biodata/formats/protein/uniprot/v202502jaxb/SubcellularLocationType.java create mode 100644 biodata-formats/src/main/java/org/opencb/biodata/formats/protein/uniprot/v202502jaxb/Uniprot.java create mode 100644 biodata-formats/src/main/java/org/opencb/biodata/formats/protein/uniprot/v202502jaxb/package-info.java create mode 100644 biodata-formats/src/test/java/org/opencb/biodata/formats/protein/uniprot/UniProtParserTest.java create mode 100644 biodata-formats/src/test/resources/uniprot-202502/uniprot-test.xml diff --git a/biodata-formats/src/main/java/org/opencb/biodata/formats/protein/uniprot/UniProtParser.java b/biodata-formats/src/main/java/org/opencb/biodata/formats/protein/uniprot/UniProtParser.java index 33bd105b..09f8046c 100644 --- a/biodata-formats/src/main/java/org/opencb/biodata/formats/protein/uniprot/UniProtParser.java +++ b/biodata-formats/src/main/java/org/opencb/biodata/formats/protein/uniprot/UniProtParser.java @@ -29,40 +29,29 @@ */ public class UniProtParser { - public final static String UNIPROT_CONTEXT = "org.opencb.biodata.formats.protein.uniprot.v202003jaxb"; + public final static String UNIPROT_202003_CONTEXT = "org.opencb.biodata.formats.protein.uniprot.v202003jaxb"; + public final static String UNIPROT_202502_CONTEXT = "org.opencb.biodata.formats.protein.uniprot.v202502jaxb"; + public final static String UNIPROT_LATEST_CONTEXT = UNIPROT_202502_CONTEXT; + @Deprecated public static void saveXMLInfo(Object obj, String filename) throws FileNotFoundException, JAXBException { + saveXMLInfo(obj, UNIPROT_LATEST_CONTEXT, filename); + } + + public static void saveXMLInfo(Object obj, String uniprotContext, String filename) throws FileNotFoundException, JAXBException { 
JAXBContext jaxbContext; - jaxbContext = JAXBContext.newInstance(UNIPROT_CONTEXT); + jaxbContext = JAXBContext.newInstance(uniprotContext); Marshaller marshaller = jaxbContext.createMarshaller(); marshaller.marshal(obj, new FileOutputStream(filename)); } - /** - * Checks if XML info path exists and loads it - * - * @throws javax.xml.bind.JAXBException - * @throws java.io.IOException - */ public static Object loadXMLInfo(String filename) throws JAXBException { - Object obj = null; - JAXBContext jaxbContext = JAXBContext.newInstance(UNIPROT_CONTEXT); - Unmarshaller unmarshaller = jaxbContext.createUnmarshaller(); - obj = unmarshaller.unmarshal(new File(filename)); - return obj; + return loadXMLInfo(filename, UNIPROT_LATEST_CONTEXT); } - /** - * Checks if XML info path exists and loads it - * - * @throws javax.xml.bind.JAXBException - * @throws java.io.IOException - */ - public static Object loadXMLInfo(String filename, String uniprotVersion) throws JAXBException { - Object obj = null; - JAXBContext jaxbContext = JAXBContext.newInstance(uniprotVersion); + public static Object loadXMLInfo(String filename, String uniprotContext) throws JAXBException { + JAXBContext jaxbContext = JAXBContext.newInstance(uniprotContext); Unmarshaller unmarshaller = jaxbContext.createUnmarshaller(); - obj = unmarshaller.unmarshal(new File(filename)); - return obj; + return unmarshaller.unmarshal(new File(filename)); } } diff --git a/biodata-formats/src/main/java/org/opencb/biodata/formats/protein/uniprot/v202502jaxb/CitationType.java b/biodata-formats/src/main/java/org/opencb/biodata/formats/protein/uniprot/v202502jaxb/CitationType.java new file mode 100644 index 00000000..0e9f2a11 --- /dev/null +++ b/biodata-formats/src/main/java/org/opencb/biodata/formats/protein/uniprot/v202502jaxb/CitationType.java @@ -0,0 +1,527 @@ +// +// This file was generated by the JavaTM Architecture for XML Binding(JAXB) Reference Implementation, v2.2.8-b130911.1802 +// See http://java.sun.com/xml/jaxb +// Any 
modifications to this file will be lost upon recompilation of the source schema. +// Generated on: 2025.05.30 at 11:28:51 AM CEST +// + + +package org.opencb.biodata.formats.protein.uniprot.v202502jaxb; + +import java.util.ArrayList; +import java.util.List; +import javax.xml.bind.annotation.XmlAccessType; +import javax.xml.bind.annotation.XmlAccessorType; +import javax.xml.bind.annotation.XmlAttribute; +import javax.xml.bind.annotation.XmlType; + + +/** + * Describes different types of citations. + * Equivalent to the flat file RX-, RG-, RA-, RT- and RL-lines. + * + *

Java class for citationType complex type. + * + *

The following schema fragment specifies the expected content contained within this class. + * + *

+ * <complexType name="citationType">
+ *   <complexContent>
+ *     <restriction base="{http://www.w3.org/2001/XMLSchema}anyType">
+ *       <sequence>
+ *         <element name="title" type="{http://www.w3.org/2001/XMLSchema}string" minOccurs="0"/>
+ *         <element name="editorList" type="{https://uniprot.org/uniprot}nameListType" minOccurs="0"/>
+ *         <element name="authorList" type="{https://uniprot.org/uniprot}nameListType" minOccurs="0"/>
+ *         <element name="locator" type="{http://www.w3.org/2001/XMLSchema}string" minOccurs="0"/>
+ *         <element name="dbReference" type="{https://uniprot.org/uniprot}dbReferenceType" maxOccurs="unbounded" minOccurs="0"/>
+ *       </sequence>
+ *       <attribute name="type" use="required">
+ *         <simpleType>
+ *           <restriction base="{http://www.w3.org/2001/XMLSchema}string">
+ *             <enumeration value="book"/>
+ *             <enumeration value="journal article"/>
+ *             <enumeration value="online journal article"/>
+ *             <enumeration value="patent"/>
+ *             <enumeration value="submission"/>
+ *             <enumeration value="thesis"/>
+ *             <enumeration value="unpublished observations"/>
+ *           </restriction>
+ *         </simpleType>
+ *       </attribute>
+ *       <attribute name="date">
+ *         <simpleType>
+ *           <union memberTypes=" {http://www.w3.org/2001/XMLSchema}date {http://www.w3.org/2001/XMLSchema}gYearMonth {http://www.w3.org/2001/XMLSchema}gYear">
+ *           </union>
+ *         </simpleType>
+ *       </attribute>
+ *       <attribute name="name" type="{http://www.w3.org/2001/XMLSchema}string" />
+ *       <attribute name="volume" type="{http://www.w3.org/2001/XMLSchema}string" />
+ *       <attribute name="first" type="{http://www.w3.org/2001/XMLSchema}string" />
+ *       <attribute name="last" type="{http://www.w3.org/2001/XMLSchema}string" />
+ *       <attribute name="publisher" type="{http://www.w3.org/2001/XMLSchema}string" />
+ *       <attribute name="city" type="{http://www.w3.org/2001/XMLSchema}string" />
+ *       <attribute name="db" type="{http://www.w3.org/2001/XMLSchema}string" />
+ *       <attribute name="number" type="{http://www.w3.org/2001/XMLSchema}string" />
+ *       <attribute name="institute" type="{http://www.w3.org/2001/XMLSchema}string" />
+ *       <attribute name="country" type="{http://www.w3.org/2001/XMLSchema}string" />
+ *     </restriction>
+ *   </complexContent>
+ * </complexType>
+ * 
+ * + * + */ +@XmlAccessorType(XmlAccessType.FIELD) +@XmlType(name = "citationType", propOrder = { + "title", + "editorList", + "authorList", + "locator", + "dbReference" +}) +public class CitationType { + + protected String title; + protected NameListType editorList; + protected NameListType authorList; + protected String locator; + protected List dbReference; + @XmlAttribute(name = "type", required = true) + protected String type; + @XmlAttribute(name = "date") + protected String date; + @XmlAttribute(name = "name") + protected String name; + @XmlAttribute(name = "volume") + protected String volume; + @XmlAttribute(name = "first") + protected String first; + @XmlAttribute(name = "last") + protected String last; + @XmlAttribute(name = "publisher") + protected String publisher; + @XmlAttribute(name = "city") + protected String city; + @XmlAttribute(name = "db") + protected String db; + @XmlAttribute(name = "number") + protected String number; + @XmlAttribute(name = "institute") + protected String institute; + @XmlAttribute(name = "country") + protected String country; + + /** + * Gets the value of the title property. + * + * @return + * possible object is + * {@link String } + * + */ + public String getTitle() { + return title; + } + + /** + * Sets the value of the title property. + * + * @param value + * allowed object is + * {@link String } + * + */ + public void setTitle(String value) { + this.title = value; + } + + /** + * Gets the value of the editorList property. + * + * @return + * possible object is + * {@link NameListType } + * + */ + public NameListType getEditorList() { + return editorList; + } + + /** + * Sets the value of the editorList property. + * + * @param value + * allowed object is + * {@link NameListType } + * + */ + public void setEditorList(NameListType value) { + this.editorList = value; + } + + /** + * Gets the value of the authorList property. 
+ * + * @return + * possible object is + * {@link NameListType } + * + */ + public NameListType getAuthorList() { + return authorList; + } + + /** + * Sets the value of the authorList property. + * + * @param value + * allowed object is + * {@link NameListType } + * + */ + public void setAuthorList(NameListType value) { + this.authorList = value; + } + + /** + * Gets the value of the locator property. + * + * @return + * possible object is + * {@link String } + * + */ + public String getLocator() { + return locator; + } + + /** + * Sets the value of the locator property. + * + * @param value + * allowed object is + * {@link String } + * + */ + public void setLocator(String value) { + this.locator = value; + } + + /** + * Gets the value of the dbReference property. + * + *

+ * This accessor method returns a reference to the live list, + * not a snapshot. Therefore any modification you make to the + * returned list will be present inside the JAXB object. + * This is why there is not a set method for the dbReference property. + * + *

+ * For example, to add a new item, do as follows: + *

+     *    getDbReference().add(newItem);
+     * 
+ * + * + *

+ * Objects of the following type(s) are allowed in the list + * {@link DbReferenceType } + * + * + */ + public List getDbReference() { + if (dbReference == null) { + dbReference = new ArrayList(); + } + return this.dbReference; + } + + /** + * Gets the value of the type property. + * + * @return + * possible object is + * {@link String } + * + */ + public String getType() { + return type; + } + + /** + * Sets the value of the type property. + * + * @param value + * allowed object is + * {@link String } + * + */ + public void setType(String value) { + this.type = value; + } + + /** + * Gets the value of the date property. + * + * @return + * possible object is + * {@link String } + * + */ + public String getDate() { + return date; + } + + /** + * Sets the value of the date property. + * + * @param value + * allowed object is + * {@link String } + * + */ + public void setDate(String value) { + this.date = value; + } + + /** + * Gets the value of the name property. + * + * @return + * possible object is + * {@link String } + * + */ + public String getName() { + return name; + } + + /** + * Sets the value of the name property. + * + * @param value + * allowed object is + * {@link String } + * + */ + public void setName(String value) { + this.name = value; + } + + /** + * Gets the value of the volume property. + * + * @return + * possible object is + * {@link String } + * + */ + public String getVolume() { + return volume; + } + + /** + * Sets the value of the volume property. + * + * @param value + * allowed object is + * {@link String } + * + */ + public void setVolume(String value) { + this.volume = value; + } + + /** + * Gets the value of the first property. + * + * @return + * possible object is + * {@link String } + * + */ + public String getFirst() { + return first; + } + + /** + * Sets the value of the first property. 
+ * + * @param value + * allowed object is + * {@link String } + * + */ + public void setFirst(String value) { + this.first = value; + } + + /** + * Gets the value of the last property. + * + * @return + * possible object is + * {@link String } + * + */ + public String getLast() { + return last; + } + + /** + * Sets the value of the last property. + * + * @param value + * allowed object is + * {@link String } + * + */ + public void setLast(String value) { + this.last = value; + } + + /** + * Gets the value of the publisher property. + * + * @return + * possible object is + * {@link String } + * + */ + public String getPublisher() { + return publisher; + } + + /** + * Sets the value of the publisher property. + * + * @param value + * allowed object is + * {@link String } + * + */ + public void setPublisher(String value) { + this.publisher = value; + } + + /** + * Gets the value of the city property. + * + * @return + * possible object is + * {@link String } + * + */ + public String getCity() { + return city; + } + + /** + * Sets the value of the city property. + * + * @param value + * allowed object is + * {@link String } + * + */ + public void setCity(String value) { + this.city = value; + } + + /** + * Gets the value of the db property. + * + * @return + * possible object is + * {@link String } + * + */ + public String getDb() { + return db; + } + + /** + * Sets the value of the db property. + * + * @param value + * allowed object is + * {@link String } + * + */ + public void setDb(String value) { + this.db = value; + } + + /** + * Gets the value of the number property. + * + * @return + * possible object is + * {@link String } + * + */ + public String getNumber() { + return number; + } + + /** + * Sets the value of the number property. + * + * @param value + * allowed object is + * {@link String } + * + */ + public void setNumber(String value) { + this.number = value; + } + + /** + * Gets the value of the institute property. 
+ * + * @return + * possible object is + * {@link String } + * + */ + public String getInstitute() { + return institute; + } + + /** + * Sets the value of the institute property. + * + * @param value + * allowed object is + * {@link String } + * + */ + public void setInstitute(String value) { + this.institute = value; + } + + /** + * Gets the value of the country property. + * + * @return + * possible object is + * {@link String } + * + */ + public String getCountry() { + return country; + } + + /** + * Sets the value of the country property. + * + * @param value + * allowed object is + * {@link String } + * + */ + public void setCountry(String value) { + this.country = value; + } + +} diff --git a/biodata-formats/src/main/java/org/opencb/biodata/formats/protein/uniprot/v202502jaxb/CofactorType.java b/biodata-formats/src/main/java/org/opencb/biodata/formats/protein/uniprot/v202502jaxb/CofactorType.java new file mode 100644 index 00000000..66c7d8c3 --- /dev/null +++ b/biodata-formats/src/main/java/org/opencb/biodata/formats/protein/uniprot/v202502jaxb/CofactorType.java @@ -0,0 +1,134 @@ +// +// This file was generated by the JavaTM Architecture for XML Binding(JAXB) Reference Implementation, v2.2.8-b130911.1802 +// See http://java.sun.com/xml/jaxb +// Any modifications to this file will be lost upon recompilation of the source schema. +// Generated on: 2025.05.30 at 11:28:51 AM CEST +// + + +package org.opencb.biodata.formats.protein.uniprot.v202502jaxb; + +import java.util.ArrayList; +import java.util.List; +import javax.xml.bind.annotation.XmlAccessType; +import javax.xml.bind.annotation.XmlAccessorType; +import javax.xml.bind.annotation.XmlAttribute; +import javax.xml.bind.annotation.XmlElement; +import javax.xml.bind.annotation.XmlType; + + +/** + * Describes a cofactor. + * + *

Java class for cofactorType complex type. + * + *

The following schema fragment specifies the expected content contained within this class. + * + *

+ * <complexType name="cofactorType">
+ *   <complexContent>
+ *     <restriction base="{http://www.w3.org/2001/XMLSchema}anyType">
+ *       <sequence>
+ *         <element name="name" type="{http://www.w3.org/2001/XMLSchema}string"/>
+ *         <element name="dbReference" type="{https://uniprot.org/uniprot}dbReferenceType"/>
+ *       </sequence>
+ *       <attribute name="evidence" type="{https://uniprot.org/uniprot}intListType" />
+ *     </restriction>
+ *   </complexContent>
+ * </complexType>
+ * 
+ * + * + */ +@XmlAccessorType(XmlAccessType.FIELD) +@XmlType(name = "cofactorType", propOrder = { + "name", + "dbReference" +}) +public class CofactorType { + + @XmlElement(required = true) + protected String name; + @XmlElement(required = true) + protected DbReferenceType dbReference; + @XmlAttribute(name = "evidence") + protected List evidence; + + /** + * Gets the value of the name property. + * + * @return + * possible object is + * {@link String } + * + */ + public String getName() { + return name; + } + + /** + * Sets the value of the name property. + * + * @param value + * allowed object is + * {@link String } + * + */ + public void setName(String value) { + this.name = value; + } + + /** + * Gets the value of the dbReference property. + * + * @return + * possible object is + * {@link DbReferenceType } + * + */ + public DbReferenceType getDbReference() { + return dbReference; + } + + /** + * Sets the value of the dbReference property. + * + * @param value + * allowed object is + * {@link DbReferenceType } + * + */ + public void setDbReference(DbReferenceType value) { + this.dbReference = value; + } + + /** + * Gets the value of the evidence property. + * + *

+ * This accessor method returns a reference to the live list, + * not a snapshot. Therefore any modification you make to the + * returned list will be present inside the JAXB object. + * This is why there is not a set method for the evidence property. + * + *

+ * For example, to add a new item, do as follows: + *

+     *    getEvidence().add(newItem);
+     * 
+ * + * + *

+ * Objects of the following type(s) are allowed in the list + * {@link Integer } + * + * + */ + public List getEvidence() { + if (evidence == null) { + evidence = new ArrayList(); + } + return this.evidence; + } + +} diff --git a/biodata-formats/src/main/java/org/opencb/biodata/formats/protein/uniprot/v202502jaxb/CommentType.java b/biodata-formats/src/main/java/org/opencb/biodata/formats/protein/uniprot/v202502jaxb/CommentType.java new file mode 100644 index 00000000..1343c2ff --- /dev/null +++ b/biodata-formats/src/main/java/org/opencb/biodata/formats/protein/uniprot/v202502jaxb/CommentType.java @@ -0,0 +1,1791 @@ +// +// This file was generated by the JavaTM Architecture for XML Binding(JAXB) Reference Implementation, v2.2.8-b130911.1802 +// See http://java.sun.com/xml/jaxb +// Any modifications to this file will be lost upon recompilation of the source schema. +// Generated on: 2025.05.30 at 11:28:51 AM CEST +// + + +package org.opencb.biodata.formats.protein.uniprot.v202502jaxb; + +import java.util.ArrayList; +import java.util.List; +import javax.xml.bind.annotation.XmlAccessType; +import javax.xml.bind.annotation.XmlAccessorType; +import javax.xml.bind.annotation.XmlAttribute; +import javax.xml.bind.annotation.XmlElement; +import javax.xml.bind.annotation.XmlSchemaType; +import javax.xml.bind.annotation.XmlType; + + +/** + * Describes different types of general annotations. + * Equivalent to the flat file CC-line. + * + *

Java class for commentType complex type. + * + *

The following schema fragment specifies the expected content contained within this class. + * + *

+ * <complexType name="commentType">
+ *   <complexContent>
+ *     <restriction base="{http://www.w3.org/2001/XMLSchema}anyType">
+ *       <sequence>
+ *         <element name="molecule" type="{https://uniprot.org/uniprot}moleculeType" minOccurs="0"/>
+ *         <choice minOccurs="0">
+ *           <group ref="{https://uniprot.org/uniprot}bpcCommentGroup"/>
+ *           <sequence>
+ *             <element name="reaction" type="{https://uniprot.org/uniprot}reactionType"/>
+ *             <element name="physiologicalReaction" type="{https://uniprot.org/uniprot}physiologicalReactionType" maxOccurs="2" minOccurs="0"/>
+ *           </sequence>
+ *           <sequence>
+ *             <element name="cofactor" type="{https://uniprot.org/uniprot}cofactorType" maxOccurs="unbounded"/>
+ *           </sequence>
+ *           <sequence>
+ *             <element name="subcellularLocation" type="{https://uniprot.org/uniprot}subcellularLocationType" maxOccurs="unbounded"/>
+ *           </sequence>
+ *           <element name="conflict">
+ *             <complexType>
+ *               <complexContent>
+ *                 <restriction base="{http://www.w3.org/2001/XMLSchema}anyType">
+ *                   <sequence>
+ *                     <element name="sequence" minOccurs="0">
+ *                       <complexType>
+ *                         <complexContent>
+ *                           <restriction base="{http://www.w3.org/2001/XMLSchema}anyType">
+ *                             <attribute name="resource" use="required">
+ *                               <simpleType>
+ *                                 <restriction base="{http://www.w3.org/2001/XMLSchema}string">
+ *                                   <enumeration value="EMBL-CDS"/>
+ *                                   <enumeration value="EMBL"/>
+ *                                 </restriction>
+ *                               </simpleType>
+ *                             </attribute>
+ *                             <attribute name="id" use="required" type="{http://www.w3.org/2001/XMLSchema}string" />
+ *                             <attribute name="version" type="{http://www.w3.org/2001/XMLSchema}int" />
+ *                           </restriction>
+ *                         </complexContent>
+ *                       </complexType>
+ *                     </element>
+ *                   </sequence>
+ *                   <attribute name="type" use="required">
+ *                     <simpleType>
+ *                       <restriction base="{http://www.w3.org/2001/XMLSchema}string">
+ *                         <enumeration value="frameshift"/>
+ *                         <enumeration value="erroneous initiation"/>
+ *                         <enumeration value="erroneous termination"/>
+ *                         <enumeration value="erroneous gene model prediction"/>
+ *                         <enumeration value="erroneous translation"/>
+ *                         <enumeration value="miscellaneous discrepancy"/>
+ *                       </restriction>
+ *                     </simpleType>
+ *                   </attribute>
+ *                   <attribute name="ref" type="{http://www.w3.org/2001/XMLSchema}string" />
+ *                 </restriction>
+ *               </complexContent>
+ *             </complexType>
+ *           </element>
+ *           <sequence>
+ *             <element name="link" maxOccurs="unbounded" minOccurs="0">
+ *               <complexType>
+ *                 <complexContent>
+ *                   <restriction base="{http://www.w3.org/2001/XMLSchema}anyType">
+ *                     <attribute name="uri" use="required" type="{http://www.w3.org/2001/XMLSchema}anyURI" />
+ *                   </restriction>
+ *                 </complexContent>
+ *               </complexType>
+ *             </element>
+ *           </sequence>
+ *           <sequence>
+ *             <element name="event" type="{https://uniprot.org/uniprot}eventType" maxOccurs="4"/>
+ *             <element name="isoform" type="{https://uniprot.org/uniprot}isoformType" maxOccurs="unbounded" minOccurs="0"/>
+ *           </sequence>
+ *           <sequence>
+ *             <element name="interactant" type="{https://uniprot.org/uniprot}interactantType" maxOccurs="2" minOccurs="2"/>
+ *             <element name="organismsDiffer" type="{http://www.w3.org/2001/XMLSchema}boolean"/>
+ *             <element name="experiments" type="{http://www.w3.org/2001/XMLSchema}int"/>
+ *           </sequence>
+ *           <element name="disease">
+ *             <complexType>
+ *               <complexContent>
+ *                 <restriction base="{http://www.w3.org/2001/XMLSchema}anyType">
+ *                   <sequence>
+ *                     <element name="name" type="{http://www.w3.org/2001/XMLSchema}string"/>
+ *                     <element name="acronym" type="{http://www.w3.org/2001/XMLSchema}string"/>
+ *                     <element name="description" type="{http://www.w3.org/2001/XMLSchema}string"/>
+ *                     <element name="dbReference" type="{https://uniprot.org/uniprot}dbReferenceType"/>
+ *                   </sequence>
+ *                   <attribute name="id" use="required" type="{http://www.w3.org/2001/XMLSchema}string" />
+ *                 </restriction>
+ *               </complexContent>
+ *             </complexType>
+ *           </element>
+ *         </choice>
+ *         <element name="location" type="{https://uniprot.org/uniprot}locationType" maxOccurs="unbounded" minOccurs="0"/>
+ *         <element name="text" type="{https://uniprot.org/uniprot}evidencedStringType" maxOccurs="unbounded" minOccurs="0"/>
+ *       </sequence>
+ *       <attribute name="type" use="required">
+ *         <simpleType>
+ *           <restriction base="{http://www.w3.org/2001/XMLSchema}string">
+ *             <enumeration value="allergen"/>
+ *             <enumeration value="alternative products"/>
+ *             <enumeration value="biotechnology"/>
+ *             <enumeration value="biophysicochemical properties"/>
+ *             <enumeration value="catalytic activity"/>
+ *             <enumeration value="caution"/>
+ *             <enumeration value="cofactor"/>
+ *             <enumeration value="developmental stage"/>
+ *             <enumeration value="disease"/>
+ *             <enumeration value="domain"/>
+ *             <enumeration value="disruption phenotype"/>
+ *             <enumeration value="activity regulation"/>
+ *             <enumeration value="function"/>
+ *             <enumeration value="induction"/>
+ *             <enumeration value="miscellaneous"/>
+ *             <enumeration value="pathway"/>
+ *             <enumeration value="pharmaceutical"/>
+ *             <enumeration value="polymorphism"/>
+ *             <enumeration value="PTM"/>
+ *             <enumeration value="RNA editing"/>
+ *             <enumeration value="similarity"/>
+ *             <enumeration value="subcellular location"/>
+ *             <enumeration value="sequence caution"/>
+ *             <enumeration value="subunit"/>
+ *             <enumeration value="tissue specificity"/>
+ *             <enumeration value="toxic dose"/>
+ *             <enumeration value="online information"/>
+ *             <enumeration value="mass spectrometry"/>
+ *             <enumeration value="interaction"/>
+ *           </restriction>
+ *         </simpleType>
+ *       </attribute>
+ *       <attribute name="locationType" type="{http://www.w3.org/2001/XMLSchema}string" />
+ *       <attribute name="name" type="{http://www.w3.org/2001/XMLSchema}string" />
+ *       <attribute name="mass" type="{http://www.w3.org/2001/XMLSchema}float" />
+ *       <attribute name="error" type="{http://www.w3.org/2001/XMLSchema}string" />
+ *       <attribute name="method" type="{http://www.w3.org/2001/XMLSchema}string" />
+ *       <attribute name="evidence" type="{https://uniprot.org/uniprot}intListType" />
+ *     </restriction>
+ *   </complexContent>
+ * </complexType>
+ * 
+ * + * + */ +@XmlAccessorType(XmlAccessType.FIELD) +@XmlType(name = "commentType", propOrder = { + "molecule", + "absorption", + "kinetics", + "phDependence", + "redoxPotential", + "temperatureDependence", + "reaction", + "physiologicalReaction", + "cofactor", + "subcellularLocation", + "conflict", + "link", + "event", + "isoform", + "interactant", + "organismsDiffer", + "experiments", + "disease", + "location", + "text" +}) +public class CommentType { + + protected MoleculeType molecule; + protected CommentType.Absorption absorption; + protected CommentType.Kinetics kinetics; + protected CommentType.PhDependence phDependence; + protected CommentType.RedoxPotential redoxPotential; + protected CommentType.TemperatureDependence temperatureDependence; + protected ReactionType reaction; + protected List physiologicalReaction; + protected List cofactor; + protected List subcellularLocation; + protected CommentType.Conflict conflict; + protected List link; + protected List event; + protected List isoform; + protected List interactant; + @XmlElement(defaultValue = "false") + protected Boolean organismsDiffer; + protected Integer experiments; + protected CommentType.Disease disease; + protected List location; + protected List text; + @XmlAttribute(name = "type", required = true) + protected String type; + @XmlAttribute(name = "locationType") + protected String locationType; + @XmlAttribute(name = "name") + protected String name; + @XmlAttribute(name = "mass") + protected Float mass; + @XmlAttribute(name = "error") + protected String error; + @XmlAttribute(name = "method") + protected String method; + @XmlAttribute(name = "evidence") + protected List evidence; + + /** + * Gets the value of the molecule property. + * + * @return + * possible object is + * {@link MoleculeType } + * + */ + public MoleculeType getMolecule() { + return molecule; + } + + /** + * Sets the value of the molecule property. 
+ * + * @param value + * allowed object is + * {@link MoleculeType } + * + */ + public void setMolecule(MoleculeType value) { + this.molecule = value; + } + + /** + * Gets the value of the absorption property. + * + * @return + * possible object is + * {@link CommentType.Absorption } + * + */ + public CommentType.Absorption getAbsorption() { + return absorption; + } + + /** + * Sets the value of the absorption property. + * + * @param value + * allowed object is + * {@link CommentType.Absorption } + * + */ + public void setAbsorption(CommentType.Absorption value) { + this.absorption = value; + } + + /** + * Gets the value of the kinetics property. + * + * @return + * possible object is + * {@link CommentType.Kinetics } + * + */ + public CommentType.Kinetics getKinetics() { + return kinetics; + } + + /** + * Sets the value of the kinetics property. + * + * @param value + * allowed object is + * {@link CommentType.Kinetics } + * + */ + public void setKinetics(CommentType.Kinetics value) { + this.kinetics = value; + } + + /** + * Gets the value of the phDependence property. + * + * @return + * possible object is + * {@link CommentType.PhDependence } + * + */ + public CommentType.PhDependence getPhDependence() { + return phDependence; + } + + /** + * Sets the value of the phDependence property. + * + * @param value + * allowed object is + * {@link CommentType.PhDependence } + * + */ + public void setPhDependence(CommentType.PhDependence value) { + this.phDependence = value; + } + + /** + * Gets the value of the redoxPotential property. + * + * @return + * possible object is + * {@link CommentType.RedoxPotential } + * + */ + public CommentType.RedoxPotential getRedoxPotential() { + return redoxPotential; + } + + /** + * Sets the value of the redoxPotential property. 
+ * + * @param value + * allowed object is + * {@link CommentType.RedoxPotential } + * + */ + public void setRedoxPotential(CommentType.RedoxPotential value) { + this.redoxPotential = value; + } + + /** + * Gets the value of the temperatureDependence property. + * + * @return + * possible object is + * {@link CommentType.TemperatureDependence } + * + */ + public CommentType.TemperatureDependence getTemperatureDependence() { + return temperatureDependence; + } + + /** + * Sets the value of the temperatureDependence property. + * + * @param value + * allowed object is + * {@link CommentType.TemperatureDependence } + * + */ + public void setTemperatureDependence(CommentType.TemperatureDependence value) { + this.temperatureDependence = value; + } + + /** + * Gets the value of the reaction property. + * + * @return + * possible object is + * {@link ReactionType } + * + */ + public ReactionType getReaction() { + return reaction; + } + + /** + * Sets the value of the reaction property. + * + * @param value + * allowed object is + * {@link ReactionType } + * + */ + public void setReaction(ReactionType value) { + this.reaction = value; + } + + /** + * Gets the value of the physiologicalReaction property. + * + *

+ * This accessor method returns a reference to the live list, + * not a snapshot. Therefore any modification you make to the + * returned list will be present inside the JAXB object. + * This is why there is not a set method for the physiologicalReaction property. + * + *

+ * For example, to add a new item, do as follows: + *

+     *    getPhysiologicalReaction().add(newItem);
+     * 
+ * + * + *

+ * Objects of the following type(s) are allowed in the list + * {@link PhysiologicalReactionType } + * + * + */ + public List getPhysiologicalReaction() { + if (physiologicalReaction == null) { + physiologicalReaction = new ArrayList(); + } + return this.physiologicalReaction; + } + + /** + * Gets the value of the cofactor property. + * + *

+ * This accessor method returns a reference to the live list, + * not a snapshot. Therefore any modification you make to the + * returned list will be present inside the JAXB object. + * This is why there is not a set method for the cofactor property. + * + *

+ * For example, to add a new item, do as follows: + *

+     *    getCofactor().add(newItem);
+     * 
+ * + * + *

+ * Objects of the following type(s) are allowed in the list + * {@link CofactorType } + * + * + */ + public List getCofactor() { + if (cofactor == null) { + cofactor = new ArrayList(); + } + return this.cofactor; + } + + /** + * Gets the value of the subcellularLocation property. + * + *

+ * This accessor method returns a reference to the live list, + * not a snapshot. Therefore any modification you make to the + * returned list will be present inside the JAXB object. + * This is why there is not a set method for the subcellularLocation property. + * + *

+ * For example, to add a new item, do as follows: + *

+     *    getSubcellularLocation().add(newItem);
+     * 
+ * + * + *

+ * Objects of the following type(s) are allowed in the list + * {@link SubcellularLocationType } + * + * + */ + public List getSubcellularLocation() { + if (subcellularLocation == null) { + subcellularLocation = new ArrayList(); + } + return this.subcellularLocation; + } + + /** + * Gets the value of the conflict property. + * + * @return + * possible object is + * {@link CommentType.Conflict } + * + */ + public CommentType.Conflict getConflict() { + return conflict; + } + + /** + * Sets the value of the conflict property. + * + * @param value + * allowed object is + * {@link CommentType.Conflict } + * + */ + public void setConflict(CommentType.Conflict value) { + this.conflict = value; + } + + /** + * Gets the value of the link property. + * + *

+ * This accessor method returns a reference to the live list, + * not a snapshot. Therefore any modification you make to the + * returned list will be present inside the JAXB object. + * This is why there is not a set method for the link property. + * + *

+ * For example, to add a new item, do as follows: + *

+     *    getLink().add(newItem);
+     * 
+ * + * + *

+ * Objects of the following type(s) are allowed in the list + * {@link CommentType.Link } + * + * + */ + public List getLink() { + if (link == null) { + link = new ArrayList(); + } + return this.link; + } + + /** + * Gets the value of the event property. + * + *

+ * This accessor method returns a reference to the live list, + * not a snapshot. Therefore any modification you make to the + * returned list will be present inside the JAXB object. + * This is why there is not a set method for the event property. + * + *

+ * For example, to add a new item, do as follows: + *

+     *    getEvent().add(newItem);
+     * 
+ * + * + *

+ * Objects of the following type(s) are allowed in the list + * {@link EventType } + * + * + */ + public List getEvent() { + if (event == null) { + event = new ArrayList(); + } + return this.event; + } + + /** + * Gets the value of the isoform property. + * + *

+ * This accessor method returns a reference to the live list, + * not a snapshot. Therefore any modification you make to the + * returned list will be present inside the JAXB object. + * This is why there is not a set method for the isoform property. + * + *

+ * For example, to add a new item, do as follows: + *

+     *    getIsoform().add(newItem);
+     * 
+ * + * + *

+ * Objects of the following type(s) are allowed in the list + * {@link IsoformType } + * + * + */ + public List getIsoform() { + if (isoform == null) { + isoform = new ArrayList(); + } + return this.isoform; + } + + /** + * Gets the value of the interactant property. + * + *

+ * This accessor method returns a reference to the live list, + * not a snapshot. Therefore any modification you make to the + * returned list will be present inside the JAXB object. + * This is why there is not a set method for the interactant property. + * + *

+ * For example, to add a new item, do as follows: + *

+     *    getInteractant().add(newItem);
+     * 
+ * + * + *

+ * Objects of the following type(s) are allowed in the list + * {@link InteractantType } + * + * + */ + public List getInteractant() { + if (interactant == null) { + interactant = new ArrayList(); + } + return this.interactant; + } + + /** + * Gets the value of the organismsDiffer property. + * + * @return + * possible object is + * {@link Boolean } + * + */ + public Boolean isOrganismsDiffer() { + return organismsDiffer; + } + + /** + * Sets the value of the organismsDiffer property. + * + * @param value + * allowed object is + * {@link Boolean } + * + */ + public void setOrganismsDiffer(Boolean value) { + this.organismsDiffer = value; + } + + /** + * Gets the value of the experiments property. + * + * @return + * possible object is + * {@link Integer } + * + */ + public Integer getExperiments() { + return experiments; + } + + /** + * Sets the value of the experiments property. + * + * @param value + * allowed object is + * {@link Integer } + * + */ + public void setExperiments(Integer value) { + this.experiments = value; + } + + /** + * Gets the value of the disease property. + * + * @return + * possible object is + * {@link CommentType.Disease } + * + */ + public CommentType.Disease getDisease() { + return disease; + } + + /** + * Sets the value of the disease property. + * + * @param value + * allowed object is + * {@link CommentType.Disease } + * + */ + public void setDisease(CommentType.Disease value) { + this.disease = value; + } + + /** + * Gets the value of the location property. + * + *

+ * This accessor method returns a reference to the live list, + * not a snapshot. Therefore any modification you make to the + * returned list will be present inside the JAXB object. + * This is why there is not a set method for the location property. + * + *

+ * For example, to add a new item, do as follows: + *

+     *    getLocation().add(newItem);
+     * 
+ * + * + *

+ * Objects of the following type(s) are allowed in the list + * {@link LocationType } + * + * + */ + public List getLocation() { + if (location == null) { + location = new ArrayList(); + } + return this.location; + } + + /** + * Gets the value of the text property. + * + *

+ * This accessor method returns a reference to the live list, + * not a snapshot. Therefore any modification you make to the + * returned list will be present inside the JAXB object. + * This is why there is not a set method for the text property. + * + *

+ * For example, to add a new item, do as follows: + *

+     *    getText().add(newItem);
+     * 
+ * + * + *

+ * Objects of the following type(s) are allowed in the list + * {@link EvidencedStringType } + * + * + */ + public List getText() { + if (text == null) { + text = new ArrayList(); + } + return this.text; + } + + /** + * Gets the value of the type property. + * + * @return + * possible object is + * {@link String } + * + */ + public String getType() { + return type; + } + + /** + * Sets the value of the type property. + * + * @param value + * allowed object is + * {@link String } + * + */ + public void setType(String value) { + this.type = value; + } + + /** + * Gets the value of the locationType property. + * + * @return + * possible object is + * {@link String } + * + */ + public String getLocationType() { + return locationType; + } + + /** + * Sets the value of the locationType property. + * + * @param value + * allowed object is + * {@link String } + * + */ + public void setLocationType(String value) { + this.locationType = value; + } + + /** + * Gets the value of the name property. + * + * @return + * possible object is + * {@link String } + * + */ + public String getName() { + return name; + } + + /** + * Sets the value of the name property. + * + * @param value + * allowed object is + * {@link String } + * + */ + public void setName(String value) { + this.name = value; + } + + /** + * Gets the value of the mass property. + * + * @return + * possible object is + * {@link Float } + * + */ + public Float getMass() { + return mass; + } + + /** + * Sets the value of the mass property. + * + * @param value + * allowed object is + * {@link Float } + * + */ + public void setMass(Float value) { + this.mass = value; + } + + /** + * Gets the value of the error property. + * + * @return + * possible object is + * {@link String } + * + */ + public String getError() { + return error; + } + + /** + * Sets the value of the error property. 
+ * + * @param value + * allowed object is + * {@link String } + * + */ + public void setError(String value) { + this.error = value; + } + + /** + * Gets the value of the method property. + * + * @return + * possible object is + * {@link String } + * + */ + public String getMethod() { + return method; + } + + /** + * Sets the value of the method property. + * + * @param value + * allowed object is + * {@link String } + * + */ + public void setMethod(String value) { + this.method = value; + } + + /** + * Gets the value of the evidence property. + * + *

+ * This accessor method returns a reference to the live list, + * not a snapshot. Therefore any modification you make to the + * returned list will be present inside the JAXB object. + * This is why there is not a set method for the evidence property. + * + *

+ * For example, to add a new item, do as follows: + *

+     *    getEvidence().add(newItem);
+     * 
+ * + * + *

+ * Objects of the following type(s) are allowed in the list + * {@link Integer } + * + * + */ + public List getEvidence() { + if (evidence == null) { + evidence = new ArrayList(); + } + return this.evidence; + } + + + /** + *

Java class for anonymous complex type. + * + *

The following schema fragment specifies the expected content contained within this class. + * + *

+     * <complexType>
+     *   <complexContent>
+     *     <restriction base="{http://www.w3.org/2001/XMLSchema}anyType">
+     *       <sequence>
+     *         <element name="max" type="{https://uniprot.org/uniprot}evidencedStringType" minOccurs="0"/>
+     *         <element name="text" type="{https://uniprot.org/uniprot}evidencedStringType" maxOccurs="unbounded" minOccurs="0"/>
+     *       </sequence>
+     *     </restriction>
+     *   </complexContent>
+     * </complexType>
+     * 
+ * + * + */ + @XmlAccessorType(XmlAccessType.FIELD) + @XmlType(name = "", propOrder = { + "max", + "text" + }) + public static class Absorption { + + protected EvidencedStringType max; + protected List text; + + /** + * Gets the value of the max property. + * + * @return + * possible object is + * {@link EvidencedStringType } + * + */ + public EvidencedStringType getMax() { + return max; + } + + /** + * Sets the value of the max property. + * + * @param value + * allowed object is + * {@link EvidencedStringType } + * + */ + public void setMax(EvidencedStringType value) { + this.max = value; + } + + /** + * Gets the value of the text property. + * + *

+ * This accessor method returns a reference to the live list, + * not a snapshot. Therefore any modification you make to the + * returned list will be present inside the JAXB object. + * This is why there is not a set method for the text property. + * + *

+ * For example, to add a new item, do as follows: + *

+         *    getText().add(newItem);
+         * 
+ * + * + *

+ * Objects of the following type(s) are allowed in the list + * {@link EvidencedStringType } + * + * + */ + public List getText() { + if (text == null) { + text = new ArrayList(); + } + return this.text; + } + + } + + + /** + *

Java class for anonymous complex type. + * + *

The following schema fragment specifies the expected content contained within this class. + * + *

+     * <complexType>
+     *   <complexContent>
+     *     <restriction base="{http://www.w3.org/2001/XMLSchema}anyType">
+     *       <sequence>
+     *         <element name="sequence" minOccurs="0">
+     *           <complexType>
+     *             <complexContent>
+     *               <restriction base="{http://www.w3.org/2001/XMLSchema}anyType">
+     *                 <attribute name="resource" use="required">
+     *                   <simpleType>
+     *                     <restriction base="{http://www.w3.org/2001/XMLSchema}string">
+     *                       <enumeration value="EMBL-CDS"/>
+     *                       <enumeration value="EMBL"/>
+     *                     </restriction>
+     *                   </simpleType>
+     *                 </attribute>
+     *                 <attribute name="id" use="required" type="{http://www.w3.org/2001/XMLSchema}string" />
+     *                 <attribute name="version" type="{http://www.w3.org/2001/XMLSchema}int" />
+     *               </restriction>
+     *             </complexContent>
+     *           </complexType>
+     *         </element>
+     *       </sequence>
+     *       <attribute name="type" use="required">
+     *         <simpleType>
+     *           <restriction base="{http://www.w3.org/2001/XMLSchema}string">
+     *             <enumeration value="frameshift"/>
+     *             <enumeration value="erroneous initiation"/>
+     *             <enumeration value="erroneous termination"/>
+     *             <enumeration value="erroneous gene model prediction"/>
+     *             <enumeration value="erroneous translation"/>
+     *             <enumeration value="miscellaneous discrepancy"/>
+     *           </restriction>
+     *         </simpleType>
+     *       </attribute>
+     *       <attribute name="ref" type="{http://www.w3.org/2001/XMLSchema}string" />
+     *     </restriction>
+     *   </complexContent>
+     * </complexType>
+     * 
+ * + * + */ + @XmlAccessorType(XmlAccessType.FIELD) + @XmlType(name = "", propOrder = { + "sequence" + }) + public static class Conflict { + + protected CommentType.Conflict.Sequence sequence; + @XmlAttribute(name = "type", required = true) + protected String type; + @XmlAttribute(name = "ref") + protected String ref; + + /** + * Gets the value of the sequence property. + * + * @return + * possible object is + * {@link CommentType.Conflict.Sequence } + * + */ + public CommentType.Conflict.Sequence getSequence() { + return sequence; + } + + /** + * Sets the value of the sequence property. + * + * @param value + * allowed object is + * {@link CommentType.Conflict.Sequence } + * + */ + public void setSequence(CommentType.Conflict.Sequence value) { + this.sequence = value; + } + + /** + * Gets the value of the type property. + * + * @return + * possible object is + * {@link String } + * + */ + public String getType() { + return type; + } + + /** + * Sets the value of the type property. + * + * @param value + * allowed object is + * {@link String } + * + */ + public void setType(String value) { + this.type = value; + } + + /** + * Gets the value of the ref property. + * + * @return + * possible object is + * {@link String } + * + */ + public String getRef() { + return ref; + } + + /** + * Sets the value of the ref property. + * + * @param value + * allowed object is + * {@link String } + * + */ + public void setRef(String value) { + this.ref = value; + } + + + /** + *

Java class for anonymous complex type. + * + *

The following schema fragment specifies the expected content contained within this class. + * + *

+         * <complexType>
+         *   <complexContent>
+         *     <restriction base="{http://www.w3.org/2001/XMLSchema}anyType">
+         *       <attribute name="resource" use="required">
+         *         <simpleType>
+         *           <restriction base="{http://www.w3.org/2001/XMLSchema}string">
+         *             <enumeration value="EMBL-CDS"/>
+         *             <enumeration value="EMBL"/>
+         *           </restriction>
+         *         </simpleType>
+         *       </attribute>
+         *       <attribute name="id" use="required" type="{http://www.w3.org/2001/XMLSchema}string" />
+         *       <attribute name="version" type="{http://www.w3.org/2001/XMLSchema}int" />
+         *     </restriction>
+         *   </complexContent>
+         * </complexType>
+         * 
+ * + * + */ + @XmlAccessorType(XmlAccessType.FIELD) + @XmlType(name = "") + public static class Sequence { + + @XmlAttribute(name = "resource", required = true) + protected String resource; + @XmlAttribute(name = "id", required = true) + protected String id; + @XmlAttribute(name = "version") + protected Integer version; + + /** + * Gets the value of the resource property. + * + * @return + * possible object is + * {@link String } + * + */ + public String getResource() { + return resource; + } + + /** + * Sets the value of the resource property. + * + * @param value + * allowed object is + * {@link String } + * + */ + public void setResource(String value) { + this.resource = value; + } + + /** + * Gets the value of the id property. + * + * @return + * possible object is + * {@link String } + * + */ + public String getId() { + return id; + } + + /** + * Sets the value of the id property. + * + * @param value + * allowed object is + * {@link String } + * + */ + public void setId(String value) { + this.id = value; + } + + /** + * Gets the value of the version property. + * + * @return + * possible object is + * {@link Integer } + * + */ + public Integer getVersion() { + return version; + } + + /** + * Sets the value of the version property. + * + * @param value + * allowed object is + * {@link Integer } + * + */ + public void setVersion(Integer value) { + this.version = value; + } + + } + + } + + + /** + *

Java class for anonymous complex type. + * + *

The following schema fragment specifies the expected content contained within this class. + * + *

+     * <complexType>
+     *   <complexContent>
+     *     <restriction base="{http://www.w3.org/2001/XMLSchema}anyType">
+     *       <sequence>
+     *         <element name="name" type="{http://www.w3.org/2001/XMLSchema}string"/>
+     *         <element name="acronym" type="{http://www.w3.org/2001/XMLSchema}string"/>
+     *         <element name="description" type="{http://www.w3.org/2001/XMLSchema}string"/>
+     *         <element name="dbReference" type="{https://uniprot.org/uniprot}dbReferenceType"/>
+     *       </sequence>
+     *       <attribute name="id" use="required" type="{http://www.w3.org/2001/XMLSchema}string" />
+     *     </restriction>
+     *   </complexContent>
+     * </complexType>
+     * 
+ * + * + */ + @XmlAccessorType(XmlAccessType.FIELD) + @XmlType(name = "", propOrder = { + "name", + "acronym", + "description", + "dbReference" + }) + public static class Disease { + + @XmlElement(required = true) + protected String name; + @XmlElement(required = true) + protected String acronym; + @XmlElement(required = true) + protected String description; + @XmlElement(required = true) + protected DbReferenceType dbReference; + @XmlAttribute(name = "id", required = true) + protected String id; + + /** + * Gets the value of the name property. + * + * @return + * possible object is + * {@link String } + * + */ + public String getName() { + return name; + } + + /** + * Sets the value of the name property. + * + * @param value + * allowed object is + * {@link String } + * + */ + public void setName(String value) { + this.name = value; + } + + /** + * Gets the value of the acronym property. + * + * @return + * possible object is + * {@link String } + * + */ + public String getAcronym() { + return acronym; + } + + /** + * Sets the value of the acronym property. + * + * @param value + * allowed object is + * {@link String } + * + */ + public void setAcronym(String value) { + this.acronym = value; + } + + /** + * Gets the value of the description property. + * + * @return + * possible object is + * {@link String } + * + */ + public String getDescription() { + return description; + } + + /** + * Sets the value of the description property. + * + * @param value + * allowed object is + * {@link String } + * + */ + public void setDescription(String value) { + this.description = value; + } + + /** + * Gets the value of the dbReference property. + * + * @return + * possible object is + * {@link DbReferenceType } + * + */ + public DbReferenceType getDbReference() { + return dbReference; + } + + /** + * Sets the value of the dbReference property. 
+ * + * @param value + * allowed object is + * {@link DbReferenceType } + * + */ + public void setDbReference(DbReferenceType value) { + this.dbReference = value; + } + + /** + * Gets the value of the id property. + * + * @return + * possible object is + * {@link String } + * + */ + public String getId() { + return id; + } + + /** + * Sets the value of the id property. + * + * @param value + * allowed object is + * {@link String } + * + */ + public void setId(String value) { + this.id = value; + } + + } + + + /** + *

Java class for anonymous complex type. + * + *

The following schema fragment specifies the expected content contained within this class. + * + *

+     * <complexType>
+     *   <complexContent>
+     *     <restriction base="{http://www.w3.org/2001/XMLSchema}anyType">
+     *       <sequence>
+     *         <element name="KM" type="{https://uniprot.org/uniprot}evidencedStringType" maxOccurs="unbounded" minOccurs="0"/>
+     *         <element name="Vmax" type="{https://uniprot.org/uniprot}evidencedStringType" maxOccurs="unbounded" minOccurs="0"/>
+     *         <element name="text" type="{https://uniprot.org/uniprot}evidencedStringType" maxOccurs="unbounded" minOccurs="0"/>
+     *       </sequence>
+     *     </restriction>
+     *   </complexContent>
+     * </complexType>
+     * 
+ * + * + */ + @XmlAccessorType(XmlAccessType.FIELD) + @XmlType(name = "", propOrder = { + "km", + "vmax", + "text" + }) + public static class Kinetics { + + @XmlElement(name = "KM") + protected List km; + @XmlElement(name = "Vmax") + protected List vmax; + protected List text; + + /** + * Gets the value of the km property. + * + *

+ * This accessor method returns a reference to the live list, + * not a snapshot. Therefore any modification you make to the + * returned list will be present inside the JAXB object. + * This is why there is not a set method for the km property. + * + *

+ * For example, to add a new item, do as follows: + *

+         *    getKM().add(newItem);
+         * 
+ * + * + *

+ * Objects of the following type(s) are allowed in the list + * {@link EvidencedStringType } + * + * + */ + public List getKM() { + if (km == null) { + km = new ArrayList(); + } + return this.km; + } + + /** + * Gets the value of the vmax property. + * + *

+ * This accessor method returns a reference to the live list, + * not a snapshot. Therefore any modification you make to the + * returned list will be present inside the JAXB object. + * This is why there is not a set method for the vmax property. + * + *

+ * For example, to add a new item, do as follows: + *

+         *    getVmax().add(newItem);
+         * 
+ * + * + *

+ * Objects of the following type(s) are allowed in the list + * {@link EvidencedStringType } + * + * + */ + public List getVmax() { + if (vmax == null) { + vmax = new ArrayList(); + } + return this.vmax; + } + + /** + * Gets the value of the text property. + * + *

+ * This accessor method returns a reference to the live list, + * not a snapshot. Therefore any modification you make to the + * returned list will be present inside the JAXB object. + * This is why there is not a set method for the text property. + * + *

+ * For example, to add a new item, do as follows: + *

+         *    getText().add(newItem);
+         * 
+ * + * + *

+ * Objects of the following type(s) are allowed in the list + * {@link EvidencedStringType } + * + * + */ + public List getText() { + if (text == null) { + text = new ArrayList(); + } + return this.text; + } + + } + + + /** + *

Java class for anonymous complex type. + * + *

The following schema fragment specifies the expected content contained within this class. + * + *

+     * <complexType>
+     *   <complexContent>
+     *     <restriction base="{http://www.w3.org/2001/XMLSchema}anyType">
+     *       <attribute name="uri" use="required" type="{http://www.w3.org/2001/XMLSchema}anyURI" />
+     *     </restriction>
+     *   </complexContent>
+     * </complexType>
+     * 
+ * + * + */ + @XmlAccessorType(XmlAccessType.FIELD) + @XmlType(name = "") + public static class Link { + + @XmlAttribute(name = "uri", required = true) + @XmlSchemaType(name = "anyURI") + protected String uri; + + /** + * Gets the value of the uri property. + * + * @return + * possible object is + * {@link String } + * + */ + public String getUri() { + return uri; + } + + /** + * Sets the value of the uri property. + * + * @param value + * allowed object is + * {@link String } + * + */ + public void setUri(String value) { + this.uri = value; + } + + } + + + /** + *

Java class for anonymous complex type. + * + *

The following schema fragment specifies the expected content contained within this class. + * + *

+     * <complexType>
+     *   <complexContent>
+     *     <restriction base="{http://www.w3.org/2001/XMLSchema}anyType">
+     *       <sequence>
+     *         <element name="text" type="{https://uniprot.org/uniprot}evidencedStringType" maxOccurs="unbounded"/>
+     *       </sequence>
+     *     </restriction>
+     *   </complexContent>
+     * </complexType>
+     * 
+ * + * + */ + @XmlAccessorType(XmlAccessType.FIELD) + @XmlType(name = "", propOrder = { + "text" + }) + public static class PhDependence { + + @XmlElement(required = true) + protected List text; + + /** + * Gets the value of the text property. + * + *

+ * This accessor method returns a reference to the live list, + * not a snapshot. Therefore any modification you make to the + * returned list will be present inside the JAXB object. + * This is why there is not a set method for the text property. + * + *

+ * For example, to add a new item, do as follows: + *

+         *    getText().add(newItem);
+         * 
+ * + * + *

+ * Objects of the following type(s) are allowed in the list + * {@link EvidencedStringType } + * + * + */ + public List getText() { + if (text == null) { + text = new ArrayList(); + } + return this.text; + } + + } + + + /** + *

Java class for anonymous complex type. + * + *

The following schema fragment specifies the expected content contained within this class. + * + *

+     * <complexType>
+     *   <complexContent>
+     *     <restriction base="{http://www.w3.org/2001/XMLSchema}anyType">
+     *       <sequence>
+     *         <element name="text" type="{https://uniprot.org/uniprot}evidencedStringType" maxOccurs="unbounded"/>
+     *       </sequence>
+     *     </restriction>
+     *   </complexContent>
+     * </complexType>
+     * 
+ * + * + */ + @XmlAccessorType(XmlAccessType.FIELD) + @XmlType(name = "", propOrder = { + "text" + }) + public static class RedoxPotential { + + @XmlElement(required = true) + protected List text; + + /** + * Gets the value of the text property. + * + *

+ * This accessor method returns a reference to the live list, + * not a snapshot. Therefore any modification you make to the + * returned list will be present inside the JAXB object. + * This is why there is not a set method for the text property. + * + *

+ * For example, to add a new item, do as follows: + *

+         *    getText().add(newItem);
+         * 
+ * + * + *

+ * Objects of the following type(s) are allowed in the list + * {@link EvidencedStringType } + * + * + */ + public List getText() { + if (text == null) { + text = new ArrayList(); + } + return this.text; + } + + } + + + /** + *

Java class for anonymous complex type. + * + *

The following schema fragment specifies the expected content contained within this class. + * + *

+     * <complexType>
+     *   <complexContent>
+     *     <restriction base="{http://www.w3.org/2001/XMLSchema}anyType">
+     *       <sequence>
+     *         <element name="text" type="{https://uniprot.org/uniprot}evidencedStringType" maxOccurs="unbounded"/>
+     *       </sequence>
+     *     </restriction>
+     *   </complexContent>
+     * </complexType>
+     * 
+ * + * + */ + @XmlAccessorType(XmlAccessType.FIELD) + @XmlType(name = "", propOrder = { + "text" + }) + public static class TemperatureDependence { + + @XmlElement(required = true) + protected List text; + + /** + * Gets the value of the text property. + * + *

+ * This accessor method returns a reference to the live list, + * not a snapshot. Therefore any modification you make to the + * returned list will be present inside the JAXB object. + * This is why there is not a set method for the text property. + * + *

+ * For example, to add a new item, do as follows: + *

+         *    getText().add(newItem);
+         * 
+ * + * + *

+ * Objects of the following type(s) are allowed in the list + * {@link EvidencedStringType } + * + * + */ + public List getText() { + if (text == null) { + text = new ArrayList(); + } + return this.text; + } + + } + +} diff --git a/biodata-formats/src/main/java/org/opencb/biodata/formats/protein/uniprot/v202502jaxb/ConsortiumType.java b/biodata-formats/src/main/java/org/opencb/biodata/formats/protein/uniprot/v202502jaxb/ConsortiumType.java new file mode 100644 index 00000000..35ecd026 --- /dev/null +++ b/biodata-formats/src/main/java/org/opencb/biodata/formats/protein/uniprot/v202502jaxb/ConsortiumType.java @@ -0,0 +1,68 @@ +// +// This file was generated by the JavaTM Architecture for XML Binding(JAXB) Reference Implementation, v2.2.8-b130911.1802 +// See http://java.sun.com/xml/jaxb +// Any modifications to this file will be lost upon recompilation of the source schema. +// Generated on: 2025.05.30 at 11:28:51 AM CEST +// + + +package org.opencb.biodata.formats.protein.uniprot.v202502jaxb; + +import javax.xml.bind.annotation.XmlAccessType; +import javax.xml.bind.annotation.XmlAccessorType; +import javax.xml.bind.annotation.XmlAttribute; +import javax.xml.bind.annotation.XmlType; + + +/** + * Describes the authors of a citation when these are represented by a consortium. + * Equivalent to the flat file RG-line. + * + *

Java class for consortiumType complex type. + * + *

The following schema fragment specifies the expected content contained within this class. + * + *

+ * <complexType name="consortiumType">
+ *   <complexContent>
+ *     <restriction base="{http://www.w3.org/2001/XMLSchema}anyType">
+ *       <attribute name="name" use="required" type="{http://www.w3.org/2001/XMLSchema}string" />
+ *     </restriction>
+ *   </complexContent>
+ * </complexType>
+ * 
+ * + * + */ +@XmlAccessorType(XmlAccessType.FIELD) +@XmlType(name = "consortiumType") +public class ConsortiumType { + + @XmlAttribute(name = "name", required = true) + protected String name; + + /** + * Gets the value of the name property. + * + * @return + * possible object is + * {@link String } + * + */ + public String getName() { + return name; + } + + /** + * Sets the value of the name property. + * + * @param value + * allowed object is + * {@link String } + * + */ + public void setName(String value) { + this.name = value; + } + +} diff --git a/biodata-formats/src/main/java/org/opencb/biodata/formats/protein/uniprot/v202502jaxb/DbReferenceType.java b/biodata-formats/src/main/java/org/opencb/biodata/formats/protein/uniprot/v202502jaxb/DbReferenceType.java new file mode 100644 index 00000000..1e3af9f2 --- /dev/null +++ b/biodata-formats/src/main/java/org/opencb/biodata/formats/protein/uniprot/v202502jaxb/DbReferenceType.java @@ -0,0 +1,192 @@ +// +// This file was generated by the JavaTM Architecture for XML Binding(JAXB) Reference Implementation, v2.2.8-b130911.1802 +// See http://java.sun.com/xml/jaxb +// Any modifications to this file will be lost upon recompilation of the source schema. +// Generated on: 2025.05.30 at 11:28:51 AM CEST +// + + +package org.opencb.biodata.formats.protein.uniprot.v202502jaxb; + +import java.util.ArrayList; +import java.util.List; +import javax.xml.bind.annotation.XmlAccessType; +import javax.xml.bind.annotation.XmlAccessorType; +import javax.xml.bind.annotation.XmlAttribute; +import javax.xml.bind.annotation.XmlType; + + +/** + * Describes a database cross-reference. + * Equivalent to the flat file DR-line. + * + * + *

Java class for dbReferenceType complex type. + * + *

The following schema fragment specifies the expected content contained within this class. + * + *

+ * <complexType name="dbReferenceType">
+ *   <complexContent>
+ *     <restriction base="{http://www.w3.org/2001/XMLSchema}anyType">
+ *       <sequence>
+ *         <element name="molecule" type="{https://uniprot.org/uniprot}moleculeType" minOccurs="0"/>
+ *         <element name="property" type="{https://uniprot.org/uniprot}propertyType" maxOccurs="unbounded" minOccurs="0"/>
+ *       </sequence>
+ *       <attribute name="type" use="required" type="{http://www.w3.org/2001/XMLSchema}string" />
+ *       <attribute name="id" use="required" type="{http://www.w3.org/2001/XMLSchema}string" />
+ *       <attribute name="evidence" type="{https://uniprot.org/uniprot}intListType" />
+ *     </restriction>
+ *   </complexContent>
+ * </complexType>
+ * 
+ * + * + */ +@XmlAccessorType(XmlAccessType.FIELD) +@XmlType(name = "dbReferenceType", propOrder = { + "molecule", + "property" +}) +public class DbReferenceType { + + protected MoleculeType molecule; + protected List property; + @XmlAttribute(name = "type", required = true) + protected String type; + @XmlAttribute(name = "id", required = true) + protected String id; + @XmlAttribute(name = "evidence") + protected List evidence; + + /** + * Gets the value of the molecule property. + * + * @return + * possible object is + * {@link MoleculeType } + * + */ + public MoleculeType getMolecule() { + return molecule; + } + + /** + * Sets the value of the molecule property. + * + * @param value + * allowed object is + * {@link MoleculeType } + * + */ + public void setMolecule(MoleculeType value) { + this.molecule = value; + } + + /** + * Gets the value of the property property. + * + *

+ * This accessor method returns a reference to the live list, + * not a snapshot. Therefore any modification you make to the + * returned list will be present inside the JAXB object. + * This is why there is not a set method for the property property. + * + *

+ * For example, to add a new item, do as follows: + *

+     *    getProperty().add(newItem);
+     * 
+ * + * + *

+ * Objects of the following type(s) are allowed in the list + * {@link PropertyType } + * + * + */ + public List getProperty() { + if (property == null) { + property = new ArrayList(); + } + return this.property; + } + + /** + * Gets the value of the type property. + * + * @return + * possible object is + * {@link String } + * + */ + public String getType() { + return type; + } + + /** + * Sets the value of the type property. + * + * @param value + * allowed object is + * {@link String } + * + */ + public void setType(String value) { + this.type = value; + } + + /** + * Gets the value of the id property. + * + * @return + * possible object is + * {@link String } + * + */ + public String getId() { + return id; + } + + /** + * Sets the value of the id property. + * + * @param value + * allowed object is + * {@link String } + * + */ + public void setId(String value) { + this.id = value; + } + + /** + * Gets the value of the evidence property. + * + *

+ * This accessor method returns a reference to the live list, + * not a snapshot. Therefore any modification you make to the + * returned list will be present inside the JAXB object. + * This is why there is not a set method for the evidence property. + * + *

+ * For example, to add a new item, do as follows: + *

+     *    getEvidence().add(newItem);
+     * 
+ * + * + *

+ * Objects of the following type(s) are allowed in the list + * {@link Integer } + * + * + */ + public List getEvidence() { + if (evidence == null) { + evidence = new ArrayList(); + } + return this.evidence; + } + +} diff --git a/biodata-formats/src/main/java/org/opencb/biodata/formats/protein/uniprot/v202502jaxb/Entry.java b/biodata-formats/src/main/java/org/opencb/biodata/formats/protein/uniprot/v202502jaxb/Entry.java new file mode 100644 index 00000000..c23368b2 --- /dev/null +++ b/biodata-formats/src/main/java/org/opencb/biodata/formats/protein/uniprot/v202502jaxb/Entry.java @@ -0,0 +1,625 @@ +// +// This file was generated by the JavaTM Architecture for XML Binding(JAXB) Reference Implementation, v2.2.8-b130911.1802 +// See http://java.sun.com/xml/jaxb +// Any modifications to this file will be lost upon recompilation of the source schema. +// Generated on: 2025.05.30 at 11:28:51 AM CEST +// + + +package org.opencb.biodata.formats.protein.uniprot.v202502jaxb; + +import java.util.ArrayList; +import java.util.List; +import javax.xml.bind.annotation.XmlAccessType; +import javax.xml.bind.annotation.XmlAccessorType; +import javax.xml.bind.annotation.XmlAttribute; +import javax.xml.bind.annotation.XmlElement; +import javax.xml.bind.annotation.XmlRootElement; +import javax.xml.bind.annotation.XmlSchemaType; +import javax.xml.bind.annotation.XmlType; +import javax.xml.datatype.XMLGregorianCalendar; + + +/** + *

Java class for anonymous complex type. + * + *

The following schema fragment specifies the expected content contained within this class. + * + *

+ * <complexType>
+ *   <complexContent>
+ *     <restriction base="{http://www.w3.org/2001/XMLSchema}anyType">
+ *       <sequence>
+ *         <element name="accession" type="{http://www.w3.org/2001/XMLSchema}string" maxOccurs="unbounded"/>
+ *         <element name="name" type="{http://www.w3.org/2001/XMLSchema}string" maxOccurs="unbounded"/>
+ *         <element name="protein" type="{https://uniprot.org/uniprot}proteinType"/>
+ *         <element name="gene" type="{https://uniprot.org/uniprot}geneType" maxOccurs="unbounded" minOccurs="0"/>
+ *         <element name="organism" type="{https://uniprot.org/uniprot}organismType"/>
+ *         <element name="organismHost" type="{https://uniprot.org/uniprot}organismType" maxOccurs="unbounded" minOccurs="0"/>
+ *         <element name="geneLocation" type="{https://uniprot.org/uniprot}geneLocationType" maxOccurs="unbounded" minOccurs="0"/>
+ *         <element name="reference" type="{https://uniprot.org/uniprot}referenceType" maxOccurs="unbounded"/>
+ *         <element name="comment" type="{https://uniprot.org/uniprot}commentType" maxOccurs="unbounded" minOccurs="0"/>
+ *         <element name="dbReference" type="{https://uniprot.org/uniprot}dbReferenceType" maxOccurs="unbounded" minOccurs="0"/>
+ *         <element name="proteinExistence" type="{https://uniprot.org/uniprot}proteinExistenceType"/>
+ *         <element name="keyword" type="{https://uniprot.org/uniprot}keywordType" maxOccurs="unbounded" minOccurs="0"/>
+ *         <element name="feature" type="{https://uniprot.org/uniprot}featureType" maxOccurs="unbounded" minOccurs="0"/>
+ *         <element name="evidence" type="{https://uniprot.org/uniprot}evidenceType" maxOccurs="unbounded" minOccurs="0"/>
+ *         <element name="sequence" type="{https://uniprot.org/uniprot}sequenceType"/>
+ *       </sequence>
+ *       <attribute name="dataset" use="required">
+ *         <simpleType>
+ *           <restriction base="{http://www.w3.org/2001/XMLSchema}string">
+ *             <enumeration value="Swiss-Prot"/>
+ *             <enumeration value="TrEMBL"/>
+ *           </restriction>
+ *         </simpleType>
+ *       </attribute>
+ *       <attribute name="created" use="required" type="{http://www.w3.org/2001/XMLSchema}date" />
+ *       <attribute name="modified" use="required" type="{http://www.w3.org/2001/XMLSchema}date" />
+ *       <attribute name="version" use="required" type="{http://www.w3.org/2001/XMLSchema}int" />
+ *     </restriction>
+ *   </complexContent>
+ * </complexType>
+ * 
+ * + * + */ +@XmlAccessorType(XmlAccessType.FIELD) +@XmlType(name = "", propOrder = { + "accession", + "name", + "protein", + "gene", + "organism", + "organismHost", + "geneLocation", + "reference", + "comment", + "dbReference", + "proteinExistence", + "keyword", + "feature", + "evidence", + "sequence" +}) +@XmlRootElement(name = "entry") +public class Entry { + + @XmlElement(required = true) + protected List accession; + @XmlElement(required = true) + protected List name; + @XmlElement(required = true) + protected ProteinType protein; + protected List gene; + @XmlElement(required = true) + protected OrganismType organism; + protected List organismHost; + protected List geneLocation; + @XmlElement(required = true) + protected List reference; + @XmlElement(nillable = true) + protected List comment; + protected List dbReference; + @XmlElement(required = true) + protected ProteinExistenceType proteinExistence; + protected List keyword; + protected List feature; + protected List evidence; + @XmlElement(required = true) + protected SequenceType sequence; + @XmlAttribute(name = "dataset", required = true) + protected String dataset; + @XmlAttribute(name = "created", required = true) + @XmlSchemaType(name = "date") + protected XMLGregorianCalendar created; + @XmlAttribute(name = "modified", required = true) + @XmlSchemaType(name = "date") + protected XMLGregorianCalendar modified; + @XmlAttribute(name = "version", required = true) + protected int version; + + /** + * Gets the value of the accession property. + * + *

+ * This accessor method returns a reference to the live list, + * not a snapshot. Therefore any modification you make to the + * returned list will be present inside the JAXB object. + * This is why there is not a set method for the accession property. + * + *

+ * For example, to add a new item, do as follows: + *

+     *    getAccession().add(newItem);
+     * 
+ * + * + *

+ * Objects of the following type(s) are allowed in the list + * {@link String } + * + * + */ + public List getAccession() { + if (accession == null) { + accession = new ArrayList(); + } + return this.accession; + } + + /** + * Gets the value of the name property. + * + *

+ * This accessor method returns a reference to the live list, + * not a snapshot. Therefore any modification you make to the + * returned list will be present inside the JAXB object. + * This is why there is not a set method for the name property. + * + *

+ * For example, to add a new item, do as follows: + *

+     *    getName().add(newItem);
+     * 
+ * + * + *

+ * Objects of the following type(s) are allowed in the list + * {@link String } + * + * + */ + public List getName() { + if (name == null) { + name = new ArrayList(); + } + return this.name; + } + + /** + * Gets the value of the protein property. + * + * @return + * possible object is + * {@link ProteinType } + * + */ + public ProteinType getProtein() { + return protein; + } + + /** + * Sets the value of the protein property. + * + * @param value + * allowed object is + * {@link ProteinType } + * + */ + public void setProtein(ProteinType value) { + this.protein = value; + } + + /** + * Gets the value of the gene property. + * + *

+ * This accessor method returns a reference to the live list, + * not a snapshot. Therefore any modification you make to the + * returned list will be present inside the JAXB object. + * This is why there is not a set method for the gene property. + * + *

+ * For example, to add a new item, do as follows: + *

+     *    getGene().add(newItem);
+     * 
+ * + * + *

+ * Objects of the following type(s) are allowed in the list + * {@link GeneType } + * + * + */ + public List getGene() { + if (gene == null) { + gene = new ArrayList(); + } + return this.gene; + } + + /** + * Gets the value of the organism property. + * + * @return + * possible object is + * {@link OrganismType } + * + */ + public OrganismType getOrganism() { + return organism; + } + + /** + * Sets the value of the organism property. + * + * @param value + * allowed object is + * {@link OrganismType } + * + */ + public void setOrganism(OrganismType value) { + this.organism = value; + } + + /** + * Gets the value of the organismHost property. + * + *

+ * This accessor method returns a reference to the live list, + * not a snapshot. Therefore any modification you make to the + * returned list will be present inside the JAXB object. + * This is why there is not a set method for the organismHost property. + * + *

+ * For example, to add a new item, do as follows: + *

+     *    getOrganismHost().add(newItem);
+     * 
+ * + * + *

+ * Objects of the following type(s) are allowed in the list + * {@link OrganismType } + * + * + */ + public List getOrganismHost() { + if (organismHost == null) { + organismHost = new ArrayList(); + } + return this.organismHost; + } + + /** + * Gets the value of the geneLocation property. + * + *

+ * This accessor method returns a reference to the live list, + * not a snapshot. Therefore any modification you make to the + * returned list will be present inside the JAXB object. + * This is why there is not a set method for the geneLocation property. + * + *

+ * For example, to add a new item, do as follows: + *

+     *    getGeneLocation().add(newItem);
+     * 
+ * + * + *

+ * Objects of the following type(s) are allowed in the list + * {@link GeneLocationType } + * + * + */ + public List getGeneLocation() { + if (geneLocation == null) { + geneLocation = new ArrayList(); + } + return this.geneLocation; + } + + /** + * Gets the value of the reference property. + * + *

+ * This accessor method returns a reference to the live list, + * not a snapshot. Therefore any modification you make to the + * returned list will be present inside the JAXB object. + * This is why there is not a set method for the reference property. + * + *

+ * For example, to add a new item, do as follows: + *

+     *    getReference().add(newItem);
+     * 
+ * + * + *

+ * Objects of the following type(s) are allowed in the list + * {@link ReferenceType } + * + * + */ + public List getReference() { + if (reference == null) { + reference = new ArrayList(); + } + return this.reference; + } + + /** + * Gets the value of the comment property. + * + *

+ * This accessor method returns a reference to the live list, + * not a snapshot. Therefore any modification you make to the + * returned list will be present inside the JAXB object. + * This is why there is not a set method for the comment property. + * + *

+ * For example, to add a new item, do as follows: + *

+     *    getComment().add(newItem);
+     * 
+ * + * + *

+ * Objects of the following type(s) are allowed in the list + * {@link CommentType } + * + * + */ + public List getComment() { + if (comment == null) { + comment = new ArrayList(); + } + return this.comment; + } + + /** + * Gets the value of the dbReference property. + * + *

+ * This accessor method returns a reference to the live list, + * not a snapshot. Therefore any modification you make to the + * returned list will be present inside the JAXB object. + * This is why there is not a set method for the dbReference property. + * + *

+ * For example, to add a new item, do as follows: + *

+     *    getDbReference().add(newItem);
+     * 
+ * + * + *

+ * Objects of the following type(s) are allowed in the list + * {@link DbReferenceType } + * + * + */ + public List getDbReference() { + if (dbReference == null) { + dbReference = new ArrayList(); + } + return this.dbReference; + } + + /** + * Gets the value of the proteinExistence property. + * + * @return + * possible object is + * {@link ProteinExistenceType } + * + */ + public ProteinExistenceType getProteinExistence() { + return proteinExistence; + } + + /** + * Sets the value of the proteinExistence property. + * + * @param value + * allowed object is + * {@link ProteinExistenceType } + * + */ + public void setProteinExistence(ProteinExistenceType value) { + this.proteinExistence = value; + } + + /** + * Gets the value of the keyword property. + * + *

+ * This accessor method returns a reference to the live list, + * not a snapshot. Therefore any modification you make to the + * returned list will be present inside the JAXB object. + * This is why there is not a set method for the keyword property. + * + *

+ * For example, to add a new item, do as follows: + *

+     *    getKeyword().add(newItem);
+     * 
+ * + * + *

+ * Objects of the following type(s) are allowed in the list + * {@link KeywordType } + * + * + */ + public List getKeyword() { + if (keyword == null) { + keyword = new ArrayList(); + } + return this.keyword; + } + + /** + * Gets the value of the feature property. + * + *

+ * This accessor method returns a reference to the live list, + * not a snapshot. Therefore any modification you make to the + * returned list will be present inside the JAXB object. + * This is why there is not a set method for the feature property. + * + *

+ * For example, to add a new item, do as follows: + *

+     *    getFeature().add(newItem);
+     * 
+ * + * + *

+ * Objects of the following type(s) are allowed in the list + * {@link FeatureType } + * + * + */ + public List getFeature() { + if (feature == null) { + feature = new ArrayList(); + } + return this.feature; + } + + /** + * Gets the value of the evidence property. + * + *

+ * This accessor method returns a reference to the live list, + * not a snapshot. Therefore any modification you make to the + * returned list will be present inside the JAXB object. + * This is why there is not a set method for the evidence property. + * + *

+ * For example, to add a new item, do as follows: + *

+     *    getEvidence().add(newItem);
+     * 
+ * + * + *

+ * Objects of the following type(s) are allowed in the list + * {@link EvidenceType } + * + * + */ + public List getEvidence() { + if (evidence == null) { + evidence = new ArrayList(); + } + return this.evidence; + } + + /** + * Gets the value of the sequence property. + * + * @return + * possible object is + * {@link SequenceType } + * + */ + public SequenceType getSequence() { + return sequence; + } + + /** + * Sets the value of the sequence property. + * + * @param value + * allowed object is + * {@link SequenceType } + * + */ + public void setSequence(SequenceType value) { + this.sequence = value; + } + + /** + * Gets the value of the dataset property. + * + * @return + * possible object is + * {@link String } + * + */ + public String getDataset() { + return dataset; + } + + /** + * Sets the value of the dataset property. + * + * @param value + * allowed object is + * {@link String } + * + */ + public void setDataset(String value) { + this.dataset = value; + } + + /** + * Gets the value of the created property. + * + * @return + * possible object is + * {@link XMLGregorianCalendar } + * + */ + public XMLGregorianCalendar getCreated() { + return created; + } + + /** + * Sets the value of the created property. + * + * @param value + * allowed object is + * {@link XMLGregorianCalendar } + * + */ + public void setCreated(XMLGregorianCalendar value) { + this.created = value; + } + + /** + * Gets the value of the modified property. + * + * @return + * possible object is + * {@link XMLGregorianCalendar } + * + */ + public XMLGregorianCalendar getModified() { + return modified; + } + + /** + * Sets the value of the modified property. + * + * @param value + * allowed object is + * {@link XMLGregorianCalendar } + * + */ + public void setModified(XMLGregorianCalendar value) { + this.modified = value; + } + + /** + * Gets the value of the version property. + * + */ + public int getVersion() { + return version; + } + + /** + * Sets the value of the version property. 
+ * + */ + public void setVersion(int value) { + this.version = value; + } + +} diff --git a/biodata-formats/src/main/java/org/opencb/biodata/formats/protein/uniprot/v202502jaxb/EventType.java b/biodata-formats/src/main/java/org/opencb/biodata/formats/protein/uniprot/v202502jaxb/EventType.java new file mode 100644 index 00000000..6fd460d7 --- /dev/null +++ b/biodata-formats/src/main/java/org/opencb/biodata/formats/protein/uniprot/v202502jaxb/EventType.java @@ -0,0 +1,76 @@ +// +// This file was generated by the JavaTM Architecture for XML Binding(JAXB) Reference Implementation, v2.2.8-b130911.1802 +// See http://java.sun.com/xml/jaxb +// Any modifications to this file will be lost upon recompilation of the source schema. +// Generated on: 2025.05.30 at 11:28:51 AM CEST +// + + +package org.opencb.biodata.formats.protein.uniprot.v202502jaxb; + +import javax.xml.bind.annotation.XmlAccessType; +import javax.xml.bind.annotation.XmlAccessorType; +import javax.xml.bind.annotation.XmlAttribute; +import javax.xml.bind.annotation.XmlType; + + +/** + * Describes the type of events that cause alternative products. + * + *

Java class for eventType complex type. + * + *

The following schema fragment specifies the expected content contained within this class. + * + *

+ * <complexType name="eventType">
+ *   <complexContent>
+ *     <restriction base="{http://www.w3.org/2001/XMLSchema}anyType">
+ *       <attribute name="type" use="required">
+ *         <simpleType>
+ *           <restriction base="{http://www.w3.org/2001/XMLSchema}string">
+ *             <enumeration value="alternative splicing"/>
+ *             <enumeration value="alternative initiation"/>
+ *             <enumeration value="alternative promoter"/>
+ *             <enumeration value="ribosomal frameshifting"/>
+ *           </restriction>
+ *         </simpleType>
+ *       </attribute>
+ *     </restriction>
+ *   </complexContent>
+ * </complexType>
+ * 
+ * + * + */ +@XmlAccessorType(XmlAccessType.FIELD) +@XmlType(name = "eventType") +public class EventType { + + @XmlAttribute(name = "type", required = true) + protected String type; + + /** + * Gets the value of the type property. + * + * @return + * possible object is + * {@link String } + * + */ + public String getType() { + return type; + } + + /** + * Sets the value of the type property. + * + * @param value + * allowed object is + * {@link String } + * + */ + public void setType(String value) { + this.type = value; + } + +} diff --git a/biodata-formats/src/main/java/org/opencb/biodata/formats/protein/uniprot/v202502jaxb/EvidenceType.java b/biodata-formats/src/main/java/org/opencb/biodata/formats/protein/uniprot/v202502jaxb/EvidenceType.java new file mode 100644 index 00000000..1ab65240 --- /dev/null +++ b/biodata-formats/src/main/java/org/opencb/biodata/formats/protein/uniprot/v202502jaxb/EvidenceType.java @@ -0,0 +1,153 @@ +// +// This file was generated by the JavaTM Architecture for XML Binding(JAXB) Reference Implementation, v2.2.8-b130911.1802 +// See http://java.sun.com/xml/jaxb +// Any modifications to this file will be lost upon recompilation of the source schema. +// Generated on: 2025.05.30 at 11:28:51 AM CEST +// + + +package org.opencb.biodata.formats.protein.uniprot.v202502jaxb; + +import java.math.BigInteger; +import javax.xml.bind.annotation.XmlAccessType; +import javax.xml.bind.annotation.XmlAccessorType; +import javax.xml.bind.annotation.XmlAttribute; +import javax.xml.bind.annotation.XmlType; + + +/** + * Describes the evidence for an annotation. + * No flat file equivalent. + * + *

Java class for evidenceType complex type. + * + *

The following schema fragment specifies the expected content contained within this class. + * + *

+ * <complexType name="evidenceType">
+ *   <complexContent>
+ *     <restriction base="{http://www.w3.org/2001/XMLSchema}anyType">
+ *       <sequence>
+ *         <element name="source" type="{https://uniprot.org/uniprot}sourceType" minOccurs="0"/>
+ *         <element name="importedFrom" type="{https://uniprot.org/uniprot}importedFromType" minOccurs="0"/>
+ *       </sequence>
+ *       <attribute name="type" use="required" type="{http://www.w3.org/2001/XMLSchema}string" />
+ *       <attribute name="key" use="required" type="{http://www.w3.org/2001/XMLSchema}integer" />
+ *     </restriction>
+ *   </complexContent>
+ * </complexType>
+ * 
+ * + * + */ +@XmlAccessorType(XmlAccessType.FIELD) +@XmlType(name = "evidenceType", propOrder = { + "source", + "importedFrom" +}) +public class EvidenceType { + + protected SourceType source; + protected ImportedFromType importedFrom; + @XmlAttribute(name = "type", required = true) + protected String type; + @XmlAttribute(name = "key", required = true) + protected BigInteger key; + + /** + * Gets the value of the source property. + * + * @return + * possible object is + * {@link SourceType } + * + */ + public SourceType getSource() { + return source; + } + + /** + * Sets the value of the source property. + * + * @param value + * allowed object is + * {@link SourceType } + * + */ + public void setSource(SourceType value) { + this.source = value; + } + + /** + * Gets the value of the importedFrom property. + * + * @return + * possible object is + * {@link ImportedFromType } + * + */ + public ImportedFromType getImportedFrom() { + return importedFrom; + } + + /** + * Sets the value of the importedFrom property. + * + * @param value + * allowed object is + * {@link ImportedFromType } + * + */ + public void setImportedFrom(ImportedFromType value) { + this.importedFrom = value; + } + + /** + * Gets the value of the type property. + * + * @return + * possible object is + * {@link String } + * + */ + public String getType() { + return type; + } + + /** + * Sets the value of the type property. + * + * @param value + * allowed object is + * {@link String } + * + */ + public void setType(String value) { + this.type = value; + } + + /** + * Gets the value of the key property. + * + * @return + * possible object is + * {@link BigInteger } + * + */ + public BigInteger getKey() { + return key; + } + + /** + * Sets the value of the key property. 
+ * + * @param value + * allowed object is + * {@link BigInteger } + * + */ + public void setKey(BigInteger value) { + this.key = value; + } + +} diff --git a/biodata-formats/src/main/java/org/opencb/biodata/formats/protein/uniprot/v202502jaxb/EvidencedStringType.java b/biodata-formats/src/main/java/org/opencb/biodata/formats/protein/uniprot/v202502jaxb/EvidencedStringType.java new file mode 100644 index 00000000..17f372bb --- /dev/null +++ b/biodata-formats/src/main/java/org/opencb/biodata/formats/protein/uniprot/v202502jaxb/EvidencedStringType.java @@ -0,0 +1,101 @@ +// +// This file was generated by the JavaTM Architecture for XML Binding(JAXB) Reference Implementation, v2.2.8-b130911.1802 +// See http://java.sun.com/xml/jaxb +// Any modifications to this file will be lost upon recompilation of the source schema. +// Generated on: 2025.05.30 at 11:28:51 AM CEST +// + + +package org.opencb.biodata.formats.protein.uniprot.v202502jaxb; + +import java.util.ArrayList; +import java.util.List; +import javax.xml.bind.annotation.XmlAccessType; +import javax.xml.bind.annotation.XmlAccessorType; +import javax.xml.bind.annotation.XmlAttribute; +import javax.xml.bind.annotation.XmlType; +import javax.xml.bind.annotation.XmlValue; + + +/** + *

Java class for evidencedStringType complex type. + * + *

The following schema fragment specifies the expected content contained within this class. + * + *

+ * <complexType name="evidencedStringType">
+ *   <simpleContent>
+ *     <extension base="<http://www.w3.org/2001/XMLSchema>string">
+ *       <attribute name="evidence" type="{https://uniprot.org/uniprot}intListType" />
+ *     </extension>
+ *   </simpleContent>
+ * </complexType>
+ * 
+ * + * + */ +@XmlAccessorType(XmlAccessType.FIELD) +@XmlType(name = "evidencedStringType", propOrder = { + "value" +}) +public class EvidencedStringType { + + @XmlValue + protected String value; + @XmlAttribute(name = "evidence") + protected List evidence; + + /** + * Gets the value of the value property. + * + * @return + * possible object is + * {@link String } + * + */ + public String getValue() { + return value; + } + + /** + * Sets the value of the value property. + * + * @param value + * allowed object is + * {@link String } + * + */ + public void setValue(String value) { + this.value = value; + } + + /** + * Gets the value of the evidence property. + * + *

+ * This accessor method returns a reference to the live list, + * not a snapshot. Therefore any modification you make to the + * returned list will be present inside the JAXB object. + * This is why there is not a set method for the evidence property. + * + *

+ * For example, to add a new item, do as follows: + *

+     *    getEvidence().add(newItem);
+     * 
+ * + * + *

+ * Objects of the following type(s) are allowed in the list + * {@link Integer } + * + * + */ + public List getEvidence() { + if (evidence == null) { + evidence = new ArrayList(); + } + return this.evidence; + } + +} diff --git a/biodata-formats/src/main/java/org/opencb/biodata/formats/protein/uniprot/v202502jaxb/FeatureType.java b/biodata-formats/src/main/java/org/opencb/biodata/formats/protein/uniprot/v202502jaxb/FeatureType.java new file mode 100644 index 00000000..75572a6e --- /dev/null +++ b/biodata-formats/src/main/java/org/opencb/biodata/formats/protein/uniprot/v202502jaxb/FeatureType.java @@ -0,0 +1,369 @@ +// +// This file was generated by the JavaTM Architecture for XML Binding(JAXB) Reference Implementation, v2.2.8-b130911.1802 +// See http://java.sun.com/xml/jaxb +// Any modifications to this file will be lost upon recompilation of the source schema. +// Generated on: 2025.05.30 at 11:28:51 AM CEST +// + + +package org.opencb.biodata.formats.protein.uniprot.v202502jaxb; + +import java.util.ArrayList; +import java.util.List; +import javax.xml.bind.annotation.XmlAccessType; +import javax.xml.bind.annotation.XmlAccessorType; +import javax.xml.bind.annotation.XmlAttribute; +import javax.xml.bind.annotation.XmlElement; +import javax.xml.bind.annotation.XmlType; + + +/** + * Describes different types of sequence annotations. + * Equivalent to the flat file FT-line. + * + *

Java class for featureType complex type. + * + *

The following schema fragment specifies the expected content contained within this class. + * + *

+ * <complexType name="featureType">
+ *   <complexContent>
+ *     <restriction base="{http://www.w3.org/2001/XMLSchema}anyType">
+ *       <sequence>
+ *         <element name="original" type="{http://www.w3.org/2001/XMLSchema}string" minOccurs="0"/>
+ *         <element name="variation" type="{http://www.w3.org/2001/XMLSchema}string" maxOccurs="unbounded" minOccurs="0"/>
+ *         <element name="location" type="{https://uniprot.org/uniprot}locationType"/>
+ *         <element name="ligand" type="{https://uniprot.org/uniprot}ligandType" minOccurs="0"/>
+ *         <element name="ligandPart" type="{https://uniprot.org/uniprot}ligandPartType" minOccurs="0"/>
+ *       </sequence>
+ *       <attribute name="type" use="required">
+ *         <simpleType>
+ *           <restriction base="{http://www.w3.org/2001/XMLSchema}string">
+ *             <enumeration value="active site"/>
+ *             <enumeration value="binding site"/>
+ *             <enumeration value="chain"/>
+ *             <enumeration value="coiled-coil region"/>
+ *             <enumeration value="compositionally biased region"/>
+ *             <enumeration value="cross-link"/>
+ *             <enumeration value="disulfide bond"/>
+ *             <enumeration value="DNA-binding region"/>
+ *             <enumeration value="domain"/>
+ *             <enumeration value="glycosylation site"/>
+ *             <enumeration value="helix"/>
+ *             <enumeration value="initiator methionine"/>
+ *             <enumeration value="lipid moiety-binding region"/>
+ *             <enumeration value="modified residue"/>
+ *             <enumeration value="mutagenesis site"/>
+ *             <enumeration value="non-consecutive residues"/>
+ *             <enumeration value="non-terminal residue"/>
+ *             <enumeration value="peptide"/>
+ *             <enumeration value="propeptide"/>
+ *             <enumeration value="region of interest"/>
+ *             <enumeration value="repeat"/>
+ *             <enumeration value="non-standard amino acid"/>
+ *             <enumeration value="sequence conflict"/>
+ *             <enumeration value="sequence variant"/>
+ *             <enumeration value="short sequence motif"/>
+ *             <enumeration value="signal peptide"/>
+ *             <enumeration value="site"/>
+ *             <enumeration value="splice variant"/>
+ *             <enumeration value="strand"/>
+ *             <enumeration value="topological domain"/>
+ *             <enumeration value="transit peptide"/>
+ *             <enumeration value="transmembrane region"/>
+ *             <enumeration value="turn"/>
+ *             <enumeration value="unsure residue"/>
+ *             <enumeration value="zinc finger region"/>
+ *             <enumeration value="intramembrane region"/>
+ *           </restriction>
+ *         </simpleType>
+ *       </attribute>
+ *       <attribute name="id" type="{http://www.w3.org/2001/XMLSchema}string" />
+ *       <attribute name="description" type="{http://www.w3.org/2001/XMLSchema}string" />
+ *       <attribute name="evidence" type="{https://uniprot.org/uniprot}intListType" />
+ *       <attribute name="ref" type="{http://www.w3.org/2001/XMLSchema}string" />
+ *     </restriction>
+ *   </complexContent>
+ * </complexType>
+ * 
+ * + * + */ +@XmlAccessorType(XmlAccessType.FIELD) +@XmlType(name = "featureType", propOrder = { + "original", + "variation", + "location", + "ligand", + "ligandPart" +}) +public class FeatureType { + + protected String original; + protected List variation; + @XmlElement(required = true) + protected LocationType location; + protected LigandType ligand; + protected LigandPartType ligandPart; + @XmlAttribute(name = "type", required = true) + protected String type; + @XmlAttribute(name = "id") + protected String id; + @XmlAttribute(name = "description") + protected String description; + @XmlAttribute(name = "evidence") + protected List evidence; + @XmlAttribute(name = "ref") + protected String ref; + + /** + * Gets the value of the original property. + * + * @return + * possible object is + * {@link String } + * + */ + public String getOriginal() { + return original; + } + + /** + * Sets the value of the original property. + * + * @param value + * allowed object is + * {@link String } + * + */ + public void setOriginal(String value) { + this.original = value; + } + + /** + * Gets the value of the variation property. + * + *

+ * This accessor method returns a reference to the live list, + * not a snapshot. Therefore any modification you make to the + * returned list will be present inside the JAXB object. + * This is why there is not a set method for the variation property. + * + *

+ * For example, to add a new item, do as follows: + *

+     *    getVariation().add(newItem);
+     * 
+ * + * + *

+ * Objects of the following type(s) are allowed in the list + * {@link String } + * + * + */ + public List getVariation() { + if (variation == null) { + variation = new ArrayList(); + } + return this.variation; + } + + /** + * Gets the value of the location property. + * + * @return + * possible object is + * {@link LocationType } + * + */ + public LocationType getLocation() { + return location; + } + + /** + * Sets the value of the location property. + * + * @param value + * allowed object is + * {@link LocationType } + * + */ + public void setLocation(LocationType value) { + this.location = value; + } + + /** + * Gets the value of the ligand property. + * + * @return + * possible object is + * {@link LigandType } + * + */ + public LigandType getLigand() { + return ligand; + } + + /** + * Sets the value of the ligand property. + * + * @param value + * allowed object is + * {@link LigandType } + * + */ + public void setLigand(LigandType value) { + this.ligand = value; + } + + /** + * Gets the value of the ligandPart property. + * + * @return + * possible object is + * {@link LigandPartType } + * + */ + public LigandPartType getLigandPart() { + return ligandPart; + } + + /** + * Sets the value of the ligandPart property. + * + * @param value + * allowed object is + * {@link LigandPartType } + * + */ + public void setLigandPart(LigandPartType value) { + this.ligandPart = value; + } + + /** + * Gets the value of the type property. + * + * @return + * possible object is + * {@link String } + * + */ + public String getType() { + return type; + } + + /** + * Sets the value of the type property. + * + * @param value + * allowed object is + * {@link String } + * + */ + public void setType(String value) { + this.type = value; + } + + /** + * Gets the value of the id property. + * + * @return + * possible object is + * {@link String } + * + */ + public String getId() { + return id; + } + + /** + * Sets the value of the id property. 
+ * + * @param value + * allowed object is + * {@link String } + * + */ + public void setId(String value) { + this.id = value; + } + + /** + * Gets the value of the description property. + * + * @return + * possible object is + * {@link String } + * + */ + public String getDescription() { + return description; + } + + /** + * Sets the value of the description property. + * + * @param value + * allowed object is + * {@link String } + * + */ + public void setDescription(String value) { + this.description = value; + } + + /** + * Gets the value of the evidence property. + * + *

+ * This accessor method returns a reference to the live list, + * not a snapshot. Therefore any modification you make to the + * returned list will be present inside the JAXB object. + * This is why there is not a set method for the evidence property. + * + *

+ * For example, to add a new item, do as follows: + *

+     *    getEvidence().add(newItem);
+     * 
+ * + * + *

+ * Objects of the following type(s) are allowed in the list + * {@link Integer } + * + * + */ + public List getEvidence() { + if (evidence == null) { + evidence = new ArrayList(); + } + return this.evidence; + } + + /** + * Gets the value of the ref property. + * + * @return + * possible object is + * {@link String } + * + */ + public String getRef() { + return ref; + } + + /** + * Sets the value of the ref property. + * + * @param value + * allowed object is + * {@link String } + * + */ + public void setRef(String value) { + this.ref = value; + } + +} diff --git a/biodata-formats/src/main/java/org/opencb/biodata/formats/protein/uniprot/v202502jaxb/GeneLocationType.java b/biodata-formats/src/main/java/org/opencb/biodata/formats/protein/uniprot/v202502jaxb/GeneLocationType.java new file mode 100644 index 00000000..5d0f4cc6 --- /dev/null +++ b/biodata-formats/src/main/java/org/opencb/biodata/formats/protein/uniprot/v202502jaxb/GeneLocationType.java @@ -0,0 +1,152 @@ +// +// This file was generated by the JavaTM Architecture for XML Binding(JAXB) Reference Implementation, v2.2.8-b130911.1802 +// See http://java.sun.com/xml/jaxb +// Any modifications to this file will be lost upon recompilation of the source schema. +// Generated on: 2025.05.30 at 11:28:51 AM CEST +// + + +package org.opencb.biodata.formats.protein.uniprot.v202502jaxb; + +import java.util.ArrayList; +import java.util.List; +import javax.xml.bind.annotation.XmlAccessType; +import javax.xml.bind.annotation.XmlAccessorType; +import javax.xml.bind.annotation.XmlAttribute; +import javax.xml.bind.annotation.XmlType; + + +/** + * Describes non-nuclear gene locations (organelles and plasmids). + * Equivalent to the flat file OG-line. + * + *

Java class for geneLocationType complex type. + * + *

The following schema fragment specifies the expected content contained within this class. + * + *

+ * <complexType name="geneLocationType">
+ *   <complexContent>
+ *     <restriction base="{http://www.w3.org/2001/XMLSchema}anyType">
+ *       <sequence>
+ *         <element name="name" type="{https://uniprot.org/uniprot}statusType" maxOccurs="unbounded" minOccurs="0"/>
+ *       </sequence>
+ *       <attribute name="type" use="required">
+ *         <simpleType>
+ *           <restriction base="{http://www.w3.org/2001/XMLSchema}string">
+ *             <enumeration value="apicoplast"/>
+ *             <enumeration value="chloroplast"/>
+ *             <enumeration value="organellar chromatophore"/>
+ *             <enumeration value="cyanelle"/>
+ *             <enumeration value="hydrogenosome"/>
+ *             <enumeration value="mitochondrion"/>
+ *             <enumeration value="non-photosynthetic plastid"/>
+ *             <enumeration value="nucleomorph"/>
+ *             <enumeration value="plasmid"/>
+ *             <enumeration value="plastid"/>
+ *           </restriction>
+ *         </simpleType>
+ *       </attribute>
+ *       <attribute name="evidence" type="{https://uniprot.org/uniprot}intListType" />
+ *     </restriction>
+ *   </complexContent>
+ * </complexType>
+ * 
+ * + * + */ +@XmlAccessorType(XmlAccessType.FIELD) +@XmlType(name = "geneLocationType", propOrder = { + "name" +}) +public class GeneLocationType { + + protected List name; + @XmlAttribute(name = "type", required = true) + protected String type; + @XmlAttribute(name = "evidence") + protected List evidence; + + /** + * Gets the value of the name property. + * + *

+ * This accessor method returns a reference to the live list, + * not a snapshot. Therefore any modification you make to the + * returned list will be present inside the JAXB object. + * This is why there is not a set method for the name property. + * + *

+ * For example, to add a new item, do as follows: + *

+     *    getName().add(newItem);
+     * 
+ * + * + *

+ * Objects of the following type(s) are allowed in the list + * {@link StatusType } + * + * + */ + public List getName() { + if (name == null) { + name = new ArrayList(); + } + return this.name; + } + + /** + * Gets the value of the type property. + * + * @return + * possible object is + * {@link String } + * + */ + public String getType() { + return type; + } + + /** + * Sets the value of the type property. + * + * @param value + * allowed object is + * {@link String } + * + */ + public void setType(String value) { + this.type = value; + } + + /** + * Gets the value of the evidence property. + * + *

+ * This accessor method returns a reference to the live list, + * not a snapshot. Therefore any modification you make to the + * returned list will be present inside the JAXB object. + * This is why there is not a set method for the evidence property. + * + *

+ * For example, to add a new item, do as follows: + *

+     *    getEvidence().add(newItem);
+     * 
+ * + * + *

+ * Objects of the following type(s) are allowed in the list + * {@link Integer } + * + * + */ + public List getEvidence() { + if (evidence == null) { + evidence = new ArrayList(); + } + return this.evidence; + } + +} diff --git a/biodata-formats/src/main/java/org/opencb/biodata/formats/protein/uniprot/v202502jaxb/GeneNameType.java b/biodata-formats/src/main/java/org/opencb/biodata/formats/protein/uniprot/v202502jaxb/GeneNameType.java new file mode 100644 index 00000000..6a93d9e7 --- /dev/null +++ b/biodata-formats/src/main/java/org/opencb/biodata/formats/protein/uniprot/v202502jaxb/GeneNameType.java @@ -0,0 +1,140 @@ +// +// This file was generated by the JavaTM Architecture for XML Binding(JAXB) Reference Implementation, v2.2.8-b130911.1802 +// See http://java.sun.com/xml/jaxb +// Any modifications to this file will be lost upon recompilation of the source schema. +// Generated on: 2025.05.30 at 11:28:51 AM CEST +// + + +package org.opencb.biodata.formats.protein.uniprot.v202502jaxb; + +import java.util.ArrayList; +import java.util.List; +import javax.xml.bind.annotation.XmlAccessType; +import javax.xml.bind.annotation.XmlAccessorType; +import javax.xml.bind.annotation.XmlAttribute; +import javax.xml.bind.annotation.XmlType; +import javax.xml.bind.annotation.XmlValue; + + +/** + * Describes different types of gene designations. + * Equivalent to the flat file GN-line. + * + *

Java class for geneNameType complex type. + * + *

The following schema fragment specifies the expected content contained within this class. + * + *

+ * <complexType name="geneNameType">
+ *   <simpleContent>
+ *     <extension base="<http://www.w3.org/2001/XMLSchema>string">
+ *       <attribute name="evidence" type="{https://uniprot.org/uniprot}intListType" />
+ *       <attribute name="type" use="required">
+ *         <simpleType>
+ *           <restriction base="{http://www.w3.org/2001/XMLSchema}string">
+ *             <enumeration value="primary"/>
+ *             <enumeration value="synonym"/>
+ *             <enumeration value="ordered locus"/>
+ *             <enumeration value="ORF"/>
+ *           </restriction>
+ *         </simpleType>
+ *       </attribute>
+ *     </extension>
+ *   </simpleContent>
+ * </complexType>
+ * 
+ * + * + */ +@XmlAccessorType(XmlAccessType.FIELD) +@XmlType(name = "geneNameType", propOrder = { + "value" +}) +public class GeneNameType { + + @XmlValue + protected String value; + @XmlAttribute(name = "evidence") + protected List<Integer> evidence; + @XmlAttribute(name = "type", required = true) + protected String type; + + /** + * Gets the value of the value property. + * + * @return + * possible object is + * {@link String } + * + */ + public String getValue() { + return value; + } + + /** + * Sets the value of the value property. + * + * @param value + * allowed object is + * {@link String } + * + */ + public void setValue(String value) { + this.value = value; + } + + /** + * Gets the value of the evidence property. + * + *

+ * This accessor method returns a reference to the live list, + * not a snapshot. Therefore any modification you make to the + * returned list will be present inside the JAXB object. + * This is why there is not a set method for the evidence property. + * + *

+ * For example, to add a new item, do as follows: + *

+     *    getEvidence().add(newItem);
+     * 
+ * + * + *

+ * Objects of the following type(s) are allowed in the list + * {@link Integer } + * + * + */ + public List<Integer> getEvidence() { + if (evidence == null) { + evidence = new ArrayList<Integer>(); + } + return this.evidence; + } + + /** + * Gets the value of the type property. + * + * @return + * possible object is + * {@link String } + * + */ + public String getType() { + return type; + } + + /** + * Sets the value of the type property. + * + * @param value + * allowed object is + * {@link String } + * + */ + public void setType(String value) { + this.type = value; + } + +} diff --git a/biodata-formats/src/main/java/org/opencb/biodata/formats/protein/uniprot/v202502jaxb/GeneType.java b/biodata-formats/src/main/java/org/opencb/biodata/formats/protein/uniprot/v202502jaxb/GeneType.java new file mode 100644 index 00000000..da9c58d4 --- /dev/null +++ b/biodata-formats/src/main/java/org/opencb/biodata/formats/protein/uniprot/v202502jaxb/GeneType.java @@ -0,0 +1,79 @@ +// +// This file was generated by the JavaTM Architecture for XML Binding(JAXB) Reference Implementation, v2.2.8-b130911.1802 +// See http://java.sun.com/xml/jaxb +// Any modifications to this file will be lost upon recompilation of the source schema. +// Generated on: 2025.05.30 at 11:28:51 AM CEST +// + + +package org.opencb.biodata.formats.protein.uniprot.v202502jaxb; + +import java.util.ArrayList; +import java.util.List; +import javax.xml.bind.annotation.XmlAccessType; +import javax.xml.bind.annotation.XmlAccessorType; +import javax.xml.bind.annotation.XmlElement; +import javax.xml.bind.annotation.XmlType; + + +/** + * Describes a gene. + * Equivalent to the flat file GN-line. + * + *

Java class for geneType complex type. + * + *

The following schema fragment specifies the expected content contained within this class. + * + *

+ * <complexType name="geneType">
+ *   <complexContent>
+ *     <restriction base="{http://www.w3.org/2001/XMLSchema}anyType">
+ *       <sequence>
+ *         <element name="name" type="{https://uniprot.org/uniprot}geneNameType" maxOccurs="unbounded"/>
+ *       </sequence>
+ *     </restriction>
+ *   </complexContent>
+ * </complexType>
+ * 
+ * + * + */ +@XmlAccessorType(XmlAccessType.FIELD) +@XmlType(name = "geneType", propOrder = { + "name" +}) +public class GeneType { + + @XmlElement(required = true) + protected List<GeneNameType> name; + + /** + * Gets the value of the name property. + * + *

+ * This accessor method returns a reference to the live list, + * not a snapshot. Therefore any modification you make to the + * returned list will be present inside the JAXB object. + * This is why there is not a set method for the name property. + * + *

+ * For example, to add a new item, do as follows: + *

+     *    getName().add(newItem);
+     * 
+ * + * + *

+ * Objects of the following type(s) are allowed in the list + * {@link GeneNameType } + * + * + */ + public List<GeneNameType> getName() { + if (name == null) { + name = new ArrayList<GeneNameType>(); + } + return this.name; + } + +} diff --git a/biodata-formats/src/main/java/org/opencb/biodata/formats/protein/uniprot/v202502jaxb/ImportedFromType.java b/biodata-formats/src/main/java/org/opencb/biodata/formats/protein/uniprot/v202502jaxb/ImportedFromType.java new file mode 100644 index 00000000..4fc9b1ef --- /dev/null +++ b/biodata-formats/src/main/java/org/opencb/biodata/formats/protein/uniprot/v202502jaxb/ImportedFromType.java @@ -0,0 +1,71 @@ +// +// This file was generated by the JavaTM Architecture for XML Binding(JAXB) Reference Implementation, v2.2.8-b130911.1802 +// See http://java.sun.com/xml/jaxb +// Any modifications to this file will be lost upon recompilation of the source schema. +// Generated on: 2025.05.30 at 11:28:51 AM CEST +// + + +package org.opencb.biodata.formats.protein.uniprot.v202502jaxb; + +import javax.xml.bind.annotation.XmlAccessType; +import javax.xml.bind.annotation.XmlAccessorType; +import javax.xml.bind.annotation.XmlElement; +import javax.xml.bind.annotation.XmlType; + + +/** + * Describes the source of the evidence, when it is not assigned by UniProt, but imported from an external database. + * + *

Java class for importedFromType complex type. + * + *

The following schema fragment specifies the expected content contained within this class. + * + *

+ * <complexType name="importedFromType">
+ *   <complexContent>
+ *     <restriction base="{http://www.w3.org/2001/XMLSchema}anyType">
+ *       <sequence>
+ *         <element name="dbReference" type="{https://uniprot.org/uniprot}dbReferenceType"/>
+ *       </sequence>
+ *     </restriction>
+ *   </complexContent>
+ * </complexType>
+ * 
+ * + * + */ +@XmlAccessorType(XmlAccessType.FIELD) +@XmlType(name = "importedFromType", propOrder = { + "dbReference" +}) +public class ImportedFromType { + + @XmlElement(required = true) + protected DbReferenceType dbReference; + + /** + * Gets the value of the dbReference property, which is bound to an element declared as required. + * + * @return + * possible object is + * {@link DbReferenceType }, or null if the field was never assigned + * + */ + public DbReferenceType getDbReference() { + return dbReference; + } + + /** + * Sets the value of the dbReference property; the reference is stored as given, without validation or copying. + * + * @param value + * allowed object is + * {@link DbReferenceType } + * + */ + public void setDbReference(DbReferenceType value) { + this.dbReference = value; + } + +} diff --git a/biodata-formats/src/main/java/org/opencb/biodata/formats/protein/uniprot/v202502jaxb/InteractantType.java b/biodata-formats/src/main/java/org/opencb/biodata/formats/protein/uniprot/v202502jaxb/InteractantType.java new file mode 100644 index 00000000..8cbcb12d --- /dev/null +++ b/biodata-formats/src/main/java/org/opencb/biodata/formats/protein/uniprot/v202502jaxb/InteractantType.java @@ -0,0 +1,145 @@ +// +// This file was generated by the JavaTM Architecture for XML Binding(JAXB) Reference Implementation, v2.2.8-b130911.1802 +// See http://java.sun.com/xml/jaxb +// Any modifications to this file will be lost upon recompilation of the source schema. +// Generated on: 2025.05.30 at 11:28:51 AM CEST +// + + +package org.opencb.biodata.formats.protein.uniprot.v202502jaxb; + +import javax.xml.bind.annotation.XmlAccessType; +import javax.xml.bind.annotation.XmlAccessorType; +import javax.xml.bind.annotation.XmlAttribute; +import javax.xml.bind.annotation.XmlType; + + +/** + *

Java class for interactantType complex type. + * + *

The following schema fragment specifies the expected content contained within this class. + * + *

+ * <complexType name="interactantType">
+ *   <complexContent>
+ *     <restriction base="{http://www.w3.org/2001/XMLSchema}anyType">
+ *       <group ref="{https://uniprot.org/uniprot}interactantGroup" minOccurs="0"/>
+ *       <attribute name="intactId" use="required" type="{http://www.w3.org/2001/XMLSchema}string" />
+ *     </restriction>
+ *   </complexContent>
+ * </complexType>
+ * 
+ * + * + */ +@XmlAccessorType(XmlAccessType.FIELD) +@XmlType(name = "interactantType", propOrder = { + "id", + "label", + "dbReference" +}) +public class InteractantType { + + protected String id; + protected String label; + protected DbReferenceType dbReference; + @XmlAttribute(name = "intactId", required = true) + protected String intactId; + + /** + * Gets the value of the id property. + * + * @return + * possible object is + * {@link String }, or null if the field was never assigned + * + */ + public String getId() { + return id; + } + + /** + * Sets the value of the id property; the value is stored as given. + * + * @param value + * allowed object is + * {@link String } + * + */ + public void setId(String value) { + this.id = value; + } + + /** + * Gets the value of the label property. + * + * @return + * possible object is + * {@link String }, or null if the field was never assigned + * + */ + public String getLabel() { + return label; + } + + /** + * Sets the value of the label property; the value is stored as given. + * + * @param value + * allowed object is + * {@link String } + * + */ + public void setLabel(String value) { + this.label = value; + } + + /** + * Gets the value of the dbReference property. + * + * @return + * possible object is + * {@link DbReferenceType }, or null if the field was never assigned + * + */ + public DbReferenceType getDbReference() { + return dbReference; + } + + /** + * Sets the value of the dbReference property; the reference is stored as given, without copying. + * + * @param value + * allowed object is + * {@link DbReferenceType } + * + */ + public void setDbReference(DbReferenceType value) { + this.dbReference = value; + } + + /** + * Gets the value of the intactId property, bound to the required XML attribute "intactId". + * + * @return + * possible object is + * {@link String }, or null if the field was never assigned + * + */ + public String getIntactId() { + return intactId; + } + + /** + * Sets the value of the intactId property. 
+ * + * @param value + * allowed object is + * {@link String } + * + */ + public void setIntactId(String value) { + this.intactId = value; + } + +} diff --git a/biodata-formats/src/main/java/org/opencb/biodata/formats/protein/uniprot/v202502jaxb/IsoformType.java b/biodata-formats/src/main/java/org/opencb/biodata/formats/protein/uniprot/v202502jaxb/IsoformType.java new file mode 100644 index 00000000..cae4da7b --- /dev/null +++ b/biodata-formats/src/main/java/org/opencb/biodata/formats/protein/uniprot/v202502jaxb/IsoformType.java @@ -0,0 +1,370 @@ +// +// This file was generated by the JavaTM Architecture for XML Binding(JAXB) Reference Implementation, v2.2.8-b130911.1802 +// See http://java.sun.com/xml/jaxb +// Any modifications to this file will be lost upon recompilation of the source schema. +// Generated on: 2025.05.30 at 11:28:51 AM CEST +// + + +package org.opencb.biodata.formats.protein.uniprot.v202502jaxb; + +import java.util.ArrayList; +import java.util.List; +import javax.xml.bind.annotation.XmlAccessType; +import javax.xml.bind.annotation.XmlAccessorType; +import javax.xml.bind.annotation.XmlAttribute; +import javax.xml.bind.annotation.XmlElement; +import javax.xml.bind.annotation.XmlType; +import javax.xml.bind.annotation.XmlValue; + + +/** + * Describes isoforms in 'alternative products' annotations. + * + *

Java class for isoformType complex type. + * + *

The following schema fragment specifies the expected content contained within this class. + * + *

+ * <complexType name="isoformType">
+ *   <complexContent>
+ *     <restriction base="{http://www.w3.org/2001/XMLSchema}anyType">
+ *       <sequence>
+ *         <element name="id" type="{http://www.w3.org/2001/XMLSchema}string" maxOccurs="unbounded"/>
+ *         <element name="name" maxOccurs="unbounded">
+ *           <complexType>
+ *             <simpleContent>
+ *               <extension base="<http://www.w3.org/2001/XMLSchema>string">
+ *                 <attribute name="evidence" type="{https://uniprot.org/uniprot}intListType" />
+ *               </extension>
+ *             </simpleContent>
+ *           </complexType>
+ *         </element>
+ *         <element name="sequence">
+ *           <complexType>
+ *             <complexContent>
+ *               <restriction base="{http://www.w3.org/2001/XMLSchema}anyType">
+ *                 <attribute name="type" use="required">
+ *                   <simpleType>
+ *                     <restriction base="{http://www.w3.org/2001/XMLSchema}string">
+ *                       <enumeration value="not described"/>
+ *                       <enumeration value="described"/>
+ *                       <enumeration value="displayed"/>
+ *                       <enumeration value="external"/>
+ *                     </restriction>
+ *                   </simpleType>
+ *                 </attribute>
+ *                 <attribute name="ref" type="{http://www.w3.org/2001/XMLSchema}string" />
+ *               </restriction>
+ *             </complexContent>
+ *           </complexType>
+ *         </element>
+ *         <element name="text" type="{https://uniprot.org/uniprot}evidencedStringType" maxOccurs="unbounded" minOccurs="0"/>
+ *       </sequence>
+ *     </restriction>
+ *   </complexContent>
+ * </complexType>
+ * 
+ * + * + */ +@XmlAccessorType(XmlAccessType.FIELD) +@XmlType(name = "isoformType", propOrder = { + "id", + "name", + "sequence", + "text" +}) +public class IsoformType { + + @XmlElement(required = true) + protected List<String> id; + @XmlElement(required = true) + protected List<IsoformType.Name> name; + @XmlElement(required = true) + protected IsoformType.Sequence sequence; + protected List<EvidencedStringType> text; + + /** + * Gets the value of the id property. + * + *

+ * This accessor method returns a reference to the live list, + * not a snapshot. Therefore any modification you make to the + * returned list will be present inside the JAXB object. + * This is why there is not a set method for the id property. + * + *

+ * For example, to add a new item, do as follows: + *

+     *    getId().add(newItem);
+     * 
+ * + * + *

+ * Objects of the following type(s) are allowed in the list + * {@link String } + * + * + */ + public List<String> getId() { + if (id == null) { + id = new ArrayList<String>(); + } + return this.id; + } + + /** + * Gets the value of the name property. + * + *

+ * This accessor method returns a reference to the live list, + * not a snapshot. Therefore any modification you make to the + * returned list will be present inside the JAXB object. + * This is why there is not a set method for the name property. + * + *

+ * For example, to add a new item, do as follows: + *

+     *    getName().add(newItem);
+     * 
+ * + * + *

+ * Objects of the following type(s) are allowed in the list + * {@link IsoformType.Name } + * + * + */ + public List<IsoformType.Name> getName() { + if (name == null) { + name = new ArrayList<IsoformType.Name>(); + } + return this.name; + } + + /** + * Gets the value of the sequence property. + * + * @return + * possible object is + * {@link IsoformType.Sequence } + * + */ + public IsoformType.Sequence getSequence() { + return sequence; + } + + /** + * Sets the value of the sequence property. + * + * @param value + * allowed object is + * {@link IsoformType.Sequence } + * + */ + public void setSequence(IsoformType.Sequence value) { + this.sequence = value; + } + + /** + * Gets the value of the text property. + * + *

+ * This accessor method returns a reference to the live list, + * not a snapshot. Therefore any modification you make to the + * returned list will be present inside the JAXB object. + * This is why there is not a set method for the text property. + * + *

+ * For example, to add a new item, do as follows: + *

+     *    getText().add(newItem);
+     * 
+ * + * + *

+ * Objects of the following type(s) are allowed in the list + * {@link EvidencedStringType } + * + * + */ + public List<EvidencedStringType> getText() { + if (text == null) { + text = new ArrayList<EvidencedStringType>(); + } + return this.text; + } + + + /** + *

Java class for anonymous complex type. + * + *

The following schema fragment specifies the expected content contained within this class. + * + *

+     * &lt;complexType&gt;
+     *   &lt;simpleContent&gt;
+     *     &lt;extension base="&lt;http://www.w3.org/2001/XMLSchema&gt;string"&gt;
+     *       &lt;attribute name="evidence" type="{https://uniprot.org/uniprot}intListType" /&gt;
+     *     &lt;/extension&gt;
+     *   &lt;/simpleContent&gt;
+     * &lt;/complexType&gt;
+     * 
+ * + * + */ + @XmlAccessorType(XmlAccessType.FIELD) + @XmlType(name = "", propOrder = { + "value" + }) + public static class Name { + + @XmlValue + protected String value; + @XmlAttribute(name = "evidence") + protected List<Integer> evidence; + + /** + * Gets the value of the value property. + * + * @return + * possible object is + * {@link String } + * + */ + public String getValue() { + return value; + } + + /** + * Sets the value of the value property. + * + * @param value + * allowed object is + * {@link String } + * + */ + public void setValue(String value) { + this.value = value; + } + + /** + * Gets the value of the evidence property. + * + *

+ * This accessor method returns a reference to the live list, + * not a snapshot. Therefore any modification you make to the + * returned list will be present inside the JAXB object. + * This is why there is not a set method for the evidence property. + * + *

+ * For example, to add a new item, do as follows: + *

+         *    getEvidence().add(newItem);
+         * 
+ * + * + *

+ * Objects of the following type(s) are allowed in the list + * {@link Integer } + * + * + */ + public List<Integer> getEvidence() { + if (evidence == null) { + evidence = new ArrayList<Integer>(); + } + return this.evidence; + } + + } + + + /** + *

Java class for anonymous complex type. + * + *

The following schema fragment specifies the expected content contained within this class. + * + *

+     * &lt;complexType&gt;
+     *   &lt;complexContent&gt;
+     *     &lt;restriction base="{http://www.w3.org/2001/XMLSchema}anyType"&gt;
+     *       &lt;attribute name="type" use="required"&gt;
+     *         &lt;simpleType&gt;
+     *           &lt;restriction base="{http://www.w3.org/2001/XMLSchema}string"&gt;
+     *             &lt;enumeration value="not described"/&gt;
+     *             &lt;enumeration value="described"/&gt;
+     *             &lt;enumeration value="displayed"/&gt;
+     *             &lt;enumeration value="external"/&gt;
+     *           &lt;/restriction&gt;
+     *         &lt;/simpleType&gt;
+     *       &lt;/attribute&gt;
+     *       &lt;attribute name="ref" type="{http://www.w3.org/2001/XMLSchema}string" /&gt;
+     *     &lt;/restriction&gt;
+     *   &lt;/complexContent&gt;
+     * &lt;/complexType&gt;
+     * 
+ * + * + */ + @XmlAccessorType(XmlAccessType.FIELD) + @XmlType(name = "") + public static class Sequence { + + @XmlAttribute(name = "type", required = true) + protected String type; + @XmlAttribute(name = "ref") + protected String ref; + + /** + * Gets the value of the type property. + * + * @return + * possible object is + * {@link String } + * + */ + public String getType() { + return type; + } + + /** + * Sets the value of the type property. + * + * @param value + * allowed object is + * {@link String } + * + */ + public void setType(String value) { + this.type = value; + } + + /** + * Gets the value of the ref property. + * + * @return + * possible object is + * {@link String } + * + */ + public String getRef() { + return ref; + } + + /** + * Sets the value of the ref property. + * + * @param value + * allowed object is + * {@link String } + * + */ + public void setRef(String value) { + this.ref = value; + } + + } + +} diff --git a/biodata-formats/src/main/java/org/opencb/biodata/formats/protein/uniprot/v202502jaxb/KeywordType.java b/biodata-formats/src/main/java/org/opencb/biodata/formats/protein/uniprot/v202502jaxb/KeywordType.java new file mode 100644 index 00000000..d2b47f9d --- /dev/null +++ b/biodata-formats/src/main/java/org/opencb/biodata/formats/protein/uniprot/v202502jaxb/KeywordType.java @@ -0,0 +1,128 @@ +// +// This file was generated by the JavaTM Architecture for XML Binding(JAXB) Reference Implementation, v2.2.8-b130911.1802 +// See http://java.sun.com/xml/jaxb +// Any modifications to this file will be lost upon recompilation of the source schema. 
+// Generated on: 2025.05.30 at 11:28:51 AM CEST +// + + +package org.opencb.biodata.formats.protein.uniprot.v202502jaxb; + +import java.util.ArrayList; +import java.util.List; +import javax.xml.bind.annotation.XmlAccessType; +import javax.xml.bind.annotation.XmlAccessorType; +import javax.xml.bind.annotation.XmlAttribute; +import javax.xml.bind.annotation.XmlType; +import javax.xml.bind.annotation.XmlValue; + + +/** + *

Java class for keywordType complex type. + * + *

The following schema fragment specifies the expected content contained within this class. + * + *

+ * <complexType name="keywordType">
+ *   <simpleContent>
+ *     <extension base="<http://www.w3.org/2001/XMLSchema>string">
+ *       <attribute name="evidence" type="{https://uniprot.org/uniprot}intListType" />
+ *       <attribute name="id" use="required" type="{http://www.w3.org/2001/XMLSchema}string" />
+ *     </extension>
+ *   </simpleContent>
+ * </complexType>
+ * 
+ * + * + */ +@XmlAccessorType(XmlAccessType.FIELD) +@XmlType(name = "keywordType", propOrder = { + "value" +}) +public class KeywordType { + + @XmlValue + protected String value; + @XmlAttribute(name = "evidence") + protected List<Integer> evidence; + @XmlAttribute(name = "id", required = true) + protected String id; + + /** + * Gets the value of the value property. + * + * @return + * possible object is + * {@link String } + * + */ + public String getValue() { + return value; + } + + /** + * Sets the value of the value property. + * + * @param value + * allowed object is + * {@link String } + * + */ + public void setValue(String value) { + this.value = value; + } + + /** + * Gets the value of the evidence property. + * + *

+ * This accessor method returns a reference to the live list, + * not a snapshot. Therefore any modification you make to the + * returned list will be present inside the JAXB object. + * This is why there is not a set method for the evidence property. + * + *

+ * For example, to add a new item, do as follows: + *

+     *    getEvidence().add(newItem);
+     * 
+ * + * + *

+ * Objects of the following type(s) are allowed in the list + * {@link Integer } + * + * + */ + public List<Integer> getEvidence() { + if (evidence == null) { + evidence = new ArrayList<Integer>(); + } + return this.evidence; + } + + /** + * Gets the value of the id property. + * + * @return + * possible object is + * {@link String } + * + */ + public String getId() { + return id; + } + + /** + * Sets the value of the id property. + * + * @param value + * allowed object is + * {@link String } + * + */ + public void setId(String value) { + this.id = value; + } + +} diff --git a/biodata-formats/src/main/java/org/opencb/biodata/formats/protein/uniprot/v202502jaxb/LigandPartType.java b/biodata-formats/src/main/java/org/opencb/biodata/formats/protein/uniprot/v202502jaxb/LigandPartType.java new file mode 100644 index 00000000..21b9a718 --- /dev/null +++ b/biodata-formats/src/main/java/org/opencb/biodata/formats/protein/uniprot/v202502jaxb/LigandPartType.java @@ -0,0 +1,152 @@ +// +// This file was generated by the JavaTM Architecture for XML Binding(JAXB) Reference Implementation, v2.2.8-b130911.1802 +// See http://java.sun.com/xml/jaxb +// Any modifications to this file will be lost upon recompilation of the source schema. +// Generated on: 2025.05.30 at 11:28:51 AM CEST +// + + +package org.opencb.biodata.formats.protein.uniprot.v202502jaxb; + +import javax.xml.bind.annotation.XmlAccessType; +import javax.xml.bind.annotation.XmlAccessorType; +import javax.xml.bind.annotation.XmlElement; +import javax.xml.bind.annotation.XmlType; + + +/** + * Describes a ligand part. + * + *

Java class for ligandPartType complex type. + * + *

The following schema fragment specifies the expected content contained within this class. + * + *

+ * <complexType name="ligandPartType">
+ *   <complexContent>
+ *     <restriction base="{http://www.w3.org/2001/XMLSchema}anyType">
+ *       <sequence>
+ *         <element name="name" type="{http://www.w3.org/2001/XMLSchema}string"/>
+ *         <element name="dbReference" type="{https://uniprot.org/uniprot}dbReferenceType" minOccurs="0"/>
+ *         <element name="label" type="{http://www.w3.org/2001/XMLSchema}string" minOccurs="0"/>
+ *         <element name="note" type="{http://www.w3.org/2001/XMLSchema}string" minOccurs="0"/>
+ *       </sequence>
+ *     </restriction>
+ *   </complexContent>
+ * </complexType>
+ * 
+ * + * + */ +@XmlAccessorType(XmlAccessType.FIELD) +@XmlType(name = "ligandPartType", propOrder = { + "name", + "dbReference", + "label", + "note" +}) +public class LigandPartType { + + @XmlElement(required = true) + protected String name; + protected DbReferenceType dbReference; + protected String label; + protected String note; + + /** + * Gets the value of the name property, the only field of this class bound as a required element. + * + * @return + * possible object is + * {@link String }, or null if the field was never assigned + * + */ + public String getName() { + return name; + } + + /** + * Sets the value of the name property; the value is stored as given. + * + * @param value + * allowed object is + * {@link String } + * + */ + public void setName(String value) { + this.name = value; + } + + /** + * Gets the value of the dbReference property. + * + * @return + * possible object is + * {@link DbReferenceType }, or null if the field was never assigned + * + */ + public DbReferenceType getDbReference() { + return dbReference; + } + + /** + * Sets the value of the dbReference property; the reference is stored as given, without copying. + * + * @param value + * allowed object is + * {@link DbReferenceType } + * + */ + public void setDbReference(DbReferenceType value) { + this.dbReference = value; + } + + /** + * Gets the value of the label property. + * + * @return + * possible object is + * {@link String }, or null if the field was never assigned + * + */ + public String getLabel() { + return label; + } + + /** + * Sets the value of the label property; the value is stored as given. + * + * @param value + * allowed object is + * {@link String } + * + */ + public void setLabel(String value) { + this.label = value; + } + + /** + * Gets the value of the note property. 
+ * + * @return + * possible object is + * {@link String }, or null if the field was never assigned + * + */ + public String getNote() { + return note; + } + + /** + * Sets the value of the note property. 
+ * + * @param value + * allowed object is + * {@link String } + * + */ + public void setNote(String value) { + this.note = value; + } + +} diff --git a/biodata-formats/src/main/java/org/opencb/biodata/formats/protein/uniprot/v202502jaxb/LigandType.java b/biodata-formats/src/main/java/org/opencb/biodata/formats/protein/uniprot/v202502jaxb/LigandType.java new file mode 100644 index 00000000..22e17ee9 --- /dev/null +++ b/biodata-formats/src/main/java/org/opencb/biodata/formats/protein/uniprot/v202502jaxb/LigandType.java @@ -0,0 +1,152 @@ +// +// This file was generated by the JavaTM Architecture for XML Binding(JAXB) Reference Implementation, v2.2.8-b130911.1802 +// See http://java.sun.com/xml/jaxb +// Any modifications to this file will be lost upon recompilation of the source schema. +// Generated on: 2025.05.30 at 11:28:51 AM CEST +// + + +package org.opencb.biodata.formats.protein.uniprot.v202502jaxb; + +import javax.xml.bind.annotation.XmlAccessType; +import javax.xml.bind.annotation.XmlAccessorType; +import javax.xml.bind.annotation.XmlElement; +import javax.xml.bind.annotation.XmlType; + + +/** + * Describes a ligand. + * + *

Java class for ligandType complex type. + * + *

The following schema fragment specifies the expected content contained within this class. + * + *

+ * <complexType name="ligandType">
+ *   <complexContent>
+ *     <restriction base="{http://www.w3.org/2001/XMLSchema}anyType">
+ *       <sequence>
+ *         <element name="name" type="{http://www.w3.org/2001/XMLSchema}string"/>
+ *         <element name="dbReference" type="{https://uniprot.org/uniprot}dbReferenceType" minOccurs="0"/>
+ *         <element name="label" type="{http://www.w3.org/2001/XMLSchema}string" minOccurs="0"/>
+ *         <element name="note" type="{http://www.w3.org/2001/XMLSchema}string" minOccurs="0"/>
+ *       </sequence>
+ *     </restriction>
+ *   </complexContent>
+ * </complexType>
+ * 
+ * + * + */ +@XmlAccessorType(XmlAccessType.FIELD) +@XmlType(name = "ligandType", propOrder = { + "name", + "dbReference", + "label", + "note" +}) +public class LigandType { + + @XmlElement(required = true) + protected String name; + protected DbReferenceType dbReference; + protected String label; + protected String note; + + /** + * Gets the value of the name property, the only field of this class bound as a required element. + * + * @return + * possible object is + * {@link String }, or null if the field was never assigned + * + */ + public String getName() { + return name; + } + + /** + * Sets the value of the name property; the value is stored as given. + * + * @param value + * allowed object is + * {@link String } + * + */ + public void setName(String value) { + this.name = value; + } + + /** + * Gets the value of the dbReference property. + * + * @return + * possible object is + * {@link DbReferenceType }, or null if the field was never assigned + * + */ + public DbReferenceType getDbReference() { + return dbReference; + } + + /** + * Sets the value of the dbReference property; the reference is stored as given, without copying. + * + * @param value + * allowed object is + * {@link DbReferenceType } + * + */ + public void setDbReference(DbReferenceType value) { + this.dbReference = value; + } + + /** + * Gets the value of the label property. + * + * @return + * possible object is + * {@link String }, or null if the field was never assigned + * + */ + public String getLabel() { + return label; + } + + /** + * Sets the value of the label property; the value is stored as given. + * + * @param value + * allowed object is + * {@link String } + * + */ + public void setLabel(String value) { + this.label = value; + } + + /** + * Gets the value of the note property. 
+ * + * @return + * possible object is + * {@link String }, or null if the field was never assigned + * + */ + public String getNote() { + return note; + } + + /** + * Sets the value of the note property. 
+ * + * @param value + * allowed object is + * {@link String } + * + */ + public void setNote(String value) { + this.note = value; + } + +} diff --git a/biodata-formats/src/main/java/org/opencb/biodata/formats/protein/uniprot/v202502jaxb/LocationType.java b/biodata-formats/src/main/java/org/opencb/biodata/formats/protein/uniprot/v202502jaxb/LocationType.java new file mode 100644 index 00000000..6530635c --- /dev/null +++ b/biodata-formats/src/main/java/org/opencb/biodata/formats/protein/uniprot/v202502jaxb/LocationType.java @@ -0,0 +1,153 @@ +// +// This file was generated by the JavaTM Architecture for XML Binding(JAXB) Reference Implementation, v2.2.8-b130911.1802 +// See http://java.sun.com/xml/jaxb +// Any modifications to this file will be lost upon recompilation of the source schema. +// Generated on: 2025.05.30 at 11:28:51 AM CEST +// + + +package org.opencb.biodata.formats.protein.uniprot.v202502jaxb; + +import javax.xml.bind.annotation.XmlAccessType; +import javax.xml.bind.annotation.XmlAccessorType; +import javax.xml.bind.annotation.XmlAttribute; +import javax.xml.bind.annotation.XmlType; + + +/** + * Describes a sequence location as either a range with a begin and end or as a position. The 'sequence' attribute is only used when the location is not on the canonical sequence displayed in the current entry. + * + *

Java class for locationType complex type. + * + *

The following schema fragment specifies the expected content contained within this class. + * + *

+ * <complexType name="locationType">
+ *   <complexContent>
+ *     <restriction base="{http://www.w3.org/2001/XMLSchema}anyType">
+ *       <choice>
+ *         <sequence>
+ *           <element name="begin" type="{https://uniprot.org/uniprot}positionType"/>
+ *           <element name="end" type="{https://uniprot.org/uniprot}positionType"/>
+ *         </sequence>
+ *         <element name="position" type="{https://uniprot.org/uniprot}positionType"/>
+ *       </choice>
+ *       <attribute name="sequence" type="{http://www.w3.org/2001/XMLSchema}string" />
+ *     </restriction>
+ *   </complexContent>
+ * </complexType>
+ * 
+ * + * + */ +@XmlAccessorType(XmlAccessType.FIELD) +@XmlType(name = "locationType", propOrder = { + "begin", + "end", + "position" +}) +public class LocationType { + + protected PositionType begin; + protected PositionType end; + protected PositionType position; + @XmlAttribute(name = "sequence") + protected String sequence; + + /** + * Gets the value of the begin property. + * + * @return + * possible object is + * {@link PositionType } + * + */ + public PositionType getBegin() { + return begin; + } + + /** + * Sets the value of the begin property. + * + * @param value + * allowed object is + * {@link PositionType } + * + */ + public void setBegin(PositionType value) { + this.begin = value; + } + + /** + * Gets the value of the end property. + * + * @return + * possible object is + * {@link PositionType } + * + */ + public PositionType getEnd() { + return end; + } + + /** + * Sets the value of the end property. + * + * @param value + * allowed object is + * {@link PositionType } + * + */ + public void setEnd(PositionType value) { + this.end = value; + } + + /** + * Gets the value of the position property. + * + * @return + * possible object is + * {@link PositionType } + * + */ + public PositionType getPosition() { + return position; + } + + /** + * Sets the value of the position property. + * + * @param value + * allowed object is + * {@link PositionType } + * + */ + public void setPosition(PositionType value) { + this.position = value; + } + + /** + * Gets the value of the sequence property. + * + * @return + * possible object is + * {@link String } + * + */ + public String getSequence() { + return sequence; + } + + /** + * Sets the value of the sequence property. 
+ * + * @param value + * allowed object is + * {@link String } + * + */ + public void setSequence(String value) { + this.sequence = value; + } + +} diff --git a/biodata-formats/src/main/java/org/opencb/biodata/formats/protein/uniprot/v202502jaxb/MoleculeType.java b/biodata-formats/src/main/java/org/opencb/biodata/formats/protein/uniprot/v202502jaxb/MoleculeType.java new file mode 100644 index 00000000..4575c633 --- /dev/null +++ b/biodata-formats/src/main/java/org/opencb/biodata/formats/protein/uniprot/v202502jaxb/MoleculeType.java @@ -0,0 +1,96 @@ +// +// This file was generated by the JavaTM Architecture for XML Binding(JAXB) Reference Implementation, v2.2.8-b130911.1802 +// See http://java.sun.com/xml/jaxb +// Any modifications to this file will be lost upon recompilation of the source schema. +// Generated on: 2025.05.30 at 11:28:51 AM CEST +// + + +package org.opencb.biodata.formats.protein.uniprot.v202502jaxb; + +import javax.xml.bind.annotation.XmlAccessType; +import javax.xml.bind.annotation.XmlAccessorType; +import javax.xml.bind.annotation.XmlAttribute; +import javax.xml.bind.annotation.XmlType; +import javax.xml.bind.annotation.XmlValue; + + +/** + * Describes a molecule by name or unique identifier. + * + *

Java class for moleculeType complex type. + * + *

The following schema fragment specifies the expected content contained within this class. + * + *

+ * <complexType name="moleculeType">
+ *   <simpleContent>
+ *     <extension base="<http://www.w3.org/2001/XMLSchema>string">
+ *       <attribute name="id" type="{http://www.w3.org/2001/XMLSchema}string" />
+ *     </extension>
+ *   </simpleContent>
+ * </complexType>
+ * 
+ * + * + */ +@XmlAccessorType(XmlAccessType.FIELD) +@XmlType(name = "moleculeType", propOrder = { + "value" +}) +public class MoleculeType { + + @XmlValue + protected String value; + @XmlAttribute(name = "id") + protected String id; + + /** + * Gets the value of the value property. + * + * @return + * possible object is + * {@link String } + * + */ + public String getValue() { + return value; + } + + /** + * Sets the value of the value property. + * + * @param value + * allowed object is + * {@link String } + * + */ + public void setValue(String value) { + this.value = value; + } + + /** + * Gets the value of the id property. + * + * @return + * possible object is + * {@link String } + * + */ + public String getId() { + return id; + } + + /** + * Sets the value of the id property. + * + * @param value + * allowed object is + * {@link String } + * + */ + public void setId(String value) { + this.id = value; + } + +} diff --git a/biodata-formats/src/main/java/org/opencb/biodata/formats/protein/uniprot/v202502jaxb/NameListType.java b/biodata-formats/src/main/java/org/opencb/biodata/formats/protein/uniprot/v202502jaxb/NameListType.java new file mode 100644 index 00000000..3f9a8771 --- /dev/null +++ b/biodata-formats/src/main/java/org/opencb/biodata/formats/protein/uniprot/v202502jaxb/NameListType.java @@ -0,0 +1,82 @@ +// +// This file was generated by the JavaTM Architecture for XML Binding(JAXB) Reference Implementation, v2.2.8-b130911.1802 +// See http://java.sun.com/xml/jaxb +// Any modifications to this file will be lost upon recompilation of the source schema. 
+// Generated on: 2025.05.30 at 11:28:51 AM CEST +// + + +package org.opencb.biodata.formats.protein.uniprot.v202502jaxb; + +import java.util.ArrayList; +import java.util.List; +import javax.xml.bind.annotation.XmlAccessType; +import javax.xml.bind.annotation.XmlAccessorType; +import javax.xml.bind.annotation.XmlElement; +import javax.xml.bind.annotation.XmlElements; +import javax.xml.bind.annotation.XmlType; + + +/** + *

Java class for nameListType complex type. + * + *

The following schema fragment specifies the expected content contained within this class. + * + *

+ * <complexType name="nameListType">
+ *   <complexContent>
+ *     <restriction base="{http://www.w3.org/2001/XMLSchema}anyType">
+ *       <choice maxOccurs="unbounded">
+ *         <element name="consortium" type="{https://uniprot.org/uniprot}consortiumType"/>
+ *         <element name="person" type="{https://uniprot.org/uniprot}personType"/>
+ *       </choice>
+ *     </restriction>
+ *   </complexContent>
+ * </complexType>
+ * 
+ * + * + */ +@XmlAccessorType(XmlAccessType.FIELD) +@XmlType(name = "nameListType", propOrder = { + "consortiumOrPerson" +}) +public class NameListType { + + @XmlElements({ + @XmlElement(name = "consortium", type = ConsortiumType.class), + @XmlElement(name = "person", type = PersonType.class) + }) + protected List consortiumOrPerson; + + /** + * Gets the value of the consortiumOrPerson property. + * + *

+ * This accessor method returns a reference to the live list, + * not a snapshot. Therefore any modification you make to the + * returned list will be present inside the JAXB object. + * This is why there is not a set method for the consortiumOrPerson property. + * + *

+ * For example, to add a new item, do as follows: + *

+     *    getConsortiumOrPerson().add(newItem);
+     * 
+ * + * + *

+ * Objects of the following type(s) are allowed in the list + * {@link ConsortiumType } + * {@link PersonType } + * + * + */ + public List getConsortiumOrPerson() { + if (consortiumOrPerson == null) { + consortiumOrPerson = new ArrayList(); + } + return this.consortiumOrPerson; + } + +} diff --git a/biodata-formats/src/main/java/org/opencb/biodata/formats/protein/uniprot/v202502jaxb/ObjectFactory.java b/biodata-formats/src/main/java/org/opencb/biodata/formats/protein/uniprot/v202502jaxb/ObjectFactory.java new file mode 100644 index 00000000..110ee58d --- /dev/null +++ b/biodata-formats/src/main/java/org/opencb/biodata/formats/protein/uniprot/v202502jaxb/ObjectFactory.java @@ -0,0 +1,524 @@ +// +// This file was generated by the JavaTM Architecture for XML Binding(JAXB) Reference Implementation, v2.2.8-b130911.1802 +// See http://java.sun.com/xml/jaxb +// Any modifications to this file will be lost upon recompilation of the source schema. +// Generated on: 2025.05.30 at 11:28:51 AM CEST +// + + +package org.opencb.biodata.formats.protein.uniprot.v202502jaxb; + +import javax.xml.bind.JAXBElement; +import javax.xml.bind.annotation.XmlElementDecl; +import javax.xml.bind.annotation.XmlRegistry; +import javax.xml.namespace.QName; + + +/** + * This object contains factory methods for each + * Java content interface and Java element interface + * generated in the org.opencb.biodata.formats.protein.uniprot.v202502jaxb package. + *

An ObjectFactory allows you to programatically + * construct new instances of the Java representation + * for XML content. The Java representation of XML + * content can consist of schema derived interfaces + * and classes representing the binding of schema + * type definitions, element declarations and model + * groups. Factory methods for each of these are + * provided in this class. + * + */ +@XmlRegistry +public class ObjectFactory { + + private final static QName _Copyright_QNAME = new QName("https://uniprot.org/uniprot", "copyright"); + + /** + * Create a new ObjectFactory that can be used to create new instances of schema derived classes for package: org.opencb.biodata.formats.protein.uniprot.v202502jaxb + * + */ + public ObjectFactory() { + } + + /** + * Create an instance of {@link SourceDataType } + * + */ + public SourceDataType createSourceDataType() { + return new SourceDataType(); + } + + /** + * Create an instance of {@link IsoformType } + * + */ + public IsoformType createIsoformType() { + return new IsoformType(); + } + + /** + * Create an instance of {@link CommentType } + * + */ + public CommentType createCommentType() { + return new CommentType(); + } + + /** + * Create an instance of {@link CommentType.Conflict } + * + */ + public CommentType.Conflict createCommentTypeConflict() { + return new CommentType.Conflict(); + } + + /** + * Create an instance of {@link OrganismType } + * + */ + public OrganismType createOrganismType() { + return new OrganismType(); + } + + /** + * Create an instance of {@link ProteinType } + * + */ + public ProteinType createProteinType() { + return new ProteinType(); + } + + /** + * Create an instance of {@link Entry } + * + */ + public Entry createEntry() { + return new Entry(); + } + + /** + * Create an instance of {@link GeneType } + * + */ + public GeneType createGeneType() { + return new GeneType(); + } + + /** + * Create an instance of {@link GeneLocationType } + * + */ + public GeneLocationType 
createGeneLocationType() { + return new GeneLocationType(); + } + + /** + * Create an instance of {@link ReferenceType } + * + */ + public ReferenceType createReferenceType() { + return new ReferenceType(); + } + + /** + * Create an instance of {@link DbReferenceType } + * + */ + public DbReferenceType createDbReferenceType() { + return new DbReferenceType(); + } + + /** + * Create an instance of {@link ProteinExistenceType } + * + */ + public ProteinExistenceType createProteinExistenceType() { + return new ProteinExistenceType(); + } + + /** + * Create an instance of {@link KeywordType } + * + */ + public KeywordType createKeywordType() { + return new KeywordType(); + } + + /** + * Create an instance of {@link FeatureType } + * + */ + public FeatureType createFeatureType() { + return new FeatureType(); + } + + /** + * Create an instance of {@link EvidenceType } + * + */ + public EvidenceType createEvidenceType() { + return new EvidenceType(); + } + + /** + * Create an instance of {@link SequenceType } + * + */ + public SequenceType createSequenceType() { + return new SequenceType(); + } + + /** + * Create an instance of {@link Uniprot } + * + */ + public Uniprot createUniprot() { + return new Uniprot(); + } + + /** + * Create an instance of {@link StatusType } + * + */ + public StatusType createStatusType() { + return new StatusType(); + } + + /** + * Create an instance of {@link PositionType } + * + */ + public PositionType createPositionType() { + return new PositionType(); + } + + /** + * Create an instance of {@link ConsortiumType } + * + */ + public ConsortiumType createConsortiumType() { + return new ConsortiumType(); + } + + /** + * Create an instance of {@link GeneNameType } + * + */ + public GeneNameType createGeneNameType() { + return new GeneNameType(); + } + + /** + * Create an instance of {@link LocationType } + * + */ + public LocationType createLocationType() { + return new LocationType(); + } + + /** + * Create an instance of {@link CitationType } 
+ * + */ + public CitationType createCitationType() { + return new CitationType(); + } + + /** + * Create an instance of {@link PropertyType } + * + */ + public PropertyType createPropertyType() { + return new PropertyType(); + } + + /** + * Create an instance of {@link PhysiologicalReactionType } + * + */ + public PhysiologicalReactionType createPhysiologicalReactionType() { + return new PhysiologicalReactionType(); + } + + /** + * Create an instance of {@link CofactorType } + * + */ + public CofactorType createCofactorType() { + return new CofactorType(); + } + + /** + * Create an instance of {@link EvidencedStringType } + * + */ + public EvidencedStringType createEvidencedStringType() { + return new EvidencedStringType(); + } + + /** + * Create an instance of {@link PersonType } + * + */ + public PersonType createPersonType() { + return new PersonType(); + } + + /** + * Create an instance of {@link ImportedFromType } + * + */ + public ImportedFromType createImportedFromType() { + return new ImportedFromType(); + } + + /** + * Create an instance of {@link LigandPartType } + * + */ + public LigandPartType createLigandPartType() { + return new LigandPartType(); + } + + /** + * Create an instance of {@link LigandType } + * + */ + public LigandType createLigandType() { + return new LigandType(); + } + + /** + * Create an instance of {@link EventType } + * + */ + public EventType createEventType() { + return new EventType(); + } + + /** + * Create an instance of {@link InteractantType } + * + */ + public InteractantType createInteractantType() { + return new InteractantType(); + } + + /** + * Create an instance of {@link NameListType } + * + */ + public NameListType createNameListType() { + return new NameListType(); + } + + /** + * Create an instance of {@link ReactionType } + * + */ + public ReactionType createReactionType() { + return new ReactionType(); + } + + /** + * Create an instance of {@link SourceType } + * + */ + public SourceType createSourceType() { + 
return new SourceType(); + } + + /** + * Create an instance of {@link MoleculeType } + * + */ + public MoleculeType createMoleculeType() { + return new MoleculeType(); + } + + /** + * Create an instance of {@link OrganismNameType } + * + */ + public OrganismNameType createOrganismNameType() { + return new OrganismNameType(); + } + + /** + * Create an instance of {@link SubcellularLocationType } + * + */ + public SubcellularLocationType createSubcellularLocationType() { + return new SubcellularLocationType(); + } + + /** + * Create an instance of {@link SourceDataType.Strain } + * + */ + public SourceDataType.Strain createSourceDataTypeStrain() { + return new SourceDataType.Strain(); + } + + /** + * Create an instance of {@link SourceDataType.Plasmid } + * + */ + public SourceDataType.Plasmid createSourceDataTypePlasmid() { + return new SourceDataType.Plasmid(); + } + + /** + * Create an instance of {@link SourceDataType.Transposon } + * + */ + public SourceDataType.Transposon createSourceDataTypeTransposon() { + return new SourceDataType.Transposon(); + } + + /** + * Create an instance of {@link SourceDataType.Tissue } + * + */ + public SourceDataType.Tissue createSourceDataTypeTissue() { + return new SourceDataType.Tissue(); + } + + /** + * Create an instance of {@link IsoformType.Name } + * + */ + public IsoformType.Name createIsoformTypeName() { + return new IsoformType.Name(); + } + + /** + * Create an instance of {@link IsoformType.Sequence } + * + */ + public IsoformType.Sequence createIsoformTypeSequence() { + return new IsoformType.Sequence(); + } + + /** + * Create an instance of {@link CommentType.Absorption } + * + */ + public CommentType.Absorption createCommentTypeAbsorption() { + return new CommentType.Absorption(); + } + + /** + * Create an instance of {@link CommentType.Kinetics } + * + */ + public CommentType.Kinetics createCommentTypeKinetics() { + return new CommentType.Kinetics(); + } + + /** + * Create an instance of {@link 
CommentType.PhDependence } + * + */ + public CommentType.PhDependence createCommentTypePhDependence() { + return new CommentType.PhDependence(); + } + + /** + * Create an instance of {@link CommentType.RedoxPotential } + * + */ + public CommentType.RedoxPotential createCommentTypeRedoxPotential() { + return new CommentType.RedoxPotential(); + } + + /** + * Create an instance of {@link CommentType.TemperatureDependence } + * + */ + public CommentType.TemperatureDependence createCommentTypeTemperatureDependence() { + return new CommentType.TemperatureDependence(); + } + + /** + * Create an instance of {@link CommentType.Link } + * + */ + public CommentType.Link createCommentTypeLink() { + return new CommentType.Link(); + } + + /** + * Create an instance of {@link CommentType.Disease } + * + */ + public CommentType.Disease createCommentTypeDisease() { + return new CommentType.Disease(); + } + + /** + * Create an instance of {@link CommentType.Conflict.Sequence } + * + */ + public CommentType.Conflict.Sequence createCommentTypeConflictSequence() { + return new CommentType.Conflict.Sequence(); + } + + /** + * Create an instance of {@link OrganismType.Lineage } + * + */ + public OrganismType.Lineage createOrganismTypeLineage() { + return new OrganismType.Lineage(); + } + + /** + * Create an instance of {@link ProteinType.RecommendedName } + * + */ + public ProteinType.RecommendedName createProteinTypeRecommendedName() { + return new ProteinType.RecommendedName(); + } + + /** + * Create an instance of {@link ProteinType.AlternativeName } + * + */ + public ProteinType.AlternativeName createProteinTypeAlternativeName() { + return new ProteinType.AlternativeName(); + } + + /** + * Create an instance of {@link ProteinType.SubmittedName } + * + */ + public ProteinType.SubmittedName createProteinTypeSubmittedName() { + return new ProteinType.SubmittedName(); + } + + /** + * Create an instance of {@link ProteinType.Domain } + * + */ + public ProteinType.Domain 
createProteinTypeDomain() { + return new ProteinType.Domain(); + } + + /** + * Create an instance of {@link ProteinType.Component } + * + */ + public ProteinType.Component createProteinTypeComponent() { + return new ProteinType.Component(); + } + + /** + * Create an instance of {@link JAXBElement }{@code <}{@link String }{@code >}} + * + */ + @XmlElementDecl(namespace = "https://uniprot.org/uniprot", name = "copyright") + public JAXBElement createCopyright(String value) { + return new JAXBElement(_Copyright_QNAME, String.class, null, value); + } + +} diff --git a/biodata-formats/src/main/java/org/opencb/biodata/formats/protein/uniprot/v202502jaxb/OrganismNameType.java b/biodata-formats/src/main/java/org/opencb/biodata/formats/protein/uniprot/v202502jaxb/OrganismNameType.java new file mode 100644 index 00000000..66b82145 --- /dev/null +++ b/biodata-formats/src/main/java/org/opencb/biodata/formats/protein/uniprot/v202502jaxb/OrganismNameType.java @@ -0,0 +1,106 @@ +// +// This file was generated by the JavaTM Architecture for XML Binding(JAXB) Reference Implementation, v2.2.8-b130911.1802 +// See http://java.sun.com/xml/jaxb +// Any modifications to this file will be lost upon recompilation of the source schema. +// Generated on: 2025.05.30 at 11:28:51 AM CEST +// + + +package org.opencb.biodata.formats.protein.uniprot.v202502jaxb; + +import javax.xml.bind.annotation.XmlAccessType; +import javax.xml.bind.annotation.XmlAccessorType; +import javax.xml.bind.annotation.XmlAttribute; +import javax.xml.bind.annotation.XmlType; +import javax.xml.bind.annotation.XmlValue; + + +/** + * Describes different types of source organism names. + * + *

Java class for organismNameType complex type. + * + *

The following schema fragment specifies the expected content contained within this class. + * + *

+ * <complexType name="organismNameType">
+ *   <simpleContent>
+ *     <extension base="<http://www.w3.org/2001/XMLSchema>string">
+ *       <attribute name="type" use="required">
+ *         <simpleType>
+ *           <restriction base="{http://www.w3.org/2001/XMLSchema}string">
+ *             <enumeration value="common"/>
+ *             <enumeration value="full"/>
+ *             <enumeration value="scientific"/>
+ *             <enumeration value="synonym"/>
+ *             <enumeration value="abbreviation"/>
+ *           </restriction>
+ *         </simpleType>
+ *       </attribute>
+ *     </extension>
+ *   </simpleContent>
+ * </complexType>
+ * 
+ * + * + */ +@XmlAccessorType(XmlAccessType.FIELD) +@XmlType(name = "organismNameType", propOrder = { + "value" +}) +public class OrganismNameType { + + @XmlValue + protected String value; + @XmlAttribute(name = "type", required = true) + protected String type; + + /** + * Gets the value of the value property. + * + * @return + * possible object is + * {@link String } + * + */ + public String getValue() { + return value; + } + + /** + * Sets the value of the value property. + * + * @param value + * allowed object is + * {@link String } + * + */ + public void setValue(String value) { + this.value = value; + } + + /** + * Gets the value of the type property. + * + * @return + * possible object is + * {@link String } + * + */ + public String getType() { + return type; + } + + /** + * Sets the value of the type property. + * + * @param value + * allowed object is + * {@link String } + * + */ + public void setType(String value) { + this.type = value; + } + +} diff --git a/biodata-formats/src/main/java/org/opencb/biodata/formats/protein/uniprot/v202502jaxb/OrganismType.java b/biodata-formats/src/main/java/org/opencb/biodata/formats/protein/uniprot/v202502jaxb/OrganismType.java new file mode 100644 index 00000000..b1610c89 --- /dev/null +++ b/biodata-formats/src/main/java/org/opencb/biodata/formats/protein/uniprot/v202502jaxb/OrganismType.java @@ -0,0 +1,241 @@ +// +// This file was generated by the JavaTM Architecture for XML Binding(JAXB) Reference Implementation, v2.2.8-b130911.1802 +// See http://java.sun.com/xml/jaxb +// Any modifications to this file will be lost upon recompilation of the source schema. 
+// Generated on: 2025.05.30 at 11:28:51 AM CEST +// + + +package org.opencb.biodata.formats.protein.uniprot.v202502jaxb; + +import java.util.ArrayList; +import java.util.List; +import javax.xml.bind.annotation.XmlAccessType; +import javax.xml.bind.annotation.XmlAccessorType; +import javax.xml.bind.annotation.XmlAttribute; +import javax.xml.bind.annotation.XmlElement; +import javax.xml.bind.annotation.XmlType; + + +/** + * Describes the source organism. + * + *

Java class for organismType complex type. + * + *

The following schema fragment specifies the expected content contained within this class. + * + *

+ * <complexType name="organismType">
+ *   <complexContent>
+ *     <restriction base="{http://www.w3.org/2001/XMLSchema}anyType">
+ *       <sequence>
+ *         <element name="name" type="{https://uniprot.org/uniprot}organismNameType" maxOccurs="unbounded"/>
+ *         <element name="dbReference" type="{https://uniprot.org/uniprot}dbReferenceType" maxOccurs="unbounded"/>
+ *         <element name="lineage" minOccurs="0">
+ *           <complexType>
+ *             <complexContent>
+ *               <restriction base="{http://www.w3.org/2001/XMLSchema}anyType">
+ *                 <sequence>
+ *                   <element name="taxon" type="{http://www.w3.org/2001/XMLSchema}string" maxOccurs="unbounded"/>
+ *                 </sequence>
+ *               </restriction>
+ *             </complexContent>
+ *           </complexType>
+ *         </element>
+ *       </sequence>
+ *       <attribute name="evidence" type="{https://uniprot.org/uniprot}intListType" />
+ *     </restriction>
+ *   </complexContent>
+ * </complexType>
+ * 
+ * + * + */ +@XmlAccessorType(XmlAccessType.FIELD) +@XmlType(name = "organismType", propOrder = { + "name", + "dbReference", + "lineage" +}) +public class OrganismType { + + @XmlElement(required = true) + protected List name; + @XmlElement(required = true) + protected List dbReference; + protected OrganismType.Lineage lineage; + @XmlAttribute(name = "evidence") + protected List evidence; + + /** + * Gets the value of the name property. + * + *

+ * This accessor method returns a reference to the live list, + * not a snapshot. Therefore any modification you make to the + * returned list will be present inside the JAXB object. + * This is why there is not a set method for the name property. + * + *

+ * For example, to add a new item, do as follows: + *

+     *    getName().add(newItem);
+     * 
+ * + * + *

+ * Objects of the following type(s) are allowed in the list + * {@link OrganismNameType } + * + * + */ + public List getName() { + if (name == null) { + name = new ArrayList(); + } + return this.name; + } + + /** + * Gets the value of the dbReference property. + * + *

+ * This accessor method returns a reference to the live list, + * not a snapshot. Therefore any modification you make to the + * returned list will be present inside the JAXB object. + * This is why there is not a set method for the dbReference property. + * + *

+ * For example, to add a new item, do as follows: + *

+     *    getDbReference().add(newItem);
+     * 
+ * + * + *

+ * Objects of the following type(s) are allowed in the list + * {@link DbReferenceType } + * + * + */ + public List getDbReference() { + if (dbReference == null) { + dbReference = new ArrayList(); + } + return this.dbReference; + } + + /** + * Gets the value of the lineage property. + * + * @return + * possible object is + * {@link OrganismType.Lineage } + * + */ + public OrganismType.Lineage getLineage() { + return lineage; + } + + /** + * Sets the value of the lineage property. + * + * @param value + * allowed object is + * {@link OrganismType.Lineage } + * + */ + public void setLineage(OrganismType.Lineage value) { + this.lineage = value; + } + + /** + * Gets the value of the evidence property. + * + *

+ * This accessor method returns a reference to the live list, + * not a snapshot. Therefore any modification you make to the + * returned list will be present inside the JAXB object. + * This is why there is not a set method for the evidence property. + * + *

+ * For example, to add a new item, do as follows: + *

+     *    getEvidence().add(newItem);
+     * 
+ * + * + *

+ * Objects of the following type(s) are allowed in the list + * {@link Integer } + * + * + */ + public List getEvidence() { + if (evidence == null) { + evidence = new ArrayList(); + } + return this.evidence; + } + + + /** + *

Java class for anonymous complex type. + * + *

The following schema fragment specifies the expected content contained within this class. + * + *

+     * <complexType>
+     *   <complexContent>
+     *     <restriction base="{http://www.w3.org/2001/XMLSchema}anyType">
+     *       <sequence>
+     *         <element name="taxon" type="{http://www.w3.org/2001/XMLSchema}string" maxOccurs="unbounded"/>
+     *       </sequence>
+     *     </restriction>
+     *   </complexContent>
+     * </complexType>
+     * 
+ * + * + */ + @XmlAccessorType(XmlAccessType.FIELD) + @XmlType(name = "", propOrder = { + "taxon" + }) + public static class Lineage { + + @XmlElement(required = true) + protected List taxon; + + /** + * Gets the value of the taxon property. + * + *

+ * This accessor method returns a reference to the live list, + * not a snapshot. Therefore any modification you make to the + * returned list will be present inside the JAXB object. + * This is why there is not a set method for the taxon property. + * + *

+ * For example, to add a new item, do as follows: + *

+         *    getTaxon().add(newItem);
+         * 
+ * + * + *

+ * Objects of the following type(s) are allowed in the list + * {@link String } + * + * + */ + public List getTaxon() { + if (taxon == null) { + taxon = new ArrayList(); + } + return this.taxon; + } + + } + +} diff --git a/biodata-formats/src/main/java/org/opencb/biodata/formats/protein/uniprot/v202502jaxb/PersonType.java b/biodata-formats/src/main/java/org/opencb/biodata/formats/protein/uniprot/v202502jaxb/PersonType.java new file mode 100644 index 00000000..4eb6fcc4 --- /dev/null +++ b/biodata-formats/src/main/java/org/opencb/biodata/formats/protein/uniprot/v202502jaxb/PersonType.java @@ -0,0 +1,65 @@ +// +// This file was generated by the JavaTM Architecture for XML Binding(JAXB) Reference Implementation, v2.2.8-b130911.1802 +// See http://java.sun.com/xml/jaxb +// Any modifications to this file will be lost upon recompilation of the source schema. +// Generated on: 2025.05.30 at 11:28:51 AM CEST +// + + +package org.opencb.biodata.formats.protein.uniprot.v202502jaxb; + +import javax.xml.bind.annotation.XmlAccessType; +import javax.xml.bind.annotation.XmlAccessorType; +import javax.xml.bind.annotation.XmlAttribute; +import javax.xml.bind.annotation.XmlType; + + +/** + *

Java class for personType complex type. + * + *

The following schema fragment specifies the expected content contained within this class. + * + *

+ * <complexType name="personType">
+ *   <complexContent>
+ *     <restriction base="{http://www.w3.org/2001/XMLSchema}anyType">
+ *       <attribute name="name" use="required" type="{http://www.w3.org/2001/XMLSchema}string" />
+ *     </restriction>
+ *   </complexContent>
+ * </complexType>
+ * 
+ * + * + */ +@XmlAccessorType(XmlAccessType.FIELD) +@XmlType(name = "personType") +public class PersonType { + + @XmlAttribute(name = "name", required = true) + protected String name; + + /** + * Gets the value of the name property. + * + * @return + * possible object is + * {@link String } + * + */ + public String getName() { + return name; + } + + /** + * Sets the value of the name property. + * + * @param value + * allowed object is + * {@link String } + * + */ + public void setName(String value) { + this.name = value; + } + +} diff --git a/biodata-formats/src/main/java/org/opencb/biodata/formats/protein/uniprot/v202502jaxb/PhysiologicalReactionType.java b/biodata-formats/src/main/java/org/opencb/biodata/formats/protein/uniprot/v202502jaxb/PhysiologicalReactionType.java new file mode 100644 index 00000000..7bcaf0bb --- /dev/null +++ b/biodata-formats/src/main/java/org/opencb/biodata/formats/protein/uniprot/v202502jaxb/PhysiologicalReactionType.java @@ -0,0 +1,140 @@ +// +// This file was generated by the JavaTM Architecture for XML Binding(JAXB) Reference Implementation, v2.2.8-b130911.1802 +// See http://java.sun.com/xml/jaxb +// Any modifications to this file will be lost upon recompilation of the source schema. +// Generated on: 2025.05.30 at 11:28:51 AM CEST +// + + +package org.opencb.biodata.formats.protein.uniprot.v202502jaxb; + +import java.util.ArrayList; +import java.util.List; +import javax.xml.bind.annotation.XmlAccessType; +import javax.xml.bind.annotation.XmlAccessorType; +import javax.xml.bind.annotation.XmlAttribute; +import javax.xml.bind.annotation.XmlElement; +import javax.xml.bind.annotation.XmlType; + + +/** + * Describes a physiological reaction. + * + *

Java class for physiologicalReactionType complex type. + * + *

The following schema fragment specifies the expected content contained within this class. + * + *

+ * <complexType name="physiologicalReactionType">
+ *   <complexContent>
+ *     <restriction base="{http://www.w3.org/2001/XMLSchema}anyType">
+ *       <sequence>
+ *         <element name="dbReference" type="{https://uniprot.org/uniprot}dbReferenceType"/>
+ *       </sequence>
+ *       <attribute name="direction" use="required">
+ *         <simpleType>
+ *           <restriction base="{http://www.w3.org/2001/XMLSchema}string">
+ *             <enumeration value="left-to-right"/>
+ *             <enumeration value="right-to-left"/>
+ *           </restriction>
+ *         </simpleType>
+ *       </attribute>
+ *       <attribute name="evidence" type="{https://uniprot.org/uniprot}intListType" />
+ *     </restriction>
+ *   </complexContent>
+ * </complexType>
+ * 
+ * + * + */ +@XmlAccessorType(XmlAccessType.FIELD) +@XmlType(name = "physiologicalReactionType", propOrder = { + "dbReference" +}) +public class PhysiologicalReactionType { + + @XmlElement(required = true) + protected DbReferenceType dbReference; + @XmlAttribute(name = "direction", required = true) + protected String direction; + @XmlAttribute(name = "evidence") + protected List evidence; + + /** + * Gets the value of the dbReference property. + * + * @return + * possible object is + * {@link DbReferenceType } + * + */ + public DbReferenceType getDbReference() { + return dbReference; + } + + /** + * Sets the value of the dbReference property. + * + * @param value + * allowed object is + * {@link DbReferenceType } + * + */ + public void setDbReference(DbReferenceType value) { + this.dbReference = value; + } + + /** + * Gets the value of the direction property. + * + * @return + * possible object is + * {@link String } + * + */ + public String getDirection() { + return direction; + } + + /** + * Sets the value of the direction property. + * + * @param value + * allowed object is + * {@link String } + * + */ + public void setDirection(String value) { + this.direction = value; + } + + /** + * Gets the value of the evidence property. + * + *

+ * This accessor method returns a reference to the live list, + * not a snapshot. Therefore any modification you make to the + * returned list will be present inside the JAXB object. + * This is why there is not a set method for the evidence property. + * + *

+ * For example, to add a new item, do as follows: + *

+     *    getEvidence().add(newItem);
+     * 
+ * + * + *

+ * Objects of the following type(s) are allowed in the list + * {@link Integer } + * + * + */ + public List getEvidence() { + if (evidence == null) { + evidence = new ArrayList(); + } + return this.evidence; + } + +} diff --git a/biodata-formats/src/main/java/org/opencb/biodata/formats/protein/uniprot/v202502jaxb/PositionType.java b/biodata-formats/src/main/java/org/opencb/biodata/formats/protein/uniprot/v202502jaxb/PositionType.java new file mode 100644 index 00000000..c1fb9210 --- /dev/null +++ b/biodata-formats/src/main/java/org/opencb/biodata/formats/protein/uniprot/v202502jaxb/PositionType.java @@ -0,0 +1,143 @@ +// +// This file was generated by the JavaTM Architecture for XML Binding(JAXB) Reference Implementation, v2.2.8-b130911.1802 +// See http://java.sun.com/xml/jaxb +// Any modifications to this file will be lost upon recompilation of the source schema. +// Generated on: 2025.05.30 at 11:28:51 AM CEST +// + + +package org.opencb.biodata.formats.protein.uniprot.v202502jaxb; + +import java.math.BigInteger; +import java.util.ArrayList; +import java.util.List; +import javax.xml.bind.annotation.XmlAccessType; +import javax.xml.bind.annotation.XmlAccessorType; +import javax.xml.bind.annotation.XmlAttribute; +import javax.xml.bind.annotation.XmlSchemaType; +import javax.xml.bind.annotation.XmlType; + + +/** + *

Java class for positionType complex type. + * + *

The following schema fragment specifies the expected content contained within this class. + * + *

+ * <complexType name="positionType">
+ *   <complexContent>
+ *     <restriction base="{http://www.w3.org/2001/XMLSchema}anyType">
+ *       <attribute name="position" type="{http://www.w3.org/2001/XMLSchema}unsignedLong" />
+ *       <attribute name="status" default="certain">
+ *         <simpleType>
+ *           <restriction base="{http://www.w3.org/2001/XMLSchema}string">
+ *             <enumeration value="certain"/>
+ *             <enumeration value="uncertain"/>
+ *             <enumeration value="less than"/>
+ *             <enumeration value="greater than"/>
+ *             <enumeration value="unknown"/>
+ *           </restriction>
+ *         </simpleType>
+ *       </attribute>
+ *       <attribute name="evidence" type="{https://uniprot.org/uniprot}intListType" />
+ *     </restriction>
+ *   </complexContent>
+ * </complexType>
+ * 
+ * + * + */ +@XmlAccessorType(XmlAccessType.FIELD) +@XmlType(name = "positionType") +public class PositionType { + + @XmlAttribute(name = "position") + @XmlSchemaType(name = "unsignedLong") + protected BigInteger position; + @XmlAttribute(name = "status") + protected String status; + @XmlAttribute(name = "evidence") + protected List evidence; + + /** + * Gets the value of the position property. + * + * @return + * possible object is + * {@link BigInteger } + * + */ + public BigInteger getPosition() { + return position; + } + + /** + * Sets the value of the position property. + * + * @param value + * allowed object is + * {@link BigInteger } + * + */ + public void setPosition(BigInteger value) { + this.position = value; + } + + /** + * Gets the value of the status property. + * + * @return + * possible object is + * {@link String } + * + */ + public String getStatus() { + if (status == null) { + return "certain"; + } else { + return status; + } + } + + /** + * Sets the value of the status property. + * + * @param value + * allowed object is + * {@link String } + * + */ + public void setStatus(String value) { + this.status = value; + } + + /** + * Gets the value of the evidence property. + * + *

+ * This accessor method returns a reference to the live list, + * not a snapshot. Therefore any modification you make to the + * returned list will be present inside the JAXB object. + * This is why there is not a set method for the evidence property. + * + *

+ * For example, to add a new item, do as follows: + *

+     *    getEvidence().add(newItem);
+     * 
+ * + * + *

+ * Objects of the following type(s) are allowed in the list + * {@link Integer } + * + * + */ + public List getEvidence() { + if (evidence == null) { + evidence = new ArrayList(); + } + return this.evidence; + } + +} diff --git a/biodata-formats/src/main/java/org/opencb/biodata/formats/protein/uniprot/v202502jaxb/PropertyType.java b/biodata-formats/src/main/java/org/opencb/biodata/formats/protein/uniprot/v202502jaxb/PropertyType.java new file mode 100644 index 00000000..70462be8 --- /dev/null +++ b/biodata-formats/src/main/java/org/opencb/biodata/formats/protein/uniprot/v202502jaxb/PropertyType.java @@ -0,0 +1,92 @@ +// +// This file was generated by the JavaTM Architecture for XML Binding(JAXB) Reference Implementation, v2.2.8-b130911.1802 +// See http://java.sun.com/xml/jaxb +// Any modifications to this file will be lost upon recompilation of the source schema. +// Generated on: 2025.05.30 at 11:28:51 AM CEST +// + + +package org.opencb.biodata.formats.protein.uniprot.v202502jaxb; + +import javax.xml.bind.annotation.XmlAccessType; +import javax.xml.bind.annotation.XmlAccessorType; +import javax.xml.bind.annotation.XmlAttribute; +import javax.xml.bind.annotation.XmlType; + + +/** + *

Java class for propertyType complex type. + * + *

The following schema fragment specifies the expected content contained within this class. + * + *

+ * <complexType name="propertyType">
+ *   <complexContent>
+ *     <restriction base="{http://www.w3.org/2001/XMLSchema}anyType">
+ *       <attribute name="type" use="required" type="{http://www.w3.org/2001/XMLSchema}string" />
+ *       <attribute name="value" use="required" type="{http://www.w3.org/2001/XMLSchema}string" />
+ *     </restriction>
+ *   </complexContent>
+ * </complexType>
+ * 
+ * + * + */ +@XmlAccessorType(XmlAccessType.FIELD) +@XmlType(name = "propertyType") +public class PropertyType { + + @XmlAttribute(name = "type", required = true) + protected String type; + @XmlAttribute(name = "value", required = true) + protected String value; + + /** + * Gets the value of the type property. + * + * @return + * possible object is + * {@link String } + * + */ + public String getType() { + return type; + } + + /** + * Sets the value of the type property. + * + * @param value + * allowed object is + * {@link String } + * + */ + public void setType(String value) { + this.type = value; + } + + /** + * Gets the value of the value property. + * + * @return + * possible object is + * {@link String } + * + */ + public String getValue() { + return value; + } + + /** + * Sets the value of the value property. + * + * @param value + * allowed object is + * {@link String } + * + */ + public void setValue(String value) { + this.value = value; + } + +} diff --git a/biodata-formats/src/main/java/org/opencb/biodata/formats/protein/uniprot/v202502jaxb/ProteinExistenceType.java b/biodata-formats/src/main/java/org/opencb/biodata/formats/protein/uniprot/v202502jaxb/ProteinExistenceType.java new file mode 100644 index 00000000..ea7324f2 --- /dev/null +++ b/biodata-formats/src/main/java/org/opencb/biodata/formats/protein/uniprot/v202502jaxb/ProteinExistenceType.java @@ -0,0 +1,78 @@ +// +// This file was generated by the JavaTM Architecture for XML Binding(JAXB) Reference Implementation, v2.2.8-b130911.1802 +// See http://java.sun.com/xml/jaxb +// Any modifications to this file will be lost upon recompilation of the source schema. 
+// Generated on: 2025.05.30 at 11:28:51 AM CEST +// + + +package org.opencb.biodata.formats.protein.uniprot.v202502jaxb; + +import javax.xml.bind.annotation.XmlAccessType; +import javax.xml.bind.annotation.XmlAccessorType; +import javax.xml.bind.annotation.XmlAttribute; +import javax.xml.bind.annotation.XmlType; + + +/** + * Describes the evidence for the protein's existence. + * Equivalent to the flat file PE-line. + * + *

Java class for proteinExistenceType complex type. + * + *

The following schema fragment specifies the expected content contained within this class. + * + *

+ * <complexType name="proteinExistenceType">
+ *   <complexContent>
+ *     <restriction base="{http://www.w3.org/2001/XMLSchema}anyType">
+ *       <attribute name="type" use="required">
+ *         <simpleType>
+ *           <restriction base="{http://www.w3.org/2001/XMLSchema}string">
+ *             <enumeration value="evidence at protein level"/>
+ *             <enumeration value="evidence at transcript level"/>
+ *             <enumeration value="inferred from homology"/>
+ *             <enumeration value="predicted"/>
+ *             <enumeration value="uncertain"/>
+ *           </restriction>
+ *         </simpleType>
+ *       </attribute>
+ *     </restriction>
+ *   </complexContent>
+ * </complexType>
+ * 
+ * + * + */ +@XmlAccessorType(XmlAccessType.FIELD) +@XmlType(name = "proteinExistenceType") +public class ProteinExistenceType { + + @XmlAttribute(name = "type", required = true) + protected String type; + + /** + * Gets the value of the type property. + * + * @return + * possible object is + * {@link String } + * + */ + public String getType() { + return type; + } + + /** + * Sets the value of the type property. + * + * @param value + * allowed object is + * {@link String } + * + */ + public void setType(String value) { + this.type = value; + } + +} diff --git a/biodata-formats/src/main/java/org/opencb/biodata/formats/protein/uniprot/v202502jaxb/ProteinType.java b/biodata-formats/src/main/java/org/opencb/biodata/formats/protein/uniprot/v202502jaxb/ProteinType.java new file mode 100644 index 00000000..8bc20da1 --- /dev/null +++ b/biodata-formats/src/main/java/org/opencb/biodata/formats/protein/uniprot/v202502jaxb/ProteinType.java @@ -0,0 +1,1109 @@ +// +// This file was generated by the JavaTM Architecture for XML Binding(JAXB) Reference Implementation, v2.2.8-b130911.1802 +// See http://java.sun.com/xml/jaxb +// Any modifications to this file will be lost upon recompilation of the source schema. +// Generated on: 2025.05.30 at 11:28:51 AM CEST +// + + +package org.opencb.biodata.formats.protein.uniprot.v202502jaxb; + +import java.util.ArrayList; +import java.util.List; +import javax.xml.bind.annotation.XmlAccessType; +import javax.xml.bind.annotation.XmlAccessorType; +import javax.xml.bind.annotation.XmlElement; +import javax.xml.bind.annotation.XmlType; + + +/** + * Describes the names for the protein and parts thereof. + * Equivalent to the flat file DE-line. + * + *

Java class for proteinType complex type. + * + *

The following schema fragment specifies the expected content contained within this class. + * + *

+ * <complexType name="proteinType">
+ *   <complexContent>
+ *     <restriction base="{http://www.w3.org/2001/XMLSchema}anyType">
+ *       <sequence>
+ *         <group ref="{https://uniprot.org/uniprot}proteinNameGroup"/>
+ *         <element name="domain" maxOccurs="unbounded" minOccurs="0">
+ *           <complexType>
+ *             <complexContent>
+ *               <restriction base="{http://www.w3.org/2001/XMLSchema}anyType">
+ *                 <group ref="{https://uniprot.org/uniprot}proteinNameGroup"/>
+ *               </restriction>
+ *             </complexContent>
+ *           </complexType>
+ *         </element>
+ *         <element name="component" maxOccurs="unbounded" minOccurs="0">
+ *           <complexType>
+ *             <complexContent>
+ *               <restriction base="{http://www.w3.org/2001/XMLSchema}anyType">
+ *                 <group ref="{https://uniprot.org/uniprot}proteinNameGroup"/>
+ *               </restriction>
+ *             </complexContent>
+ *           </complexType>
+ *         </element>
+ *       </sequence>
+ *     </restriction>
+ *   </complexContent>
+ * </complexType>
+ * 
+ * + * + */ +@XmlAccessorType(XmlAccessType.FIELD) +@XmlType(name = "proteinType", propOrder = { + "recommendedName", + "alternativeName", + "submittedName", + "allergenName", + "biotechName", + "cdAntigenName", + "innName", + "domain", + "component" +}) +public class ProteinType { + + protected ProteinType.RecommendedName recommendedName; + protected List alternativeName; + protected List submittedName; + protected EvidencedStringType allergenName; + protected EvidencedStringType biotechName; + protected List cdAntigenName; + protected List innName; + protected List domain; + protected List component; + + /** + * Gets the value of the recommendedName property. + * + * @return + * possible object is + * {@link ProteinType.RecommendedName } + * + */ + public ProteinType.RecommendedName getRecommendedName() { + return recommendedName; + } + + /** + * Sets the value of the recommendedName property. + * + * @param value + * allowed object is + * {@link ProteinType.RecommendedName } + * + */ + public void setRecommendedName(ProteinType.RecommendedName value) { + this.recommendedName = value; + } + + /** + * Gets the value of the alternativeName property. + * + *

+ * This accessor method returns a reference to the live list, + * not a snapshot. Therefore any modification you make to the + * returned list will be present inside the JAXB object. + * This is why there is not a set method for the alternativeName property. + * + *

+ * For example, to add a new item, do as follows: + *

+     *    getAlternativeName().add(newItem);
+     * 
+ * + * + *

+ * Objects of the following type(s) are allowed in the list + * {@link ProteinType.AlternativeName } + * + * + */ + public List getAlternativeName() { + if (alternativeName == null) { + alternativeName = new ArrayList(); + } + return this.alternativeName; + } + + /** + * Gets the value of the submittedName property. + * + *

+ * This accessor method returns a reference to the live list, + * not a snapshot. Therefore any modification you make to the + * returned list will be present inside the JAXB object. + * This is why there is not a set method for the submittedName property. + * + *

+ * For example, to add a new item, do as follows: + *

+     *    getSubmittedName().add(newItem);
+     * 
+ * + * + *

+ * Objects of the following type(s) are allowed in the list + * {@link ProteinType.SubmittedName } + * + * + */ + public List getSubmittedName() { + if (submittedName == null) { + submittedName = new ArrayList(); + } + return this.submittedName; + } + + /** + * Gets the value of the allergenName property. + * + * @return + * possible object is + * {@link EvidencedStringType } + * + */ + public EvidencedStringType getAllergenName() { + return allergenName; + } + + /** + * Sets the value of the allergenName property. + * + * @param value + * allowed object is + * {@link EvidencedStringType } + * + */ + public void setAllergenName(EvidencedStringType value) { + this.allergenName = value; + } + + /** + * Gets the value of the biotechName property. + * + * @return + * possible object is + * {@link EvidencedStringType } + * + */ + public EvidencedStringType getBiotechName() { + return biotechName; + } + + /** + * Sets the value of the biotechName property. + * + * @param value + * allowed object is + * {@link EvidencedStringType } + * + */ + public void setBiotechName(EvidencedStringType value) { + this.biotechName = value; + } + + /** + * Gets the value of the cdAntigenName property. + * + *

+ * This accessor method returns a reference to the live list, + * not a snapshot. Therefore any modification you make to the + * returned list will be present inside the JAXB object. + * This is why there is not a set method for the cdAntigenName property. + * + *

+ * For example, to add a new item, do as follows: + *

+     *    getCdAntigenName().add(newItem);
+     * 
+ * + * + *

+ * Objects of the following type(s) are allowed in the list + * {@link EvidencedStringType } + * + * + */ + public List getCdAntigenName() { + if (cdAntigenName == null) { + cdAntigenName = new ArrayList(); + } + return this.cdAntigenName; + } + + /** + * Gets the value of the innName property. + * + *

+ * This accessor method returns a reference to the live list, + * not a snapshot. Therefore any modification you make to the + * returned list will be present inside the JAXB object. + * This is why there is not a set method for the innName property. + * + *

+ * For example, to add a new item, do as follows: + *

+     *    getInnName().add(newItem);
+     * 
+ * + * + *

+ * Objects of the following type(s) are allowed in the list + * {@link EvidencedStringType } + * + * + */ + public List getInnName() { + if (innName == null) { + innName = new ArrayList(); + } + return this.innName; + } + + /** + * Gets the value of the domain property. + * + *

+ * This accessor method returns a reference to the live list, + * not a snapshot. Therefore any modification you make to the + * returned list will be present inside the JAXB object. + * This is why there is not a set method for the domain property. + * + *

+ * For example, to add a new item, do as follows: + *

+     *    getDomain().add(newItem);
+     * 
+ * + * + *

+ * Objects of the following type(s) are allowed in the list + * {@link ProteinType.Domain } + * + * + */ + public List getDomain() { + if (domain == null) { + domain = new ArrayList(); + } + return this.domain; + } + + /** + * Gets the value of the component property. + * + *

+ * This accessor method returns a reference to the live list, + * not a snapshot. Therefore any modification you make to the + * returned list will be present inside the JAXB object. + * This is why there is not a set method for the component property. + * + *

+ * For example, to add a new item, do as follows: + *

+     *    getComponent().add(newItem);
+     * 
+ * + * + *

+ * Objects of the following type(s) are allowed in the list + * {@link ProteinType.Component } + * + * + */ + public List getComponent() { + if (component == null) { + component = new ArrayList(); + } + return this.component; + } + + + /** + *

Java class for anonymous complex type. + * + *

The following schema fragment specifies the expected content contained within this class. + * + *

+     * <complexType>
+     *   <complexContent>
+     *     <restriction base="{http://www.w3.org/2001/XMLSchema}anyType">
+     *       <sequence>
+     *         <element name="fullName" type="{https://uniprot.org/uniprot}evidencedStringType" minOccurs="0"/>
+     *         <element name="shortName" type="{https://uniprot.org/uniprot}evidencedStringType" maxOccurs="unbounded" minOccurs="0"/>
+     *         <element name="ecNumber" type="{https://uniprot.org/uniprot}evidencedStringType" maxOccurs="unbounded" minOccurs="0"/>
+     *       </sequence>
+     *     </restriction>
+     *   </complexContent>
+     * </complexType>
+     * 
+ * + * + */ + @XmlAccessorType(XmlAccessType.FIELD) + @XmlType(name = "", propOrder = { + "fullName", + "shortName", + "ecNumber" + }) + public static class AlternativeName { + + protected EvidencedStringType fullName; + protected List shortName; + protected List ecNumber; + + /** + * Gets the value of the fullName property. + * + * @return + * possible object is + * {@link EvidencedStringType } + * + */ + public EvidencedStringType getFullName() { + return fullName; + } + + /** + * Sets the value of the fullName property. + * + * @param value + * allowed object is + * {@link EvidencedStringType } + * + */ + public void setFullName(EvidencedStringType value) { + this.fullName = value; + } + + /** + * Gets the value of the shortName property. + * + *

+ * This accessor method returns a reference to the live list, + * not a snapshot. Therefore any modification you make to the + * returned list will be present inside the JAXB object. + * This is why there is not a set method for the shortName property. + * + *

+ * For example, to add a new item, do as follows: + *

+         *    getShortName().add(newItem);
+         * 
+ * + * + *

+ * Objects of the following type(s) are allowed in the list + * {@link EvidencedStringType } + * + * + */ + public List getShortName() { + if (shortName == null) { + shortName = new ArrayList(); + } + return this.shortName; + } + + /** + * Gets the value of the ecNumber property. + * + *

+ * This accessor method returns a reference to the live list, + * not a snapshot. Therefore any modification you make to the + * returned list will be present inside the JAXB object. + * This is why there is not a set method for the ecNumber property. + * + *

+ * For example, to add a new item, do as follows: + *

+         *    getEcNumber().add(newItem);
+         * 
+ * + * + *

+ * Objects of the following type(s) are allowed in the list + * {@link EvidencedStringType } + * + * + */ + public List getEcNumber() { + if (ecNumber == null) { + ecNumber = new ArrayList(); + } + return this.ecNumber; + } + + } + + + /** + *

Java class for anonymous complex type. + * + *

The following schema fragment specifies the expected content contained within this class. + * + *

+     * <complexType>
+     *   <complexContent>
+     *     <restriction base="{http://www.w3.org/2001/XMLSchema}anyType">
+     *       <group ref="{https://uniprot.org/uniprot}proteinNameGroup"/>
+     *     </restriction>
+     *   </complexContent>
+     * </complexType>
+     * 
+ * + * + */ + @XmlAccessorType(XmlAccessType.FIELD) + @XmlType(name = "", propOrder = { + "recommendedName", + "alternativeName", + "submittedName", + "allergenName", + "biotechName", + "cdAntigenName", + "innName" + }) + public static class Component { + + protected ProteinType.RecommendedName recommendedName; + protected List alternativeName; + protected List submittedName; + protected EvidencedStringType allergenName; + protected EvidencedStringType biotechName; + protected List cdAntigenName; + protected List innName; + + /** + * Gets the value of the recommendedName property. + * + * @return + * possible object is + * {@link ProteinType.RecommendedName } + * + */ + public ProteinType.RecommendedName getRecommendedName() { + return recommendedName; + } + + /** + * Sets the value of the recommendedName property. + * + * @param value + * allowed object is + * {@link ProteinType.RecommendedName } + * + */ + public void setRecommendedName(ProteinType.RecommendedName value) { + this.recommendedName = value; + } + + /** + * Gets the value of the alternativeName property. + * + *

+ * This accessor method returns a reference to the live list, + * not a snapshot. Therefore any modification you make to the + * returned list will be present inside the JAXB object. + * This is why there is not a set method for the alternativeName property. + * + *

+ * For example, to add a new item, do as follows: + *

+         *    getAlternativeName().add(newItem);
+         * 
+ * + * + *

+ * Objects of the following type(s) are allowed in the list + * {@link ProteinType.AlternativeName } + * + * + */ + public List getAlternativeName() { + if (alternativeName == null) { + alternativeName = new ArrayList(); + } + return this.alternativeName; + } + + /** + * Gets the value of the submittedName property. + * + *

+ * This accessor method returns a reference to the live list, + * not a snapshot. Therefore any modification you make to the + * returned list will be present inside the JAXB object. + * This is why there is not a set method for the submittedName property. + * + *

+ * For example, to add a new item, do as follows: + *

+         *    getSubmittedName().add(newItem);
+         * 
+ * + * + *

+ * Objects of the following type(s) are allowed in the list + * {@link ProteinType.SubmittedName } + * + * + */ + public List getSubmittedName() { + if (submittedName == null) { + submittedName = new ArrayList(); + } + return this.submittedName; + } + + /** + * Gets the value of the allergenName property. + * + * @return + * possible object is + * {@link EvidencedStringType } + * + */ + public EvidencedStringType getAllergenName() { + return allergenName; + } + + /** + * Sets the value of the allergenName property. + * + * @param value + * allowed object is + * {@link EvidencedStringType } + * + */ + public void setAllergenName(EvidencedStringType value) { + this.allergenName = value; + } + + /** + * Gets the value of the biotechName property. + * + * @return + * possible object is + * {@link EvidencedStringType } + * + */ + public EvidencedStringType getBiotechName() { + return biotechName; + } + + /** + * Sets the value of the biotechName property. + * + * @param value + * allowed object is + * {@link EvidencedStringType } + * + */ + public void setBiotechName(EvidencedStringType value) { + this.biotechName = value; + } + + /** + * Gets the value of the cdAntigenName property. + * + *

+ * This accessor method returns a reference to the live list, + * not a snapshot. Therefore any modification you make to the + * returned list will be present inside the JAXB object. + * This is why there is not a set method for the cdAntigenName property. + * + *

+ * For example, to add a new item, do as follows: + *

+         *    getCdAntigenName().add(newItem);
+         * 
+ * + * + *

+ * Objects of the following type(s) are allowed in the list + * {@link EvidencedStringType } + * + * + */ + public List getCdAntigenName() { + if (cdAntigenName == null) { + cdAntigenName = new ArrayList(); + } + return this.cdAntigenName; + } + + /** + * Gets the value of the innName property. + * + *

+ * This accessor method returns a reference to the live list, + * not a snapshot. Therefore any modification you make to the + * returned list will be present inside the JAXB object. + * This is why there is not a set method for the innName property. + * + *

+ * For example, to add a new item, do as follows: + *

+         *    getInnName().add(newItem);
+         * 
+ * + * + *

+ * Objects of the following type(s) are allowed in the list + * {@link EvidencedStringType } + * + * + */ + public List getInnName() { + if (innName == null) { + innName = new ArrayList(); + } + return this.innName; + } + + } + + + /** + *

Java class for anonymous complex type. + * + *

The following schema fragment specifies the expected content contained within this class. + * + *

+     * <complexType>
+     *   <complexContent>
+     *     <restriction base="{http://www.w3.org/2001/XMLSchema}anyType">
+     *       <group ref="{https://uniprot.org/uniprot}proteinNameGroup"/>
+     *     </restriction>
+     *   </complexContent>
+     * </complexType>
+     * 
+ * + * + */ + @XmlAccessorType(XmlAccessType.FIELD) + @XmlType(name = "", propOrder = { + "recommendedName", + "alternativeName", + "submittedName", + "allergenName", + "biotechName", + "cdAntigenName", + "innName" + }) + public static class Domain { + + protected ProteinType.RecommendedName recommendedName; + protected List alternativeName; + protected List submittedName; + protected EvidencedStringType allergenName; + protected EvidencedStringType biotechName; + protected List cdAntigenName; + protected List innName; + + /** + * Gets the value of the recommendedName property. + * + * @return + * possible object is + * {@link ProteinType.RecommendedName } + * + */ + public ProteinType.RecommendedName getRecommendedName() { + return recommendedName; + } + + /** + * Sets the value of the recommendedName property. + * + * @param value + * allowed object is + * {@link ProteinType.RecommendedName } + * + */ + public void setRecommendedName(ProteinType.RecommendedName value) { + this.recommendedName = value; + } + + /** + * Gets the value of the alternativeName property. + * + *

+ * This accessor method returns a reference to the live list, + * not a snapshot. Therefore any modification you make to the + * returned list will be present inside the JAXB object. + * This is why there is not a set method for the alternativeName property. + * + *

+ * For example, to add a new item, do as follows: + *

+         *    getAlternativeName().add(newItem);
+         * 
+ * + * + *

+ * Objects of the following type(s) are allowed in the list + * {@link ProteinType.AlternativeName } + * + * + */ + public List getAlternativeName() { + if (alternativeName == null) { + alternativeName = new ArrayList(); + } + return this.alternativeName; + } + + /** + * Gets the value of the submittedName property. + * + *

+ * This accessor method returns a reference to the live list, + * not a snapshot. Therefore any modification you make to the + * returned list will be present inside the JAXB object. + * This is why there is not a set method for the submittedName property. + * + *

+ * For example, to add a new item, do as follows: + *

+         *    getSubmittedName().add(newItem);
+         * 
+ * + * + *

+ * Objects of the following type(s) are allowed in the list + * {@link ProteinType.SubmittedName } + * + * + */ + public List getSubmittedName() { + if (submittedName == null) { + submittedName = new ArrayList(); + } + return this.submittedName; + } + + /** + * Gets the value of the allergenName property. + * + * @return + * possible object is + * {@link EvidencedStringType } + * + */ + public EvidencedStringType getAllergenName() { + return allergenName; + } + + /** + * Sets the value of the allergenName property. + * + * @param value + * allowed object is + * {@link EvidencedStringType } + * + */ + public void setAllergenName(EvidencedStringType value) { + this.allergenName = value; + } + + /** + * Gets the value of the biotechName property. + * + * @return + * possible object is + * {@link EvidencedStringType } + * + */ + public EvidencedStringType getBiotechName() { + return biotechName; + } + + /** + * Sets the value of the biotechName property. + * + * @param value + * allowed object is + * {@link EvidencedStringType } + * + */ + public void setBiotechName(EvidencedStringType value) { + this.biotechName = value; + } + + /** + * Gets the value of the cdAntigenName property. + * + *

+ * This accessor method returns a reference to the live list, + * not a snapshot. Therefore any modification you make to the + * returned list will be present inside the JAXB object. + * This is why there is not a set method for the cdAntigenName property. + * + *

+ * For example, to add a new item, do as follows: + *

+         *    getCdAntigenName().add(newItem);
+         * 
+ * + * + *

+ * Objects of the following type(s) are allowed in the list + * {@link EvidencedStringType } + * + * + */ + public List getCdAntigenName() { + if (cdAntigenName == null) { + cdAntigenName = new ArrayList(); + } + return this.cdAntigenName; + } + + /** + * Gets the value of the innName property. + * + *

+ * This accessor method returns a reference to the live list, + * not a snapshot. Therefore any modification you make to the + * returned list will be present inside the JAXB object. + * This is why there is not a set method for the innName property. + * + *

+ * For example, to add a new item, do as follows: + *

+         *    getInnName().add(newItem);
+         * 
+ * + * + *

+ * Objects of the following type(s) are allowed in the list + * {@link EvidencedStringType } + * + * + */ + public List getInnName() { + if (innName == null) { + innName = new ArrayList(); + } + return this.innName; + } + + } + + + /** + *

Java class for anonymous complex type. + * + *

The following schema fragment specifies the expected content contained within this class. + * + *

+     * <complexType>
+     *   <complexContent>
+     *     <restriction base="{http://www.w3.org/2001/XMLSchema}anyType">
+     *       <sequence>
+     *         <element name="fullName" type="{https://uniprot.org/uniprot}evidencedStringType"/>
+     *         <element name="shortName" type="{https://uniprot.org/uniprot}evidencedStringType" maxOccurs="unbounded" minOccurs="0"/>
+     *         <element name="ecNumber" type="{https://uniprot.org/uniprot}evidencedStringType" maxOccurs="unbounded" minOccurs="0"/>
+     *       </sequence>
+     *     </restriction>
+     *   </complexContent>
+     * </complexType>
+     * 
+ * + * + */ + @XmlAccessorType(XmlAccessType.FIELD) + @XmlType(name = "", propOrder = { + "fullName", + "shortName", + "ecNumber" + }) + public static class RecommendedName { + + @XmlElement(required = true) + protected EvidencedStringType fullName; + protected List shortName; + protected List ecNumber; + + /** + * Gets the value of the fullName property. + * + * @return + * possible object is + * {@link EvidencedStringType } + * + */ + public EvidencedStringType getFullName() { + return fullName; + } + + /** + * Sets the value of the fullName property. + * + * @param value + * allowed object is + * {@link EvidencedStringType } + * + */ + public void setFullName(EvidencedStringType value) { + this.fullName = value; + } + + /** + * Gets the value of the shortName property. + * + *

+ * This accessor method returns a reference to the live list, + * not a snapshot. Therefore any modification you make to the + * returned list will be present inside the JAXB object. + * This is why there is not a set method for the shortName property. + * + *

+ * For example, to add a new item, do as follows: + *

+         *    getShortName().add(newItem);
+         * 
+ * + * + *

+ * Objects of the following type(s) are allowed in the list + * {@link EvidencedStringType } + * + * + */ + public List getShortName() { + if (shortName == null) { + shortName = new ArrayList(); + } + return this.shortName; + } + + /** + * Gets the value of the ecNumber property. + * + *

+ * This accessor method returns a reference to the live list, + * not a snapshot. Therefore any modification you make to the + * returned list will be present inside the JAXB object. + * This is why there is not a set method for the ecNumber property. + * + *

+ * For example, to add a new item, do as follows: + *

+         *    getEcNumber().add(newItem);
+         * 
+ * + * + *

+ * Objects of the following type(s) are allowed in the list + * {@link EvidencedStringType } + * + * + */ + public List getEcNumber() { + if (ecNumber == null) { + ecNumber = new ArrayList(); + } + return this.ecNumber; + } + + } + + + /** + *

Java class for anonymous complex type. + * + *

The following schema fragment specifies the expected content contained within this class. + * + *

+     * <complexType>
+     *   <complexContent>
+     *     <restriction base="{http://www.w3.org/2001/XMLSchema}anyType">
+     *       <sequence>
+     *         <element name="fullName" type="{https://uniprot.org/uniprot}evidencedStringType"/>
+     *         <element name="ecNumber" type="{https://uniprot.org/uniprot}evidencedStringType" maxOccurs="unbounded" minOccurs="0"/>
+     *       </sequence>
+     *     </restriction>
+     *   </complexContent>
+     * </complexType>
+     * 
+ * + * + */ + @XmlAccessorType(XmlAccessType.FIELD) + @XmlType(name = "", propOrder = { + "fullName", + "ecNumber" + }) + public static class SubmittedName { + + @XmlElement(required = true) + protected EvidencedStringType fullName; + protected List ecNumber; + + /** + * Gets the value of the fullName property. + * + * @return + * possible object is + * {@link EvidencedStringType } + * + */ + public EvidencedStringType getFullName() { + return fullName; + } + + /** + * Sets the value of the fullName property. + * + * @param value + * allowed object is + * {@link EvidencedStringType } + * + */ + public void setFullName(EvidencedStringType value) { + this.fullName = value; + } + + /** + * Gets the value of the ecNumber property. + * + *

+ * This accessor method returns a reference to the live list, + * not a snapshot. Therefore any modification you make to the + * returned list will be present inside the JAXB object. + * This is why there is not a set method for the ecNumber property. + * + *

+ * For example, to add a new item, do as follows: + *

+         *    getEcNumber().add(newItem);
+         * 
+ * + * + *

+ * Objects of the following type(s) are allowed in the list + * {@link EvidencedStringType } + * + * + */ + public List getEcNumber() { + if (ecNumber == null) { + ecNumber = new ArrayList(); + } + return this.ecNumber; + } + + } + +} diff --git a/biodata-formats/src/main/java/org/opencb/biodata/formats/protein/uniprot/v202502jaxb/ReactionType.java b/biodata-formats/src/main/java/org/opencb/biodata/formats/protein/uniprot/v202502jaxb/ReactionType.java new file mode 100644 index 00000000..1e2ed974 --- /dev/null +++ b/biodata-formats/src/main/java/org/opencb/biodata/formats/protein/uniprot/v202502jaxb/ReactionType.java @@ -0,0 +1,139 @@ +// +// This file was generated by the JavaTM Architecture for XML Binding(JAXB) Reference Implementation, v2.2.8-b130911.1802 +// See http://java.sun.com/xml/jaxb +// Any modifications to this file will be lost upon recompilation of the source schema. +// Generated on: 2025.05.30 at 11:28:51 AM CEST +// + + +package org.opencb.biodata.formats.protein.uniprot.v202502jaxb; + +import java.util.ArrayList; +import java.util.List; +import javax.xml.bind.annotation.XmlAccessType; +import javax.xml.bind.annotation.XmlAccessorType; +import javax.xml.bind.annotation.XmlAttribute; +import javax.xml.bind.annotation.XmlElement; +import javax.xml.bind.annotation.XmlType; + + +/** + * Describes a chemical reaction. + * + *

Java class for reactionType complex type. + * + *

The following schema fragment specifies the expected content contained within this class. + * + *

+ * <complexType name="reactionType">
+ *   <complexContent>
+ *     <restriction base="{http://www.w3.org/2001/XMLSchema}anyType">
+ *       <sequence>
+ *         <element name="text" type="{http://www.w3.org/2001/XMLSchema}string"/>
+ *         <element name="dbReference" type="{https://uniprot.org/uniprot}dbReferenceType" maxOccurs="unbounded"/>
+ *       </sequence>
+ *       <attribute name="evidence" type="{https://uniprot.org/uniprot}intListType" />
+ *     </restriction>
+ *   </complexContent>
+ * </complexType>
+ * 
+ * + * + */ +@XmlAccessorType(XmlAccessType.FIELD) +@XmlType(name = "reactionType", propOrder = { + "text", + "dbReference" +}) +public class ReactionType { + + @XmlElement(required = true) + protected String text; + @XmlElement(required = true) + protected List dbReference; + @XmlAttribute(name = "evidence") + protected List evidence; + + /** + * Gets the value of the text property. + * + * @return + * possible object is + * {@link String } + * + */ + public String getText() { + return text; + } + + /** + * Sets the value of the text property. + * + * @param value + * allowed object is + * {@link String } + * + */ + public void setText(String value) { + this.text = value; + } + + /** + * Gets the value of the dbReference property. + * + *

+ * This accessor method returns a reference to the live list, + * not a snapshot. Therefore any modification you make to the + * returned list will be present inside the JAXB object. + * This is why there is not a set method for the dbReference property. + * + *

+ * For example, to add a new item, do as follows: + *

+     *    getDbReference().add(newItem);
+     * 
+ * + * + *

+ * Objects of the following type(s) are allowed in the list + * {@link DbReferenceType } + * + * + */ + public List getDbReference() { + if (dbReference == null) { + dbReference = new ArrayList(); + } + return this.dbReference; + } + + /** + * Gets the value of the evidence property. + * + *

+ * This accessor method returns a reference to the live list, + * not a snapshot. Therefore any modification you make to the + * returned list will be present inside the JAXB object. + * This is why there is not a set method for the evidence property. + * + *

+ * For example, to add a new item, do as follows: + *

+     *    getEvidence().add(newItem);
+     * 
+ * + * + *

+ * Objects of the following type(s) are allowed in the list + * {@link Integer } + * + * + */ + public List getEvidence() { + if (evidence == null) { + evidence = new ArrayList(); + } + return this.evidence; + } + +} diff --git a/biodata-formats/src/main/java/org/opencb/biodata/formats/protein/uniprot/v202502jaxb/ReferenceType.java b/biodata-formats/src/main/java/org/opencb/biodata/formats/protein/uniprot/v202502jaxb/ReferenceType.java new file mode 100644 index 00000000..1ec87250 --- /dev/null +++ b/biodata-formats/src/main/java/org/opencb/biodata/formats/protein/uniprot/v202502jaxb/ReferenceType.java @@ -0,0 +1,193 @@ +// +// This file was generated by the JavaTM Architecture for XML Binding(JAXB) Reference Implementation, v2.2.8-b130911.1802 +// See http://java.sun.com/xml/jaxb +// Any modifications to this file will be lost upon recompilation of the source schema. +// Generated on: 2025.05.30 at 11:28:51 AM CEST +// + + +package org.opencb.biodata.formats.protein.uniprot.v202502jaxb; + +import java.util.ArrayList; +import java.util.List; +import javax.xml.bind.annotation.XmlAccessType; +import javax.xml.bind.annotation.XmlAccessorType; +import javax.xml.bind.annotation.XmlAttribute; +import javax.xml.bind.annotation.XmlElement; +import javax.xml.bind.annotation.XmlType; + + +/** + * Describes a citation and a summary of its content. + * Equivalent to the flat file RN-, RP-, RC-, RX-, RG-, RA-, RT- and RL-lines. + * + *

Java class for referenceType complex type. + * + *

The following schema fragment specifies the expected content contained within this class. + * + *

+ * <complexType name="referenceType">
+ *   <complexContent>
+ *     <restriction base="{http://www.w3.org/2001/XMLSchema}anyType">
+ *       <sequence>
+ *         <element name="citation" type="{https://uniprot.org/uniprot}citationType"/>
+ *         <group ref="{https://uniprot.org/uniprot}sptrCitationGroup"/>
+ *       </sequence>
+ *       <attribute name="evidence" type="{https://uniprot.org/uniprot}intListType" />
+ *       <attribute name="key" use="required" type="{http://www.w3.org/2001/XMLSchema}string" />
+ *     </restriction>
+ *   </complexContent>
+ * </complexType>
+ * 
+ * + * + */ +@XmlAccessorType(XmlAccessType.FIELD) +@XmlType(name = "referenceType", propOrder = { + "citation", + "scope", + "source" +}) +public class ReferenceType { + + @XmlElement(required = true) + protected CitationType citation; + @XmlElement(required = true) + protected List scope; + protected SourceDataType source; + @XmlAttribute(name = "evidence") + protected List evidence; + @XmlAttribute(name = "key", required = true) + protected String key; + + /** + * Gets the value of the citation property. + * + * @return + * possible object is + * {@link CitationType } + * + */ + public CitationType getCitation() { + return citation; + } + + /** + * Sets the value of the citation property. + * + * @param value + * allowed object is + * {@link CitationType } + * + */ + public void setCitation(CitationType value) { + this.citation = value; + } + + /** + * Gets the value of the scope property. + * + *

+ * This accessor method returns a reference to the live list, + * not a snapshot. Therefore any modification you make to the + * returned list will be present inside the JAXB object. + * This is why there is not a set method for the scope property. + * + *

+ * For example, to add a new item, do as follows: + *

+     *    getScope().add(newItem);
+     * 
+ * + * + *

+ * Objects of the following type(s) are allowed in the list + * {@link String } + * + * + */ + public List getScope() { + if (scope == null) { + scope = new ArrayList(); + } + return this.scope; + } + + /** + * Gets the value of the source property. + * + * @return + * possible object is + * {@link SourceDataType } + * + */ + public SourceDataType getSource() { + return source; + } + + /** + * Sets the value of the source property. + * + * @param value + * allowed object is + * {@link SourceDataType } + * + */ + public void setSource(SourceDataType value) { + this.source = value; + } + + /** + * Gets the value of the evidence property. + * + *

+ * This accessor method returns a reference to the live list, + * not a snapshot. Therefore any modification you make to the + * returned list will be present inside the JAXB object. + * This is why there is not a set method for the evidence property. + * + *

+ * For example, to add a new item, do as follows: + *

+     *    getEvidence().add(newItem);
+     * 
+ * + * + *

+ * Objects of the following type(s) are allowed in the list + * {@link Integer } + * + * + */ + public List getEvidence() { + if (evidence == null) { + evidence = new ArrayList(); + } + return this.evidence; + } + + /** + * Gets the value of the key property. + * + * @return + * possible object is + * {@link String } + * + */ + public String getKey() { + return key; + } + + /** + * Sets the value of the key property. + * + * @param value + * allowed object is + * {@link String } + * + */ + public void setKey(String value) { + this.key = value; + } + +} diff --git a/biodata-formats/src/main/java/org/opencb/biodata/formats/protein/uniprot/v202502jaxb/SequenceType.java b/biodata-formats/src/main/java/org/opencb/biodata/formats/protein/uniprot/v202502jaxb/SequenceType.java new file mode 100644 index 00000000..87c08d60 --- /dev/null +++ b/biodata-formats/src/main/java/org/opencb/biodata/formats/protein/uniprot/v202502jaxb/SequenceType.java @@ -0,0 +1,242 @@ +// +// This file was generated by the JavaTM Architecture for XML Binding(JAXB) Reference Implementation, v2.2.8-b130911.1802 +// See http://java.sun.com/xml/jaxb +// Any modifications to this file will be lost upon recompilation of the source schema. +// Generated on: 2025.05.30 at 11:28:51 AM CEST +// + + +package org.opencb.biodata.formats.protein.uniprot.v202502jaxb; + +import javax.xml.bind.annotation.XmlAccessType; +import javax.xml.bind.annotation.XmlAccessorType; +import javax.xml.bind.annotation.XmlAttribute; +import javax.xml.bind.annotation.XmlSchemaType; +import javax.xml.bind.annotation.XmlType; +import javax.xml.bind.annotation.XmlValue; +import javax.xml.datatype.XMLGregorianCalendar; + + +/** + *

Java class for sequenceType complex type. + * + *

The following schema fragment specifies the expected content contained within this class. + * + *

+ * <complexType name="sequenceType">
+ *   <simpleContent>
+ *     <extension base="<http://www.w3.org/2001/XMLSchema>string">
+ *       <attribute name="length" use="required" type="{http://www.w3.org/2001/XMLSchema}int" />
+ *       <attribute name="mass" use="required" type="{http://www.w3.org/2001/XMLSchema}int" />
+ *       <attribute name="checksum" use="required" type="{http://www.w3.org/2001/XMLSchema}string" />
+ *       <attribute name="modified" use="required" type="{http://www.w3.org/2001/XMLSchema}date" />
+ *       <attribute name="version" use="required" type="{http://www.w3.org/2001/XMLSchema}int" />
+ *       <attribute name="precursor" type="{http://www.w3.org/2001/XMLSchema}boolean" />
+ *       <attribute name="fragment">
+ *         <simpleType>
+ *           <restriction base="{http://www.w3.org/2001/XMLSchema}string">
+ *             <enumeration value="single"/>
+ *             <enumeration value="multiple"/>
+ *           </restriction>
+ *         </simpleType>
+ *       </attribute>
+ *     </extension>
+ *   </simpleContent>
+ * </complexType>
+ * 
+ * + * + */ +@XmlAccessorType(XmlAccessType.FIELD) +@XmlType(name = "sequenceType", propOrder = { + "value" +}) +public class SequenceType { + + @XmlValue + protected String value; + @XmlAttribute(name = "length", required = true) + protected int length; + @XmlAttribute(name = "mass", required = true) + protected int mass; + @XmlAttribute(name = "checksum", required = true) + protected String checksum; + @XmlAttribute(name = "modified", required = true) + @XmlSchemaType(name = "date") + protected XMLGregorianCalendar modified; + @XmlAttribute(name = "version", required = true) + protected int version; + @XmlAttribute(name = "precursor") + protected Boolean precursor; + @XmlAttribute(name = "fragment") + protected String fragment; + + /** + * Gets the value of the value property. + * + * @return + * possible object is + * {@link String } + * + */ + public String getValue() { + return value; + } + + /** + * Sets the value of the value property. + * + * @param value + * allowed object is + * {@link String } + * + */ + public void setValue(String value) { + this.value = value; + } + + /** + * Gets the value of the length property. + * + */ + public int getLength() { + return length; + } + + /** + * Sets the value of the length property. + * + */ + public void setLength(int value) { + this.length = value; + } + + /** + * Gets the value of the mass property. + * + */ + public int getMass() { + return mass; + } + + /** + * Sets the value of the mass property. + * + */ + public void setMass(int value) { + this.mass = value; + } + + /** + * Gets the value of the checksum property. + * + * @return + * possible object is + * {@link String } + * + */ + public String getChecksum() { + return checksum; + } + + /** + * Sets the value of the checksum property. + * + * @param value + * allowed object is + * {@link String } + * + */ + public void setChecksum(String value) { + this.checksum = value; + } + + /** + * Gets the value of the modified property. 
+ * + * @return + * possible object is + * {@link XMLGregorianCalendar } + * + */ + public XMLGregorianCalendar getModified() { + return modified; + } + + /** + * Sets the value of the modified property. + * + * @param value + * allowed object is + * {@link XMLGregorianCalendar } + * + */ + public void setModified(XMLGregorianCalendar value) { + this.modified = value; + } + + /** + * Gets the value of the version property. + * + */ + public int getVersion() { + return version; + } + + /** + * Sets the value of the version property. + * + */ + public void setVersion(int value) { + this.version = value; + } + + /** + * Gets the value of the precursor property. + * + * @return + * possible object is + * {@link Boolean } + * + */ + public Boolean isPrecursor() { + return precursor; + } + + /** + * Sets the value of the precursor property. + * + * @param value + * allowed object is + * {@link Boolean } + * + */ + public void setPrecursor(Boolean value) { + this.precursor = value; + } + + /** + * Gets the value of the fragment property. + * + * @return + * possible object is + * {@link String } + * + */ + public String getFragment() { + return fragment; + } + + /** + * Sets the value of the fragment property. 
+ * + * @param value + * allowed object is + * {@link String } + * + */ + public void setFragment(String value) { + this.fragment = value; + } + +} diff --git a/biodata-formats/src/main/java/org/opencb/biodata/formats/protein/uniprot/v202502jaxb/SourceDataType.java b/biodata-formats/src/main/java/org/opencb/biodata/formats/protein/uniprot/v202502jaxb/SourceDataType.java new file mode 100644 index 00000000..1a1fd12d --- /dev/null +++ b/biodata-formats/src/main/java/org/opencb/biodata/formats/protein/uniprot/v202502jaxb/SourceDataType.java @@ -0,0 +1,461 @@ +// +// This file was generated by the JavaTM Architecture for XML Binding(JAXB) Reference Implementation, v2.2.8-b130911.1802 +// See http://java.sun.com/xml/jaxb +// Any modifications to this file will be lost upon recompilation of the source schema. +// Generated on: 2025.05.30 at 11:28:51 AM CEST +// + + +package org.opencb.biodata.formats.protein.uniprot.v202502jaxb; + +import java.util.ArrayList; +import java.util.List; +import javax.xml.bind.annotation.XmlAccessType; +import javax.xml.bind.annotation.XmlAccessorType; +import javax.xml.bind.annotation.XmlAttribute; +import javax.xml.bind.annotation.XmlElement; +import javax.xml.bind.annotation.XmlElements; +import javax.xml.bind.annotation.XmlType; +import javax.xml.bind.annotation.XmlValue; + + +/** + * Describes the source of the sequence according to the citation. + * Equivalent to the flat file RC-line. + * + *

Java class for sourceDataType complex type. + * + *

The following schema fragment specifies the expected content contained within this class. + * + *

+ * <complexType name="sourceDataType">
+ *   <complexContent>
+ *     <restriction base="{http://www.w3.org/2001/XMLSchema}anyType">
+ *       <choice maxOccurs="unbounded">
+ *         <element name="strain">
+ *           <complexType>
+ *             <simpleContent>
+ *               <extension base="<http://www.w3.org/2001/XMLSchema>string">
+ *                 <attribute name="evidence" type="{https://uniprot.org/uniprot}intListType" />
+ *               </extension>
+ *             </simpleContent>
+ *           </complexType>
+ *         </element>
+ *         <element name="plasmid">
+ *           <complexType>
+ *             <simpleContent>
+ *               <extension base="<http://www.w3.org/2001/XMLSchema>string">
+ *                 <attribute name="evidence" type="{https://uniprot.org/uniprot}intListType" />
+ *               </extension>
+ *             </simpleContent>
+ *           </complexType>
+ *         </element>
+ *         <element name="transposon">
+ *           <complexType>
+ *             <simpleContent>
+ *               <extension base="<http://www.w3.org/2001/XMLSchema>string">
+ *                 <attribute name="evidence" type="{https://uniprot.org/uniprot}intListType" />
+ *               </extension>
+ *             </simpleContent>
+ *           </complexType>
+ *         </element>
+ *         <element name="tissue">
+ *           <complexType>
+ *             <simpleContent>
+ *               <extension base="<http://www.w3.org/2001/XMLSchema>string">
+ *                 <attribute name="evidence" type="{https://uniprot.org/uniprot}intListType" />
+ *               </extension>
+ *             </simpleContent>
+ *           </complexType>
+ *         </element>
+ *       </choice>
+ *     </restriction>
+ *   </complexContent>
+ * </complexType>
+ * 
+ * + * + */ +@XmlAccessorType(XmlAccessType.FIELD) +@XmlType(name = "sourceDataType", propOrder = { + "strainOrPlasmidOrTransposon" +}) +public class SourceDataType { + + @XmlElements({ + @XmlElement(name = "strain", type = SourceDataType.Strain.class), + @XmlElement(name = "plasmid", type = SourceDataType.Plasmid.class), + @XmlElement(name = "transposon", type = SourceDataType.Transposon.class), + @XmlElement(name = "tissue", type = SourceDataType.Tissue.class) + }) + protected List strainOrPlasmidOrTransposon; + + /** + * Gets the value of the strainOrPlasmidOrTransposon property. + * + *

+ * This accessor method returns a reference to the live list, + * not a snapshot. Therefore any modification you make to the + * returned list will be present inside the JAXB object. + * This is why there is not a set method for the strainOrPlasmidOrTransposon property. + * + *

+ * For example, to add a new item, do as follows: + *

+     *    getStrainOrPlasmidOrTransposon().add(newItem);
+     * 
+ * + * + *

+ * Objects of the following type(s) are allowed in the list + * {@link SourceDataType.Strain } + * {@link SourceDataType.Plasmid } + * {@link SourceDataType.Transposon } + * {@link SourceDataType.Tissue } + * + * + */ + public List getStrainOrPlasmidOrTransposon() { + if (strainOrPlasmidOrTransposon == null) { + strainOrPlasmidOrTransposon = new ArrayList(); + } + return this.strainOrPlasmidOrTransposon; + } + + + /** + *

Java class for anonymous complex type. + * + *

The following schema fragment specifies the expected content contained within this class. + * + *

+     * <complexType>
+     *   <simpleContent>
+     *     <extension base="<http://www.w3.org/2001/XMLSchema>string">
+     *       <attribute name="evidence" type="{https://uniprot.org/uniprot}intListType" />
+     *     </extension>
+     *   </simpleContent>
+     * </complexType>
+     * 
+ * + * + */ + @XmlAccessorType(XmlAccessType.FIELD) + @XmlType(name = "", propOrder = { + "value" + }) + public static class Plasmid { + + @XmlValue + protected String value; + @XmlAttribute(name = "evidence") + protected List evidence; + + /** + * Gets the value of the value property. + * + * @return + * possible object is + * {@link String } + * + */ + public String getValue() { + return value; + } + + /** + * Sets the value of the value property. + * + * @param value + * allowed object is + * {@link String } + * + */ + public void setValue(String value) { + this.value = value; + } + + /** + * Gets the value of the evidence property. + * + *

+ * This accessor method returns a reference to the live list, + * not a snapshot. Therefore any modification you make to the + * returned list will be present inside the JAXB object. + * This is why there is not a set method for the evidence property. + * + *

+ * For example, to add a new item, do as follows: + *

+         *    getEvidence().add(newItem);
+         * 
+ * + * + *

+ * Objects of the following type(s) are allowed in the list + * {@link Integer } + * + * + */ + public List getEvidence() { + if (evidence == null) { + evidence = new ArrayList(); + } + return this.evidence; + } + + } + + + /** + *

Java class for anonymous complex type. + * + *

The following schema fragment specifies the expected content contained within this class. + * + *

+     * <complexType>
+     *   <simpleContent>
+     *     <extension base="<http://www.w3.org/2001/XMLSchema>string">
+     *       <attribute name="evidence" type="{https://uniprot.org/uniprot}intListType" />
+     *     </extension>
+     *   </simpleContent>
+     * </complexType>
+     * 
+ * + * + */ + @XmlAccessorType(XmlAccessType.FIELD) + @XmlType(name = "", propOrder = { + "value" + }) + public static class Strain { + + @XmlValue + protected String value; + @XmlAttribute(name = "evidence") + protected List evidence; + + /** + * Gets the value of the value property. + * + * @return + * possible object is + * {@link String } + * + */ + public String getValue() { + return value; + } + + /** + * Sets the value of the value property. + * + * @param value + * allowed object is + * {@link String } + * + */ + public void setValue(String value) { + this.value = value; + } + + /** + * Gets the value of the evidence property. + * + *

+ * This accessor method returns a reference to the live list, + * not a snapshot. Therefore any modification you make to the + * returned list will be present inside the JAXB object. + * This is why there is not a set method for the evidence property. + * + *

+ * For example, to add a new item, do as follows: + *

+         *    getEvidence().add(newItem);
+         * 
+ * + * + *

+ * Objects of the following type(s) are allowed in the list + * {@link Integer } + * + * + */ + public List getEvidence() { + if (evidence == null) { + evidence = new ArrayList(); + } + return this.evidence; + } + + } + + + /** + *

Java class for anonymous complex type. + * + *

The following schema fragment specifies the expected content contained within this class. + * + *

+     * <complexType>
+     *   <simpleContent>
+     *     <extension base="<http://www.w3.org/2001/XMLSchema>string">
+     *       <attribute name="evidence" type="{https://uniprot.org/uniprot}intListType" />
+     *     </extension>
+     *   </simpleContent>
+     * </complexType>
+     * 
+ * + * + */ + @XmlAccessorType(XmlAccessType.FIELD) + @XmlType(name = "", propOrder = { + "value" + }) + public static class Tissue { + + @XmlValue + protected String value; + @XmlAttribute(name = "evidence") + protected List evidence; + + /** + * Gets the value of the value property. + * + * @return + * possible object is + * {@link String } + * + */ + public String getValue() { + return value; + } + + /** + * Sets the value of the value property. + * + * @param value + * allowed object is + * {@link String } + * + */ + public void setValue(String value) { + this.value = value; + } + + /** + * Gets the value of the evidence property. + * + *

+ * This accessor method returns a reference to the live list, + * not a snapshot. Therefore any modification you make to the + * returned list will be present inside the JAXB object. + * This is why there is not a set method for the evidence property. + * + *

+ * For example, to add a new item, do as follows: + *

+         *    getEvidence().add(newItem);
+         * 
+ * + * + *

+ * Objects of the following type(s) are allowed in the list + * {@link Integer } + * + * + */ + public List getEvidence() { + if (evidence == null) { + evidence = new ArrayList(); + } + return this.evidence; + } + + } + + + /** + *

Java class for anonymous complex type. + * + *

The following schema fragment specifies the expected content contained within this class. + * + *

+     * <complexType>
+     *   <simpleContent>
+     *     <extension base="<http://www.w3.org/2001/XMLSchema>string">
+     *       <attribute name="evidence" type="{https://uniprot.org/uniprot}intListType" />
+     *     </extension>
+     *   </simpleContent>
+     * </complexType>
+     * 
+ * + * + */ + @XmlAccessorType(XmlAccessType.FIELD) + @XmlType(name = "", propOrder = { + "value" + }) + public static class Transposon { + + @XmlValue + protected String value; + @XmlAttribute(name = "evidence") + protected List evidence; + + /** + * Gets the value of the value property. + * + * @return + * possible object is + * {@link String } + * + */ + public String getValue() { + return value; + } + + /** + * Sets the value of the value property. + * + * @param value + * allowed object is + * {@link String } + * + */ + public void setValue(String value) { + this.value = value; + } + + /** + * Gets the value of the evidence property. + * + *

+ * This accessor method returns a reference to the live list, + * not a snapshot. Therefore any modification you make to the + * returned list will be present inside the JAXB object. + * This is why there is not a set method for the evidence property. + * + *

+ * For example, to add a new item, do as follows: + *

+         *    getEvidence().add(newItem);
+         * 
+ * + * + *

+ * Objects of the following type(s) are allowed in the list + * {@link Integer } + * + * + */ + public List getEvidence() { + if (evidence == null) { + evidence = new ArrayList(); + } + return this.evidence; + } + + } + +} diff --git a/biodata-formats/src/main/java/org/opencb/biodata/formats/protein/uniprot/v202502jaxb/SourceType.java b/biodata-formats/src/main/java/org/opencb/biodata/formats/protein/uniprot/v202502jaxb/SourceType.java new file mode 100644 index 00000000..40aecfb8 --- /dev/null +++ b/biodata-formats/src/main/java/org/opencb/biodata/formats/protein/uniprot/v202502jaxb/SourceType.java @@ -0,0 +1,98 @@ +// +// This file was generated by the JavaTM Architecture for XML Binding(JAXB) Reference Implementation, v2.2.8-b130911.1802 +// See http://java.sun.com/xml/jaxb +// Any modifications to this file will be lost upon recompilation of the source schema. +// Generated on: 2025.05.30 at 11:28:51 AM CEST +// + + +package org.opencb.biodata.formats.protein.uniprot.v202502jaxb; + +import java.math.BigInteger; +import javax.xml.bind.annotation.XmlAccessType; +import javax.xml.bind.annotation.XmlAccessorType; +import javax.xml.bind.annotation.XmlAttribute; +import javax.xml.bind.annotation.XmlType; + + +/** + * Describes the source of the data using a database cross-reference (or a 'ref' attribute when the source cannot be found in a public data source, such as PubMed, and is cited only within the UniProtKB entry). + * + *

Java class for sourceType complex type. + * + *

The following schema fragment specifies the expected content contained within this class. + * + *

+ * <complexType name="sourceType">
+ *   <complexContent>
+ *     <restriction base="{http://www.w3.org/2001/XMLSchema}anyType">
+ *       <sequence>
+ *         <element name="dbReference" type="{https://uniprot.org/uniprot}dbReferenceType" minOccurs="0"/>
+ *       </sequence>
+ *       <attribute name="ref" type="{http://www.w3.org/2001/XMLSchema}integer" />
+ *     </restriction>
+ *   </complexContent>
+ * </complexType>
+ * 
+ * + * + */ +@XmlAccessorType(XmlAccessType.FIELD) +@XmlType(name = "sourceType", propOrder = { + "dbReference" +}) +public class SourceType { + + protected DbReferenceType dbReference; + @XmlAttribute(name = "ref") + protected BigInteger ref; + + /** + * Gets the value of the dbReference property. + * + * @return + * possible object is + * {@link DbReferenceType } + * + */ + public DbReferenceType getDbReference() { + return dbReference; + } + + /** + * Sets the value of the dbReference property. + * + * @param value + * allowed object is + * {@link DbReferenceType } + * + */ + public void setDbReference(DbReferenceType value) { + this.dbReference = value; + } + + /** + * Gets the value of the ref property. + * + * @return + * possible object is + * {@link BigInteger } + * + */ + public BigInteger getRef() { + return ref; + } + + /** + * Sets the value of the ref property. + * + * @param value + * allowed object is + * {@link BigInteger } + * + */ + public void setRef(BigInteger value) { + this.ref = value; + } + +} diff --git a/biodata-formats/src/main/java/org/opencb/biodata/formats/protein/uniprot/v202502jaxb/StatusType.java b/biodata-formats/src/main/java/org/opencb/biodata/formats/protein/uniprot/v202502jaxb/StatusType.java new file mode 100644 index 00000000..0552f857 --- /dev/null +++ b/biodata-formats/src/main/java/org/opencb/biodata/formats/protein/uniprot/v202502jaxb/StatusType.java @@ -0,0 +1,107 @@ +// +// This file was generated by the JavaTM Architecture for XML Binding(JAXB) Reference Implementation, v2.2.8-b130911.1802 +// See http://java.sun.com/xml/jaxb +// Any modifications to this file will be lost upon recompilation of the source schema. 
+// Generated on: 2025.05.30 at 11:28:51 AM CEST +// + + +package org.opencb.biodata.formats.protein.uniprot.v202502jaxb; + +import javax.xml.bind.annotation.XmlAccessType; +import javax.xml.bind.annotation.XmlAccessorType; +import javax.xml.bind.annotation.XmlAttribute; +import javax.xml.bind.annotation.XmlType; +import javax.xml.bind.annotation.XmlValue; + + +/** + * Indicates whether the name of a plasmid is known or unknown. + * + *

Java class for statusType complex type. + * + *

The following schema fragment specifies the expected content contained within this class. + * + *

+ * <complexType name="statusType">
+ *   <simpleContent>
+ *     <extension base="<http://www.w3.org/2001/XMLSchema>string">
+ *       <attribute name="status" default="known">
+ *         <simpleType>
+ *           <restriction base="{http://www.w3.org/2001/XMLSchema}string">
+ *             <enumeration value="known"/>
+ *             <enumeration value="unknown"/>
+ *           </restriction>
+ *         </simpleType>
+ *       </attribute>
+ *     </extension>
+ *   </simpleContent>
+ * </complexType>
+ * 
+ * + * + */ +@XmlAccessorType(XmlAccessType.FIELD) +@XmlType(name = "statusType", propOrder = { + "value" +}) +public class StatusType { + + @XmlValue + protected String value; + @XmlAttribute(name = "status") + protected String status; + + /** + * Gets the value of the value property. + * + * @return + * possible object is + * {@link String } + * + */ + public String getValue() { + return value; + } + + /** + * Sets the value of the value property. + * + * @param value + * allowed object is + * {@link String } + * + */ + public void setValue(String value) { + this.value = value; + } + + /** + * Gets the value of the status property. + * + * @return + * possible object is + * {@link String } + * + */ + public String getStatus() { + if (status == null) { + return "known"; + } else { + return status; + } + } + + /** + * Sets the value of the status property. + * + * @param value + * allowed object is + * {@link String } + * + */ + public void setStatus(String value) { + this.status = value; + } + +} diff --git a/biodata-formats/src/main/java/org/opencb/biodata/formats/protein/uniprot/v202502jaxb/SubcellularLocationType.java b/biodata-formats/src/main/java/org/opencb/biodata/formats/protein/uniprot/v202502jaxb/SubcellularLocationType.java new file mode 100644 index 00000000..97b9dd49 --- /dev/null +++ b/biodata-formats/src/main/java/org/opencb/biodata/formats/protein/uniprot/v202502jaxb/SubcellularLocationType.java @@ -0,0 +1,142 @@ +// +// This file was generated by the JavaTM Architecture for XML Binding(JAXB) Reference Implementation, v2.2.8-b130911.1802 +// See http://java.sun.com/xml/jaxb +// Any modifications to this file will be lost upon recompilation of the source schema. 
+// Generated on: 2025.05.30 at 11:28:51 AM CEST +// + + +package org.opencb.biodata.formats.protein.uniprot.v202502jaxb; + +import java.util.ArrayList; +import java.util.List; +import javax.xml.bind.annotation.XmlAccessType; +import javax.xml.bind.annotation.XmlAccessorType; +import javax.xml.bind.annotation.XmlElement; +import javax.xml.bind.annotation.XmlType; + + +/** + * Describes the subcellular location and optionally the topology and orientation of a molecule. + * + *

Java class for subcellularLocationType complex type. + * + *

The following schema fragment specifies the expected content contained within this class. + * + *

+ * <complexType name="subcellularLocationType">
+ *   <complexContent>
+ *     <restriction base="{http://www.w3.org/2001/XMLSchema}anyType">
+ *       <sequence>
+ *         <element name="location" type="{https://uniprot.org/uniprot}evidencedStringType" maxOccurs="unbounded"/>
+ *         <element name="topology" type="{https://uniprot.org/uniprot}evidencedStringType" maxOccurs="unbounded" minOccurs="0"/>
+ *         <element name="orientation" type="{https://uniprot.org/uniprot}evidencedStringType" maxOccurs="unbounded" minOccurs="0"/>
+ *       </sequence>
+ *     </restriction>
+ *   </complexContent>
+ * </complexType>
+ * 
+ * + * + */ +@XmlAccessorType(XmlAccessType.FIELD) +@XmlType(name = "subcellularLocationType", propOrder = { + "location", + "topology", + "orientation" +}) +public class SubcellularLocationType { + + @XmlElement(required = true) + protected List location; + protected List topology; + protected List orientation; + + /** + * Gets the value of the location property. + * + *

+ * This accessor method returns a reference to the live list, + * not a snapshot. Therefore any modification you make to the + * returned list will be present inside the JAXB object. + * This is why there is not a set method for the location property. + * + *

+ * For example, to add a new item, do as follows: + *

+     *    getLocation().add(newItem);
+     * 
+ * + * + *

+ * Objects of the following type(s) are allowed in the list + * {@link EvidencedStringType } + * + * + */ + public List getLocation() { + if (location == null) { + location = new ArrayList(); + } + return this.location; + } + + /** + * Gets the value of the topology property. + * + *

+ * This accessor method returns a reference to the live list, + * not a snapshot. Therefore any modification you make to the + * returned list will be present inside the JAXB object. + * This is why there is not a set method for the topology property. + * + *

+ * For example, to add a new item, do as follows: + *

+     *    getTopology().add(newItem);
+     * 
+ * + * + *

+ * Objects of the following type(s) are allowed in the list + * {@link EvidencedStringType } + * + * + */ + public List getTopology() { + if (topology == null) { + topology = new ArrayList(); + } + return this.topology; + } + + /** + * Gets the value of the orientation property. + * + *

+ * This accessor method returns a reference to the live list, + * not a snapshot. Therefore any modification you make to the + * returned list will be present inside the JAXB object. + * This is why there is not a set method for the orientation property. + * + *

+ * For example, to add a new item, do as follows: + *

+     *    getOrientation().add(newItem);
+     * 
+ * + * + *

+ * Objects of the following type(s) are allowed in the list + * {@link EvidencedStringType } + * + * + */ + public List getOrientation() { + if (orientation == null) { + orientation = new ArrayList(); + } + return this.orientation; + } + +} diff --git a/biodata-formats/src/main/java/org/opencb/biodata/formats/protein/uniprot/v202502jaxb/Uniprot.java b/biodata-formats/src/main/java/org/opencb/biodata/formats/protein/uniprot/v202502jaxb/Uniprot.java new file mode 100644 index 00000000..93e871ed --- /dev/null +++ b/biodata-formats/src/main/java/org/opencb/biodata/formats/protein/uniprot/v202502jaxb/Uniprot.java @@ -0,0 +1,105 @@ +// +// This file was generated by the JavaTM Architecture for XML Binding(JAXB) Reference Implementation, v2.2.8-b130911.1802 +// See http://java.sun.com/xml/jaxb +// Any modifications to this file will be lost upon recompilation of the source schema. +// Generated on: 2025.05.30 at 11:28:51 AM CEST +// + + +package org.opencb.biodata.formats.protein.uniprot.v202502jaxb; + +import java.util.ArrayList; +import java.util.List; +import javax.xml.bind.annotation.XmlAccessType; +import javax.xml.bind.annotation.XmlAccessorType; +import javax.xml.bind.annotation.XmlElement; +import javax.xml.bind.annotation.XmlRootElement; +import javax.xml.bind.annotation.XmlType; + + +/** + *

Java class for anonymous complex type. + * + *

The following schema fragment specifies the expected content contained within this class. + * + *

+ * <complexType>
+ *   <complexContent>
+ *     <restriction base="{http://www.w3.org/2001/XMLSchema}anyType">
+ *       <sequence>
+ *         <element ref="{https://uniprot.org/uniprot}entry" maxOccurs="unbounded"/>
+ *         <element ref="{https://uniprot.org/uniprot}copyright" minOccurs="0"/>
+ *       </sequence>
+ *     </restriction>
+ *   </complexContent>
+ * </complexType>
+ * 
+ * + * + */ +@XmlAccessorType(XmlAccessType.FIELD) +@XmlType(name = "", propOrder = { + "entry", + "copyright" +}) +@XmlRootElement(name = "uniprot") +public class Uniprot { + + @XmlElement(required = true) + protected List entry; + protected String copyright; + + /** + * Gets the value of the entry property. + * + *

+ * This accessor method returns a reference to the live list, + * not a snapshot. Therefore any modification you make to the + * returned list will be present inside the JAXB object. + * This is why there is not a set method for the entry property. + * + *

+ * For example, to add a new item, do as follows: + *

+     *    getEntry().add(newItem);
+     * 
+ * + * + *

+ * Objects of the following type(s) are allowed in the list + * {@link Entry } + * + * + */ + public List getEntry() { + if (entry == null) { + entry = new ArrayList(); + } + return this.entry; + } + + /** + * Gets the value of the copyright property. + * + * @return + * possible object is + * {@link String } + * + */ + public String getCopyright() { + return copyright; + } + + /** + * Sets the value of the copyright property. + * + * @param value + * allowed object is + * {@link String } + * + */ + public void setCopyright(String value) { + this.copyright = value; + } + +} diff --git a/biodata-formats/src/main/java/org/opencb/biodata/formats/protein/uniprot/v202502jaxb/package-info.java b/biodata-formats/src/main/java/org/opencb/biodata/formats/protein/uniprot/v202502jaxb/package-info.java new file mode 100644 index 00000000..b41de4e5 --- /dev/null +++ b/biodata-formats/src/main/java/org/opencb/biodata/formats/protein/uniprot/v202502jaxb/package-info.java @@ -0,0 +1,9 @@ +// +// This file was generated by the JavaTM Architecture for XML Binding(JAXB) Reference Implementation, v2.2.8-b130911.1802 +// See http://java.sun.com/xml/jaxb +// Any modifications to this file will be lost upon recompilation of the source schema. 
+// Generated on: 2025.05.30 at 11:28:51 AM CEST +// + +@javax.xml.bind.annotation.XmlSchema(namespace = "https://uniprot.org/uniprot", elementFormDefault = javax.xml.bind.annotation.XmlNsForm.QUALIFIED) +package org.opencb.biodata.formats.protein.uniprot.v202502jaxb; diff --git a/biodata-formats/src/test/java/org/opencb/biodata/formats/protein/uniprot/UniProtParserTest.java b/biodata-formats/src/test/java/org/opencb/biodata/formats/protein/uniprot/UniProtParserTest.java new file mode 100644 index 00000000..3344ff37 --- /dev/null +++ b/biodata-formats/src/test/java/org/opencb/biodata/formats/protein/uniprot/UniProtParserTest.java @@ -0,0 +1,22 @@ +package org.opencb.biodata.formats.protein.uniprot; + +import org.junit.Assert; +import org.junit.Test; +import org.opencb.biodata.formats.protein.uniprot.v202502jaxb.Uniprot; + +import javax.xml.bind.JAXBException; + +public class UniProtParserTest { + + @Test + public void testParse() throws JAXBException { + + String fullFilename = getClass().getResource("/uniprot-202502/uniprot-test.xml").getPath(); + + Uniprot uniprot = (Uniprot) UniProtParser.loadXMLInfo(fullFilename, UniProtParser.UNIPROT_202502_CONTEXT); + + System.out.println("fullFilename = " + fullFilename); + System.out.println("uniprot.getEntry().size() = " + uniprot.getEntry().size()); + Assert.assertEquals(1, uniprot.getEntry().size()); + } +} \ No newline at end of file diff --git a/biodata-formats/src/test/resources/uniprot-202502/uniprot-test.xml b/biodata-formats/src/test/resources/uniprot-202502/uniprot-test.xml new file mode 100644 index 00000000..b39ad575 --- /dev/null +++ b/biodata-formats/src/test/resources/uniprot-202502/uniprot-test.xml @@ -0,0 +1,118 @@ + + + + Q49VE6 + Y2119_STAS1 + + + UPF0741 protein SSP2119 + + + + SSP2119 + + + Staphylococcus saprophyticus subsp. 
saprophyticus (strain ATCC 15305 / DSM 20229 / NCIMB 8711 / NCTC 7292 / S-41) + + + Bacteria + Bacillati + Bacillota + Bacilli + Bacillales + Staphylococcaceae + Staphylococcus + + + + + Whole genome sequence of Staphylococcus saprophyticus reveals the pathogenesis of uncomplicated urinary tract infection. + + + + + + + + + + + + + + + + + + + NUCLEOTIDE SEQUENCE [LARGE SCALE GENOMIC DNA] + + ATCC 15305 / DSM 20229 / NCIMB 8711 / NCTC 7292 / S-41 + + + + Belongs to the UPF0741 family. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Coiled coil + Reference proteome + + + + + + + + + + + + + + + + + + + + + + + + + + + + + MQNKFLICDDCQGVNCKSLEKKLTKLDPEAEIEIGCQSYCGPGRRKTFAFVNNRPLAALTEDELMEKVEKQLQKPRDHEEEERLRKRNEERKRRKEEQDRKLKEKLAQRKHK + + \ No newline at end of file From a6692c7658cceb66587d1eed5b5d3cb774cef813 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joaqu=C3=ADn=20T=C3=A1rraga=20Gim=C3=A9nez?= Date: Tue, 10 Jun 2025 09:47:33 +0200 Subject: [PATCH 10/24] models: add imprinted data in gene annotation, #TASK-7745, #TASK-5564 --- .../biodata/models/core/GeneAnnotation.java | 44 +++++--- .../biodata/models/core/ImprintedGene.java | 103 ++++++++++++++++++ 2 files changed, 134 insertions(+), 13 deletions(-) create mode 100644 biodata-models/src/main/java/org/opencb/biodata/models/core/ImprintedGene.java diff --git a/biodata-models/src/main/java/org/opencb/biodata/models/core/GeneAnnotation.java b/biodata-models/src/main/java/org/opencb/biodata/models/core/GeneAnnotation.java index 376a0fc8..02b0c0db 100644 --- a/biodata-models/src/main/java/org/opencb/biodata/models/core/GeneAnnotation.java +++ b/biodata-models/src/main/java/org/opencb/biodata/models/core/GeneAnnotation.java @@ -19,12 +19,12 @@ package org.opencb.biodata.models.core; +import org.opencb.biodata.models.variant.avro.Constraint; import org.opencb.biodata.models.variant.avro.Expression; import org.opencb.biodata.models.variant.avro.GeneDrugInteraction; import 
org.opencb.biodata.models.variant.avro.GeneTraitAssociation; -import org.opencb.biodata.models.variant.avro.Constraint; - +import java.util.ArrayList; import java.util.List; @@ -37,34 +37,41 @@ public class GeneAnnotation { private List mirnaTargets; private List cancerAssociations; private List cancerHotspots; + private List imprinted; public GeneAnnotation() { + this.expression = new ArrayList<>(); + this.diseases = new ArrayList<>(); + this.drugs = new ArrayList<>(); + this.constraints = new ArrayList<>(); + this.mirnaTargets = new ArrayList<>(); + this.cancerAssociations = new ArrayList<>(); + this.cancerHotspots = new ArrayList<>(); + this.imprinted = new ArrayList<>(); } @Deprecated public GeneAnnotation(List expression, List diseases, List drugs, List constraints, List mirnaTargets) { - this.expression = expression; - this.diseases = diseases; - this.drugs = drugs; - this.constraints = constraints; - this.mirnaTargets = mirnaTargets; + this(expression, diseases, drugs, constraints, mirnaTargets, new ArrayList<>(), new ArrayList<>(), new ArrayList<>()); } @Deprecated public GeneAnnotation(List expression, List diseases, List drugs, List constraints, List mirnaTargets, List cancerAssociations) { - this.expression = expression; - this.diseases = diseases; - this.drugs = drugs; - this.constraints = constraints; - this.mirnaTargets = mirnaTargets; - this.cancerAssociations = cancerAssociations; + this(expression, diseases, drugs, constraints, mirnaTargets, cancerAssociations, new ArrayList<>(), new ArrayList<>()); } + @Deprecated public GeneAnnotation(List expression, List diseases, List drugs, List constraints, List mirnaTargets, List cancerAssociations, List cancerHotspots) { + this(expression, diseases, drugs, constraints, mirnaTargets, cancerAssociations, cancerHotspots, new ArrayList<>()); + } + + public GeneAnnotation(List expression, List diseases, List drugs, + List constraints, List mirnaTargets, List cancerAssociations, + List cancerHotspots, List 
imprinted) { this.expression = expression; this.diseases = diseases; this.drugs = drugs; @@ -72,6 +79,7 @@ public GeneAnnotation(List expression, List di this.mirnaTargets = mirnaTargets; this.cancerAssociations = cancerAssociations; this.cancerHotspots = cancerHotspots; + this.imprinted = imprinted; } @Override @@ -84,6 +92,7 @@ public String toString() { sb.append(", mirnaTargets=").append(mirnaTargets); sb.append(", cancerAssociations=").append(cancerAssociations); sb.append(", cancerHotspots=").append(cancerHotspots); + sb.append(", imprinted=").append(imprinted); sb.append('}'); return sb.toString(); } @@ -150,4 +159,13 @@ public GeneAnnotation setCancerHotspots(List cancerHotspots) { this.cancerHotspots = cancerHotspots; return this; } + + public List getImprinted() { + return imprinted; + } + + public GeneAnnotation setImprinted(List imprinted) { + this.imprinted = imprinted; + return this; + } } diff --git a/biodata-models/src/main/java/org/opencb/biodata/models/core/ImprintedGene.java b/biodata-models/src/main/java/org/opencb/biodata/models/core/ImprintedGene.java new file mode 100644 index 00000000..f3c4e7c3 --- /dev/null +++ b/biodata-models/src/main/java/org/opencb/biodata/models/core/ImprintedGene.java @@ -0,0 +1,103 @@ +/* + * + * + */ + +package org.opencb.biodata.models.core; + +import org.opencb.commons.datastore.core.ObjectMap; + +import java.util.Map; + + +public class ImprintedGene { + + private String geneName; + private String status; + private String expressedAllele; + private Map attributes; + private String source; + + public ImprintedGene() { + this.attributes = new ObjectMap(); + } + + public ImprintedGene(String geneName, String status, String expressedAllele, Map attributes, String source) { + this.geneName = geneName; + this.status = status; + this.expressedAllele = expressedAllele; + this.attributes = attributes; + this.source = source; + } + + @Override + public String toString() { + final StringBuilder sb = new 
StringBuilder("ImprintedGene{"); + sb.append("geneName='").append(geneName).append('\''); + sb.append(", status='").append(status).append('\''); + sb.append(", expressedAllele='").append(expressedAllele).append('\''); + sb.append(", attributes=").append(attributes); + sb.append(", source='").append(source).append('\''); + sb.append('}'); + return sb.toString(); + } + + public String getGeneName() { + return geneName; + } + + public ImprintedGene setGeneName(String geneName) { + this.geneName = geneName; + return this; + } + + public String getStatus() { + return status; + } + + public ImprintedGene setStatus(String status) { + this.status = status; + return this; + } + + public String getExpressedAllele() { + return expressedAllele; + } + + public ImprintedGene setExpressedAllele(String expressedAllele) { + this.expressedAllele = expressedAllele; + return this; + } + + public Map getAttributes() { + return attributes; + } + + public ImprintedGene setAttributes(Map attributes) { + this.attributes = attributes; + return this; + } + + public String getSource() { + return source; + } + + public ImprintedGene setSource(String source) { + this.source = source; + return this; + } +} From 6fc7cf495c3cfaf3144037f9c8812370d7e8247a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joaqu=C3=ADn=20T=C3=A1rraga=20Gim=C3=A9nez?= Date: Fri, 21 Feb 2025 08:51:57 +0100 Subject: [PATCH 11/24] formats: add COSMIC parser for v101, #TASK-7430, #TASK-7367 --- .../formats/variant/cosmic/CosmicParser.java | 2 +- .../variant/cosmic/CosmicParser101.java | 627 ++++++++++++++++++ .../variant/cosmic/CosmicParserTest.java | 68 +- ...l_Cosmic_Classification_v101_GRCh38.tsv.gz | Bin 0 -> 555 bytes ...mic_GenomeScreensMutant_v101_GRCh38.tsv.gz | Bin 0 -> 1327 bytes 5 files changed, 690 insertions(+), 7 deletions(-) create mode 100755 biodata-formats/src/main/java/org/opencb/biodata/formats/variant/cosmic/CosmicParser101.java create mode 100644 
biodata-formats/src/test/resources/Small_Cosmic_Classification_v101_GRCh38.tsv.gz create mode 100644 biodata-formats/src/test/resources/Small_Cosmic_GenomeScreensMutant_v101_GRCh38.tsv.gz diff --git a/biodata-formats/src/main/java/org/opencb/biodata/formats/variant/cosmic/CosmicParser.java b/biodata-formats/src/main/java/org/opencb/biodata/formats/variant/cosmic/CosmicParser.java index 85ff905f..0728e1c3 100755 --- a/biodata-formats/src/main/java/org/opencb/biodata/formats/variant/cosmic/CosmicParser.java +++ b/biodata-formats/src/main/java/org/opencb/biodata/formats/variant/cosmic/CosmicParser.java @@ -199,7 +199,7 @@ public static void parse(Path cosmicFile, String version, String name, String as ignoredCosmicLines += evidenceEntries.size(); } old = sequenceLocation; - evidenceEntries.clear(); + evidenceEntries = new ArrayList<>(); evidenceEntries.add(evidenceEntry); } } else { diff --git a/biodata-formats/src/main/java/org/opencb/biodata/formats/variant/cosmic/CosmicParser101.java b/biodata-formats/src/main/java/org/opencb/biodata/formats/variant/cosmic/CosmicParser101.java new file mode 100755 index 00000000..e6376f70 --- /dev/null +++ b/biodata-formats/src/main/java/org/opencb/biodata/formats/variant/cosmic/CosmicParser101.java @@ -0,0 +1,627 @@ +/* + * + * + */ + +package org.opencb.biodata.formats.variant.cosmic; + +import org.apache.commons.lang3.StringUtils; +import org.opencb.biodata.formats.io.FileFormatException; +import org.opencb.biodata.formats.variant.VariantAnnotationUtils; +import org.opencb.biodata.models.sequence.SequenceLocation; +import org.opencb.biodata.models.variant.avro.*; +import org.opencb.commons.utils.FileUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.BufferedReader; +import java.io.IOException; +import java.io.InputStreamReader; +import java.nio.file.Path; +import java.text.NumberFormat; +import java.util.*; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +public class 
CosmicParser101 { + + // GenomeScreensMutant + private static final int GENE_SYMBOL_COL = 0; + private static final int COSMIC_GENE_ID_COL = 1; + private static final int TRANSCRIPT_ACCESSION_COL = 2; + private static final int COSMIC_SAMPLE_ID_COL = 3; + private static final int SAMPLE_NAME_COL = 4; + private static final int COSMIC_PHENOTYPE_ID_COL = 5; + private static final int GENOMIC_MUTATION_ID_COL = 6; + private static final int LEGACY_MUTATION_ID_COL = 7; + private static final int MUTATION_ID_COL = 8; + private static final int MUTATION_CDS_COL = 9; + private static final int MUTATION_AA_COL = 10; + private static final int MUTATION_DESCRIPTION_COL = 11; + private static final int MUTATION_ZYGOSITY_COL = 12; + private static final int LOH_COL = 13; + private static final int CHROMOSOME_COL = 14; + private static final int GENOME_START_COL = 15; + private static final int GENOME_STOP_COL = 16; + private static final int STRAND_COL = 17; + private static final int PUBMED_PMID_COL = 18; + private static final int COSMIC_STUDY_ID_COL = 19; + private static final int HGVSP_COL = 20; + private static final int HGVSC_COL = 21; + private static final int HGVSG_COL = 22; + private static final int GENOMIC_WT_ALLELE_COL = 23; + private static final int GENOMIC_MUT_ALLELE_COL = 24; + private static final int MUTATION_SOMATIC_STATUS_COL = 25; + + // Classification + private static final int COSMIC_PHENOTYPE_ID_CLASSIFICATION_COL = 0; + private static final int PRIMARY_SITE_COL = 1; + private static final int SITE_SUBTYPE_1_COL = 2; + private static final int SITE_SUBTYPE_2_COL = 3; + private static final int SITE_SUBTYPE_3_COL = 4; + private static final int PRIMARY_HISTOLOGY_COL = 5; + private static final int HISTOLOGY_SUBTYPE_1_COL = 6; + private static final int HISTOLOGY_SUBTYPE_2_COL = 7; + private static final int HISTOLOGY_SUBTYPE_3_COL = 8; + private static final int NCI_CODE_COL = 9; + private static final int EFO_COL = 10; + + private static final String 
SYMBOL = "symbol"; + + private static final String HGVS_INSERTION_TAG = "ins"; + private static final String HGVS_SNV_CHANGE_SYMBOL = ">"; + private static final String HGVS_DELETION_TAG = "del"; + private static final String HGVS_DUPLICATION_TAG = "dup"; + private static final String CHROMOSOME = "CHR"; + private static final String START = "START"; + private static final String END = "END"; + private static final String REF = "REF"; + private static final String ALT = "ALT"; + + private static final String VARIANT_STRING_PATTERN = "[ACGT]*"; + + private static final Pattern mutationGRCh37GenomePositionPattern = Pattern.compile("(?<" + CHROMOSOME + ">\\S+):(?<" + START + ">\\d+)-(?<" + END + ">\\d+)"); + private static final Pattern snvPattern = Pattern.compile("c\\.\\d+((\\+|\\-|_)\\d+)?(?<" + REF + ">([ACTG])+)>(?<" + ALT + ">([ACTG])+)"); + + private static Logger logger = LoggerFactory.getLogger(CosmicParser101.class); + + private CosmicParser101() { + throw new IllegalStateException("Utility class"); + } + + /* + [column number:label] Heading Description + -------------------------------------------------------------------------------------------------------- + [00:A] GENE_SYMBOL The gene name for which the data has been curated in COSMIC. In most cases this is the accepted HGNC identifier. + [01:B] COSMIC_GENE_ID A unique COSMIC gene identifier (COSG) is used to identify a gene within the file. This identifier can be used to retrieve additional Gene information from the Cosmic_Genes file. + [02:C] TRANSCRIPT_ACCESSION Unique Ensembl Transcript identifier (ENST). For details see: https://www.ensembl.org/info/genome/stable_ids/index.html. This identifier can be used to retrieve additional Transcript information from the Cosmic_Transcripts file. + [03:D] COSMIC_SAMPLE_ID A unique COSMIC sample identifier (COSS) is used to identify a sample. This identifier can be used to retrieve additional Sample information from the Cosmic_Sample file. 
+ [04:E] SAMPLE_NAME The sample name can be derived from a number of sources. In many cases it originates from the cell line name. Other sources include names assigned by the annotators, or an incremented number assigned during an anonymization process. + [05:F] COSMIC_PHENOTYPE_ID A unique COSMIC identifier (COSO) for the classification. This identifier can be used to retrieve tissue and histology information from the classification file. + [06:G] GENOMIC_MUTATION_ID Genomic mutation identifier (COSV) to indicate the definitive position of the variant on the genome. This identifier is trackable and stable between different versions of the release. This identifier can be used to retrieve additional legacy mutation ids from the Cosmic_MutationTracking file. + [07:H] LEGACY_MUTATION_ID Legacy mutation identifier (COSM) or (COSN) that will represent existing COSM or COSN mutation identifiers. + [08:I] MUTATION_ID An internal mutation identifier to uniquely represent each mutation on a specific transcript on a given assembly build. This identifier can be used to retrieve additional legacy mutation ids from the Cosmic_MutationTracking file. + [09:J] MUTATION_CDS The change that has occurred in the nucleotide sequence. Formatting is identical to the method used for the peptide sequence. + [10:K] MUTATION_AA The change that has occurred in the peptide sequence. Formatting is based on the recommendations made by the Human Genome Variation Society. The description of each type can be found by following the link to the Mutation Overview page. + [11:L] MUTATION_DESCRIPTION Types of mutations at the amino acid level. Aggregated sequence ontology terms, for more details see: https://www.ensembl.org/info/genome/variation/prediction/predicted_data.html#consequences + [12:M] MUTATION_ZYGOSITY Information on whether the mutation was reported to be homozygous, heterozygous or unknown within the sample. 
+ [13:N] LOH LOH Information on whether the gene was reported to have loss of heterozygosity in the sample: yes, no or unknown. + [14:O] CHROMOSOME The chromosome location of a given genome screen (1-22, X, Y or MT). + [15:P] GENOME_START The start coordinate of a given genome screen. + [16:Q] GENOME_STOP The end coordinate of a given genome screen. + [17:R] STRAND Positive or negative (+/-). + [18:S] PUBMED_PMID The PUBMED ID for the paper that the sample was noted in, linking to pubmed to provide more details of the publication. + [19:T] COSMIC_STUDY_ID A unique COSMIC study identifier (COSU) is used to identify a study that has involved this sample. + [20:U] HGVSP Human Genome Variation Society peptide syntax. + [21:V] HGVSC Human Genome Variation Society coding dna sequence syntax (CDS). + [22:W] HGVSG Human Genome Variation Society genomic syntax (3' shifted). + [23:X] GENOMIC_WT_ALLELE Genomic Wild type allele sequence. + [24:Y] GENOMIC_MUT_ALLELE Genomic mutation allele sequence. 
+ [25:Z] MUTATION_SOMATIC_STATUS Information on whether the sample was reported to be Confirmed somatic variant, Reported in another cancer sample as somatic or Variant of unknown origin: + * Reported in another cancer sample as somatic = when the mutation has been reported as somatic previously but not in current paper + * Confirmed somatic variant = if the mutation has been confirmed to be somatic in the experiment by sequencing both the tumour and a matched normal from the same patient + * Variant of unknown origin = When the tumour has been sequenced without a matched normal tissue from the same individual, the somatic status of the variant cannot be assessed + */ + /** + * Method to parse the COSMIC data (from version 101) and call the callback function for the evidence entries for the given location + * + * @param genomeScreensMutantFile Cosmic GenomeScreensMutant file + * @param classificationFile Cosmic Classification file + * @param version Cosmic version, e.g: v101 + * @param name Evidence source name, e.g.: cosmic + * @param assembly Assembly, e.g.: GRCh38 + * @param callback Callback function to process the evidence entries for that location + * @throws IOException + */ + public static void parse(Path genomeScreensMutantFile, Path classificationFile, String version, String name, String assembly, + CosmicParserCallback callback) + throws IOException, FileFormatException { + final int numFields = 26; + Map classificationMap = getClassificationMap(classificationFile); + + long t0; + long t1 = 0; + long t2 = 0; + List evidenceEntries = new ArrayList<>(); + SequenceLocation old = null; + + int totalNumberRecords = 0; + int ignoredCosmicLines = 0; + int numberProcessedRecords = 0; + int invalidPositionLines = 0; + int invalidSubstitutionLines = 0; + int invalidDeletionLines = 0; + int invalidInsertionLines = 0; + int invalidDuplicationLines = 0; + int invalidMutationCDSOtherReason = 0; + + int numLine = 1; + try (BufferedReader cosmicReader = new 
BufferedReader(new InputStreamReader(FileUtils.newInputStream(genomeScreensMutantFile)))) { + String line = cosmicReader.readLine(); // First line is the header -> ignore it + logger.info("Skipping header line: {}", line); + getFields(line, numFields, numLine++); + + while ((line = cosmicReader.readLine()) != null) { + String[] fields = getFields(line, numFields, numLine); + + t0 = System.currentTimeMillis(); + EvidenceEntry evidenceEntry = buildCosmic(name, version, assembly, fields, classificationMap); + t1 += System.currentTimeMillis() - t0; + + String mutationCds = fields[MUTATION_CDS_COL]; + VariantType variantType = getVariantType(mutationCds); + + SequenceLocation sequenceLocation = parseLocation(fields[CHROMOSOME_COL], fields[STRAND_COL], fields[GENOME_START_COL], + fields[GENOME_STOP_COL], variantType); + + if (sequenceLocation == null) { + invalidPositionLines++; + } + if (old == null) { + old = sequenceLocation; + } + + if (sequenceLocation != null) { + // Parse variant + boolean validVariant = false; + + if (variantType != null) { + switch (variantType) { + case SNV: + validVariant = parseSnv(mutationCds, sequenceLocation); + if (!validVariant) { + invalidSubstitutionLines++; + } + break; + case DELETION: + validVariant = parseDeletion(mutationCds, sequenceLocation); + if (!validVariant) { + invalidDeletionLines++; + } + break; + case INSERTION: + validVariant = parseInsertion(mutationCds, sequenceLocation); + if (!validVariant) { + invalidInsertionLines++; + } + break; + case DUPLICATION: + validVariant = parseDuplication(mutationCds); + if (!validVariant) { + invalidDuplicationLines++; + } + break; + default: + logger.warn("Skipping unkonwn variant type = {}", variantType); + validVariant = false; + invalidMutationCDSOtherReason++; + } + } + + if (validVariant) { + if (sequenceLocation.getStart() == old.getStart() && sequenceLocation.getAlternate().equals(old.getAlternate())) { + evidenceEntries.add(evidenceEntry); + } else { + boolean success = 
callback.processEvidenceEntries(old, evidenceEntries); + t2 += System.currentTimeMillis() - t0; + if (success) { + numberProcessedRecords += evidenceEntries.size(); + } else { + ignoredCosmicLines += evidenceEntries.size(); + } + old = sequenceLocation; + evidenceEntries = new ArrayList<>(); + evidenceEntries.add(evidenceEntry); + } + } else { + ignoredCosmicLines++; + } + } else { + ignoredCosmicLines++; + } + totalNumberRecords++; + + if (totalNumberRecords % 10000 == 0) { + logger.info("totalNumberRecords = {}", totalNumberRecords); + logger.info("numberIndexedRecords = {} ({} %)", numberProcessedRecords, + (numberProcessedRecords * 100 / totalNumberRecords)); + logger.info("ignoredCosmicLines = {}", ignoredCosmicLines); + logger.info("buildCosmic time = {}", t1); + logger.info("callback time = {}", t2); + + t1 = 0; + t2 = 0; + } + + numLine++; + } + } finally { + logger.info("Done"); + logger.info("Total number of parsed Cosmic records: {}", totalNumberRecords); + logger.info("Number of processed Cosmic records: {}", numberProcessedRecords); + NumberFormat formatter = NumberFormat.getInstance(); + if (logger.isInfoEnabled()) { + logger.info("{} cosmic lines ignored: ", formatter.format(ignoredCosmicLines)); + } + if (invalidPositionLines > 0 && logger.isInfoEnabled()) { + logger.info("\t- {} lines by invalid position", formatter.format(invalidPositionLines)); + } + if (invalidSubstitutionLines > 0 && logger.isInfoEnabled()) { + logger.info("\t- {} lines by invalid substitution CDS", formatter.format(invalidSubstitutionLines)); + } + if (invalidInsertionLines > 0 && logger.isInfoEnabled()) { + logger.info("\t- {} lines by invalid insertion CDS", formatter.format(invalidInsertionLines)); + } + if (invalidDeletionLines > 0 && logger.isInfoEnabled()) { + logger.info("\t- {} lines by invalid deletion CDS", formatter.format(invalidDeletionLines)); + } + if (invalidDuplicationLines > 0 && logger.isInfoEnabled()) { + logger.info("\t- {} lines because mutation CDS is a 
duplication", formatter.format(invalidDuplicationLines)); + } + if (invalidMutationCDSOtherReason > 0 && logger.isInfoEnabled()) { + logger.info("\t- {} lines because mutation CDS is invalid for other reasons", + formatter.format(invalidMutationCDSOtherReason)); + } + } + } + + /* + [column number:label] Heading Description + -------------------------------------------------------------------------------------------------------- + [00:A] COSMIC_PHENOTYPE_ID A unique COSMIC identifier (COSO) for the classification. Other download files can be linked to this file using this identifier. + [01:B] PRIMARY_SITE Primary tissue specified in COSMIC. + [02:C] SITE_SUBTYPE_1 Sub tissue specified in COSMIC. + [03:D] SITE_SUBTYPE_2 Sub tissue specified in COSMIC. + [04:E] SITE_SUBTYPE_3 Sub tissue specified in COSMIC. + [05:F] PRIMARY_HISTOLOGY Primary histology specified in COSMIC. + [06:G] HISTOLOGY_SUBTYPE_1 Sub histology specified in COSMIC. + [07:H] HISTOLOGY_SUBTYPE_2 Sub histology specified in COSMIC. + [08:I] HISTOLOGY_SUBTYPE_3 Sub histology specified in COSMIC. + [09:J] NCI_CODE NCI thesaurus code for tumour histological classification. 
For details see https://ncit.nci.nih.gov + [10:Q] EFO Experimental Factor Ontology (EFO), for details see https://www.ebi.ac.uk/efo/ + */ + private static Map getClassificationMap(Path classificationFile) throws FileFormatException, IOException { + final int numFields = 11; + Map classificationMap = new HashMap<>(); + + int numLine = 1; + try (BufferedReader cosmicReader = new BufferedReader(new InputStreamReader(FileUtils.newInputStream(classificationFile)))) { + String line = cosmicReader.readLine(); // First line is the header -> ignore it + logger.info("Skipping header line: {}", line); + getFields(line, numFields, numLine++); + + while ((line = cosmicReader.readLine()) != null) { + String[] fields = getFields(line, numFields, numLine); + String cosoId = fields[0]; + if (StringUtils.isEmpty(cosoId)) { + throw new FileFormatException("Missing COSMIC_PHENOTYPE_ID at line #" + numLine + ": " + line); + } + + // Add to the map + classificationMap.put(cosoId, fields); + numLine++; + } + } + + return classificationMap; + } + + private static String[] getFields(String line, int numFields, int numLine) throws FileFormatException { + String[] fields = line.split("\t", -1); + if (fields.length != numFields) { + throw new FileFormatException("Invalid COSMIC format file. 
Expected " + numFields + " fields, got " + fields.length + + " at line #" + numLine + ": " + line); + } + return fields; + } + + private static VariantType getVariantType(String mutationCds) { + if (mutationCds.contains(HGVS_SNV_CHANGE_SYMBOL)) { + return VariantType.SNV; + } else if (mutationCds.contains(HGVS_DELETION_TAG)) { + return VariantType.DELETION; + } else if (mutationCds.contains(HGVS_INSERTION_TAG)) { + return VariantType.INSERTION; + } else if (mutationCds.contains(HGVS_DUPLICATION_TAG)) { + return VariantType.DUPLICATION; + } else { + return null; + } + } + + private static boolean parseDuplication(String dup) { + // TODO: The only Duplication in Cosmic V70 is a structural variation that is not going to be serialized + return false; + } + + private static boolean parseInsertion(String mutationCds, SequenceLocation sequenceLocation) { + boolean validVariant = true; + String[] insParts = mutationCds.split("ins"); + + if (insParts.length > 1) { + String insertedNucleotides = insParts[1]; + if (insertedNucleotides.matches("\\d+") || !insertedNucleotides.matches(VARIANT_STRING_PATTERN)) { + //c.503_508ins30 + validVariant = false; + } else { + sequenceLocation.setReference(""); + sequenceLocation.setAlternate(getPositiveStrandString(insertedNucleotides, sequenceLocation.getStrand())); + } + } else { + validVariant = false; + } + + return validVariant; + } + + private static boolean parseDeletion(String mutationCds, SequenceLocation sequenceLocation) { + boolean validVariant = true; + String[] mutationCDSArray = mutationCds.split("del"); + + // For deletions, only deletions of, at most, deletionLength nucleotide are allowed + if (mutationCDSArray.length < 2) { // c.503_508del (usually, deletions of several nucleotides) + // TODO: allow these variants + validVariant = false; + } else if (mutationCDSArray[1].matches("\\d+") + || !mutationCDSArray[1].matches(VARIANT_STRING_PATTERN)) { // Avoid allele strings containing Ns, for example + validVariant = false; + 
} else { + sequenceLocation.setReference(getPositiveStrandString(mutationCDSArray[1], sequenceLocation.getStrand())); + sequenceLocation.setAlternate(""); + } + + return validVariant; + } + + private static boolean parseSnv(String mutationCds, SequenceLocation sequenceLocation) { + boolean validVariant = true; + Matcher snvMatcher = snvPattern.matcher(mutationCds); + + if (snvMatcher.matches()) { + String ref = snvMatcher.group(REF); + String alt = snvMatcher.group(ALT); + if (!ref.equalsIgnoreCase("N") && !alt.equalsIgnoreCase("N")) { + sequenceLocation.setReference(getPositiveStrandString(ref, sequenceLocation.getStrand())); + sequenceLocation.setAlternate(getPositiveStrandString(alt, sequenceLocation.getStrand())); + } else { + validVariant = false; + } + } else { + validVariant = false; + } + + return validVariant; + } + + private static String getPositiveStrandString(String alleleString, String strand) { + if (strand.equals("-")) { + return VariantAnnotationUtils.reverseComplement(alleleString, true); + } else { + return alleleString; + } + } + + private static EvidenceEntry buildCosmic(String name, String version, String assembly, String[] fields, + Map classificationMap) { + String id = fields[GENOMIC_MUTATION_ID_COL]; + String cosoId = fields[COSMIC_PHENOTYPE_ID_COL]; + String url = "https://cancer.sanger.ac.uk/cosmic/search?q=" + id; + + EvidenceSource evidenceSource = new EvidenceSource(name, version, null); + SomaticInformation somaticInformation = getSomaticInformation(classificationMap.get(cosoId)); + List genomicFeatureList = getGenomicFeature(fields); + + List additionalProperties = new ArrayList<>(); + if (StringUtils.isNotEmpty(fields[GENOMIC_MUTATION_ID_COL])) { + additionalProperties.add(new Property("GENOMIC_MUTATION_ID", "Genomic mutation ID (COSV)", fields[GENOMIC_MUTATION_ID_COL])); + } + if (StringUtils.isNotEmpty(fields[LEGACY_MUTATION_ID_COL])) { + additionalProperties.add(new Property("LEGACY_MUTATION_ID", "Legacy ID (COSM) or (COSN)", 
fields[LEGACY_MUTATION_ID_COL])); + } + if (StringUtils.isNotEmpty(fields[MUTATION_CDS_COL])) { + additionalProperties.add(new Property("MUTATION_CDS", "Change in the nucleotide sequence", fields[MUTATION_CDS_COL])); + } + if (StringUtils.isNotEmpty(fields[MUTATION_AA_COL])) { + additionalProperties.add(new Property("MUTATION_AA", "Change in the peptide sequence", fields[MUTATION_AA_COL])); + } + if (StringUtils.isNotEmpty(fields[MUTATION_DESCRIPTION_COL])) { + additionalProperties.add(new Property("MUTATION_DESCRIPTION", "Description", fields[MUTATION_DESCRIPTION_COL])); + } + if (StringUtils.isNotEmpty(fields[MUTATION_ZYGOSITY_COL])) { + additionalProperties.add(new Property("MUTATION_ZYGOSITY", "Mutation Zygosity", fields[MUTATION_ZYGOSITY_COL])); + } + if (StringUtils.isNotEmpty(fields[MUTATION_SOMATIC_STATUS_COL])) { + additionalProperties.add(new Property("MUTATION_SOMATIC_STATUS", "Mutation Somatic Status", + fields[MUTATION_SOMATIC_STATUS_COL])); + } + if (StringUtils.isNotEmpty(fields[LOH_COL])) { + additionalProperties.add(new Property("LOH", "Loss of heterozygosity", fields[LOH_COL])); + } + if (StringUtils.isNotEmpty(fields[HGVSP_COL])) { + additionalProperties.add(new Property("HGVSP", "HGVS (peptide)", fields[HGVSP_COL])); + } + if (StringUtils.isNotEmpty(fields[HGVSC_COL])) { + additionalProperties.add(new Property("HGVSC", "HGVS (CDS)", fields[HGVSC_COL])); + } + if (StringUtils.isNotEmpty(fields[HGVSG_COL])) { + additionalProperties.add(new Property("HGVSG", "HGVS (3' shifted)", fields[HGVSG_COL])); + } + + List bibliography = getBibliography(fields[PUBMED_PMID_COL]); + + return new EvidenceEntry(evidenceSource, Collections.emptyList(), somaticInformation, + url, id, assembly, + getAlleleOriginList(Collections.singletonList(fields[MUTATION_SOMATIC_STATUS_COL])), + Collections.emptyList(), genomicFeatureList, null, null, null, null, + EthnicCategory.Z, null, null, null, additionalProperties, bibliography); + } + + private static SomaticInformation 
getSomaticInformation(String[] fields) { + String primarySite = null; + if (!isMissing(fields[PRIMARY_SITE_COL])) { + primarySite = fields[PRIMARY_SITE_COL].replace("_", " "); + } + String siteSubtype = null; + if (!isMissing(fields[SITE_SUBTYPE_1_COL])) { + siteSubtype = fields[SITE_SUBTYPE_1_COL].replace("_", " "); + } + String primaryHistology = null; + if (!isMissing(fields[PRIMARY_HISTOLOGY_COL])) { + primaryHistology = fields[PRIMARY_HISTOLOGY_COL].replace("_", " "); + } + String histologySubtype = null; + if (!isMissing(fields[HISTOLOGY_SUBTYPE_1_COL])) { + histologySubtype = fields[HISTOLOGY_SUBTYPE_1_COL].replace("_", " "); + } + String tumourOrigin = null; + + String sampleSource = null; + if (!isMissing(fields[SAMPLE_NAME_COL])) { + sampleSource = fields[SAMPLE_NAME_COL].replace("_", " "); + } + + return new SomaticInformation(primarySite, siteSubtype, primaryHistology, histologySubtype, tumourOrigin, sampleSource); + } + + private static List getBibliography(String bibliographyString) { + if (!isMissing(bibliographyString)) { + return Collections.singletonList("PMID:" + bibliographyString); + } + + return Collections.emptyList(); + } + + private static List getGenomicFeature(String[] fields) { + List genomicFeatureList = new ArrayList<>(5); + // Add gene symbol and COSMIC gene + if (StringUtils.isNotEmpty(fields[GENE_SYMBOL_COL])) { + Map xrefs = new HashMap<>(); + if (StringUtils.isNotEmpty(fields[COSMIC_GENE_ID_COL])) { + xrefs.put("COSMIC_GENE", fields[COSMIC_GENE_ID_COL]); + } + genomicFeatureList.add(createGeneGenomicFeature(fields[GENE_SYMBOL_COL].split("_")[0], FeatureTypes.gene, xrefs)); + } + + // Add transcript ID + if (StringUtils.isNotEmpty(fields[TRANSCRIPT_ACCESSION_COL])) { + genomicFeatureList.add(createGeneGenomicFeature(fields[TRANSCRIPT_ACCESSION_COL], FeatureTypes.transcript)); + } + + return genomicFeatureList; + } + + private static SequenceLocation parseLocation(String chrom, String strand, String start, String end, VariantType 
variantType) { + SequenceLocation sequenceLocation = new SequenceLocation(); + sequenceLocation.setChromosome(getCosmicChromosome(chrom)); + sequenceLocation.setStrand(strand); + if (VariantType.INSERTION.equals(variantType)) { + sequenceLocation.setEnd(Integer.parseInt(start)); + sequenceLocation.setStart(Integer.parseInt(end)); + } else { + sequenceLocation.setStart(Integer.parseInt(start)); + sequenceLocation.setEnd(Integer.parseInt(end)); + } + return sequenceLocation; + } + + private static String getCosmicChromosome(String chromosome) { + switch (chromosome) { + case "23": + return "X"; + case "24": + return "Y"; + case "25": + return "MT"; + default: + return chromosome; + } + } + + private static GenomicFeature createGeneGenomicFeature(String featureId, FeatureTypes featureTypes) { + Map map = new HashMap<>(1); + map.put(SYMBOL, featureId); + return new GenomicFeature(featureTypes, null, map); + } + + private static GenomicFeature createGeneGenomicFeature(String featureId, FeatureTypes featureTypes, Map xrefs) { + xrefs.put(SYMBOL, featureId); + return new GenomicFeature(featureTypes, null, xrefs); + } + + private static Map ORIGIN_STRING_TO_ALLELE_ORIGIN = new HashMap<>(); + + static { + + /////////////////////////////////////////////////////////////////////// + ///// ClinVar and Cosmic allele origins to SO terms /////////////// + /////////////////////////////////////////////////////////////////////// + ORIGIN_STRING_TO_ALLELE_ORIGIN.put("germline", AlleleOrigin.germline_variant); + ORIGIN_STRING_TO_ALLELE_ORIGIN.put("maternal", AlleleOrigin.maternal_variant); + ORIGIN_STRING_TO_ALLELE_ORIGIN.put("de novo", AlleleOrigin.de_novo_variant); + ORIGIN_STRING_TO_ALLELE_ORIGIN.put("paternal", AlleleOrigin.paternal_variant); + ORIGIN_STRING_TO_ALLELE_ORIGIN.put("somatic", AlleleOrigin.somatic_variant); + } + + + private static List getAlleleOriginList(List sourceOriginList) { + List alleleOrigin; + alleleOrigin = new ArrayList<>(sourceOriginList.size()); + for 
(String originString : sourceOriginList) { + AlleleOrigin alleleOriginValue = VariantAnnotationUtils.parseAlleleOrigin(originString); + if (alleleOriginValue != null) { + alleleOrigin.add(alleleOriginValue); + } else { + logger.debug("No SO term found for allele origin {}. Skipping.", originString); + } + } + return alleleOrigin; + } + + private static boolean isMissing(String string) { + return !((string != null) && !string.isEmpty() + && !string.replace(" ", "") + .replace("not specified", "") + .replace("NS", "") + .replace("NA", "") + .replace("na", "") + .replace("NULL", "") + .replace("null", "") + .replace("\t", "") + .replace(".", "") + .replace("-", "").isEmpty()); + } +} diff --git a/biodata-formats/src/test/java/org/opencb/biodata/formats/variant/cosmic/CosmicParserTest.java b/biodata-formats/src/test/java/org/opencb/biodata/formats/variant/cosmic/CosmicParserTest.java index 9aba1123..5b2fc2c4 100644 --- a/biodata-formats/src/test/java/org/opencb/biodata/formats/variant/cosmic/CosmicParserTest.java +++ b/biodata-formats/src/test/java/org/opencb/biodata/formats/variant/cosmic/CosmicParserTest.java @@ -9,18 +9,20 @@ import java.io.IOException; import java.nio.file.Path; import java.nio.file.Paths; +import java.util.HashMap; import java.util.List; +import java.util.Map; public class CosmicParserTest { // Implementation of the LineCallback function public class MyCallback implements CosmicParserCallback { private String msg; - private int counter; + private Map> results; public MyCallback(String msg) { this.msg = msg; - this.counter = 0; + this.results = new HashMap<>(); } @Override @@ -30,13 +32,20 @@ public boolean processEvidenceEntries(SequenceLocation sequenceLocation, List>>> " + seqLoc); + results.get(seqLoc).addAll(evidenceEntries); + } else { + results.put(seqLoc, evidenceEntries); + } + return true; } - public int getCounter() { - return counter; + public Map> getResults() { + return results; } } @@ -50,6 +59,53 @@ public void testCosmicParser() 
throws IOException, FileFormatException { MyCallback callback = new MyCallback(">>> Testing message"); CosmicParser.parse(cosmicFile, version, name, assembly, callback); - Assert.assertEquals(90, callback.getCounter()); + Map> results = callback.getResults(); + + for (Map.Entry> entry : results.entrySet()) { + System.out.println(entry.getKey() + " --> size = " + entry.getValue().size()); + for (EvidenceEntry evidenceEntry : entry.getValue()) { + System.out.println("\t\tid = " + evidenceEntry.getId()); + } + } + + Assert.assertEquals(89, results.size()); + } + + @Test + public void testCosmicParserV101() throws IOException, FileFormatException { + Path genomeScreensMutantFile = Paths.get(getClass().getResource("/Small_Cosmic_GenomeScreensMutant_v101_GRCh38.tsv.gz").getPath()); + Path classificationFile = Paths.get(getClass().getResource("/Small_Cosmic_Classification_v101_GRCh38.tsv.gz").getPath()); + String version = "v101"; + String name = "cosmic"; + String assembly = "GRCh38"; + + MyCallback callback = new MyCallback(">>> Testing message"); + + CosmicParser101.parse(genomeScreensMutantFile, classificationFile, version, name, assembly, callback); + Map> results = callback.getResults(); + + Assert.assertEquals(6, results.size()); + + for (Map.Entry> entry : results.entrySet()) { + System.out.println(entry.getKey() + " --> size = " + entry.getValue().size()); + for (EvidenceEntry evidenceEntry : entry.getValue()) { + System.out.println("\t\tid = " + evidenceEntry.getId()); + } + } + + SequenceLocation sequenceLocation = new SequenceLocation("20", 17605163, 17605163, "A", "G", "+"); + System.out.println("sequenceLocation.toString() = " + sequenceLocation); + Assert.assertTrue(results.containsKey(SeqLocationtoString(sequenceLocation))); + List evidenceEntries = results.get(SeqLocationtoString(sequenceLocation)); + Assert.assertEquals(1, evidenceEntries.size()); + EvidenceEntry entry = evidenceEntries.get(0); + Assert.assertEquals("COSV55713044", entry.getId()); + 
Assert.assertEquals("ovary", entry.getSomaticInformation().getPrimarySite()); + Assert.assertTrue(entry.getAdditionalProperties().stream().anyMatch(p -> p.getId().equals("HGVSG") && p.getValue().equals("20:g.17605163A>G"))); + } + + private String SeqLocationtoString(SequenceLocation location) { + return location.getChromosome() + ":" + location.getStart() + "-" + location.getEnd() + ":" + location.getReference() + ":" + + location.getAlternate(); } } \ No newline at end of file diff --git a/biodata-formats/src/test/resources/Small_Cosmic_Classification_v101_GRCh38.tsv.gz b/biodata-formats/src/test/resources/Small_Cosmic_Classification_v101_GRCh38.tsv.gz new file mode 100644 index 0000000000000000000000000000000000000000..d8e8a3911d02a5285b8b2ad13da8dd01e8c8e955 GIT binary patch literal 555 zcmV+`0@VE|eRie(O28Kk^lg`9w@8~%mFz>FECX(16>JtvvibWpxsX<{ zO%Xz#i{~(B&O}MNj3f5+xL76Y&C`O#bMW*WFTXu+SQ@Vvp#QM+_xEEEel*cX6Aw2Z z<8+<;N`7v@aC%o`>_Zbi3|3LhqGY}Riyz7TU6iB=rYN8&B*1JBjg|#irRO*2O)izL zIH-9o%aS(-cJ-ZdBI05XY>lZO9B04Z&qO9?JfC%Mj(E|IMPaUeD1j$56yOlLK0+XC z1aHk0S|tpxOBR7ES42VxZ#M(|;9MM0>iLL* z_D!m(${BBY$#PMa<3>=32CTYk%9+k|DYM41Jk!mqV@+q7tma2d4_^qx$fqR0?g{gS z=VFA~{^@w7yY@f8B!J*2^&?d tG*YXv)Rb22y1SobvOZI)9dG6C$Y_M^v~TYM^$5AY`v*q8PBQTX005Ej1#`44`MA>wZDTR>gV6iYi1Mb@Mfs4s%yD2|Yr zG9}QY~KT|2#fi39oIkzYx6XVljToKFUHBN z8Xn?!h-Yb+SjgzO_}4Nks>^!G=EVg|F0P8as0w7&W&o0Pd{qw*MainbaK_5(^E{oc z${a`c^y=%`GNf{meXPp%f71TXj(k2rmH9lKr(=fw(lI9_fFJy|3O}xm;T7+XV z??`EuE~=V`KTX_973uJVB1vAj;WX;mR7eK?3(S&Y=il5e%ljqp+hY7j>Y zBMn5E2yv>UB%7SYkmh%`X&!gmW+aMbnk(;x7eg>%A)pFKCJ5*BpkZaH3{ZIffuhF=u+eipb z3G{!6N@-m6as?P0YDU>1IAx@5!28Aj+*9j+I6uIxchB>l4u^&U=pn;%^12D~Wg*;o zr-}AzFCn$s!%9)3e8?`|WO}1Q9o~YY)~t&C$wQ zc`WV~Ewrr?!quRj6!hU5+<4H((}a}sooilb4hG%`1tsMWOu8CTinHhGO@G2Pf&XD& zhE9<|fmir{z!;uWM_&lEn~~#C*~<1qL&B_RbQejD&kk)7r?^9<7<}rrwyiA)n>_>p z9%+0YXbYoM);7dHwQ|3){f}nb{J7sf?T_4Ba|mK+IR?GW4FQ^?vRW%E#`U7SMSh`P zvhPg``YiY9aIgr@=n?yJ`PdzOvi)-M)O>5U|1>At-R5r7ynQpP>cwG}O4xm9P0DcN 
zQG}f?C`*(oi;JY12<<0g;=7{ZLLy<=xk6hZk#Oq@kBgH6rU(qM+d&;n!&5CO#uR(e z6vV@b;bFuK!J%_%r`ZeXO@Bfq9;W4CXIO||AC`5x$ko9XsKPOS>vR~?2mTH-8#h$W z6n9F0TYcXK`TN2!Bvkh$eokl<4HmiqLPARA!vIm}A_~7&ZX@N@#AvBq_Ft0|Sk`%T lC^rhiuL;;eLsRxbp3`4|v2qvnoBv1O{1+u*Zf(X2005&xlcN9t literal 0 HcmV?d00001 From d4c6e7aec889526485215c680b4e66df58c42eda Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joaqu=C3=ADn=20T=C3=A1rraga=20Gim=C3=A9nez?= Date: Thu, 27 Feb 2025 10:09:38 +0100 Subject: [PATCH 12/24] formats: remove additional property GENOMIC_MUTATION_ID since it is the ID itself, #TASK-7340, #TASK-7367 --- .../opencb/biodata/formats/variant/cosmic/CosmicParser101.java | 3 --- 1 file changed, 3 deletions(-) diff --git a/biodata-formats/src/main/java/org/opencb/biodata/formats/variant/cosmic/CosmicParser101.java b/biodata-formats/src/main/java/org/opencb/biodata/formats/variant/cosmic/CosmicParser101.java index e6376f70..439bfa02 100755 --- a/biodata-formats/src/main/java/org/opencb/biodata/formats/variant/cosmic/CosmicParser101.java +++ b/biodata-formats/src/main/java/org/opencb/biodata/formats/variant/cosmic/CosmicParser101.java @@ -446,9 +446,6 @@ private static EvidenceEntry buildCosmic(String name, String version, String ass List genomicFeatureList = getGenomicFeature(fields); List additionalProperties = new ArrayList<>(); - if (StringUtils.isNotEmpty(fields[GENOMIC_MUTATION_ID_COL])) { - additionalProperties.add(new Property("GENOMIC_MUTATION_ID", "Genomic mutation ID (COSV)", fields[GENOMIC_MUTATION_ID_COL])); - } if (StringUtils.isNotEmpty(fields[LEGACY_MUTATION_ID_COL])) { additionalProperties.add(new Property("LEGACY_MUTATION_ID", "Legacy ID (COSM) or (COSN)", fields[LEGACY_MUTATION_ID_COL])); } From cada2e63b41d7da5d446bc67b94de294d4775d0f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joaqu=C3=ADn=20T=C3=A1rraga=20Gim=C3=A9nez?= Date: Tue, 4 Mar 2025 08:15:43 +0100 Subject: [PATCH 13/24] formats: fix NumberFormatException, #TASK-7430, #TASK-7367 --- .../variant/cosmic/CosmicParser101.java | 39 
++++++++++++++++--- 1 file changed, 33 insertions(+), 6 deletions(-) diff --git a/biodata-formats/src/main/java/org/opencb/biodata/formats/variant/cosmic/CosmicParser101.java b/biodata-formats/src/main/java/org/opencb/biodata/formats/variant/cosmic/CosmicParser101.java index 439bfa02..a2e5534a 100755 --- a/biodata-formats/src/main/java/org/opencb/biodata/formats/variant/cosmic/CosmicParser101.java +++ b/biodata-formats/src/main/java/org/opencb/biodata/formats/variant/cosmic/CosmicParser101.java @@ -37,6 +37,12 @@ import java.util.regex.Matcher; import java.util.regex.Pattern; +/** + * COSMIC parser, version 101 and later + * Two files are required: + * - the COSMIC genome screens mutant file and + * - the COSMIC classification file + */ public class CosmicParser101 { // GenomeScreensMutant @@ -255,10 +261,10 @@ public static void parse(Path genomeScreensMutantFile, Path classificationFile, } totalNumberRecords++; - if (totalNumberRecords % 10000 == 0) { + if (totalNumberRecords % 100000 == 0) { logger.info("totalNumberRecords = {}", totalNumberRecords); logger.info("numberIndexedRecords = {} ({} %)", numberProcessedRecords, - (numberProcessedRecords * 100 / totalNumberRecords)); + String.format("%.2f", (numberProcessedRecords * 100.0 / totalNumberRecords))); logger.info("ignoredCosmicLines = {}", ignoredCosmicLines); logger.info("buildCosmic time = {}", t1); logger.info("callback time = {}", t2); @@ -542,15 +548,36 @@ private static List getGenomicFeature(String[] fields) { } private static SequenceLocation parseLocation(String chrom, String strand, String start, String end, VariantType variantType) { + // Sanity checks + if (StringUtils.isEmpty(chrom)) { + logger.warn("Missing chromosome when building the sequence location"); + return null; + } + int startValue; + int endValue; + try { + startValue = Integer.parseInt(start); + } catch (NumberFormatException e) { + logger.warn("Error parsing start value: '" + start + "'", e); + return null; + } + try { + endValue = 
Integer.parseInt(end); + } catch (NumberFormatException e) { + logger.warn("Error parsing end value: '" + end + "'", e); + return null; + } + SequenceLocation sequenceLocation = new SequenceLocation(); sequenceLocation.setChromosome(getCosmicChromosome(chrom)); sequenceLocation.setStrand(strand); + if (VariantType.INSERTION.equals(variantType)) { - sequenceLocation.setEnd(Integer.parseInt(start)); - sequenceLocation.setStart(Integer.parseInt(end)); + sequenceLocation.setEnd(startValue); + sequenceLocation.setStart(endValue); } else { - sequenceLocation.setStart(Integer.parseInt(start)); - sequenceLocation.setEnd(Integer.parseInt(end)); + sequenceLocation.setStart(startValue); + sequenceLocation.setEnd(endValue); } return sequenceLocation; } From 064f56abadd60db0d7faaab4d0f73faab173991b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joaqu=C3=ADn=20T=C3=A1rraga=20Gim=C3=A9nez?= Date: Thu, 24 Jul 2025 15:07:01 +0200 Subject: [PATCH 14/24] formats: add ChimerDB parser, #TASK-7830, #TASK-5564 --- biodata-formats/pom.xml | 10 +++ .../feature/chemirdb/ChemirDbParser.java | 71 ++++++++++++++++++ .../chemirdb/ChemirDbParserCallback.java | 33 ++++++++ .../feature/chemirdb/ChemirDbParserTest.java | 69 +++++++++++++++++ .../src/test/resources/ChimerKB4.small.xlsx | Bin 0 -> 8521 bytes pom.xml | 14 +++- 6 files changed, 196 insertions(+), 1 deletion(-) create mode 100644 biodata-formats/src/main/java/org/opencb/biodata/formats/feature/chemirdb/ChemirDbParser.java create mode 100644 biodata-formats/src/main/java/org/opencb/biodata/formats/feature/chemirdb/ChemirDbParserCallback.java create mode 100644 biodata-formats/src/test/java/org/opencb/biodata/formats/feature/chemirdb/ChemirDbParserTest.java create mode 100644 biodata-formats/src/test/resources/ChimerKB4.small.xlsx diff --git a/biodata-formats/pom.xml b/biodata-formats/pom.xml index 639c0866..883e455c 100644 --- a/biodata-formats/pom.xml +++ b/biodata-formats/pom.xml @@ -93,6 +93,16 @@ jackson-mapper-asl test --> + + + 
org.apache.poi + poi + + + org.apache.poi + poi-ooxml + + junit junit diff --git a/biodata-formats/src/main/java/org/opencb/biodata/formats/feature/chemirdb/ChemirDbParser.java b/biodata-formats/src/main/java/org/opencb/biodata/formats/feature/chemirdb/ChemirDbParser.java new file mode 100644 index 00000000..a3601d06 --- /dev/null +++ b/biodata-formats/src/main/java/org/opencb/biodata/formats/feature/chemirdb/ChemirDbParser.java @@ -0,0 +1,71 @@ +/* + * + * + */ + +package org.opencb.biodata.formats.feature.chemirdb; + +import org.apache.poi.hssf.usermodel.HSSFWorkbook; +import org.apache.poi.ss.usermodel.Cell; +import org.apache.poi.ss.usermodel.Row; +import org.apache.poi.ss.usermodel.Sheet; +import org.apache.poi.ss.usermodel.Workbook; +import org.apache.poi.xssf.usermodel.XSSFWorkbook; +import org.opencb.commons.utils.FileUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.FileInputStream; +import java.io.IOException; +import java.nio.file.Path; +import java.util.Iterator; + +public class ChemirDbParser { + + private static Logger logger = LoggerFactory.getLogger(ChemirDbParser.class); + + public static void parse(Path xlsxPath, ChemirDbParserCallback callback) throws IOException { + logger.info("Parsing ChemirDB file: {}", xlsxPath); + FileUtils.checkFile(xlsxPath); + + try (FileInputStream excelFile = new FileInputStream(xlsxPath.toFile()); + Workbook workbook = new XSSFWorkbook(excelFile)) { + // Get the first sheet from the workbook + Sheet sheet = workbook.getSheetAt(0); + + // Iterate over rows + Iterator rowIterator = sheet.iterator(); + while (rowIterator.hasNext()) { + Row currentRow = rowIterator.next(); + + // Skip header row if needed (e.g., if first row is header) + if (currentRow.getRowNum() == 0) { + continue; + } + + // Iterate over cells in the current row + Iterator cellIterator = currentRow.iterator(); + while (cellIterator.hasNext()) { + Cell currentCell = cellIterator.next(); + } + } + } catch (IOException e) 
{ + throw new IOException("Error reading the ChemirDB file: " + e.getMessage(), e); + } + logger.info("ChemirDB file parsed successfully: {}", xlsxPath); + } +} \ No newline at end of file diff --git a/biodata-formats/src/main/java/org/opencb/biodata/formats/feature/chemirdb/ChemirDbParserCallback.java b/biodata-formats/src/main/java/org/opencb/biodata/formats/feature/chemirdb/ChemirDbParserCallback.java new file mode 100644 index 00000000..7c6b4ff1 --- /dev/null +++ b/biodata-formats/src/main/java/org/opencb/biodata/formats/feature/chemirdb/ChemirDbParserCallback.java @@ -0,0 +1,33 @@ +/* + * + * + */ + +package org.opencb.biodata.formats.feature.chemirdb; + +import org.opencb.biodata.models.core.MiRnaGene; +import org.opencb.commons.utils.FileUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.FileInputStream; +import java.io.IOException; +import java.nio.file.Path; + +public interface ChemirDbParserCallback { + boolean processChemirDbItem(Object object); +} diff --git a/biodata-formats/src/test/java/org/opencb/biodata/formats/feature/chemirdb/ChemirDbParserTest.java b/biodata-formats/src/test/java/org/opencb/biodata/formats/feature/chemirdb/ChemirDbParserTest.java new file mode 100644 index 00000000..0aec4e01 --- /dev/null +++ b/biodata-formats/src/test/java/org/opencb/biodata/formats/feature/chemirdb/ChemirDbParserTest.java @@ -0,0 +1,69 @@ +package org.opencb.biodata.formats.feature.chemirdb; + + +import org.junit.Assert; +import org.junit.Test; +import org.opencb.biodata.formats.feature.mirbase.MirBaseParser; +import org.opencb.biodata.formats.feature.mirbase.MirBaseParserCallback; +import org.opencb.biodata.formats.feature.mirbase.MirBaseParserTest; +import org.opencb.biodata.models.core.MiRnaGene; + +import java.io.IOException; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.ArrayList; +import java.util.List; + +public class ChemirDbParserTest { + + + @Test + public void testParse() throws 
IOException { + Path xlsxPath = Paths.get(getClass().getResource("/ChimerKB4.small.xlsx").getPath()); + + ChemirDbParserTest.MyCallback callback = new ChemirDbParserTest.MyCallback(">>> Testing message"); + + ChemirDbParser.parse(xlsxPath, callback); + Assert.assertEquals(50, callback.getCounter()); + +// MiRnaGene mi0000060 = callback.getMiRnaGene("MI0000060"); + } + + + // Implementation of the MirBaseParserCallback function + public class MyCallback implements ChemirDbParserCallback { + private String msg; + private List objects; + + public MyCallback(String msg) { + this.msg = msg; + this.objects = new ArrayList<>(); + } + + @Override + public boolean processChemirDbItem(Object object) { + System.out.println(msg); + System.out.println(objects.toString()); + objects.add(object); + return true; + } + + public List getMiRnaGenes() { + return objects; + } + +// public MiRnaGene getMiRnaGene(String accession) { +// for (MiRnaGene miRnaGene : miRnaGenes) { +// if (accession.equals(miRnaGene.getAccession())) { +// return miRnaGene; +// } +// } +// return null; +// } + + public int getCounter() { + return objects.size(); + } + } + +} \ No newline at end of file diff --git a/biodata-formats/src/test/resources/ChimerKB4.small.xlsx b/biodata-formats/src/test/resources/ChimerKB4.small.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..99f9798046e0d0b786457eb6bd1af6bc231c1c9d GIT binary patch literal 8521 zcma)h1ymH=w=Xetmoy9w(ui~n-6h@K9fEYj&@dn+DJcyqC?G8%(ka~`B@Gh38NUC0 zdcE&{>z!GPnZwy<@3Z&sclJ7KZ*?U^03jSIDk@x|)|CO=9|HsW+tkC_$&-@<_O}`` zqKL?i8+{ZQo9yq?NF;6b_7iyYm3&}G=GS|3{wyxi=Yjs=s0MFJP?8jaPKWrVvKR4f z&SP`rVr5+`3|Qk6Z%O=%^06=X$Uiyi4$%$kDAMCteyN!3aXJG+T;LnXxn_+ty7Wwh z*33#MK1N$DTKPvf8=E5jvf8--zqwelTe?ZmiN1`;sC%;~8||}Uf*>n6wx;_!nf;Pk zmh(v~p4y1Crgj0DYKs%6&$YiQD)Y2q{R-GU_k-5k_{{d_A|InYa>WU@BWmtF1wWw5 zCf&C}RCWjLraH!$?8M>Jl~7P^D#Dsppl%Bzz(ME#TVQaY5-*@OTs<5uTwNVGUN}1? 
zXzIJnbK%|{u>kJp4O%Dp*idSmXyBV^R8w7J$@HsOo&k5+WjIGo3Q=|o=Y+ka_3dLX zil`w17|X3$zS*X^k5?%{TZ zXB?Ui+pB^07S9X}M*^1X@E<$;tU!)Qk=%_n;60z_e~+B~3co|AD=ExuUpGx@oc8P4 zFSGL3E0{X52dv4BNMpcqnTyhm_cU?7ok$B!SibLSmdDZUnY6gpIP>hR|Jhjs%*hvPVpq&6eE47qZz{kb3bY`eVzT3NK}19~;=;mlu~< zbic!=Wz;$1oJua=+&4{8c~UN%oi*(1x5A^_m$?!mUJ&*aAu?Vh~$cTI|R)8=zmjZ6@=_9@k@xA_osvEy?th2ruSMl}$9P*l- z)`9wW1hJoIL&@{dimPlX5PvczQCLNX-|-m1H5G7+Yxu`dbVa2_ttEWHvxu?FxSH_h zxIm96>2IJ0m?L5^-OTA-ev(&U7Wu`%)cFLfIwU6@+%zM?m~WC;mMF@digq8d%{?Bpr-_75#homA>cB>s?R-JQYX38 zZA=A^7hAnxCoF|I22~tEfk`Lb_#BR9)cR=D@M*?Q5%b`)AfyEu$_~~OtYyq-HF{+s zAtTg8jyHVD187wH5hNN}HTSL47&i`$(8Oa87rsHyY>=qWD0;o z0OINN+VBo~B%+@Ea)=_yK-!}o$@0^Vv~0+v0A!_;3$rg2sJtcs@#H3&E|4B7K_X|5 z2sjl(auez5AEc}#tZFTSICQz=%1O}S%4I=VbMY=f6j_d53IgB=4r6?Yz<`_czJsP$ z%k4o+O94J~qn~gZR@%1F8K`fyXiL zwFX%L<+~#l_=+6nI4`n-PqkHhz6&~s@IP0?5n5x*2wPGR8T&HcGre6KwGL(@&?w|>LAJky zKecIZS$EK11GbyQvl-7OeZj9q$gAOHv4v}tvEr#_isII^W8}1P_%dhCtodyYWW+$s zYn;udhd!xlQ`di}_1I|2zRo3=EZ+xXDzfFJflK(drOk(@r{JvjBh%Ag|LY@UBir)o zOi7T;Decvm>_<`;$gA*QPK-My%Xijoo=MbI{t2d5m$^Q7w?*f+Ze=*x&%CMRo%Q=Z zgwtM5=B(fxat{l?6wZI*l69uxmhV;-0UW&TG)bzvVTKRHZhu$op2QoJ%sZ3)rPx96 zMs#?GMb))NS9IxKs+FLE;0l?2vq0`xZCRU}M(@rQ{}()r76;q?hrt5?dJoXm>LNnpuXMZTm+9Q0?@nU z167?Hr_~s=X*$lMmAq}M38_k|%H0@e^lfDUj9m_wkj?z-8B1+C6X8Nfi(KZ0guhi$; z_|*m7HfQMjxEtq9?tSWAPQug3p|#tI#Wh#HA61=l*vHa z2N=CM_>aA^W@jE9_iYHV1nFtj5+A)ySsZO$5ET4A=_)3eUECPCIcw3*w9R30$rMito-p=HQ8@aKL6pE3COtCr>s z?i;hg#3I7}tNwhy{M@XghJyJCn#2hFlt+JV~=^LQ(ZeL^|*u%=OrRtvJl}G~dphx`k(c{I2 zew*j!l@g1ksR}%0_LVtruY3$k?y`m}^9R>F19PkADVOC36-cl^Xt{GZmE66Q?7~3R??Kw8}BOT08e2M0)Z=7^FH|0U z=vL)`Yt>+C0%zWSb-cL=X>uYS>!J zR(wF;e79Vd{bawo;CJDBs*6d-jebiH!gESj9E<_z?;;OE!Bwh9kfwXdp0j?OoO`jk zc~nRrfMsD&=!8u4B$Mhuj;vKSQG`*haAGn1&@X|)S2^+LuvqUmaEjdL@*}{_jx(Js z6+uz|@x|D@Y@A4iOyS$;oJHB#X64v@?t+%+EpKv`?>sEu=jjHa!NPmZac8k1bf-ut zR9Q4S6_fJ;td@APU-(j2F1d2D5^?%@W1pqDMlbpu6H1?U(5ytuvUT%`QFnvnEI%id z5(Jm!o~DoV=!S94^Js_hRz+n*1*!pGw}>ZC zF*e`Og@O<7rD0P7@XjG8bn^`>^j~@S__d(m^zH;N> 
zLQ?`T$JoE4^8klEh5VB>=~fT8l(P#Z&SK-^bpsC+g=CRU4as{d%jDl5uZ^>U@d3rD>aX)%1!iwRf3 z2O-5v4t120rV#7_b(OX#aq5F;Re5pC%;hG3?CK#z zmPC0xx0t|-$)0`@O?o^RM8AlK0F^2b)+Yh^Ji{8z{px_R*o`#ln&yhXAN448$GP3i#$!edj{|qYkg5&acv}BYrKrszAsg0O|nrD3X%b z)_I_f3}1-+c&>DcS06Nx9jAf5E?yz^A&2z24=OEX7#8nLfFnydJ{i1SW)SHA3xpNu z{?ro+w>Z!R*PQAQa%iPvxI;ymd5D*XgzuH){g+p_q>_Fox1&dbHi|J(*7*a66BH$MB#tD@6At33G4`@N- zG5hi7tZ^Pk-=E=&J*pAjbdR1Cg>sli61nKbbuXIAjqeIE99`4tUx$x>QQrs z&VILngHdL--i@sq(2!(=3vz-k#-5+3 z+aewY4Kw@z>Vu?JgQ4nomM>%}ZUq4PA}s_6T3U!12n`iE4YVGhskc1a%%BK*EXe@$ zoG%4LHx`p_RY4RB+)B7HnM1oWWVy-#;N8&mbh8dmC67>iAo5E%{}0V>oWg>@F?BR~ zS#^Xkcm`l2fKoTv4232*m`$?i1X85dmJ_dnz zKEt>eX3KB*lUsCn;103_H=XdC3&XcI5zcVe0M>d^f558JuF*;BpvB zq4@h3XZ6Yj0CZj08goD(X1HR3`$c`MI@;Nin5Ug*KFuR``=cl5!4K;cL4a_@kW6_K zCNmP>!Vsu4>a(}xZcmAdZIGp>VD5Bb0uBP8&LZ%@V8!Z(!b&C%tVDeYIzr`*ZUm57 z>_bXHMQ%tB>%{Uf*ke}a;5$3sBS!Kko2jE^3Uhh2AlFhd?}xI{L3IH88C!&el3Akv z%R=gdg*vnc4a^S~DjzJkKH`n`#vog}h6G83A2MknL|P`pFwc_+ODXH-L_@Bnee4v@ zhRqEB1Usr!sfPA(PWXcfVW^3N$FI9Y8L95(Q*u&VVy5&vANFJ#BAm?;S(%hggC8JOGz^lGw7_VMnl0td4%54SXRbMiU zSC=GyDD4N7$9$x(8#TGTkQcJz8RMg@RL%l>;# z+L$P4&PytWnJup9I5;0wDPVSx4hzNJ_a=d|3=usV8gXo9iYUx*p#pc3hS-fjkTCh> ziRs2}D+x!1tyrT=>W2WmO%Ijjq@*OeG{c%yi@SHBY-;URr?h!_OsBL1{CO&F`5Q7q zy~IM5Wvp#6r%7*AM5?I_c7#5a6Hlt~1l+#we>640i&)!qZ@IZGrK9)BymmoHO)&t| zE!FyS?@{h~D&^6mYMEB~lf5&g2EVK6?_-fu;i#2NVthFFmBGg1-)k2iOJSZq9-;To zY;RZDl4@zvIXRg;bUG&-TECzjj1y7CFg)a?3Kq&$3u;!0JC~w2OchyrVYDugr8@Ff z|5{+!`nu<{@3XC;44LcVo+h8SIM#fxZd|gA`iDFSM~LmO0T`F=$VrC1-)Pxw4Mb^N zrmEvA(xc0`rF< z)_cw;qkb7fYuBdB4Ae)Z?rnbcU8J=?-TfwgF*Rk4h)#dW+#ZSFU-z}&pZ2x?TB_iS zub6$_Q8c_YJZCziW+Hk`4XXjnNiq|)K?|avp8s17fb5?dz(3XGKkKIdRF+dz9J{$N zqmJ%~WG_D}pjgHWe2SN-y}N?%)mR(t;e$-S{}seRZisHZKbkr^bJ`|<{+&1BJGe&* z6S?9wAhPqbH|nR*yTgN3Qk|jKODlE^Np;ltm8}G`A;b!t25(OV^f@fLl7)?n2NYG` z%nUS#A82q6o+wP@zz-2=RvRL|b756)@$C9lXE}p~ZCd1?&()8GHsnE-@?`8;lEn%u zClAV*^Yy0arZ_F-EZvvd@cd_dK4?>>HdIG`7_MmZ)#^d}xAdncO<6M4wM&Zu4>drw zAJ=~4P`?@ewU$iyPh>pp%ss5FG`&3RU2GppfiJak^J;ksjX`fut#b5J+}wKSZI 
z=Mt+;cP&Wt5f`@{Qjb^}dZ$}Pr#CkS<@dbJ$`zBUjpwIhocAo4z8INmy;tAL->zUe zoxXKyY^)(qaXnK7H!jTAtOLBWl&rC0D>M&g*YKKT*2tJa2JFv34O#aoeC9EI2<&a)-^ge`Sufh=>^QW8$-e_D6wY$!CL+FqD z1a;b$J*_b^D(T~$w0S2-0pPn?^KsX=&Et>)iKYYv5`e0ECkvZE37RTPSOSwaOWgiI zRmV{?q$1L8#`Wdvw>C@yc~{|}lp&#y%T_pjN^p1?;%&wf9jtF;YPg@`T${Lf%*M*3 zX3WNIw=kxD{}|SB`K&N3INLEsu7S0Zal&{}e4HqK_$9OSV%W+gK$AG3>ITmyt`!v1 zr}=R|4|V7Li^9To{FTcmI8wUJ%eAtE#uKP+8j19%LZ1|LCpO(0vpImRfsM%{D8aGVx5^ zC7W`Wz=iKlGxH|*ss`cPj=rC_>X8^-s#t}6a{F-zCi&k2p5_d{O=X1%HMU>)1~D7`i?xyVbkEM7HkP7n$l+aK*9Bkv^WvMfDTY zq|WnYNFIsatZ#o^UD9LhIIsp^TZqQ}J1%oD!`^8kx5)c4lNFl$fMam|e&yA>$L<}H zuQ^@z{k1NKw8NqURC&r;)ZUGAUl2rQT?p9kUqJQ{HiC;y+(_|2=B#8_nM2} z+hVOpS99hO5b zG87Saptocj|KD;5w8#i&tcDm`auOr0nc_k7sjNc;no#lM0U8P z4hgl+Qk<=H$5ZVo#{Dv*y{p~#Z!xu6uCs;2aa`XKb{t*OtRwi$0#Z;o(CLzcR*KHJ zgI*YYRnKDhrjnz)w(pl>V?C$xOIb)c_c>$D*yE3 z>DgKR?PcH>Qy3!5)@0q9@NjUk|C+mS|3t*x%?$=fZnBDN7c7_G5NY?kS6~UJO+$9h z_LiK(rktz(f=|rzlv?qb*8LrVT?c8tQ~R{#P060d&Zr-LI=+*mypu^^fI(MW<8kutgA(U}lhBt(h8SXB`})H!$HR4+m9fxZ|&s*D*CU(KH8|EMRwT&1|TKWp2cYD3BIpz+Da z=^4qbrwT?!Y3D!A5`L{2C%qE3Pkm>C_vTj)*_t6Su+1TfXF4ct!Uu*-(#0m9J`@#Q zBp5RA2!wFIyWW4cePLbi|33ccf&abn&psclEBt$qLQVbEBmR5ypY0P^EAaO)hqeu& zn}0V3|K9rNs|(g&`90*2|F7Nu(QWx#%bzb4*bD9VP*j9_f94xlp8q|Ppa=UOS^w|tf8IJ+Qu;k;DE`>~pA_}??mw9UyP3a-0pRo@0Od@?k^lez literal 0 HcmV?d00001 diff --git a/pom.xml b/pom.xml index 64170b48..83d942ce 100644 --- a/pom.xml +++ b/pom.xml @@ -51,7 +51,7 @@ 3.14.0 19.0 - 2.12.0 + 2.15.0 5.0.0 0.6.0a5 4.3.1 @@ -68,6 +68,7 @@ UTF-8 1.2.17 1.9.13 + 5.4.0 @@ -335,6 +336,17 @@ log4j ${log4j.version} + + + org.apache.poi + poi + ${poi.version} + + + org.apache.poi + poi-ooxml + ${poi.version} + From 20b2ef549c0b789a868b12b87c4fc4e873d4860e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joaqu=C3=ADn=20T=C3=A1rraga=20Gim=C3=A9nez?= Date: Sun, 27 Jul 2025 08:26:16 +0200 Subject: [PATCH 15/24] models: add ChimerDB data (gene fusion), #TASK-7830, #TASK-5564 --- biodata-formats/pom.xml | 4 + .../feature/chemirdb/ChemirDbParser.java | 71 ------- 
.../feature/chimerdb/ChimerDbParser.java | 194 ++++++++++++++++++ .../GeneFusionParserCallback.java} | 15 +- .../feature/chemirdb/ChemirDbParserTest.java | 69 ------- .../feature/chimerdb/ChimerDbParserTest.java | 56 +++++ .../clinical/genefusion/GeneFusion.java | 160 +++++++++++++++ .../genefusion/GeneFusionBreakpoint.java | 66 ++++++ .../biodata/models/core/GeneAnnotation.java | 25 ++- 9 files changed, 505 insertions(+), 155 deletions(-) delete mode 100644 biodata-formats/src/main/java/org/opencb/biodata/formats/feature/chemirdb/ChemirDbParser.java create mode 100644 biodata-formats/src/main/java/org/opencb/biodata/formats/feature/chimerdb/ChimerDbParser.java rename biodata-formats/src/main/java/org/opencb/biodata/formats/feature/{chemirdb/ChemirDbParserCallback.java => chimerdb/GeneFusionParserCallback.java} (63%) delete mode 100644 biodata-formats/src/test/java/org/opencb/biodata/formats/feature/chemirdb/ChemirDbParserTest.java create mode 100644 biodata-formats/src/test/java/org/opencb/biodata/formats/feature/chimerdb/ChimerDbParserTest.java create mode 100644 biodata-models/src/main/java/org/opencb/biodata/models/clinical/genefusion/GeneFusion.java create mode 100644 biodata-models/src/main/java/org/opencb/biodata/models/clinical/genefusion/GeneFusionBreakpoint.java diff --git a/biodata-formats/pom.xml b/biodata-formats/pom.xml index 883e455c..fe60b62f 100644 --- a/biodata-formats/pom.xml +++ b/biodata-formats/pom.xml @@ -38,6 +38,10 @@ org.opencb.commons commons-lib + + org.opencb.commons + commons-datastore-core + jakarta.xml.bind diff --git a/biodata-formats/src/main/java/org/opencb/biodata/formats/feature/chemirdb/ChemirDbParser.java b/biodata-formats/src/main/java/org/opencb/biodata/formats/feature/chemirdb/ChemirDbParser.java deleted file mode 100644 index a3601d06..00000000 --- a/biodata-formats/src/main/java/org/opencb/biodata/formats/feature/chemirdb/ChemirDbParser.java +++ /dev/null @@ -1,71 +0,0 @@ -/* - * - * - */ - -package 
org.opencb.biodata.formats.feature.chemirdb; - -import org.apache.poi.hssf.usermodel.HSSFWorkbook; -import org.apache.poi.ss.usermodel.Cell; -import org.apache.poi.ss.usermodel.Row; -import org.apache.poi.ss.usermodel.Sheet; -import org.apache.poi.ss.usermodel.Workbook; -import org.apache.poi.xssf.usermodel.XSSFWorkbook; -import org.opencb.commons.utils.FileUtils; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import java.io.FileInputStream; -import java.io.IOException; -import java.nio.file.Path; -import java.util.Iterator; - -public class ChemirDbParser { - - private static Logger logger = LoggerFactory.getLogger(ChemirDbParser.class); - - public static void parse(Path xlsxPath, ChemirDbParserCallback callback) throws IOException { - logger.info("Parsing ChemirDB file: {}", xlsxPath); - FileUtils.checkFile(xlsxPath); - - try (FileInputStream excelFile = new FileInputStream(xlsxPath.toFile()); - Workbook workbook = new XSSFWorkbook(excelFile)) { - // Get the first sheet from the workbook - Sheet sheet = workbook.getSheetAt(0); - - // Iterate over rows - Iterator rowIterator = sheet.iterator(); - while (rowIterator.hasNext()) { - Row currentRow = rowIterator.next(); - - // Skip header row if needed (e.g., if first row is header) - if (currentRow.getRowNum() == 0) { - continue; - } - - // Iterate over cells in the current row - Iterator cellIterator = currentRow.iterator(); - while (cellIterator.hasNext()) { - Cell currentCell = cellIterator.next(); - } - } - } catch (IOException e) { - throw new IOException("Error reading the ChemirDB file: " + e.getMessage(), e); - } - logger.info("ChemirDB file parsed successfully: {}", xlsxPath); - } -} \ No newline at end of file diff --git a/biodata-formats/src/main/java/org/opencb/biodata/formats/feature/chimerdb/ChimerDbParser.java b/biodata-formats/src/main/java/org/opencb/biodata/formats/feature/chimerdb/ChimerDbParser.java new file mode 100644 index 00000000..45fc72e1 --- /dev/null +++ 
b/biodata-formats/src/main/java/org/opencb/biodata/formats/feature/chimerdb/ChimerDbParser.java @@ -0,0 +1,194 @@ +/* + * + * + */ + +package org.opencb.biodata.formats.feature.chimerdb; + +import org.apache.commons.lang3.StringUtils; +import org.apache.poi.ss.usermodel.CellType; +import org.apache.poi.ss.usermodel.Row; +import org.apache.poi.ss.usermodel.Sheet; +import org.apache.poi.ss.usermodel.Workbook; +import org.apache.poi.xssf.usermodel.XSSFWorkbook; +import org.opencb.biodata.models.clinical.genefusion.GeneFusion; +import org.opencb.biodata.models.clinical.genefusion.GeneFusionBreakpoint; +import org.opencb.commons.utils.FileUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.FileInputStream; +import java.io.IOException; +import java.nio.file.Path; +import java.util.Arrays; +import java.util.Iterator; + +public class ChimerDbParser { + + private static Logger logger = LoggerFactory.getLogger(ChimerDbParser.class); + + public static void parse(Path xlsxPath, GeneFusionParserCallback callback) throws IOException { + logger.info("Parsing ChemirDB file: {}", xlsxPath); + FileUtils.checkFile(xlsxPath); + + try (FileInputStream excelFile = new FileInputStream(xlsxPath.toFile()); + Workbook workbook = new XSSFWorkbook(excelFile)) { + // Get the first sheet from the workbook + Sheet sheet = workbook.getSheetAt(0); + + // Iterate over rows + Iterator rowIterator = sheet.iterator(); + while (rowIterator.hasNext()) { + Row row = rowIterator.next(); + + // Skip header row if needed (e.g., if first row is header) + if (row.getRowNum() == 0) { + continue; + } + + GeneFusion geneFusion = new GeneFusion(); + + // 0 1 2 3 4 5 6 7 8 9 10 + // id ChimerDB_Type Source webSource Fusion_pair 5Gene_Junction 3Gene_Junction H_gene H_chr H_position H_strand + // 11 12 13 14 15 16 17 18 + // T_gene T_chr T_position T_strand Genomic_breakpoint Exonic_breakpoint Breakpoint_Type Genome_Build_Version + // 19 20 21 22 23 24 25 26 27 28 + // PMID Disease 
Validation Frame Chr_info Kinase Oncogene Tumor_suppressor Receptor Transcription_Factor + // 29 30 31 + // ChimerPub ChimerSeq ChimerSeq+ + + geneFusion.setId(String.valueOf((int) row.getCell(0).getNumericCellValue())); + geneFusion.setSource("chimerdb"); + geneFusion.setPair(row.getCell(4).getStringCellValue()); + + if (row.getCell(5) != null && StringUtils.isNotEmpty(row.getCell(5).getStringCellValue())) { + geneFusion.setGene5PrimeJunction(row.getCell(5).getStringCellValue()); + } + if (row.getCell(6) != null && StringUtils.isNotEmpty(row.getCell(6).getStringCellValue())) { + geneFusion.setGene3PrimeJunction(row.getCell(6).getStringCellValue()); + } + + // Head gene breakpoint + GeneFusionBreakpoint head = new GeneFusionBreakpoint(); + if (row.getCell(7) != null && StringUtils.isNotEmpty(row.getCell(7).getStringCellValue())) { + head.setGeneName(row.getCell(7).getStringCellValue()); + } + if (row.getCell(8) != null && StringUtils.isNotEmpty(row.getCell(8).getStringCellValue())) { + head.setChromosome(row.getCell(8).getStringCellValue()); + } + if (row.getCell(9) != null) { + // The excel file may contain errors in this cell, so we need to check the type + if (row.getCell(9).getCellType() == CellType.STRING) { + logger.warn("Error in cell 9 (H_position), expected numeric value but found string: {}", + row.getCell(9).getStringCellValue()); + } else if (row.getCell(9).getCellType() == CellType.NUMERIC) { + if (row.getCell(9).getNumericCellValue() > 0) { + head.setPosition((int) row.getCell(9).getNumericCellValue()); + } + } + } + if (row.getCell(10) != null && StringUtils.isNotEmpty(row.getCell(10).getStringCellValue())) { + head.setStrand(row.getCell(10).getStringCellValue()); + } + geneFusion.setHeadGene(head); + + // Tail gene breakpoint + GeneFusionBreakpoint tail = new GeneFusionBreakpoint(); + if (row.getCell(11) != null && StringUtils.isNotEmpty(row.getCell(11).getStringCellValue())) { + tail.setGeneName(row.getCell(11).getStringCellValue()); + } + if 
(row.getCell(12) != null) { + if (row.getCell(12).getCellType() == CellType.STRING) { + if (StringUtils.isNotEmpty(row.getCell(12).getStringCellValue())) { + tail.setChromosome(row.getCell(12).getStringCellValue()); + } + } else { + logger.warn("Error in cell 12 (T_chromosome), it is not a string value"); + } + } + if (row.getCell(13) != null) { + // The excel file may contain errors in this cell, so we need to check the type + if (row.getCell(13).getCellType() == CellType.STRING) { + logger.warn("Error in cell 13 (T_position), expected numeric value but found string: {}", + row.getCell(13).getStringCellValue()); + } else if (row.getCell(13).getCellType() == CellType.NUMERIC) { + if (row.getCell(13).getNumericCellValue() > 0) { + tail.setPosition((int) row.getCell(13).getNumericCellValue()); + } + } + } + if (row.getCell(14) != null && StringUtils.isNotEmpty(row.getCell(14).getStringCellValue())) { + tail.setStrand(row.getCell(14).getStringCellValue()); + } + geneFusion.setTailGene(tail); + + // Publications + if (row.getCell(19) != null) { + if (row.getCell(19).getCellType() == CellType.STRING) { + if (StringUtils.isNotEmpty(row.getCell(19).getStringCellValue())) { + geneFusion.setPublications(Arrays.asList(row.getCell(19).getStringCellValue().split(","))); + } + } else if (row.getCell(19).getCellType() == CellType.NUMERIC) { + if (row.getCell(19).getNumericCellValue() > 0) { + geneFusion.setPublications(Arrays.asList(String.valueOf((int) row.getCell(19).getNumericCellValue()))); + } + } + } + + // Diseases + if (row.getCell(20) != null && StringUtils.isNotEmpty(row.getCell(20).getStringCellValue())) { + geneFusion.setDiseases(Arrays.asList(row.getCell(20).getStringCellValue().split(","))); + } + + // Validations + if (row.getCell(21) != null && StringUtils.isNotEmpty(row.getCell(21).getStringCellValue())) { + geneFusion.setValidations(Arrays.asList(row.getCell(21).getStringCellValue().split(","))); + } + + // Attributes + if (row.getCell(15) != null && 
row.getCell(15).getNumericCellValue() == 1) { + geneFusion.getAttributes().put("genomic_breakpoint", true); + } + if (row.getCell(16) != null && row.getCell(16).getNumericCellValue() == 1) { + geneFusion.getAttributes().put("exomic_breakpoint", true); + } + if (row.getCell(24) != null && StringUtils.isNotEmpty(row.getCell(24).getStringCellValue())) { + geneFusion.getAttributes().put("kinase", true); + } + if (row.getCell(25) != null && StringUtils.isNotEmpty(row.getCell(25).getStringCellValue())) { + geneFusion.getAttributes().put("oncogene", true); + } + if (row.getCell(26) != null && StringUtils.isNotEmpty(row.getCell(26).getStringCellValue())) { + geneFusion.getAttributes().put("tumor_supressor", true); + } + if (row.getCell(27) != null && StringUtils.isNotEmpty(row.getCell(27).getStringCellValue())) { + geneFusion.getAttributes().put("receptor", true); + } + if (row.getCell(28) != null && StringUtils.isNotEmpty(row.getCell(28).getStringCellValue())) { + geneFusion.getAttributes().put("transcriptor_factor", true); + } + + + // Callback to process the gene fusion + callback.processGeneFusion(geneFusion); + } + } catch (IOException e) { + throw new IOException("Error reading the ChemirDB file: " + e.getMessage(), e); + } + logger.info("ChemirDB file parsed successfully: {}", xlsxPath); + } +} \ No newline at end of file diff --git a/biodata-formats/src/main/java/org/opencb/biodata/formats/feature/chemirdb/ChemirDbParserCallback.java b/biodata-formats/src/main/java/org/opencb/biodata/formats/feature/chimerdb/GeneFusionParserCallback.java similarity index 63% rename from biodata-formats/src/main/java/org/opencb/biodata/formats/feature/chemirdb/ChemirDbParserCallback.java rename to biodata-formats/src/main/java/org/opencb/biodata/formats/feature/chimerdb/GeneFusionParserCallback.java index 7c6b4ff1..34b4576a 100644 --- a/biodata-formats/src/main/java/org/opencb/biodata/formats/feature/chemirdb/ChemirDbParserCallback.java +++ 
b/biodata-formats/src/main/java/org/opencb/biodata/formats/feature/chimerdb/GeneFusionParserCallback.java @@ -17,17 +17,10 @@ * */ -package org.opencb.biodata.formats.feature.chemirdb; +package org.opencb.biodata.formats.feature.chimerdb; -import org.opencb.biodata.models.core.MiRnaGene; -import org.opencb.commons.utils.FileUtils; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; +import org.opencb.biodata.models.clinical.genefusion.GeneFusion; -import java.io.FileInputStream; -import java.io.IOException; -import java.nio.file.Path; - -public interface ChemirDbParserCallback { - boolean processChemirDbItem(Object object); +public interface GeneFusionParserCallback { + boolean processGeneFusion(GeneFusion geneFusion); } diff --git a/biodata-formats/src/test/java/org/opencb/biodata/formats/feature/chemirdb/ChemirDbParserTest.java b/biodata-formats/src/test/java/org/opencb/biodata/formats/feature/chemirdb/ChemirDbParserTest.java deleted file mode 100644 index 0aec4e01..00000000 --- a/biodata-formats/src/test/java/org/opencb/biodata/formats/feature/chemirdb/ChemirDbParserTest.java +++ /dev/null @@ -1,69 +0,0 @@ -package org.opencb.biodata.formats.feature.chemirdb; - - -import org.junit.Assert; -import org.junit.Test; -import org.opencb.biodata.formats.feature.mirbase.MirBaseParser; -import org.opencb.biodata.formats.feature.mirbase.MirBaseParserCallback; -import org.opencb.biodata.formats.feature.mirbase.MirBaseParserTest; -import org.opencb.biodata.models.core.MiRnaGene; - -import java.io.IOException; -import java.nio.file.Path; -import java.nio.file.Paths; -import java.util.ArrayList; -import java.util.List; - -public class ChemirDbParserTest { - - - @Test - public void testParse() throws IOException { - Path xlsxPath = Paths.get(getClass().getResource("/ChimerKB4.small.xlsx").getPath()); - - ChemirDbParserTest.MyCallback callback = new ChemirDbParserTest.MyCallback(">>> Testing message"); - - ChemirDbParser.parse(xlsxPath, callback); - 
Assert.assertEquals(50, callback.getCounter()); - -// MiRnaGene mi0000060 = callback.getMiRnaGene("MI0000060"); - } - - - // Implementation of the MirBaseParserCallback function - public class MyCallback implements ChemirDbParserCallback { - private String msg; - private List objects; - - public MyCallback(String msg) { - this.msg = msg; - this.objects = new ArrayList<>(); - } - - @Override - public boolean processChemirDbItem(Object object) { - System.out.println(msg); - System.out.println(objects.toString()); - objects.add(object); - return true; - } - - public List getMiRnaGenes() { - return objects; - } - -// public MiRnaGene getMiRnaGene(String accession) { -// for (MiRnaGene miRnaGene : miRnaGenes) { -// if (accession.equals(miRnaGene.getAccession())) { -// return miRnaGene; -// } -// } -// return null; -// } - - public int getCounter() { - return objects.size(); - } - } - -} \ No newline at end of file diff --git a/biodata-formats/src/test/java/org/opencb/biodata/formats/feature/chimerdb/ChimerDbParserTest.java b/biodata-formats/src/test/java/org/opencb/biodata/formats/feature/chimerdb/ChimerDbParserTest.java new file mode 100644 index 00000000..e8fae4dc --- /dev/null +++ b/biodata-formats/src/test/java/org/opencb/biodata/formats/feature/chimerdb/ChimerDbParserTest.java @@ -0,0 +1,56 @@ +package org.opencb.biodata.formats.feature.chimerdb; + + +import org.junit.Assert; +import org.junit.Test; +import org.opencb.biodata.models.clinical.genefusion.GeneFusion; + +import java.io.IOException; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.ArrayList; +import java.util.List; + +public class ChimerDbParserTest { + + + public void testParse() throws IOException { + Path xlsxPath = Paths.get(getClass().getResource("/ChimerKB4.small.xlsx").getPath()); + + ChimerDbParserTest.MyCallback callback = new ChimerDbParserTest.MyCallback(">>> Testing message"); + + ChimerDbParser.parse(xlsxPath, callback); + Assert.assertEquals(50, 
callback.getCounter()); + +// MiRnaGene mi0000060 = callback.getMiRnaGene("MI0000060"); + } + + + // Implementation of the MirBaseParserCallback function + public class MyCallback implements GeneFusionParserCallback { + private String msg; + private List geneFusionList; + + public MyCallback(String msg) { + this.msg = msg; + this.geneFusionList = new ArrayList<>(); + } + + @Override + public boolean processGeneFusion(GeneFusion geneFusion) { + System.out.println(msg); + System.out.println(geneFusion.toString()); + geneFusionList.add(geneFusion); + return true; + } + + public List getGeneFusionList() { + return geneFusionList; + } + + public int getCounter() { + return geneFusionList.size(); + } + } + +} \ No newline at end of file diff --git a/biodata-models/src/main/java/org/opencb/biodata/models/clinical/genefusion/GeneFusion.java b/biodata-models/src/main/java/org/opencb/biodata/models/clinical/genefusion/GeneFusion.java new file mode 100644 index 00000000..bb43db53 --- /dev/null +++ b/biodata-models/src/main/java/org/opencb/biodata/models/clinical/genefusion/GeneFusion.java @@ -0,0 +1,160 @@ +package org.opencb.biodata.models.clinical.genefusion; + +import org.opencb.commons.datastore.core.ObjectMap; + +import java.util.ArrayList; +import java.util.List; + +public class GeneFusion { + + private String id; + private String pair; + private String source; + private String gene5PrimeJunction; + private String gene3PrimeJunction; + private GeneFusionBreakpoint headGene; + private GeneFusionBreakpoint tailGene; + private List diseases; + private List publications; + private List validations; + private ObjectMap attributes; + + public GeneFusion() { + this.diseases = new ArrayList<>(); + this.publications = new ArrayList<>(); + this.attributes = new ObjectMap(); + } + + public GeneFusion(String id, String pair, String source, String gene5PrimeJunction, String gene3PrimeJunction, + GeneFusionBreakpoint headGene, GeneFusionBreakpoint tailGene, List diseases, List 
publications, + List validations, ObjectMap attributes) { + this.id = id; + this.pair = pair; + this.source = source; + this.gene5PrimeJunction = gene5PrimeJunction; + this.gene3PrimeJunction = gene3PrimeJunction; + this.headGene = headGene; + this.tailGene = tailGene; + this.diseases = diseases; + this.publications = publications; + this.validations = validations; + this.attributes = attributes; + } + + @Override + public String toString() { + final StringBuilder sb = new StringBuilder("GeneFusion{"); + sb.append("id='").append(id).append('\''); + sb.append("pair='").append(pair).append('\''); + sb.append(", source='").append(source).append('\''); + sb.append(", gene5PrimeJunction='").append(gene5PrimeJunction).append('\''); + sb.append(", gene3PrimeJunction='").append(gene3PrimeJunction).append('\''); + sb.append(", headGene=").append(headGene); + sb.append(", tailGene=").append(tailGene); + sb.append(", diseases=").append(diseases); + sb.append(", publications=").append(publications); + sb.append(", validations='").append(validations).append('\''); + sb.append(", attributes=").append(attributes.toJson()); + sb.append('}'); + return sb.toString(); + } + + public String getId() { + return id; + } + + public GeneFusion setId(String id) { + this.id = id; + return this; + } + + public String getPair() { + return pair; + } + + public GeneFusion setPair(String pair) { + this.pair = pair; + return this; + } + + public String getSource() { + return source; + } + + public GeneFusion setSource(String source) { + this.source = source; + return this; + } + + public String getGene5PrimeJunction() { + return gene5PrimeJunction; + } + + public GeneFusion setGene5PrimeJunction(String gene5PrimeJunction) { + this.gene5PrimeJunction = gene5PrimeJunction; + return this; + } + + public String getGene3PrimeJunction() { + return gene3PrimeJunction; + } + + public GeneFusion setGene3PrimeJunction(String gene3PrimeJunction) { + this.gene3PrimeJunction = gene3PrimeJunction; + return 
this; + } + + public GeneFusionBreakpoint getHeadGene() { + return headGene; + } + + public GeneFusion setHeadGene(GeneFusionBreakpoint headGene) { + this.headGene = headGene; + return this; + } + + public GeneFusionBreakpoint getTailGene() { + return tailGene; + } + + public GeneFusion setTailGene(GeneFusionBreakpoint tailGene) { + this.tailGene = tailGene; + return this; + } + + public List getDiseases() { + return diseases; + } + + public GeneFusion setDiseases(List diseases) { + this.diseases = diseases; + return this; + } + + public List getPublications() { + return publications; + } + + public GeneFusion setPublications(List publications) { + this.publications = publications; + return this; + } + + public List getValidations() { + return validations; + } + + public GeneFusion setValidations(List validations) { + this.validations = validations; + return this; + } + + public ObjectMap getAttributes() { + return attributes; + } + + public GeneFusion setAttributes(ObjectMap attributes) { + this.attributes = attributes; + return this; + } +} diff --git a/biodata-models/src/main/java/org/opencb/biodata/models/clinical/genefusion/GeneFusionBreakpoint.java b/biodata-models/src/main/java/org/opencb/biodata/models/clinical/genefusion/GeneFusionBreakpoint.java new file mode 100644 index 00000000..2604df5d --- /dev/null +++ b/biodata-models/src/main/java/org/opencb/biodata/models/clinical/genefusion/GeneFusionBreakpoint.java @@ -0,0 +1,66 @@ +package org.opencb.biodata.models.clinical.genefusion; + +public class GeneFusionBreakpoint { + + private String geneName; + private String chromosome; + private int position; + private String strand; + + public GeneFusionBreakpoint() { + } + + public GeneFusionBreakpoint(String geneName, String chromosome, int position, String strand) { + this.geneName = geneName; + this.chromosome = chromosome; + this.position = position; + this.strand = strand; + } + + @Override + public String toString() { + final StringBuilder sb = new 
StringBuilder("GeneFusionBreakpoint{"); + sb.append("geneName='").append(geneName).append('\''); + sb.append(", chromosome='").append(chromosome).append('\''); + sb.append(", position=").append(position); + sb.append(", strand=").append(strand); + sb.append('}'); + return sb.toString(); + } + + public String getGeneName() { + return geneName; + } + + public GeneFusionBreakpoint setGeneName(String geneName) { + this.geneName = geneName; + return this; + } + + public String getChromosome() { + return chromosome; + } + + public GeneFusionBreakpoint setChromosome(String chromosome) { + this.chromosome = chromosome; + return this; + } + + public int getPosition() { + return position; + } + + public GeneFusionBreakpoint setPosition(int position) { + this.position = position; + return this; + } + + public String getStrand() { + return strand; + } + + public GeneFusionBreakpoint setStrand(String strand) { + this.strand = strand; + return this; + } +} diff --git a/biodata-models/src/main/java/org/opencb/biodata/models/core/GeneAnnotation.java b/biodata-models/src/main/java/org/opencb/biodata/models/core/GeneAnnotation.java index 02b0c0db..ffde725a 100644 --- a/biodata-models/src/main/java/org/opencb/biodata/models/core/GeneAnnotation.java +++ b/biodata-models/src/main/java/org/opencb/biodata/models/core/GeneAnnotation.java @@ -19,6 +19,7 @@ package org.opencb.biodata.models.core; +import org.opencb.biodata.models.clinical.genefusion.GeneFusion; import org.opencb.biodata.models.variant.avro.Constraint; import org.opencb.biodata.models.variant.avro.Expression; import org.opencb.biodata.models.variant.avro.GeneDrugInteraction; @@ -38,6 +39,7 @@ public class GeneAnnotation { private List cancerAssociations; private List cancerHotspots; private List imprinted; + private List fusions; public GeneAnnotation() { this.expression = new ArrayList<>(); @@ -48,30 +50,34 @@ public GeneAnnotation() { this.cancerAssociations = new ArrayList<>(); this.cancerHotspots = new ArrayList<>(); 
this.imprinted = new ArrayList<>(); + this.fusions = new ArrayList<>(); } @Deprecated public GeneAnnotation(List expression, List diseases, List drugs, List constraints, List mirnaTargets) { - this(expression, diseases, drugs, constraints, mirnaTargets, new ArrayList<>(), new ArrayList<>(), new ArrayList<>()); + this(expression, diseases, drugs, constraints, mirnaTargets, new ArrayList<>(), new ArrayList<>(), new ArrayList<>(), + new ArrayList<>()); } @Deprecated public GeneAnnotation(List expression, List diseases, List drugs, List constraints, List mirnaTargets, List cancerAssociations) { - this(expression, diseases, drugs, constraints, mirnaTargets, cancerAssociations, new ArrayList<>(), new ArrayList<>()); + this(expression, diseases, drugs, constraints, mirnaTargets, cancerAssociations, new ArrayList<>(), new ArrayList<>(), + new ArrayList<>()); } @Deprecated public GeneAnnotation(List expression, List diseases, List drugs, List constraints, List mirnaTargets, List cancerAssociations, List cancerHotspots) { - this(expression, diseases, drugs, constraints, mirnaTargets, cancerAssociations, cancerHotspots, new ArrayList<>()); + this(expression, diseases, drugs, constraints, mirnaTargets, cancerAssociations, cancerHotspots, new ArrayList<>(), + new ArrayList<>()); } public GeneAnnotation(List expression, List diseases, List drugs, List constraints, List mirnaTargets, List cancerAssociations, - List cancerHotspots, List imprinted) { + List cancerHotspots, List imprinted, List fusions) { this.expression = expression; this.diseases = diseases; this.drugs = drugs; @@ -80,6 +86,7 @@ public GeneAnnotation(List expression, List di this.cancerAssociations = cancerAssociations; this.cancerHotspots = cancerHotspots; this.imprinted = imprinted; + this.fusions = fusions; } @Override @@ -93,6 +100,7 @@ public String toString() { sb.append(", cancerAssociations=").append(cancerAssociations); sb.append(", cancerHotspots=").append(cancerHotspots); sb.append(", 
imprinted=").append(imprinted); + sb.append(", fusions=").append(fusions); sb.append('}'); return sb.toString(); } @@ -168,4 +176,13 @@ public GeneAnnotation setImprinted(List imprinted) { this.imprinted = imprinted; return this; } + + public List getFusions() { + return fusions; + } + + public GeneAnnotation setFusions(List fusions) { + this.fusions = fusions; + return this; + } } From c3caccda5103982419cad4edada86c169d5835b0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joaqu=C3=ADn=20T=C3=A1rraga=20Gim=C3=A9nez?= Date: Mon, 28 Jul 2025 11:36:08 +0200 Subject: [PATCH 16/24] models: move GeneFusion to core, #TASK-7830, #TASK-5564 --- .../biodata/formats/feature/chimerdb/ChimerDbParser.java | 8 +++++--- .../feature/chimerdb/GeneFusionParserCallback.java | 2 +- .../formats/feature/chimerdb/ChimerDbParserTest.java | 3 +-- .../org/opencb/biodata/models/core/GeneAnnotation.java | 2 +- .../models/{clinical => core}/genefusion/GeneFusion.java | 2 +- .../genefusion/GeneFusionBreakpoint.java | 2 +- 6 files changed, 10 insertions(+), 9 deletions(-) rename biodata-models/src/main/java/org/opencb/biodata/models/{clinical => core}/genefusion/GeneFusion.java (98%) rename biodata-models/src/main/java/org/opencb/biodata/models/{clinical => core}/genefusion/GeneFusionBreakpoint.java (96%) diff --git a/biodata-formats/src/main/java/org/opencb/biodata/formats/feature/chimerdb/ChimerDbParser.java b/biodata-formats/src/main/java/org/opencb/biodata/formats/feature/chimerdb/ChimerDbParser.java index 45fc72e1..50c9675c 100644 --- a/biodata-formats/src/main/java/org/opencb/biodata/formats/feature/chimerdb/ChimerDbParser.java +++ b/biodata-formats/src/main/java/org/opencb/biodata/formats/feature/chimerdb/ChimerDbParser.java @@ -25,8 +25,8 @@ import org.apache.poi.ss.usermodel.Sheet; import org.apache.poi.ss.usermodel.Workbook; import org.apache.poi.xssf.usermodel.XSSFWorkbook; -import org.opencb.biodata.models.clinical.genefusion.GeneFusion; -import 
org.opencb.biodata.models.clinical.genefusion.GeneFusionBreakpoint; +import org.opencb.biodata.models.core.genefusion.GeneFusion; +import org.opencb.biodata.models.core.genefusion.GeneFusionBreakpoint; import org.opencb.commons.utils.FileUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -166,6 +166,9 @@ public static void parse(Path xlsxPath, GeneFusionParserCallback callback) throw if (row.getCell(16) != null && row.getCell(16).getNumericCellValue() == 1) { geneFusion.getAttributes().put("exomic_breakpoint", true); } + if (row.getCell(23) != null && StringUtils.isNotEmpty(row.getCell(23).getStringCellValue())) { + geneFusion.getAttributes().put("chr_info", row.getCell(23).getStringCellValue()); + } if (row.getCell(24) != null && StringUtils.isNotEmpty(row.getCell(24).getStringCellValue())) { geneFusion.getAttributes().put("kinase", true); } @@ -182,7 +185,6 @@ public static void parse(Path xlsxPath, GeneFusionParserCallback callback) throw geneFusion.getAttributes().put("transcriptor_factor", true); } - // Callback to process the gene fusion callback.processGeneFusion(geneFusion); } diff --git a/biodata-formats/src/main/java/org/opencb/biodata/formats/feature/chimerdb/GeneFusionParserCallback.java b/biodata-formats/src/main/java/org/opencb/biodata/formats/feature/chimerdb/GeneFusionParserCallback.java index 34b4576a..b5836cfd 100644 --- a/biodata-formats/src/main/java/org/opencb/biodata/formats/feature/chimerdb/GeneFusionParserCallback.java +++ b/biodata-formats/src/main/java/org/opencb/biodata/formats/feature/chimerdb/GeneFusionParserCallback.java @@ -19,7 +19,7 @@ package org.opencb.biodata.formats.feature.chimerdb; -import org.opencb.biodata.models.clinical.genefusion.GeneFusion; +import org.opencb.biodata.models.core.genefusion.GeneFusion; public interface GeneFusionParserCallback { boolean processGeneFusion(GeneFusion geneFusion); diff --git a/biodata-formats/src/test/java/org/opencb/biodata/formats/feature/chimerdb/ChimerDbParserTest.java 
b/biodata-formats/src/test/java/org/opencb/biodata/formats/feature/chimerdb/ChimerDbParserTest.java index e8fae4dc..01bace8f 100644 --- a/biodata-formats/src/test/java/org/opencb/biodata/formats/feature/chimerdb/ChimerDbParserTest.java +++ b/biodata-formats/src/test/java/org/opencb/biodata/formats/feature/chimerdb/ChimerDbParserTest.java @@ -2,8 +2,7 @@ import org.junit.Assert; -import org.junit.Test; -import org.opencb.biodata.models.clinical.genefusion.GeneFusion; +import org.opencb.biodata.models.core.genefusion.GeneFusion; import java.io.IOException; import java.nio.file.Path; diff --git a/biodata-models/src/main/java/org/opencb/biodata/models/core/GeneAnnotation.java b/biodata-models/src/main/java/org/opencb/biodata/models/core/GeneAnnotation.java index ffde725a..27b008c0 100644 --- a/biodata-models/src/main/java/org/opencb/biodata/models/core/GeneAnnotation.java +++ b/biodata-models/src/main/java/org/opencb/biodata/models/core/GeneAnnotation.java @@ -19,7 +19,7 @@ package org.opencb.biodata.models.core; -import org.opencb.biodata.models.clinical.genefusion.GeneFusion; +import org.opencb.biodata.models.core.genefusion.GeneFusion; import org.opencb.biodata.models.variant.avro.Constraint; import org.opencb.biodata.models.variant.avro.Expression; import org.opencb.biodata.models.variant.avro.GeneDrugInteraction; diff --git a/biodata-models/src/main/java/org/opencb/biodata/models/clinical/genefusion/GeneFusion.java b/biodata-models/src/main/java/org/opencb/biodata/models/core/genefusion/GeneFusion.java similarity index 98% rename from biodata-models/src/main/java/org/opencb/biodata/models/clinical/genefusion/GeneFusion.java rename to biodata-models/src/main/java/org/opencb/biodata/models/core/genefusion/GeneFusion.java index bb43db53..d8d20d49 100644 --- a/biodata-models/src/main/java/org/opencb/biodata/models/clinical/genefusion/GeneFusion.java +++ b/biodata-models/src/main/java/org/opencb/biodata/models/core/genefusion/GeneFusion.java @@ -1,4 +1,4 @@ -package 
org.opencb.biodata.models.clinical.genefusion; +package org.opencb.biodata.models.core.genefusion; import org.opencb.commons.datastore.core.ObjectMap; diff --git a/biodata-models/src/main/java/org/opencb/biodata/models/clinical/genefusion/GeneFusionBreakpoint.java b/biodata-models/src/main/java/org/opencb/biodata/models/core/genefusion/GeneFusionBreakpoint.java similarity index 96% rename from biodata-models/src/main/java/org/opencb/biodata/models/clinical/genefusion/GeneFusionBreakpoint.java rename to biodata-models/src/main/java/org/opencb/biodata/models/core/genefusion/GeneFusionBreakpoint.java index 2604df5d..c145b539 100644 --- a/biodata-models/src/main/java/org/opencb/biodata/models/clinical/genefusion/GeneFusionBreakpoint.java +++ b/biodata-models/src/main/java/org/opencb/biodata/models/core/genefusion/GeneFusionBreakpoint.java @@ -1,4 +1,4 @@ -package org.opencb.biodata.models.clinical.genefusion; +package org.opencb.biodata.models.core.genefusion; public class GeneFusionBreakpoint { From 22a8d3527346a39a19a2ba17980570eb2c2ba010 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joaqu=C3=ADn=20T=C3=A1rraga=20Gim=C3=A9nez?= Date: Mon, 28 Jul 2025 15:15:27 +0200 Subject: [PATCH 17/24] models: add imprinted genes and gene fusions in variant annotation, #TASK-7754, #TASK-7830, #TASK-5564 --- .../feature/chimerdb/ChimerDbParser.java | 18 +- .../chimerdb/GeneFusionParserCallback.java | 3 +- .../feature/chimerdb/ChimerDbParserTest.java | 2 +- .../src/main/avro/variantAnnotation.avdl | 31 ++++ .../biodata/models/core/GeneAnnotation.java | 6 +- .../biodata/models/core/ImprintedGene.java | 103 ----------- .../models/core/genefusion/GeneFusion.java | 160 ------------------ .../core/genefusion/GeneFusionBreakpoint.java | 66 -------- 8 files changed, 44 insertions(+), 345 deletions(-) delete mode 100644 biodata-models/src/main/java/org/opencb/biodata/models/core/ImprintedGene.java delete mode 100644 
biodata-models/src/main/java/org/opencb/biodata/models/core/genefusion/GeneFusion.java delete mode 100644 biodata-models/src/main/java/org/opencb/biodata/models/core/genefusion/GeneFusionBreakpoint.java diff --git a/biodata-formats/src/main/java/org/opencb/biodata/formats/feature/chimerdb/ChimerDbParser.java b/biodata-formats/src/main/java/org/opencb/biodata/formats/feature/chimerdb/ChimerDbParser.java index 50c9675c..1290c54b 100644 --- a/biodata-formats/src/main/java/org/opencb/biodata/formats/feature/chimerdb/ChimerDbParser.java +++ b/biodata-formats/src/main/java/org/opencb/biodata/formats/feature/chimerdb/ChimerDbParser.java @@ -25,8 +25,8 @@ import org.apache.poi.ss.usermodel.Sheet; import org.apache.poi.ss.usermodel.Workbook; import org.apache.poi.xssf.usermodel.XSSFWorkbook; -import org.opencb.biodata.models.core.genefusion.GeneFusion; -import org.opencb.biodata.models.core.genefusion.GeneFusionBreakpoint; +import org.opencb.biodata.models.variant.avro.GeneFusion; +import org.opencb.biodata.models.variant.avro.GeneFusionBreakpoint; import org.opencb.commons.utils.FileUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -161,28 +161,28 @@ public static void parse(Path xlsxPath, GeneFusionParserCallback callback) throw // Attributes if (row.getCell(15) != null && row.getCell(15).getNumericCellValue() == 1) { - geneFusion.getAttributes().put("genomic_breakpoint", true); + geneFusion.getAttributes().put("genomic_breakpoint", String.valueOf(true)); } if (row.getCell(16) != null && row.getCell(16).getNumericCellValue() == 1) { - geneFusion.getAttributes().put("exomic_breakpoint", true); + geneFusion.getAttributes().put("exomic_breakpoint", String.valueOf(true)); } if (row.getCell(23) != null && StringUtils.isNotEmpty(row.getCell(23).getStringCellValue())) { geneFusion.getAttributes().put("chr_info", row.getCell(23).getStringCellValue()); } if (row.getCell(24) != null && StringUtils.isNotEmpty(row.getCell(24).getStringCellValue())) { - 
geneFusion.getAttributes().put("kinase", true); + geneFusion.getAttributes().put("kinase", String.valueOf(true)); } if (row.getCell(25) != null && StringUtils.isNotEmpty(row.getCell(25).getStringCellValue())) { - geneFusion.getAttributes().put("oncogene", true); + geneFusion.getAttributes().put("oncogene", String.valueOf(true)); } if (row.getCell(26) != null && StringUtils.isNotEmpty(row.getCell(26).getStringCellValue())) { - geneFusion.getAttributes().put("tumor_supressor", true); + geneFusion.getAttributes().put("tumor_supressor", String.valueOf(true)); } if (row.getCell(27) != null && StringUtils.isNotEmpty(row.getCell(27).getStringCellValue())) { - geneFusion.getAttributes().put("receptor", true); + geneFusion.getAttributes().put("receptor", String.valueOf(true)); } if (row.getCell(28) != null && StringUtils.isNotEmpty(row.getCell(28).getStringCellValue())) { - geneFusion.getAttributes().put("transcriptor_factor", true); + geneFusion.getAttributes().put("transcriptor_factor", String.valueOf(true)); } // Callback to process the gene fusion diff --git a/biodata-formats/src/main/java/org/opencb/biodata/formats/feature/chimerdb/GeneFusionParserCallback.java b/biodata-formats/src/main/java/org/opencb/biodata/formats/feature/chimerdb/GeneFusionParserCallback.java index b5836cfd..da756e48 100644 --- a/biodata-formats/src/main/java/org/opencb/biodata/formats/feature/chimerdb/GeneFusionParserCallback.java +++ b/biodata-formats/src/main/java/org/opencb/biodata/formats/feature/chimerdb/GeneFusionParserCallback.java @@ -19,7 +19,8 @@ package org.opencb.biodata.formats.feature.chimerdb; -import org.opencb.biodata.models.core.genefusion.GeneFusion; + +import org.opencb.biodata.models.variant.avro.GeneFusion; public interface GeneFusionParserCallback { boolean processGeneFusion(GeneFusion geneFusion); diff --git a/biodata-formats/src/test/java/org/opencb/biodata/formats/feature/chimerdb/ChimerDbParserTest.java 
b/biodata-formats/src/test/java/org/opencb/biodata/formats/feature/chimerdb/ChimerDbParserTest.java index 01bace8f..8d9c1105 100644 --- a/biodata-formats/src/test/java/org/opencb/biodata/formats/feature/chimerdb/ChimerDbParserTest.java +++ b/biodata-formats/src/test/java/org/opencb/biodata/formats/feature/chimerdb/ChimerDbParserTest.java @@ -2,7 +2,7 @@ import org.junit.Assert; -import org.opencb.biodata.models.core.genefusion.GeneFusion; +import org.opencb.biodata.models.variant.avro.GeneFusion; import java.io.IOException; import java.nio.file.Path; diff --git a/biodata-models/src/main/avro/variantAnnotation.avdl b/biodata-models/src/main/avro/variantAnnotation.avdl index fb80f328..0d69e931 100644 --- a/biodata-models/src/main/avro/variantAnnotation.avdl +++ b/biodata-models/src/main/avro/variantAnnotation.avdl @@ -55,6 +55,35 @@ protocol VariantAnnotations { string source; } + record ImprintedGene { + string geneName; + string status; + string expressedAllele; + string source; + union { null, map } attributes; + } + + record GeneFusionBreakpoint { + string geneName; + string chromosome; + int position; + string strand; + } + + record GeneFusion { + string id; + string pair; + string source; + string gene5PrimeJunction; + string gene3PrimeJunction; + union { null, GeneFusionBreakpoint } headGene; + union { null, GeneFusionBreakpoint } tailGene; + union { null, array } diseases; + union { null, array } publications; + union { null, array } validations; + union { null, map } attributes; + } + record PopulationFrequency { string study; string population; @@ -361,6 +390,8 @@ protocol VariantAnnotations { union { null, array } cytoband; union { null, array } repeat; union { null, array } drugs; + union { null, array } imprintedGenes; + union { null, array } geneFusions; union { null, map } additionalAttributes = null; } diff --git a/biodata-models/src/main/java/org/opencb/biodata/models/core/GeneAnnotation.java 
b/biodata-models/src/main/java/org/opencb/biodata/models/core/GeneAnnotation.java index 27b008c0..c31db9ae 100644 --- a/biodata-models/src/main/java/org/opencb/biodata/models/core/GeneAnnotation.java +++ b/biodata-models/src/main/java/org/opencb/biodata/models/core/GeneAnnotation.java @@ -19,11 +19,7 @@ package org.opencb.biodata.models.core; -import org.opencb.biodata.models.core.genefusion.GeneFusion; -import org.opencb.biodata.models.variant.avro.Constraint; -import org.opencb.biodata.models.variant.avro.Expression; -import org.opencb.biodata.models.variant.avro.GeneDrugInteraction; -import org.opencb.biodata.models.variant.avro.GeneTraitAssociation; +import org.opencb.biodata.models.variant.avro.*; import java.util.ArrayList; import java.util.List; diff --git a/biodata-models/src/main/java/org/opencb/biodata/models/core/ImprintedGene.java b/biodata-models/src/main/java/org/opencb/biodata/models/core/ImprintedGene.java deleted file mode 100644 index f3c4e7c3..00000000 --- a/biodata-models/src/main/java/org/opencb/biodata/models/core/ImprintedGene.java +++ /dev/null @@ -1,103 +0,0 @@ -/* - * - * - */ - -package org.opencb.biodata.models.core; - -import org.opencb.commons.datastore.core.ObjectMap; - -import java.util.Map; - - -public class ImprintedGene { - - private String geneName; - private String status; - private String expressedAllele; - private Map attributes; - private String source; - - public ImprintedGene() { - this.attributes = new ObjectMap(); - } - - public ImprintedGene(String geneName, String status, String expressedAllele, Map attributes, String source) { - this.geneName = geneName; - this.status = status; - this.expressedAllele = expressedAllele; - this.attributes = attributes; - this.source = source; - } - - @Override - public String toString() { - final StringBuilder sb = new StringBuilder("ImprintedGene{"); - sb.append("geneName='").append(geneName).append('\''); - sb.append(", status='").append(status).append('\''); - sb.append(", 
expressedAllele='").append(expressedAllele).append('\''); - sb.append(", attributes=").append(attributes); - sb.append(", source='").append(source).append('\''); - sb.append('}'); - return sb.toString(); - } - - public String getGeneName() { - return geneName; - } - - public ImprintedGene setGeneName(String geneName) { - this.geneName = geneName; - return this; - } - - public String getStatus() { - return status; - } - - public ImprintedGene setStatus(String status) { - this.status = status; - return this; - } - - public String getExpressedAllele() { - return expressedAllele; - } - - public ImprintedGene setExpressedAllele(String expressedAllele) { - this.expressedAllele = expressedAllele; - return this; - } - - public Map getAttributes() { - return attributes; - } - - public ImprintedGene setAttributes(Map attributes) { - this.attributes = attributes; - return this; - } - - public String getSource() { - return source; - } - - public ImprintedGene setSource(String source) { - this.source = source; - return this; - } -} diff --git a/biodata-models/src/main/java/org/opencb/biodata/models/core/genefusion/GeneFusion.java b/biodata-models/src/main/java/org/opencb/biodata/models/core/genefusion/GeneFusion.java deleted file mode 100644 index d8d20d49..00000000 --- a/biodata-models/src/main/java/org/opencb/biodata/models/core/genefusion/GeneFusion.java +++ /dev/null @@ -1,160 +0,0 @@ -package org.opencb.biodata.models.core.genefusion; - -import org.opencb.commons.datastore.core.ObjectMap; - -import java.util.ArrayList; -import java.util.List; - -public class GeneFusion { - - private String id; - private String pair; - private String source; - private String gene5PrimeJunction; - private String gene3PrimeJunction; - private GeneFusionBreakpoint headGene; - private GeneFusionBreakpoint tailGene; - private List diseases; - private List publications; - private List validations; - private ObjectMap attributes; - - public GeneFusion() { - this.diseases = new ArrayList<>(); - 
this.publications = new ArrayList<>(); - this.attributes = new ObjectMap(); - } - - public GeneFusion(String id, String pair, String source, String gene5PrimeJunction, String gene3PrimeJunction, - GeneFusionBreakpoint headGene, GeneFusionBreakpoint tailGene, List diseases, List publications, - List validations, ObjectMap attributes) { - this.id = id; - this.pair = pair; - this.source = source; - this.gene5PrimeJunction = gene5PrimeJunction; - this.gene3PrimeJunction = gene3PrimeJunction; - this.headGene = headGene; - this.tailGene = tailGene; - this.diseases = diseases; - this.publications = publications; - this.validations = validations; - this.attributes = attributes; - } - - @Override - public String toString() { - final StringBuilder sb = new StringBuilder("GeneFusion{"); - sb.append("id='").append(id).append('\''); - sb.append("pair='").append(pair).append('\''); - sb.append(", source='").append(source).append('\''); - sb.append(", gene5PrimeJunction='").append(gene5PrimeJunction).append('\''); - sb.append(", gene3PrimeJunction='").append(gene3PrimeJunction).append('\''); - sb.append(", headGene=").append(headGene); - sb.append(", tailGene=").append(tailGene); - sb.append(", diseases=").append(diseases); - sb.append(", publications=").append(publications); - sb.append(", validations='").append(validations).append('\''); - sb.append(", attributes=").append(attributes.toJson()); - sb.append('}'); - return sb.toString(); - } - - public String getId() { - return id; - } - - public GeneFusion setId(String id) { - this.id = id; - return this; - } - - public String getPair() { - return pair; - } - - public GeneFusion setPair(String pair) { - this.pair = pair; - return this; - } - - public String getSource() { - return source; - } - - public GeneFusion setSource(String source) { - this.source = source; - return this; - } - - public String getGene5PrimeJunction() { - return gene5PrimeJunction; - } - - public GeneFusion setGene5PrimeJunction(String gene5PrimeJunction) { 
- this.gene5PrimeJunction = gene5PrimeJunction; - return this; - } - - public String getGene3PrimeJunction() { - return gene3PrimeJunction; - } - - public GeneFusion setGene3PrimeJunction(String gene3PrimeJunction) { - this.gene3PrimeJunction = gene3PrimeJunction; - return this; - } - - public GeneFusionBreakpoint getHeadGene() { - return headGene; - } - - public GeneFusion setHeadGene(GeneFusionBreakpoint headGene) { - this.headGene = headGene; - return this; - } - - public GeneFusionBreakpoint getTailGene() { - return tailGene; - } - - public GeneFusion setTailGene(GeneFusionBreakpoint tailGene) { - this.tailGene = tailGene; - return this; - } - - public List getDiseases() { - return diseases; - } - - public GeneFusion setDiseases(List diseases) { - this.diseases = diseases; - return this; - } - - public List getPublications() { - return publications; - } - - public GeneFusion setPublications(List publications) { - this.publications = publications; - return this; - } - - public List getValidations() { - return validations; - } - - public GeneFusion setValidations(List validations) { - this.validations = validations; - return this; - } - - public ObjectMap getAttributes() { - return attributes; - } - - public GeneFusion setAttributes(ObjectMap attributes) { - this.attributes = attributes; - return this; - } -} diff --git a/biodata-models/src/main/java/org/opencb/biodata/models/core/genefusion/GeneFusionBreakpoint.java b/biodata-models/src/main/java/org/opencb/biodata/models/core/genefusion/GeneFusionBreakpoint.java deleted file mode 100644 index c145b539..00000000 --- a/biodata-models/src/main/java/org/opencb/biodata/models/core/genefusion/GeneFusionBreakpoint.java +++ /dev/null @@ -1,66 +0,0 @@ -package org.opencb.biodata.models.core.genefusion; - -public class GeneFusionBreakpoint { - - private String geneName; - private String chromosome; - private int position; - private String strand; - - public GeneFusionBreakpoint() { - } - - public 
GeneFusionBreakpoint(String geneName, String chromosome, int position, String strand) { - this.geneName = geneName; - this.chromosome = chromosome; - this.position = position; - this.strand = strand; - } - - @Override - public String toString() { - final StringBuilder sb = new StringBuilder("GeneFusionBreakpoint{"); - sb.append("geneName='").append(geneName).append('\''); - sb.append(", chromosome='").append(chromosome).append('\''); - sb.append(", position=").append(position); - sb.append(", strand=").append(strand); - sb.append('}'); - return sb.toString(); - } - - public String getGeneName() { - return geneName; - } - - public GeneFusionBreakpoint setGeneName(String geneName) { - this.geneName = geneName; - return this; - } - - public String getChromosome() { - return chromosome; - } - - public GeneFusionBreakpoint setChromosome(String chromosome) { - this.chromosome = chromosome; - return this; - } - - public int getPosition() { - return position; - } - - public GeneFusionBreakpoint setPosition(int position) { - this.position = position; - return this; - } - - public String getStrand() { - return strand; - } - - public GeneFusionBreakpoint setStrand(String strand) { - this.strand = strand; - return this; - } -} From c4e4e5886e8962aba51c3c535eb3235b3def4048 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joaqu=C3=ADn=20T=C3=A1rraga=20Gim=C3=A9nez?= Date: Tue, 29 Jul 2025 10:06:47 +0200 Subject: [PATCH 18/24] formats: initialize the attributes map before parsing ChimerDB, #TASK-7830, #TASK-5564 --- .../feature/chimerdb/ChimerDbParser.java | 23 ++++++++++++------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/biodata-formats/src/main/java/org/opencb/biodata/formats/feature/chimerdb/ChimerDbParser.java b/biodata-formats/src/main/java/org/opencb/biodata/formats/feature/chimerdb/ChimerDbParser.java index 1290c54b..a3760415 100644 --- a/biodata-formats/src/main/java/org/opencb/biodata/formats/feature/chimerdb/ChimerDbParser.java +++ 
b/biodata-formats/src/main/java/org/opencb/biodata/formats/feature/chimerdb/ChimerDbParser.java @@ -19,6 +19,7 @@ package org.opencb.biodata.formats.feature.chimerdb; +import org.apache.commons.collections4.MapUtils; import org.apache.commons.lang3.StringUtils; import org.apache.poi.ss.usermodel.CellType; import org.apache.poi.ss.usermodel.Row; @@ -35,7 +36,9 @@ import java.io.IOException; import java.nio.file.Path; import java.util.Arrays; +import java.util.HashMap; import java.util.Iterator; +import java.util.Map; public class ChimerDbParser { @@ -61,6 +64,7 @@ public static void parse(Path xlsxPath, GeneFusionParserCallback callback) throw } GeneFusion geneFusion = new GeneFusion(); + Map attributes = new HashMap<>(); // 0 1 2 3 4 5 6 7 8 9 10 // id ChimerDB_Type Source webSource Fusion_pair 5Gene_Junction 3Gene_Junction H_gene H_chr H_position H_strand @@ -161,28 +165,31 @@ public static void parse(Path xlsxPath, GeneFusionParserCallback callback) throw // Attributes if (row.getCell(15) != null && row.getCell(15).getNumericCellValue() == 1) { - geneFusion.getAttributes().put("genomic_breakpoint", String.valueOf(true)); + attributes.put("genomic_breakpoint", String.valueOf(true)); } if (row.getCell(16) != null && row.getCell(16).getNumericCellValue() == 1) { - geneFusion.getAttributes().put("exomic_breakpoint", String.valueOf(true)); + attributes.put("exomic_breakpoint", String.valueOf(true)); } if (row.getCell(23) != null && StringUtils.isNotEmpty(row.getCell(23).getStringCellValue())) { - geneFusion.getAttributes().put("chr_info", row.getCell(23).getStringCellValue()); + attributes.put("chr_info", row.getCell(23).getStringCellValue()); } if (row.getCell(24) != null && StringUtils.isNotEmpty(row.getCell(24).getStringCellValue())) { - geneFusion.getAttributes().put("kinase", String.valueOf(true)); + attributes.put("kinase", String.valueOf(true)); } if (row.getCell(25) != null && StringUtils.isNotEmpty(row.getCell(25).getStringCellValue())) { - 
geneFusion.getAttributes().put("oncogene", String.valueOf(true)); + attributes.put("oncogene", String.valueOf(true)); } if (row.getCell(26) != null && StringUtils.isNotEmpty(row.getCell(26).getStringCellValue())) { - geneFusion.getAttributes().put("tumor_supressor", String.valueOf(true)); + attributes.put("tumor_supressor", String.valueOf(true)); } if (row.getCell(27) != null && StringUtils.isNotEmpty(row.getCell(27).getStringCellValue())) { - geneFusion.getAttributes().put("receptor", String.valueOf(true)); + attributes.put("receptor", String.valueOf(true)); } if (row.getCell(28) != null && StringUtils.isNotEmpty(row.getCell(28).getStringCellValue())) { - geneFusion.getAttributes().put("transcriptor_factor", String.valueOf(true)); + attributes.put("transcriptor_factor", String.valueOf(true)); + } + if (MapUtils.isNotEmpty(attributes)) { + geneFusion.setAttributes(attributes); } // Callback to process the gene fusion From d2ee79a0ba1697ca1050b4ee4148ac40e3c05522 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joaqu=C3=ADn=20T=C3=A1rraga=20Gim=C3=A9nez?= Date: Tue, 5 Aug 2025 16:33:04 +0200 Subject: [PATCH 19/24] formats: add parser for files ChimerSeq.xlsx and ChimerPub.xlsx, #TASK-7830, #TASK-5564 --- .../feature/chimerdb/ChimerDbParser.java | 203 --------- ...lback.java => ChimerDbParserCallback.java} | 6 +- .../feature/chimerdb/ChimerKbParser.java | 279 +++++++++++++ .../feature/chimerdb/ChimerPubParser.java | 258 ++++++++++++ .../feature/chimerdb/ChimerSeqParser.java | 282 +++++++++++++ .../feature/chimerdb/ChimerDbParserTest.java | 34 +- .../src/main/avro/variantAnnotation.avdl | 8 +- .../biodata/models/core/GeneAnnotation.java | 16 +- .../biodata/models/core/GeneFusion.java | 83 ++++ .../models/core/chimerdb/ChimerKb.java | 386 ++++++++++++++++++ .../core/chimerdb/ChimerKbGeneBreakpoint.java | 88 ++++ .../models/core/chimerdb/ChimerPub.java | 312 ++++++++++++++ .../chimerdb/ChimerPubGeneBreakpoint.java | 61 +++ .../models/core/chimerdb/ChimerSeq.java | 299 
++++++++++++++ .../chimerdb/ChimerSeqGeneBreakpoint.java | 118 ++++++ 15 files changed, 2193 insertions(+), 240 deletions(-) delete mode 100644 biodata-formats/src/main/java/org/opencb/biodata/formats/feature/chimerdb/ChimerDbParser.java rename biodata-formats/src/main/java/org/opencb/biodata/formats/feature/chimerdb/{GeneFusionParserCallback.java => ChimerDbParserCallback.java} (82%) create mode 100644 biodata-formats/src/main/java/org/opencb/biodata/formats/feature/chimerdb/ChimerKbParser.java create mode 100644 biodata-formats/src/main/java/org/opencb/biodata/formats/feature/chimerdb/ChimerPubParser.java create mode 100644 biodata-formats/src/main/java/org/opencb/biodata/formats/feature/chimerdb/ChimerSeqParser.java create mode 100644 biodata-models/src/main/java/org/opencb/biodata/models/core/GeneFusion.java create mode 100644 biodata-models/src/main/java/org/opencb/biodata/models/core/chimerdb/ChimerKb.java create mode 100644 biodata-models/src/main/java/org/opencb/biodata/models/core/chimerdb/ChimerKbGeneBreakpoint.java create mode 100644 biodata-models/src/main/java/org/opencb/biodata/models/core/chimerdb/ChimerPub.java create mode 100644 biodata-models/src/main/java/org/opencb/biodata/models/core/chimerdb/ChimerPubGeneBreakpoint.java create mode 100644 biodata-models/src/main/java/org/opencb/biodata/models/core/chimerdb/ChimerSeq.java create mode 100644 biodata-models/src/main/java/org/opencb/biodata/models/core/chimerdb/ChimerSeqGeneBreakpoint.java diff --git a/biodata-formats/src/main/java/org/opencb/biodata/formats/feature/chimerdb/ChimerDbParser.java b/biodata-formats/src/main/java/org/opencb/biodata/formats/feature/chimerdb/ChimerDbParser.java deleted file mode 100644 index a3760415..00000000 --- a/biodata-formats/src/main/java/org/opencb/biodata/formats/feature/chimerdb/ChimerDbParser.java +++ /dev/null @@ -1,203 +0,0 @@ -/* - * - * - */ - -package org.opencb.biodata.formats.feature.chimerdb; - -import org.apache.commons.collections4.MapUtils; -import 
org.apache.commons.lang3.StringUtils; -import org.apache.poi.ss.usermodel.CellType; -import org.apache.poi.ss.usermodel.Row; -import org.apache.poi.ss.usermodel.Sheet; -import org.apache.poi.ss.usermodel.Workbook; -import org.apache.poi.xssf.usermodel.XSSFWorkbook; -import org.opencb.biodata.models.variant.avro.GeneFusion; -import org.opencb.biodata.models.variant.avro.GeneFusionBreakpoint; -import org.opencb.commons.utils.FileUtils; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import java.io.FileInputStream; -import java.io.IOException; -import java.nio.file.Path; -import java.util.Arrays; -import java.util.HashMap; -import java.util.Iterator; -import java.util.Map; - -public class ChimerDbParser { - - private static Logger logger = LoggerFactory.getLogger(ChimerDbParser.class); - - public static void parse(Path xlsxPath, GeneFusionParserCallback callback) throws IOException { - logger.info("Parsing ChemirDB file: {}", xlsxPath); - FileUtils.checkFile(xlsxPath); - - try (FileInputStream excelFile = new FileInputStream(xlsxPath.toFile()); - Workbook workbook = new XSSFWorkbook(excelFile)) { - // Get the first sheet from the workbook - Sheet sheet = workbook.getSheetAt(0); - - // Iterate over rows - Iterator rowIterator = sheet.iterator(); - while (rowIterator.hasNext()) { - Row row = rowIterator.next(); - - // Skip header row if needed (e.g., if first row is header) - if (row.getRowNum() == 0) { - continue; - } - - GeneFusion geneFusion = new GeneFusion(); - Map attributes = new HashMap<>(); - - // 0 1 2 3 4 5 6 7 8 9 10 - // id ChimerDB_Type Source webSource Fusion_pair 5Gene_Junction 3Gene_Junction H_gene H_chr H_position H_strand - // 11 12 13 14 15 16 17 18 - // T_gene T_chr T_position T_strand Genomic_breakpoint Exonic_breakpoint Breakpoint_Type Genome_Build_Version - // 19 20 21 22 23 24 25 26 27 28 - // PMID Disease Validation Frame Chr_info Kinase Oncogene Tumor_suppressor Receptor Transcription_Factor - // 29 30 31 - // ChimerPub ChimerSeq 
ChimerSeq+ - - geneFusion.setId(String.valueOf((int) row.getCell(0).getNumericCellValue())); - geneFusion.setSource("chimerdb"); - geneFusion.setPair(row.getCell(4).getStringCellValue()); - - if (row.getCell(5) != null && StringUtils.isNotEmpty(row.getCell(5).getStringCellValue())) { - geneFusion.setGene5PrimeJunction(row.getCell(5).getStringCellValue()); - } - if (row.getCell(6) != null && StringUtils.isNotEmpty(row.getCell(6).getStringCellValue())) { - geneFusion.setGene3PrimeJunction(row.getCell(6).getStringCellValue()); - } - - // Head gene breakpoint - GeneFusionBreakpoint head = new GeneFusionBreakpoint(); - if (row.getCell(7) != null && StringUtils.isNotEmpty(row.getCell(7).getStringCellValue())) { - head.setGeneName(row.getCell(7).getStringCellValue()); - } - if (row.getCell(8) != null && StringUtils.isNotEmpty(row.getCell(8).getStringCellValue())) { - head.setChromosome(row.getCell(8).getStringCellValue()); - } - if (row.getCell(9) != null) { - // The excel file may contain errors in this cell, so we need to check the type - if (row.getCell(9).getCellType() == CellType.STRING) { - logger.warn("Error in cell 9 (H_position), expected numeric value but found string: {}", - row.getCell(9).getStringCellValue()); - } else if (row.getCell(9).getCellType() == CellType.NUMERIC) { - if (row.getCell(9).getNumericCellValue() > 0) { - head.setPosition((int) row.getCell(9).getNumericCellValue()); - } - } - } - if (row.getCell(10) != null && StringUtils.isNotEmpty(row.getCell(10).getStringCellValue())) { - head.setStrand(row.getCell(10).getStringCellValue()); - } - geneFusion.setHeadGene(head); - - // Tail gene breakpoint - GeneFusionBreakpoint tail = new GeneFusionBreakpoint(); - if (row.getCell(11) != null && StringUtils.isNotEmpty(row.getCell(11).getStringCellValue())) { - tail.setGeneName(row.getCell(11).getStringCellValue()); - } - if (row.getCell(12) != null) { - if (row.getCell(12).getCellType() == CellType.STRING) { - if 
(StringUtils.isNotEmpty(row.getCell(12).getStringCellValue())) { - tail.setChromosome(row.getCell(12).getStringCellValue()); - } - } else { - logger.warn("Error in cell 12 (T_chromosome), it is not a string value"); - } - } - if (row.getCell(13) != null) { - // The excel file may contain errors in this cell, so we need to check the type - if (row.getCell(13).getCellType() == CellType.STRING) { - logger.warn("Error in cell 13 (T_position), expected numeric value but found string: {}", - row.getCell(13).getStringCellValue()); - } else if (row.getCell(13).getCellType() == CellType.NUMERIC) { - if (row.getCell(13).getNumericCellValue() > 0) { - tail.setPosition((int) row.getCell(13).getNumericCellValue()); - } - } - } - if (row.getCell(14) != null && StringUtils.isNotEmpty(row.getCell(14).getStringCellValue())) { - tail.setStrand(row.getCell(14).getStringCellValue()); - } - geneFusion.setTailGene(tail); - - // Publications - if (row.getCell(19) != null) { - if (row.getCell(19).getCellType() == CellType.STRING) { - if (StringUtils.isNotEmpty(row.getCell(19).getStringCellValue())) { - geneFusion.setPublications(Arrays.asList(row.getCell(19).getStringCellValue().split(","))); - } - } else if (row.getCell(19).getCellType() == CellType.NUMERIC) { - if (row.getCell(19).getNumericCellValue() > 0) { - geneFusion.setPublications(Arrays.asList(String.valueOf((int) row.getCell(19).getNumericCellValue()))); - } - } - } - - // Diseases - if (row.getCell(20) != null && StringUtils.isNotEmpty(row.getCell(20).getStringCellValue())) { - geneFusion.setDiseases(Arrays.asList(row.getCell(20).getStringCellValue().split(","))); - } - - // Validations - if (row.getCell(21) != null && StringUtils.isNotEmpty(row.getCell(21).getStringCellValue())) { - geneFusion.setValidations(Arrays.asList(row.getCell(21).getStringCellValue().split(","))); - } - - // Attributes - if (row.getCell(15) != null && row.getCell(15).getNumericCellValue() == 1) { - attributes.put("genomic_breakpoint", 
String.valueOf(true)); - } - if (row.getCell(16) != null && row.getCell(16).getNumericCellValue() == 1) { - attributes.put("exomic_breakpoint", String.valueOf(true)); - } - if (row.getCell(23) != null && StringUtils.isNotEmpty(row.getCell(23).getStringCellValue())) { - attributes.put("chr_info", row.getCell(23).getStringCellValue()); - } - if (row.getCell(24) != null && StringUtils.isNotEmpty(row.getCell(24).getStringCellValue())) { - attributes.put("kinase", String.valueOf(true)); - } - if (row.getCell(25) != null && StringUtils.isNotEmpty(row.getCell(25).getStringCellValue())) { - attributes.put("oncogene", String.valueOf(true)); - } - if (row.getCell(26) != null && StringUtils.isNotEmpty(row.getCell(26).getStringCellValue())) { - attributes.put("tumor_supressor", String.valueOf(true)); - } - if (row.getCell(27) != null && StringUtils.isNotEmpty(row.getCell(27).getStringCellValue())) { - attributes.put("receptor", String.valueOf(true)); - } - if (row.getCell(28) != null && StringUtils.isNotEmpty(row.getCell(28).getStringCellValue())) { - attributes.put("transcriptor_factor", String.valueOf(true)); - } - if (MapUtils.isNotEmpty(attributes)) { - geneFusion.setAttributes(attributes); - } - - // Callback to process the gene fusion - callback.processGeneFusion(geneFusion); - } - } catch (IOException e) { - throw new IOException("Error reading the ChemirDB file: " + e.getMessage(), e); - } - logger.info("ChemirDB file parsed successfully: {}", xlsxPath); - } -} \ No newline at end of file diff --git a/biodata-formats/src/main/java/org/opencb/biodata/formats/feature/chimerdb/GeneFusionParserCallback.java b/biodata-formats/src/main/java/org/opencb/biodata/formats/feature/chimerdb/ChimerDbParserCallback.java similarity index 82% rename from biodata-formats/src/main/java/org/opencb/biodata/formats/feature/chimerdb/GeneFusionParserCallback.java rename to biodata-formats/src/main/java/org/opencb/biodata/formats/feature/chimerdb/ChimerDbParserCallback.java index 
da756e48..6fe1a321 100644 --- a/biodata-formats/src/main/java/org/opencb/biodata/formats/feature/chimerdb/GeneFusionParserCallback.java +++ b/biodata-formats/src/main/java/org/opencb/biodata/formats/feature/chimerdb/ChimerDbParserCallback.java @@ -20,8 +20,6 @@ package org.opencb.biodata.formats.feature.chimerdb; -import org.opencb.biodata.models.variant.avro.GeneFusion; - -public interface GeneFusionParserCallback { - boolean processGeneFusion(GeneFusion geneFusion); +public interface ChimerDbParserCallback { + boolean processChimerDbObject(T object); } diff --git a/biodata-formats/src/main/java/org/opencb/biodata/formats/feature/chimerdb/ChimerKbParser.java b/biodata-formats/src/main/java/org/opencb/biodata/formats/feature/chimerdb/ChimerKbParser.java new file mode 100644 index 00000000..0d01b25b --- /dev/null +++ b/biodata-formats/src/main/java/org/opencb/biodata/formats/feature/chimerdb/ChimerKbParser.java @@ -0,0 +1,279 @@ +/* + * + * + */ + +package org.opencb.biodata.formats.feature.chimerdb; + +import org.apache.poi.ss.usermodel.Row; +import org.apache.poi.ss.usermodel.Sheet; +import org.apache.poi.ss.usermodel.Workbook; +import org.apache.poi.xssf.usermodel.XSSFWorkbook; +import org.opencb.biodata.models.core.chimerdb.ChimerKb; +import org.opencb.biodata.models.core.chimerdb.ChimerKbGeneBreakpoint; +import org.opencb.commons.utils.FileUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.FileInputStream; +import java.io.IOException; +import java.nio.file.Path; +import java.util.Arrays; +import java.util.Iterator; +import java.util.stream.Collectors; + +import static org.opencb.biodata.formats.feature.chimerdb.ChimerPubParser.getIntCellValue; +import static org.opencb.biodata.formats.feature.chimerdb.ChimerPubParser.getStringCellValue; + +public class ChimerKbParser { + + private static Logger logger = LoggerFactory.getLogger(ChimerKbParser.class); + + public static void parse(Path xlsxPath, ChimerDbParserCallback callback) 
throws IOException { + logger.info("Parsing ChimerKB file: {}", xlsxPath); + FileUtils.checkFile(xlsxPath); + + try (FileInputStream excelFile = new FileInputStream(xlsxPath.toFile()); + Workbook workbook = new XSSFWorkbook(excelFile)) { + // Get the first sheet from the workbook + Sheet sheet = workbook.getSheetAt(0); + + // Iterate over rows + Iterator rowIterator = sheet.iterator(); + while (rowIterator.hasNext()) { + Row row = rowIterator.next(); + + // Skip header row if needed (e.g., if first row is header) + if (row.getRowNum() == 0) { + continue; + } + + String strValue; + Integer intValue; + + ChimerKb chimerKb = new ChimerKb(); + + // 0 1 2 3 4 5 6 7 8 9 10 + // id ChimerDB_Type Source webSource Fusion_pair 5Gene_Junction 3Gene_Junction H_gene H_chr H_position H_strand + // 11 12 13 14 15 16 17 18 + // T_gene T_chr T_position T_strand Genomic_breakpoint Exonic_breakpoint Breakpoint_Type Genome_Build_Version + // 19 20 21 22 23 24 25 26 27 28 + // PMID Disease Validation Frame Chr_info Kinase Oncogene Tumor_suppressor Receptor Transcription_Factor + // 29 30 31 + // ChimerPub ChimerSeq ChimerSeq+ + + // ID + chimerKb.setId(String.valueOf((int) row.getCell(0).getNumericCellValue())); + + // ChimerDB Type + strValue = getStringCellValue(row, 1); + if (strValue != null) { + chimerKb.setChimerDbType(strValue); + } + + // Source (different from source provided by user) + strValue = getStringCellValue(row, 2); + if (strValue != null) { + chimerKb.setChimerSource(strValue); + } + + // Web source + strValue = getStringCellValue(row, 3); + if (strValue != null) { + chimerKb.setWebSource(strValue); + } + + // Fusion pair + strValue = getStringCellValue(row, 4); + if (strValue != null) { + chimerKb.setFusionPair(strValue); + } + + // Gene junctions (5 and 3) + strValue = getStringCellValue(row, 5); + if (strValue != null) { + chimerKb.setFiveGeneJunction(strValue); + } + strValue = getStringCellValue(row, 6); + if (strValue != null) { + 
chimerKb.setThreeGeneJunction(strValue); + } + + // Head gene breakpoint + ChimerKbGeneBreakpoint head = new ChimerKbGeneBreakpoint(); + strValue = getStringCellValue(row, 7); + if (strValue != null) { + head.setGene(strValue); + } + strValue = getStringCellValue(row, 8); + if (strValue != null) { + if (strValue.startsWith("chr") || strValue.startsWith("Chr") || strValue.startsWith("CHR")) { + // Remove 'chr' prefix if present + strValue = strValue.substring(3); + } + head.setChromosome(strValue); + } + intValue = getIntCellValue(row, 9); + if (intValue != null) { + head.setPosition(intValue); + } + strValue = getStringCellValue(row, 10); + if (strValue != null) { + head.setStrand(strValue); + } + chimerKb.setHeadGene(head); + + // Tail gene breakpoint + ChimerKbGeneBreakpoint tail = new ChimerKbGeneBreakpoint(); + strValue = getStringCellValue(row, 11); + if (strValue != null) { + tail.setGene(strValue); + } + strValue = getStringCellValue(row, 12); + if (strValue != null) { + if (strValue.startsWith("chr") || strValue.startsWith("Chr") || strValue.startsWith("CHR")) { + // Remove 'chr' prefix if present + strValue = strValue.substring(3); + } + tail.setChromosome(strValue); + } + intValue = getIntCellValue(row, 13); + if (intValue != null) { + tail.setPosition(intValue); + } + strValue = getStringCellValue(row, 14); + if (strValue != null) { + tail.setStrand(strValue); + } + chimerKb.setTailGene(tail); + + // Genomic breakpoint + intValue = getIntCellValue(row, 15); + if (intValue != null) { + chimerKb.setGenomicBreakpoint(intValue == 1); + } + + // Exonic breakpoint + intValue = getIntCellValue(row, 16); + if (intValue != null) { + chimerKb.setExonicBreakpoint(intValue == 1); + } + + // Breakpoint type + strValue = getStringCellValue(row, 17); + if (strValue != null) { + chimerKb.setBreakpointType(strValue); + } + + // Genome build version + strValue = getStringCellValue(row, 18); + if (strValue != null) { + chimerKb.setGenomeBuildVersion(strValue); + } + + // 
Publications + strValue = getStringCellValue(row, 19); + if (strValue != null) { + chimerKb.setPmid(Arrays.stream(strValue.split(",")).map(String::trim).collect(Collectors.toList())); + } else { + intValue = getIntCellValue(row, 19); + if (intValue != null && intValue > 0) { + chimerKb.setPmid(Arrays.asList(String.valueOf(intValue))); + } + } + + // Diseases + strValue = getStringCellValue(row, 20); + if (strValue != null) { + chimerKb.setDisease(Arrays.stream(strValue.split(",")).map(String::trim).collect(Collectors.toList())); + } + + // Validations + strValue = getStringCellValue(row, 21); + if (strValue != null) { + chimerKb.setValidation(Arrays.stream(strValue.split(",")).map(String::trim).collect(Collectors.toList())); + } + + // Frame + strValue = getStringCellValue(row, 22); + if (strValue != null) { + chimerKb.setFrame(strValue); + } + + // Chromosome info + strValue = getStringCellValue(row, 23); + if (strValue != null) { + chimerKb.setChrInfo(strValue); + } + + // Kinase + strValue = getStringCellValue(row, 24); + if (strValue != null) { + chimerKb.setKinase(strValue.equalsIgnoreCase("kinase")); + } + + // Oncogene + strValue = getStringCellValue(row, 25); + if (strValue != null) { + chimerKb.setOncogene(strValue.equalsIgnoreCase("oncogene")); + } + + // Tumor suppressor + strValue = getStringCellValue(row, 26); + if (strValue != null) { + chimerKb.setTumorSuppressor(strValue.equalsIgnoreCase("Tumor suppressor gene")); + } + + // Receptor + strValue = getStringCellValue(row, 27); + if (strValue != null) { + chimerKb.setReceptor(strValue.equalsIgnoreCase("Receptor")); + } + + // Transcription factor + strValue = getStringCellValue(row, 28); + if (strValue != null) { + chimerKb.setTranscriptionFactor(strValue.equalsIgnoreCase("Transcription factor")); + } + + // ChimerPub + strValue = getStringCellValue(row, 29); + if (strValue != null) { + chimerKb.setChimerPub(strValue.equalsIgnoreCase("Pub")); + } + + // ChimerSeq + strValue = getStringCellValue(row, 
30); + if (strValue != null) { + chimerKb.setChimerSeq(strValue.equalsIgnoreCase("Seq")); + } + + // ChimerSeq+ + strValue = getStringCellValue(row, 31); + if (strValue != null) { + chimerKb.setChimerSeqPlus(strValue.equalsIgnoreCase("Seq+")); + } + + // Callback to process the gene fusion + callback.processChimerDbObject(chimerKb); + } + } catch (IOException e) { + throw new IOException("Error reading the ChimerKB file: " + e.getMessage(), e); + } + logger.info("ChimerKB file parsed successfully: {}", xlsxPath); + } +} \ No newline at end of file diff --git a/biodata-formats/src/main/java/org/opencb/biodata/formats/feature/chimerdb/ChimerPubParser.java b/biodata-formats/src/main/java/org/opencb/biodata/formats/feature/chimerdb/ChimerPubParser.java new file mode 100644 index 00000000..f909ab84 --- /dev/null +++ b/biodata-formats/src/main/java/org/opencb/biodata/formats/feature/chimerdb/ChimerPubParser.java @@ -0,0 +1,258 @@ +/* + * + * + */ + +package org.opencb.biodata.formats.feature.chimerdb; + +import org.apache.commons.lang3.StringUtils; +import org.apache.poi.ss.usermodel.CellType; +import org.apache.poi.ss.usermodel.Row; +import org.apache.poi.ss.usermodel.Sheet; +import org.apache.poi.ss.usermodel.Workbook; +import org.apache.poi.xssf.usermodel.XSSFWorkbook; +import org.opencb.biodata.models.core.chimerdb.ChimerPub; +import org.opencb.biodata.models.core.chimerdb.ChimerPubGeneBreakpoint; +import org.opencb.commons.utils.FileUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.FileInputStream; +import java.io.IOException; +import java.nio.file.Path; +import java.util.Arrays; +import java.util.Iterator; +import java.util.stream.Collectors; + +public class ChimerPubParser { + + private static Logger logger = LoggerFactory.getLogger(ChimerPubParser.class); + + public static void parse(Path xlsxPath, ChimerDbParserCallback callback) throws IOException { + logger.info("Parsing ChimerPub file: {}", xlsxPath); + 
FileUtils.checkFile(xlsxPath); + + try (FileInputStream excelFile = new FileInputStream(xlsxPath.toFile()); + Workbook workbook = new XSSFWorkbook(excelFile)) { + // Get the first sheet from the workbook + Sheet sheet = workbook.getSheetAt(0); + + // Iterate over rows + Iterator rowIterator = sheet.iterator(); + while (rowIterator.hasNext()) { + Row row = rowIterator.next(); + + // Skip header row if needed (e.g., if first row is header) + if (row.getRowNum() == 0) { + continue; + } + + String strValue; + Integer intValue; + Double doubleValue; + + ChimerPub chimerPub = new ChimerPub(); + + // 0 1 2 3 4 5 6 7 8 9 10 11 + // id Fusion_pair Translocation H_gene T_gene PMID Score Disease Validation Kinase Oncogene Tumor_suppressor + // 12 13 14 15 16 17 18 + // Receptor Transcription_Factor ChimerKB ChimerSeq ChimerSeq+ Sentence_highlight H_gene_highlight + // 19 20 21 + // T_gene_highlight Disease_highlight Validation_highlight + + // ID + chimerPub.setId(String.valueOf((int) row.getCell(0).getNumericCellValue())); + + // Fusion pair + strValue = getStringCellValue(row, 1); + if (strValue != null) { + chimerPub.setFusionPair(strValue); + } + + // Translocation + strValue = getStringCellValue(row, 2); + if (strValue != null) { + chimerPub.setTranslocation(strValue); + } + + // Head gene breakpoint + ChimerPubGeneBreakpoint head = new ChimerPubGeneBreakpoint(); + strValue = getStringCellValue(row, 3); + if (strValue != null) { + head.setGene(strValue); + } + strValue = getStringCellValue(row, 18); + if (strValue != null) { + head.setHighlight(strValue); + } + chimerPub.setHeadGene(head); + + // Tail gene breakpoint + ChimerPubGeneBreakpoint tail = new ChimerPubGeneBreakpoint(); + strValue = getStringCellValue(row, 4); + if (strValue != null) { + tail.setGene(strValue); + } + strValue = getStringCellValue(row, 19); + if (strValue != null) { + tail.setHighlight(strValue); + } + chimerPub.setTailGene(tail); + + // Publications + strValue = getStringCellValue(row, 5); + if 
(strValue != null) { + chimerPub.setPmid(Arrays.stream(strValue.split(",")).map(String::trim).collect(Collectors.toList())); + } else { + intValue = getIntCellValue(row, 5); + if (intValue != null && intValue > 0) { + chimerPub.setPmid(Arrays.asList(String.valueOf(intValue))); + } + } + + // Score + doubleValue = getDoubleCellValue(row, 6); + if (doubleValue != null) { + chimerPub.setScore(doubleValue); + } + + // Diseases + strValue = getStringCellValue(row, 7); + if (strValue != null) { + chimerPub.setDisease(Arrays.stream(strValue.split(",")).map(String::trim).collect(Collectors.toList())); + } + + // Validations + strValue = getStringCellValue(row, 8); + if (strValue != null) { + chimerPub.setValidation(Arrays.stream(strValue.split(",")).map(String::trim).collect(Collectors.toList())); + } + + // Kinase + strValue = getStringCellValue(row, 9); + if (strValue != null) { + chimerPub.setKinase(strValue.equalsIgnoreCase("kinase")); + } + + // Oncogene + strValue = getStringCellValue(row, 10); + if (strValue != null) { + chimerPub.setOncogene(strValue.equalsIgnoreCase("oncogene")); + } + + // Tumor suppressor + strValue = getStringCellValue(row, 11); + if (strValue != null) { + chimerPub.setTumorSuppressor(strValue.equalsIgnoreCase("Tumor suppressor gene")); + } + + // Receptor + strValue = getStringCellValue(row, 12); + if (strValue != null) { + chimerPub.setReceptor(strValue.equalsIgnoreCase("Receptor")); + } + + // Transcription factor + strValue = getStringCellValue(row, 13); + if (strValue != null) { + chimerPub.setTranscriptionFactor(strValue.equalsIgnoreCase("Transcription factor")); + } + + // ChimerKb + strValue = getStringCellValue(row, 14); + if (strValue != null) { + chimerPub.setChimerKb(strValue.equalsIgnoreCase("KB")); + } + + // ChimerSeq + strValue = getStringCellValue(row, 15); + if (strValue != null) { + chimerPub.setChimerSeq(strValue.equalsIgnoreCase("Seq")); + } + + // ChimerSeq+ + strValue = getStringCellValue(row, 16); + if (strValue != null) 
{ + chimerPub.setChimerSeqPlus(strValue.equalsIgnoreCase("Seq+")); + } + + // Sentence highlight + strValue = getStringCellValue(row, 17); + if (strValue != null) { + chimerPub.setSenteceHighlight(strValue); + } + + // Disease highlight + strValue = getStringCellValue(row, 20); + if (strValue != null) { + chimerPub.setDiseaseHighlight(strValue); + } + + // Validation highlight + strValue = getStringCellValue(row, 21); + if (strValue != null) { + chimerPub.setValidationHighlight(strValue); + } + + // Callback to process the gene fusion + callback.processChimerDbObject(chimerPub); + } + } catch (IOException e) { + throw new IOException("Error reading the ChimerPub file: " + e.getMessage(), e); + } + logger.info("ChimerPub file parsed successfully: {}", xlsxPath); + } + + public static Integer getIntCellValue(Row row, int index) { + if (row.getCell(index) == null) { + return null; + } + if (row.getCell(index).getCellType() != CellType.NUMERIC) { + logger.warn("Error in cell {} of row {}, expected NUMERIC but found {}", index, row.getRowNum(), + row.getCell(index).getCellType()); + return null; + } + return (int) row.getCell(index).getNumericCellValue(); + } + + public static Double getDoubleCellValue(Row row, int index) { + if (row.getCell(index) == null) { + return null; + } + if (row.getCell(index).getCellType() != CellType.NUMERIC) { + logger.warn("Error in cell {} of row {}, expected NUMERIC but found {}", index, row.getRowNum(), + row.getCell(index).getCellType()); + return null; + } + return row.getCell(index).getNumericCellValue(); + } + + public static String getStringCellValue(Row row, int index) { + if (row.getCell(index) == null) { + return null; + } + if (row.getCell(index).getCellType() != CellType.STRING) { + logger.warn("Error in cell {} of row {}, expected STRING but found {}", index, row.getRowNum(), + row.getCell(index).getCellType()); + return null; + } + if (StringUtils.isNotEmpty(row.getCell(index).getStringCellValue())) { + return 
row.getCell(index).getStringCellValue(); + } + return null; + } +} \ No newline at end of file diff --git a/biodata-formats/src/main/java/org/opencb/biodata/formats/feature/chimerdb/ChimerSeqParser.java b/biodata-formats/src/main/java/org/opencb/biodata/formats/feature/chimerdb/ChimerSeqParser.java new file mode 100644 index 00000000..97e30f2c --- /dev/null +++ b/biodata-formats/src/main/java/org/opencb/biodata/formats/feature/chimerdb/ChimerSeqParser.java @@ -0,0 +1,282 @@ +/* + * + * + */ + +package org.opencb.biodata.formats.feature.chimerdb; + +import org.apache.poi.ss.usermodel.Row; +import org.apache.poi.ss.usermodel.Sheet; +import org.apache.poi.ss.usermodel.Workbook; +import org.apache.poi.xssf.usermodel.XSSFWorkbook; +import org.opencb.biodata.models.core.chimerdb.ChimerSeq; +import org.opencb.biodata.models.core.chimerdb.ChimerSeqGeneBreakpoint; +import org.opencb.commons.utils.FileUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.FileInputStream; +import java.io.IOException; +import java.nio.file.Path; +import java.util.Iterator; + +import static org.opencb.biodata.formats.feature.chimerdb.ChimerPubParser.getIntCellValue; +import static org.opencb.biodata.formats.feature.chimerdb.ChimerPubParser.getStringCellValue; + +public class ChimerSeqParser { + + private static Logger logger = LoggerFactory.getLogger(ChimerSeqParser.class); + + public static void parse(Path xlsxPath, ChimerDbParserCallback callback) throws IOException { + logger.info("Parsing ChimerSeq file: {}", xlsxPath); + FileUtils.checkFile(xlsxPath); + + try (FileInputStream excelFile = new FileInputStream(xlsxPath.toFile()); + Workbook workbook = new XSSFWorkbook(excelFile)) { + // Get the first sheet from the workbook + Sheet sheet = workbook.getSheetAt(0); + + // Iterate over rows + Iterator rowIterator = sheet.iterator(); + while (rowIterator.hasNext()) { + Row row = rowIterator.next(); + + // Skip header row if needed (e.g., if first row is header) + if 
(row.getRowNum() == 0) { + continue; + } + + String strValue; + Integer intValue; + + ChimerSeq chimerSeq = new ChimerSeq(); + + // 0 1 2 3 4 5 6 7 8 9 10 11 + // id ChimerDB_Type Source webSource Fusion_pair H_gene H_chr H_position H_strand T_gene T_chr T_position + // 12 13 14 15 16 17 18 + // T_strand Genomic_breakpoint Genome_Build_Version Cancertype BarcodeID Seed_reads_num Spanning_pairs_num + // 19 20 21 22 23 24 25 26 + // Junction_reads_num Frame Chr_info H_locus H_kinase H_oncogene H_tumor_suppressor H_receptor + // 27 28 29 30 + // H_transcription_factor T_locus T_kinase T_oncogene + // 31 32 33 34 35 36 + // T_tumor_suppressor T_receptor T_transcription_factor ChimerKB ChimerPub Highly_Reliable_Seq + + // ID + chimerSeq.setId(String.valueOf((int) row.getCell(0).getNumericCellValue())); + + // ChimerDB Type + strValue = getStringCellValue(row, 1); + if (strValue != null) { + chimerSeq.setChimerDbType(strValue); + } + + // Source (different from source provided by user) + strValue = getStringCellValue(row, 2); + if (strValue != null) { + chimerSeq.setChimerSource(strValue); + } + + // Web source + strValue = getStringCellValue(row, 3); + if (strValue != null) { + chimerSeq.setWebSource(strValue); + } + + // Fusion pair + strValue = getStringCellValue(row, 4); + if (strValue != null) { + chimerSeq.setFusionPair(strValue); + } + + // Head gene breakpoint + ChimerSeqGeneBreakpoint head = new ChimerSeqGeneBreakpoint(); + strValue = getStringCellValue(row, 5); + if (strValue != null) { + head.setGene(strValue); + } + strValue = getStringCellValue(row, 6); + if (strValue != null) { + if (strValue.startsWith("chr") || strValue.startsWith("Chr") || strValue.startsWith("CHR")) { + // Remove 'chr' prefix if present + strValue = strValue.substring(3); + } + head.setChromosome(strValue); + } + intValue = getIntCellValue(row, 7); + if (intValue != null) { + head.setPosition(intValue); + } + strValue = getStringCellValue(row, 8); + if (strValue != null) { + 
head.setStrand(strValue); + } + strValue = getStringCellValue(row, 22); + if (strValue != null) { + head.setLocus(strValue); + } + strValue = getStringCellValue(row, 23); + if (strValue != null) { + head.setKinase(strValue.equalsIgnoreCase("kinase")); + } + strValue = getStringCellValue(row, 24); + if (strValue != null) { + head.setOncogene(strValue.equalsIgnoreCase("oncogene")); + } + strValue = getStringCellValue(row, 25); + if (strValue != null) { + head.setTumorSuppressor(strValue.equalsIgnoreCase("Tumor suppressor gene")); + } + strValue = getStringCellValue(row, 26); + if (strValue != null) { + head.setReceptor(strValue.equalsIgnoreCase("Receptor")); + } + strValue = getStringCellValue(row, 27); + if (strValue != null) { + head.setTranscriptionFactor(strValue.equalsIgnoreCase("Transcription factor")); + } + chimerSeq.setHeadGene(head); + + // Tail gene breakpoint + ChimerSeqGeneBreakpoint tail = new ChimerSeqGeneBreakpoint(); + strValue = getStringCellValue(row, 9); + if (strValue != null) { + tail.setGene(strValue); + } + strValue = getStringCellValue(row, 10); + if (strValue != null) { + if (strValue.startsWith("chr") || strValue.startsWith("Chr") || strValue.startsWith("CHR")) { + // Remove 'chr' prefix if present + strValue = strValue.substring(3); + } + tail.setChromosome(strValue); + } + intValue = getIntCellValue(row, 11); + if (intValue != null) { + tail.setPosition(intValue); + } + strValue = getStringCellValue(row, 12); + if (strValue != null) { + tail.setStrand(strValue); + } + strValue = getStringCellValue(row, 28); + if (strValue != null) { + tail.setLocus(strValue); + } + strValue = getStringCellValue(row, 29); + if (strValue != null) { + tail.setKinase(strValue.equalsIgnoreCase("kinase")); + } + strValue = getStringCellValue(row, 30); + if (strValue != null) { + tail.setOncogene(strValue.equalsIgnoreCase("oncogene")); + } + strValue = getStringCellValue(row, 31); + if (strValue != null) { + 
tail.setTumorSuppressor(strValue.equalsIgnoreCase("Tumor suppressor gene")); + } + strValue = getStringCellValue(row, 32); + if (strValue != null) { + tail.setReceptor(strValue.equalsIgnoreCase("Receptor")); + } + strValue = getStringCellValue(row, 33); + if (strValue != null) { + tail.setTranscriptionFactor(strValue.equalsIgnoreCase("Transcription factor")); + } + chimerSeq.setTailGene(tail); + + // Genomic breakpoint + strValue = getStringCellValue(row, 13); + if (strValue != null) { + chimerSeq.setGenomicBreakpoint(strValue); + } + + // Genome build version + strValue = getStringCellValue(row, 14); + if (strValue != null) { + chimerSeq.setGenomeBuildVersion(strValue); + } + + // Cancer type + strValue = getStringCellValue(row, 15); + if (strValue != null) { + chimerSeq.setCancerType(strValue); + } + + // Barcode ID + strValue = getStringCellValue(row, 16); + if (strValue != null) { + chimerSeq.setBarcodeId(strValue); + } + + // Seed reads number + intValue = getIntCellValue(row, 17); + if (intValue != null) { + chimerSeq.setSeedReadsNum(intValue); + } + + // Spanning pairs number + intValue = getIntCellValue(row, 18); + if (intValue != null) { + chimerSeq.setSpanningPairsNum(intValue); + } + + // Junction reads number + intValue = getIntCellValue(row, 19); + if (intValue != null) { + chimerSeq.setJunctionReadsNum(intValue); + } + + // Frame + strValue = getStringCellValue(row, 20); + if (strValue != null) { + chimerSeq.setFrame(strValue); + } + + // Chromosome info + strValue = getStringCellValue(row, 21); + if (strValue != null) { + chimerSeq.setChrInfo(strValue); + } + + // ChimerKb + strValue = getStringCellValue(row, 34); + if (strValue != null) { + chimerSeq.setChimerKb(strValue.equalsIgnoreCase("KB")); + } + + // ChimerPub + strValue = getStringCellValue(row, 35); + if (strValue != null) { + chimerSeq.setChimerPub(strValue.equalsIgnoreCase("Pub")); + } + + // Highly reliable sequence + strValue = getStringCellValue(row, 36); + if (strValue != null) { +
chimerSeq.setHighlyReliableSeq(strValue.equalsIgnoreCase("Seq+")); + } + + // Callback to process the gene fusion + callback.processChimerDbObject(chimerSeq); + } + } catch (IOException e) { + throw new IOException("Error reading the ChimerSeq file: " + e.getMessage(), e); + } + logger.info("ChimerSeq file parsed successfully: {}", xlsxPath); + } +} \ No newline at end of file diff --git a/biodata-formats/src/test/java/org/opencb/biodata/formats/feature/chimerdb/ChimerDbParserTest.java b/biodata-formats/src/test/java/org/opencb/biodata/formats/feature/chimerdb/ChimerDbParserTest.java index 8d9c1105..4ad5d2e0 100644 --- a/biodata-formats/src/test/java/org/opencb/biodata/formats/feature/chimerdb/ChimerDbParserTest.java +++ b/biodata-formats/src/test/java/org/opencb/biodata/formats/feature/chimerdb/ChimerDbParserTest.java @@ -2,53 +2,45 @@ import org.junit.Assert; -import org.opencb.biodata.models.variant.avro.GeneFusion; +import org.junit.Test; +import org.opencb.biodata.models.core.GeneFusion; +import org.opencb.biodata.models.core.chimerdb.ChimerKb; import java.io.IOException; import java.nio.file.Path; import java.nio.file.Paths; -import java.util.ArrayList; -import java.util.List; public class ChimerDbParserTest { - public void testParse() throws IOException { Path xlsxPath = Paths.get(getClass().getResource("/ChimerKB4.small.xlsx").getPath()); ChimerDbParserTest.MyCallback callback = new ChimerDbParserTest.MyCallback(">>> Testing message"); - ChimerDbParser.parse(xlsxPath, callback); - Assert.assertEquals(50, callback.getCounter()); - -// MiRnaGene mi0000060 = callback.getMiRnaGene("MI0000060"); + ChimerKbParser.parse(xlsxPath, callback); + Assert.assertEquals(50, callback.getGeneFusions().getChimerKb().size()); } - // Implementation of the MirBaseParserCallback function - public class MyCallback implements GeneFusionParserCallback { + public class MyCallback implements ChimerDbParserCallback { private String msg; - private List geneFusionList; + private
GeneFusion geneFusions; public MyCallback(String msg) { this.msg = msg; - this.geneFusionList = new ArrayList<>(); + this.geneFusions = new GeneFusion(); } @Override - public boolean processGeneFusion(GeneFusion geneFusion) { + public boolean processChimerDbObject(ChimerKb chimerKb) { System.out.println(msg); - System.out.println(geneFusion.toString()); - geneFusionList.add(geneFusion); + System.out.println(chimerKb.toString()); + geneFusions.getChimerKb().add(chimerKb); return true; } - public List getGeneFusionList() { - return geneFusionList; - } - - public int getCounter() { - return geneFusionList.size(); + public GeneFusion getGeneFusions() { + return geneFusions; } } diff --git a/biodata-models/src/main/avro/variantAnnotation.avdl b/biodata-models/src/main/avro/variantAnnotation.avdl index 0d69e931..b1073bc4 100644 --- a/biodata-models/src/main/avro/variantAnnotation.avdl +++ b/biodata-models/src/main/avro/variantAnnotation.avdl @@ -70,14 +70,14 @@ protocol VariantAnnotations { string strand; } - record GeneFusion { + record GeneFusionSummary { string id; string pair; string source; string gene5PrimeJunction; string gene3PrimeJunction; - union { null, GeneFusionBreakpoint } headGene; - union { null, GeneFusionBreakpoint } tailGene; + //union { null, GeneFusionBreakpoint } headGene; + //union { null, GeneFusionBreakpoint } tailGene; union { null, array } diseases; union { null, array } publications; union { null, array } validations; @@ -391,7 +391,7 @@ protocol VariantAnnotations { union { null, array } repeat; union { null, array } drugs; union { null, array } imprintedGenes; - union { null, array } geneFusions; + union { null, array } geneFusionSummaries; union { null, map } additionalAttributes = null; } diff --git a/biodata-models/src/main/java/org/opencb/biodata/models/core/GeneAnnotation.java b/biodata-models/src/main/java/org/opencb/biodata/models/core/GeneAnnotation.java index c31db9ae..c350dbde 100644 --- 
a/biodata-models/src/main/java/org/opencb/biodata/models/core/GeneAnnotation.java +++ b/biodata-models/src/main/java/org/opencb/biodata/models/core/GeneAnnotation.java @@ -35,7 +35,7 @@ public class GeneAnnotation { private List cancerAssociations; private List cancerHotspots; private List imprinted; - private List fusions; + private GeneFusion fusions; public GeneAnnotation() { this.expression = new ArrayList<>(); @@ -46,21 +46,21 @@ public GeneAnnotation() { this.cancerAssociations = new ArrayList<>(); this.cancerHotspots = new ArrayList<>(); this.imprinted = new ArrayList<>(); - this.fusions = new ArrayList<>(); + this.fusions = new GeneFusion(); } @Deprecated public GeneAnnotation(List expression, List diseases, List drugs, List constraints, List mirnaTargets) { this(expression, diseases, drugs, constraints, mirnaTargets, new ArrayList<>(), new ArrayList<>(), new ArrayList<>(), - new ArrayList<>()); + new GeneFusion()); } @Deprecated public GeneAnnotation(List expression, List diseases, List drugs, List constraints, List mirnaTargets, List cancerAssociations) { this(expression, diseases, drugs, constraints, mirnaTargets, cancerAssociations, new ArrayList<>(), new ArrayList<>(), - new ArrayList<>()); + new GeneFusion()); } @Deprecated @@ -68,12 +68,12 @@ public GeneAnnotation(List expression, List di List constraints, List mirnaTargets, List cancerAssociations, List cancerHotspots) { this(expression, diseases, drugs, constraints, mirnaTargets, cancerAssociations, cancerHotspots, new ArrayList<>(), - new ArrayList<>()); + new GeneFusion()); } public GeneAnnotation(List expression, List diseases, List drugs, List constraints, List mirnaTargets, List cancerAssociations, - List cancerHotspots, List imprinted, List fusions) { + List cancerHotspots, List imprinted, GeneFusion fusions) { this.expression = expression; this.diseases = diseases; this.drugs = drugs; @@ -173,11 +173,11 @@ public GeneAnnotation setImprinted(List imprinted) { return this; } - public List 
getFusions() { + public GeneFusion getFusions() { return fusions; } - public GeneAnnotation setFusions(List fusions) { + public GeneAnnotation setFusions(GeneFusion fusions) { this.fusions = fusions; return this; } diff --git a/biodata-models/src/main/java/org/opencb/biodata/models/core/GeneFusion.java b/biodata-models/src/main/java/org/opencb/biodata/models/core/GeneFusion.java new file mode 100644 index 00000000..bc30b355 --- /dev/null +++ b/biodata-models/src/main/java/org/opencb/biodata/models/core/GeneFusion.java @@ -0,0 +1,83 @@ +/* + * + * + */ + +package org.opencb.biodata.models.core; + +import org.opencb.biodata.models.core.chimerdb.ChimerKb; +import org.opencb.biodata.models.core.chimerdb.ChimerPub; +import org.opencb.biodata.models.core.chimerdb.ChimerSeq; + +import java.util.ArrayList; +import java.util.List; + +public class GeneFusion { + + private List chimerKb; + private List chimerPub; + private List chimerSeq; + + public GeneFusion() { + this.chimerKb = new ArrayList<>(); + this.chimerPub = new ArrayList<>(); + this.chimerSeq = new ArrayList<>(); + } + + public GeneFusion(List chimerKb, List chimerPub, List chimerSeq) { + this.chimerKb = chimerKb; + this.chimerPub = chimerPub; + this.chimerSeq = chimerSeq; + } + + @Override + public String toString() { + final StringBuilder sb = new StringBuilder("GeneFusion{"); + sb.append("chimerKb=").append(chimerKb); + sb.append(", chimerPub=").append(chimerPub); + sb.append(", chimerSeq=").append(chimerSeq); + sb.append('}'); + return sb.toString(); + } + + public List getChimerKb() { + return chimerKb; + } + + public GeneFusion setChimerKb(List chimerKb) { + this.chimerKb = chimerKb; + return this; + } + + public List getChimerPub() { + return chimerPub; + } + + public GeneFusion setChimerPub(List chimerPub) { + this.chimerPub = chimerPub; + return this; + } + + public List getChimerSeq() { + return chimerSeq; + } + + public GeneFusion setChimerSeq(List chimerSeq) { + this.chimerSeq = chimerSeq; + return 
this; + } +} diff --git a/biodata-models/src/main/java/org/opencb/biodata/models/core/chimerdb/ChimerKb.java b/biodata-models/src/main/java/org/opencb/biodata/models/core/chimerdb/ChimerKb.java new file mode 100644 index 00000000..2ef0b1cf --- /dev/null +++ b/biodata-models/src/main/java/org/opencb/biodata/models/core/chimerdb/ChimerKb.java @@ -0,0 +1,386 @@ +/* + * + * + */ + +package org.opencb.biodata.models.core.chimerdb; + +import java.util.ArrayList; +import java.util.List; + +public class ChimerKb { + + public static final String SOURCE = "chimerkb"; + + // 0 1 2 3 4 5 6 7 8 9 10 + // id ChimerDB_Type Source webSource Fusion_pair 5Gene_Junction 3Gene_Junction H_gene H_chr H_position H_strand + // 11 12 13 14 15 16 17 18 + // T_gene T_chr T_position T_strand Genomic_breakpoint Exonic_breakpoint Breakpoint_Type Genome_Build_Version + // 19 20 21 22 23 24 25 26 27 28 + // PMID Disease Validation Frame Chr_info Kinase Oncogene Tumor_suppressor Receptor Transcription_Factor + // 29 30 31 + // ChimerPub ChimerSeq ChimerSeq+ + + private String id; + private String chimerDbType; + private String chimerSource; + private String webSource; + private String fusionPair; + private String fiveGeneJunction; + private String threeGeneJunction; + private ChimerKbGeneBreakpoint headGene; + private ChimerKbGeneBreakpoint tailGene; + private boolean genomicBreakpoint; + private boolean exonicBreakpoint; + private String breakpointType; + private String genomeBuildVersion; + private List pmid; + private List disease; + private List validation; + private String frame; + private String chrInfo; + private boolean kinase; + private boolean oncogene; + private boolean tumorSuppressor; + private boolean receptor; + private boolean transcriptionFactor; + private boolean chimerPub; + private boolean chimerSeq; + private boolean chimerSeqPlus; + + private String source; + + public ChimerKb() { + this.pmid = new ArrayList<>(); + this.disease = new ArrayList<>(); + this.validation = new 
ArrayList<>(); + + this.source = SOURCE; + } + + public ChimerKb(String id, String chimerDbType, String chimerSource, String webSource, String fusionPair, String fiveGeneJunction, + String threeGeneJunction, ChimerKbGeneBreakpoint headGene, ChimerKbGeneBreakpoint tailGene, boolean genomicBreakpoint, + boolean exonicBreakpoint, String breakpointType, String genomeBuildVersion, List pmid, List disease, + List validation, String frame, String chrInfo, boolean kinase, boolean oncogene, boolean tumorSuppressor, + boolean receptor, boolean transcriptionFactor, boolean chimerPub, boolean chimerSeq, boolean chimerSeqPlus, + String source) { + this.id = id; + this.chimerDbType = chimerDbType; + this.chimerSource = chimerSource; + this.webSource = webSource; + this.fusionPair = fusionPair; + this.fiveGeneJunction = fiveGeneJunction; + this.threeGeneJunction = threeGeneJunction; + this.headGene = headGene; + this.tailGene = tailGene; + this.genomicBreakpoint = genomicBreakpoint; + this.exonicBreakpoint = exonicBreakpoint; + this.breakpointType = breakpointType; + this.genomeBuildVersion = genomeBuildVersion; + this.pmid = pmid; + this.disease = disease; + this.validation = validation; + this.frame = frame; + this.chrInfo = chrInfo; + this.kinase = kinase; + this.oncogene = oncogene; + this.tumorSuppressor = tumorSuppressor; + this.receptor = receptor; + this.transcriptionFactor = transcriptionFactor; + this.chimerPub = chimerPub; + this.chimerSeq = chimerSeq; + this.chimerSeqPlus = chimerSeqPlus; + this.source = source; + } + + @Override + public String toString() { + final StringBuilder sb = new StringBuilder("ChimerKb{"); + sb.append("id='").append(id).append('\''); + sb.append(", chimerDbType='").append(chimerDbType).append('\''); + sb.append(", chimerSource='").append(chimerSource).append('\''); + sb.append(", webSource='").append(webSource).append('\''); + sb.append(", fusionPair='").append(fusionPair).append('\''); + sb.append(", 
fiveGeneJunction='").append(fiveGeneJunction).append('\''); + sb.append(", threeGeneJunction='").append(threeGeneJunction).append('\''); + sb.append(", headGene=").append(headGene); + sb.append(", tailGene=").append(tailGene); + sb.append(", genomicBreakpoint='").append(genomicBreakpoint).append('\''); + sb.append(", exonicBreakpoint='").append(exonicBreakpoint).append('\''); + sb.append(", breakpointType='").append(breakpointType).append('\''); + sb.append(", genomeBuildVersion='").append(genomeBuildVersion).append('\''); + sb.append(", pmid=").append(pmid); + sb.append(", disease=").append(disease); + sb.append(", validation=").append(validation); + sb.append(", frame='").append(frame).append('\''); + sb.append(", chrInfo='").append(chrInfo).append('\''); + sb.append(", kinase=").append(kinase); + sb.append(", oncogene=").append(oncogene); + sb.append(", tumorSuppressor=").append(tumorSuppressor); + sb.append(", receptor=").append(receptor); + sb.append(", transcriptionFactor=").append(transcriptionFactor); + sb.append(", chimerPub=").append(chimerPub); + sb.append(", chimerSeq=").append(chimerSeq); + sb.append(", chimerSeqPlus=").append(chimerSeqPlus); + sb.append(", source='").append(source).append('\''); + sb.append('}'); + return sb.toString(); + } + + public String getId() { + return id; + } + + public ChimerKb setId(String id) { + this.id = id; + return this; + } + + public String getChimerDbType() { + return chimerDbType; + } + + public ChimerKb setChimerDbType(String chimerDbType) { + this.chimerDbType = chimerDbType; + return this; + } + + public String getChimerSource() { + return chimerSource; + } + + public ChimerKb setChimerSource(String chimerSource) { + this.chimerSource = chimerSource; + return this; + } + + public String getWebSource() { + return webSource; + } + + public ChimerKb setWebSource(String webSource) { + this.webSource = webSource; + return this; + } + + public String getFusionPair() { + return fusionPair; + } + + public ChimerKb 
setFusionPair(String fusionPair) { + this.fusionPair = fusionPair; + return this; + } + + public String getFiveGeneJunction() { + return fiveGeneJunction; + } + + public ChimerKb setFiveGeneJunction(String fiveGeneJunction) { + this.fiveGeneJunction = fiveGeneJunction; + return this; + } + + public String getThreeGeneJunction() { + return threeGeneJunction; + } + + public ChimerKb setThreeGeneJunction(String threeGeneJunction) { + this.threeGeneJunction = threeGeneJunction; + return this; + } + + public ChimerKbGeneBreakpoint getHeadGene() { + return headGene; + } + + public ChimerKb setHeadGene(ChimerKbGeneBreakpoint headGene) { + this.headGene = headGene; + return this; + } + + public ChimerKbGeneBreakpoint getTailGene() { + return tailGene; + } + + public ChimerKb setTailGene(ChimerKbGeneBreakpoint tailGene) { + this.tailGene = tailGene; + return this; + } + + public boolean getGenomicBreakpoint() { + return genomicBreakpoint; + } + + public ChimerKb setGenomicBreakpoint(boolean genomicBreakpoint) { + this.genomicBreakpoint = genomicBreakpoint; + return this; + } + + public boolean getExonicBreakpoint() { + return exonicBreakpoint; + } + + public ChimerKb setExonicBreakpoint(boolean exonicBreakpoint) { + this.exonicBreakpoint = exonicBreakpoint; + return this; + } + + public String getBreakpointType() { + return breakpointType; + } + + public ChimerKb setBreakpointType(String breakpointType) { + this.breakpointType = breakpointType; + return this; + } + + public String getGenomeBuildVersion() { + return genomeBuildVersion; + } + + public ChimerKb setGenomeBuildVersion(String genomeBuildVersion) { + this.genomeBuildVersion = genomeBuildVersion; + return this; + } + + public List getPmid() { + return pmid; + } + + public ChimerKb setPmid(List pmid) { + this.pmid = pmid; + return this; + } + + public List getDisease() { + return disease; + } + + public ChimerKb setDisease(List disease) { + this.disease = disease; + return this; + } + + public List getValidation() { 
+ return validation; + } + + public ChimerKb setValidation(List validation) { + this.validation = validation; + return this; + } + + public String getFrame() { + return frame; + } + + public ChimerKb setFrame(String frame) { + this.frame = frame; + return this; + } + + public String getChrInfo() { + return chrInfo; + } + + public ChimerKb setChrInfo(String chrInfo) { + this.chrInfo = chrInfo; + return this; + } + + public boolean isKinase() { + return kinase; + } + + public ChimerKb setKinase(boolean kinase) { + this.kinase = kinase; + return this; + } + + public boolean isOncogene() { + return oncogene; + } + + public ChimerKb setOncogene(boolean oncogene) { + this.oncogene = oncogene; + return this; + } + + public boolean isTumorSuppressor() { + return tumorSuppressor; + } + + public ChimerKb setTumorSuppressor(boolean tumorSuppressor) { + this.tumorSuppressor = tumorSuppressor; + return this; + } + + public boolean isReceptor() { + return receptor; + } + + public ChimerKb setReceptor(boolean receptor) { + this.receptor = receptor; + return this; + } + + public boolean isTranscriptionFactor() { + return transcriptionFactor; + } + + public ChimerKb setTranscriptionFactor(boolean transcriptionFactor) { + this.transcriptionFactor = transcriptionFactor; + return this; + } + + public boolean isChimerPub() { + return chimerPub; + } + + public ChimerKb setChimerPub(boolean chimerPub) { + this.chimerPub = chimerPub; + return this; + } + + public boolean isChimerSeq() { + return chimerSeq; + } + + public ChimerKb setChimerSeq(boolean chimerSeq) { + this.chimerSeq = chimerSeq; + return this; + } + + public boolean isChimerSeqPlus() { + return chimerSeqPlus; + } + + public ChimerKb setChimerSeqPlus(boolean chimerSeqPlus) { + this.chimerSeqPlus = chimerSeqPlus; + return this; + } + + public String getSource() { + return source; + } + + public ChimerKb setSource(String source) { + this.source = source; + return this; + } +} diff --git 
a/biodata-models/src/main/java/org/opencb/biodata/models/core/chimerdb/ChimerKbGeneBreakpoint.java b/biodata-models/src/main/java/org/opencb/biodata/models/core/chimerdb/ChimerKbGeneBreakpoint.java new file mode 100644 index 00000000..24a4d922 --- /dev/null +++ b/biodata-models/src/main/java/org/opencb/biodata/models/core/chimerdb/ChimerKbGeneBreakpoint.java @@ -0,0 +1,88 @@ +/* + * + * + */ + +package org.opencb.biodata.models.core.chimerdb; + +import java.util.ArrayList; +import java.util.List; + +public class ChimerKbGeneBreakpoint { + + protected String gene; + protected String chromosome; + protected int position; + protected String strand; + + public ChimerKbGeneBreakpoint() { + } + + public ChimerKbGeneBreakpoint(String gene, String chromosome, int position, String strand) { + this.gene = gene; + this.chromosome = chromosome; + this.position = position; + this.strand = strand; + } + + @Override + public String toString() { + final StringBuilder sb = new StringBuilder("ChimerKbGeneBreakpoint{"); + sb.append("gene='").append(gene).append('\''); + sb.append(", chromosome='").append(chromosome).append('\''); + sb.append(", position=").append(position); + sb.append(", strand='").append(strand).append('\''); + sb.append('}'); + return sb.toString(); + } + + public String getGene() { + return gene; + } + + public ChimerKbGeneBreakpoint setGene(String gene) { + this.gene = gene; + return this; + } + + public String getChromosome() { + return chromosome; + } + + public ChimerKbGeneBreakpoint setChromosome(String chromosome) { + this.chromosome = chromosome; + return this; + } + + public int getPosition() { + return position; + } + + public ChimerKbGeneBreakpoint setPosition(int position) { + this.position = position; + return this; + } + + public String getStrand() { + return strand; + } + + public ChimerKbGeneBreakpoint setStrand(String strand) { + this.strand = strand; + return this; + } +} diff --git 
a/biodata-models/src/main/java/org/opencb/biodata/models/core/chimerdb/ChimerPub.java b/biodata-models/src/main/java/org/opencb/biodata/models/core/chimerdb/ChimerPub.java new file mode 100644 index 00000000..6a788a2d --- /dev/null +++ b/biodata-models/src/main/java/org/opencb/biodata/models/core/chimerdb/ChimerPub.java @@ -0,0 +1,312 @@ +/* + * + * + */ + +package org.opencb.biodata.models.core.chimerdb; + +import java.util.ArrayList; +import java.util.List; + +public class ChimerPub { + + public static final String SOURCE = "chimerpub"; + + // 0 1 2 3 4 5 6 7 8 9 10 11 12 + // id Fusion_pair Translocation H_gene T_gene PMID Score Disease Validation Kinase Oncogene Tumor_suppressor Receptor + // 13 14 15 16 17 18 19 + // Transcription_Factor ChimerKB ChimerSeq ChimerSeq+ Sentence_highlight H_gene_highlight T_gene_highlight + // 20 21 + // Disease_highlight Validation_highlight + + private String id; + private String fusionPair; + private String translocation; + private ChimerPubGeneBreakpoint headGene; + private ChimerPubGeneBreakpoint tailGene; + private List pmid; + private double score; + private List disease; + private List validation; + private boolean kinase; + private boolean oncogene; + private boolean tumorSuppressor; + private boolean receptor; + private boolean transcriptionFactor; + private boolean chimerKb; + private boolean chimerSeq; + private boolean chimerSeqPlus; + private String senteceHighlight; + private String diseaseHighlight; + private String validationHighlight; + + private String source; + + public ChimerPub() { + this.headGene = new ChimerPubGeneBreakpoint(); + this.tailGene = new ChimerPubGeneBreakpoint(); + this.pmid = new ArrayList<>(); + this.disease = new ArrayList<>(); + this.validation = new ArrayList<>(); + + this.source = SOURCE; + } + + public ChimerPub(String id, String fusionPair, String translocation, ChimerPubGeneBreakpoint headGene, ChimerPubGeneBreakpoint tailGene, + List pmid, double score, List disease, List validation, 
boolean kinase, boolean oncogene, + boolean tumorSuppressor, boolean receptor, boolean transcriptionFactor, boolean chimerKb, boolean chimerSeq, + boolean chimerSeqPlus, String senteceHighlight, String diseaseHighlight, String validationHighlight, String source) { + this.id = id; + this.fusionPair = fusionPair; + this.translocation = translocation; + this.headGene = headGene; + this.tailGene = tailGene; + this.pmid = pmid; + this.score = score; + this.disease = disease; + this.validation = validation; + this.kinase = kinase; + this.oncogene = oncogene; + this.tumorSuppressor = tumorSuppressor; + this.receptor = receptor; + this.transcriptionFactor = transcriptionFactor; + this.chimerKb = chimerKb; + this.chimerSeq = chimerSeq; + this.chimerSeqPlus = chimerSeqPlus; + this.senteceHighlight = senteceHighlight; + this.diseaseHighlight = diseaseHighlight; + this.validationHighlight = validationHighlight; + this.source = source; + } + + @Override + public String toString() { + final StringBuilder sb = new StringBuilder("ChimerPub{"); + sb.append("id='").append(id).append('\''); + sb.append(", fusionPair='").append(fusionPair).append('\''); + sb.append(", translocation='").append(translocation).append('\''); + sb.append(", headGene=").append(headGene); + sb.append(", tailGene=").append(tailGene); + sb.append(", pmid=").append(pmid); + sb.append(", score=").append(score); + sb.append(", disease=").append(disease); + sb.append(", validation=").append(validation); + sb.append(", kinase=").append(kinase); + sb.append(", oncogene=").append(oncogene); + sb.append(", tumorSuppressor=").append(tumorSuppressor); + sb.append(", receptor=").append(receptor); + sb.append(", transcriptionFactor=").append(transcriptionFactor); + sb.append(", chimerKb=").append(chimerKb); + sb.append(", chimerSeq=").append(chimerSeq); + sb.append(", chimerSeqPlus=").append(chimerSeqPlus); + sb.append(", senteceHighlight='").append(senteceHighlight).append('\''); + sb.append(", 
diseaseHighlight='").append(diseaseHighlight).append('\''); + sb.append(", validationHighlight='").append(validationHighlight).append('\''); + sb.append(", source='").append(source).append('\''); + sb.append('}'); + return sb.toString(); + } + + public String getId() { + return id; + } + + public ChimerPub setId(String id) { + this.id = id; + return this; + } + + public String getFusionPair() { + return fusionPair; + } + + public ChimerPub setFusionPair(String fusionPair) { + this.fusionPair = fusionPair; + return this; + } + + public String getTranslocation() { + return translocation; + } + + public ChimerPub setTranslocation(String translocation) { + this.translocation = translocation; + return this; + } + + public ChimerPubGeneBreakpoint getHeadGene() { + return headGene; + } + + public ChimerPub setHeadGene(ChimerPubGeneBreakpoint headGene) { + this.headGene = headGene; + return this; + } + + public ChimerPubGeneBreakpoint getTailGene() { + return tailGene; + } + + public ChimerPub setTailGene(ChimerPubGeneBreakpoint tailGene) { + this.tailGene = tailGene; + return this; + } + + public List getPmid() { + return pmid; + } + + public ChimerPub setPmid(List pmid) { + this.pmid = pmid; + return this; + } + + public double getScore() { + return score; + } + + public ChimerPub setScore(double score) { + this.score = score; + return this; + } + + public List getDisease() { + return disease; + } + + public ChimerPub setDisease(List disease) { + this.disease = disease; + return this; + } + + public List getValidation() { + return validation; + } + + public ChimerPub setValidation(List validation) { + this.validation = validation; + return this; + } + + public boolean isKinase() { + return kinase; + } + + public ChimerPub setKinase(boolean kinase) { + this.kinase = kinase; + return this; + } + + public boolean isOncogene() { + return oncogene; + } + + public ChimerPub setOncogene(boolean oncogene) { + this.oncogene = oncogene; + return this; + } + + public boolean 
isTumorSuppressor() { + return tumorSuppressor; + } + + public ChimerPub setTumorSuppressor(boolean tumorSuppressor) { + this.tumorSuppressor = tumorSuppressor; + return this; + } + + public boolean isReceptor() { + return receptor; + } + + public ChimerPub setReceptor(boolean receptor) { + this.receptor = receptor; + return this; + } + + public boolean isTranscriptionFactor() { + return transcriptionFactor; + } + + public ChimerPub setTranscriptionFactor(boolean transcriptionFactor) { + this.transcriptionFactor = transcriptionFactor; + return this; + } + + public boolean isChimerKb() { + return chimerKb; + } + + public ChimerPub setChimerKb(boolean chimerKb) { + this.chimerKb = chimerKb; + return this; + } + + public boolean isChimerSeq() { + return chimerSeq; + } + + public ChimerPub setChimerSeq(boolean chimerSeq) { + this.chimerSeq = chimerSeq; + return this; + } + + public boolean isChimerSeqPlus() { + return chimerSeqPlus; + } + + public ChimerPub setChimerSeqPlus(boolean chimerSeqPlus) { + this.chimerSeqPlus = chimerSeqPlus; + return this; + } + + public String getSenteceHighlight() { + return senteceHighlight; + } + + public ChimerPub setSenteceHighlight(String senteceHighlight) { + this.senteceHighlight = senteceHighlight; + return this; + } + + public String getDiseaseHighlight() { + return diseaseHighlight; + } + + public ChimerPub setDiseaseHighlight(String diseaseHighlight) { + this.diseaseHighlight = diseaseHighlight; + return this; + } + + public String getValidationHighlight() { + return validationHighlight; + } + + public ChimerPub setValidationHighlight(String validationHighlight) { + this.validationHighlight = validationHighlight; + return this; + } + + public String getSource() { + return source; + } + + public ChimerPub setSource(String source) { + this.source = source; + return this; + } +} diff --git a/biodata-models/src/main/java/org/opencb/biodata/models/core/chimerdb/ChimerPubGeneBreakpoint.java 
b/biodata-models/src/main/java/org/opencb/biodata/models/core/chimerdb/ChimerPubGeneBreakpoint.java new file mode 100644 index 00000000..f8b00d62 --- /dev/null +++ b/biodata-models/src/main/java/org/opencb/biodata/models/core/chimerdb/ChimerPubGeneBreakpoint.java @@ -0,0 +1,61 @@ +/* + * + * + */ + +package org.opencb.biodata.models.core.chimerdb; + +public class ChimerPubGeneBreakpoint { + + private String gene; + private String highlight; + + public ChimerPubGeneBreakpoint() { + } + + public ChimerPubGeneBreakpoint(String gene, String highlight) { + this.gene = gene; + this.highlight = highlight; + } + + @Override + public String toString() { + final StringBuilder sb = new StringBuilder("ChimerPubGeneBreakpoint{"); + sb.append("gene='").append(gene).append('\''); + sb.append(", highlight='").append(highlight).append('\''); + sb.append('}'); + return sb.toString(); + } + + public String getGene() { + return gene; + } + + public ChimerPubGeneBreakpoint setGene(String gene) { + this.gene = gene; + return this; + } + + public String getHighlight() { + return highlight; + } + + public ChimerPubGeneBreakpoint setHighlight(String highlight) { + this.highlight = highlight; + return this; + } +} diff --git a/biodata-models/src/main/java/org/opencb/biodata/models/core/chimerdb/ChimerSeq.java b/biodata-models/src/main/java/org/opencb/biodata/models/core/chimerdb/ChimerSeq.java new file mode 100644 index 00000000..97a26f80 --- /dev/null +++ b/biodata-models/src/main/java/org/opencb/biodata/models/core/chimerdb/ChimerSeq.java @@ -0,0 +1,299 @@ +/* + * + * + */ + +package org.opencb.biodata.models.core.chimerdb; + +public class ChimerSeq { + + public static final String SOURCE = "chimerseq"; + + // 0 1 2 3 4 5 6 7 8 9 10 11 12 + // id ChimerDB_Type Source webSource Fusion_pair H_gene H_chr H_position H_strand T_gene T_chr T_position T_strand + // 13 14 15 16 17 18 19 20 + // Genomic_breakpoint Genome_Build_Version Cancertype BarcodeID Seed_reads_num Spanning_pairs_num 
Junction_reads_num Frame + // 21 22 23 24 25 26 27 28 29 30 + // Chr_info H_locus H_kinase H_oncogene H_tumor_suppressor H_receptor H_transcription_factor T_locus T_kinase T_oncogene + // 31 32 33 34 35 36 + // T_tumor_suppressor T_receptor T_transcription_factor ChimerKB ChimerPub Highly_Reliable_Seq + + + private String id; + private String chimerDbType; + private String chimerSource; + private String webSource; + private String fusionPair; + private ChimerSeqGeneBreakpoint headGene; + private ChimerSeqGeneBreakpoint tailGene; + private String genomicBreakpoint; + private String genomeBuildVersion; + private String cancerType; + private String barcodeId; + private int seedReadsNum; + private int spanningPairsNum; + private int junctionReadsNum; + private String frame; + private String chrInfo; + private boolean chimerKb; + private boolean chimerPub; + private boolean highlyReliableSeq; + + private String source; + + public ChimerSeq() { + this.headGene = new ChimerSeqGeneBreakpoint(); + this.tailGene = new ChimerSeqGeneBreakpoint(); + + this.source = SOURCE; + } + + public ChimerSeq(String id, String chimerDbType, String chimerSource, String webSource, String fusionPair, + ChimerSeqGeneBreakpoint headGene, ChimerSeqGeneBreakpoint tailGene, String genomicBreakpoint, + String genomeBuildVersion, String cancerType, String barcodeId, int seedReadsNum, int spanningPairsNum, + int junctionReadsNum, String frame, String chrInfo, boolean chimerKb, boolean chimerPub, boolean highlyReliableSeq, + String source) { + this.id = id; + this.chimerDbType = chimerDbType; + this.chimerSource = chimerSource; + this.webSource = webSource; + this.fusionPair = fusionPair; + this.headGene = headGene; + this.tailGene = tailGene; + this.genomicBreakpoint = genomicBreakpoint; + this.genomeBuildVersion = genomeBuildVersion; + this.cancerType = cancerType; + this.barcodeId = barcodeId; + this.seedReadsNum = seedReadsNum; + this.spanningPairsNum = spanningPairsNum; + this.junctionReadsNum = 
junctionReadsNum; + this.frame = frame; + this.chrInfo = chrInfo; + this.chimerKb = chimerKb; + this.chimerPub = chimerPub; + this.highlyReliableSeq = highlyReliableSeq; + + this.source = source; + } + + @Override + public String toString() { + final StringBuilder sb = new StringBuilder("ChimerSeq{"); + sb.append("id='").append(id).append('\''); + sb.append(", chimerDbType='").append(chimerDbType).append('\''); + sb.append(", chimerSource='").append(chimerSource).append('\''); + sb.append(", webSource='").append(webSource).append('\''); + sb.append(", fusionPair='").append(fusionPair).append('\''); + sb.append(", headGene=").append(headGene); + sb.append(", tailGene=").append(tailGene); + sb.append(", genomicBreakpoint='").append(genomicBreakpoint).append('\''); + sb.append(", genomeBuildVersion='").append(genomeBuildVersion).append('\''); + sb.append(", cancerType='").append(cancerType).append('\''); + sb.append(", barcodeId='").append(barcodeId).append('\''); + sb.append(", seedReadsNum=").append(seedReadsNum); + sb.append(", spanningPairsNum=").append(spanningPairsNum); + sb.append(", junctionReadsNum=").append(junctionReadsNum); + sb.append(", frame='").append(frame).append('\''); + sb.append(", chrInfo='").append(chrInfo).append('\''); + sb.append(", chimerKb=").append(chimerKb); + sb.append(", chimerPub=").append(chimerPub); + sb.append(", highlyReliableSeq='").append(highlyReliableSeq).append('\''); + sb.append(", source='").append(source).append('\''); + sb.append('}'); + return sb.toString(); + } + + public String getId() { + return id; + } + + public ChimerSeq setId(String id) { + this.id = id; + return this; + } + + public String getChimerDbType() { + return chimerDbType; + } + + public ChimerSeq setChimerDbType(String chimerDbType) { + this.chimerDbType = chimerDbType; + return this; + } + + public String getChimerSource() { + return chimerSource; + } + + public ChimerSeq setChimerSource(String chimerSource) { + this.chimerSource = chimerSource; + 
return this; + } + + public String getWebSource() { + return webSource; + } + + public ChimerSeq setWebSource(String webSource) { + this.webSource = webSource; + return this; + } + + public String getFusionPair() { + return fusionPair; + } + + public ChimerSeq setFusionPair(String fusionPair) { + this.fusionPair = fusionPair; + return this; + } + + public ChimerSeqGeneBreakpoint getHeadGene() { + return headGene; + } + + public ChimerSeq setHeadGene(ChimerSeqGeneBreakpoint headGene) { + this.headGene = headGene; + return this; + } + + public ChimerSeqGeneBreakpoint getTailGene() { + return tailGene; + } + + public ChimerSeq setTailGene(ChimerSeqGeneBreakpoint tailGene) { + this.tailGene = tailGene; + return this; + } + + public String getGenomicBreakpoint() { + return genomicBreakpoint; + } + + public ChimerSeq setGenomicBreakpoint(String genomicBreakpoint) { + this.genomicBreakpoint = genomicBreakpoint; + return this; + } + + public String getGenomeBuildVersion() { + return genomeBuildVersion; + } + + public ChimerSeq setGenomeBuildVersion(String genomeBuildVersion) { + this.genomeBuildVersion = genomeBuildVersion; + return this; + } + + public String getCancerType() { + return cancerType; + } + + public ChimerSeq setCancerType(String cancerType) { + this.cancerType = cancerType; + return this; + } + + public String getBarcodeId() { + return barcodeId; + } + + public ChimerSeq setBarcodeId(String barcodeId) { + this.barcodeId = barcodeId; + return this; + } + + public int getSeedReadsNum() { + return seedReadsNum; + } + + public ChimerSeq setSeedReadsNum(int seedReadsNum) { + this.seedReadsNum = seedReadsNum; + return this; + } + + public int getSpanningPairsNum() { + return spanningPairsNum; + } + + public ChimerSeq setSpanningPairsNum(int spanningPairsNum) { + this.spanningPairsNum = spanningPairsNum; + return this; + } + + public int getJunctionReadsNum() { + return junctionReadsNum; + } + + public ChimerSeq setJunctionReadsNum(int junctionReadsNum) { + 
this.junctionReadsNum = junctionReadsNum; + return this; + } + + public String getFrame() { + return frame; + } + + public ChimerSeq setFrame(String frame) { + this.frame = frame; + return this; + } + + public String getChrInfo() { + return chrInfo; + } + + public ChimerSeq setChrInfo(String chrInfo) { + this.chrInfo = chrInfo; + return this; + } + + public boolean isChimerKb() { + return chimerKb; + } + + public ChimerSeq setChimerKb(boolean chimerKb) { + this.chimerKb = chimerKb; + return this; + } + + public boolean isChimerPub() { + return chimerPub; + } + + public ChimerSeq setChimerPub(boolean chimerPub) { + this.chimerPub = chimerPub; + return this; + } + + public boolean getHighlyReliableSeq() { + return highlyReliableSeq; + } + + public ChimerSeq setHighlyReliableSeq(boolean highlyReliableSeq) { + this.highlyReliableSeq = highlyReliableSeq; + return this; + } + + public String getSource() { + return source; + } + + public ChimerSeq setSource(String source) { + this.source = source; + return this; + } +} diff --git a/biodata-models/src/main/java/org/opencb/biodata/models/core/chimerdb/ChimerSeqGeneBreakpoint.java b/biodata-models/src/main/java/org/opencb/biodata/models/core/chimerdb/ChimerSeqGeneBreakpoint.java new file mode 100644 index 00000000..7ee27f3d --- /dev/null +++ b/biodata-models/src/main/java/org/opencb/biodata/models/core/chimerdb/ChimerSeqGeneBreakpoint.java @@ -0,0 +1,118 @@ +/* + * + * + */ + +package org.opencb.biodata.models.core.chimerdb; + +public class ChimerSeqGeneBreakpoint extends ChimerKbGeneBreakpoint { + + private String locus; + private boolean kinase; + private boolean oncogene; + private boolean tumorSuppressor; + private boolean receptor; + private boolean transcriptionFactor; + + public ChimerSeqGeneBreakpoint() { + super(); + } + + public ChimerSeqGeneBreakpoint(String gene, String chromosome, int position, String strand, String locus, boolean kinase, + boolean oncogene, boolean tumorSuppressor, boolean receptor, boolean 
transcriptionFactor) { + super(gene, chromosome, position, strand); + this.locus = locus; + this.kinase = kinase; + this.oncogene = oncogene; + this.tumorSuppressor = tumorSuppressor; + this.receptor = receptor; + this.transcriptionFactor = transcriptionFactor; + } + + @Override + public String toString() { + final StringBuilder sb = new StringBuilder("ChimerSeqGeneBreakpoint{"); + sb.append("locus='").append(locus).append('\''); + sb.append(", kinase=").append(kinase); + sb.append(", oncogene=").append(oncogene); + sb.append(", tumorSuppressor=").append(tumorSuppressor); + sb.append(", receptor=").append(receptor); + sb.append(", transcriptionFactor=").append(transcriptionFactor); + sb.append(", gene='").append(gene).append('\''); + sb.append(", chromosome='").append(chromosome).append('\''); + sb.append(", position=").append(position); + sb.append(", strand='").append(strand).append('\''); + sb.append('}'); + return sb.toString(); + } + + public String getLocus() { + return locus; + } + + public ChimerSeqGeneBreakpoint setLocus(String locus) { + this.locus = locus; + return this; + } + + public boolean isKinase() { + return kinase; + } + + public ChimerSeqGeneBreakpoint setKinase(boolean kinase) { + this.kinase = kinase; + return this; + } + + public boolean isOncogene() { + return oncogene; + } + + public ChimerSeqGeneBreakpoint setOncogene(boolean oncogene) { + this.oncogene = oncogene; + return this; + } + + public boolean isTumorSuppressor() { + return tumorSuppressor; + } + + public ChimerSeqGeneBreakpoint setTumorSuppressor(boolean tumorSuppressor) { + this.tumorSuppressor = tumorSuppressor; + return this; + } + + public boolean isReceptor() { + return receptor; + } + + public ChimerSeqGeneBreakpoint setReceptor(boolean receptor) { + this.receptor = receptor; + return this; + } + + public boolean isTranscriptionFactor() { + return transcriptionFactor; + } + + public ChimerSeqGeneBreakpoint setTranscriptionFactor(boolean transcriptionFactor) { + 
this.transcriptionFactor = transcriptionFactor; + return this; + } +} + + From cc4a722da76e9c9a9b08ddead42ceed19a3e4e94 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joaqu=C3=ADn=20T=C3=A1rraga=20Gim=C3=A9nez?= Date: Mon, 11 Aug 2025 09:54:36 +0200 Subject: [PATCH 20/24] models: rename gene imprinting and gene fusions, #TASK-7745, #TASK-5564 --- .../src/main/avro/variantAnnotation.avdl | 22 ++++++------ .../biodata/models/core/GeneAnnotation.java | 34 +++++++++---------- 2 files changed, 28 insertions(+), 28 deletions(-) diff --git a/biodata-models/src/main/avro/variantAnnotation.avdl b/biodata-models/src/main/avro/variantAnnotation.avdl index b1073bc4..e01a828b 100644 --- a/biodata-models/src/main/avro/variantAnnotation.avdl +++ b/biodata-models/src/main/avro/variantAnnotation.avdl @@ -55,7 +55,7 @@ protocol VariantAnnotations { string source; } - record ImprintedGene { + record GeneImprinting { string geneName; string status; string expressedAllele; @@ -71,17 +71,17 @@ protocol VariantAnnotations { } record GeneFusionSummary { - string id; string pair; string source; - string gene5PrimeJunction; - string gene3PrimeJunction; - //union { null, GeneFusionBreakpoint } headGene; - //union { null, GeneFusionBreakpoint } tailGene; + union { null, GeneFusionBreakpoint } headGene; + union { null, GeneFusionBreakpoint } tailGene; + union { null, array } pmid; union { null, array } diseases; - union { null, array } publications; - union { null, array } validations; - union { null, map } attributes; + union { null, boolean } kinase; + union { null, boolean } oncogene; + union { null, boolean } tumorSuppresor; + union { null, boolean } receptor; + union { null, boolean } transcriptionFactor; } record PopulationFrequency { @@ -390,8 +390,8 @@ protocol VariantAnnotations { union { null, array } cytoband; union { null, array } repeat; union { null, array } drugs; - union { null, array } imprintedGenes; - union { null, array } geneFusionSummaries; + union { null, array } 
geneImprinting; + union { null, array } geneFusions; union { null, map } additionalAttributes = null; } diff --git a/biodata-models/src/main/java/org/opencb/biodata/models/core/GeneAnnotation.java b/biodata-models/src/main/java/org/opencb/biodata/models/core/GeneAnnotation.java index c350dbde..6dce7c6c 100644 --- a/biodata-models/src/main/java/org/opencb/biodata/models/core/GeneAnnotation.java +++ b/biodata-models/src/main/java/org/opencb/biodata/models/core/GeneAnnotation.java @@ -34,8 +34,8 @@ public class GeneAnnotation { private List mirnaTargets; private List cancerAssociations; private List cancerHotspots; - private List imprinted; - private GeneFusion fusions; + private List geneImprinting; + private GeneFusion geneFusions; public GeneAnnotation() { this.expression = new ArrayList<>(); @@ -45,8 +45,8 @@ public GeneAnnotation() { this.mirnaTargets = new ArrayList<>(); this.cancerAssociations = new ArrayList<>(); this.cancerHotspots = new ArrayList<>(); - this.imprinted = new ArrayList<>(); - this.fusions = new GeneFusion(); + this.geneImprinting = new ArrayList<>(); + this.geneFusions = new GeneFusion(); } @Deprecated @@ -73,7 +73,7 @@ public GeneAnnotation(List expression, List di public GeneAnnotation(List expression, List diseases, List drugs, List constraints, List mirnaTargets, List cancerAssociations, - List cancerHotspots, List imprinted, GeneFusion fusions) { + List cancerHotspots, List geneImprinting, GeneFusion geneFusions) { this.expression = expression; this.diseases = diseases; this.drugs = drugs; @@ -81,8 +81,8 @@ public GeneAnnotation(List expression, List di this.mirnaTargets = mirnaTargets; this.cancerAssociations = cancerAssociations; this.cancerHotspots = cancerHotspots; - this.imprinted = imprinted; - this.fusions = fusions; + this.geneImprinting = geneImprinting; + this.geneFusions = geneFusions; } @Override @@ -95,8 +95,8 @@ public String toString() { sb.append(", mirnaTargets=").append(mirnaTargets); sb.append(", 
cancerAssociations=").append(cancerAssociations); sb.append(", cancerHotspots=").append(cancerHotspots); - sb.append(", imprinted=").append(imprinted); - sb.append(", fusions=").append(fusions); + sb.append(", geneImprinting=").append(geneImprinting); + sb.append(", geneFusions=").append(geneFusions); sb.append('}'); return sb.toString(); } @@ -164,21 +164,21 @@ public GeneAnnotation setCancerHotspots(List cancerHotspots) { return this; } - public List getImprinted() { - return imprinted; + public List getGeneImprinting() { + return geneImprinting; } - public GeneAnnotation setImprinted(List imprinted) { - this.imprinted = imprinted; + public GeneAnnotation setGeneImprinting(List geneImprinting) { + this.geneImprinting = geneImprinting; return this; } - public GeneFusion getFusions() { - return fusions; + public GeneFusion getGeneFusions() { + return geneFusions; } - public GeneAnnotation setFusions(GeneFusion fusions) { - this.fusions = fusions; + public GeneAnnotation setGeneFusions(GeneFusion geneFusions) { + this.geneFusions = geneFusions; return this; } } From 4e821ca183a0fc835eb15a50c58d57270d89ff4a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joaqu=C3=ADn=20T=C3=A1rraga=20Gim=C3=A9nez?= Date: Mon, 11 Aug 2025 09:55:28 +0200 Subject: [PATCH 21/24] models: minor changes in ChimerDB data models, #TASK-7830, #TASK-5564 --- .../feature/chimerdb/ChimerKbParser.java | 4 +-- .../feature/chimerdb/ChimerPubParser.java | 4 +-- .../feature/chimerdb/ChimerDbParserTest.java | 1 - .../models/core/chimerdb/ChimerKb.java | 36 +++++++++---------- .../models/core/chimerdb/ChimerPub.java | 34 +++++++++--------- 5 files changed, 39 insertions(+), 40 deletions(-) diff --git a/biodata-formats/src/main/java/org/opencb/biodata/formats/feature/chimerdb/ChimerKbParser.java b/biodata-formats/src/main/java/org/opencb/biodata/formats/feature/chimerdb/ChimerKbParser.java index 0d01b25b..79fdd9aa 100644 --- 
a/biodata-formats/src/main/java/org/opencb/biodata/formats/feature/chimerdb/ChimerKbParser.java +++ b/biodata-formats/src/main/java/org/opencb/biodata/formats/feature/chimerdb/ChimerKbParser.java @@ -199,13 +199,13 @@ public static void parse(Path xlsxPath, ChimerDbParserCallback callbac // Diseases strValue = getStringCellValue(row, 20); if (strValue != null) { - chimerKb.setDisease(Arrays.stream(strValue.split(",")).map(String::trim).collect(Collectors.toList())); + chimerKb.setDiseases(Arrays.stream(strValue.split(",")).map(String::trim).collect(Collectors.toList())); } // Validations strValue = getStringCellValue(row, 21); if (strValue != null) { - chimerKb.setValidation(Arrays.stream(strValue.split(",")).map(String::trim).collect(Collectors.toList())); + chimerKb.setValidations(Arrays.stream(strValue.split(",")).map(String::trim).collect(Collectors.toList())); } // Frame diff --git a/biodata-formats/src/main/java/org/opencb/biodata/formats/feature/chimerdb/ChimerPubParser.java b/biodata-formats/src/main/java/org/opencb/biodata/formats/feature/chimerdb/ChimerPubParser.java index f909ab84..3561d481 100644 --- a/biodata-formats/src/main/java/org/opencb/biodata/formats/feature/chimerdb/ChimerPubParser.java +++ b/biodata-formats/src/main/java/org/opencb/biodata/formats/feature/chimerdb/ChimerPubParser.java @@ -133,13 +133,13 @@ public static void parse(Path xlsxPath, ChimerDbParserCallback callba // Diseases strValue = getStringCellValue(row, 7); if (strValue != null) { - chimerPub.setDisease(Arrays.stream(strValue.split(",")).map(String::trim).collect(Collectors.toList())); + chimerPub.setDiseases(Arrays.stream(strValue.split(",")).map(String::trim).collect(Collectors.toList())); } // Validations strValue = getStringCellValue(row, 8); if (strValue != null) { - chimerPub.setValidation(Arrays.stream(strValue.split(",")).map(String::trim).collect(Collectors.toList())); + 
chimerPub.setValidations(Arrays.stream(strValue.split(",")).map(String::trim).collect(Collectors.toList())); } // Kinase diff --git a/biodata-formats/src/test/java/org/opencb/biodata/formats/feature/chimerdb/ChimerDbParserTest.java b/biodata-formats/src/test/java/org/opencb/biodata/formats/feature/chimerdb/ChimerDbParserTest.java index 4ad5d2e0..b89419f8 100644 --- a/biodata-formats/src/test/java/org/opencb/biodata/formats/feature/chimerdb/ChimerDbParserTest.java +++ b/biodata-formats/src/test/java/org/opencb/biodata/formats/feature/chimerdb/ChimerDbParserTest.java @@ -2,7 +2,6 @@ import org.junit.Assert; -import org.junit.Test; import org.opencb.biodata.models.core.GeneFusion; import org.opencb.biodata.models.core.chimerdb.ChimerKb; diff --git a/biodata-models/src/main/java/org/opencb/biodata/models/core/chimerdb/ChimerKb.java b/biodata-models/src/main/java/org/opencb/biodata/models/core/chimerdb/ChimerKb.java index 2ef0b1cf..6dad082a 100644 --- a/biodata-models/src/main/java/org/opencb/biodata/models/core/chimerdb/ChimerKb.java +++ b/biodata-models/src/main/java/org/opencb/biodata/models/core/chimerdb/ChimerKb.java @@ -49,8 +49,8 @@ public class ChimerKb { private String breakpointType; private String genomeBuildVersion; private List pmid; - private List disease; - private List validation; + private List diseases; + private List validations; private String frame; private String chrInfo; private boolean kinase; @@ -66,16 +66,16 @@ public class ChimerKb { public ChimerKb() { this.pmid = new ArrayList<>(); - this.disease = new ArrayList<>(); - this.validation = new ArrayList<>(); + this.diseases = new ArrayList<>(); + this.validations = new ArrayList<>(); this.source = SOURCE; } public ChimerKb(String id, String chimerDbType, String chimerSource, String webSource, String fusionPair, String fiveGeneJunction, String threeGeneJunction, ChimerKbGeneBreakpoint headGene, ChimerKbGeneBreakpoint tailGene, boolean genomicBreakpoint, - boolean exonicBreakpoint, String 
breakpointType, String genomeBuildVersion, List pmid, List disease, - List validation, String frame, String chrInfo, boolean kinase, boolean oncogene, boolean tumorSuppressor, + boolean exonicBreakpoint, String breakpointType, String genomeBuildVersion, List pmid, List diseases, + List validations, String frame, String chrInfo, boolean kinase, boolean oncogene, boolean tumorSuppressor, boolean receptor, boolean transcriptionFactor, boolean chimerPub, boolean chimerSeq, boolean chimerSeqPlus, String source) { this.id = id; @@ -92,8 +92,8 @@ public ChimerKb(String id, String chimerDbType, String chimerSource, String webS this.breakpointType = breakpointType; this.genomeBuildVersion = genomeBuildVersion; this.pmid = pmid; - this.disease = disease; - this.validation = validation; + this.diseases = diseases; + this.validations = validations; this.frame = frame; this.chrInfo = chrInfo; this.kinase = kinase; @@ -124,8 +124,8 @@ public String toString() { sb.append(", breakpointType='").append(breakpointType).append('\''); sb.append(", genomeBuildVersion='").append(genomeBuildVersion).append('\''); sb.append(", pmid=").append(pmid); - sb.append(", disease=").append(disease); - sb.append(", validation=").append(validation); + sb.append(", diseases=").append(diseases); + sb.append(", validations=").append(validations); sb.append(", frame='").append(frame).append('\''); sb.append(", chrInfo='").append(chrInfo).append('\''); sb.append(", kinase=").append(kinase); @@ -267,21 +267,21 @@ public ChimerKb setPmid(List pmid) { return this; } - public List getDisease() { - return disease; + public List getDiseases() { + return diseases; } - public ChimerKb setDisease(List disease) { - this.disease = disease; + public ChimerKb setDiseases(List diseases) { + this.diseases = diseases; return this; } - public List getValidation() { - return validation; + public List getValidations() { + return validations; } - public ChimerKb setValidation(List validation) { - this.validation = 
validation; + public ChimerKb setValidations(List validations) { + this.validations = validations; return this; } diff --git a/biodata-models/src/main/java/org/opencb/biodata/models/core/chimerdb/ChimerPub.java b/biodata-models/src/main/java/org/opencb/biodata/models/core/chimerdb/ChimerPub.java index 6a788a2d..f78a9076 100644 --- a/biodata-models/src/main/java/org/opencb/biodata/models/core/chimerdb/ChimerPub.java +++ b/biodata-models/src/main/java/org/opencb/biodata/models/core/chimerdb/ChimerPub.java @@ -40,8 +40,8 @@ public class ChimerPub { private ChimerPubGeneBreakpoint tailGene; private List pmid; private double score; - private List disease; - private List validation; + private List diseases; + private List validations; private boolean kinase; private boolean oncogene; private boolean tumorSuppressor; @@ -60,14 +60,14 @@ public ChimerPub() { this.headGene = new ChimerPubGeneBreakpoint(); this.tailGene = new ChimerPubGeneBreakpoint(); this.pmid = new ArrayList<>(); - this.disease = new ArrayList<>(); - this.validation = new ArrayList<>(); + this.diseases = new ArrayList<>(); + this.validations = new ArrayList<>(); this.source = SOURCE; } public ChimerPub(String id, String fusionPair, String translocation, ChimerPubGeneBreakpoint headGene, ChimerPubGeneBreakpoint tailGene, - List pmid, double score, List disease, List validation, boolean kinase, boolean oncogene, + List pmid, double score, List diseases, List validations, boolean kinase, boolean oncogene, boolean tumorSuppressor, boolean receptor, boolean transcriptionFactor, boolean chimerKb, boolean chimerSeq, boolean chimerSeqPlus, String senteceHighlight, String diseaseHighlight, String validationHighlight, String source) { this.id = id; @@ -77,8 +77,8 @@ public ChimerPub(String id, String fusionPair, String translocation, ChimerPubGe this.tailGene = tailGene; this.pmid = pmid; this.score = score; - this.disease = disease; - this.validation = validation; + this.diseases = diseases; + this.validations = 
validations; this.kinase = kinase; this.oncogene = oncogene; this.tumorSuppressor = tumorSuppressor; @@ -103,8 +103,8 @@ public String toString() { sb.append(", tailGene=").append(tailGene); sb.append(", pmid=").append(pmid); sb.append(", score=").append(score); - sb.append(", disease=").append(disease); - sb.append(", validation=").append(validation); + sb.append(", diseases=").append(diseases); + sb.append(", validations=").append(validations); sb.append(", kinase=").append(kinase); sb.append(", oncogene=").append(oncogene); sb.append(", tumorSuppressor=").append(tumorSuppressor); @@ -184,21 +184,21 @@ public ChimerPub setScore(double score) { return this; } - public List getDisease() { - return disease; + public List getDiseases() { + return diseases; } - public ChimerPub setDisease(List disease) { - this.disease = disease; + public ChimerPub setDiseases(List diseases) { + this.diseases = diseases; return this; } - public List getValidation() { - return validation; + public List getValidations() { + return validations; } - public ChimerPub setValidation(List validation) { - this.validation = validation; + public ChimerPub setValidations(List validations) { + this.validations = validations; return this; } From 00ea1d8ec33d8e0c2e951153d6563005f32e4ea8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joaqu=C3=ADn=20T=C3=A1rraga=20Gim=C3=A9nez?= Date: Mon, 11 Aug 2025 11:59:15 +0200 Subject: [PATCH 22/24] models: rename member, #TASK-7830, #TASK-5564 --- .../feature/chimerdb/ChimerKbParser.java | 4 ++-- .../feature/chimerdb/ChimerPubParser.java | 4 ++-- .../feature/chimerdb/ChimerSeqParser.java | 4 ++-- .../core/chimerdb/ChimerKbGeneBreakpoint.java | 19 ++++++++----------- .../chimerdb/ChimerPubGeneBreakpoint.java | 16 ++++++++-------- .../chimerdb/ChimerSeqGeneBreakpoint.java | 6 +++--- 6 files changed, 25 insertions(+), 28 deletions(-) diff --git a/biodata-formats/src/main/java/org/opencb/biodata/formats/feature/chimerdb/ChimerKbParser.java 
b/biodata-formats/src/main/java/org/opencb/biodata/formats/feature/chimerdb/ChimerKbParser.java index 79fdd9aa..5c5e098b 100644 --- a/biodata-formats/src/main/java/org/opencb/biodata/formats/feature/chimerdb/ChimerKbParser.java +++ b/biodata-formats/src/main/java/org/opencb/biodata/formats/feature/chimerdb/ChimerKbParser.java @@ -117,7 +117,7 @@ public static void parse(Path xlsxPath, ChimerDbParserCallback callbac ChimerKbGeneBreakpoint head = new ChimerKbGeneBreakpoint(); strValue = getStringCellValue(row, 7); if (strValue != null) { - head.setGene(strValue); + head.setGeneName(strValue); } strValue = getStringCellValue(row, 8); if (strValue != null) { @@ -141,7 +141,7 @@ public static void parse(Path xlsxPath, ChimerDbParserCallback callbac ChimerKbGeneBreakpoint tail = new ChimerKbGeneBreakpoint(); strValue = getStringCellValue(row, 11); if (strValue != null) { - tail.setGene(strValue); + tail.setGeneName(strValue); } strValue = getStringCellValue(row, 12); if (strValue != null) { diff --git a/biodata-formats/src/main/java/org/opencb/biodata/formats/feature/chimerdb/ChimerPubParser.java b/biodata-formats/src/main/java/org/opencb/biodata/formats/feature/chimerdb/ChimerPubParser.java index 3561d481..6ca5e734 100644 --- a/biodata-formats/src/main/java/org/opencb/biodata/formats/feature/chimerdb/ChimerPubParser.java +++ b/biodata-formats/src/main/java/org/opencb/biodata/formats/feature/chimerdb/ChimerPubParser.java @@ -93,7 +93,7 @@ public static void parse(Path xlsxPath, ChimerDbParserCallback callba ChimerPubGeneBreakpoint head = new ChimerPubGeneBreakpoint(); strValue = getStringCellValue(row, 3); if (strValue != null) { - head.setGene(strValue); + head.setGeneName(strValue); } strValue = getStringCellValue(row, 18); if (strValue != null) { @@ -105,7 +105,7 @@ public static void parse(Path xlsxPath, ChimerDbParserCallback callba ChimerPubGeneBreakpoint tail = new ChimerPubGeneBreakpoint(); strValue = getStringCellValue(row, 4); if (strValue != null) { - 
tail.setGene(strValue); + tail.setGeneName(strValue); } strValue = getStringCellValue(row, 19); if (strValue != null) { diff --git a/biodata-formats/src/main/java/org/opencb/biodata/formats/feature/chimerdb/ChimerSeqParser.java b/biodata-formats/src/main/java/org/opencb/biodata/formats/feature/chimerdb/ChimerSeqParser.java index 97e30f2c..a0c0783c 100644 --- a/biodata-formats/src/main/java/org/opencb/biodata/formats/feature/chimerdb/ChimerSeqParser.java +++ b/biodata-formats/src/main/java/org/opencb/biodata/formats/feature/chimerdb/ChimerSeqParser.java @@ -107,7 +107,7 @@ public static void parse(Path xlsxPath, ChimerDbParserCallback callba ChimerSeqGeneBreakpoint head = new ChimerSeqGeneBreakpoint(); strValue = getStringCellValue(row, 5); if (strValue != null) { - head.setGene(strValue); + head.setGeneName(strValue); } strValue = getStringCellValue(row, 6); if (strValue != null) { @@ -155,7 +155,7 @@ public static void parse(Path xlsxPath, ChimerDbParserCallback callba ChimerSeqGeneBreakpoint tail = new ChimerSeqGeneBreakpoint(); strValue = getStringCellValue(row, 9); if (strValue != null) { - tail.setGene(strValue); + tail.setGeneName(strValue); } strValue = getStringCellValue(row, 10); if (strValue != null) { diff --git a/biodata-models/src/main/java/org/opencb/biodata/models/core/chimerdb/ChimerKbGeneBreakpoint.java b/biodata-models/src/main/java/org/opencb/biodata/models/core/chimerdb/ChimerKbGeneBreakpoint.java index 24a4d922..9f8cdb23 100644 --- a/biodata-models/src/main/java/org/opencb/biodata/models/core/chimerdb/ChimerKbGeneBreakpoint.java +++ b/biodata-models/src/main/java/org/opencb/biodata/models/core/chimerdb/ChimerKbGeneBreakpoint.java @@ -19,12 +19,9 @@ package org.opencb.biodata.models.core.chimerdb; -import java.util.ArrayList; -import java.util.List; - public class ChimerKbGeneBreakpoint { - protected String gene; + protected String geneName; protected String chromosome; protected int position; protected String strand; @@ -32,8 +29,8 @@ public 
class ChimerKbGeneBreakpoint { public ChimerKbGeneBreakpoint() { } - public ChimerKbGeneBreakpoint(String gene, String chromosome, int position, String strand) { - this.gene = gene; + public ChimerKbGeneBreakpoint(String geneName, String chromosome, int position, String strand) { + this.geneName = geneName; this.chromosome = chromosome; this.position = position; this.strand = strand; @@ -42,7 +39,7 @@ public ChimerKbGeneBreakpoint(String gene, String chromosome, int position, Stri @Override public String toString() { final StringBuilder sb = new StringBuilder("ChimerKbGeneBreakpoint{"); - sb.append("gene='").append(gene).append('\''); + sb.append("geneName='").append(geneName).append('\''); sb.append(", chromosome='").append(chromosome).append('\''); sb.append(", position=").append(position); sb.append(", strand='").append(strand).append('\''); @@ -50,12 +47,12 @@ public String toString() { return sb.toString(); } - public String getGene() { - return gene; + public String getGeneName() { + return geneName; } - public ChimerKbGeneBreakpoint setGene(String gene) { - this.gene = gene; + public ChimerKbGeneBreakpoint setGeneName(String geneName) { + this.geneName = geneName; return this; } diff --git a/biodata-models/src/main/java/org/opencb/biodata/models/core/chimerdb/ChimerPubGeneBreakpoint.java b/biodata-models/src/main/java/org/opencb/biodata/models/core/chimerdb/ChimerPubGeneBreakpoint.java index f8b00d62..8615f331 100644 --- a/biodata-models/src/main/java/org/opencb/biodata/models/core/chimerdb/ChimerPubGeneBreakpoint.java +++ b/biodata-models/src/main/java/org/opencb/biodata/models/core/chimerdb/ChimerPubGeneBreakpoint.java @@ -21,32 +21,32 @@ public class ChimerPubGeneBreakpoint { - private String gene; + private String geneName; private String highlight; public ChimerPubGeneBreakpoint() { } - public ChimerPubGeneBreakpoint(String gene, String highlight) { - this.gene = gene; + public ChimerPubGeneBreakpoint(String geneName, String highlight) { + this.geneName 
= geneName; this.highlight = highlight; } @Override public String toString() { final StringBuilder sb = new StringBuilder("ChimerPubGeneBreakpoint{"); - sb.append("gene='").append(gene).append('\''); + sb.append("geneName='").append(geneName).append('\''); sb.append(", highlight='").append(highlight).append('\''); sb.append('}'); return sb.toString(); } - public String getGene() { - return gene; + public String getGeneName() { + return geneName; } - public ChimerPubGeneBreakpoint setGene(String gene) { - this.gene = gene; + public ChimerPubGeneBreakpoint setGeneName(String geneName) { + this.geneName = geneName; return this; } diff --git a/biodata-models/src/main/java/org/opencb/biodata/models/core/chimerdb/ChimerSeqGeneBreakpoint.java b/biodata-models/src/main/java/org/opencb/biodata/models/core/chimerdb/ChimerSeqGeneBreakpoint.java index 7ee27f3d..29604f96 100644 --- a/biodata-models/src/main/java/org/opencb/biodata/models/core/chimerdb/ChimerSeqGeneBreakpoint.java +++ b/biodata-models/src/main/java/org/opencb/biodata/models/core/chimerdb/ChimerSeqGeneBreakpoint.java @@ -32,9 +32,9 @@ public ChimerSeqGeneBreakpoint() { super(); } - public ChimerSeqGeneBreakpoint(String gene, String chromosome, int position, String strand, String locus, boolean kinase, + public ChimerSeqGeneBreakpoint(String geneName, String chromosome, int position, String strand, String locus, boolean kinase, boolean oncogene, boolean tumorSuppressor, boolean receptor, boolean transcriptionFactor) { - super(gene, chromosome, position, strand); + super(geneName, chromosome, position, strand); this.locus = locus; this.kinase = kinase; this.oncogene = oncogene; @@ -52,7 +52,7 @@ public String toString() { sb.append(", tumorSuppressor=").append(tumorSuppressor); sb.append(", receptor=").append(receptor); sb.append(", transcriptionFactor=").append(transcriptionFactor); - sb.append(", gene='").append(gene).append('\''); + sb.append(", geneName='").append(geneName).append('\''); sb.append(", 
chromosome='").append(chromosome).append('\''); sb.append(", position=").append(position); sb.append(", strand='").append(strand).append('\''); From 712e7372249f3fbc7595d27557c9d29194cc1832 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joaqu=C3=ADn=20T=C3=A1rraga=20Gim=C3=A9nez?= Date: Fri, 5 Sep 2025 11:33:41 +0200 Subject: [PATCH 23/24] formats: implement the CIViC parser, #TASK-7903, #TASK-5564 --- .../formats/variant/civic/CivicParser.java | 357 ++++++++++++++++++ .../variant/civic/CivicParserCallback.java | 7 + .../variant/civic/CivicParserTest.java | 61 +++ .../models/core/civic/CivicAssertion.java | 246 ++++++++++++ .../core/civic/CivicClinicalEvidence.java | 264 +++++++++++++ .../models/core/civic/CivicFeature.java | 330 ++++++++++++++++ .../core/civic/CivicMolecularProfile.java | 124 ++++++ .../models/core/civic/CivicVariant.java | 304 +++++++++++++++ 8 files changed, 1693 insertions(+) create mode 100755 biodata-formats/src/main/java/org/opencb/biodata/formats/variant/civic/CivicParser.java create mode 100644 biodata-formats/src/main/java/org/opencb/biodata/formats/variant/civic/CivicParserCallback.java create mode 100644 biodata-formats/src/test/java/org/opencb/biodata/formats/variant/civic/CivicParserTest.java create mode 100644 biodata-models/src/main/java/org/opencb/biodata/models/core/civic/CivicAssertion.java create mode 100644 biodata-models/src/main/java/org/opencb/biodata/models/core/civic/CivicClinicalEvidence.java create mode 100644 biodata-models/src/main/java/org/opencb/biodata/models/core/civic/CivicFeature.java create mode 100644 biodata-models/src/main/java/org/opencb/biodata/models/core/civic/CivicMolecularProfile.java create mode 100644 biodata-models/src/main/java/org/opencb/biodata/models/core/civic/CivicVariant.java diff --git a/biodata-formats/src/main/java/org/opencb/biodata/formats/variant/civic/CivicParser.java b/biodata-formats/src/main/java/org/opencb/biodata/formats/variant/civic/CivicParser.java new file mode 100755 index 
00000000..83177585 --- /dev/null +++ b/biodata-formats/src/main/java/org/opencb/biodata/formats/variant/civic/CivicParser.java @@ -0,0 +1,357 @@ +/* + * + * + */ + +package org.opencb.biodata.formats.variant.civic; + +import org.apache.commons.lang3.StringUtils; +import org.opencb.biodata.formats.io.FileFormatException; +import org.opencb.biodata.models.core.civic.*; +import org.opencb.commons.utils.FileUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.BufferedReader; +import java.io.IOException; +import java.io.InputStreamReader; +import java.nio.file.Path; +import java.util.*; + +public class CivicParser { + + private static final Logger logger = LoggerFactory.getLogger(CivicParser.class); + + private CivicParser() { + throw new IllegalStateException("Utility class"); + } + + public static void parse(Path variantSummariesFile, Path featureSummariesFile, Path molecularProfileSummariesFile, + Path assertionSummariesFile, Path clinicalEvidenceSummariesFile, String version, + CivicParserCallback callback) throws IOException, FileFormatException { + + logger.info("Starting CIViC parsing with version: {}", version); + + // Step 1: Parse features first + Map featuresMap = parseFeaturesFile(featureSummariesFile); + logger.info("Parsed {} features", featuresMap.size()); + + // Step 2: Parse clinical evidence and link to molecular profiles + Map evidencesMap = parseClinicalEvidencesFile(clinicalEvidenceSummariesFile); + logger.info("Parsed {} evidences", evidencesMap.size()); + + // Step 3: Parse assertions and complete them with evidences + Map assertionsMap = parseAssertionsFile(assertionSummariesFile, evidencesMap); + logger.info("Parsed {} assertions and complete with evidences", assertionsMap.size()); + + // Step 4: Parse molecular profiles and complete them with assertions and evidences + Map profilesMap = parseMolecularProfilesFile(molecularProfileSummariesFile, assertionsMap, + evidencesMap); + logger.info("Parsed {} molecular 
profiles and complete with assertions and evidences", profilesMap.size()); + + + // Step 5: Parse variants and build complete objects + int numVariants = parseVariantsFile(variantSummariesFile, profilesMap, featuresMap, callback); + logger.info("Completed CIViC parsing: {} variants processed", numVariants); + } + + private static Map parseFeaturesFile(Path featureSummariesFile) throws IOException, FileFormatException { + Map featuresMap = new HashMap<>(); + + try (BufferedReader reader = new BufferedReader(new InputStreamReader(FileUtils.newInputStream(featureSummariesFile)))) { + String line = reader.readLine(); // Skip header + + while ((line = reader.readLine()) != null) { + if (StringUtils.isNotBlank(line)) { + String[] fields = line.split("\t", -1); + CivicFeature feature = parseFeatureFields(fields); + featuresMap.put(feature.getFeatureId(), feature); + } + } + } + + return featuresMap; + } + + private static CivicFeature parseFeatureFields(String[] fields) { + return new CivicFeature() + .setFeatureId(getField(fields, 0)) + .setFeatureCivicUrl(getField(fields, 1)) + .setFeatureType(getField(fields, 2)) + .setName(getField(fields, 3)) + .setFeatureAliases(parseStringList(getField(fields, 4))) + .setDescription(getField(fields, 5)) + .setLastReviewDate(getField(fields, 6)) + .setFlagged(parseBoolean(getField(fields, 7))) + .setEntrezId(getField(fields, 8)) + .setNcitId(getField(fields, 9)) + .setFivePrimePartnerStatus(getField(fields, 10)) + .setThreePrimePartnerStatus(getField(fields, 11)) + .setFivePrimeGeneId(getField(fields, 12)) + .setFivePrimeGeneName(getField(fields, 13)) + .setFivePrimeGeneEntrezId(getField(fields, 14)) + .setThreePrimeGeneId(getField(fields, 15)) + .setThreePrimeGeneName(getField(fields, 16)) + .setThreePrimeGeneEntrezId(getField(fields, 17)); + } + + private static Map parseClinicalEvidencesFile(Path clinicalEvidenceSummariesFile) throws IOException { + + Map evidencesMap = new HashMap<>(); + + try (BufferedReader reader = new 
BufferedReader(new InputStreamReader(FileUtils.newInputStream(clinicalEvidenceSummariesFile)))) { + String line = reader.readLine(); // Skip header + + while ((line = reader.readLine()) != null) { + if (StringUtils.isNotBlank(line)) { + String[] fields = line.split("\t", -1); + CivicClinicalEvidence evidence = parseClinicalEvidenceFields(fields); + evidencesMap.put(evidence.getEvidenceId(), evidence); + } + } + } + + return evidencesMap; + } + + private static CivicClinicalEvidence parseClinicalEvidenceFields(String[] fields) { + return new CivicClinicalEvidence() + .setDisease(getField(fields, 2)) + .setDoid(getField(fields, 3)) + .setPhenotypes(parseStringList(getField(fields, 4))) + .setTherapies(parseStringList(getField(fields, 5))) + .setTherapyInteractionType(getField(fields, 6)) + .setEvidenceType(getField(fields, 7)) + .setEvidenceDirection(getField(fields, 8)) + .setEvidenceLevel(getField(fields, 9)) + .setSignificance(getField(fields, 10)) + .setEvidenceStatement(getField(fields, 11)) + .setCitationId(getField(fields, 12)) + .setSourceType(getField(fields, 13)) + .setAscoAbstractId(getField(fields, 14)) + .setCitation(getField(fields, 15)) + .setNctIds(parseStringList(getField(fields, 16))) + .setRating(getField(fields, 17)) + .setEvidenceStatus(getField(fields, 18)) + .setEvidenceId(getField(fields, 19)) + .setVariantOrigin(getField(fields, 20)) + .setLastReviewDate(getField(fields, 21)) + .setEvidenceCivicUrl(getField(fields, 22)) + .setFlagged(parseBoolean(getField(fields, 24))); + } + + private static Map parseAssertionsFile(Path assertionSummariesFile, Map evidenceMap) throws IOException { + + Map assertionsMap = new HashMap<>(); + + try (BufferedReader reader = new BufferedReader(new InputStreamReader(FileUtils.newInputStream(assertionSummariesFile)))) { + String line = reader.readLine(); // Skip header + + while ((line = reader.readLine()) != null) { + if (StringUtils.isNotBlank(line)) { + String[] fields = line.split("\t", -1); + CivicAssertion 
assertion = parseAssertionFields(fields); + + // Set evidences from evidence map using the evidence IDs in the assertion + List evidenceIds = parseStringList(getField(fields, 18)); + for (String evidenceId : evidenceIds) { + if (evidenceMap.containsKey(evidenceId)) { + assertion.getEvidences().add(evidenceMap.get(evidenceId)); + } + } + + assertionsMap.put(assertion.getAssertionId(), assertion); + } + } + } + + return assertionsMap; + } + + private static CivicAssertion parseAssertionFields(String[] fields) { + return new CivicAssertion() + .setDisease(getField(fields, 2)) + .setDoid(getField(fields, 3)) + .setPhenotypes(parseStringList(getField(fields, 4))) + .setTherapies(parseStringList(getField(fields, 5))) + .setAssertionType(getField(fields, 6)) + .setAssertionDirection(getField(fields, 7)) + .setSignificance(getField(fields, 8)) + .setAcmgCodes(parseStringList(getField(fields, 9))) + .setAmpCategory(getField(fields, 10)) + .setNccnGuideline(getField(fields, 11)) + .setNccnGuidelineVersion(getField(fields, 12)) + .setRegulatoryApproval(getField(fields, 13)) + .setFdaCompanionTest(getField(fields, 14)) + .setAssertionSummary(getField(fields, 15)) + .setAssertionDescription(getField(fields, 16)) + .setAssertionId(getField(fields, 17)) + .setLastReviewDate(getField(fields, 19)) + .setAssertionCivicUrl(getField(fields, 20)) + .setFlagged(parseBoolean(getField(fields, 23))); + } + + private static Map parseMolecularProfilesFile(Path molecularProfileSummariesFile, + Map assertionsMap, + Map evidencesMap) + throws IOException { + + Map profilesMap = new HashMap<>(); + + try (BufferedReader reader = new BufferedReader(new InputStreamReader(FileUtils.newInputStream(molecularProfileSummariesFile)))) { + String line = reader.readLine(); // Skip header + + while ((line = reader.readLine()) != null) { + if (StringUtils.isNotBlank(line)) { + String[] fields = line.split("\t", -1); + CivicMolecularProfile profile = parseMolecularProfileFields(fields); + + // Set evidences 
from evidence map using the evidence IDs in the molecular profile + List evidenceIds = parseStringList(getField(fields, 6)); + for (String evidenceId : evidenceIds) { + if (evidencesMap.containsKey(evidenceId)) { + profile.getEvidences().add(evidencesMap.get(evidenceId)); + } + } + + // Set assertions from assertion map using the assertion IDs in the molecular profile + List assertionIds = parseStringList(getField(fields, 8)); + for (String assertionId : assertionIds) { + if (assertionsMap.containsKey(assertionId)) { + profile.getAssertions().add(assertionsMap.get(assertionId)); + } + } + + profilesMap.put(profile.getMolecularProfileId(), profile); + } + } + } + + return profilesMap; + } + + private static CivicMolecularProfile parseMolecularProfileFields(String[] fields) { + return new CivicMolecularProfile() + .setName(getField(fields, 0)) + .setMolecularProfileId(getField(fields, 1)) + .setSummary(getField(fields, 2)) + .setEvidenceScore(getField(fields, 5)) + .setAliases(parseStringList(getField(fields, 10))) + .setLastReviewDate(getField(fields, 11)) + .setFlagged(parseBoolean(getField(fields, 12))); + } + + private static int parseVariantsFile(Path variantSummariesFile, Map profilesMap, + Map featuresMap, CivicParserCallback callback) throws IOException { + + int numVariants = 0; + try (BufferedReader reader = new BufferedReader(new InputStreamReader(FileUtils.newInputStream(variantSummariesFile)))) { + String line = reader.readLine(); // Skip header + + while ((line = reader.readLine()) != null) { + if (StringUtils.isNotBlank(line)) { + String[] fields = line.split("\t", -1); + CivicVariant variant = parseVariantFields(fields); + + // Link feature and enhance with transcript/exon info + String featureId = getField(fields, 3); + if (featuresMap.containsKey(featureId)) { + CivicFeature feature = featuresMap.get(featureId); + // Enhance feature with transcript/exon information from variant file + feature.setFivePrimeTranscript(getField(fields, 32)) + 
.setFivePrimeEndExon(getField(fields, 33)) + .setFivePrimeExonOffset(getField(fields, 34)) + .setFivePrimeExonOffsetDirection(getField(fields, 35)) + .setThreePrimeTranscript(getField(fields, 36)) + .setThreePrimeStartExon(getField(fields, 37)) + .setThreePrimeExonOffset(getField(fields, 38)) + .setThreePrimeExonOffsetDirection(getField(fields, 39)); + + variant.setFeature(feature); + } + + // Link molecular profiles from profiles map using the single variant molecular profile ID + String singleVariantMolecularProfileId = getField(fields, 11); + if (StringUtils.isNotBlank(singleVariantMolecularProfileId) + && profilesMap.containsKey(singleVariantMolecularProfileId)) { + variant.setMolecularProfile(profilesMap.get(singleVariantMolecularProfileId)); + } + + // Process variant through callback + if (!callback.processCivicVariant(variant)) { + // Stop parsing if callback returns false + logger.warn("CIViC parsing stopped by callback request."); + break; + } + numVariants++; + } + } + } + + return numVariants; + } + + private static CivicVariant parseVariantFields(String[] fields) { + return new CivicVariant() + .setVariantId(getField(fields, 0)) + .setVariantCivicUrl(getField(fields, 1)) + .setVariant(getField(fields, 6)) + .setVariantAliases(parseStringList(getField(fields, 7))) + .setFlagged(parseBoolean(getField(fields, 8))) + .setVariantGroups(parseStringList(getField(fields, 9))) + .setVariantTypes(parseStringList(getField(fields, 10))) + .setLastReviewDate(getField(fields, 12)) + .setGene(getField(fields, 13)) + .setEntrezId(getField(fields, 14)) + .setChromosome(getField(fields, 15)) + .setStart(getField(fields, 16)) + .setStop(getField(fields, 17)) + .setReferenceBases(getField(fields, 18)) + .setVariantBases(getField(fields, 19)) + .setRepresentativeTranscript(getField(fields, 20)) + .setEnsemblVersion(getField(fields, 21)) + .setReferenceBuild(getField(fields, 22)) + .setHgvsDescriptions(parseStringList(getField(fields, 23))) + 
.setAlleleRegistryId(getField(fields, 24)) + .setClinvarIds(parseStringList(getField(fields, 25))) + .setNcitId(getField(fields, 26)) + .setViccCompliantName(getField(fields, 31)); + } + + // Helper methods + private static String getField(String[] fields, int index) { + if (index < fields.length && StringUtils.isNotBlank(fields[index])) { + return fields[index]; + } + return null; + } + + private static Boolean parseBoolean(String value) { + if (StringUtils.isNotBlank(value)) { + return "true".equalsIgnoreCase(value) || "1".equals(value); + } + return null; + } + + private static List parseStringList(String value) { + if (StringUtils.isNotBlank(value)) { + return new ArrayList<>(Arrays.asList(value.split(",\\s*"))); // mutable copy: Arrays.asList alone is a fixed-size view + } + return new ArrayList<>(); + } +} \ No newline at end of file diff --git a/biodata-formats/src/main/java/org/opencb/biodata/formats/variant/civic/CivicParserCallback.java b/biodata-formats/src/main/java/org/opencb/biodata/formats/variant/civic/CivicParserCallback.java new file mode 100644 index 00000000..e517baff --- /dev/null +++ b/biodata-formats/src/main/java/org/opencb/biodata/formats/variant/civic/CivicParserCallback.java @@ -0,0 +1,7 @@ +package org.opencb.biodata.formats.variant.civic; + +import org.opencb.biodata.models.core.civic.CivicVariant; + +public interface CivicParserCallback { + boolean processCivicVariant(CivicVariant civicVariant); +} diff --git a/biodata-formats/src/test/java/org/opencb/biodata/formats/variant/civic/CivicParserTest.java b/biodata-formats/src/test/java/org/opencb/biodata/formats/variant/civic/CivicParserTest.java new file mode 100644 index 00000000..9847c3d3 --- /dev/null +++ b/biodata-formats/src/test/java/org/opencb/biodata/formats/variant/civic/CivicParserTest.java @@ -0,0 +1,61 @@ +package org.opencb.biodata.formats.variant.civic; + +import org.junit.Assert; +import org.junit.Assume; +import org.junit.Test; +import org.opencb.biodata.formats.io.FileFormatException; +import org.opencb.biodata.models.core.civic.CivicVariant; +
+import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.ArrayList; +import java.util.List; + +public class CivicParserTest { + + // CivicParserCallback implementation that prints each parsed variant and collects it for the test + public static class MyCallback implements CivicParserCallback { + private final String msg; + private final List civicVariants; + + public MyCallback(String msg) { + this.msg = msg; + this.civicVariants = new ArrayList<>(); + } + + @Override + public boolean processCivicVariant(CivicVariant civicVariant) { + System.out.println(msg + ": CIViC ID = " + civicVariant.getVariantId() + ", position: " + civicVariant.getChromosome() + + ":" + civicVariant.getStart() + ":" + civicVariant.getReferenceBases() + ":" + civicVariant.getVariantBases()); + civicVariants.add(civicVariant); + return true; + } + + public List getCivicVariants() { + return civicVariants; + } + } + + @Test + public void testCivicParser() throws IOException, FileFormatException { + Path civicPath = Paths.get("/opt/civic-data/"); + Assume.assumeTrue(Files.exists(civicPath)); + + String version = "v1"; + Path variantSummariesFile = civicPath.resolve("01-Sep-2025-VariantSummaries.tsv"); + Path featureSummariesFile = civicPath.resolve("01-Sep-2025-FeatureSummaries.tsv"); + Path molecularProfileSummariesFile = civicPath.resolve("01-Sep-2025-MolecularProfileSummaries.tsv"); + Path assertionSummariesFile = civicPath.resolve("01-Sep-2025-AssertionSummaries.tsv"); + Path clinicalEvidenceSummariesFile = civicPath.resolve("01-Sep-2025-ClinicalEvidenceSummaries.tsv"); + + MyCallback callback = new MyCallback(">>> Testing message"); + + CivicParser.parse(variantSummariesFile, featureSummariesFile, molecularProfileSummariesFile, assertionSummariesFile, + clinicalEvidenceSummariesFile, version, callback); + List civicVariants = callback.getCivicVariants(); + + Assert.assertEquals(1811, civicVariants.size()); + } +} \ No newline at end of file diff --git
a/biodata-models/src/main/java/org/opencb/biodata/models/core/civic/CivicAssertion.java b/biodata-models/src/main/java/org/opencb/biodata/models/core/civic/CivicAssertion.java new file mode 100644 index 00000000..098e97b4 --- /dev/null +++ b/biodata-models/src/main/java/org/opencb/biodata/models/core/civic/CivicAssertion.java @@ -0,0 +1,246 @@ +package org.opencb.biodata.models.core.civic; + +import java.util.ArrayList; +import java.util.List; + +public class CivicAssertion { + + // From AssertionSummaries.tsv + private String disease; + private String doid; + private List phenotypes; + private List therapies; + private String assertionType; + private String assertionDirection; + private String significance; + private List acmgCodes; + private String ampCategory; + private String nccnGuideline; + private String nccnGuidelineVersion; + private String regulatoryApproval; + private String fdaCompanionTest; + private String assertionSummary; + private String assertionDescription; + private String assertionId; + private String lastReviewDate; + private String assertionCivicUrl; + private Boolean isFlagged; + + // Associated data + private List evidences; + + public CivicAssertion() { + this.phenotypes = new ArrayList<>(); + this.therapies = new ArrayList<>(); + this.acmgCodes = new ArrayList<>(); + + this.evidences = new ArrayList<>(); + } + + @Override + public String toString() { + final StringBuilder sb = new StringBuilder("CivicAssertion{"); + sb.append("disease='").append(disease).append('\''); + sb.append(", doid='").append(doid).append('\''); + sb.append(", phenotypes=").append(phenotypes); + sb.append(", therapies=").append(therapies); + sb.append(", assertionType='").append(assertionType).append('\''); + sb.append(", assertionDirection='").append(assertionDirection).append('\''); + sb.append(", significance='").append(significance).append('\''); + sb.append(", acmgCodes=").append(acmgCodes); + sb.append(", ampCategory='").append(ampCategory).append('\''); + 
sb.append(", nccnGuideline='").append(nccnGuideline).append('\''); + sb.append(", nccnGuidelineVersion='").append(nccnGuidelineVersion).append('\''); + sb.append(", regulatoryApproval='").append(regulatoryApproval).append('\''); + sb.append(", fdaCompanionTest='").append(fdaCompanionTest).append('\''); + sb.append(", assertionSummary='").append(assertionSummary).append('\''); + sb.append(", assertionDescription='").append(assertionDescription).append('\''); + sb.append(", assertionId='").append(assertionId).append('\''); + sb.append(", lastReviewDate='").append(lastReviewDate).append('\''); + sb.append(", assertionCivicUrl='").append(assertionCivicUrl).append('\''); + sb.append(", isFlagged=").append(isFlagged); + sb.append(", evidences=").append(evidences); + sb.append('}'); + return sb.toString(); + } + + public String getDisease() { + return disease; + } + + public CivicAssertion setDisease(String disease) { + this.disease = disease; + return this; + } + + public String getDoid() { + return doid; + } + + public CivicAssertion setDoid(String doid) { + this.doid = doid; + return this; + } + + public List getPhenotypes() { + return phenotypes; + } + + public CivicAssertion setPhenotypes(List phenotypes) { + this.phenotypes = phenotypes; + return this; + } + + public List getTherapies() { + return therapies; + } + + public CivicAssertion setTherapies(List therapies) { + this.therapies = therapies; + return this; + } + + public String getAssertionType() { + return assertionType; + } + + public CivicAssertion setAssertionType(String assertionType) { + this.assertionType = assertionType; + return this; + } + + public String getAssertionDirection() { + return assertionDirection; + } + + public CivicAssertion setAssertionDirection(String assertionDirection) { + this.assertionDirection = assertionDirection; + return this; + } + + public String getSignificance() { + return significance; + } + + public CivicAssertion setSignificance(String significance) { + 
this.significance = significance; + return this; + } + + public List getAcmgCodes() { + return acmgCodes; + } + + public CivicAssertion setAcmgCodes(List acmgCodes) { + this.acmgCodes = acmgCodes; + return this; + } + + public String getAmpCategory() { + return ampCategory; + } + + public CivicAssertion setAmpCategory(String ampCategory) { + this.ampCategory = ampCategory; + return this; + } + + public String getNccnGuideline() { + return nccnGuideline; + } + + public CivicAssertion setNccnGuideline(String nccnGuideline) { + this.nccnGuideline = nccnGuideline; + return this; + } + + public String getNccnGuidelineVersion() { + return nccnGuidelineVersion; + } + + public CivicAssertion setNccnGuidelineVersion(String nccnGuidelineVersion) { + this.nccnGuidelineVersion = nccnGuidelineVersion; + return this; + } + + public String getRegulatoryApproval() { + return regulatoryApproval; + } + + public CivicAssertion setRegulatoryApproval(String regulatoryApproval) { + this.regulatoryApproval = regulatoryApproval; + return this; + } + + public String getFdaCompanionTest() { + return fdaCompanionTest; + } + + public CivicAssertion setFdaCompanionTest(String fdaCompanionTest) { + this.fdaCompanionTest = fdaCompanionTest; + return this; + } + + public String getAssertionSummary() { + return assertionSummary; + } + + public CivicAssertion setAssertionSummary(String assertionSummary) { + this.assertionSummary = assertionSummary; + return this; + } + + public String getAssertionDescription() { + return assertionDescription; + } + + public CivicAssertion setAssertionDescription(String assertionDescription) { + this.assertionDescription = assertionDescription; + return this; + } + + public String getAssertionId() { + return assertionId; + } + + public CivicAssertion setAssertionId(String assertionId) { + this.assertionId = assertionId; + return this; + } + + public String getLastReviewDate() { + return lastReviewDate; + } + + public CivicAssertion setLastReviewDate(String 
lastReviewDate) { + this.lastReviewDate = lastReviewDate; + return this; + } + + public String getAssertionCivicUrl() { + return assertionCivicUrl; + } + + public CivicAssertion setAssertionCivicUrl(String assertionCivicUrl) { + this.assertionCivicUrl = assertionCivicUrl; + return this; + } + + public Boolean getFlagged() { + return isFlagged; + } + + public CivicAssertion setFlagged(Boolean flagged) { + isFlagged = flagged; + return this; + } + + public List getEvidences() { + return evidences; + } + + public CivicAssertion setEvidences(List evidences) { + this.evidences = evidences; + return this; + } +} \ No newline at end of file diff --git a/biodata-models/src/main/java/org/opencb/biodata/models/core/civic/CivicClinicalEvidence.java b/biodata-models/src/main/java/org/opencb/biodata/models/core/civic/CivicClinicalEvidence.java new file mode 100644 index 00000000..56df46a5 --- /dev/null +++ b/biodata-models/src/main/java/org/opencb/biodata/models/core/civic/CivicClinicalEvidence.java @@ -0,0 +1,264 @@ +package org.opencb.biodata.models.core.civic; + +import java.util.ArrayList; +import java.util.List; + +public class CivicClinicalEvidence { + + // From ClinicalEvidenceSummaries.tsv + private String disease; + private String doid; + private List phenotypes; + private List therapies; + private String therapyInteractionType; + private String evidenceType; + private String evidenceDirection; + private String evidenceLevel; + private String significance; + private String evidenceStatement; + private String citationId; + private String sourceType; + private String ascoAbstractId; + private String citation; + private List nctIds; + private String rating; + private String evidenceStatus; + private String evidenceId; + private String variantOrigin; + private String lastReviewDate; + private String evidenceCivicUrl; + private Boolean isFlagged; + + public CivicClinicalEvidence() { + this.phenotypes = new ArrayList<>(); + this.therapies = new ArrayList<>(); + this.nctIds = 
new ArrayList<>(); + } + + @Override + public String toString() { + final StringBuilder sb = new StringBuilder("CivicClinicalEvidence{"); + sb.append("disease='").append(disease).append('\''); // first field: no leading ", " separator + sb.append(", doid='").append(doid).append('\''); + sb.append(", phenotypes=").append(phenotypes); + sb.append(", therapies=").append(therapies); + sb.append(", therapyInteractionType='").append(therapyInteractionType).append('\''); + sb.append(", evidenceType='").append(evidenceType).append('\''); + sb.append(", evidenceDirection='").append(evidenceDirection).append('\''); + sb.append(", evidenceLevel='").append(evidenceLevel).append('\''); + sb.append(", significance='").append(significance).append('\''); + sb.append(", evidenceStatement='").append(evidenceStatement).append('\''); + sb.append(", citationId='").append(citationId).append('\''); + sb.append(", sourceType='").append(sourceType).append('\''); + sb.append(", ascoAbstractId='").append(ascoAbstractId).append('\''); + sb.append(", citation='").append(citation).append('\''); + sb.append(", nctIds=").append(nctIds); + sb.append(", rating='").append(rating).append('\''); + sb.append(", evidenceStatus='").append(evidenceStatus).append('\''); + sb.append(", evidenceId='").append(evidenceId).append('\''); + sb.append(", variantOrigin='").append(variantOrigin).append('\''); + sb.append(", lastReviewDate='").append(lastReviewDate).append('\''); + sb.append(", evidenceCivicUrl='").append(evidenceCivicUrl).append('\''); + sb.append(", isFlagged=").append(isFlagged); + sb.append('}'); + return sb.toString(); + } + + public String getDisease() { + return disease; + } + + public CivicClinicalEvidence setDisease(String disease) { + this.disease = disease; + return this; + } + + public String getDoid() { + return doid; + } + + public CivicClinicalEvidence setDoid(String doid) { + this.doid = doid; + return this; + } + + public List getPhenotypes() { + return phenotypes; + } + + public CivicClinicalEvidence setPhenotypes(List
phenotypes) { + this.phenotypes = phenotypes; + return this; + } + + public List getTherapies() { + return therapies; + } + + public CivicClinicalEvidence setTherapies(List therapies) { + this.therapies = therapies; + return this; + } + + public String getTherapyInteractionType() { + return therapyInteractionType; + } + + public CivicClinicalEvidence setTherapyInteractionType(String therapyInteractionType) { + this.therapyInteractionType = therapyInteractionType; + return this; + } + + public String getEvidenceType() { + return evidenceType; + } + + public CivicClinicalEvidence setEvidenceType(String evidenceType) { + this.evidenceType = evidenceType; + return this; + } + + public String getEvidenceDirection() { + return evidenceDirection; + } + + public CivicClinicalEvidence setEvidenceDirection(String evidenceDirection) { + this.evidenceDirection = evidenceDirection; + return this; + } + + public String getEvidenceLevel() { + return evidenceLevel; + } + + public CivicClinicalEvidence setEvidenceLevel(String evidenceLevel) { + this.evidenceLevel = evidenceLevel; + return this; + } + + public String getSignificance() { + return significance; + } + + public CivicClinicalEvidence setSignificance(String significance) { + this.significance = significance; + return this; + } + + public String getEvidenceStatement() { + return evidenceStatement; + } + + public CivicClinicalEvidence setEvidenceStatement(String evidenceStatement) { + this.evidenceStatement = evidenceStatement; + return this; + } + + public String getCitationId() { + return citationId; + } + + public CivicClinicalEvidence setCitationId(String citationId) { + this.citationId = citationId; + return this; + } + + public String getSourceType() { + return sourceType; + } + + public CivicClinicalEvidence setSourceType(String sourceType) { + this.sourceType = sourceType; + return this; + } + + public String getAscoAbstractId() { + return ascoAbstractId; + } + + public CivicClinicalEvidence setAscoAbstractId(String 
ascoAbstractId) { + this.ascoAbstractId = ascoAbstractId; + return this; + } + + public String getCitation() { + return citation; + } + + public CivicClinicalEvidence setCitation(String citation) { + this.citation = citation; + return this; + } + + public List getNctIds() { + return nctIds; + } + + public CivicClinicalEvidence setNctIds(List nctIds) { + this.nctIds = nctIds; + return this; + } + + public String getRating() { + return rating; + } + + public CivicClinicalEvidence setRating(String rating) { + this.rating = rating; + return this; + } + + public String getEvidenceStatus() { + return evidenceStatus; + } + + public CivicClinicalEvidence setEvidenceStatus(String evidenceStatus) { + this.evidenceStatus = evidenceStatus; + return this; + } + + public String getEvidenceId() { + return evidenceId; + } + + public CivicClinicalEvidence setEvidenceId(String evidenceId) { + this.evidenceId = evidenceId; + return this; + } + + public String getVariantOrigin() { + return variantOrigin; + } + + public CivicClinicalEvidence setVariantOrigin(String variantOrigin) { + this.variantOrigin = variantOrigin; + return this; + } + + public String getLastReviewDate() { + return lastReviewDate; + } + + public CivicClinicalEvidence setLastReviewDate(String lastReviewDate) { + this.lastReviewDate = lastReviewDate; + return this; + } + + public String getEvidenceCivicUrl() { + return evidenceCivicUrl; + } + + public CivicClinicalEvidence setEvidenceCivicUrl(String evidenceCivicUrl) { + this.evidenceCivicUrl = evidenceCivicUrl; + return this; + } + + public Boolean getFlagged() { + return isFlagged; + } + + public CivicClinicalEvidence setFlagged(Boolean flagged) { + isFlagged = flagged; + return this; + } +} \ No newline at end of file diff --git a/biodata-models/src/main/java/org/opencb/biodata/models/core/civic/CivicFeature.java b/biodata-models/src/main/java/org/opencb/biodata/models/core/civic/CivicFeature.java new file mode 100644 index 00000000..e70a27cc --- /dev/null +++ 
b/biodata-models/src/main/java/org/opencb/biodata/models/core/civic/CivicFeature.java @@ -0,0 +1,330 @@ +package org.opencb.biodata.models.core.civic; + +import java.util.ArrayList; +import java.util.List; + +public class CivicFeature { + + // From FeatureSummaries.tsv + private String featureId; + private String featureCivicUrl; + private String featureType; + private String name; + private List featureAliases; + private String description; + private String lastReviewDate; + private Boolean isFlagged; + private String entrezId; + private String ncitId; + private String fivePrimePartnerStatus; + private String threePrimePartnerStatus; + private String fivePrimeGeneId; + private String fivePrimeGeneName; + private String fivePrimeGeneEntrezId; + private String threePrimeGeneId; + private String threePrimeGeneName; + private String threePrimeGeneEntrezId; + + // Additional transcript/exon information from VariantSummaries.tsv + private String fivePrimePartner; + private String threePrimePartner; + private String fivePrimeTranscript; + private String fivePrimeEndExon; + private String fivePrimeExonOffset; + private String fivePrimeExonOffsetDirection; + private String threePrimeTranscript; + private String threePrimeStartExon; + private String threePrimeExonOffset; + private String threePrimeExonOffsetDirection; + + public CivicFeature() { + this.featureAliases = new ArrayList<>(); + } + + @Override + public String toString() { + final StringBuilder sb = new StringBuilder("CivicFeature{"); + sb.append("featureId='").append(featureId).append('\''); + sb.append(", featureCivicUrl='").append(featureCivicUrl).append('\''); + sb.append(", featureType='").append(featureType).append('\''); + sb.append(", name='").append(name).append('\''); + sb.append(", featureAliases=").append(featureAliases); + sb.append(", description='").append(description).append('\''); + sb.append(", lastReviewDate='").append(lastReviewDate).append('\''); + sb.append(", isFlagged=").append(isFlagged); 
+ sb.append(", entrezId='").append(entrezId).append('\''); + sb.append(", ncitId='").append(ncitId).append('\''); + sb.append(", fivePrimePartnerStatus='").append(fivePrimePartnerStatus).append('\''); + sb.append(", threePrimePartnerStatus='").append(threePrimePartnerStatus).append('\''); + sb.append(", fivePrimeGeneId='").append(fivePrimeGeneId).append('\''); + sb.append(", fivePrimeGeneName='").append(fivePrimeGeneName).append('\''); + sb.append(", fivePrimeGeneEntrezId='").append(fivePrimeGeneEntrezId).append('\''); + sb.append(", threePrimeGeneId='").append(threePrimeGeneId).append('\''); + sb.append(", threePrimeGeneName='").append(threePrimeGeneName).append('\''); + sb.append(", threePrimeGeneEntrezId='").append(threePrimeGeneEntrezId).append('\''); + sb.append(", fivePrimePartner='").append(fivePrimePartner).append('\''); + sb.append(", threePrimePartner='").append(threePrimePartner).append('\''); + sb.append(", fivePrimeTranscript='").append(fivePrimeTranscript).append('\''); + sb.append(", fivePrimeEndExon='").append(fivePrimeEndExon).append('\''); + sb.append(", fivePrimeExonOffset='").append(fivePrimeExonOffset).append('\''); + sb.append(", fivePrimeExonOffsetDirection='").append(fivePrimeExonOffsetDirection).append('\''); + sb.append(", threePrimeTranscript='").append(threePrimeTranscript).append('\''); + sb.append(", threePrimeStartExon='").append(threePrimeStartExon).append('\''); + sb.append(", threePrimeExonOffset='").append(threePrimeExonOffset).append('\''); + sb.append(", threePrimeExonOffsetDirection='").append(threePrimeExonOffsetDirection).append('\''); + sb.append('}'); + return sb.toString(); + } + + public String getFeatureId() { + return featureId; + } + + public CivicFeature setFeatureId(String featureId) { + this.featureId = featureId; + return this; + } + + public String getFeatureCivicUrl() { + return featureCivicUrl; + } + + public CivicFeature setFeatureCivicUrl(String featureCivicUrl) { + this.featureCivicUrl = featureCivicUrl; + 
return this; + } + + public String getFeatureType() { + return featureType; + } + + public CivicFeature setFeatureType(String featureType) { + this.featureType = featureType; + return this; + } + + public String getName() { + return name; + } + + public CivicFeature setName(String name) { + this.name = name; + return this; + } + + public List getFeatureAliases() { + return featureAliases; + } + + public CivicFeature setFeatureAliases(List featureAliases) { + this.featureAliases = featureAliases; + return this; + } + + public String getDescription() { + return description; + } + + public CivicFeature setDescription(String description) { + this.description = description; + return this; + } + + public String getLastReviewDate() { + return lastReviewDate; + } + + public CivicFeature setLastReviewDate(String lastReviewDate) { + this.lastReviewDate = lastReviewDate; + return this; + } + + public Boolean getFlagged() { + return isFlagged; + } + + public CivicFeature setFlagged(Boolean flagged) { + isFlagged = flagged; + return this; + } + + public String getEntrezId() { + return entrezId; + } + + public CivicFeature setEntrezId(String entrezId) { + this.entrezId = entrezId; + return this; + } + + public String getNcitId() { + return ncitId; + } + + public CivicFeature setNcitId(String ncitId) { + this.ncitId = ncitId; + return this; + } + + public String getFivePrimePartnerStatus() { + return fivePrimePartnerStatus; + } + + public CivicFeature setFivePrimePartnerStatus(String fivePrimePartnerStatus) { + this.fivePrimePartnerStatus = fivePrimePartnerStatus; + return this; + } + + public String getThreePrimePartnerStatus() { + return threePrimePartnerStatus; + } + + public CivicFeature setThreePrimePartnerStatus(String threePrimePartnerStatus) { + this.threePrimePartnerStatus = threePrimePartnerStatus; + return this; + } + + public String getFivePrimeGeneId() { + return fivePrimeGeneId; + } + + public CivicFeature setFivePrimeGeneId(String fivePrimeGeneId) { + 
this.fivePrimeGeneId = fivePrimeGeneId; + return this; + } + + public String getFivePrimeGeneName() { + return fivePrimeGeneName; + } + + public CivicFeature setFivePrimeGeneName(String fivePrimeGeneName) { + this.fivePrimeGeneName = fivePrimeGeneName; + return this; + } + + public String getFivePrimeGeneEntrezId() { + return fivePrimeGeneEntrezId; + } + + public CivicFeature setFivePrimeGeneEntrezId(String fivePrimeGeneEntrezId) { + this.fivePrimeGeneEntrezId = fivePrimeGeneEntrezId; + return this; + } + + public String getThreePrimeGeneId() { + return threePrimeGeneId; + } + + public CivicFeature setThreePrimeGeneId(String threePrimeGeneId) { + this.threePrimeGeneId = threePrimeGeneId; + return this; + } + + public String getThreePrimeGeneName() { + return threePrimeGeneName; + } + + public CivicFeature setThreePrimeGeneName(String threePrimeGeneName) { + this.threePrimeGeneName = threePrimeGeneName; + return this; + } + + public String getThreePrimeGeneEntrezId() { + return threePrimeGeneEntrezId; + } + + public CivicFeature setThreePrimeGeneEntrezId(String threePrimeGeneEntrezId) { + this.threePrimeGeneEntrezId = threePrimeGeneEntrezId; + return this; + } + + public String getFivePrimePartner() { + return fivePrimePartner; + } + + public CivicFeature setFivePrimePartner(String fivePrimePartner) { + this.fivePrimePartner = fivePrimePartner; + return this; + } + + public String getThreePrimePartner() { + return threePrimePartner; + } + + public CivicFeature setThreePrimePartner(String threePrimePartner) { + this.threePrimePartner = threePrimePartner; + return this; + } + + public String getFivePrimeTranscript() { + return fivePrimeTranscript; + } + + public CivicFeature setFivePrimeTranscript(String fivePrimeTranscript) { + this.fivePrimeTranscript = fivePrimeTranscript; + return this; + } + + public String getFivePrimeEndExon() { + return fivePrimeEndExon; + } + + public CivicFeature setFivePrimeEndExon(String fivePrimeEndExon) { + this.fivePrimeEndExon = 
fivePrimeEndExon; + return this; + } + + public String getFivePrimeExonOffset() { + return fivePrimeExonOffset; + } + + public CivicFeature setFivePrimeExonOffset(String fivePrimeExonOffset) { + this.fivePrimeExonOffset = fivePrimeExonOffset; + return this; + } + + public String getFivePrimeExonOffsetDirection() { + return fivePrimeExonOffsetDirection; + } + + public CivicFeature setFivePrimeExonOffsetDirection(String fivePrimeExonOffsetDirection) { + this.fivePrimeExonOffsetDirection = fivePrimeExonOffsetDirection; + return this; + } + + public String getThreePrimeTranscript() { + return threePrimeTranscript; + } + + public CivicFeature setThreePrimeTranscript(String threePrimeTranscript) { + this.threePrimeTranscript = threePrimeTranscript; + return this; + } + + public String getThreePrimeStartExon() { + return threePrimeStartExon; + } + + public CivicFeature setThreePrimeStartExon(String threePrimeStartExon) { + this.threePrimeStartExon = threePrimeStartExon; + return this; + } + + public String getThreePrimeExonOffset() { + return threePrimeExonOffset; + } + + public CivicFeature setThreePrimeExonOffset(String threePrimeExonOffset) { + this.threePrimeExonOffset = threePrimeExonOffset; + return this; + } + + public String getThreePrimeExonOffsetDirection() { + return threePrimeExonOffsetDirection; + } + + public CivicFeature setThreePrimeExonOffsetDirection(String threePrimeExonOffsetDirection) { + this.threePrimeExonOffsetDirection = threePrimeExonOffsetDirection; + return this; + } +} \ No newline at end of file diff --git a/biodata-models/src/main/java/org/opencb/biodata/models/core/civic/CivicMolecularProfile.java b/biodata-models/src/main/java/org/opencb/biodata/models/core/civic/CivicMolecularProfile.java new file mode 100644 index 00000000..7c7800df --- /dev/null +++ b/biodata-models/src/main/java/org/opencb/biodata/models/core/civic/CivicMolecularProfile.java @@ -0,0 +1,124 @@ +package org.opencb.biodata.models.core.civic; + +import java.util.ArrayList; 
+import java.util.List; + +public class CivicMolecularProfile { + + // From MolecularProfileSummaries.tsv + private String name; + private String molecularProfileId; + private String summary; + private String evidenceScore; + private List aliases; + private String lastReviewDate; + private Boolean isFlagged; + + // Associated data + private List assertions; + private List evidences; + + public CivicMolecularProfile() { + this.aliases = new ArrayList<>(); + + this.assertions = new ArrayList<>(); + this.evidences = new ArrayList<>(); + } + + @Override + public String toString() { + final StringBuilder sb = new StringBuilder("CivicMolecularProfile{"); + sb.append("name='").append(name).append('\''); + sb.append(", molecularProfileId='").append(molecularProfileId).append('\''); + sb.append(", summary='").append(summary).append('\''); + sb.append(", evidenceScore='").append(evidenceScore).append('\''); + sb.append(", aliases=").append(aliases); + sb.append(", lastReviewDate='").append(lastReviewDate).append('\''); + sb.append(", isFlagged=").append(isFlagged); + sb.append(", assertions=").append(assertions); + sb.append(", evidences=").append(evidences); + sb.append('}'); + return sb.toString(); + } + + public String getName() { + return name; + } + + public CivicMolecularProfile setName(String name) { + this.name = name; + return this; + } + + public String getMolecularProfileId() { + return molecularProfileId; + } + + public CivicMolecularProfile setMolecularProfileId(String molecularProfileId) { + this.molecularProfileId = molecularProfileId; + return this; + } + + public String getSummary() { + return summary; + } + + public CivicMolecularProfile setSummary(String summary) { + this.summary = summary; + return this; + } + + public String getEvidenceScore() { + return evidenceScore; + } + + public CivicMolecularProfile setEvidenceScore(String evidenceScore) { + this.evidenceScore = evidenceScore; + return this; + } + + public List getAliases() { + return aliases; + } 
+ + public CivicMolecularProfile setAliases(List aliases) { + this.aliases = aliases; + return this; + } + + public String getLastReviewDate() { + return lastReviewDate; + } + + public CivicMolecularProfile setLastReviewDate(String lastReviewDate) { + this.lastReviewDate = lastReviewDate; + return this; + } + + public Boolean getFlagged() { + return isFlagged; + } + + public CivicMolecularProfile setFlagged(Boolean flagged) { + isFlagged = flagged; + return this; + } + + public List getAssertions() { + return assertions; + } + + public CivicMolecularProfile setAssertions(List assertions) { + this.assertions = assertions; + return this; + } + + public List getEvidences() { + return evidences; + } + + public CivicMolecularProfile setEvidences(List evidences) { + this.evidences = evidences; + return this; + } +} \ No newline at end of file diff --git a/biodata-models/src/main/java/org/opencb/biodata/models/core/civic/CivicVariant.java b/biodata-models/src/main/java/org/opencb/biodata/models/core/civic/CivicVariant.java new file mode 100644 index 00000000..783631f7 --- /dev/null +++ b/biodata-models/src/main/java/org/opencb/biodata/models/core/civic/CivicVariant.java @@ -0,0 +1,304 @@ +package org.opencb.biodata.models.core.civic; + +import java.util.ArrayList; +import java.util.List; + +public class CivicVariant { + + // From VariantSummaries.tsv + private String variantId; + private String variantCivicUrl; + private String variant; + private List variantAliases; + private Boolean isFlagged; + private List variantGroups; + private List variantTypes; + private String lastReviewDate; + private String gene; + private String entrezId; + private String chromosome; + private String start; + private String stop; + private String referenceBases; + private String variantBases; + private String representativeTranscript; + private String ensemblVersion; + private String referenceBuild; + private List hgvsDescriptions; + private String alleleRegistryId; + private List clinvarIds; 
+ private String ncitId; + private String viccCompliantName; + + // Associated data + private CivicFeature feature; + private CivicMolecularProfile molecularProfile; + + public CivicVariant() { + this.variantAliases = new ArrayList<>(); + this.variantGroups = new ArrayList<>(); + this.variantTypes = new ArrayList<>(); + this.hgvsDescriptions = new ArrayList<>(); + this.clinvarIds = new ArrayList<>(); + + this.feature = new CivicFeature(); + this.molecularProfile = new CivicMolecularProfile(); + } + + @Override + public String toString() { + final StringBuilder sb = new StringBuilder("CivicVariant{"); + sb.append("variantId='").append(variantId).append('\''); + sb.append(", variantCivicUrl='").append(variantCivicUrl).append('\''); + sb.append(", variant='").append(variant).append('\''); + sb.append(", variantAliases=").append(variantAliases); + sb.append(", isFlagged=").append(isFlagged); + sb.append(", variantGroups=").append(variantGroups); + sb.append(", variantTypes=").append(variantTypes); + sb.append(", lastReviewDate='").append(lastReviewDate).append('\''); + sb.append(", gene='").append(gene).append('\''); + sb.append(", entrezId='").append(entrezId).append('\''); + sb.append(", chromosome='").append(chromosome).append('\''); + sb.append(", start='").append(start).append('\''); + sb.append(", stop='").append(stop).append('\''); + sb.append(", referenceBases='").append(referenceBases).append('\''); + sb.append(", variantBases='").append(variantBases).append('\''); + sb.append(", representativeTranscript='").append(representativeTranscript).append('\''); + sb.append(", ensemblVersion='").append(ensemblVersion).append('\''); + sb.append(", referenceBuild='").append(referenceBuild).append('\''); + sb.append(", hgvsDescriptions=").append(hgvsDescriptions); + sb.append(", alleleRegistryId='").append(alleleRegistryId).append('\''); + sb.append(", clinvarIds=").append(clinvarIds); + sb.append(", ncitId='").append(ncitId).append('\''); + sb.append(", 
viccCompliantName='").append(viccCompliantName).append('\''); + sb.append(", feature=").append(feature); + sb.append(", molecularProfile=").append(molecularProfile); + sb.append('}'); + return sb.toString(); + } + + public String getVariantId() { + return variantId; + } + + public CivicVariant setVariantId(String variantId) { + this.variantId = variantId; + return this; + } + + public String getVariantCivicUrl() { + return variantCivicUrl; + } + + public CivicVariant setVariantCivicUrl(String variantCivicUrl) { + this.variantCivicUrl = variantCivicUrl; + return this; + } + + public String getVariant() { + return variant; + } + + public CivicVariant setVariant(String variant) { + this.variant = variant; + return this; + } + + public List getVariantAliases() { + return variantAliases; + } + + public CivicVariant setVariantAliases(List variantAliases) { + this.variantAliases = variantAliases; + return this; + } + + public Boolean getFlagged() { + return isFlagged; + } + + public CivicVariant setFlagged(Boolean flagged) { + isFlagged = flagged; + return this; + } + + public List getVariantGroups() { + return variantGroups; + } + + public CivicVariant setVariantGroups(List variantGroups) { + this.variantGroups = variantGroups; + return this; + } + + public List getVariantTypes() { + return variantTypes; + } + + public CivicVariant setVariantTypes(List variantTypes) { + this.variantTypes = variantTypes; + return this; + } + + public String getLastReviewDate() { + return lastReviewDate; + } + + public CivicVariant setLastReviewDate(String lastReviewDate) { + this.lastReviewDate = lastReviewDate; + return this; + } + + public String getGene() { + return gene; + } + + public CivicVariant setGene(String gene) { + this.gene = gene; + return this; + } + + public String getEntrezId() { + return entrezId; + } + + public CivicVariant setEntrezId(String entrezId) { + this.entrezId = entrezId; + return this; + } + + public String getChromosome() { + return chromosome; + } + + 
public CivicVariant setChromosome(String chromosome) { + this.chromosome = chromosome; + return this; + } + + public String getStart() { + return start; + } + + public CivicVariant setStart(String start) { + this.start = start; + return this; + } + + public String getStop() { + return stop; + } + + public CivicVariant setStop(String stop) { + this.stop = stop; + return this; + } + + public String getReferenceBases() { + return referenceBases; + } + + public CivicVariant setReferenceBases(String referenceBases) { + this.referenceBases = referenceBases; + return this; + } + + public String getVariantBases() { + return variantBases; + } + + public CivicVariant setVariantBases(String variantBases) { + this.variantBases = variantBases; + return this; + } + + public String getRepresentativeTranscript() { + return representativeTranscript; + } + + public CivicVariant setRepresentativeTranscript(String representativeTranscript) { + this.representativeTranscript = representativeTranscript; + return this; + } + + public String getEnsemblVersion() { + return ensemblVersion; + } + + public CivicVariant setEnsemblVersion(String ensemblVersion) { + this.ensemblVersion = ensemblVersion; + return this; + } + + public String getReferenceBuild() { + return referenceBuild; + } + + public CivicVariant setReferenceBuild(String referenceBuild) { + this.referenceBuild = referenceBuild; + return this; + } + + public List getHgvsDescriptions() { + return hgvsDescriptions; + } + + public CivicVariant setHgvsDescriptions(List hgvsDescriptions) { + this.hgvsDescriptions = hgvsDescriptions; + return this; + } + + public String getAlleleRegistryId() { + return alleleRegistryId; + } + + public CivicVariant setAlleleRegistryId(String alleleRegistryId) { + this.alleleRegistryId = alleleRegistryId; + return this; + } + + public List getClinvarIds() { + return clinvarIds; + } + + public CivicVariant setClinvarIds(List clinvarIds) { + this.clinvarIds = clinvarIds; + return this; + } + + public String 
getNcitId() { + return ncitId; + } + + public CivicVariant setNcitId(String ncitId) { + this.ncitId = ncitId; + return this; + } + + public String getViccCompliantName() { + return viccCompliantName; + } + + public CivicVariant setViccCompliantName(String viccCompliantName) { + this.viccCompliantName = viccCompliantName; + return this; + } + + public CivicFeature getFeature() { + return feature; + } + + public CivicVariant setFeature(CivicFeature feature) { + this.feature = feature; + return this; + } + + public CivicMolecularProfile getMolecularProfile() { + return molecularProfile; + } + + public CivicVariant setMolecularProfile(CivicMolecularProfile molecularProfile) { + this.molecularProfile = molecularProfile; + return this; + } +} \ No newline at end of file From 6efe1d00eb5359b10d88af51335cbfa97d802a7a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joaqu=C3=ADn=20T=C3=A1rraga=20Gim=C3=A9nez?= Date: Thu, 25 Sep 2025 17:09:37 +0200 Subject: [PATCH 24/24] formats: improve CIViC parser, #TASK-7903, #TASK-5564 --- .../formats/variant/civic/CivicParser.java | 178 ++++++++++++------ .../variant/civic/CivicParserTest.java | 14 +- .../models/core/civic/CivicVariant.java | 14 +- 3 files changed, 140 insertions(+), 66 deletions(-) diff --git a/biodata-formats/src/main/java/org/opencb/biodata/formats/variant/civic/CivicParser.java b/biodata-formats/src/main/java/org/opencb/biodata/formats/variant/civic/CivicParser.java index 83177585..1cfb7674 100755 --- a/biodata-formats/src/main/java/org/opencb/biodata/formats/variant/civic/CivicParser.java +++ b/biodata-formats/src/main/java/org/opencb/biodata/formats/variant/civic/CivicParser.java @@ -20,6 +20,8 @@ package org.opencb.biodata.formats.variant.civic; import org.apache.commons.lang3.StringUtils; +import org.apache.commons.lang3.tuple.ImmutablePair; +import org.apache.commons.lang3.tuple.Pair; import org.opencb.biodata.formats.io.FileFormatException; import org.opencb.biodata.models.core.civic.*; import 
org.opencb.commons.utils.FileUtils; @@ -34,43 +36,60 @@ public class CivicParser { + private Path variantSummariesFile; + private Path featureSummariesFile; + private Path molecularProfileSummariesFile; + private Path assertionSummariesFile; + private Path clinicalEvidenceSummariesFile; + private String version; + private String assembly; + private CivicParserCallback callback; + + Map featuresMap; + Map evidencesMap; + Map assertionsMap; + Map profilesMap; + + Map> variantToProfilesMap; + private static final Logger logger = LoggerFactory.getLogger(CivicParser.class); - private CivicParser() { - throw new IllegalStateException("Utility class"); + public CivicParser(Path variantSummariesFile, Path featureSummariesFile, Path molecularProfileSummariesFile, + Path assertionSummariesFile, Path clinicalEvidenceSummariesFile, String version, String assembly, + CivicParserCallback callback) { + this.variantSummariesFile = variantSummariesFile; + this.featureSummariesFile = featureSummariesFile; + this.molecularProfileSummariesFile = molecularProfileSummariesFile; + this.assertionSummariesFile = assertionSummariesFile; + this.clinicalEvidenceSummariesFile = clinicalEvidenceSummariesFile; + this.version = version; + this.assembly = assembly; + this.callback = callback; + + this.variantToProfilesMap = new HashMap<>(); } - public static void parse(Path variantSummariesFile, Path featureSummariesFile, Path molecularProfileSummariesFile, - Path assertionSummariesFile, Path clinicalEvidenceSummariesFile, String version, - CivicParserCallback callback) throws IOException, FileFormatException { - - logger.info("Starting CIViC parsing with version: {}", version); + public void parse() throws IOException, FileFormatException { + logger.info("Starting CIViC parsing with version {} for assembly {}", version, assembly); // Step 1: Parse features first - Map featuresMap = parseFeaturesFile(featureSummariesFile); - logger.info("Parsed {} features", featuresMap.size()); + 
parseFeaturesFile(); // Step 2: Parse clinical evidence and link to molecular profiles - Map evidencesMap = parseClinicalEvidencesFile(clinicalEvidenceSummariesFile); - logger.info("Parsed {} evidences", evidencesMap.size()); + parseClinicalEvidencesFile(); // Step 3: Parse assertions and complete them with evidences - Map assertionsMap = parseAssertionsFile(assertionSummariesFile, evidencesMap); - logger.info("Parsed {} assertions and complete with evidences", assertionsMap.size()); + parseAssertionsFile(); // Step 4: Parse molecular profiles and complete them with assertions and evidences - Map profilesMap = parseMolecularProfilesFile(molecularProfileSummariesFile, assertionsMap, - evidencesMap); - logger.info("Parsed {} molecular profiles and complete with assertions and evidences", profilesMap.size()); - + parseMolecularProfilesFile(); // Step 5: Parse variants and build complete objects - int numVariants = parseVariantsFile(variantSummariesFile, profilesMap, featuresMap, callback); - logger.info("Completed CIViC parsing: {} variants processed", numVariants); + parseVariantsFile(); } - private static Map parseFeaturesFile(Path featureSummariesFile) throws IOException, FileFormatException { - Map featuresMap = new HashMap<>(); + private void parseFeaturesFile() throws IOException { + featuresMap = new HashMap<>(); try (BufferedReader reader = new BufferedReader(new InputStreamReader(FileUtils.newInputStream(featureSummariesFile)))) { String line = reader.readLine(); // Skip header @@ -84,10 +103,10 @@ private static Map parseFeaturesFile(Path featureSummaries } } - return featuresMap; + logger.info("Parsed {} features", featuresMap.size()); } - private static CivicFeature parseFeatureFields(String[] fields) { + private CivicFeature parseFeatureFields(String[] fields) { return new CivicFeature() .setFeatureId(getField(fields, 0)) .setFeatureCivicUrl(getField(fields, 1)) @@ -109,9 +128,8 @@ private static CivicFeature parseFeatureFields(String[] fields) { 
.setThreePrimeGeneEntrezId(getField(fields, 17)); } - private static Map parseClinicalEvidencesFile(Path clinicalEvidenceSummariesFile) throws IOException { - - Map evidencesMap = new HashMap<>(); + private void parseClinicalEvidencesFile() throws IOException { + evidencesMap = new HashMap<>(); try (BufferedReader reader = new BufferedReader(new InputStreamReader(FileUtils.newInputStream(clinicalEvidenceSummariesFile)))) { String line = reader.readLine(); // Skip header @@ -125,10 +143,18 @@ private static Map parseClinicalEvidencesFile(Pat } } - return evidencesMap; + logger.info("Parsed {} evidences", evidencesMap.size()); } - private static CivicClinicalEvidence parseClinicalEvidenceFields(String[] fields) { + private CivicClinicalEvidence parseClinicalEvidenceFields(String[] fields) { + // 0 1 2 3 4 5 6 7 + // molecular_profile molecular_profile_id disease doid phenotypes therapies therapy_interaction_type evidence_type + // 8 9 10 11 12 13 14 15 + // evidence_direction evidence_level significance evidence_statement citation_id source_type asco_abstract_id citation + // 16 17 18 19 20 21 22 23 + // nct_ids rating evidence_status evidence_id variant_origin last_review_date evidence_civic_url molecular_profile_civic_url + // 24 + // is_flagged return new CivicClinicalEvidence() .setDisease(getField(fields, 2)) .setDoid(getField(fields, 3)) @@ -154,10 +180,8 @@ private static CivicClinicalEvidence parseClinicalEvidenceFields(String[] fields .setFlagged(parseBoolean(getField(fields, 24))); } - private static Map parseAssertionsFile(Path assertionSummariesFile, Map evidenceMap) throws IOException { - - Map assertionsMap = new HashMap<>(); + private void parseAssertionsFile() throws IOException { + assertionsMap = new HashMap<>(); try (BufferedReader reader = new BufferedReader(new InputStreamReader(FileUtils.newInputStream(assertionSummariesFile)))) { String line = reader.readLine(); // Skip header @@ -170,8 +194,8 @@ private static Map parseAssertionsFile(Path 
assertionSu // Set evidences from evidence map using the evidence IDs in the assertion List evidenceIds = parseStringList(getField(fields, 18)); for (String evidenceId : evidenceIds) { - if (evidenceMap.containsKey(evidenceId)) { - assertion.getEvidences().add(evidenceMap.get(evidenceId)); + if (evidencesMap.containsKey(evidenceId)) { + assertion.getEvidences().add(evidencesMap.get(evidenceId)); } } @@ -180,10 +204,10 @@ private static Map parseAssertionsFile(Path assertionSu } } - return assertionsMap; + logger.info("Parsed {} assertions and complete with evidences", assertionsMap.size()); } - private static CivicAssertion parseAssertionFields(String[] fields) { + private CivicAssertion parseAssertionFields(String[] fields) { return new CivicAssertion() .setDisease(getField(fields, 2)) .setDoid(getField(fields, 3)) @@ -206,12 +230,8 @@ private static CivicAssertion parseAssertionFields(String[] fields) { .setFlagged(parseBoolean(getField(fields, 23))); } - private static Map parseMolecularProfilesFile(Path molecularProfileSummariesFile, - Map assertionsMap, - Map evidencesMap) - throws IOException { - - Map profilesMap = new HashMap<>(); + private void parseMolecularProfilesFile() throws IOException { + profilesMap = new HashMap<>(); try (BufferedReader reader = new BufferedReader(new InputStreamReader(FileUtils.newInputStream(molecularProfileSummariesFile)))) { String line = reader.readLine(); // Skip header @@ -238,14 +258,29 @@ private static Map parseMolecularProfilesFile(Pat } profilesMap.put(profile.getMolecularProfileId(), profile); + + // Add to the variant to profiles map + // This is necessary because there are situations where a given profile is not included in the VariantSummaries.tsv file + // but only in the MolecularProfileSummaries.tsv file (usually when the profile includes multiple variants) + List variantIds = parseStringList(getField(fields, 3)); + for (String variantId : variantIds) { + if (!variantToProfilesMap.containsKey(variantId)) { +
variantToProfilesMap.put(variantId, new HashSet<>()); + } + variantToProfilesMap.get(variantId).add(profile.getMolecularProfileId()); + } } } } - return profilesMap; + logger.info("Parsed {} molecular profiles and complete with assertions and evidences", profilesMap.size()); } - private static CivicMolecularProfile parseMolecularProfileFields(String[] fields) { + private CivicMolecularProfile parseMolecularProfileFields(String[] fields) { + // 0 1 2 3 4 5 6 7 + // name molecular_profile_id summary variant_ids variants_civic_url evidence_score evidence_item_ids evidence_items_civic_url + // 8 9 10 11 12 + // assertion_ids assertions_civic_url aliases last_review_date is_flagged return new CivicMolecularProfile() .setName(getField(fields, 0)) .setMolecularProfileId(getField(fields, 1)) @@ -256,16 +291,35 @@ private static CivicMolecularProfile parseMolecularProfileFields(String[] fields .setFlagged(parseBoolean(getField(fields, 12))); } - private static int parseVariantsFile(Path variantSummariesFile, Map profilesMap, - Map featuresMap, CivicParserCallback callback) throws IOException { - + private void parseVariantsFile() throws IOException { + int totalVariants = 0; int numVariants = 0; + int numVariantsSkippedByAssemblyEmpty = 0; + int numVariantsSkippedByAssemblyMismatch = 0; + try (BufferedReader reader = new BufferedReader(new InputStreamReader(FileUtils.newInputStream(variantSummariesFile)))) { String line = reader.readLine(); // Skip header while ((line = reader.readLine()) != null) { if (StringUtils.isNotBlank(line)) { + totalVariants++; + String[] fields = line.split("\t", -1); + + // Filter by assembly + String variantAssembly = getField(fields, 22); + if (StringUtils.isEmpty(variantAssembly)) { + numVariantsSkippedByAssemblyEmpty++; + logger.warn("Skipping variant ID {} due to assembly is empty", getField(fields, 0)); + continue; + } + if (!assembly.equalsIgnoreCase(variantAssembly)) { + numVariantsSkippedByAssemblyMismatch++; + logger.warn("Skipping 
variant ID {} due to assembly mismatch: expected {}, found {}", getField(fields, 0), + assembly, variantAssembly); + continue; + } + CivicVariant variant = parseVariantFields(fields); // Link feature and enhance with transcript/exon info @@ -287,23 +341,35 @@ private static int parseVariantsFile(Path variantSummariesFile, Map()); + } + variantToProfilesMap.get(variant.getVariantId()).add(singleVariantMolecularProfileId); + } + + // Iterate the list of molecular profile IDs associated to the variant and link the profiles from the profiles map + List profileIds = new ArrayList<>(variantToProfilesMap.get(variant.getVariantId())); + for (String profileId : profileIds) { + if (profilesMap.containsKey(profileId)) { + variant.getMolecularProfiles().add(profilesMap.get(profileId)); + } } // Process variant through callback - if (!callback.processCivicVariant(variant)) { - // Stop parsing if callback returns false - logger.warn("CIViC parsing stopped by callback request."); - break; + if (callback.processCivicVariant(variant)) { + numVariants++; + } else { + // Add warning to log and continue the parsing + logger.warn("CIViC parsing callback returned false for variant ID: {}", variant.getVariantId()); } - numVariants++; } } } - return numVariants; + logger.info("Parsed {} variants, {} passed the callback filter", totalVariants, numVariants); + logger.info("Skipped {} variants due to empty assembly", numVariantsSkippedByAssemblyEmpty); + logger.info("Skipped {} variants due to assembly mismatch", numVariantsSkippedByAssemblyMismatch); } private static CivicVariant parseVariantFields(String[] fields) { diff --git a/biodata-formats/src/test/java/org/opencb/biodata/formats/variant/civic/CivicParserTest.java b/biodata-formats/src/test/java/org/opencb/biodata/formats/variant/civic/CivicParserTest.java index 9847c3d3..5f55c35b 100644 --- a/biodata-formats/src/test/java/org/opencb/biodata/formats/variant/civic/CivicParserTest.java +++
b/biodata-formats/src/test/java/org/opencb/biodata/formats/variant/civic/CivicParserTest.java @@ -1,5 +1,6 @@ package org.opencb.biodata.formats.variant.civic; +import com.fasterxml.jackson.databind.ObjectMapper; import org.junit.Assert; import org.junit.Assume; import org.junit.Test; @@ -43,6 +44,7 @@ public void testCivicParser() throws IOException, FileFormatException { Path civicPath = Paths.get("/opt/civic-data/"); Assume.assumeTrue(Files.exists(civicPath)); + String assembly = "grch38"; String version = "v1"; Path variantSummariesFile = civicPath.resolve("01-Sep-2025-VariantSummaries.tsv"); Path featureSummariesFile = civicPath.resolve("01-Sep-2025-FeatureSummaries.tsv"); @@ -52,10 +54,16 @@ public void testCivicParser() throws IOException, FileFormatException { MyCallback callback = new MyCallback(">>> Testing message"); - CivicParser.parse(variantSummariesFile, featureSummariesFile, molecularProfileSummariesFile, assertionSummariesFile, - clinicalEvidenceSummariesFile, version, callback); + CivicParser parser = new CivicParser(variantSummariesFile, featureSummariesFile, molecularProfileSummariesFile, + assertionSummariesFile, clinicalEvidenceSummariesFile, version, assembly, callback); + + parser.parse(); List civicVariants = callback.getCivicVariants(); - Assert.assertEquals(1811, civicVariants.size()); + // Only 2 variants are GRCh38 + Assert.assertEquals(2, civicVariants.size()); + for (CivicVariant civicVariant : civicVariants) { + System.out.println(new ObjectMapper().writerFor(CivicVariant.class).writeValueAsString(civicVariant)); + } } } \ No newline at end of file diff --git a/biodata-models/src/main/java/org/opencb/biodata/models/core/civic/CivicVariant.java b/biodata-models/src/main/java/org/opencb/biodata/models/core/civic/CivicVariant.java index 783631f7..78597dec 100644 --- a/biodata-models/src/main/java/org/opencb/biodata/models/core/civic/CivicVariant.java +++ 
b/biodata-models/src/main/java/org/opencb/biodata/models/core/civic/CivicVariant.java @@ -32,7 +32,7 @@ public class CivicVariant { // Associated data private CivicFeature feature; - private CivicMolecularProfile molecularProfile; + private List molecularProfiles; public CivicVariant() { this.variantAliases = new ArrayList<>(); @@ -42,7 +42,7 @@ public CivicVariant() { this.clinvarIds = new ArrayList<>(); this.feature = new CivicFeature(); - this.molecularProfile = new CivicMolecularProfile(); + this.molecularProfiles = new ArrayList<>(); } @Override @@ -72,7 +72,7 @@ public String toString() { sb.append(", ncitId='").append(ncitId).append('\''); sb.append(", viccCompliantName='").append(viccCompliantName).append('\''); sb.append(", feature=").append(feature); - sb.append(", molecularProfile=").append(molecularProfile); + sb.append(", molecularProfiles=").append(molecularProfiles); sb.append('}'); return sb.toString(); } @@ -293,12 +293,12 @@ public CivicVariant setFeature(CivicFeature feature) { return this; } - public CivicMolecularProfile getMolecularProfile() { - return molecularProfile; + public List getMolecularProfiles() { + return molecularProfiles; } - public CivicVariant setMolecularProfile(CivicMolecularProfile molecularProfile) { - this.molecularProfile = molecularProfile; + public CivicVariant setMolecularProfiles(List molecularProfiles) { + this.molecularProfiles = molecularProfiles; return this; } } \ No newline at end of file