Skip to content
This repository was archived by the owner on Mar 11, 2019. It is now read-only.

Commit 27b2cf3

Browse files
author
LuisFranciscoHS
committed
Added PathwayMatcherService class to return the list of hit pathways.
1 parent d926c0a commit 27b2cf3

76 files changed

Lines changed: 2528 additions & 335 deletions

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

src/main/java/no/uib/pathwaymatcher/Analysis/AnalyserProteins.java

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ public void analyse(TreeMultimap<Proteoform, Reaction> result) {
3939
// Calculate the entities pvalue
4040
int n = result.keySet().size(); // Sample size: # Proteins in the input
4141
int k = pathway.getEntitiesFound().size(); // Successful trials: Entities found participating in the pathway
42-
double p = pathway.getNumEntitiesTotal() / u; // Probability of success in each trial: The entity is a participant in the pathway
42+
double p = pathway.getNumEntitiesTotal() / (double)u; // Probability of success in each trial: The entity is a participant in the pathway
4343

4444
BinomialDistribution binomialDistribution = new BinomialDistributionImpl(n, p); //Given n trials with probability p of success
4545
pathway.setpValue(binomialDistribution.probability(k)); //Probability of k successful trials
@@ -62,7 +62,7 @@ public int compare(Pathway x, Pathway y) {
6262
});
6363

6464
// Count number of pathways with p-Values less than 0.05
65-
int n = 0;
65+
double n = 0;
6666
for (Pathway pathway : sortedList) {
6767
if (pathway.getPValue() < 0.05) {
6868
n++;
@@ -71,7 +71,7 @@ public int compare(Pathway x, Pathway y) {
7171
}
7272
}
7373

74-
int rank = 1;
74+
double rank = 1;
7575
for (Pathway pathway : sortedList) {
7676
double newPValue = pathway.getPValue() * n;
7777
newPValue /= rank;

src/main/java/no/uib/pathwaymatcher/Conf.java

Lines changed: 13 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
import java.util.logging.Level;
88
import java.util.logging.SimpleFormatter;
99

10+
import no.uib.pathwaymatcher.model.Pathway;
1011
import org.apache.commons.cli.CommandLine;
1112
import org.apache.commons.cli.Options;
1213
import org.apache.commons.io.FileUtils;
@@ -52,7 +53,6 @@ public interface StrVars {
5253
String vepTablesPath = "vepTablesPath";
5354
String vepTableName = "vepTableName";
5455
String fastaFile = "fastaFile";
55-
String matchType = "matchType";
5656
String peptideGrouping = "peptideGrouping";
5757
String colSep = "colSep";
5858
String ptmColSep = "ptmColSep";
@@ -112,6 +112,12 @@ public static void setValue(String name, Boolean value) {
112112
}
113113
}
114114

115+
/**
 * Overwrites an integer configuration value.
 * The value is stored only when {@code name} is already a key of {@code intMap};
 * a name that is not present is ignored without raising an error.
 *
 * @param name  key of the integer setting to overwrite
 * @param value new value to associate with {@code name}
 */
public static void setValue(String name, int value) {
116+
if (intMap.containsKey(name)) {
117+
intMap.put(name, value);
118+
}
119+
}
120+
115121
public static String get(String name) {
116122
if (strMap.containsKey(name)) {
117123
return strMap.get(name);
@@ -130,18 +136,18 @@ public static void setDefaultValues() {
130136
strMap.put(StrVars.pathwayStatistics, "pathwayStatistics.csv");
131137
strMap.put(StrVars.conf, "./Config.txt");
132138
strMap.put(StrVars.inputType, InputType.unknown);
133-
strMap.put(StrVars.vepTablesPath, "./vep/");
139+
strMap.put(StrVars.vepTablesPath, "vep/");
134140
strMap.put(StrVars.fastaFile, "");
135141
boolMap.put(BoolVars.showTopLevelPathways, Boolean.FALSE);
136142

137143

138144
// Extra configuration options (not published)
139-
strMap.put(StrVars.vepTableName, "XX.gz");
145+
strMap.put(StrVars.vepTableName, "simpleXX.gz");
140146
boolMap.put(BoolVars.inputHasPTMs, Boolean.FALSE);
141147
strMap.put(StrVars.outputType, OutputTypeEnum.fullTable);
142148
intMap.put(IntVars.maxNumProt, 21000);
143149
boolMap.put(BoolVars.verbose, Boolean.TRUE);
144-
strMap.put(StrVars.matchType, MatchType.FLEXIBLE.toString());
150+
strMap.put(StrVars.matchingType, MatchType.FLEXIBLE.toString());
145151
strMap.put(StrVars.peptideGrouping, PeptidePTMGrouping.none.toString());
146152

147153
intMap.put(IntVars.rsidIndex, 2);
@@ -153,7 +159,7 @@ public static void setDefaultValues() {
153159
boolMap.put(BoolVars.showTopLevelPathways, Boolean.FALSE);
154160

155161
intMap.put(IntVars.margin, 3);
156-
strMap.put(StrVars.colSep, "\t");
162+
strMap.put(StrVars.colSep, "|");
157163
strMap.put(StrVars.ptmColSep, ";");
158164

159165
//Database access
@@ -163,6 +169,8 @@ public static void setDefaultValues() {
163169

164170
// Extras
165171
boolMap.put(BoolVars.useSubsequenceRanges, Boolean.FALSE);
172+
173+
PathwayMatcher.logger.setLevel(Level.ALL);
166174
}
167175

168176
// public static String input = "./src/main/resources/csv/listBjorn.csv";

src/main/java/no/uib/pathwaymatcher/PathwayMatcher.java

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -74,17 +74,18 @@ public static void main(String args[]) {
7474
options = new Options();
7575

7676
addOption("t", StrVars.inputType, true, "Type of input file (" + InputType.peptideList + ", " + InputType.rsidList + ", " + InputType.uniprotListAndModSites + ",...etc.)", true);
77+
addOption("r", IntVars.margin, true, "Allowed distance for PTM sites", false);
78+
addOption("tlp", BoolVars.showTopLevelPathways, false, "Set this flag to show the \"Top Level Pathways\" column in the output file.", false);
79+
addOption("mt", StrVars.matchingType.toString(), false, "Type of criteria used to decide if two proteoforms are equivalent.", false);
80+
81+
addOption("f", StrVars.fastaFile, true, "Path and name of the FASTA file with the possible protein sequences to search the peptides.", false);
7782
addOption("i", StrVars.input, true, "input file path", false);
7883
addOption("o", StrVars.output, true, "output file path", false);
7984
addOption("c", StrVars.conf, true, "config file path and name", false);
80-
addOption("r", IntVars.margin, true, "Allowed distance for PTM sites", false);
8185
addOption("h", StrVars.host, true, "Url of the Neo4j database with Reactome", false);
8286
addOption("u", StrVars.username, true, "Username to access the database with Reactome", false);
8387
addOption("p", StrVars.password, true, "Password related to the username provided to access the database with Reactome", false);
8488
addOption("vep", StrVars.vepTablesPath, true, "The path of the folder containing the vep mapping tables. If the type of input is \"snpList\" then the parameter is required. It is not required otherwise.", false);
85-
addOption("f", StrVars.fastaFile, true, "Path and name of the FASTA file with the possible protein sequences to search the peptides.", false);
86-
addOption("tlp", BoolVars.showTopLevelPathways, false, "Set this flag to show the \"Top Level Pathways\" column in the output file.", false);
87-
addOption("mt", StrVars.matchingType.toString(), false, "Type of criteria used to decide if two proteoforms are equivalent.", false);
8889

8990
CommandLineParser parser = new DefaultParser();
9091
HelpFormatter formatter = new HelpFormatter();
Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
package no.uib.pathwaymatcher;
2+
3+
import com.google.common.collect.TreeMultimap;
4+
import no.uib.pathwaymatcher.Analysis.Analyser;
5+
import no.uib.pathwaymatcher.Analysis.AnalyserFactory;
6+
import no.uib.pathwaymatcher.Matching.Matcher;
7+
import no.uib.pathwaymatcher.Matching.MatcherFactory;
8+
import no.uib.pathwaymatcher.Preprocessing.Preprocessor;
9+
import no.uib.pathwaymatcher.Preprocessing.PreprocessorFactory;
10+
import no.uib.pathwaymatcher.Search.Finder;
11+
import no.uib.pathwaymatcher.model.Proteoform;
12+
import no.uib.pathwaymatcher.model.Reaction;
13+
import no.uib.pathwaymatcher.stages.Reporter;
14+
15+
import java.util.ArrayList;
16+
import java.util.HashSet;
17+
import java.util.List;
18+
import java.util.Set;
19+
import java.util.logging.Level;
20+
import java.util.logging.Logger;
21+
22+
import static no.uib.pathwaymatcher.Conf.initializeLog;
23+
import static no.uib.pathwaymatcher.Conf.setValue;
24+
import static no.uib.pathwaymatcher.Conf.strMap;
25+
import static no.uib.pathwaymatcher.db.ConnectionNeo4j.initializeNeo4j;
26+
import static no.uib.pathwaymatcher.model.Error.INPUT_PARSING_ERROR;
27+
import static no.uib.pathwaymatcher.model.Error.sendError;
28+
29+
/**
 * Programmatic entry point that runs the matching pipeline on an in-memory
 * input list and returns the pathway statistics as a list of strings.
 * The single static method {@link #match} chains the stages visible below:
 * preprocessing, matching, searching and analysis.
 */
public class PathwayMatcherService {
30+
31+
// The logger is looked up under the name of PathwayMatcher, not of this class.
// NOTE(review): presumably intended to share PathwayMatcher's logger — confirm.
public final static Logger logger = Logger.getLogger(PathwayMatcher.class.getName());
32+
33+
/**
 * Runs the whole pipeline for the given input lines.
 *
 * Every call first loads the default configuration through
 * {@code Conf.setDefaultValues()} and then overrides the margin, the
 * "show top level pathways" flag and the matching type with the arguments.
 * The Neo4j connection is initialised with the host, username and password
 * currently held in {@code strMap}; this method does not override them.
 *
 * @param input                lines of the input, handed to the preprocessor
 * @param type                 input type; selects the preprocessor, matcher and analyser
 * @param margin               value stored under {@code Conf.IntVars.margin}
 * @param showTopLevelPathways value stored under {@code Conf.BoolVars.showTopLevelPathways}
 * @param matchingType         value stored under {@code Conf.StrVars.matchingType} and passed to the matcher factory
 * @return the list produced by {@code Reporter.getPathwayStatisticsList()}
 */
public static List<String> match(List<String> input, String type, int margin, Boolean showTopLevelPathways, String matchingType) {
34+
// Starts as an empty set; replaced by the preprocessor result below.
Set<Proteoform> entities = new HashSet<>();
35+
Preprocessor preprocessor;
36+
Matcher matcher;
37+
38+
// Configuration: defaults first, then the caller-supplied overrides.
Conf.setDefaultValues();
39+
setValue(Conf.IntVars.margin, margin);
40+
setValue(Conf.BoolVars.showTopLevelPathways, showTopLevelPathways);
41+
setValue(Conf.StrVars.matchingType, matchingType);
42+
initializeNeo4j(strMap.get(Conf.StrVars.host), strMap.get(Conf.StrVars.username), strMap.get(Conf.StrVars.password));
43+
initializeLog();
44+
45+
// Stage 1: turn the raw input lines into proteoforms.
preprocessor = PreprocessorFactory.getPreprocessor(type);
46+
47+
try {
48+
entities = preprocessor.process(input);
49+
} catch (java.text.ParseException e) {
50+
// NOTE(review): whether sendError terminates the program is not visible here;
// if it returns, the pipeline continues with the empty set created above.
sendError(INPUT_PARSING_ERROR);
51+
}
52+
53+
logger.log(Level.INFO, "Preprocessing complete.");
54+
55+
// Stage 2: map the input proteoforms with the matcher chosen for this type.
logger.log(Level.INFO, "\nMatching input entities...");
56+
matcher = MatcherFactory.getMatcher(type, matchingType);
57+
TreeMultimap<Proteoform, String> mapping = matcher.match(entities);
58+
logger.log(Level.INFO, "Matching complete.");
59+
60+
// Stage 3: look up the reactions for every mapped proteoform.
logger.log(Level.INFO, "\nFiltering pathways and reactions....");
61+
TreeMultimap<Proteoform, Reaction> result = Finder.search(mapping);
62+
logger.log(Level.INFO, "Filtering pathways and reactions complete.");
63+
64+
// Stage 4: run the analysis for this input type over the search result.
Analyser analyser = AnalyserFactory.getAnalyser(type);
65+
analyser.analyse(result);
66+
67+
return Reporter.getPathwayStatisticsList();
68+
}
69+
}

src/main/java/no/uib/pathwaymatcher/Preprocessing/Preprocessor.java

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -29,18 +29,20 @@ public static List<String> readInput(String fileName) throws IOException {
2929
return Files.readLines(file, Charset.defaultCharset());
3030
}
3131

32-
public static Boolean validateVepTables(String path) throws FileNotFoundException, NoSuchFileException {
32+
public Boolean validateVepTables(String path) throws FileNotFoundException, NoSuchFileException {
3333

3434
if (!path.endsWith("/")) {
3535
path = path + "/";
3636
}
3737

38-
File vepDirectory = new File(path);
38+
ClassLoader classLoader = getClass().getClassLoader();
39+
File vepDirectory = new File(classLoader.getResource(path).getFile());
3940
if (!vepDirectory.exists()) {
4041
throw new NoSuchFileException(VEP_DIRECTORY_NOT_FOUND.getMessage());
4142
} else {
4243
for (int chr = 1; chr <= 22; chr++) {
43-
if (!(new File(path + strMap.get(StrVars.vepTableName).replace("XX", chr + "")).exists())) {
44+
File file = new File(classLoader.getResource(path + strMap.get(StrVars.vepTableName).replace("XX", chr + "")).getFile());
45+
if (!file.exists()) {
4446
throw new FileNotFoundException("The vep table for chromosome " + chr + " was not found. Expected: " + path + strMap.get(StrVars.vepTableName).replace("XX", chr + ""));
4547
}
4648
}

src/main/java/no/uib/pathwaymatcher/Preprocessing/PreprocessorSnps.java

Lines changed: 12 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,17 @@
11
package no.uib.pathwaymatcher.Preprocessing;
22

3+
import com.google.common.collect.Multimap;
34
import no.uib.pathwaymatcher.Conf;
4-
import no.uib.pathwaymatcher.model.Pair;
55
import no.uib.pathwaymatcher.model.Proteoform;
66
import no.uib.pathwaymatcher.model.Snp;
77

88
import java.io.BufferedReader;
9+
import java.io.File;
910
import java.io.FileNotFoundException;
1011
import java.io.IOException;
1112
import java.nio.file.NoSuchFileException;
1213
import java.text.ParseException;
13-
import java.util.Arrays;
14-
import java.util.HashSet;
15-
import java.util.List;
16-
import java.util.TreeSet;
14+
import java.util.*;
1715
import java.util.logging.Level;
1816

1917
import static no.uib.pathwaymatcher.Conf.strMap;
@@ -37,10 +35,10 @@ public TreeSet<Proteoform> process(List<String> input) throws ParseException {
3735

3836
logger.log(Level.INFO, "\nPreprocessing input file...");
3937
TreeSet<Proteoform> entities = new TreeSet<>();
40-
HashSet<Snp> snpSet = new HashSet<>();
38+
Set<String> rsidSet = new HashSet<>();
4139

4240
try {
43-
Preprocessor.validateVepTables(strMap.get(Conf.StrVars.vepTablesPath));
41+
validateVepTables(strMap.get(Conf.StrVars.vepTablesPath));
4442
} catch (FileNotFoundException e) {
4543
sendError(ERROR_READING_VEP_TABLES);
4644
} catch (NoSuchFileException e) {
@@ -54,7 +52,7 @@ public TreeSet<Proteoform> process(List<String> input) throws ParseException {
5452
row++;
5553

5654
if (matches_Rsid(line)) {
57-
snpSet.add(new Snp(line));
55+
rsidSet.add(line);
5856
} else {
5957
if (line.isEmpty()) sendWarning(EMPTY_ROW, row);
6058
else sendWarning(INVALID_ROW, row);
@@ -64,22 +62,18 @@ public TreeSet<Proteoform> process(List<String> input) throws ParseException {
6462

6563
// Traverse all the vepTables
6664
for (int chr = 1; chr <= 22; chr++) {
67-
logger.log(Level.FINE, "Scanning vepTable for chromosome " + chr);
65+
logger.log(Level.INFO, "Scanning vepTable for chromosome " + chr);
6866
try {
69-
BufferedReader br = getBufferedReader(strMap.get(Conf.StrVars.vepTablesPath) + strMap.get(Conf.StrVars.vepTableName).replace("XX", chr + ""));
67+
BufferedReader br = getBufferedReaderFromResource(strMap.get(Conf.StrVars.vepTablesPath) + strMap.get(Conf.StrVars.vepTableName).replace("XX", chr + ""));
7068
br.readLine(); //Read header line
7169

7270
for (String line; (line = br.readLine()) != null; ) {
7371

74-
Pair<String, String> snp = getRsIdAndSwissProtFromVep(line);
72+
Multimap<Snp, String> snpMap = getRsIdAndSwissProtFromVep(line);
7573

76-
if (!snp.getRight().equals("NA")) {
77-
78-
if (snpSet.contains(snp.getLeft())) {
79-
String[] ids = snp.getRight().split(",");
80-
for (String id : ids) {
81-
entities.add(new Proteoform(id));
82-
}
74+
for (Map.Entry<Snp, String> pair : snpMap.entries()) {
75+
if (rsidSet.contains(pair.getKey().getRsid())) {
76+
entities.add(new Proteoform(pair.getValue()));
8377
}
8478
}
8579
}

src/main/java/no/uib/pathwaymatcher/Preprocessing/PreprocessorVCF.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ public TreeSet<Proteoform> process(List<String> input) throws ParseException {
2828
HashSet<Snp> snpSet = new HashSet<>();
2929

3030
try {
31-
Preprocessor.validateVepTables(strMap.get(Conf.StrVars.vepTablesPath));
31+
validateVepTables(strMap.get(Conf.StrVars.vepTablesPath));
3232
} catch (FileNotFoundException e) {
3333
sendError(ERROR_READING_VEP_TABLES);
3434
} catch (NoSuchFileException e) {
Lines changed: 36 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,10 @@
11
package no.uib.pathwaymatcher.Preprocessing;
22

3+
import com.google.common.collect.Multimap;
4+
import com.google.common.collect.TreeMultimap;
35
import no.uib.pathwaymatcher.Conf;
46
import no.uib.pathwaymatcher.model.Pair;
7+
import no.uib.pathwaymatcher.model.Snp;
58

69
import java.io.*;
710
import java.util.zip.GZIPInputStream;
@@ -10,10 +13,11 @@
1013

1114
public abstract class PreprocessorVariants extends Preprocessor {
1215

13-
public static BufferedReader getBufferedReader(String path) throws FileNotFoundException, IOException {
16+
public BufferedReader getBufferedReader(String path) throws FileNotFoundException, IOException {
1417
BufferedReader br = null;
1518
if (path.endsWith(".gz")) {
16-
InputStream fileStream = new FileInputStream(path);
19+
File file = new File(path);
20+
InputStream fileStream = new FileInputStream(file);
1721
InputStream gzipStream = new GZIPInputStream(fileStream);
1822
Reader decoder = new InputStreamReader(gzipStream);
1923
br = new BufferedReader(decoder);
@@ -23,8 +27,36 @@ public static BufferedReader getBufferedReader(String path) throws FileNotFoundE
2327
return br;
2428
}
2529

26-
protected static Pair<String, String> getRsIdAndSwissProtFromVep(String line) {
30+
public BufferedReader getBufferedReaderFromResource(String path) throws FileNotFoundException, IOException {
31+
BufferedReader br = null;
32+
ClassLoader classLoader = getClass().getClassLoader();
33+
File file = new File(classLoader.getResource(path).getFile());
34+
if (path.endsWith(".gz")) {
35+
InputStream fileStream = new FileInputStream(file);
36+
InputStream gzipStream = new GZIPInputStream(fileStream);
37+
Reader decoder = new InputStreamReader(gzipStream);
38+
br = new BufferedReader(decoder);
39+
} else {
40+
br = new BufferedReader(new FileReader(file));
41+
}
42+
return br;
43+
}
44+
45+
/**
 * Parses one row of a vep table into a mapping from SNPs to protein accessions.
 *
 * The row is split on single spaces. The first two fields are parsed as
 * numbers; the rsid and SwissProt fields are taken from the column indices
 * configured in {@code Conf.intMap} and may each hold several comma-separated
 * values. One entry is added for every (rsid, accession) combination.
 *
 * NOTE(review): no value is filtered out here. The caller code removed in this
 * commit skipped rows whose SwissProt field was "NA" — confirm that callers
 * still handle that placeholder.
 *
 * @param line one data row of a vep table
 * @return multimap with one {@code Snp} key per rsid, each mapped to every
 *         accession listed in the row
 */
public static Multimap<Snp, String> getRsIdAndSwissProtFromVep(String line) {
46+
TreeMultimap<Snp, String> mapping = TreeMultimap.create();
2747
// Columns are separated by a single space character.
String[] fields = line.split(" ");
28-
return new Pair<>(fields[Conf.intMap.get(Conf.IntVars.rsidIndex)], fields[Conf.intMap.get(Conf.IntVars.swissprotIndex)]);
48+
// Presumably the chromosome number and the base-pair position — TODO confirm
// against the vep table layout. Non-numeric text makes valueOf throw.
Integer chr = Integer.valueOf(fields[0]);
49+
Long bp = Long.valueOf(fields[1]);
50+
51+
// Both columns may list several values separated by commas.
String[] rsids = fields[Conf.intMap.get(Conf.IntVars.rsidIndex)].split(",");
52+
String[] uniprots = fields[Conf.intMap.get(Conf.IntVars.swissprotIndex)].split(",");
53+
54+
// Cross product: every rsid of the row is linked to every accession of the row.
for(String rsid : rsids){
55+
for(String uniprot : uniprots){
56+
Snp snp = new Snp(chr, bp, rsid);
57+
mapping.put(snp, uniprot);
58+
}
59+
}
60+
return mapping;
2961
}
3062
}

0 commit comments

Comments
 (0)