Skip to content

Commit 07add4a

Browse files
committed
v5.7.2
improved synteny algorithm
1 parent 1be3f9f commit 07add4a

30 files changed

Lines changed: 1091 additions & 639 deletions

java/src/backend/AnchorMain.java

Lines changed: 16 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,8 @@ protected boolean run(Mproject pj1, Mproject pj2) throws Exception {
5252
long startTime = Utils.getTimeMem();
5353

5454
String proj1Name = pj1.getDBName(), proj2Name = pj2.getDBName();
55-
plog.msg("\nStart calculating cluster hits for " + proj1Name + " and " + proj2Name);// CAS568 nl
55+
if (proj1Name.equals(proj2Name)) plog.msg("\nStart calculating cluster hits for " + proj1Name + " self synteny"); // CAS572 add
56+
else plog.msg("\nStart calculating cluster hits for " + proj1Name + " and " + proj2Name);
5657

5758
String resultDir = Constants.getNameResultsDir(proj1Name, proj2Name); // e.g. data/seq_results/p1_to_p2
5859
if ( !Utilities.pathExists(resultDir) ) {
@@ -75,26 +76,26 @@ protected boolean run(Mproject pj1, Mproject pj2) throws Exception {
7576
AnchorMain2 an = new AnchorMain2(this);
7677
boolean b = an.run(pj1, pj2, dh); if (!b) return false;
7778
}
78-
if (Cancelled.isCancelled()) {dbc2.close(); return false; } // CAS567 missing return!
79+
if (Cancelled.isCancelled()) {dbc2.close(); return false; }
7980

8081
saveHitNum();
8182
saveAnnoHitCnt();
8283

83-
/** Run before pseudo; CAS565 moved from DoAlignSyn as does not need blocks**/
84+
/** Run before pseudo **/
8485
if (pj1.hasGenes() && pj2.hasGenes()) {
8586
AnchorPost collinear = new AnchorPost(pairIdx, pj1, pj2, dbc2, plog);
8687
collinear.collinearSets();
8788
}
88-
if (Cancelled.isCancelled()) {dbc2.close(); return false; } // CAS567 missing return!
89+
if (Cancelled.isCancelled()) {dbc2.close(); return false; }
8990

90-
// Numbers pseudo genes; must be after collinear; add CAS565
91+
// Numbers pseudo genes; must be after collinear;
9192
if (mp.isNumPseudo(Mpair.FILE)) new Pseudo().addPseudo();
9293

9394
/** finish **/
9495
long modDirDate = new File(resultDir).lastModified(); // add for Pair Summary with 'use existing files'
9596
mp.setPairProp("pair_align_date", Utils.getDateStr(modDirDate));
9697
if (!Constants.VERBOSE) Globals.rclear();
97-
Utils.prtMsgTimeDone(plog, "Finish clustering hits", startTime); // CAS568 change from mem output
98+
Utils.prtMsgTimeDone(plog, "Finish clustering hits", startTime);
9899
return true;
99100
}
100101
catch (Exception e) {ErrorReport.print(e, "Run load anchors"); return false;}
@@ -106,7 +107,7 @@ protected boolean run(Mproject pj1, Mproject pj2) throws Exception {
106107
*/
107108
private boolean saveHitNum() {
108109
try {
109-
Globals.rprt("Compute and save hit#"); // CAS568 do not need for Verbose
110+
Globals.rprt("Compute and save hit#");
110111
TreeMap <Integer, String> grpMap1 = mp.mProj1.getGrpIdxMap();
111112
TreeMap <Integer, String> grpMap2 = mp.mProj2.getGrpIdxMap();
112113

@@ -152,9 +153,9 @@ private boolean saveHitNum() {
152153
* Count number of hits per gene; these include all pairwise projects except self;
153154
*/
154155
public void saveAnnoHitCnt() { // public for Mpair.removePairFromDB
155-
Globals.rprt("Compute and save gene numHits"); // CAS568 do not need for Verbose
156+
Globals.rprt("Compute and save gene numHits");
156157

157-
TreeMap <Integer, String> idxList1 = mp.mProj1.getGrpIdxMap(); // CAS546 was using SyProj.Group
158+
TreeMap <Integer, String> idxList1 = mp.mProj1.getGrpIdxMap();
158159
TreeMap <Integer, String> idxList2 = mp.mProj2.getGrpIdxMap();
159160

160161
for (int idx : idxList1.keySet()) if (!saveAnnotHitCnt(idx, idxList1.get(idx))) return;
@@ -185,8 +186,6 @@ public boolean saveAnnotHitCnt(int grpIdx, String grpName) { // public for Mproj
185186
}
186187
rs.close();
187188

188-
// CAS568 put in calling Globals.rprt(grpName + " Genes " + geneCntMap.size());
189-
190189
PreparedStatement ps = dbc2.prepareStatement("update pseudo_annot set numhits=? where idx=?");
191190
for (int idx : geneCntMap.keySet()) {
192191
int num = geneCntMap.get(idx);
@@ -249,7 +248,7 @@ protected boolean addPseudoFromFlag() {
249248
catch (Exception e) {ErrorReport.print(e, "Add pseudo from flag"); return false;}
250249
}
251250
/*******************************************************
252-
* Add so can be used in Cluster and Report as regular genes; CAS565
251+
* Add so can be used in Cluster and Report as regular genes;
253252
* Assigns sequentially along chromosome regardless of what the target chromosome is
254253
* which allows all numbers to be unique; but it may lead to gaps for a given chrQ-chrT
255254
*/
@@ -331,9 +330,6 @@ private boolean addPseudoGrpGrp(int grpIdx, String where, String annoStr, int i,
331330
String type = Globals.pseudoType;
332331

333332
// Must count pseudo too so as to not repeat pseudo numbers
334-
// CAS570 would still duplicate pseudo, which worked as in different pairs, but confusing
335-
//int genes = dbc2.executeCount("select count(*) from pseudo_annot where (type='gene' or type='pseudo') and grp_idx="+ grpIdx);
336-
337333
int genes = dbc2.executeCount("select max(genenum) from pseudo_annot where grp_idx="+ grpIdx);
338334
int geneStart = genes;
339335
if (genes==0) geneStart = 1;
@@ -368,14 +364,15 @@ private boolean addPseudoGrpGrp(int grpIdx, String where, String annoStr, int i,
368364

369365
String dn = (i==0) ? mProj1.getDisplayName() : mProj2.getDisplayName();
370366
String chr = (i==0) ? mProj1.getGrpNameFromIdx(grpIdx) : mProj2.getGrpNameFromIdx(grpIdx);
371-
372-
String msg = String.format("Pseudo %-15s Genes %,6d Start %,6d ", (dn+" "+chr), genes, geneStart);
373-
if (Constants.VERBOSE && cntGrp==1) Utils.prtIndentNumMsgFile(plog, 1, hitGeneMap.size(), msg); // CAS568 reduce output
367+
String msg = String.format("Pseudo %-20s Genes %,6d Start %,6d ", (dn+" "+chr), genes, geneStart);
368+
/* too much output CAS572
369+
if (Constants.VERBOSE && cntGrp==1) Utils.prtIndentNumMsgFile(plog, 1, hitGeneMap.size(), msg);
374370
if (Constants.VERBOSE && cntGrp==2) {
375371
if (maxGrp>2) msg += "...";
376372
Utils.prtIndentNumMsgFile(plog, 1, hitGeneMap.size(), msg);
377373
}
378-
else Globals.rprt(String.format("%,5d %s", hitGeneMap.size(),msg));
374+
else */
375+
Globals.rprt(String.format("%,5d %s", hitGeneMap.size(),msg));
379376

380377
if (hitGeneMap.size()==0) return true;
381378

java/src/backend/Constants.java

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -71,9 +71,7 @@ public class Constants {
7171
public static final String mumSuffix = ".mum";
7272
public static final String doneSuffix = ".done";
7373
public static final String selfPrefix = "self.";
74-
public static final String finalDir = "/final/";
75-
public static final String anchorFile = "anchors.txt";
76-
public static final String blockFile = "block.txt";
74+
public static final String finalDir = "/final/"; // has been discontinued; still exists in order to Remove
7775

7876
public static final String projTo = "_to_";
7977
public static final String faFile = ".fa";

java/src/backend/DoAlignSynPair.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -50,12 +50,12 @@ public void run(ManagerFrame frame, DBconn2 dbc2, Mpair mp, boolean closeWhenDo
5050
Mproject mProj2 = mp.mProj2;
5151
String dbName1 = mProj1.getDBName();
5252
String dbName2 = mProj2.getDBName();
53-
String toName = (mProj1 == mProj2) ? dbName1 + " to self" : dbName1 + " to " + dbName2;
53+
String toName = (mProj1 == mProj2) ? dbName1 : dbName1 + " to " + dbName2; // CAS572 make naming consistent
5454

5555
FileWriter syFW = symapLog(mProj1,mProj2);
5656
String alignLogDir = buildLogAlignDir(mProj1,mProj2);
5757

58-
String msg = (mProj1 == mProj2) ? "Synteny " + dbName1 + " to itself ..." : "Synteny " + toName + " ...";
58+
String msg = (mProj1 == mProj2) ? "Self-synteny " + dbName1 + " ..." : "Synteny " + toName + " ...";
5959

6060
final ProgressDialog diaLog = new ProgressDialog(this, "Running Synteny", msg, true, syFW); // write version and date
6161

java/src/backend/SeqLoadMain.java

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -343,7 +343,10 @@ private void uploadSequence(String grp, String fullname, String seq, String file
343343
tdbc2.executeUpdate(st);
344344
}
345345
}
346-
catch (Exception e) {ErrorReport.print(e, "Loading sequence - fail loading to database "); };
346+
catch (Exception e) {
347+
ErrorReport.print(e, "Fail load sequence: " + projIdx + "','" + grp + "','" + fullname + "'," + order);
348+
Globals.prt(" This can happen if a project was just removed and another added immediately."); // CAS572
349+
}
347350
}
348351
/**************************************************************************/
349352
private boolean rtError(String msg) {

java/src/backend/anchor1/AnchorMain1.java

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -183,7 +183,7 @@ private boolean scanAlignFiles(File dh) throws Exception {
183183
}
184184
if (mTotalHits == 0) return rtError("No good anchors were found");
185185
Globals.rclear();
186-
if (nFile>1 || !VB) Utils.prtNumMsgFile(plog, nHitsScanned, "Total scanned hits");
186+
if (nFile>1 || !VB) Utils.prtNumMsgFile(plog, nHitsScanned, "Total scanned hits from " + nFile + " files"); // CAS572 add file
187187
if (nHitsScanned!=mTotalHits) Utils.prtNumMsgFile(plog, mTotalHits, "Total accepted hits");
188188
if (mTotalLargeHits > 0 && VB) Utils.prtNumMsgFile(plog, mTotalLargeHits, "Large hits (> " + Group.FSplitLen + ") "
189189
+ mTotalBrokenHits + " Split ");
@@ -218,7 +218,7 @@ private boolean scanAlignFiles(File dh) throws Exception {
218218
}
219219
Globals.rclear();
220220
if (nFile>1 || !VB)
221-
Utils.prtNumMsg(plog, nHitsScanned, "Total cluster hits ");
221+
Utils.prtNumMsg(plog, nHitsScanned, "Total cluster hits ");
222222

223223
if (Constants.PRT_STATS) {
224224
Utils.prtTimeMemUsage(plog, "Complete scan cluster", memTime);
@@ -441,8 +441,9 @@ private Vector<Hit> clusterHits2(Vector<Hit> inHits) throws Exception {
441441
AnnotElem qAnno = grp1.getBestOlapAnno(hit.queryHits.start, hit.queryHits.end); // priority to gene annotElem
442442
AnnotElem tAnno = grp2.getBestOlapAnno(hit.targetHits.start, hit.targetHits.end);
443443

444-
if (qAnno == null) {rtError("missing query annot! grp:" + grp1.idStr() + " start:" + hit.queryHits.start + " end:" + hit.queryHits.end); return null;}
445-
if (tAnno == null) {rtError("missing target annot! grp:" + grp2.idStr() + " start:" + hit.targetHits.start + " end:" + hit.targetHits.end); return null;}
444+
// Self-synteny humans failed on this and quit; change to continue... CAS572
445+
if (qAnno == null) {Globals.tprt("missing query annot! grp:" + grp1.idStr() + " start:" + hit.queryHits.start + " end:" + hit.queryHits.end); continue;}
446+
if (tAnno == null) {Globals.tprt("missing target annot! grp:" + grp2.idStr() + " start:" + hit.targetHits.start + " end:" + hit.targetHits.end); continue;}
446447

447448
String key = qAnno.mID + "_" + tAnno.mID;
448449

@@ -692,7 +693,7 @@ private void saveFilterHits(Vector <Hit> vecHits) throws Exception {
692693
}
693694
catch (Exception e) {ErrorReport.print(e, "save annot hits"); bSuccess=false;}
694695
}
695-
private String intArrayToBlockStr(int[] ia) {// CAS569 moved from Util
696+
private String intArrayToBlockStr(int[] ia) {
696697
String out = "";
697698
if (ia != null) {
698699
for (int i = 0; i < ia.length; i+=2) {

java/src/backend/anchor2/AnchorMain2.java

Lines changed: 11 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -104,21 +104,21 @@ public boolean run(Mproject pj1, Mproject pj2, File dh) throws Exception {
104104
prtFileOpen();
105105

106106
// for each file: read file, build clusters, save results to db
107-
processFiles(dh); if (!bSuccess) return false;
107+
int nfile = processFiles(dh); if (!bSuccess) return false;
108108

109109
// finish
110110
prtFileClose();
111111
clearTree();
112112

113113
Globals.rclear();
114-
Utils.prtIndentMsgFile(plog, 1, String.format("Final totals Raw hits %,d", cntRawHits));
114+
Utils.prtIndentMsgFile(plog, 1, String.format("Final totals Raw hits %,d Files %,d", cntRawHits, nfile));
115115

116-
String msg1 = String.format("Clusters Both genes %,8d One gene %,8d No gene %,8d",
116+
String msg1 = String.format("Clusters Both genes %,9d One gene %,9d No gene %,9d", // CAS572 8->9
117117
cntG2, cntG1, cntG0);
118118
Utils.prtNumMsgFile(plog, cntClusters, msg1);
119119

120120
int total = cntG2Fil+cntG1Fil+cntG0Fil;
121-
String msg2 = String.format("Filtered Both genes %,8d One gene %,8d No gene %,8d Pile hits %,d",
121+
String msg2 = String.format("Filtered Both genes %,9d One gene %,9d No gene %,9d Pile hits %,d",
122122
total, cntG2Fil, cntG1Fil, cntG0Fil, cntPileFil);
123123
Utils.prtNumMsgFile(plog, total, msg2);
124124

@@ -271,32 +271,34 @@ private void loadAnno(int X, Mproject mProj, TreeMap <Integer, GeneTree> grpGene
271271
/******************************************************
272272
* Step 2: Each file is read and immediately processed and save
273273
*/
274-
private void processFiles(File dh) {
274+
private int processFiles(File dh) {
275275
try {
276+
int nfile=0;
276277
File[] fs = dh.listFiles();
277278
for (File f : fs) {
278279
if (!f.isFile()) continue;
279280
fileName = f.getName();
280281
if (!fileName.endsWith(Constants.mumSuffix))continue;
281-
282+
nfile++;
282283
long time = Utils.getTime();
283284

284285
// load hits into grpPairs
285286
readMummer(f);
286287

287288
// process all group pairs from file
288289
for (GrpPair gp : grpPairMap.values()) {
289-
bSuccess = gp.buildClusters(); if (!bSuccess) return;
290+
bSuccess = gp.buildClusters(); if (!bSuccess) return nfile;
290291
}
291292

292-
if (failCheck()) return;
293+
if (failCheck()) return nfile;
293294

294295
clearFileSpecific();
295296

296297
if (Arg.TRACE) Utils.prtTimeMemUsage(plog, " Finish ", time);
297298
}
299+
return nfile; // CAS572 add
298300
}
299-
catch (Exception e) {ErrorReport.print(e, "analyze and save"); bSuccess=false;}
301+
catch (Exception e) {ErrorReport.print(e, "analyze and save"); bSuccess=false; return 0;}
300302
}
301303
/*********************************************************************
302304
// 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14

0 commit comments

Comments
 (0)