Skip to content

Commit 823e948

Browse files
committed
merged mix-merge branch
2 parents 872e0cb + b9a53f2 commit 823e948

6 files changed

Lines changed: 2070 additions & 1194 deletions

File tree

Makefile

Lines changed: 14 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -28,7 +28,7 @@ LDFLAGS += -L${BAM}
2828

2929
LIBS := -lbam -lz
3030

31-
ifneq (,$(findstring nothreads,$(MAKECMDGOALS)))
31+
ifneq (,$(filter %nothreads %prof %profile, $(MAKECMDGOALS)))
3232
NOTHREADS=1
3333
endif
3434

@@ -96,14 +96,19 @@ else
9696
CXXFLAGS += -DDEBUG -D_DEBUG -DGDEBUG -fno-common -fstack-protector
9797
LIBS := ${SANLIBS} -lubsan -ldl ${LIBS}
9898
else
99-
#just plain debug build
100-
DEBUG_BUILD=1
101-
CXXFLAGS := $(if $(CXXFLAGS),$(CXXFLAGS),-g -O0)
102-
#CXXFLAGS := $(if $(CXXFLAGS),$(CXXFLAGS),-ggdb -g3 -O0 -fvar-tracking-assignments -fno-omit-frame-pointer)
103-
ifneq (, $(findstring darwin, $(DMACH)))
104-
CXXFLAGS += -gdwarf-3
99+
ifneq (,$(filter %prof %profile, $(MAKECMDGOALS)))
100+
## profiling build
101+
CXXFLAGS := -DNDEBUG $(BASEFLAGS) -g -pg
102+
LDFLAGS += -g -pg
103+
else
104+
#just plain debug build
105+
DEBUG_BUILD=1
106+
CXXFLAGS := $(if $(CXXFLAGS),$(CXXFLAGS),-g -O0)
107+
ifneq (, $(findstring darwin, $(DMACH)))
108+
CXXFLAGS += -gdwarf-3
109+
endif
110+
CXXFLAGS += -DDEBUG -D_DEBUG -DGDEBUG $(BASEFLAGS)
105111
endif
106-
CXXFLAGS += -DDEBUG -D_DEBUG -DGDEBUG $(BASEFLAGS)
107112
endif
108113
endif
109114

@@ -143,6 +148,7 @@ OBJS += rlink.o tablemaker.o tmerge.o
143148
all release static debug: stringtie${EXE}
144149
memcheck memdebug tsan tcheck thrcheck: stringtie${EXE}
145150
memuse memusage memtrace: stringtie${EXE}
151+
prof profile: stringtie${EXE}
146152
nothreads: stringtie${EXE}
147153

148154
stringtie.o : $(GDIR)/GBitVec.h $(GDIR)/GHashMap.hh $(GDIR)/GBam.h

gclib/GBam.cpp

Lines changed: 67 additions & 58 deletions
Original file line number | Diff line number | Diff line change
@@ -2,9 +2,6 @@
22
#include <ctype.h>
33
#include "kstring.h"
44

5-
#define _cigOp(c) ((c)&BAM_CIGAR_MASK)
6-
#define _cigLen(c) ((c)>>BAM_CIGAR_SHIFT)
7-
85
//auxiliary functions for low level BAM record creation
96
uint8_t* realloc_bdata(bam1_t *b, int size) {
107
if (b->m_data < size) {
@@ -29,9 +26,12 @@ uint8_t* dupalloc_bdata(bam1_t *b, int size) {
2926
return odata; //user must FREE this after
3027
}
3128

29+
#define _cigOp(c) ((c)&BAM_CIGAR_MASK)
30+
#define _cigLen(c) ((c)>>BAM_CIGAR_SHIFT)
31+
3232
GBamRecord::GBamRecord(const char* qname, int32_t gseq_tid,
3333
int pos, bool reverse, const char* qseq,
34-
const char* cigar, const char* quals):iflags(0), exons(1),
34+
const char* cigar, const char* quals):iflags(0), exons(1),juncsdel(1),
3535
clipL(0), clipR(0), mapped_len(0), uval(0) {
3636
novel=true;
3737
bam_header=NULL;
@@ -60,7 +60,7 @@ GBamRecord::GBamRecord(const char* qname, int32_t gseq_tid,
6060
GBamRecord::GBamRecord(const char* qname, int32_t samflags, int32_t g_tid,
6161
int pos, int map_qual, const char* cigar, int32_t mg_tid, int mate_pos,
6262
int insert_size, const char* qseq, const char* quals,
63-
GVec<char*>* aux_strings):iflags(0), exons(1), uval(0) {
63+
GVec<char*>* aux_strings):iflags(0), exons(1), juncsdel(1),uval(0) {
6464
novel=true;
6565
bam_header=NULL;
6666
b=bam_init1();
@@ -321,80 +321,89 @@ switch (cop) {
321321
} // interpret_CIGAR(), just a reference of CIGAR operations interpretation
322322

323323
void GBamRecord::setupCoordinates() {
324+
if (!b) return;
324325
const bam1_core_t *c = &b->core;
325-
if (c->flag & BAM_FUNMAP) return; /* skip unmapped reads */
326+
if (c->flag & BAM_FUNMAP) return; // skip unmapped reads
326327
uint32_t *cigar = bam1_cigar(b);
327-
//uint32_t *p = bam1_cigar(b);
328-
//--- prevent alignment error here (reported by UB-sanitazer):
329-
//uint32_t *cigar= new uint32_t[c->n_cigar];
330-
//memcpy(cigar, p, c->n_cigar * sizeof(uint32_t));
331-
//--- UBsan protection end
332328
int l=0;
333329
mapped_len=0;
334330
clipL=0;
335331
clipR=0;
336332
start=c->pos+1; //genomic start coordinate, 1-based (BAM core.pos is 0-based)
333+
GSeg exon;
337334
int exstart=c->pos;
338-
//bool intron=false;
339-
//int del=0;
335+
bool intron=false;
336+
uint del=0;
337+
uint prevdel=0;
338+
bool ins=false;
340339
for (int i = 0; i < c->n_cigar; ++i) {
341340
unsigned char op = _cigOp(cigar[i]);
342341
switch(op) {
343342
case BAM_CEQUAL: // =
344343
case BAM_CDIFF: // X
345344
case BAM_CMATCH: // M
346-
case BAM_CDEL: // D
347-
l+=_cigLen(cigar[i]);
348-
//intron=false; del=0;
349-
break;
350-
/* case BAM_CDEL: // D
351-
del=_cigLen(cigar[i]);
352-
l+=del;
353-
if(intron) // deletion after intron
354-
exstart+=del; //push exon start
355-
break; */
356-
case BAM_CREF_SKIP: // N
357-
//intron starts
358-
//exon ends here
359-
{
360-
has_Introns=true;
361-
//GSeg exon(exstart+1,c->pos+l-del);
362-
GSeg exon(exstart+1,c->pos+l);
363-
exons.Add( exon );
364-
mapped_len+=exon.len();
365-
l += _cigLen(cigar[i]);
366-
exstart=c->pos+l;
367-
}
368-
//intron=true; del=0;
369-
break;
370-
case BAM_CSOFT_CLIP: // S
371-
soft_Clipped=true;
372-
if (l) clipR=_cigLen(cigar[i]);
373-
else clipL=_cigLen(cigar[i]);
374-
//intron=false; del=0;
375-
break;
345+
l += _cigLen(cigar[i]);
346+
if(intron) { // op comes after intron --> update juncdel
347+
GSeg deljunc(prevdel,0);
348+
juncsdel.Add(deljunc);
349+
}
350+
intron=false;
351+
del=0;
352+
ins=false;
353+
break;
354+
case BAM_CDEL:
355+
del=_cigLen(cigar[i]);
356+
l+=del;
357+
if (intron) { // deletion after intron --> update juncsdel
358+
GSeg deljunc(prevdel,del);
359+
juncsdel.Add(deljunc);
360+
}
361+
ins=false;
362+
break;
363+
case BAM_CINS:
364+
// take care of case where there is an insertion in the middle of an intron
365+
ins=true;
366+
break;
367+
case BAM_CREF_SKIP: //N
368+
//intron starts
369+
//exon ends here
370+
if(!ins || !intron) { // insertion in the middle of an intron --> adjust last exon
371+
exon.start=exstart+1;
372+
exon.end=c->pos+l;
373+
exons.Add(exon);
374+
mapped_len+=exon.len();
375+
}
376+
has_Introns=true;
377+
l += _cigLen(cigar[i]);
378+
exstart=c->pos+l;
379+
prevdel=del;
380+
intron=true;
381+
del=0;
382+
break;
383+
case BAM_CSOFT_CLIP:
384+
soft_Clipped=true;
385+
if (l) clipR=_cigLen(cigar[i]);
386+
else clipL=_cigLen(cigar[i]);
387+
intron=false;
388+
del=0;
389+
ins=false;
390+
break;
376391
case BAM_CHARD_CLIP:
377-
hard_Clipped=true;
378-
//intron=false; del=0;
379-
break;
380-
case BAM_CINS: // I
381-
//rpos+=cl; //gpos not advanced
382-
//intron=false; del=0;
383-
break;
384-
case BAM_CPAD:
385-
//gpos+=cl;
386-
//intron=false; del=0; //?
387-
break;
392+
hard_Clipped=true;
393+
intron=false;
394+
del=0;
395+
ins=false;
396+
break;
388397
default:
389-
int cl=_cigLen(cigar[i]);
390-
fprintf(stderr, "Unhandled CIGAR operation %d:%d\n", op, cl);
398+
int cl=_cigLen(cigar[i]);
399+
GMessage("Warning: unhandled CIGAR operation %d:%d\n", op, cl);
391400
}
392401
}
393-
GSeg exon(exstart+1,c->pos+l);
402+
exon.start=exstart+1;
403+
exon.end=c->pos+l;
394404
exons.Add(exon);
395405
mapped_len+=exon.len();
396406
end=c->pos+l; //genomic end coordinate
397-
//delete[] cigar; //UBsan protection
398407
}
399408

400409

gclib/GBam.h

Lines changed: 5 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -33,6 +33,7 @@ class GBamRecord: public GSeg {
3333
uint8_t abuf[512];
3434
public:
3535
GVec<GSeg> exons; //coordinates will be 1-based
36+
GVec<GSeg> juncsdel; // delete coordinates around introns
3637
int clipL; //soft clipping data, as seen in the CIGAR string
3738
int clipR;
3839
int mapped_len; //sum of exon lengths
@@ -42,7 +43,7 @@ class GBamRecord: public GSeg {
4243
bool hasIntrons() { return has_Introns; }
4344
//created from a reader:
4445
void bfree_on_delete(bool b_free=true) { novel=b_free; }
45-
GBamRecord(bam1_t* from_b=NULL, bam_header_t* b_header=NULL, bool b_free=true):iflags(0), exons(1),
46+
GBamRecord(bam1_t* from_b=NULL, bam_header_t* b_header=NULL, bool b_free=true):iflags(0), exons(1),juncsdel(1),
4647
clipL(0), clipR(0), mapped_len(0), uval(0) {
4748
bam_header=NULL;
4849
if (from_b==NULL) {
@@ -58,7 +59,7 @@ class GBamRecord: public GSeg {
5859
setupCoordinates();//set 1-based coordinates (start, end and exons array)
5960
}
6061

61-
GBamRecord(GBamRecord& r):GSeg(r.start, r.end), iflags(0), exons(r.exons),
62+
GBamRecord(GBamRecord& r):GSeg(r.start, r.end), iflags(0), exons(r.exons),juncsdel(r.juncsdel),
6263
clipL(r.clipL), clipR(r.clipR), mapped_len(r.mapped_len), uval(0) { //copy constructor
6364
//makes a new copy of the bam1_t record etc.
6465
clear();
@@ -76,6 +77,7 @@ class GBamRecord: public GSeg {
7677
start=r.start;
7778
end=r.end;
7879
exons = r.exons;
80+
juncsdel=r.juncsdel;
7981
clipL = r.clipL;
8082
clipR = r.clipR;
8183
uval = r.uval;
@@ -91,6 +93,7 @@ class GBamRecord: public GSeg {
9193
}
9294
b=NULL;
9395
exons.Clear();
96+
juncsdel.Clear();
9497
mapped_len=0;
9598
bam_header=NULL;
9699
iflags=0;

0 commit comments

Comments
 (0)