22#include < ctype.h>
33#include " kstring.h"
44
5- #define _cigOp (c ) ((c)&BAM_CIGAR_MASK)
6- #define _cigLen (c ) ((c)>>BAM_CIGAR_SHIFT)
7-
85// auxiliary functions for low level BAM record creation
96uint8_t * realloc_bdata (bam1_t *b, int size) {
107 if (b->m_data < size) {
@@ -29,9 +26,12 @@ uint8_t* dupalloc_bdata(bam1_t *b, int size) {
2926 return odata; // user must FREE this after
3027}
3128
29+ #define _cigOp (c ) ((c)&BAM_CIGAR_MASK)
30+ #define _cigLen (c ) ((c)>>BAM_CIGAR_SHIFT)
31+
3232GBamRecord::GBamRecord (const char * qname, int32_t gseq_tid,
3333 int pos, bool reverse, const char * qseq,
34- const char * cigar, const char * quals):iflags(0 ), exons(1 ),
34+ const char * cigar, const char * quals):iflags(0 ), exons(1 ),juncsdel( 1 ),
3535 clipL(0 ), clipR(0 ), mapped_len(0 ), uval(0 ) {
3636 novel=true ;
3737 bam_header=NULL ;
@@ -60,7 +60,7 @@ GBamRecord::GBamRecord(const char* qname, int32_t gseq_tid,
6060GBamRecord::GBamRecord (const char * qname, int32_t samflags, int32_t g_tid,
6161 int pos, int map_qual, const char * cigar, int32_t mg_tid, int mate_pos,
6262 int insert_size, const char * qseq, const char * quals,
63- GVec<char *>* aux_strings):iflags(0 ), exons(1 ), uval(0 ) {
63+ GVec<char *>* aux_strings):iflags(0 ), exons(1 ), juncsdel( 1 ), uval(0 ) {
6464 novel=true ;
6565 bam_header=NULL ;
6666 b=bam_init1 ();
@@ -321,80 +321,89 @@ switch (cop) {
321321} // interpret_CIGAR(), just a reference of CIGAR operations interpretation
322322
323323void GBamRecord::setupCoordinates () {
324+ if (!b) return ;
324325 const bam1_core_t *c = &b->core ;
325- if (c->flag & BAM_FUNMAP) return ; /* skip unmapped reads */
326+ if (c->flag & BAM_FUNMAP) return ; // skip unmapped reads
326327 uint32_t *cigar = bam1_cigar (b);
327- // uint32_t *p = bam1_cigar(b);
328- // --- prevent alignment error here (reported by UB-sanitazer):
329- // uint32_t *cigar= new uint32_t[c->n_cigar];
330- // memcpy(cigar, p, c->n_cigar * sizeof(uint32_t));
331- // --- UBsan protection end
332328 int l=0 ;
333329 mapped_len=0 ;
334330 clipL=0 ;
335331 clipR=0 ;
336332 start=c->pos +1 ; // genomic start coordinate, 1-based (BAM core.pos is 0-based)
333+ GSeg exon;
337334 int exstart=c->pos ;
338- // bool intron=false;
339- // int del=0;
335+ bool intron=false ;
336+ uint del=0 ;
337+ uint prevdel=0 ;
338+ bool ins=false ;
340339 for (int i = 0 ; i < c->n_cigar ; ++i) {
341340 unsigned char op = _cigOp (cigar[i]);
342341 switch (op) {
343342 case BAM_CEQUAL: // =
344343 case BAM_CDIFF: // X
345344 case BAM_CMATCH: // M
346- case BAM_CDEL: // D
347- l+=_cigLen (cigar[i]);
348- // intron=false; del=0;
349- break ;
350- /* case BAM_CDEL: // D
351- del=_cigLen(cigar[i]);
352- l+=del;
353- if(intron) // deletion after intron
354- exstart+=del; //push exon start
355- break; */
356- case BAM_CREF_SKIP: // N
357- // intron starts
358- // exon ends here
359- {
360- has_Introns=true ;
361- // GSeg exon(exstart+1,c->pos+l-del);
362- GSeg exon (exstart+1 ,c->pos +l);
363- exons.Add ( exon );
364- mapped_len+=exon.len ();
365- l += _cigLen (cigar[i]);
366- exstart=c->pos +l;
367- }
368- // intron=true; del=0;
369- break ;
370- case BAM_CSOFT_CLIP: // S
371- soft_Clipped=true ;
372- if (l) clipR=_cigLen (cigar[i]);
373- else clipL=_cigLen (cigar[i]);
374- // intron=false; del=0;
375- break ;
345+ l += _cigLen (cigar[i]);
346+ if (intron) { // op comes after intron --> update juncdel
347+ GSeg deljunc (prevdel,0 );
348+ juncsdel.Add (deljunc);
349+ }
350+ intron=false ;
351+ del=0 ;
352+ ins=false ;
353+ break ;
354+ case BAM_CDEL:
355+ del=_cigLen (cigar[i]);
356+ l+=del;
357+ if (intron) { // deletion after intron --> update juncsdel
358+ GSeg deljunc (prevdel,del);
359+ juncsdel.Add (deljunc);
360+ }
361+ ins=false ;
362+ break ;
363+ case BAM_CINS:
364+ // take care of case where there is an insertion in the middle of an intron
365+ ins=true ;
366+ break ;
367+ case BAM_CREF_SKIP: // N
368+ // intron starts
369+ // exon ends here
370+ if (!ins || !intron) { // insertion in the middle of an intron --> adjust last exon
371+ exon.start =exstart+1 ;
372+ exon.end =c->pos +l;
373+ exons.Add (exon);
374+ mapped_len+=exon.len ();
375+ }
376+ has_Introns=true ;
377+ l += _cigLen (cigar[i]);
378+ exstart=c->pos +l;
379+ prevdel=del;
380+ intron=true ;
381+ del=0 ;
382+ break ;
383+ case BAM_CSOFT_CLIP:
384+ soft_Clipped=true ;
385+ if (l) clipR=_cigLen (cigar[i]);
386+ else clipL=_cigLen (cigar[i]);
387+ intron=false ;
388+ del=0 ;
389+ ins=false ;
390+ break ;
376391 case BAM_CHARD_CLIP:
377- hard_Clipped=true ;
378- // intron=false; del=0;
379- break ;
380- case BAM_CINS: // I
381- // rpos+=cl; //gpos not advanced
382- // intron=false; del=0;
383- break ;
384- case BAM_CPAD:
385- // gpos+=cl;
386- // intron=false; del=0; //?
387- break ;
392+ hard_Clipped=true ;
393+ intron=false ;
394+ del=0 ;
395+ ins=false ;
396+ break ;
388397 default :
389- int cl=_cigLen (cigar[i]);
390- fprintf (stderr, " Unhandled CIGAR operation %d:%d\n " , op, cl);
398+ int cl=_cigLen (cigar[i]);
399+ GMessage ( " Warning: unhandled CIGAR operation %d:%d\n " , op, cl);
391400 }
392401 }
393- GSeg exon (exstart+1 ,c->pos +l);
402+ exon.start =exstart+1 ;
403+ exon.end =c->pos +l;
394404 exons.Add (exon);
395405 mapped_len+=exon.len ();
396406 end=c->pos +l; // genomic end coordinate
397- // delete[] cigar; //UBsan protection
398407}
399408
400409
0 commit comments