Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
66 changes: 42 additions & 24 deletions faidx.c
Original file line number Diff line number Diff line change
Expand Up @@ -68,10 +68,13 @@ static inline int bgzf_getc_(BGZF *fp) {

typedef struct {
int id; // faidx_t->name[id] is for this struct.
uint32_t line_len, line_blen;
uint64_t len;
uint64_t seq_offset;
uint64_t qual_offset;
uint32_t line_extra; // Bytes to skip at the end of each line
// Usually 1 for \n or 2 for \r\n ending
uint64_t line_blen; // Number of bases (or quality values) on each line
// Total line length = line_blen + line_extra
uint64_t len; // Length of entire sequence
uint64_t seq_offset; // File offset to start of sequence
uint64_t qual_offset; // File offset to start of quality values (if fastq)
} faidx1_t;
KHASH_MAP_INIT_STR(s, faidx1_t)

Expand All @@ -90,10 +93,20 @@ static int fai_name2id(void *v, const char *ref)
return k == kh_end(fai->hash) ? -1 : kh_val(fai->hash, k).id;
}

static inline int fai_insert_index(faidx_t *idx, const char *name, uint64_t len, uint32_t line_len, uint32_t line_blen, uint64_t seq_offset, uint64_t qual_offset)
static inline int fai_insert_index(faidx_t *idx, const char *name, uint64_t len, uint64_t line_len, uint64_t line_blen, uint64_t seq_offset, uint64_t qual_offset)
{
if (!name) {
hts_log_error("Malformed line");
hts_log_error("Malformed line: no name");
return -1;
}
if (line_len < line_blen) {
hts_log_error("Malformed line: width (%"PRIu64") less than base count (%"PRIu64")",
line_len, line_blen);
return -1;
}
if (line_len - line_blen > UINT32_MAX) {
hts_log_error("Malformed line: difference between width (%"PRIu64") and base count (%"PRIu64") too large",
line_len, line_blen);
return -1;
}

Expand All @@ -120,7 +133,7 @@ static inline int fai_insert_index(faidx_t *idx, const char *name, uint64_t len,
v->id = idx->n;
idx->name[idx->n++] = name_key;
v->len = len;
v->line_len = line_len;
v->line_extra = (uint32_t) (line_len - line_blen);
v->line_blen = line_blen;
v->seq_offset = seq_offset;
v->qual_offset = qual_offset;
Expand Down Expand Up @@ -362,12 +375,13 @@ static int fai_save(const faidx_t *fai, hFILE *fp) {

if (fai->format == FAI_FASTA) {
snprintf(buf, sizeof(buf),
"\t%"PRIu64"\t%"PRIu64"\t%"PRIu32"\t%"PRIu32"\n",
x.len, x.seq_offset, x.line_blen, x.line_len);
"\t%"PRIu64"\t%"PRIu64"\t%"PRIu64"\t%"PRIu64"\n",
x.len, x.seq_offset, x.line_blen, x.line_blen + x.line_extra);
} else {
snprintf(buf, sizeof(buf),
"\t%"PRIu64"\t%"PRIu64"\t%"PRIu32"\t%"PRIu32"\t%"PRIu64"\n",
x.len, x.seq_offset, x.line_blen, x.line_len, x.qual_offset);
"\t%"PRIu64"\t%"PRIu64"\t%"PRIu64"\t%"PRIu64"\t%"PRIu64"\n",
x.len, x.seq_offset, x.line_blen,
x.line_blen + x.line_extra, x.qual_offset);
}

if (hputs(fai->name[i], fp) != 0) return -1;
Expand All @@ -393,10 +407,11 @@ static faidx_t *fai_read(hFILE *fp, const char *fname, int format)
if (!buf) goto fail;

while ((l = hgetln(buf, 0x10000, fp)) > 0) {
uint32_t line_len, line_blen, n;
uint64_t line_len, line_blen;
uint64_t len;
uint64_t seq_offset;
uint64_t qual_offset = 0;
int n;

for (p = buf; *p && !isspace_c(*p); ++p);

Expand All @@ -405,14 +420,14 @@ static faidx_t *fai_read(hFILE *fp, const char *fname, int format)
}

if (format == FAI_FASTA) {
n = sscanf(p, "%"SCNu64"%"SCNu64"%"SCNu32"%"SCNu32, &len, &seq_offset, &line_blen, &line_len);
n = sscanf(p, "%"SCNu64"%"SCNu64"%"SCNu64"%"SCNu64, &len, &seq_offset, &line_blen, &line_len);

if (n != 4) {
hts_log_error("Could not understand FASTA index %s line %zd", fname, lnum);
goto fail;
}
} else {
n = sscanf(p, "%"SCNu64"%"SCNu64"%"SCNu32"%"SCNu32"%"SCNu64, &len, &seq_offset, &line_blen, &line_len, &qual_offset);
n = sscanf(p, "%"SCNu64"%"SCNu64"%"SCNu64"%"SCNu64"%"SCNu64, &len, &seq_offset, &line_blen, &line_len, &qual_offset);

if (n != 5) {
if (n == 4) {
Expand Down Expand Up @@ -726,14 +741,16 @@ static char *fai_retrieve(const faidx_t *fai, const faidx1_t *val,
}

if (val->line_blen <= 0) {
hts_log_error("Invalid line length in index: %d", val->line_blen);
hts_log_error("Invalid line length in index: %"PRIu64, val->line_blen);
*len = -1;
return NULL;
}

uint64_t line_len = val->line_blen + val->line_extra;

ret = bgzf_useek(fai->bgzf,
offset
+ beg / val->line_blen * val->line_len
+ beg / val->line_blen * line_len
+ beg % val->line_blen, SEEK_SET);

if (ret < 0) {
Expand All @@ -743,7 +760,7 @@ static char *fai_retrieve(const faidx_t *fai, const faidx1_t *val,
}

// Over-allocate so there is extra space for one end-of-line sequence
buffer = (char*)malloc((size_t) end - beg + val->line_len - val->line_blen + 1);
buffer = (char*)malloc((size_t) end - beg + line_len - val->line_blen + 1);
if (!buffer) {
*len = -1;
return NULL;
Expand All @@ -761,7 +778,7 @@ static char *fai_retrieve(const faidx_t *fai, const faidx1_t *val,
}

s = buffer;
firstline_len = val->line_len - beg % val->line_blen;
firstline_len = line_len - beg % val->line_blen;

// Read the (partial) first line and its line terminator, but increment s past the
// line contents only, so the terminator characters will be overwritten by the next line.
Expand All @@ -772,8 +789,8 @@ static char *fai_retrieve(const faidx_t *fai, const faidx1_t *val,

// Similarly read complete lines and their line terminator characters, but overwrite the latter.
while (remaining > val->line_blen) {
nread = bgzf_read_small(fai->bgzf, s, val->line_len);
if (nread < (ssize_t) val->line_len) goto error;
nread = bgzf_read_small(fai->bgzf, s, line_len);
if (nread < (ssize_t) line_len) goto error;
s += val->line_blen;
remaining -= val->line_blen;
}
Expand Down Expand Up @@ -827,9 +844,10 @@ static int fai_get_val(const faidx_t *fai, const char *str,
}

/*
* The internal still has line_blen as uint32_t, but our references
* can be longer, so for future proofing we use hts_pos_t. We also needed
* a signed value so we can return negatives as an error.
* Returns hts_pos_t for historic reasons (line_blen was uint32_t,
* but this function returned a wider value in case it was expanded
* which has now happened). It also needs to return a negative value
* on error.
*/
hts_pos_t fai_line_length(const faidx_t *fai, const char *str)
{
Expand All @@ -840,7 +858,7 @@ hts_pos_t fai_line_length(const faidx_t *fai, const char *str)
if (fai_get_val(fai, str, &len, &val, &beg, &end))
return -1;
else
return val.line_blen;
return (hts_pos_t) (val.line_blen <= HTS_POS_MAX ? val.line_blen : HTS_POS_MAX);
}

char *fai_fetch64(const faidx_t *fai, const char *str, hts_pos_t *len)
Expand Down
Loading