61647b42c0de4b0d11abf375cd713cb28758a954 angie Wed Aug 9 12:53:14 2017 -0700 New lib modules in support of adding HGVS output to hgVai: - seqWindow: generic interface to fetch portions of a sequence - indelShift: slide to the left or right of an ambiguous alignment range (e.g. AG to AGAGAG) - variantProjector: use PSL+CDS to project a genomic variant to a transcript variant; project transcript variant to protein variant refs #19968 diff --git src/hg/inc/seqWindow.h src/hg/inc/seqWindow.h new file mode 100644 index 0000000..427f828 --- /dev/null +++ src/hg/inc/seqWindow.h @@ -0,0 +1,53 @@ +/* seqWindow -- generic interface & implementations for fetching subranges of a sequence */ + +/* Copyright (C) 2017 The Regents of the University of California + * See README in this or parent directory for licensing information. */ + +#ifndef SEQWINDOW_H +#define SEQWINDOW_H + +struct seqWindow +/* A portion (possibly all) of a sequence, with a means to fetch some other portion of sequence. */ + { + char *seqName; // Name of the sequence on which the current window is open. + uint start; // Start within seqName of current window. + uint end; // End within seqName of current window. + char *seq; // Uppercase IUPAC sequence of current window. + + void (*fetch)(struct seqWindow *self, char *seqName, uint start, uint end); + /* Generic method to set the window to a new range and get a new chunk of uppercase sequence. + * The sequence in seqWindow after fetching may be a larger range than what was requested, + * so caller must not assume that the resulting start and end are same as requested. + * If end is too large then it will be truncated to sequence size. + * errAbort if unable to get sequence. */ + + // Implementations hide state/details after this point. + }; + +INLINE void seqWindowCopy(struct seqWindow *self, uint start, uint len, char *buf, size_t bufSize) +/* Copy len bases of sequence into buf, starting at seqName coord start; errAbort if out of range. + * Zero-terminate buf and errAbort if bufSize < len+1. */ +{ +uint end = start + len; +if (start >= self->start && end <= self->end) + safencpy(buf, bufSize, self->seq + start - self->start, len); +else + errAbort("seqWindowCopy: %s [%u,%u) is out of bounds [%u,%u)", + self->seqName, start, end, self->start, self->end); +} + +struct seqWindow *chromSeqWindowNew(char *db, char *chrom, uint start, uint end); +/* Return a new seqWindow that can fetch uppercase sequence from the chrom sequences in db. + * If chrom is non-NULL and end > start then load sequence from that range; if chrom is non-NULL + * and start == end == 0 then fetch entire chrom. */ + +void chromSeqWindowFree(struct seqWindow **pSw); +/* Free a seqWindow that was created by chromSeqWindowNew. */ + +struct seqWindow *memSeqWindowNew(char *acc, char *seq); +/* Return a new seqWindow copying this sequence already in memory. */ + +void memSeqWindowFree(struct seqWindow **pSw); +/* Free a seqWindow that was created by memSeqWindowNew. */ + +#endif /* SEQWINDOW2_H */