b91a917e39c8d58d0de5e6325606f098ef0fb60b angie Wed Mar 21 09:22:21 2018 -0700 New util pslMismatchGapToBed searches for sequence mismatches and indels between reference genome and transcripts. Five output BED+ files are created, with corresponding hg/lib/txAli*.as autoSql files for generating bigBed. This could be used to create 'Anomalies' subtracks for the NCBI RefSeq track. refs #21079 diff --git src/hg/inc/hgHgvs.h src/hg/inc/hgHgvs.h index 070e0ae..2314781 100644 --- src/hg/inc/hgHgvs.h +++ src/hg/inc/hgHgvs.h @@ -240,30 +240,33 @@ "Description=\"Asserted reference sequence in HGVS term does not match actual " \ "reference sequence\">\n" \ "##FILTER=<ID=HgvsRefGenomicMismatch," \ "Description=\"HGVS reference sequence does not match genomic sequence; " \ "HGVS reference sequence is included in ALT\">\n" \ "##INFO=<ID=DupToIns,Number=0,Type=Flag," \ "Description=\"HGVS dup (duplication) was converted to insertion\">\n" \ "##INFO=<ID=BasesShifted,Number=1,Type=Integer," \ "Description=\"Position of HGVS variant was shifted this number of bases to the left\">\n" struct vcfRow *hgvsToVcfRow(char *db, char *term, boolean doLeftShift, struct dyString *dyError); /* Convert HGVS to a row of VCF suitable for sorting & printing. If unable, return NULL and * put the reason in dyError. Protein terms are ambiguous at the nucleotide level so they are * not supported at this point. */ +uint hgvsTxToCds(uint txOffset, struct genbankCds *cds, boolean isStart, char pPrefix[2]); +/* Return the cds-relative HGVS coord and prefix corresponding to 0-based txOffset & cds. */ + char *hgvsGFromVariant(struct seqWindow *gSeqWin, struct bed3 *variantBed, char *alt, char *acc, boolean breakDelIns); /* Return an HGVS g. string representing the genomic variant at the position of variantBed with * reference allele from gSeqWin and alternate allele alt. If acc is non-NULL it is used * instead of variantBed->chrom. * If breakDelIns, then show deleted bases (eg show 'delAGinsTT' instead of 'delinsTT'). */ char *hgvsNFromVpTx(struct vpTx *vpTx, struct seqWindow *gSeqWin, struct psl *txAli, struct dnaSeq *txSeq, boolean breakDelIns); /* Return an HGVS n. (noncoding transcript) term for a variant projected onto a transcript. * gSeqWin must already have at least the correct seqName if not the surrounding sequence. * If breakDelIns, then show deleted bases (eg show 'delAGinsTT' instead of 'delinsTT'). */ char *hgvsCFromVpTx(struct vpTx *vpTx, struct seqWindow *gSeqWin, struct psl *txAli, struct genbankCds *cds, struct dnaSeq *txSeq, boolean breakDelIns);