2bff6bdbbce7e4ef57b27d4104a12a076dc90ca5 angie Fri May 12 14:12:45 2017 -0700 gbSeqTable doesn't work as compatTable for hGenBankGet* -- wrong columns. Also recognize indel when no-change in HGVS transcript term aligns to a deletion in reference. We're not yet showing the correct alt sequence in that case. diff --git src/hg/lib/hgHgvs.c src/hg/lib/hgHgvs.c index be8e96d..66a0c48 100644 --- src/hg/lib/hgHgvs.c +++ src/hg/lib/hgHgvs.c @@ -530,31 +530,31 @@ } } else { if (dbHasNcbiRefSeq(db)) { char query[2048]; sqlSafef(query, sizeof(query), "select seq from ncbiRefSeqPepTable " "where name = '%s'", acc); struct sqlConnection *conn = hAllocConn(db); seq = sqlQuickString(conn, query); hFreeConn(&conn); } else { - aaSeq *aaSeq = hGenBankGetPep(db, acc, gbSeqTable); + aaSeq *aaSeq = hGenBankGetPep(db, acc, NULL); if (aaSeq) seq = aaSeq->dna; } } return seq; } static char refBaseForNp(char *db, char *npAcc, int pos) // Get the amino acid base in NP_'s sequence at 1-based offset pos. { char *seq = getProteinSeq(db, npAcc); char base = seq ? seq[pos-1] : '\0'; freeMem(seq); return base; @@ -763,31 +763,31 @@ if (hTableExists(db, "lrgCdna")) { char query[2048]; sqlSafef(query, sizeof(query), "select seq from lrgCdna where name = '%s'", acc); struct sqlConnection *conn = hAllocConn(db); seq = sqlQuickString(conn, query); hFreeConn(&conn); } } else { struct dnaSeq *cdnaSeq = NULL; if (dbHasNcbiRefSeq(db)) cdnaSeq = hDnaSeqGet(db, acc, "seqNcbiRefSeq", "extNcbiRefSeq"); else - cdnaSeq = hGenBankGetMrna(db, acc, gbSeqTable); + cdnaSeq = hGenBankGetMrna(db, acc, NULL); if (cdnaSeq) seq = dnaSeqCannibalize(&cdnaSeq); } return seq; } static boolean getCds(char *db, char *acc, struct genbankCds *retCds) /* Get the CDS info for genbank or LRG acc; return FALSE if not found or not applicable. */ { if (trackHubDatabase(db)) return FALSE; boolean gotCds = FALSE; char query[1024]; if (startsWith("LRG_", acc)) sqlSafef(query, sizeof(query), @@ -1837,67 +1837,65 @@ int hgvsSeqAltLen = strlen(hgvsSeqAlt); // VCF alleles are always on '+' (Fwd) strand of genome. char hgvsSeqRefFwd[hgvsSeqRefLen+1]; copyMaybeRc(hgvsSeqRefFwd, sizeof(hgvsSeqRefFwd), hgvsSeqRef, isRc); char hgvsSeqAltFwd[hgvsSeqAltLen+1]; copyMaybeRc(hgvsSeqAltFwd, sizeof(hgvsSeqAltFwd), hgvsSeqAlt, isRc); int genomicRefLen = strlen(genomicRef); // The alt allele string will contain 0 to 2 sequences, maybe a comma, and 0 to 2 leftBases: int altMaxLen = genomicRefLen + hgvsSeqRefLen + hgvsSeqAltLen + 16; char vcfAlt[altMaxLen]; // If the HGVS change is "=" (no change) then vcfAlt will be "." boolean noChange = sameString(hgvsSeqRef, hgvsSeqAlt); // VCF indels have start coord one base to the left of the actual indel point. // HGVS treats multi-base substitutions as indels but VCF does not, so look for // differing length of ref vs. each alt sequence in order to call it a VCF indel. -boolean isIndel = FALSE; +boolean isIndel = (genomicRefLen != hgvsSeqRefLen || genomicRefLen != hgvsSeqAltLen || + genomicRefLen == 0); if (sameString(hgvsSeqRef, genomicRef)) { // VCF alt is HGVS alt - isIndel = (genomicRefLen != hgvsSeqAltLen); if (noChange) safecpy(vcfAlt, sizeof(vcfAlt), "."); else if (isIndel) { if (leftBaseRight) safef(vcfAlt, sizeof(vcfAlt), "%s%c", hgvsSeqAltFwd, leftBase); else safef(vcfAlt, sizeof(vcfAlt), "%c%s", leftBase, hgvsSeqAltFwd); } else safecpy(vcfAlt, sizeof(vcfAlt), hgvsSeqAltFwd); } else if (sameString(genomicRef, hgvsSeqAlt)) { // Genomic reference allele is HGVS alt allele; VCF alt is HGVS ref allele - isIndel = (genomicRefLen != hgvsSeqRefLen); if (noChange) safecpy(vcfAlt, sizeof(vcfAlt), "."); else if (isIndel) { if (leftBaseRight) safef(vcfAlt, sizeof(vcfAlt), "%s%c", hgvsSeqRefFwd, leftBase); else safef(vcfAlt, sizeof(vcfAlt), "%c%s", leftBase, hgvsSeqRefFwd); } else safecpy(vcfAlt, sizeof(vcfAlt), hgvsSeqRefFwd); } else { // Both HGVS ref and HGVS alt differ from genomicRef, so both are VCF alts (unless noChange) - isIndel = (genomicRefLen != hgvsSeqRefLen || genomicRefLen != hgvsSeqAltLen); if (noChange) { if (isIndel) { if (leftBaseRight) safef(vcfAlt, sizeof(vcfAlt), "%s%c", hgvsSeqRefFwd, leftBase); else safef(vcfAlt, sizeof(vcfAlt), "%c%s", leftBase, hgvsSeqRefFwd); } else safef(vcfAlt, sizeof(vcfAlt), "%s", hgvsSeqRefFwd); } else if (isIndel) { if (leftBaseRight)