e81403a315a24af601884b8a19e89bcecc92f267 galt Sat Dec 8 20:04:28 2018 -0800 Fixing hFindSplitTable and its use. Standard size, give real string size so no undetected overflows. Test result and abort if not found. Avoids SQL errors that otherwise will popup. Handles uninitialzed stack better for the output name. refs #22596. diff --git src/hg/hgc/rmskJoinedClick.c src/hg/hgc/rmskJoinedClick.c index b51361d..2af9bc5 100644 --- src/hg/hgc/rmskJoinedClick.c +++ src/hg/hgc/rmskJoinedClick.c @@ -1,414 +1,418 @@ /* joinedRmskClick - Click handler for joinedRmskTrack. This is based * loosely on the original rmsk click handler inside * of hgc.c. * * Written by Robert Hubley 12/2012 */ /* Copyright (C) 2014 The Regents of the University of California * See README in this or parent directory for licensing information. */ #include "common.h" #include "hgc.h" #include "rmskOut2.h" #include "rmskAlign.h" #include "rmskJoined.h" #include "hCommon.h" void printAlignment (struct rmskAlign *ra) /* * Print RepeatMasker alignment data stored in RM's cAlign format. * The format is basically a lightly compressed diff format where * the query and subject are merged into one squence line. The * runs of exact matching sequences are interrupted by either * single base substitutions annotated as queryBase "/" subjectBase, * insertions in the subject annotated as "+ACTAT+", or deletions * in the query annotated as "-ACTTTG-". */ { int alignLength = 80; char querySeq[alignLength + 1]; char diffSeq[alignLength + 1]; char subjSeq[alignLength + 1]; int inSub = 0; int inDel = 0; int inIns = 0; int aIdx = 0; int sIdx = 0; int qCnt = 0; int sCnt = 0; int qStart = ra->genoStart; int sStart = ra->repStart; if (ra->strand[0] == '-') sStart = ra->repEnd; int maxNameLen = (strlen (ra->genoName) > strlen (ra->repName) ? strlen (ra->genoName) : strlen (ra->repName)); while (ra->alignment[aIdx] != '\0') { if (ra->alignment[aIdx] == '/') inSub = 1; else if (ra->alignment[aIdx] == '-') inDel ^= 1; else if (ra->alignment[aIdx] == '+') inIns ^= 1; else { if (inSub) { subjSeq[sIdx - 1] = ra->alignment[aIdx]; if ((querySeq[sIdx - 1] == 'C' && subjSeq[sIdx - 1] == 'T') || (querySeq[sIdx - 1] == 'T' && subjSeq[sIdx - 1] == 'C') || (querySeq[sIdx - 1] == 'A' && subjSeq[sIdx - 1] == 'G') || (querySeq[sIdx - 1] == 'G' && subjSeq[sIdx - 1] == 'A')) diffSeq[sIdx - 1] = 'i'; else if ((index ("BDHVRYKMSWNX", querySeq[sIdx - 1]) != NULL) || (index ("BDHVRYKMSWNX", subjSeq[sIdx - 1]) != NULL)) diffSeq[sIdx - 1] = '?'; else diffSeq[sIdx - 1] = 'v'; inSub = 0; } else if (inDel) { querySeq[sIdx] = ra->alignment[aIdx]; subjSeq[sIdx] = '-'; diffSeq[sIdx] = '-'; qCnt++; sIdx++; } else if (inIns) { querySeq[sIdx] = '-'; subjSeq[sIdx] = ra->alignment[aIdx]; diffSeq[sIdx] = '-'; sCnt++; sIdx++; } else { diffSeq[sIdx] = ' '; querySeq[sIdx] = ra->alignment[aIdx]; subjSeq[sIdx] = ra->alignment[aIdx]; sCnt++; qCnt++; sIdx++; } if (sIdx == alignLength) { querySeq[sIdx] = '\0'; diffSeq[sIdx] = '\0'; subjSeq[sIdx] = '\0'; printf ("%*s %10d %s %d\n", maxNameLen, ra->genoName, qStart, querySeq, (qStart + qCnt - 1)); printf ("%*s %s\n", maxNameLen, " ", diffSeq); if (ra->strand[0] == '+') printf ("%*s %10d %s %d\n", maxNameLen, ra->repName, sStart, subjSeq, (sStart + sCnt - 1)); else printf ("%*s %10d %s %d\n", maxNameLen, ra->repName, sStart, subjSeq, (sStart - sCnt + 1)); printf ("\n"); qStart += qCnt; if (ra->strand[0] == '+') sStart += sCnt; else sStart -= sCnt; qCnt = 0; sCnt = 0; sIdx = 0; } } aIdx++; } if (sIdx) { querySeq[sIdx] = '\0'; diffSeq[sIdx] = '\0'; subjSeq[sIdx] = '\0'; printf ("%*s %10d %s %d\n", maxNameLen, ra->genoName, qStart, querySeq, (qStart + qCnt - 1)); printf ("%*s %s\n", maxNameLen, " ", diffSeq); if (ra->strand[0] == '+') printf ("%*s %10d %s %d\n", maxNameLen, ra->repName, sStart, subjSeq, (sStart + sCnt - 1)); else printf ("%*s %10d %s %d\n", maxNameLen, ra->repName, sStart, subjSeq, (sStart - sCnt + 1)); ; } } void printOutTableHeader (char strand) /* Print the appropriate HTML table header * given the strand of the element. */ { const char *hd1_style = "style=\"background-color:#C2C9E0; font-size:14px; padding:4px 14px;\""; const char *hd1span_style = "style=\"background-color:#6678B1; font-size:14px; padding:4px 14px;\""; const char *hd2_style = "style=\"background-color:#C2C9E0; font-size:14px; padding:4px 14px; " "border-bottom: 2px solid #6678B1;\""; printf (" <thead>\n"); printf (" <tr>\n"); printf (" <th %s>bit/sw</th>\n", hd1_style); printf (" <th %s>perc</th>\n", hd1_style); printf (" <th %s>perc</th>\n", hd1_style); printf (" <th %s>perc</th>\n", hd1_style); printf (" <th colspan=\"4\" %s>query</th>\n", hd1span_style); printf (" <th %s></th>\n", hd1_style); printf (" <th colspan=\"5\" %s>matching repeat</th>\n", hd1span_style); printf (" <th %s></th>\n", hd1_style); printf (" </tr>\n"); printf (" <tr>\n"); printf (" <th %s>score</th>\n", hd2_style); printf (" <th %s>div.</th>\n", hd2_style); printf (" <th %s>del.</th>\n", hd2_style); printf (" <th %s>ins.</th>\n", hd2_style); printf (" <th %s>sequence</th>\n", hd2_style); printf (" <th %s>begin</th>\n", hd2_style); printf (" <th %s>end</th>\n", hd2_style); printf (" <th %s>remaining</th>\n", hd2_style); printf (" <th %s>orient.</th>\n", hd2_style); printf (" <th %s>name</th>\n", hd2_style); printf (" <th %s>class/family</th>\n", hd2_style); if (strand == 'C') { printf (" <th %s>remaining</th>\n", hd2_style); printf (" <th %s>end</th>\n", hd2_style); printf (" <th %s>begin</th>\n", hd2_style); } else { printf (" <th %s>begin</th>\n", hd2_style); printf (" <th %s>end</th>\n", hd2_style); printf (" <th %s>remaining</th>\n", hd2_style); } printf (" <th %s>id</th>\n", hd2_style); printf (" </tr>\n"); printf (" </thead>\n"); } void doJRepeat (struct trackDb *tdb, char *repeat) /* Main entry point */ { const char *data_style = "style=\"padding:0px 6px;\""; char *table = (tdb ? tdb->table : tdb->track); char outTable[64]; char alignTable[64]; /* Currently using a fixed table name design * with support for two datasets ( baseline and current ). * The baseline dataset is the run which all the other * tracks are based upon. The current can be any more * recent run of RepeatMasker. */ if (startsWith ("rmskJoinedCurrent", table)) { strcpy (alignTable, "rmskAlignCurrent"); strcpy (outTable, "rmskOutCurrent"); } else if (startsWith ("rmskJoinedBaseline", table)) { strcpy (alignTable, "rmskAlignBaseline"); strcpy (outTable, "rmskOutBaseline"); } cartWebStart (cart, database, "%s", tdb->longLabel); int offset = cartInt (cart, "o"); if (offset >= 0) { struct sqlConnection *conn2 = hAllocConn (database); struct sqlResult *sr2; char **row; char query[256]; - char qTable[64]; + char qTable[HDB_MAX_TABLE_STRING]; boolean hasBin; int start = cartInt (cart, "o"); if (hTableExists (database, table)) { - hFindSplitTable (database, seqName, table, qTable, &hasBin); + if (!hFindSplitTable (database, seqName, table, qTable, sizeof(qTable), &hasBin)) + errAbort("track %s not found", table); + sqlSafef (query, sizeof (query), "select * from %s where chrom = '%s' and alignStart >= %d" " and id = %s", qTable, seqName, start-1, repeat); sr2 = sqlGetResult (conn2, query); if ((row = sqlNextRow (sr2)) != NULL) { struct rmskJoined *rmJoin = rmskJoinedLoad (row + hasBin); char class[32]; class[0] = '\0'; char family[32]; family[0] = '\0'; char *poundPtr = index (rmJoin->name, '#'); if (poundPtr) { // Terminate name string properly safecpy (class, sizeof (class), poundPtr + 1); *poundPtr = '\0'; char *slashPtr = index (class, '/'); if (slashPtr) { // Terminate class string properly safecpy (family, sizeof (family), slashPtr + 1); *slashPtr = '\0'; } } printf ("<b>Repeat:</b> %s<br>\n", rmJoin->name); printf ("<b>Class:</b> %s<br>\n", class); printf ("<b>Family:</b> %s<br>\n", family); printf ("<b>Orientation:</b> %s<br>\n", rmJoin->strand); printf ("<b>Joined Element Genomic Range:</b> %s:%d-%d<br>\n", rmJoin->chrom, rmJoin->alignStart+1, rmJoin->alignEnd); printf ("<br><br>\n"); } sqlFreeResult (&sr2); } /* * Locate *.out annotation for this element */ if (hTableExists (database, outTable)) { int isFirst = 0; struct rmskOut2 *ro; - hFindSplitTable (database, seqName, outTable, qTable, &hasBin); + if (!hFindSplitTable (database, seqName, outTable, qTable, sizeof(qTable), &hasBin)) + errAbort("track %s not found", outTable); sqlSafef (query, sizeof (query), "select * from %s where genoName = '%s' and genoStart >= %d" " and id = %s", qTable, seqName, start-1, repeat); sr2 = sqlGetResult (conn2, query); printf ("<h4>RepeatMasker Annotation:</h4>\n"); printf ("The RepeatMasker annotation line(s) for this element. " "If the element is fragmented the output will contain one " "line per joined fragment.<p>\n"); printf ("<table cellspacing=\"0\">\n"); while ((row = sqlNextRow (sr2)) != NULL) { ro = rmskOut2Load (row + hasBin); if (!isFirst++) printOutTableHeader (ro->strand[0]); printf (" <tr>\n"); printf (" <td %s>%d</td>\n", data_style, ro->swScore); printf (" <td %s>%3.1f</td>\n", data_style, (double) ro->milliDiv * (double) 0.01); printf (" <td %s>%3.1f</td>\n", data_style, (double) ro->milliDel * (double) 0.01); printf (" <td %s>%3.1f</td>\n", data_style, (double) ro->milliIns * (double) 0.01); printf (" <td %s>%s</td>\n", data_style, ro->genoName); printf (" <td %s>%d</td>\n", data_style, ro->genoStart + 1); printf (" <td %s>%d</td>\n", data_style, ro->genoEnd); printf (" </A>"); printf (" <td %s>(%d)</td>\n", data_style, ro->genoLeft); printf (" <td %s>%s</td>\n", data_style, ro->strand); printf (" <td %s>%s</td>\n", data_style, ro->repName); printf (" <td %s>%s/%s</td>\n", data_style, ro->repClass, ro->repFamily); printf (" <td %s>%d</td>\n", data_style, ro->repStart); printf (" <td %s>%d</td>\n", data_style, ro->repEnd); printf (" <td %s>(%d)</td>\n", data_style, ro->repLeft); printf (" <td %s>%d</td>\n", data_style, ro->id); printf (" <td>\n"); printf ("<A HREF=\"%s&db=%s&position=%s%%3A%d-%d\">Browser</A>", hgTracksPathAndSettings (), database, seqName, ro->genoStart + 1, ro->genoEnd); char *tbl = cgiUsualString ("table", cgiString ("g")); printf (" - <A HREF=\"%s&o=%d&g=getDna&i=%s%s%s%s%s&c=%s&l=%d&r=%d&" "strand=%s&table=%s\">" "DNA</A>\n", hgcPathAndSettings (), ro->genoStart, (ro->repName != NULL ? cgiEncode (ro->repName) : ""), cgiEncode("#"), (ro->repClass != NULL ? cgiEncode (ro->repClass) : ""), cgiEncode("/"), (ro->repFamily != NULL ? cgiEncode (ro->repFamily) : ""), seqName, ro->genoStart, ro->genoEnd, cgiEncode (ro->strand), tbl); printf (" </td>\n"); printf (" </tr>\n"); } sqlFreeResult (&sr2); printf ("</table>\n"); } printf ("<br><br>\n"); /* * Locate *.align data for this element */ if (hTableExists (database, alignTable)) { struct rmskAlign *ro; - hFindSplitTable (database, seqName, alignTable, qTable, &hasBin); + if (!hFindSplitTable (database, seqName, alignTable, qTable, sizeof(qTable), &hasBin)) + errAbort("track %s not found", alignTable); sqlSafef (query, sizeof (query), "select * from %s where genoName = '%s' and genoStart >= %d" " and id = %s", qTable, seqName, start-1, repeat); sr2 = sqlGetResult (conn2, query); printf ("<h4>RepeatMasker Alignments:</h4>\n"); printf ("The raw alignment data used by RepeatMasker to generate " "the final annotation call for this element. NOTE: The " "aligned sequence names and consensus positions may differ " "from the final annotation.<p>\n"); printf ("<table>\n"); while ((row = sqlNextRow (sr2)) != NULL) { ro = rmskAlignLoad (row + hasBin); printf (" <tr>\n"); printf (" <td>%d</td>\n", ro->swScore); printf (" <td>%3.2f</td>\n", (double) ro->milliDiv * (double) 0.01); printf (" <td>%3.2f</td>\n", (double) ro->milliDel * (double) 0.01); printf (" <td>%3.2f</td>\n", (double) ro->milliIns * (double) 0.01); printf (" <td>%s</td>\n", ro->genoName); printf (" <td>%d</td>\n", ro->genoStart + 1); printf (" <td>%d</td>\n", ro->genoEnd); printf (" <td>(%d)</td>\n", ro->genoLeft); printf (" <td>%s</td>\n", ro->strand); printf (" <td>%s</td>\n", ro->repName); printf (" <td>%s/%s</td>\n", ro->repClass, ro->repFamily); if (ro->strand[0] == '-') { printf (" <td>(%d)</td>\n", ro->repLeft); printf (" <td>%d</td>\n", ro->repEnd); printf (" <td>%d</td>\n", ro->repStart); } else { printf (" <td>%d</td>\n", ro->repStart); printf (" <td>%d</td>\n", ro->repEnd); printf (" <td>(%d)</td>\n", ro->repLeft); } printf (" <td>%d</td>\n", ro->id); printf (" </tr><tr><td colspan=\"15\"><pre>\n"); printAlignment (ro); printf (" </pre><td></tr>\n"); } sqlFreeResult (&sr2); printf ("</table>\n"); } hFreeConn (&conn2); } printTrackHtml (tdb); }