11f75d3f0d7f476e2988aae6a88e025d113b5eef hiram Fri Aug 1 15:14:04 2014 -0700 change file name joinedRmskClick to rmskJoinedClick.c refs #13706 diff --git src/hg/hgc/rmskJoinedClick.c src/hg/hgc/rmskJoinedClick.c new file mode 100644 index 0000000..41b0f7b --- /dev/null +++ src/hg/hgc/rmskJoinedClick.c @@ -0,0 +1,410 @@ +/* joinedRmskClick - Click handler for joinedRmskTrack. This is based + * loosely on the original rmsk click handler inside + * of hgc.c. + * + * Written by Robert Hubley 12/2012 + */ + +/* Copyright (C) 2014 The Regents of the University of California + * See README in this or parent directory for licensing information. */ + +#include "common.h" +#include "hgc.h" +#include "rmskOut2.h" +#include "rmskAlign.h" +#include "rmskJoined.h" +#include "hCommon.h" + + +void printAlignment (struct rmskAlign *ra) +/* + * Print RepeatMasker alignment data stored in RM's cAlign format. + * The format is basically a lightly compressed diff format where + * the query and subject are merged into one squence line. The + * runs of exact matching sequences are interrupted by either + * single base substitutions annotated as queryBase "/" subjectBase, + * insertions in the subject annotated as "+ACTAT+", or deletions + * in the query annotated as "-ACTTTG-". + */ +{ +int alignLength = 80; +char querySeq[alignLength + 1]; +char diffSeq[alignLength + 1]; +char subjSeq[alignLength + 1]; +int inSub = 0; +int inDel = 0; +int inIns = 0; + +int aIdx = 0; +int sIdx = 0; +int qCnt = 0; +int sCnt = 0; +int qStart = ra->genoStart; +int sStart = ra->repStart; +if (ra->strand[0] == '-') + sStart = ra->repEnd; + +int maxNameLen = + (strlen (ra->genoName) > + strlen (ra->repName) ? strlen (ra->genoName) : strlen (ra->repName)); + +while (ra->alignment[aIdx] != '\0') + { + if (ra->alignment[aIdx] == '/') + inSub = 1; + else if (ra->alignment[aIdx] == '-') + inDel ^= 1; + else if (ra->alignment[aIdx] == '+') + inIns ^= 1; + else + { + if (inSub) + { + subjSeq[sIdx - 1] = ra->alignment[aIdx]; + if ((querySeq[sIdx - 1] == 'C' && + subjSeq[sIdx - 1] == 'T') || + (querySeq[sIdx - 1] == 'T' && + subjSeq[sIdx - 1] == 'C') || + (querySeq[sIdx - 1] == 'A' && + subjSeq[sIdx - 1] == 'G') || + (querySeq[sIdx - 1] == 'G' && subjSeq[sIdx - 1] == 'A')) + diffSeq[sIdx - 1] = 'i'; + else if ((index ("BDHVRYKMSWNX", querySeq[sIdx - 1]) != NULL) || + (index ("BDHVRYKMSWNX", subjSeq[sIdx - 1]) != NULL)) + diffSeq[sIdx - 1] = '?'; + else + diffSeq[sIdx - 1] = 'v'; + inSub = 0; + } + else if (inDel) + { + querySeq[sIdx] = ra->alignment[aIdx]; + subjSeq[sIdx] = '-'; + diffSeq[sIdx] = '-'; + qCnt++; + sIdx++; + } + else if (inIns) + { + querySeq[sIdx] = '-'; + subjSeq[sIdx] = ra->alignment[aIdx]; + diffSeq[sIdx] = '-'; + sCnt++; + sIdx++; + } + else + { + diffSeq[sIdx] = ' '; + querySeq[sIdx] = ra->alignment[aIdx]; + subjSeq[sIdx] = ra->alignment[aIdx]; + sCnt++; + qCnt++; + sIdx++; + } + if (sIdx == alignLength) + { + querySeq[sIdx] = '\0'; + diffSeq[sIdx] = '\0'; + subjSeq[sIdx] = '\0'; + printf ("%*s %10d %s %d\n", maxNameLen, ra->genoName, qStart, + querySeq, (qStart + qCnt - 1)); + printf ("%*s %s\n", maxNameLen, " ", diffSeq); + if (ra->strand[0] == '+') + printf ("%*s %10d %s %d\n", maxNameLen, ra->repName, sStart, + subjSeq, (sStart + sCnt - 1)); + else + printf ("%*s %10d %s %d\n", maxNameLen, ra->repName, sStart, + subjSeq, (sStart - sCnt + 1)); + printf ("\n"); + qStart += qCnt; + if (ra->strand[0] == '+') + sStart += sCnt; + else + sStart -= sCnt; + qCnt = 0; + sCnt = 0; + sIdx = 0; + } + } + aIdx++; + } +if (sIdx) + { + querySeq[sIdx] = '\0'; + diffSeq[sIdx] = '\0'; + subjSeq[sIdx] = '\0'; + printf ("%*s %10d %s %d\n", maxNameLen, ra->genoName, qStart, querySeq, + (qStart + qCnt - 1)); + printf ("%*s %s\n", maxNameLen, " ", diffSeq); + if (ra->strand[0] == '+') + printf ("%*s %10d %s %d\n", maxNameLen, ra->repName, sStart, subjSeq, + (sStart + sCnt - 1)); + else + printf ("%*s %10d %s %d\n", maxNameLen, ra->repName, sStart, subjSeq, + (sStart - sCnt + 1)); + ; + } +} + +void printOutTableHeader (char strand) +/* Print the appropriate HTML table header + * given the strand of the element. + */ +{ +const char *hd1_style = + "style=\"background-color:#C2C9E0; font-size:14px; padding:4px 14px;\""; +const char *hd1span_style = + "style=\"background-color:#6678B1; font-size:14px; padding:4px 14px;\""; +const char *hd2_style = + "style=\"background-color:#C2C9E0; font-size:14px; padding:4px 14px; " + "border-bottom: 2px solid #6678B1;\""; +printf (" \n"); +printf (" \n"); +printf (" bit/sw\n", hd1_style); +printf (" perc\n", hd1_style); +printf (" perc\n", hd1_style); +printf (" perc\n", hd1_style); +printf (" query\n", hd1span_style); +printf (" \n", hd1_style); +printf (" matching repeat\n", hd1span_style); +printf (" \n", hd1_style); +printf (" \n"); +printf (" \n"); +printf (" score\n", hd2_style); +printf (" div.\n", hd2_style); +printf (" del.\n", hd2_style); +printf (" ins.\n", hd2_style); +printf (" sequence\n", hd2_style); +printf (" begin\n", hd2_style); +printf (" end\n", hd2_style); +printf (" remaining\n", hd2_style); +printf (" orient.\n", hd2_style); +printf (" name\n", hd2_style); +printf (" class/family\n", hd2_style); +if (strand == 'C') + { + printf (" remaining\n", hd2_style); + printf (" end\n", hd2_style); + printf (" begin\n", hd2_style); + } +else + { + printf (" begin\n", hd2_style); + printf (" end\n", hd2_style); + printf (" remaining\n", hd2_style); + } +printf (" id\n", hd2_style); +printf (" \n"); +printf (" \n"); +} + + +void doJRepeat (struct trackDb *tdb, char *repeat) +/* Main entry point */ +{ +const char *data_style = "style=\"padding:0px 6px;\""; +char *table = (tdb ? tdb->table : tdb->track); +char outTable[64]; +char alignTable[64]; + +/* Currently using a fixed table name design + * with support for two datasets ( baseline and current ). + * The baseline dataset is the run which all the other + * tracks are based upon. The current can be any more + * recent run of RepeatMasker. + */ +if (startsWith ("rmskJoinedCurrent", table)) + { + strcpy (alignTable, "rmskAlignCurrent"); + strcpy (outTable, "rmskOutCurrent"); + } +else if (startsWith ("rmskJoinedBaseline", table)) + { + strcpy (alignTable, "rmskAlignBaseline"); + strcpy (outTable, "rmskOutBaseline"); + } + +cartWebStart (cart, database, "%s", tdb->longLabel); + +int offset = cartInt (cart, "o"); +if (offset >= 0) + { + struct sqlConnection *conn2 = hAllocConn (database); + struct sqlResult *sr2; + char **row; + char query[256]; + char qTable[64]; + boolean hasBin; + + int start = cartInt (cart, "o"); + + if (hTableExists (database, table)) + { + hFindSplitTable (database, seqName, table, qTable, &hasBin); + sqlSafef (query, sizeof (query), + "select * from %s where chrom = '%s' and alignStart >= %d" + " and id = %s", qTable, seqName, start-1, repeat); + + sr2 = sqlGetResult (conn2, query); + if ((row = sqlNextRow (sr2)) != NULL) + { + struct rmskJoined *rmJoin = rmskJoinedLoad (row + hasBin); + + char class[32]; + class[0] = '\0'; + char family[32]; + family[0] = '\0'; + char *poundPtr = index (rmJoin->name, '#'); + if (poundPtr) + { + // Terminate name string properly + safecpy (class, sizeof (class), poundPtr + 1); + *poundPtr = '\0'; + char *slashPtr = index (class, '/'); + if (slashPtr) + { + // Terminate class string properly + safecpy (family, sizeof (family), slashPtr + 1); + *slashPtr = '\0'; + } + } + + printf ("Repeat: %s
\n", rmJoin->name); + printf ("Class: %s
\n", class); + printf ("Family: %s
\n", family); + printf ("Orientation: %s
\n", rmJoin->strand); + printf ("Joined Element Genomic Range: %s:%d-%d
\n", + rmJoin->chrom, rmJoin->alignStart+1, rmJoin->alignEnd); + printf ("

\n"); + } + sqlFreeResult (&sr2); + } + + /* + * Locate *.out annotation for this element + */ + if (hTableExists (database, outTable)) + { + int isFirst = 0; + struct rmskOut2 *ro; + hFindSplitTable (database, seqName, outTable, qTable, &hasBin); + sqlSafef (query, sizeof (query), + "select * from %s where genoName = '%s' and genoStart >= %d" + " and id = %s", qTable, seqName, start-1, repeat); + sr2 = sqlGetResult (conn2, query); + printf ("

RepeatMasker Annotation:

\n"); + printf + ("The RepeatMasker annotation line(s) for this element. " + "If the element is fragmented the output will contain one " + "line per joined fragment.

\n"); + printf ("\n"); + while ((row = sqlNextRow (sr2)) != NULL) + { + ro = rmskOut2Load (row + hasBin); + if (!isFirst++) + printOutTableHeader (ro->strand[0]); + printf (" \n"); + printf (" \n", data_style, ro->swScore); + printf (" \n", data_style, + (double) ro->milliDiv * (double) 0.01); + printf (" \n", data_style, + (double) ro->milliDel * (double) 0.01); + printf (" \n", data_style, + (double) ro->milliIns * (double) 0.01); + printf (" \n", data_style, ro->genoName); + printf (" \n", data_style, ro->genoStart + 1); + printf (" \n", data_style, ro->genoEnd); + printf (" "); + printf (" \n", data_style, ro->genoLeft); + printf (" \n", data_style, ro->strand); + printf (" \n", data_style, ro->repName); + printf (" \n", data_style, ro->repClass, + ro->repFamily); + printf (" \n", data_style, ro->repStart); + printf (" \n", data_style, ro->repEnd); + printf (" \n", data_style, ro->repLeft); + printf (" \n", data_style, ro->id); + printf (" \n"); + + printf (" \n"); + + } + sqlFreeResult (&sr2); + printf ("
%d%3.1f%3.1f%3.1f%s%d%d(%d)%s%s%s/%s%d%d(%d)%d\n"); + printf ("Browser", + hgTracksPathAndSettings (), database, seqName, + ro->genoStart + 1, ro->genoEnd); + char *tbl = cgiUsualString ("table", cgiString ("g")); + printf + (" - " + "DNA\n", hgcPathAndSettings (), ro->genoStart, + (ro->repName != NULL ? cgiEncode (ro->repName) : ""), + seqName, ro->genoStart, ro->genoEnd, cgiEncode (ro->strand), + tbl); + + printf ("
\n"); + } + + printf ("

\n"); + + /* + * Locate *.align data for this element + */ + if (hTableExists (database, alignTable)) + { + struct rmskAlign *ro; + hFindSplitTable (database, seqName, alignTable, qTable, &hasBin); + sqlSafef (query, sizeof (query), + "select * from %s where genoName = '%s' and genoStart >= %d" + " and id = %s", qTable, seqName, start-1, repeat); + sr2 = sqlGetResult (conn2, query); + printf ("

RepeatMasker Alignments:

\n"); + printf + ("The raw alignment data used by RepeatMasker to generate " + "the final annotation call for this element. NOTE: The " + "aligned sequence names and consensus positions may differ " + "from the final annotation.

\n"); + printf ("\n"); + while ((row = sqlNextRow (sr2)) != NULL) + { + ro = rmskAlignLoad (row + hasBin); + printf (" \n"); + printf (" \n", ro->swScore); + printf (" \n", + (double) ro->milliDiv * (double) 0.01); + printf (" \n", + (double) ro->milliDel * (double) 0.01); + printf (" \n", + (double) ro->milliIns * (double) 0.01); + printf (" \n", ro->genoName); + printf (" \n", ro->genoStart + 1); + printf (" \n", ro->genoEnd); + printf (" \n", ro->genoLeft); + printf (" \n", ro->strand); + printf (" \n", ro->repName); + printf (" \n", ro->repClass, ro->repFamily); + if (ro->strand[0] == '-') + { + printf (" \n", ro->repLeft); + printf (" \n", ro->repEnd); + printf (" \n", ro->repStart); + } + else + { + printf (" \n", ro->repStart); + printf (" \n", ro->repEnd); + printf (" \n", ro->repLeft); + } + printf (" \n", ro->id); + printf (" \n"); + } + sqlFreeResult (&sr2); + printf ("
%d%3.2f%3.2f%3.2f%s%d%d(%d)%s%s%s/%s(%d)%d%d%d%d(%d)%d
\n");
+	    printAlignment (ro);
+	    printf ("    
\n"); + } + hFreeConn (&conn2); + } +printTrackHtml (tdb); +}