11f75d3f0d7f476e2988aae6a88e025d113b5eef
hiram
  Fri Aug 1 15:14:04 2014 -0700
change file name joinedRmskClick to rmskJoinedClick.c refs #13706
diff --git src/hg/hgc/rmskJoinedClick.c src/hg/hgc/rmskJoinedClick.c
new file mode 100644
index 0000000..41b0f7b
--- /dev/null
+++ src/hg/hgc/rmskJoinedClick.c
@@ -0,0 +1,410 @@
+/* joinedRmskClick - Click handler for joinedRmskTrack.  This is based
+ *                   loosely on the original rmsk click handler inside
+ *                   of hgc.c.
+ *
+ *  Written by Robert Hubley 12/2012
+ */
+
+/* Copyright (C) 2014 The Regents of the University of California 
+ * See README in this or parent directory for licensing information. */
+
+#include "common.h"
+#include "hgc.h"
+#include "rmskOut2.h"
+#include "rmskAlign.h"
+#include "rmskJoined.h"
+#include "hCommon.h"
+
+
+void printAlignment (struct rmskAlign *ra)
+/*
+ * Print RepeatMasker alignment data stored in RM's cAlign format.
+ * The format is basically a lightly compressed diff format where
+ * the query and subject are merged into one squence line. The
+ * runs of exact matching sequences are interrupted by either
+ * single base substitutions annotated as queryBase "/" subjectBase,
+ * insertions in the subject annotated as "+ACTAT+", or deletions
+ * in the query annotated as "-ACTTTG-".
+ */
+{
+int alignLength = 80;
+char querySeq[alignLength + 1];
+char diffSeq[alignLength + 1];
+char subjSeq[alignLength + 1];
+int inSub = 0;
+int inDel = 0;
+int inIns = 0;
+
+int aIdx = 0;
+int sIdx = 0;
+int qCnt = 0;
+int sCnt = 0;
+int qStart = ra->genoStart;
+int sStart = ra->repStart;
+if (ra->strand[0] == '-')
+    sStart = ra->repEnd;
+
+int maxNameLen =
+    (strlen (ra->genoName) >
+     strlen (ra->repName) ? strlen (ra->genoName) : strlen (ra->repName));
+
+while (ra->alignment[aIdx] != '\0')
+    {
+    if (ra->alignment[aIdx] == '/')
+        inSub = 1;
+    else if (ra->alignment[aIdx] == '-')
+        inDel ^= 1;
+    else if (ra->alignment[aIdx] == '+')
+        inIns ^= 1;
+    else
+	{
+	if (inSub)
+	    {
+	    subjSeq[sIdx - 1] = ra->alignment[aIdx];
+	    if ((querySeq[sIdx - 1] == 'C' &&
+		 subjSeq[sIdx - 1] == 'T') ||
+		(querySeq[sIdx - 1] == 'T' &&
+		 subjSeq[sIdx - 1] == 'C') ||
+		(querySeq[sIdx - 1] == 'A' &&
+		 subjSeq[sIdx - 1] == 'G') ||
+		(querySeq[sIdx - 1] == 'G' && subjSeq[sIdx - 1] == 'A'))
+	    diffSeq[sIdx - 1] = 'i';
+	    else if ((index ("BDHVRYKMSWNX", querySeq[sIdx - 1]) != NULL) ||
+		     (index ("BDHVRYKMSWNX", subjSeq[sIdx - 1]) != NULL))
+	        diffSeq[sIdx - 1] = '?';
+	    else
+	        diffSeq[sIdx - 1] = 'v';
+	    inSub = 0;
+	    }
+	else if (inDel)
+	    {
+	    querySeq[sIdx] = ra->alignment[aIdx];
+	    subjSeq[sIdx] = '-';
+	    diffSeq[sIdx] = '-';
+	    qCnt++;
+	    sIdx++;
+	    }
+	else if (inIns)
+	    {
+	    querySeq[sIdx] = '-';
+	    subjSeq[sIdx] = ra->alignment[aIdx];
+	    diffSeq[sIdx] = '-';
+	    sCnt++;
+	    sIdx++;
+	    }
+	else
+	    {
+	    diffSeq[sIdx] = ' ';
+	    querySeq[sIdx] = ra->alignment[aIdx];
+	    subjSeq[sIdx] = ra->alignment[aIdx];
+	    sCnt++;
+	    qCnt++;
+	    sIdx++;
+	    }
+	if (sIdx == alignLength)
+	    {
+	    querySeq[sIdx] = '\0';
+	    diffSeq[sIdx] = '\0';
+	    subjSeq[sIdx] = '\0';
+	    printf ("%*s %10d %s %d\n", maxNameLen, ra->genoName, qStart,
+		    querySeq, (qStart + qCnt - 1));
+	    printf ("%*s            %s\n", maxNameLen, " ", diffSeq);
+	    if (ra->strand[0] == '+')
+	        printf ("%*s %10d %s %d\n", maxNameLen, ra->repName, sStart,
+		    subjSeq, (sStart + sCnt - 1));
+	    else
+	        printf ("%*s %10d %s %d\n", maxNameLen, ra->repName, sStart,
+		    subjSeq, (sStart - sCnt + 1));
+	    printf ("\n");
+	    qStart += qCnt;
+	    if (ra->strand[0] == '+')
+	        sStart += sCnt;
+	    else
+	        sStart -= sCnt;
+	    qCnt = 0;
+	    sCnt = 0;
+	    sIdx = 0;
+	    }
+	}
+    aIdx++;
+    }
+if (sIdx)
+    {
+    querySeq[sIdx] = '\0';
+    diffSeq[sIdx] = '\0';
+    subjSeq[sIdx] = '\0';
+    printf ("%*s %10d %s %d\n", maxNameLen, ra->genoName, qStart, querySeq,
+	    (qStart + qCnt - 1));
+    printf ("%*s            %s\n", maxNameLen, " ", diffSeq);
+    if (ra->strand[0] == '+')
+        printf ("%*s %10d %s %d\n", maxNameLen, ra->repName, sStart, subjSeq,
+	    (sStart + sCnt - 1));
+    else
+        printf ("%*s %10d %s %d\n", maxNameLen, ra->repName, sStart, subjSeq,
+	    (sStart - sCnt + 1));
+    ;
+    }
+}
+
+void printOutTableHeader (char strand)
+/* Print the appropriate HTML table header
+ * given the strand of the element.
+ */
+{
+const char *hd1_style =
+    "style=\"background-color:#C2C9E0; font-size:14px; padding:4px 14px;\"";
+const char *hd1span_style =
+    "style=\"background-color:#6678B1; font-size:14px; padding:4px 14px;\"";
+const char *hd2_style =
+    "style=\"background-color:#C2C9E0; font-size:14px; padding:4px 14px; "
+    "border-bottom: 2px solid #6678B1;\"";
+printf ("  <thead>\n");
+printf ("    <tr>\n");
+printf ("      <th %s>bit/sw</th>\n", hd1_style);
+printf ("      <th %s>perc</th>\n", hd1_style);
+printf ("      <th %s>perc</th>\n", hd1_style);
+printf ("      <th %s>perc</th>\n", hd1_style);
+printf ("      <th colspan=\"4\" %s>query</th>\n", hd1span_style);
+printf ("      <th %s></th>\n", hd1_style);
+printf ("      <th colspan=\"5\" %s>matching repeat</th>\n", hd1span_style);
+printf ("      <th %s></th>\n", hd1_style);
+printf ("    </tr>\n");
+printf ("    <tr>\n");
+printf ("      <th %s>score</th>\n", hd2_style);
+printf ("      <th %s>div.</th>\n", hd2_style);
+printf ("      <th %s>del.</th>\n", hd2_style);
+printf ("      <th %s>ins.</th>\n", hd2_style);
+printf ("      <th %s>sequence</th>\n", hd2_style);
+printf ("      <th %s>begin</th>\n", hd2_style);
+printf ("      <th %s>end</th>\n", hd2_style);
+printf ("      <th %s>remaining</th>\n", hd2_style);
+printf ("      <th %s>orient.</th>\n", hd2_style);
+printf ("      <th %s>name</th>\n", hd2_style);
+printf ("      <th %s>class/family</th>\n", hd2_style);
+if (strand == 'C')
+    {
+    printf ("      <th %s>remaining</th>\n", hd2_style);
+    printf ("      <th %s>end</th>\n", hd2_style);
+    printf ("      <th %s>begin</th>\n", hd2_style);
+    }
+else
+    {
+    printf ("      <th %s>begin</th>\n", hd2_style);
+    printf ("      <th %s>end</th>\n", hd2_style);
+    printf ("      <th %s>remaining</th>\n", hd2_style);
+    }
+printf ("      <th %s>id</th>\n", hd2_style);
+printf ("    </tr>\n");
+printf ("  </thead>\n");
+}
+
+
+void doJRepeat (struct trackDb *tdb, char *repeat)
+/* Main entry point */
+{
+const char *data_style = "style=\"padding:0px 6px;\"";
+char *table = (tdb ? tdb->table : tdb->track);
+char outTable[64];
+char alignTable[64];
+
+/* Currently using a fixed table name design
+ * with support for two datasets ( baseline and current ).
+ * The baseline dataset is the run which all the other
+ * tracks are based upon.  The current can be any more
+ * recent run of RepeatMasker.
+ */
+if (startsWith ("rmskJoinedCurrent", table))
+    {
+    strcpy (alignTable, "rmskAlignCurrent");
+    strcpy (outTable, "rmskOutCurrent");
+    }
+else if (startsWith ("rmskJoinedBaseline", table))
+    {
+    strcpy (alignTable, "rmskAlignBaseline");
+    strcpy (outTable, "rmskOutBaseline");
+    }
+
+cartWebStart (cart, database, "%s", tdb->longLabel);
+
+int offset = cartInt (cart, "o");
+if (offset >= 0)
+    {
+    struct sqlConnection *conn2 = hAllocConn (database);
+    struct sqlResult *sr2;
+    char **row;
+    char query[256];
+    char qTable[64];
+    boolean hasBin;
+
+    int start = cartInt (cart, "o");
+
+    if (hTableExists (database, table))
+	{
+	hFindSplitTable (database, seqName, table, qTable, &hasBin);
+	sqlSafef (query, sizeof (query),
+	       "select * from %s where chrom = '%s' and alignStart >= %d"
+	       " and id = %s", qTable, seqName, start-1, repeat);
+
+	sr2 = sqlGetResult (conn2, query);
+	if ((row = sqlNextRow (sr2)) != NULL)
+	    {
+	    struct rmskJoined *rmJoin = rmskJoinedLoad (row + hasBin);
+
+	    char class[32];
+	    class[0] = '\0';
+	    char family[32];
+	    family[0] = '\0';
+	    char *poundPtr = index (rmJoin->name, '#');
+	    if (poundPtr)
+		{
+		// Terminate name string properly
+		safecpy (class, sizeof (class), poundPtr + 1);
+		*poundPtr = '\0';
+		char *slashPtr = index (class, '/');
+		if (slashPtr)
+		    {
+		    // Terminate class string properly
+		    safecpy (family, sizeof (family), slashPtr + 1);
+		    *slashPtr = '\0';
+		    }
+		}
+
+	    printf ("<b>Repeat:</b> %s<br>\n", rmJoin->name);
+	    printf ("<b>Class:</b> %s<br>\n", class);
+	    printf ("<b>Family:</b> %s<br>\n", family);
+	    printf ("<b>Orientation:</b> %s<br>\n", rmJoin->strand);
+	    printf ("<b>Joined Element Genomic Range:</b> %s:%d-%d<br>\n",
+		    rmJoin->chrom, rmJoin->alignStart+1, rmJoin->alignEnd);
+	    printf ("<br><br>\n");
+	    }
+	sqlFreeResult (&sr2);
+	}
+
+    /*
+     * Locate *.out annotation for this element
+     */
+    if (hTableExists (database, outTable))
+	{
+	int isFirst = 0;
+	struct rmskOut2 *ro;
+	hFindSplitTable (database, seqName, outTable, qTable, &hasBin);
+	sqlSafef (query, sizeof (query),
+	       "select * from %s where genoName = '%s' and genoStart >= %d"
+	       " and id = %s", qTable, seqName, start-1, repeat);
+	sr2 = sqlGetResult (conn2, query);
+	printf ("<h4>RepeatMasker Annotation:</h4>\n");
+	printf
+	    ("The RepeatMasker annotation line(s) for this element. "
+	     "If the element is fragmented the output will contain one "
+	     "line per joined fragment.<p>\n");
+	printf ("<table cellspacing=\"0\">\n");
+	while ((row = sqlNextRow (sr2)) != NULL)
+	    {
+	    ro = rmskOut2Load (row + hasBin);
+	    if (!isFirst++)
+	        printOutTableHeader (ro->strand[0]);
+	    printf ("  <tr>\n");
+	    printf ("    <td %s>%d</td>\n", data_style, ro->swScore);
+	    printf ("    <td %s>%3.1f</td>\n", data_style,
+		    (double) ro->milliDiv * (double) 0.01);
+	    printf ("    <td %s>%3.1f</td>\n", data_style,
+		    (double) ro->milliDel * (double) 0.01);
+	    printf ("    <td %s>%3.1f</td>\n", data_style,
+		    (double) ro->milliIns * (double) 0.01);
+	    printf ("    <td %s>%s</td>\n", data_style, ro->genoName);
+	    printf ("    <td %s>%d</td>\n", data_style, ro->genoStart + 1);
+	    printf ("    <td %s>%d</td>\n", data_style, ro->genoEnd);
+	    printf ("    </A>");
+	    printf ("    <td %s>(%d)</td>\n", data_style, ro->genoLeft);
+	    printf ("    <td %s>%s</td>\n", data_style, ro->strand);
+	    printf ("    <td %s>%s</td>\n", data_style, ro->repName);
+	    printf ("    <td %s>%s/%s</td>\n", data_style, ro->repClass,
+		    ro->repFamily);
+	    printf ("    <td %s>%d</td>\n", data_style, ro->repStart);
+	    printf ("    <td %s>%d</td>\n", data_style, ro->repEnd);
+	    printf ("    <td %s>(%d)</td>\n", data_style, ro->repLeft);
+	    printf ("    <td %s>%d</td>\n", data_style, ro->id);
+	    printf ("  <td>\n");
+	    printf ("<A HREF=\"%s&db=%s&position=%s%%3A%d-%d\">Browser</A>",
+		    hgTracksPathAndSettings (), database, seqName,
+		    ro->genoStart + 1, ro->genoEnd);
+	    char *tbl = cgiUsualString ("table", cgiString ("g"));
+	    printf
+		(" - <A HREF=\"%s&o=%d&g=getDna&i=%s&c=%s&l=%d&r=%d&"
+		 "strand=%s&table=%s\">"
+		 "DNA</A>\n", hgcPathAndSettings (), ro->genoStart,
+		 (ro->repName != NULL ? cgiEncode (ro->repName) : ""),
+		 seqName, ro->genoStart, ro->genoEnd, cgiEncode (ro->strand),
+		 tbl);
+
+	    printf ("  </td>\n");
+
+	    printf ("  </tr>\n");
+
+	    }
+	sqlFreeResult (&sr2);
+	printf ("</table>\n");
+	}
+
+    printf ("<br><br>\n");
+
+    /*
+     * Locate *.align data for this element
+     */
+    if (hTableExists (database, alignTable))
+	{
+	struct rmskAlign *ro;
+	hFindSplitTable (database, seqName, alignTable, qTable, &hasBin);
+	sqlSafef (query, sizeof (query),
+	       "select * from %s where genoName = '%s' and genoStart >= %d"
+	       " and id = %s", qTable, seqName, start-1, repeat);
+	sr2 = sqlGetResult (conn2, query);
+	printf ("<h4>RepeatMasker Alignments:</h4>\n");
+	printf
+	    ("The raw alignment data used by RepeatMasker to generate "
+	     "the final annotation call for this element. NOTE: The "
+	     "aligned sequence names and consensus positions may differ "
+	     "from the final annotation.<p>\n");
+	printf ("<table>\n");
+	while ((row = sqlNextRow (sr2)) != NULL)
+	    {
+	    ro = rmskAlignLoad (row + hasBin);
+	    printf ("  <tr>\n");
+	    printf ("    <td>%d</td>\n", ro->swScore);
+	    printf ("    <td>%3.2f</td>\n",
+		    (double) ro->milliDiv * (double) 0.01);
+	    printf ("    <td>%3.2f</td>\n",
+		    (double) ro->milliDel * (double) 0.01);
+	    printf ("    <td>%3.2f</td>\n",
+		    (double) ro->milliIns * (double) 0.01);
+	    printf ("    <td>%s</td>\n", ro->genoName);
+	    printf ("    <td>%d</td>\n", ro->genoStart + 1);
+	    printf ("    <td>%d</td>\n", ro->genoEnd);
+	    printf ("    <td>(%d)</td>\n", ro->genoLeft);
+	    printf ("    <td>%s</td>\n", ro->strand);
+	    printf ("    <td>%s</td>\n", ro->repName);
+	    printf ("    <td>%s/%s</td>\n", ro->repClass, ro->repFamily);
+	    if (ro->strand[0] == '-')
+		{
+		printf ("    <td>(%d)</td>\n", ro->repLeft);
+		printf ("    <td>%d</td>\n", ro->repEnd);
+		printf ("    <td>%d</td>\n", ro->repStart);
+		}
+	    else
+		{
+		printf ("    <td>%d</td>\n", ro->repStart);
+		printf ("    <td>%d</td>\n", ro->repEnd);
+		printf ("    <td>(%d)</td>\n", ro->repLeft);
+		}
+	    printf ("    <td>%d</td>\n", ro->id);
+	    printf ("    </tr><tr><td colspan=\"15\"><pre>\n");
+	    printAlignment (ro);
+	    printf ("    </pre><td></tr>\n");
+	    }
+	sqlFreeResult (&sr2);
+	printf ("</table>\n");
+	}
+    hFreeConn (&conn2);
+    }
+printTrackHtml (tdb);
+}