src/hg/hgc/hgc.c 1.1512

1.1512 2009/03/03 01:01:04 angie
Refactored hapmapSnps code to support both phase II (4 populations) and phase III (11 populations) SNPs.
Index: src/hg/hgc/hgc.c
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/hg/hgc/hgc.c,v
retrieving revision 1.1511
retrieving revision 1.1512
diff -b -B -U 4 -r1.1511 -r1.1512
--- src/hg/hgc/hgc.c	28 Feb 2009 00:06:51 -0000	1.1511
+++ src/hg/hgc/hgc.c	3 Mar 2009 01:01:04 -0000	1.1512
@@ -14799,17 +14799,46 @@
 }
 
 void checkForHapmap(struct sqlConnection *conn, struct trackDb *tdb, char *itemName)
 {
+boolean gotHapMap = FALSE;
 char query[512];
-if (!hTableExists(database, "hapmapAllelesSummary"))
-    return;
-safef(query, sizeof(query),
+if (sqlTableExists(conn, "hapmapAllelesSummary"))
+    {
+    safef(query, sizeof(query),
       "select count(*) from hapmapAllelesSummary where name = '%s'", itemName);
-if (sqlQuickNum(conn, query) != 1)
-    return;
-printf("<BR><B><A HREF=\"%s&hapmapSnps=dense\"> HapMap SNP</A> </B><BR>\n",
-       hgTracksPathAndSettings());
+    if (sqlQuickNum(conn, query) > 0)
+	gotHapMap = TRUE;
+    }
+else
+    {
+    int i;
+    for (i = 0;  hapmapPhaseIIIPops[i] != NULL;  i++)
+	{
+	char table[HDB_MAX_TABLE_STRING];
+	safef(table, sizeof(table), "hapmapSnps%s", hapmapPhaseIIIPops[i]);
+	if (sqlTableExists(conn, table))
+	    {
+	    safef(query, sizeof(query),
+		  "select count(*) from %s where name = '%s'", table, itemName);
+	    if (sqlQuickNum(conn, query) > 0)
+		{
+		gotHapMap = TRUE;
+		break;
+		}
+	    }
+	}
+    }
+struct trackDb *hsTdb = hashFindVal(trackHash, "hapmapSnps");
+if (gotHapMap && hsTdb != NULL)
+    {
+    printf("<BR><B><A HREF=\"%s", hgTracksPathAndSettings());
+    // If hapmapSnps is hidden, make it dense; if it's pack etc., leave it alone.
+    if (sameString("hide", cartUsualString(cart, "hapmapSnps",
+					   trackDbSettingOrDefault(hsTdb, "visibility", "hide"))))
+	printf("&hapmapSnps=dense");
+    printf("\"> HapMap SNP</A> </B><BR>\n");
+    }
 }
 
 static void printLsSnpPdb(struct sqlConnection *conn, char *pdbId, char *snpId)
 /* generate LS-SNP and chimera links for a PDB id */
@@ -18296,39 +18325,40 @@
 printf("<TR>");
 printf("<TD>%s</TD>", pop);
 if (count1 > count2)
     {
-    printf("<TD bgcolor = \"lightgrey\">%d (%3.2f%%)</TD>", count1, freq1);
-    printf("<TD>%d (%3.2f%%)</TD>", count2, freq2);
+    printf("<TD bgcolor = \"lightgrey\" align=right>%d (%3.2f%%)</TD>", count1, freq1);
+    printf("<TD align=right>%d (%3.2f%%)</TD>", count2, freq2);
     }
 else if (count1 < count2)
     {
-    printf("<TD>%d (%3.2f%%)</TD>", count1, freq1);
-    printf("<TD bgcolor = \"lightgrey\">%d (%3.2f%%)</TD>", count2, freq2);
+    printf("<TD align=right>%d (%3.2f%%)</TD>", count1, freq1);
+    printf("<TD bgcolor = \"lightgrey\" align=right>%d (%3.2f%%)</TD>", count2, freq2);
     }
 else
     {
-    printf("<TD>%d (%3.2f%%)</TD>", count1, freq1);
-    printf("<TD>%d (%3.2f%%)</TD>", count2, freq2);
+    printf("<TD align=right>%d (%3.2f%%)</TD>", count1, freq1);
+    printf("<TD align=right>%d (%3.2f%%)</TD>", count2, freq2);
     }
 printf("</TR>\n");
 
 }
 
 
-void doHapmapSnpsSummary(struct trackDb *tdb, char *itemName, boolean showOrtho)
+void doHapmapSnpsSummaryTable(struct sqlConnection *conn, struct trackDb *tdb, char *itemName,
+			      boolean showOrtho)
+/* Use the hapmapAllelesSummary table (caller checks for existence) to display allele
+ * frequencies for the 4 HapMap Phase II populations. */
 {
 char *table = tdb->tableName;
 struct hapmapAllelesSummary *summaryItem;
-struct sqlConnection *conn = hAllocConn(database);
 struct sqlResult *sr;
 char **row;
 char query[256];
 int rowOffset = hOffsetPastBin(database, seqName, table);
 int start = cartInt(cart, "o");
 float het = 0.0;
 
-if (!hTableExists(database, "hapmapAllelesSummary")) return;
 safef(query, sizeof(query), "select * from hapmapAllelesSummary where chrom = '%s' and "
       "chromStart=%d and name = '%s'", seqName, start, itemName);
 sr = sqlGetResult(conn, query);
 row = sqlNextRow(sr);
@@ -18359,10 +18388,9 @@
 het = summaryItem->score / 10.0;
 printf("<BR><B>Heterozygosity over all populations:</B> %3.2f%%<BR>\n", het);
 
 
-if (showOrtho &&
-    (differentString(summaryItem->chimpAllele, "none") ||
+if (showOrtho && (differentString(summaryItem->chimpAllele, "none") ||
     differentString(summaryItem->macaqueAllele, "none")))
     {
     printf("<BR><B>Orthologous alleles:</B><BR>\n");
     printf("<TABLE BORDER=1>\n");
@@ -18386,16 +18414,143 @@
     printf("</TABLE>\n");
     }
 
 sqlFreeResult(&sr);
-hFreeConn(&conn);
+}
+
+void doHapmapSnpsAllPops(struct sqlConnection *conn, struct trackDb *tdb, char *itemName,
+			 boolean showOrtho)
+/* Show item's SNP allele frequencies for each of the 11 HapMap Phase III 
+ * populations, as well as chimp and macaque if showOrtho. */
+{
+int i;
+printf("<BR><B>Allele frequencies in each population (major allele highlighted):</B><BR>\n");
+printf("<TABLE BORDER=1>\n");
+// Do a first pass to gather up alleles and counts:
+char *majorAlleles[HAP_PHASEIII_POPCOUNT];
+int majorCounts[HAP_PHASEIII_POPCOUNT], haploCounts[HAP_PHASEIII_POPCOUNT];
+int totalA1Count = 0, totalA2Count = 0, totalHaploCount = 0;
+int totalScore = 0, scoreCount = 0;
+char *allele1 = NULL, *allele2 = NULL;
+for (i=0;  i < HAP_PHASEIII_POPCOUNT;  i++)
+    {
+    char *popCode = hapmapPhaseIIIPops[i];
+    struct hapmapSnps *item = NULL;
+    char table[HDB_MAX_TABLE_STRING];
+    safef(table, sizeof(table), "hapmapSnps%s", popCode);
+    if (sqlTableExists(conn, table))
+	{
+	char query[512];
+	safef(query, sizeof(query), "select * from %s where name = '%s' and chrom = '%s'",
+	      table, itemName, seqName);
+	struct sqlResult *sr = sqlGetResult(conn, query);
+	char **row;
+	if ((row = sqlNextRow(sr)) != NULL)
+	    {
+	    int rowOffset = hOffsetPastBin(database, seqName, table);
+	    item = hapmapSnpsLoad(row+rowOffset);
+	    }
+	sqlFreeResult(&sr);
+	}
+    majorAlleles[i] = "";
+    majorCounts[i] = 0;
+    haploCounts[i] = 0;
+    if (item != NULL)
+	{
+	majorAlleles[i] = item->allele1;
+        majorCounts[i] = 2*item->homoCount1 + item->heteroCount;
+	if (item->homoCount1 < item->homoCount2)
+	    {
+	    majorAlleles[i] = item->allele2;
+	    majorCounts[i] = 2*item->homoCount2 + item->heteroCount;
+	    }
+	haploCounts[i] = 2*(item->homoCount1 + item->homoCount2 + item->heteroCount);
+	if (allele1 == NULL)
+	    {
+	    allele1 = item->allele1;
+	    allele2 = item->allele2;
+	    }
+	else if (!sameString(allele1, item->allele1) || !sameString(allele2, item->allele2))
+	    warn("Allele order in hapmapSnps%s is different from earlier table(s)", popCode);
+	totalA1Count += 2*item->homoCount1 + item->heteroCount;
+	totalA2Count += 2*item->homoCount2 + item->heteroCount;
+	totalHaploCount += haploCounts[i];
+	totalScore += item->score;
+	scoreCount++;
+	}
+    }
+printf("<TR><TH>Population</TH> <TH>%s</TH> <TH>%s</TH></TR>\n", allele1, allele2);
+for (i=0;  i < HAP_PHASEIII_POPCOUNT;  i++)
+    showOneHapmapRow(hapmapPhaseIIIPops[i], allele1, allele2, majorAlleles[i],
+		     majorCounts[i], haploCounts[i]);
+char *totalMajorAllele = (totalA1Count >= totalA2Count) ? allele1 : allele2;
+int totalMajorCount = max(totalA1Count, totalA2Count);
+showOneHapmapRow("Total", allele1, allele2, totalMajorAllele, totalMajorCount, totalHaploCount);
+printf("</TABLE>\n");
+
+printf("<BR><B>Heterozygosity (average of all populations' het):</B> %3.2f%%<BR>\n",
+       ((float)totalScore/scoreCount)/10.0);
+
+if (showOrtho)
+    {
+    boolean showedHeader = FALSE;
+    int i;
+    for (i = 0;  hapmapOrthoSpecies[i] != NULL; i++)
+	{
+	char table[HDB_MAX_TABLE_STRING];
+	safef(table, sizeof(table), "hapmapAlleles%s", hapmapOrthoSpecies[i]);
+	if (sqlTableExists(conn, table))
+	    {
+	    if (!showedHeader)
+		{
+		printf("<BR><B>Orthologous alleles from reference genome assemblies:</B><BR>\n");
+		printf("<TABLE BORDER=1>\n");
+		printf("<TR><TH>Species</TH> <TH>Allele</TH> <TH>Quality Score</TH></TR>\n");
+		showedHeader = TRUE;
+		}
+	    boolean gotQual = (sqlFieldIndex(conn, table, "orthoQual") >= 0);
+	    char query[512];
+	    if (gotQual)
+		safef(query, sizeof(query),
+		      "select orthoAllele, orthoQual from %s where name = '%s' and chrom = '%s'",
+		      table, itemName, seqName);
+	    else
+		safef(query, sizeof(query),
+		      "select orthoAllele from %s where name = '%s' and chrom = '%s'",
+		      table, itemName, seqName);
+	    struct sqlResult *sr = sqlGetResult(conn, query);
+	    char **row;
+	    if ((row = sqlNextRow(sr)) != NULL)
+		{
+		char *allele = row[0];
+		char *qual = gotQual ? row[1] : "N/A";
+		printf("<TR><TD>%s</TD><TD>%s</TD><TD>%s</TD></TR>",
+		       hapmapOrthoSpecies[i], allele, qual);
+		}
+	    else
+		printf("<TR><TD>%s</TD><TD>N/A</TD><TD>N/A</TD></TR>", hapmapOrthoSpecies[i]);
+	    sqlFreeResult(&sr);
+	    }
+	}
+    if (showedHeader)
+	printf("</TABLE>\n");
+    }
+}
+
+void doHapmapSnpsSummary(struct sqlConnection *conn, struct trackDb *tdb, char *itemName,
+			 boolean showOrtho)
+/* Display per-population allele frequencies. */
+{
+if (sqlTableExists(conn, "hapmapAllelesSummary"))
+    doHapmapSnpsSummaryTable(conn, tdb, itemName, TRUE);
+else
+    doHapmapSnpsAllPops(conn, tdb, itemName, TRUE);
 }
 
 void doHapmapSnps(struct trackDb *tdb, char *itemName)
 /* assume just one hapmap snp at a given location */
 {
 char *table = tdb->tableName;
-struct hapmapSnps *thisItem;
 struct sqlConnection *conn = hAllocConn(database);
 struct sqlResult *sr;
 char **row;
 char query[256];
@@ -18404,85 +18559,58 @@
 int majorCount = 0;
 int minorCount = 0;
 char *majorAllele = NULL;
 char *minorAllele = NULL;
+char *popCode = table + strlen("hapmapSnps");
 
 genericHeader(tdb, itemName);
 
 safef(query, sizeof(query),
       "select * from %s where chrom = '%s' and "
       "chromStart=%d and name = '%s'", table, seqName, start, itemName);
 sr = sqlGetResult(conn, query);
 row = sqlNextRow(sr);
-thisItem = hapmapSnpsLoad(row+rowOffset);
-printf("<B>SNP rsId:</B> <A HREF=\"http://www.ncbi.nlm.nih.gov/SNP/snp_ref.cgi?");
-printf("type=rs&rs=%s\" TARGET=_blank> %s</A><BR>\n", itemName, itemName);
-printf("<B>Human Position:</B> <A HREF=\"%s&db=%s&position=%s%%3A%d-%d\">",
-      hgTracksPathAndSettings(), database, thisItem->chrom, thisItem->chromStart+1, thisItem->chromEnd);
-printf("%s:%d-%d</A><BR>\n", thisItem->chrom, thisItem->chromStart+1, thisItem->chromEnd);
-printf("<B>Strand:</B> %s<BR>\n", thisItem->strand);
-printf("<B>Polymorphism type:</B> %s<BR>\n", thisItem->observed);
-if (thisItem->homoCount1 >= thisItem->homoCount2)
-    {
-    majorAllele = cloneString(thisItem->allele1);
-    majorCount = thisItem->homoCount1;
-    minorCount = thisItem->homoCount2;
-    minorAllele = cloneString(thisItem->allele2);
+struct hapmapSnps *item = hapmapSnpsLoad(row+rowOffset);
+printf("<B>SNP rsId:</B> <A HREF=\"http://www.ncbi.nlm.nih.gov/SNP/snp_ref.cgi?"
+       "type=rs&rs=%s\" TARGET=_blank> %s</A><BR>\n", itemName, itemName);
+printf("<B>Position:</B> <A HREF=\"%s&db=%s&position=%s%%3A%d-%d\">%s:%d-%d</A><BR>\n",
+       hgTracksPathAndSettings(), database, item->chrom, item->chromStart+1, item->chromEnd,
+       item->chrom, item->chromStart+1, item->chromEnd);
+printf("<B>Strand:</B> %s<BR>\n", item->strand);
+printf("<B>Polymorphism type:</B> %s<BR>\n", item->observed);
+if (item->homoCount1 >= item->homoCount2)
+    {
+    majorAllele = cloneString(item->allele1);
+    majorCount = item->homoCount1;
+    minorCount = item->homoCount2;
+    minorAllele = cloneString(item->allele2);
     }
 else
     {
-    majorAllele = cloneString(thisItem->allele2);
-    majorCount = thisItem->homoCount2;
-    minorCount = thisItem->homoCount1;
-    minorAllele = cloneString(thisItem->allele1);
-    }
-
-printf("<BR><B>Genotype counts for ");
-if (sameString(table, "hapmapSnpsCEU"))
-    printf("CEU:</B><BR>\n");
-if (sameString(table, "hapmapSnpsCHB"))
-    printf("CHB:</B><BR>\n");
-if (sameString(table, "hapmapSnpsJPT"))
-    printf("JPT:</B><BR>\n");
-if (sameString(table, "hapmapSnpsYRI"))
-    printf("YRI:</B><BR>\n");
-printf("<TABLE BORDER=1>\n");
-
-printf("<TR>");
-printf("<TD>Major allele (%s) homozygotes</TD>", majorAllele);
-printf("<TD>%d individuals</TD>", majorCount);
-printf("</TR>");
-
-if (minorCount > 0)
-    {
-    printf("<TR>");
-    printf("<TD>Minor allele (%s) homozygotes</TD>", minorAllele);
-    printf("<TD>%d individuals</TD>", minorCount);
-    printf("</TR>");
+    majorAllele = cloneString(item->allele2);
+    majorCount = item->homoCount2;
+    minorCount = item->homoCount1;
+    minorAllele = cloneString(item->allele1);
     }
 
-if (thisItem->heteroCount > 0)
-    {
-    printf("<TR>");
-    if (strcasecmp(majorAllele, minorAllele) < 0)
-        printf("<TD>Heterozygotes (%s/%s)</TD>", majorAllele, minorAllele);
-    else
-        printf("<TD>Heterozygotes (%s/%s)</TD>", minorAllele, majorAllele);
-    printf("<TD>%d individuals</TD>", thisItem->heteroCount);
-    printf("</TR>");
-    }
-
-printf("<TR>");
-printf("<TD>Total</TD>");
-printf("<TD>%d individuals</TD>", majorCount + minorCount + thisItem->heteroCount);
-printf("</TR>");
-
+printf("<BR><B>Genotype counts for %s:</B><BR>\n", popCode);
+printf("<TABLE BORDER=1>\n");
+printf("<TR><TD>Major allele (%s) homozygotes</TD><TD align=right>%d individuals</TD></TR>\n",
+       majorAllele, majorCount);
+if (minorCount > 0)
+    printf("<TR><TD>Minor allele (%s) homozygotes</TD><TD align=right>%d individuals</TD></TR>\n",
+	   minorAllele, minorCount);
+if (item->heteroCount > 0)
+    printf("<TR><TD>Heterozygotes (%s)</TD><TD align=right>%d individuals</TD></TR>\n",
+	   item->observed, item->heteroCount);
+printf("<TR><TD>Total</TD><TD align=right>%d individuals</TD></TR>\n",
+       majorCount + minorCount + item->heteroCount);
 printf("</TABLE>\n");
 
 sqlFreeResult(&sr);
 
-/* get summary data here */
-doHapmapSnpsSummary(tdb, itemName, TRUE);
+/* Show allele frequencies for all populations */
+doHapmapSnpsSummary(conn, tdb, itemName, TRUE);
 printTrackHtml(tdb);
 hFreeConn(&conn);
 }
 
@@ -18547,11 +18675,11 @@
 
     }
 
 printf("<BR>\n");
-/* get summary data here */
+/* get summary data (allele frequencies) here */
 /* don't repeat ortho display */
-doHapmapSnpsSummary(tdb, itemName, FALSE);
+doHapmapSnpsSummary(conn, tdb, itemName, FALSE);
 printTrackHtml(tdb);
 sqlFreeResult(&sr);
 hFreeConn(&conn);
 }
@@ -21735,12 +21863,9 @@
 else if (sameString("dvBed", track))
     {
     doDv(tdb, item);
     }
-else if (sameString("hapmapSnpsCEU", track) ||
-         sameString("hapmapSnpsCHB", track) ||
-	 sameString("hapmapSnpsJPT", track) ||
-	 sameString("hapmapSnpsYRI", track))
+else if (startsWith("hapmapSnps", track) && strlen(track) == 13)
     {
     doHapmapSnps(tdb, item);
     }
 else if (sameString("hapmapAllelesChimp", track) ||