src/hg/hgc/hgc.c 1.1514

1.1514 2009/03/06 22:59:30 angie
HapMap SNPs: Added a trackDb setting to determine whether this particular track is Phase II or Phase III. Made the Heterozygosity labels more clear: the older versions of the track have expected het, but as of Phase III we're doing observed het. Fixed avg. obs. het calc ( oops, item- > score is minor allele freq not het! ) .
Index: src/hg/hgc/hgc.c
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/hg/hgc/hgc.c,v
retrieving revision 1.1513
retrieving revision 1.1514
diff -b -B -U 4 -r1.1513 -r1.1514
--- src/hg/hgc/hgc.c	6 Mar 2009 04:54:21 -0000	1.1513
+++ src/hg/hgc/hgc.c	6 Mar 2009 22:59:30 -0000	1.1514
@@ -14809,11 +14809,12 @@
 }
 
 void checkForHapmap(struct sqlConnection *conn, struct trackDb *tdb, char *itemName)
 {
+boolean isPhaseIII = sameString(trackDbSettingOrDefault(tdb, "hapmapPhase", "II"), "III");
 boolean gotHapMap = FALSE;
 char query[512];
-if (sqlTableExists(conn, "hapmapAllelesSummary"))
+if (!isPhaseIII && sqlTableExists(conn, "hapmapAllelesSummary"))
     {
     safef(query, sizeof(query),
 	  "select count(*) from hapmapAllelesSummary where name = '%s'", itemName);
     if (sqlQuickNum(conn, query) > 0)
@@ -18395,9 +18396,9 @@
     }
 printf("</TABLE>\n");
 
 het = summaryItem->score / 10.0;
-printf("<BR><B>Heterozygosity over all populations:</B> %3.2f%%<BR>\n", het);
+printf("<BR><B>Expected Heterozygosity over all populations:</B> %3.2f%%<BR>\n", het);
 
 
 if (showOrtho && (differentString(summaryItem->chimpAllele, "none") ||
 		  differentString(summaryItem->macaqueAllele, "none")))
@@ -18438,9 +18439,10 @@
 // Do a first pass to gather up alleles and counts:
 char *majorAlleles[HAP_PHASEIII_POPCOUNT];
 int majorCounts[HAP_PHASEIII_POPCOUNT], haploCounts[HAP_PHASEIII_POPCOUNT];
 int totalA1Count = 0, totalA2Count = 0, totalHaploCount = 0;
-int totalScore = 0, scoreCount = 0;
+float sumHet = 0.0;
+int popCount = 0;
 char *allele1 = NULL, *allele2 = NULL;
 for (i=0;  i < HAP_PHASEIII_POPCOUNT;  i++)
     {
     char *popCode = hapmapPhaseIIIPops[i];
@@ -18483,10 +18485,11 @@
 	    warn("Allele order in hapmapSnps%s is different from earlier table(s)", popCode);
 	totalA1Count += 2*item->homoCount1 + item->heteroCount;
 	totalA2Count += 2*item->homoCount2 + item->heteroCount;
 	totalHaploCount += haploCounts[i];
-	totalScore += item->score;
-	scoreCount++;
+	sumHet += ((float)item->heteroCount /
+		   (item->homoCount1 + item->homoCount2 + item->heteroCount));
+	popCount++;
 	}
     }
 printf("<TR><TH>Population</TH> <TH>%s</TH> <TH>%s</TH></TR>\n", allele1, allele2);
 for (i=0;  i < HAP_PHASEIII_POPCOUNT;  i++)
@@ -18496,10 +18499,10 @@
 int totalMajorCount = max(totalA1Count, totalA2Count);
 showOneHapmapRow("Total", allele1, allele2, totalMajorAllele, totalMajorCount, totalHaploCount);
 printf("</TABLE>\n");
 
-printf("<BR><B>Heterozygosity (average of all populations' het):</B> %3.2f%%<BR>\n",
-       ((float)totalScore/scoreCount)/10.0);
+printf("<BR><B>Heterozygosity (average of all populations' observed het):</B> %3.2f%%<BR>\n",
+       (100.0 * sumHet/popCount));
 
 if (showOrtho)
     {
     boolean showedHeader = FALSE;
@@ -18516,24 +18519,21 @@
 		printf("<TABLE BORDER=1>\n");
 		printf("<TR><TH>Species</TH> <TH>Allele</TH> <TH>Quality Score</TH></TR>\n");
 		showedHeader = TRUE;
 		}
-	    boolean gotQual = (sqlFieldIndex(conn, table, "orthoQual") >= 0);
 	    char query[512];
-	    if (gotQual)
 		safef(query, sizeof(query),
-		      "select orthoAllele, orthoQual from %s where name = '%s' and chrom = '%s'",
-		      table, itemName, seqName);
-	    else
-		safef(query, sizeof(query),
-		      "select orthoAllele from %s where name = '%s' and chrom = '%s'",
+		  "select orthoAllele, score, strand from %s where name = '%s' and chrom = '%s'",
 		      table, itemName, seqName);
 	    struct sqlResult *sr = sqlGetResult(conn, query);
 	    char **row;
 	    if ((row = sqlNextRow(sr)) != NULL)
 		{
 		char *allele = row[0];
-		char *qual = gotQual ? row[1] : "N/A";
+		char *qual = row[1];
+		char *strand = row[2];
+		if (sameString("-", strand))
+		    reverseComplement(allele, strlen(allele));
 		printf("<TR><TD>%s</TD><TD>%s</TD><TD>%s</TD></TR>",
 		       hapmapOrthoSpecies[i], allele, qual);
 		}
 	    else
@@ -18549,12 +18549,13 @@
 void doHapmapSnpsSummary(struct sqlConnection *conn, struct trackDb *tdb, char *itemName,
 			 boolean showOrtho)
 /* Display per-population allele frequencies. */
 {
-if (sqlTableExists(conn, "hapmapAllelesSummary"))
-    doHapmapSnpsSummaryTable(conn, tdb, itemName, TRUE);
+boolean isPhaseIII = sameString(trackDbSettingOrDefault(tdb, "hapmapPhase", "II"), "III");
+if (!isPhaseIII && sqlTableExists(conn, "hapmapAllelesSummary"))
+    doHapmapSnpsSummaryTable(conn, tdb, itemName, showOrtho);
 else
-    doHapmapSnpsAllPops(conn, tdb, itemName, TRUE);
+    doHapmapSnpsAllPops(conn, tdb, itemName, showOrtho);
 }
 
 void doHapmapSnps(struct trackDb *tdb, char *itemName)
 /* assume just one hapmap snp at a given location */