dacecbaf00b2193872633dde175c53d263922ff9
angie
  Wed May 11 16:18:38 2011 -0700
Feature #2823 (VCF track handler): request from alpha-test user CarlosBorroto: call Belinda's coding variant functional prediction function
as on the pgSnp details page.

diff --git src/hg/lib/pgSnp.c src/hg/lib/pgSnp.c
index 733cb25..c3bddf5 100644
--- src/hg/lib/pgSnp.c
+++ src/hg/lib/pgSnp.c
@@ -1,31 +1,29 @@
 /* pgSnp.c was originally generated by the autoSql program, which also 
  * generated pgSnp.h and pgSnp.sql.  This module links the database and
  * the RAM representation of objects. */
 
 #include "common.h"
 #include "linefile.h"
 #include "dystring.h"
 #include "jksql.h"
 #include "pgSnp.h"
 #include "hdb.h"
 #include "dnaseq.h"
 #include "pgPhenoAssoc.h"
 #include "regexHelper.h"
 
-static char const rcsid[] = "$Id: pgSnp.c,v 1.8 2010/03/08 17:45:41 giardine Exp $";
-
 void pgSnpStaticLoad(char **row, struct pgSnp *ret)
 /* Load a row from pgSnp table into ret.  The contents of ret will
  * be replaced at the next call to this function. */
 {
 
 ret->bin = sqlUnsigned(row[0]);
 ret->chrom = row[1];
 ret->chromStart = sqlUnsigned(row[2]);
 ret->chromEnd = sqlUnsigned(row[3]);
 ret->name = row[4];
 ret->alleleCount = sqlSigned(row[5]);
 ret->alleleFreq = row[6];
 ret->alleleScores = row[7];
 }
 
@@ -677,15 +675,111 @@
 if (! regexMatchNoCase(row[3], alleles))
     lineFileAbort(lf, "invalid alleles %s", row[3]);
 /* read count, comma separated list of numbers with above # of items */
 item->alleleFreq = cloneString(row[5]);
 char pattern[128];
 safef(pattern, sizeof(pattern), "^[0-9]+(,[0-9]+){%d}$", item->alleleCount - 1);
 if (! regexMatchNoCase(row[5], pattern))
     lineFileAbort(lf, "invalid allele frequency, %s with count of %d", row[5], item->alleleCount);
 /* scores, comma separated list of numbers with above # of items */
 item->alleleScores = cloneString(row[6]);
 safef(pattern, sizeof(pattern), "^[0-9.]+(,[0-9.]+){%d}$", item->alleleCount - 1);
 if (! regexMatchNoCase(row[6], pattern))
     lineFileAbort(lf, "invalid allele scores, %s with count of %d", row[6], item->alleleCount);
 return item;
 }
+
+#define VCF_MAX_ALLELE_LEN 80
+
+struct pgSnp *pgSnpFromVcfRecord(struct vcfRecord *rec)
+/* Convert VCF rec to pgSnp; don't free rec->file (vcfFile) until
+ * you're done with pgSnp because pgSnp points to rec->chrom. */
+{
+static struct dyString *dy = NULL;
+if (dy == NULL)
+    dy = dyStringNew(0);
+else
+    dyStringClear(dy);
+struct pgSnp *pgs;
+AllocVar(pgs);
+pgs->chrom = rec->chrom;
+pgs->chromStart = rec->chromStart;
+pgs->chromEnd = rec->chromEnd;
+// Build up slash-separated allele string from rec->ref + rec->alt:
+dyStringAppend(dy, rec->ref);
+int altCount, i;
+if (sameString(rec->alt, "."))
+    altCount = 0;
+else
+    {
+    char *words[VCF_MAX_ALLELE_LEN/2];
+    char copy[VCF_MAX_ALLELE_LEN+1];
+    strncpy(copy, rec->alt, VCF_MAX_ALLELE_LEN);
+    copy[VCF_MAX_ALLELE_LEN] = '\0';
+    altCount = chopCommas(copy, words);
+    for (i = 0;  i < altCount && dy->stringSize < VCF_MAX_ALLELE_LEN;  i++)
+	dyStringPrintf(dy, "/%s", words[i]);
+    if (i < altCount)
+	altCount = i;
+    }
+pgs->name = cloneStringZ(dy->string, dy->stringSize+1);
+pgs->alleleCount = altCount + 1;
+// Build up comma-sep list of per-allele counts, if available:
+dyStringClear(dy);
+int refAlleleCount = 0;
+boolean gotAltCounts = FALSE;
+for (i = 0;  i < rec->infoCount;  i++)
+    if (sameString(rec->infoElements[i].key, "AN"))
+	{
+	refAlleleCount = rec->infoElements[i].values[0].datInt;
+	break;
+	}
+for (i = 0;  i < rec->infoCount;  i++)
+    if (sameString(rec->infoElements[i].key, "AC"))
+	{
+	int alCounts[64];
+	int j;
+	gotAltCounts = (rec->infoElements[i].count > 0);
+	for (j = 0;  j < rec->infoElements[i].count;  j++)
+	    {
+	    int ac = rec->infoElements[i].values[j].datInt;
+	    if (j < altCount)
+		alCounts[1+j] = ac;
+	    refAlleleCount -= ac;
+	    }
+	if (gotAltCounts)
+	    {
+	    while (j++ < altCount)
+		alCounts[1+j] = -1;
+	    alCounts[0] = refAlleleCount;
+	    if (refAlleleCount >= 0)
+		dyStringPrintf(dy, "%d", refAlleleCount);
+	    else
+		dyStringAppend(dy, "-1");
+	    for (j = 0;  j < altCount;  j++)
+		if (alCounts[1+j] >= 0)
+		    dyStringPrintf(dy, ",%d", alCounts[1+j]);
+		else
+		    dyStringAppend(dy, ",-1");
+	    }
+	break;
+	}
+if (refAlleleCount > 0 && !gotAltCounts)
+    dyStringPrintf(dy, "%d", refAlleleCount);
+pgs->alleleFreq = cloneStringZ(dy->string, dy->stringSize+1);
+// Build up comma-sep list... supposed to be per-allele quality scores but I think
+// the VCF spec only gives us one BQ... for the reference position?  should ask.
+dyStringClear(dy);
+for (i = 0;  i < rec->infoCount;  i++)
+    if (sameString(rec->infoElements[i].key, "BQ"))
+	{
+	float qual = rec->infoElements[i].values[0].datFloat;
+	dyStringPrintf(dy, "%.1f", qual);
+	int j;
+	for (j = 0;  j < altCount;  j++)
+	    dyStringPrintf(dy, ",%.1f", qual);
+	break;
+	}
+pgs->alleleScores = cloneStringZ(dy->string, dy->stringSize+1);
+return pgs;
+}
+