814df69886d2dc7c791db10417901a9768931c88
angie
  Mon Jun 1 17:16:44 2015 -0700
Added support for bigGenePred to anno* libs for hgVai and hgIntegrator.
While updating annoFormatVep.c to use column indices from autoSql instead of
hardcoding genePred indices, I found that I had been using an incorrect index
for strand anyway -- so I removed 3 lines of no-op code.
refs #15439

diff --git src/hg/hgVai/hgVai.c src/hg/hgVai/hgVai.c
index 8ca0e9b..1ce2da0 100644
--- src/hg/hgVai/hgVai.c
+++ src/hg/hgVai/hgVai.c
@@ -12,30 +12,31 @@
 #include "cheapcgi.h"
 #include "cart.h"
 #include "cartTrackDb.h"
 #include "hui.h"
 #include "grp.h"
 #include "hCommon.h"
 #include "hgFind.h"
 #include "hPrint.h"
 #include "jsHelper.h"
 #include "memalloc.h"
 #include "textOut.h"
 #include "trackHub.h"
 #include "hubConnect.h"
 #include "twoBit.h"
 #include "gpFx.h"
+#include "bigGenePred.h"
 #include "udc.h"
 #include "knetUdc.h"
 #include "md5.h"
 #include "regexHelper.h"
 #include "hAnno.h"
 #include "annoGratorQuery.h"
 #include "annoGratorGpVar.h"
 #include "annoFormatVep.h"
 #include "annoStreamBigBed.h"
 
 #include "libifyMe.h"
 
 /* Global Variables */
 struct cart *cart;		/* CGI and other variables */
 struct hash *oldVars = NULL;	/* The cart before new cgi stuff added. */
@@ -469,31 +470,31 @@
     }
 
 printf("<B>maximum number of variants to be processed:</B>\n");
 char *curLimit = cartUsualString(cart, "hgva_variantLimit", "10000");
 char *limitLabels[] = { "10", "100", "1,000", "10,000", "100,000" };
 char *limitValues[] = { "10", "100", "1000", "10000", "100000" };
 cgiMakeDropListWithVals("hgva_variantLimit", limitLabels, limitValues, ArraySize(limitLabels),
 			curLimit);
 printCtAndHubButtons();
 puts("<BR>");
 }
 
 boolean isGeneTrack(struct trackDb *tdb, void *filterData)
 /* This is a TdbFilterFunction to get genePred tracks. */
 {
-return (startsWith("genePred", tdb->type));
+return (startsWith("genePred", tdb->type) || sameString("bigGenePred", tdb->type));
 }
 
 boolean selectGenes()
 /* Let user select a gene predictions track; return FALSE if there are no genePred tracks. */
 {
 struct slRef *trackRefList = NULL;
 tdbFilterGroupTrack(fullTrackList, fullGroupList, isGeneTrack, NULL, NULL, &trackRefList);
 boolean gotGP = (trackRefList != NULL);
 if (!gotGP)
     warn("This assembly (%s) has no gene prediction tracks, "
 	 "so the VAI will not be able to annotate it.", database);
 printf("<div class='sectionLiteHeader'>Select Genes</div>\n");
 if (gotGP)
     printf("The gene predictions selected here will be used ");
 else
@@ -1590,81 +1591,81 @@
 
 static void writeArbitraryVariants(FILE *f, struct annoAssembly *assembly)
 /* Concoct some variants at an arbitrary position on some assembly sequence.
  * We shouldn't ever get here unless the selected geneTrack is empty. */
 {
 char *chrom = "NULL";
 uint start = 0, end = 0;
 getCartPosOrDie(&chrom, &start, &end);
 struct bed4 *bed = bed4New(chrom, start, start, "ex_ins");
 struct dnaSeq *seq = twoBitReadSeqFragLower(assembly->tbf, chrom, start-1, start+100);
 writeMinimalVcfRowFromBed(f, bed, seq, start-1);
 dnaSeqFree(&seq);
 }
 
 static void addGpFromRow(struct genePred **pGpList, struct annoRow *row,
-			 boolean *pNeedCoding, boolean *pNeedNonCoding)
+			 boolean *pNeedCoding, boolean *pNeedNonCoding, boolean isBig)
 /* If row is coding and we need a coding gp, add it to pGpList and update pNeedCoding;
  * likewise for noncoding. */
 {
-struct genePred *gp = genePredLoad(row->data);
+struct genePred *gp = isBig ? genePredFromBigGenePredRow(row->data) : genePredLoad(row->data);
 if (gp->cdsStart != gp->cdsEnd && *pNeedCoding)
     {
     slAddHead(pGpList, gp);
     *pNeedCoding = FALSE;
     }
 else if (gp->cdsStart == gp->cdsEnd && *pNeedNonCoding)
     {
     slAddHead(pGpList, gp);
     *pNeedNonCoding = FALSE;
     }
 }
 
 static void addGpFromPos(struct annoStreamer *geneStream, char *chrom, uint start, uint end,
 			 struct genePred **pGpList, boolean *pNeedCoding, boolean *pNeedNonCoding,
-			 struct lm *lm)
+			 struct lm *lm, boolean isBig)
 /* Get up to 10 rows from position; if we find one coding and one non-coding gene,
  * add them to pGpList and update pNeed* accordingly. */
 {
 geneStream->setRegion(geneStream, chrom, start, end);
 int rowCount = 0;
 struct annoRow *row = NULL;
 while (rowCount < 10 && (*pNeedCoding || *pNeedNonCoding)
        && (row = geneStream->nextRow(geneStream, NULL, 0, lm)) != NULL)
-    addGpFromRow(pGpList, row, pNeedCoding, pNeedNonCoding);
+    addGpFromRow(pGpList, row, pNeedCoding, pNeedNonCoding, isBig);
 }
 
-static struct genePred *genesFromPosition(struct annoStreamer *geneStream,
+static struct genePred *genesFromPosition(struct annoStreamer *geneStream, boolean isBig,
 					  boolean *retGotCoding, boolean *retGotNonCoding)
 /* Try to get a coding and non-coding gene from geneStream at cart position.
  * If there are none, try whole chrom; if none there, first in assembly. */
 {
 struct genePred *gpList = NULL;
 struct lm *lm = lmInit(0);
 char *chrom = NULL;
 uint start = 0, end = 0;
 getCartPosOrDie(&chrom, &start, &end);
 boolean needCoding = TRUE, needNonCoding = TRUE;
 // First, look for both coding and noncoding genes at cart position:
-addGpFromPos(geneStream, chrom, start, end, &gpList, &needCoding, &needNonCoding, lm);
+addGpFromPos(geneStream, chrom, start, end, &gpList, &needCoding, &needNonCoding, lm, isBig);
 // If that didn't do it, try querying whole chrom
 if (needCoding || needNonCoding)
-    addGpFromPos(geneStream, chrom, 0, 0, &gpList, &needCoding, &needNonCoding, lm);
+    addGpFromPos(geneStream, chrom, 0, 0, &gpList, &needCoding, &needNonCoding, lm, isBig);
 // If we still haven't found either coding or non-coding on the cart's current chrom,
 // try whole genome:
 if (needCoding && needNonCoding)
-    addGpFromPos(geneStream, NULL, 0, 0, &gpList, &needCoding, &needNonCoding, lm);
+    addGpFromPos(geneStream, NULL, 0, 0, &gpList, &needCoding, &needNonCoding, lm, isBig);
 slSort(&gpList, genePredCmp);
 lmCleanup(&lm);
 if (retGotCoding)
     *retGotCoding = !needCoding;
 if (retGotNonCoding)
     *retGotNonCoding = !needNonCoding;
 return gpList;
 }
 
 static void addSnv(struct bed4 **pBedList, char *chrom, uint start, char *name)
 /* Add a single-nucleotide bed4 item. */
 {
 slAddHead(pBedList, bed4New(chrom, start, start+1, name));
 }
 
@@ -1726,32 +1727,33 @@
 
 // margin for intergenic variants around transcript:
 #define IGFUDGE 5
 
 static struct annoStreamer *makeSampleVariantsStreamer(struct annoAssembly *assembly,
 						       struct trackDb *geneTdb, int maxOutRows)
 /* Construct a VCF file of example variants for db (unless it already exists)
  * and return an annoStreamer for that file.  If possible, make the variants hit a gene. */
 {
 char *sampleFile = sampleVariantsPath(assembly, geneTdb->track);
 boolean forceRebuild = cartUsualBoolean(cart, "hgva_rebuildSampleVariants", FALSE);
 if (! fileExists(sampleFile) || forceRebuild)
     {
     struct annoStreamer *geneStream = hAnnoStreamerFromTrackDb(assembly, geneTdb->table, geneTdb,
                                                                NULL, ANNO_NO_LIMIT);
+    boolean isBig = sameString(geneTdb->type, "bigGenePred");
     boolean gotCoding = FALSE, gotNonCoding = FALSE;
-    struct genePred *gpList = genesFromPosition(geneStream, &gotCoding, &gotNonCoding);
+    struct genePred *gpList = genesFromPosition(geneStream, isBig, &gotCoding, &gotNonCoding);
     FILE *f = mustOpen(sampleFile, "w");
     writeMinimalVcfHeader(f, assembly->name);
     if (gpList == NULL)
 	{
 	warn("Unable to find any gene transcripts in '%s' (%s)",
 	     geneTdb->shortLabel, geneTdb->track);
 	writeArbitraryVariants(f, assembly);
 	}
     else
 	{
 	struct bed4 *bedList = NULL;
 	uint chromSize = annoAssemblySeqSize(assembly, gpList->chrom);
 	uint minSeqStart = chromSize, maxSeqEnd = 0;
 	struct genePred *gp;
 	for (gp = gpList;  gp != NULL;  gp = gp->next)