814df69886d2dc7c791db10417901a9768931c88
angie
Mon Jun 1 17:16:44 2015 -0700
Added support for bigGenePred to anno* libs for hgVai and hgIntegrator.
While updating annoFormatVep.c to use column indices from autoSql instead of
hardcoding genePred indices, I found that I had been using an incorrect index
for strand anyway -- so I removed 3 lines of no-op code.
refs #15439
diff --git src/hg/hgVai/hgVai.c src/hg/hgVai/hgVai.c
index 8ca0e9b..1ce2da0 100644
--- src/hg/hgVai/hgVai.c
+++ src/hg/hgVai/hgVai.c
@@ -12,30 +12,31 @@
#include "cheapcgi.h"
#include "cart.h"
#include "cartTrackDb.h"
#include "hui.h"
#include "grp.h"
#include "hCommon.h"
#include "hgFind.h"
#include "hPrint.h"
#include "jsHelper.h"
#include "memalloc.h"
#include "textOut.h"
#include "trackHub.h"
#include "hubConnect.h"
#include "twoBit.h"
#include "gpFx.h"
+#include "bigGenePred.h"
#include "udc.h"
#include "knetUdc.h"
#include "md5.h"
#include "regexHelper.h"
#include "hAnno.h"
#include "annoGratorQuery.h"
#include "annoGratorGpVar.h"
#include "annoFormatVep.h"
#include "annoStreamBigBed.h"
#include "libifyMe.h"
/* Global Variables */
struct cart *cart; /* CGI and other variables */
struct hash *oldVars = NULL; /* The cart before new cgi stuff added. */
@@ -469,31 +470,31 @@
}
printf("maximum number of variants to be processed:\n");
char *curLimit = cartUsualString(cart, "hgva_variantLimit", "10000");
char *limitLabels[] = { "10", "100", "1,000", "10,000", "100,000" };
char *limitValues[] = { "10", "100", "1000", "10000", "100000" };
cgiMakeDropListWithVals("hgva_variantLimit", limitLabels, limitValues, ArraySize(limitLabels),
curLimit);
printCtAndHubButtons();
puts("
");
}
boolean isGeneTrack(struct trackDb *tdb, void *filterData)
/* This is a TdbFilterFunction to get genePred tracks.
 * The result is based only on tdb->type; filterData is not read here.
 * Before this change only the startsWith("genePred", ...) test was applied; after it,
 * a type equal to "bigGenePred" is accepted as well.
 * NOTE(review): the bigGenePred test compares the whole type string while the genePred
 * test is a prefix test -- confirm that a bigGenePred type never carries extra words. */
{
-return (startsWith("genePred", tdb->type));
+return (startsWith("genePred", tdb->type) || sameString("bigGenePred", tdb->type));
}
boolean selectGenes()
/* Let user select a gene predictions track; return FALSE if there are no genePred tracks. */
{
struct slRef *trackRefList = NULL;
tdbFilterGroupTrack(fullTrackList, fullGroupList, isGeneTrack, NULL, NULL, &trackRefList);
boolean gotGP = (trackRefList != NULL);
if (!gotGP)
warn("This assembly (%s) has no gene prediction tracks, "
"so the VAI will not be able to annotate it.", database);
printf("
\n");
if (gotGP)
printf("The gene predictions selected here will be used ");
else
@@ -1590,81 +1591,81 @@
static void writeArbitraryVariants(FILE *f, struct annoAssembly *assembly)
/* Fallback for an empty geneTrack: write one made-up variant row to f, located at the
 * start of the cart's current position.  Sequence is read from assembly->tbf.
 * We shouldn't ever get here unless the selected geneTrack is empty. */
{
char *chrom = "NULL";
uint start = 0, end = 0;
getCartPosOrDie(&chrom, &start, &end);
// Zero-length item (start == end) named "ex_ins".
struct bed4 *insertion = bed4New(chrom, start, start, "ex_ins");
// Sequence window begins one base before the item and ends 100 bases after its start.
// NOTE(review): start is unsigned, so start - 1 wraps around if the cart start is 0 --
// confirm whether getCartPosOrDie can return 0 here.
uint seqStart = start - 1;
uint seqEnd = start + 100;
struct dnaSeq *flank = twoBitReadSeqFragLower(assembly->tbf, chrom, seqStart, seqEnd);
writeMinimalVcfRowFromBed(f, insertion, flank, seqStart);
// NOTE(review): only the sequence is released; the bed4 item is not freed in this function.
dnaSeqFree(&flank);
}
static void addGpFromRow(struct genePred **pGpList, struct annoRow *row,
- boolean *pNeedCoding, boolean *pNeedNonCoding)
+ boolean *pNeedCoding, boolean *pNeedNonCoding, boolean isBig)
/* If row is coding and we need a coding gp, add it to pGpList and update pNeedCoding;
 * likewise for noncoding.
 * "Coding" here means cdsStart != cdsEnd.  After this change, isBig selects which
 * loader turns row->data into a genePred: genePredFromBigGenePredRow when TRUE,
 * genePredLoad otherwise.
 * NOTE(review): when the row's kind is not (or no longer) needed, gp is neither added
 * to the list nor freed in this function -- presumably a small leak; confirm whether
 * the loaders allocate and whether a genePredFree is wanted on that path. */
{
-struct genePred *gp = genePredLoad(row->data);
+struct genePred *gp = isBig ? genePredFromBigGenePredRow(row->data) : genePredLoad(row->data);
// Coding transcript and one is still wanted: keep it and clear the flag.
if (gp->cdsStart != gp->cdsEnd && *pNeedCoding)
{
slAddHead(pGpList, gp);
*pNeedCoding = FALSE;
}
// Non-coding transcript (cdsStart == cdsEnd) and one is still wanted: keep it.
else if (gp->cdsStart == gp->cdsEnd && *pNeedNonCoding)
{
slAddHead(pGpList, gp);
*pNeedNonCoding = FALSE;
}
}
static void addGpFromPos(struct annoStreamer *geneStream, char *chrom, uint start, uint end,
struct genePred **pGpList, boolean *pNeedCoding, boolean *pNeedNonCoding,
- struct lm *lm)
+ struct lm *lm, boolean isBig)
/* Get up to 10 rows from position; if we find one coding and one non-coding gene,
 * add them to pGpList and update pNeed* accordingly.
 * The streamer's region is set to chrom:start-end before rows are read; rows are
 * requested with lm as the allocator argument and handed to addGpFromRow together
 * with isBig (added by this change).
 * NOTE(review): rowCount is initialized to 0 and tested in the loop condition but is
 * never incremented in this function, so the 10-row cap described above is not
 * actually enforced -- the loop ends only when both needs are met or rows run out. */
{
geneStream->setRegion(geneStream, chrom, start, end);
int rowCount = 0;
struct annoRow *row = NULL;
// Stop as soon as neither kind is needed; nextRow is only called while something is needed.
while (rowCount < 10 && (*pNeedCoding || *pNeedNonCoding)
&& (row = geneStream->nextRow(geneStream, NULL, 0, lm)) != NULL)
- addGpFromRow(pGpList, row, pNeedCoding, pNeedNonCoding);
+ addGpFromRow(pGpList, row, pNeedCoding, pNeedNonCoding, isBig);
}
-static struct genePred *genesFromPosition(struct annoStreamer *geneStream,
+static struct genePred *genesFromPosition(struct annoStreamer *geneStream, boolean isBig,
boolean *retGotCoding, boolean *retGotNonCoding)
/* Try to get a coding and non-coding gene from geneStream at cart position.
 * If there are none, try whole chrom; if none there, first in assembly.
 * Returns the collected genePreds sorted with genePredCmp (NULL if nothing was found).
 * retGotCoding / retGotNonCoding are optional (may be NULL); when given they are set to
 * whether a coding / non-coding transcript was found.  isBig (added by this change) is
 * passed through to addGpFromPos for every query. */
{
struct genePred *gpList = NULL;
// Local memory pool handed to the streamer for row allocation; released before returning.
struct lm *lm = lmInit(0);
char *chrom = NULL;
uint start = 0, end = 0;
getCartPosOrDie(&chrom, &start, &end);
boolean needCoding = TRUE, needNonCoding = TRUE;
// First, look for both coding and noncoding genes at cart position:
-addGpFromPos(geneStream, chrom, start, end, &gpList, &needCoding, &needNonCoding, lm);
+addGpFromPos(geneStream, chrom, start, end, &gpList, &needCoding, &needNonCoding, lm, isBig);
// If that didn't do it, try querying whole chrom
// (runs when at least one of the two kinds is still missing; start and end are both 0)
if (needCoding || needNonCoding)
- addGpFromPos(geneStream, chrom, 0, 0, &gpList, &needCoding, &needNonCoding, lm);
+ addGpFromPos(geneStream, chrom, 0, 0, &gpList, &needCoding, &needNonCoding, lm, isBig);
// If we still haven't found either coding or non-coding on the cart's current chrom,
// try whole genome:
// (note the && -- this runs only when nothing at all has been found; chrom is NULL)
if (needCoding && needNonCoding)
- addGpFromPos(geneStream, NULL, 0, 0, &gpList, &needCoding, &needNonCoding, lm, isBig);
+ addGpFromPos(geneStream, NULL, 0, 0, &gpList, &needCoding, &needNonCoding, lm, isBig);
slSort(&gpList, genePredCmp);
lmCleanup(&lm);
// A kind was "got" exactly when its need flag was cleared.
if (retGotCoding)
*retGotCoding = !needCoding;
if (retGotNonCoding)
*retGotNonCoding = !needNonCoding;
return gpList;
}
static void addSnv(struct bed4 **pBedList, char *chrom, uint start, char *name)
/* Build a one-base bed4 item (start to start+1) on chrom labeled name, and put it
 * on *pBedList with slAddHead. */
{
uint end = start + 1;
struct bed4 *snv = bed4New(chrom, start, end, name);
slAddHead(pBedList, snv);
}
@@ -1726,32 +1727,33 @@
// margin for intergenic variants around transcript:
#define IGFUDGE 5
static struct annoStreamer *makeSampleVariantsStreamer(struct annoAssembly *assembly,
struct trackDb *geneTdb, int maxOutRows)
/* Construct a VCF file of example variants for db (unless it already exists)
* and return an annoStreamer for that file. If possible, make the variants hit a gene. */
{
char *sampleFile = sampleVariantsPath(assembly, geneTdb->track);
boolean forceRebuild = cartUsualBoolean(cart, "hgva_rebuildSampleVariants", FALSE);
if (! fileExists(sampleFile) || forceRebuild)
{
struct annoStreamer *geneStream = hAnnoStreamerFromTrackDb(assembly, geneTdb->table, geneTdb,
NULL, ANNO_NO_LIMIT);
+ boolean isBig = sameString(geneTdb->type, "bigGenePred");
boolean gotCoding = FALSE, gotNonCoding = FALSE;
- struct genePred *gpList = genesFromPosition(geneStream, &gotCoding, &gotNonCoding);
+ struct genePred *gpList = genesFromPosition(geneStream, isBig, &gotCoding, &gotNonCoding);
FILE *f = mustOpen(sampleFile, "w");
writeMinimalVcfHeader(f, assembly->name);
if (gpList == NULL)
{
warn("Unable to find any gene transcripts in '%s' (%s)",
geneTdb->shortLabel, geneTdb->track);
writeArbitraryVariants(f, assembly);
}
else
{
struct bed4 *bedList = NULL;
uint chromSize = annoAssemblySeqSize(assembly, gpList->chrom);
uint minSeqStart = chromSize, maxSeqEnd = 0;
struct genePred *gp;
for (gp = gpList; gp != NULL; gp = gp->next)