814df69886d2dc7c791db10417901a9768931c88 angie Mon Jun 1 17:16:44 2015 -0700 Added support for bigGenePred to anno* libs for hgVai and hgIntegrator. While updating annoFormatVep.c to use column indices from autoSql instead of hardcoding genePred indices, I found that I had been using an incorrect index for strand anyway -- so I removed 3 lines of no-op code. refs #15439 diff --git src/hg/hgVai/hgVai.c src/hg/hgVai/hgVai.c index 8ca0e9b..1ce2da0 100644 --- src/hg/hgVai/hgVai.c +++ src/hg/hgVai/hgVai.c @@ -12,30 +12,31 @@ #include "cheapcgi.h" #include "cart.h" #include "cartTrackDb.h" #include "hui.h" #include "grp.h" #include "hCommon.h" #include "hgFind.h" #include "hPrint.h" #include "jsHelper.h" #include "memalloc.h" #include "textOut.h" #include "trackHub.h" #include "hubConnect.h" #include "twoBit.h" #include "gpFx.h" +#include "bigGenePred.h" #include "udc.h" #include "knetUdc.h" #include "md5.h" #include "regexHelper.h" #include "hAnno.h" #include "annoGratorQuery.h" #include "annoGratorGpVar.h" #include "annoFormatVep.h" #include "annoStreamBigBed.h" #include "libifyMe.h" /* Global Variables */ struct cart *cart; /* CGI and other variables */ struct hash *oldVars = NULL; /* The cart before new cgi stuff added. */ @@ -469,31 +470,31 @@ } printf("<B>maximum number of variants to be processed:</B>\n"); char *curLimit = cartUsualString(cart, "hgva_variantLimit", "10000"); char *limitLabels[] = { "10", "100", "1,000", "10,000", "100,000" }; char *limitValues[] = { "10", "100", "1000", "10000", "100000" }; cgiMakeDropListWithVals("hgva_variantLimit", limitLabels, limitValues, ArraySize(limitLabels), curLimit); printCtAndHubButtons(); puts("<BR>"); } boolean isGeneTrack(struct trackDb *tdb, void *filterData) /* This is a TdbFilterFunction to get genePred tracks. */ { -return (startsWith("genePred", tdb->type)); +return (startsWith("genePred", tdb->type) || sameString("bigGenePred", tdb->type)); } boolean selectGenes() /* Let user select a gene predictions track; return FALSE if there are no genePred tracks. */ { struct slRef *trackRefList = NULL; tdbFilterGroupTrack(fullTrackList, fullGroupList, isGeneTrack, NULL, NULL, &trackRefList); boolean gotGP = (trackRefList != NULL); if (!gotGP) warn("This assembly (%s) has no gene prediction tracks, " "so the VAI will not be able to annotate it.", database); printf("<div class='sectionLiteHeader'>Select Genes</div>\n"); if (gotGP) printf("The gene predictions selected here will be used "); else @@ -1590,81 +1591,81 @@ static void writeArbitraryVariants(FILE *f, struct annoAssembly *assembly) /* Concoct some variants at an arbitrary position on some assembly sequence. * We shouldn't ever get here unless the selected geneTrack is empty. */ { char *chrom = "NULL"; uint start = 0, end = 0; getCartPosOrDie(&chrom, &start, &end); struct bed4 *bed = bed4New(chrom, start, start, "ex_ins"); struct dnaSeq *seq = twoBitReadSeqFragLower(assembly->tbf, chrom, start-1, start+100); writeMinimalVcfRowFromBed(f, bed, seq, start-1); dnaSeqFree(&seq); } static void addGpFromRow(struct genePred **pGpList, struct annoRow *row, - boolean *pNeedCoding, boolean *pNeedNonCoding) + boolean *pNeedCoding, boolean *pNeedNonCoding, boolean isBig) /* If row is coding and we need a coding gp, add it to pGpList and update pNeedCoding; * likewise for noncoding. */ { -struct genePred *gp = genePredLoad(row->data); +struct genePred *gp = isBig ? genePredFromBigGenePredRow(row->data) : genePredLoad(row->data); if (gp->cdsStart != gp->cdsEnd && *pNeedCoding) { slAddHead(pGpList, gp); *pNeedCoding = FALSE; } else if (gp->cdsStart == gp->cdsEnd && *pNeedNonCoding) { slAddHead(pGpList, gp); *pNeedNonCoding = FALSE; } } static void addGpFromPos(struct annoStreamer *geneStream, char *chrom, uint start, uint end, struct genePred **pGpList, boolean *pNeedCoding, boolean *pNeedNonCoding, - struct lm *lm) + struct lm *lm, boolean isBig) /* Get up to 10 rows from position; if we find one coding and one non-coding gene, * add them to pGpList and update pNeed* accordingly. */ { geneStream->setRegion(geneStream, chrom, start, end); int rowCount = 0; struct annoRow *row = NULL; while (rowCount < 10 && (*pNeedCoding || *pNeedNonCoding) && (row = geneStream->nextRow(geneStream, NULL, 0, lm)) != NULL) - addGpFromRow(pGpList, row, pNeedCoding, pNeedNonCoding); + addGpFromRow(pGpList, row, pNeedCoding, pNeedNonCoding, isBig); } -static struct genePred *genesFromPosition(struct annoStreamer *geneStream, +static struct genePred *genesFromPosition(struct annoStreamer *geneStream, boolean isBig, boolean *retGotCoding, boolean *retGotNonCoding) /* Try to get a coding and non-coding gene from geneStream at cart position. * If there are none, try whole chrom; if none there, first in assembly. */ { struct genePred *gpList = NULL; struct lm *lm = lmInit(0); char *chrom = NULL; uint start = 0, end = 0; getCartPosOrDie(&chrom, &start, &end); boolean needCoding = TRUE, needNonCoding = TRUE; // First, look for both coding and noncoding genes at cart position: -addGpFromPos(geneStream, chrom, start, end, &gpList, &needCoding, &needNonCoding, lm); +addGpFromPos(geneStream, chrom, start, end, &gpList, &needCoding, &needNonCoding, lm, isBig); // If that didn't do it, try querying whole chrom if (needCoding || needNonCoding) - addGpFromPos(geneStream, chrom, 0, 0, &gpList, &needCoding, &needNonCoding, lm); + addGpFromPos(geneStream, chrom, 0, 0, &gpList, &needCoding, &needNonCoding, lm, isBig); // If we still haven't found either coding or non-coding on the cart's current chrom, // try whole genome: if (needCoding && needNonCoding) - addGpFromPos(geneStream, NULL, 0, 0, &gpList, &needCoding, &needNonCoding, lm); + addGpFromPos(geneStream, NULL, 0, 0, &gpList, &needCoding, &needNonCoding, lm, isBig); slSort(&gpList, genePredCmp); lmCleanup(&lm); if (retGotCoding) *retGotCoding = !needCoding; if (retGotNonCoding) *retGotNonCoding = !needNonCoding; return gpList; } static void addSnv(struct bed4 **pBedList, char *chrom, uint start, char *name) /* Add a single-nucleotide bed4 item. */ { slAddHead(pBedList, bed4New(chrom, start, start+1, name)); } @@ -1726,32 +1727,33 @@ // margin for intergenic variants around transcript: #define IGFUDGE 5 static struct annoStreamer *makeSampleVariantsStreamer(struct annoAssembly *assembly, struct trackDb *geneTdb, int maxOutRows) /* Construct a VCF file of example variants for db (unless it already exists) * and return an annoStreamer for that file. If possible, make the variants hit a gene. */ { char *sampleFile = sampleVariantsPath(assembly, geneTdb->track); boolean forceRebuild = cartUsualBoolean(cart, "hgva_rebuildSampleVariants", FALSE); if (! fileExists(sampleFile) || forceRebuild) { struct annoStreamer *geneStream = hAnnoStreamerFromTrackDb(assembly, geneTdb->table, geneTdb, NULL, ANNO_NO_LIMIT); + boolean isBig = sameString(geneTdb->type, "bigGenePred"); boolean gotCoding = FALSE, gotNonCoding = FALSE; - struct genePred *gpList = genesFromPosition(geneStream, &gotCoding, &gotNonCoding); + struct genePred *gpList = genesFromPosition(geneStream, isBig, &gotCoding, &gotNonCoding); FILE *f = mustOpen(sampleFile, "w"); writeMinimalVcfHeader(f, assembly->name); if (gpList == NULL) { warn("Unable to find any gene transcripts in '%s' (%s)", geneTdb->shortLabel, geneTdb->track); writeArbitraryVariants(f, assembly); } else { struct bed4 *bedList = NULL; uint chromSize = annoAssemblySeqSize(assembly, gpList->chrom); uint minSeqStart = chromSize, maxSeqEnd = 0; struct genePred *gp; for (gp = gpList; gp != NULL; gp = gp->next)