8b49d2145ae719c3b3577de7956f7b9131c385ba angie Wed Jan 7 09:43:45 2015 -0800 Libified code from hgVai/libifyMe.c to lib/hAnno.c in anticipationof sharing code with hgAi. Also added hAnnoGetAutoSqlForTdb for hgAi. diff --git src/hg/hgVai/hgVai.c src/hg/hgVai/hgVai.c index 3a96088..66f910b 100644 --- src/hg/hgVai/hgVai.c +++ src/hg/hgVai/hgVai.c @@ -16,30 +16,31 @@ #include "grp.h" #include "hCommon.h" #include "hgFind.h" #include "hPrint.h" #include "jsHelper.h" #include "memalloc.h" #include "textOut.h" #include "trackHub.h" #include "hubConnect.h" #include "twoBit.h" #include "gpFx.h" #include "udc.h" #include "knetUdc.h" #include "md5.h" #include "regexHelper.h" +#include "hAnno.h" #include "annoGratorQuery.h" #include "annoGratorGpVar.h" #include "annoFormatVep.h" #include "annoStreamBigBed.h" #include "libifyMe.h" /* Global Variables */ struct cart *cart; /* CGI and other variables */ struct hash *oldVars = NULL; /* The cart before new cgi stuff added. */ char *genome = NULL; /* Name of genome - mouse, human, etc. */ char *database = NULL; /* Current genome database - hg17, mm5, etc. */ char *regionType = NULL; /* genome, ENCODE pilot regions, or specific position range. */ struct grp *fullGroupList = NULL; /* List of all groups. */ struct trackDb *fullTrackList = NULL; /* List of all tracks in database. */ @@ -1167,33 +1168,34 @@ // First look in gratorsByName to see if this grator has already been constructed: if (tdb != NULL) { grator = hashFindVal(gratorsByName, tdb->table); if (retDescription) *retDescription = cloneString(tdb->longLabel); } if (grator != NULL) { // Set its overlap behavior and we're done. grator->setOverlapRule(grator, overlapRule); return grator; } // If not in gratorsByName, then attempt to construct it here: if (fileName != NULL) - grator = gratorFromBigDataFileOrUrl(fileName, assembly, NO_MAXROWS, overlapRule); + grator = hAnnoGratorFromBigFileUrl(fileName, assembly, NO_MAXROWS, overlapRule); else - grator = gratorFromTrackDb(assembly, tdb->table, tdb, chrom, NO_MAXROWS, NULL, overlapRule); + grator = hAnnoGratorFromTrackDb(assembly, tdb->table, tdb, chrom, NO_MAXROWS, + NULL, overlapRule); if (grator != NULL) hashAdd(gratorsByName, tdb->table, grator); return grator; } char *tableNameFromSourceName(char *sourceName) /* Strip sourceName (file path or db.table) to just the basename or table name. */ { char *tableName = cloneString(sourceName); char *p = strrchr(tableName, '/'); if (p != NULL) { // file path; strip to basename char dir[PATH_LEN], name[FILENAME_LEN], extension[FILEEXT_LEN]; splitPath(tableName, dir, name, extension); @@ -1289,31 +1291,31 @@ void addDbNsfpSeqChange(char *trackName, struct annoAssembly *assembly, struct hash *gratorsByName, struct annoGrator **pGratorList) // If the user has selected dbNsfp* data, we also need the underlying dbNsfpSeqChange // data, so annoFormatVep can tell whether the variant and gpFx are consistent with the // variant and transcript that dbNsfp used to calculate scores. { //#*** Yet another place where we need metadata: char *seqChangeTable = "dbNsfpSeqChange"; if (hashFindVal(gratorsByName, seqChangeTable) == NULL) { char *fileName = fileNameFromTable(seqChangeTable); if (fileName == NULL) errAbort("'%s' requested, but I can't find fileName for %s", trackName, seqChangeTable); - struct annoGrator *grator = gratorFromBigDataFileOrUrl(fileName, assembly, NO_MAXROWS, + struct annoGrator *grator = hAnnoGratorFromBigFileUrl(fileName, assembly, NO_MAXROWS, agoNoConstraint); updateGratorList(grator, pGratorList); hashAdd(gratorsByName, seqChangeTable, grator); } } static struct dyString *dyInfo = NULL; struct hash *getTrackFilterVars(char *track) /* Return a hash of filter variable names (cart variable suffixes) to slName lists of values. */ { char filterPrefix[512]; safef(filterPrefix, sizeof(filterPrefix), "hgva_filter_%s_", track); struct slPair *filterVars = cartVarsWithPrefix(cart, filterPrefix), *var; int prefixLen = strlen(filterPrefix); @@ -1421,38 +1423,38 @@ if (grator != NULL) // We already have this as a grator: continue; enum PolyPhen2Subset subset = noSubset; char *description = NULL; char *column = NULL; boolean isReg = FALSE; if (startsWith("dbNsfp", trackName)) { // trackName for PolyPhen2 has a suffix for subset -- strip it if we find it: subset = stripSubsetFromTrackName(trackName); description = dbNsfpDescFromTableName(trackName, subset, doHtml); addDbNsfpSeqChange(trackName, assembly, gratorsByName, pGratorList); char *fileName = fileNameFromTable(trackName); if (fileName != NULL) - grator = gratorFromBigDataFileOrUrl(fileName, assembly, NO_MAXROWS, agoNoConstraint); + grator = hAnnoGratorFromBigFileUrl(fileName, assembly, NO_MAXROWS, agoNoConstraint); } else { struct trackDb *tdb = tdbForTrack(database, trackName, &fullTrackList); if (tdb != NULL) { - grator = gratorFromTrackDb(assembly, tdb->table, tdb, chrom, NO_MAXROWS, NULL, + grator = hAnnoGratorFromTrackDb(assembly, tdb->table, tdb, chrom, NO_MAXROWS, NULL, agoNoConstraint); if (grator != NULL) { //#*** Need something more sophisticated but this works for our //#*** limited selection of extra tracks: if (asColumnFind(grator->streamer.asObj, "name") != NULL) column = "name"; addFiltersToGrator(grator, tdb); } description = tdb->longLabel; isReg = includeReg && isRegulatoryTrack(tdb, NULL); } } haveReg |= isReg; updateGratorListAndVepExtra(grator, pGratorList, vepOut, subset, column, description, isReg); @@ -1477,31 +1479,31 @@ if (!cartUsualBoolean(cart, "hgva_include_snpMult", TRUE)) { struct annoGrator *grator = gratorForSnpBed4(gratorsByName, "Mult", assembly, chrom, agoMustNotOverlap, NULL); updateGratorList(grator, pGratorList); } if (cartUsualBoolean(cart, "hgva_require_consEl", FALSE)) { char *consElTrack = cartString(cart, "hgva_require_consEl_track"); struct annoGrator *grator = hashFindVal(gratorsByName, consElTrack); if (grator == NULL) { struct trackDb *tdb = tdbForTrack(database, consElTrack, &fullTrackList); if (tdb != NULL) - grator = gratorFromTrackDb(assembly, tdb->table, tdb, chrom, NO_MAXROWS, NULL, + grator = hAnnoGratorFromTrackDb(assembly, tdb->table, tdb, chrom, NO_MAXROWS, NULL, agoMustOverlap); updateGratorList(grator, pGratorList); } else grator->setOverlapRule(grator, agoMustOverlap); } } static void getCartPosOrDie(char **retChrom, uint *retStart, uint *retEnd) /* Get chrom:start-end from cart, errAbort if any problems. */ { char *position = cartString(cart, hgvaRange); if (! parsePosition(position, retChrom, retStart, retEnd)) errAbort("Expected position to be chrom:start-end but got '%s'", position); } @@ -1724,32 +1726,32 @@ return bedList; } // margin for intergenic variants around transcript: #define IGFUDGE 5 static struct annoStreamer *makeSampleVariantsStreamer(struct annoAssembly *assembly, struct trackDb *geneTdb, int maxOutRows) /* Construct a VCF file of example variants for db (unless it already exists) * and return an annoStreamer for that file. If possible, make the variants hit a gene. */ { char *sampleFile = sampleVariantsPath(assembly, geneTdb->track); boolean forceRebuild = cartUsualBoolean(cart, "hgva_rebuildSampleVariants", FALSE); if (! fileExists(sampleFile) || forceRebuild) { - struct annoStreamer *geneStream = streamerFromTrack(assembly, geneTdb->table, geneTdb, NULL, - NO_MAXROWS); + struct annoStreamer *geneStream = hAnnoStreamerFromTrackDb(assembly, geneTdb->table, geneTdb, + NULL, NO_MAXROWS); boolean gotCoding = FALSE, gotNonCoding = FALSE; struct genePred *gpList = genesFromPosition(geneStream, &gotCoding, &gotNonCoding); FILE *f = mustOpen(sampleFile, "w"); writeMinimalVcfHeader(f, assembly->name); if (gpList == NULL) { warn("Unable to find any gene transcripts in '%s' (%s)", geneTdb->shortLabel, geneTdb->track); writeArbitraryVariants(f, assembly); } else { struct bed4 *bedList = NULL; uint chromSize = annoAssemblySeqSize(assembly, gpList->chrom); uint minSeqStart = chromSize, maxSeqEnd = 0; @@ -2140,31 +2142,31 @@ /* If we're able to detect regulatory elements, and want to keep those annotations, loosen up * gpVarGrator's overlap rule from the default (must overlap). */ { if (haveRegulatory && cartUsualBoolean(cart, "hgva_include_regulatory", TRUE)) gpVarGrator->setOverlapRule(gpVarGrator, agoNoConstraint); } void doQuery() /* Translate simple form inputs into anno* components and execute query. */ { dyInfo = dyStringNew(0); char *chrom = NULL; uint start = 0, end = 0; if (sameString(regionType, hgvaRegionTypeRange)) getCartPosOrDie(&chrom, &start, &end); -struct annoAssembly *assembly = getAnnoAssembly(database); +struct annoAssembly *assembly = hAnnoGetAssembly(database); char *geneTrack = cartString(cart, "hgva_geneTrack"); struct trackDb *geneTdb = tdbForTrack(database, geneTrack, &fullTrackList); if (geneTdb == NULL) { warn("Can't find tdb for gene track %s", geneTrack); doUi(); return; } int maxVarRows = cartUsualInt(cart, "hgva_variantLimit", 10); struct annoStreamer *primary = NULL; char *primaryLongLabel = NULL; char *variantTrack = cartString(cart, "hgva_variantTrack"); struct slName *commentList = NULL; @@ -2180,36 +2182,36 @@ } else if (sameString(variantTrack, hgvaUseVariantIds)) { // Override search position if cart position doesn't include all variants: primary = makeVariantIdStreamer(assembly, maxVarRows, &chrom, &start, &end, &commentList); primaryLongLabel = hgvaVariantIdsLabel; } else { struct trackDb *varTdb = getVariantTrackDb(variantTrack); if (varTdb == NULL) { doUi(); return; } - primary = streamerFromTrack(assembly, varTdb->table, varTdb, chrom, maxVarRows); + primary = hAnnoStreamerFromTrackDb(assembly, varTdb->table, varTdb, chrom, maxVarRows); primaryLongLabel = varTdb->longLabel; } enum annoGratorOverlap geneOverlapRule = agoMustOverlap; -struct annoGrator *gpVarGrator = gratorFromTrackDb(assembly, geneTdb->table, geneTdb, chrom, +struct annoGrator *gpVarGrator = hAnnoGratorFromTrackDb(assembly, geneTdb->table, geneTdb, chrom, NO_MAXROWS, primary->asObj, geneOverlapRule); setGpVarFuncFilter(gpVarGrator); // Some grators may be used as both filters and output values. To avoid making // multiple grators for the same source, hash them by trackName: struct hash *gratorsByName = hashNew(8); struct annoGrator *snpGrator = NULL; char *snpDesc = NULL; if (cartUsualBoolean(cart, "hgva_rsId", FALSE)) snpGrator = gratorForSnpBed4(gratorsByName, "", assembly, chrom, agoNoConstraint, &snpDesc); // Now construct gratorList in the order in which annoFormatVep wants to see them, // i.e. first the gpVar, then the snpNNN, then whatever else: struct annoGrator *gratorList = NULL;