380a1b308bd3bb4f4e52d89ef9e1ccb962892bab angie Tue Oct 3 14:10:37 2017 -0700 Major changes to annoGratorGpVar, annoFormatVep and gpFx.c with the addition of functional effect prediction to variantProjector using PSL+CDS from annoStreamDbPslPlus, which enables accurate predictions even when the genome and transcript have indel differences. struct gpFx includes new members exonCount, txRef and txAlt so that gpFx and variantProjector can compute those and send them forward to annoFormatVep, instead of annoFormatVep computing them assuming that genome and transcript match perfectly. annoGratorGpVar passes forward the new gpFx members in output columns and, when input is PSL+CDS instead of genePred, uses variantProjector instead of gpFx to do functional predictions. diff --git src/hg/lib/hAnno.c src/hg/lib/hAnno.c index 55fac63..f7ddf0f 100644 --- src/hg/lib/hAnno.c +++ src/hg/lib/hAnno.c @@ -8,30 +8,31 @@ #include "factorSource.h" #include "grp.h" #include "hdb.h" #include "hubConnect.h" #include "hui.h" #include "jksql.h" #include "pgSnp.h" #include "trackHub.h" #include "vcf.h" #include "annoGratorQuery.h" #include "annoGratorGpVar.h" #include "annoStreamBigBed.h" #include "annoStreamBigWig.h" #include "annoStreamDb.h" #include "annoStreamDbFactorSource.h" +#include "annoStreamDbPslPlus.h" #include "annoStreamTab.h" #include "annoStreamVcf.h" #include "annoStreamLongTabix.h" #include "annoStreamWig.h" #include "annoGrateWigDb.h" #include "annoFormatTab.h" #include "annoFormatVep.h" //#*** duplicated in hgVarAnnoGrator and annoGratorTester struct annoAssembly *hAnnoGetAssembly(char *db) /* Make annoAssembly for db. */ { static struct annoAssembly *aa = NULL; if (aa == NULL) { @@ -336,34 +337,44 @@ if (dbTable != NULL) // This is really a database table, not a bigDataUrl CT. dataDb = CUSTOM_TRASH; } grator = annoGrateWigDbNew(dataDb, dbTable, assembly, agwmAverage, maxOutRows); } else if (startsWithWord("bigWig", tdb->type)) { char *fileOrUrl = getBigDataFileName(assembly->name, tdb, tdb->table, chrom); grator = annoGrateBigWigNew(fileOrUrl, assembly, agwmAverage); } else { struct annoStreamer *streamer = hAnnoStreamerFromTrackDb(assembly, selTable, tdb, chrom, maxOutRows, config); - if (primaryIsVariants && - (asColumnNamesMatchFirstN(streamer->asObj, genePredAsObj(), 10) || - asObjectsMatch(streamer->asObj, bigGenePredAsObj()))) + boolean streamerIsGenePred = asColumnNamesMatchFirstN(streamer->asObj, genePredAsObj(), 10); + boolean streamerIsBigGenePred = asObjectsMatch(streamer->asObj, bigGenePredAsObj()); + if (primaryIsVariants && (streamerIsGenePred || streamerIsBigGenePred)) + { + if (streamerIsGenePred && + (sameString("refGene", tdb->table) || startsWith("ncbiRefSeq", tdb->table))) + { + // We have PSL+CDS+seq for these tracks -- pass that instead of genePred + // to annoGratorGpVar + streamer->close(&streamer); + streamer = annoStreamDbPslPlusNew(assembly, tdb->table, maxOutRows); + } grator = annoGratorGpVarNew(streamer); + } else grator = annoGratorNew(streamer); } grator->setOverlapRule(grator, overlapRule); return grator; } static struct asObject *getAutoSqlForType(char *db, char *chrom, struct trackDb *tdb) /* Return an asObject for tdb->type if recognized as a hub or custom track type. */ { struct asObject * asObj = NULL; if (startsWith("wig", tdb->type) || startsWithWord("bigWig", tdb->type)) asObj = annoStreamBigWigAsObject(); else if (startsWith("big", tdb->type)) {