533112afe2a2005e80cdb1f82904ea65032d4302
braney
  Sat Oct 2 11:37:34 2021 -0700
split hg/lib into two separate libaries, one only used by the cgis

diff --git src/hg/cgilib/tests/annoGratorTester.c src/hg/cgilib/tests/annoGratorTester.c
new file mode 100644
index 0000000..74e4038
--- /dev/null
+++ src/hg/cgilib/tests/annoGratorTester.c
@@ -0,0 +1,516 @@
+/* annoGratorTester -- exercise anno* lib modules (in kent/src as well as kent/src/hg) */
+
+/* Copyright (C) 2014 The Regents of the University of California 
+ * See README in this or parent directory for licensing information. */
+
+#include "annoGratorQuery.h"
+#include "annoGratorGpVar.h"
+#include "annoStreamBigBed.h"
+#include "annoStreamDb.h"
+#include "annoStreamTab.h"
+#include "annoStreamVcf.h"
+#include "annoStreamWig.h"
+#include "annoGrateWigDb.h"
+#include "annoFormatTab.h"
+#include "annoFormatVep.h"
+#include "bigBed.h"
+#include "dystring.h"
+#include "genePred.h"
+#include "hdb.h"
+#include "knetUdc.h"
+#include "memalloc.h"
+#include "pgSnp.h"
+#include "udc.h"
+#include "vcf.h"
+
+//#*** duplicated from hgVarAnnoGrator... libify me!
+struct annoAssembly *getAnnoAssembly(char *db)
+/* Make annoAssembly for db. */
+{
+static struct annoAssembly *aa = NULL;
+if (aa == NULL)
+    {
+    char *nibOrTwoBitDir = hDbDbNibPath(db);
+    if (nibOrTwoBitDir == NULL)
+        errAbort("Can't find .2bit for db '%s'", db);
+    char twoBitPath[HDB_MAX_PATH_STRING];
+    safef(twoBitPath, sizeof(twoBitPath), "%s/%s.2bit", nibOrTwoBitDir, db);
+    char *path = hReplaceGbdb(twoBitPath);
+    aa = annoAssemblyNew(db, path);
+    freeMem(path);
+    }
+return aa;
+}
+
+struct streamerInfo
+/* Enough info to create a streamer or grator that gets data from sql, file or URL. */
+    {
+    struct streamerInfo *next;
+    struct annoAssembly *assembly;	// Reference assembly name and sequence.
+    char *sqlDb;		// If non-NULL, then we are using this SQL database
+    char *tableFileUrl;		// If db is non-NULL, table name; else file or URL
+    enum annoRowType type;	// Data type (wig or words?)
+    struct asObject *asObj;	// not used if we're using a sqlDb table
+    };
+
+struct annoStreamer *streamerFromInfo(struct streamerInfo *info)
+/* Figure out which constructor to call, call it and return the results. */
+{
+struct annoStreamer *streamer = NULL;
+if (info->type == arWigVec)
+    streamer = annoStreamWigDbNew(info->sqlDb, info->tableFileUrl, info->assembly, BIGNUM);
+else if (info->sqlDb != NULL)
+    streamer = annoStreamDbNew(info->sqlDb, info->tableFileUrl, info->assembly, BIGNUM, NULL);
+else if (info->asObj && asObjectsMatch(info->asObj, vcfAsObj()))
+    {
+    //#*** this is kludgey, should test for .tbi file:
+    boolean looksLikeTabix = endsWith(info->tableFileUrl, ".gz");
+    streamer = annoStreamVcfNew(info->tableFileUrl, NULL, looksLikeTabix, info->assembly, BIGNUM);
+    }
+else if (endsWith(info->tableFileUrl, ".bb"))
+    {
+    streamer = annoStreamBigBedNew(info->tableFileUrl, info->assembly, BIGNUM);
+    }
+else
+    {
+    streamer = annoStreamTabNew(info->tableFileUrl, info->assembly, info->asObj, BIGNUM);
+    }
+return streamer;
+}
+
+void sourcesFromInfoList(struct streamerInfo *infoList, bool doGpFx,
+			 struct annoStreamer **retPrimary, struct annoGrator **retGrators)
+/* Translate streamerInfo parameters into primary source and list of secondary sources. */
+{
+assert(infoList && retPrimary && retGrators);
+struct streamerInfo *primaryInfo = infoList;
+struct streamerInfo *gratorInfoList = infoList->next;
+struct annoStreamer *primary = streamerFromInfo(primaryInfo);
+struct annoGrator *gratorList = NULL;
+struct streamerInfo *grInfo;
+for (grInfo = gratorInfoList;  grInfo != NULL;  grInfo = grInfo->next)
+    {
+    struct annoGrator *grator = NULL;
+    if (grInfo->type == arWigVec || grInfo->type == arWigSingle)
+	{
+	if (grInfo->sqlDb == NULL)
+	    grator = annoGrateBigWigNew(grInfo->tableFileUrl, grInfo->assembly, agwmAverage);
+	else
+	    grator = annoGrateWigDbNew(grInfo->sqlDb, grInfo->tableFileUrl, grInfo->assembly,
+				       agwmAverage, BIGNUM);
+	}
+    else
+	{
+	struct annoStreamer *src = streamerFromInfo(grInfo);
+	if (doGpFx && grInfo->asObj && asColumnNamesMatchFirstN(grInfo->asObj, genePredAsObj(), 10))
+	    grator = annoGratorGpVarNew(src);
+	else
+	    grator = annoGratorNew(src);
+	}
+    slAddHead(&gratorList, grator);
+    }
+slReverse(&gratorList);
+*retPrimary = primary;
+*retGrators = gratorList;
+}
+
+struct asObject *bigBedAsFromFileName(char *fileName)
+/* Look up bigBed filename in table and get its internally stored autoSql definition. */
+{
+struct bbiFile *bbi = bigBedFileOpen(fileName);
+struct asObject *asObj = bigBedAs(bbi);
+bigBedFileClose(&bbi);
+return asObj;
+}
+
+void dbToTabOut(struct streamerInfo *infoList, char *outFile,
+		char *chrom, uint start, uint end, bool doGpFx)
+/* Get data from one or more database tables and print all fields to tab-sep output. */
+{
+struct annoStreamer *primary = NULL;
+struct annoGrator *gratorList = NULL;
+sourcesFromInfoList(infoList, doGpFx, &primary, &gratorList);
+struct annoFormatter *tabOut = annoFormatTabNew(outFile);
+struct annoGratorQuery *query = annoGratorQueryNew(primary->assembly, primary, gratorList, tabOut);
+annoGratorQuerySetRegion(query, chrom, start, end);
+annoGratorQueryExecute(query);
+annoGratorQueryFree(&query);
+}
+
+void pgSnpDbToTabOut(struct annoAssembly *assembly)
+// First test: some rows of a pgSnp table
+{
+char *sqlDb = assembly->name;
+struct streamerInfo pgSnpInfo = { NULL, assembly, sqlDb, "pgNA12878", arWords, pgSnpAsObj() };
+pgSnpInfo.next = NULL;
+dbToTabOut(&pgSnpInfo, "stdout", "chr1", 705881, 752721, FALSE);
+}
+
+void pgSnpKgDbToTabOutShort(struct annoAssembly *assembly)
+// Second test: some rows of a pgSnp table integrated with knownGene
+{
+char *sqlDb = assembly->name;
+struct streamerInfo pgSnpInfo = { NULL, assembly, sqlDb, "pgNA12878", arWords, pgSnpAsObj() };
+struct streamerInfo kgInfo = { NULL, assembly, sqlDb, "knownGene", arWords,
+                               asParseFile("../../lib/knownGene.as") };
+pgSnpInfo.next = &kgInfo;
+dbToTabOut(&pgSnpInfo, "stdout", "chr1", 705881, 752721, FALSE);
+}
+
+void pgSnpKgDbToTabOutLong(struct annoAssembly *assembly)
+// Third test: all rows of a pgSnp table integrated with knownGene
+{
+char *sqlDb = assembly->name;
+struct streamerInfo pgSnpInfo = { NULL, assembly, sqlDb, "pgNA12878", arWords, pgSnpAsObj() };
+dbToTabOut(&pgSnpInfo, "stdout", NULL, 0, 0, FALSE);
+}
+
+void snpConsDbToTabOutShort(struct annoAssembly *assembly)
+// Fourth test: some rows of snp135 integrated with phyloP scores
+{
+char *sqlDb = assembly->name;
+struct streamerInfo snp135Info = { NULL, assembly, sqlDb, "snp135", arWords,
+                                   asParseFile("../../lib/snp132Ext.as") };
+struct streamerInfo phyloPInfo = { NULL, assembly, sqlDb, "phyloP46wayPlacental", arWigSingle,
+                                   NULL };
+snp135Info.next = &phyloPInfo;
+dbToTabOut(&snp135Info, "stdout", "chr1", 737224, 738475, FALSE);
+}
+
+void snpConsDbToTabOutLong(struct annoAssembly *assembly)
+// Long-running!: All rows of snp135 integrated with phyloP scores
+{
+char *sqlDb = assembly->name;
+struct streamerInfo snp135Info = { NULL, assembly, sqlDb, "snp135", arWords,
+                                   asParseFile("../../lib/snp132Ext.as") };
+struct streamerInfo phyloPInfo = { NULL, assembly, sqlDb, "phyloP46wayPlacental", arWigSingle,
+                                   NULL };
+snp135Info.next = &phyloPInfo;
+dbToTabOut(&snp135Info, "stdout", NULL, 0, 0, FALSE);
+}
+
+void vcfEx1(struct annoAssembly *assembly)
+// Fifth test: VCF with genotypes
+{
+struct streamerInfo vcfEx1 = { NULL, assembly, NULL,
+                               "http://genome.ucsc.edu/goldenPath/help/examples/vcfExample.vcf.gz",
+                               arWords, vcfAsObj() };
+dbToTabOut(&vcfEx1, "stdout", NULL, 0, 0, FALSE);
+}
+
+void vcfEx2(struct annoAssembly *assembly)
+// VCF with no genotypes
+{
+struct streamerInfo vcfEx2 = { NULL, assembly, NULL,
+                               "http://genome.ucsc.edu/goldenPath/help/examples/vcfExampleTwo.vcf",
+                               arWords, vcfAsObj() };
+dbToTabOut(&vcfEx2, "stdout", NULL, 0, 0, FALSE);
+}
+
+void pgSnpKgDbToGpFx(struct annoAssembly *assembly)
+// pgSnp + knownGene + gpFx = annotated variants
+{
+char *sqlDb = assembly->name;
+struct streamerInfo pg2SnpInfo = { NULL, assembly, NULL,
+                                   "input/annoGrator/pgForTestingGpFx.pgSnp.tab",
+                                   arWords, pgSnpAsObj() };
+struct streamerInfo kgInfo = { NULL, assembly, sqlDb, "knownGene", arWords,
+                               asParseFile("../../lib/knownGene.as") };
+pg2SnpInfo.next = &kgInfo;
+dbToTabOut(&pg2SnpInfo, "stdout", NULL, 0, 0, TRUE);
+
+/*
+  FIXME
+  // 3base insertion CDS - chr3:124,646,699-124,646,718
+  dbToTabOut(&pg2SnpInfo, "stdout", "chr3",124646699,124646718, TRUE);
+*/
+}
+
+void bigBedToTabOut(struct annoAssembly *assembly)
+// like bigBedToBed
+{
+struct streamerInfo bigBedInfo = { NULL, assembly, NULL,
+                               "http://genome.ucsc.edu/goldenPath/help/examples/bigBedExample.bb",
+                                   arWords, NULL };
+dbToTabOut(&bigBedInfo, "stdout", "chr21", 34716800, 34733700, FALSE);
+}
+
+void snpBigWigToTabOut(struct annoAssembly *assembly)
+// text + scores
+{
+char *sqlDb = assembly->name;
+struct streamerInfo snp135Info = { NULL, assembly, sqlDb, "snp135", arWords,
+                                   asParseFile("../../lib/snp132Ext.as") };
+struct streamerInfo bigWigInfo = { NULL, assembly, NULL,
+                               "http://genome.ucsc.edu/goldenPath/help/examples/bigWigExample.bw",
+                                   arWigSingle, NULL };
+snp135Info.next = &bigWigInfo;
+dbToTabOut(&snp135Info, "stdout", "chr21", 34716800, 34733700, FALSE);
+}
+
+void vepOut(struct annoAssembly *assembly)
+// variants + genes + gpFx + snps + annoFormatVep = annotated variants in VEP format
+{
+char *sqlDb = assembly->name;
+struct streamerInfo vepSamplePgSnp = { NULL, assembly, NULL,
+                                       "input/annoGrator/vepSample.pgSnp.tab",
+                                       arWords, asParseFile("../../lib/pgSnp.as") };
+struct streamerInfo ensGInfo = { NULL, assembly, sqlDb, "ensGene", arWords,
+                               asParseFile("../../lib/genePredExt.as") };
+struct streamerInfo snpInfo = { NULL, assembly, sqlDb, "snp135", arWords,
+                                asParseFile("../../lib/snp132Ext.as") };
+vepSamplePgSnp.next = &ensGInfo;
+ensGInfo.next = &snpInfo;
+// Instead of dbToTabOut, we need to make a VEP config data structure and
+// use it to create an annoFormatVep.
+struct streamerInfo *primaryInfo = &vepSamplePgSnp;
+struct annoStreamer *primary = NULL;
+struct annoGrator *gratorList = NULL;
+sourcesFromInfoList(primaryInfo, TRUE, &primary, &gratorList);
+struct annoStreamer *gpVarSource = (struct annoStreamer *)gratorList;
+struct annoStreamer *snpSource = gpVarSource->next;
+struct annoFormatter *vepOut = annoFormatVepNew("stdout", FALSE, primary, "vepSamplePgSnp",
+                                                gpVarSource, "UCSC Genes ...",
+                                                snpSource, "just dbSNP 135", assembly);
+struct annoGratorQuery *query = annoGratorQueryNew(assembly, primary, gratorList, vepOut);
+annoGratorQuerySetRegion(query, "chr1", 876900, 886920);
+annoGratorQueryExecute(query);
+annoGratorQuerySetRegion(query, "chr5", 135530, 145535);
+annoGratorQueryExecute(query);
+annoGratorQueryFree(&query);
+}
+
+void vepOutIndelTrim(struct annoAssembly *assembly)
+// variants with VCF's awful indel coordinates + ... = VEP
+{
+char *sqlDb = assembly->name;
+struct streamerInfo indelTrimVcf = { NULL, assembly, NULL,
+                                     "input/annoGrator/indelTrim.vcf",
+                                     arWords, vcfAsObj() };
+struct streamerInfo gencodeInfo = { NULL, assembly, sqlDb, "wgEncodeGencodeBasicV19", arWords,
+                                    asParseFile("../../lib/genePredExt.as") };
+indelTrimVcf.next = &gencodeInfo;
+// Instead of dbToTabOut, we need to make a VEP config data structure and
+// use it to create an annoFormatVep.
+struct streamerInfo *primaryInfo = &indelTrimVcf;
+struct annoStreamer *primary = NULL;
+struct annoGrator *gratorList = NULL;
+sourcesFromInfoList(primaryInfo, TRUE, &primary, &gratorList);
+struct annoStreamer *gpVarSource = (struct annoStreamer *)gratorList;
+struct annoFormatter *vepOut = annoFormatVepNew("stdout", FALSE, primary, "indelTrimVcf",
+                                                gpVarSource, "EnsemblGenes ...",
+                                                NULL, NULL, assembly);
+struct annoGratorQuery *query = annoGratorQueryNew(assembly, primary, gratorList, vepOut);
+annoGratorQuerySetRegion(query, "chr11", 0, 0);
+annoGratorQueryExecute(query);
+annoGratorQueryFree(&query);
+}
+
+void gpFx(struct annoAssembly *assembly)
+// Add in dbNsfp data for missense variants
+{
+char *sqlDb = assembly->name;
+struct streamerInfo variants = { NULL, assembly, NULL,
+                                 "input/annoGrator/moreVariants.pgSnp.tab",
+                                 arWords, asParseFile("../../lib/pgSnp.as") };
+struct streamerInfo kgInfo = { NULL, assembly, sqlDb, "knownGene", arWords,
+                               asParseFile("../../lib/knownGene.as") };
+struct streamerInfo snpInfo = { NULL, assembly, sqlDb, "snp137", arWords,
+                                asParseFile("../../lib/snp132Ext.as") };
+struct asObject *dbNsfpSeqChangeAs =
+    bigBedAsFromFileName("/gbdb/hg19/dbNsfp/dbNsfpSeqChange.bb");
+struct streamerInfo dbNsfpSeqChange =
+    { NULL, assembly, NULL, "/gbdb/hg19/dbNsfp/dbNsfpSeqChange.bb",
+      arWords, dbNsfpSeqChangeAs };
+struct asObject *dbNsfpSiftAs = bigBedAsFromFileName("/gbdb/hg19/dbNsfp/dbNsfpSift.bb");
+struct streamerInfo dbNsfpSift = { NULL, assembly, NULL, "/gbdb/hg19/dbNsfp/dbNsfpSift.bb",
+                                   arWords, dbNsfpSiftAs };
+variants.next = &kgInfo;
+kgInfo.next = &snpInfo;
+snpInfo.next = &dbNsfpSeqChange;
+dbNsfpSeqChange.next = &dbNsfpSift;
+// Instead of dbToTabOut, we need to make a VEP config data structure and
+// use it to create an annoFormatVep.
+struct streamerInfo *primaryInfo = &variants;
+struct annoStreamer *primary = NULL;
+struct annoGrator *gratorList = NULL;
+sourcesFromInfoList(primaryInfo, TRUE, &primary, &gratorList);
+struct annoStreamer *gpVarSource = (struct annoStreamer *)gratorList;
+struct annoStreamer *snpSource = gpVarSource->next;
+struct annoStreamer *dbNsfpSource = snpSource->next->next;
+struct annoFormatter *vepOut = annoFormatVepNew("stdout", FALSE, primary, "some more variants",
+                                                gpVarSource, "UCSC Genes of course",
+                                                snpSource, "now snp137.", assembly);
+annoFormatVepAddExtraItem(vepOut, dbNsfpSource, "SIFT", "SIFT score from dbNSFP", "", FALSE);
+struct annoGratorQuery *query = annoGratorQueryNew(assembly, primary, gratorList, vepOut);
+annoGratorQuerySetRegion(query, "chr19", 45405960, 45419476);
+annoGratorQueryExecute(query);
+annoGratorQueryFree(&query);
+}
+
+void doInsertionsRegions(struct streamerInfo *infoList)
+/* Perform a series of region queries on infoList for the 'insertions' test. */
+{
+// Entire range of features in both primary and secondary:
+puts("# region: chr1   0  500");
+dbToTabOut(infoList, "stdout", "chr1", 0, 500, FALSE);
+// Region to the left of insLeft:
+puts("# region: chr1 100  200");
+dbToTabOut(infoList, "stdout", "chr1", 100, 200, FALSE);
+// Region to the right of insLeft and left of insRight:
+puts("# region: chr1 200  300");
+dbToTabOut(infoList, "stdout", "chr1", 200, 300, FALSE);
+// Region to the right of insRight and left of pi (and insPi):
+puts("# region: chr1 300  400");
+dbToTabOut(infoList, "stdout", "chr1", 300, 400, FALSE);
+// Region to the right of pi (and insPi):
+puts("# region: chr1 400  500");
+dbToTabOut(infoList, "stdout", "chr1", 400, 500, FALSE);
+}
+
+void insertions(struct annoAssembly *assembly)
+// Test corner cases of intersection of zero-length insertions with regular items (length > 0)
+// and with different search regions to make sure that insertions at edges are included.
+{
+struct asObject *bed4AS = asParseFile("../../lib/bed.as");
+struct streamerInfo primary = { NULL, assembly, NULL,
+                                "input/annoGrator/insertionsPrimary.bed",
+                                arWords, bed4AS };
+struct streamerInfo secondary = { NULL, assembly, NULL,
+                                  "input/annoGrator/insertionsSecondary.bed",
+                                  arWords, bed4AS };
+primary.next = &secondary;
+
+// Plain BED files
+puts("# BED files");
+doInsertionsRegions(&primary);
+
+// BigBed versions of same files
+puts("# BigBed files");
+primary.tableFileUrl = "input/annoGrator/insertionsPrimary.bb";
+secondary.tableFileUrl = "input/annoGrator/insertionsSecondary.bb";
+doInsertionsRegions(&primary);
+
+// Mysql tables from BED files
+puts("# BED tables");
+primary.sqlDb = secondary.sqlDb = "test";
+primary.tableFileUrl = "insertionsPrimary";
+secondary.tableFileUrl = "insertionsSecondary";
+doInsertionsRegions(&primary);
+
+// Uncompressed VCF
+puts("# VCF files (uncompressed)");
+primary.sqlDb = secondary.sqlDb = NULL;
+primary.tableFileUrl = "input/annoGrator/insertionsPrimary.vcf";
+secondary.tableFileUrl = "input/annoGrator/insertionsSecondary.vcf";
+primary.asObj = secondary.asObj = vcfAsObj();
+doInsertionsRegions(&primary);
+
+// VCF+tabix
+puts("# VCF files (tabix)");
+primary.tableFileUrl = "input/annoGrator/insertionsPrimary.vcf.gz";
+secondary.tableFileUrl = "input/annoGrator/insertionsSecondary.vcf.gz";
+doInsertionsRegions(&primary);
+}
+
+
+struct testSpec
+    {
+    char *name;
+    void (*TestFunc)(struct annoAssembly *assembly);
+    };
+
+static const struct testSpec testSpecList[] =
+{
+    { "pgSnpDbToTabOut", pgSnpDbToTabOut },
+    { "pgSnpKgDbToTabOutShort", pgSnpKgDbToTabOutShort },
+    { "pgSnpKgDbToTabOutLong", pgSnpKgDbToTabOutLong },
+    { "pgSnpKgDbToGpFx", pgSnpKgDbToGpFx },
+    { "snpConsDbToTabOutShort", snpConsDbToTabOutShort },
+    { "snpConsDbToTabOutLong", snpConsDbToTabOutLong },
+    { "vcfEx1", vcfEx1 },
+    { "vcfEx2", vcfEx2 },
+    { "bigBedToTabOut", bigBedToTabOut },
+    { "snpBigWigToTabOut", snpBigWigToTabOut },
+    { "vepOut", vepOut },
+    { "vepOutIndelTrim", vepOutIndelTrim },
+    { "gpFx", gpFx },
+    { "insertions", insertions },
+    { NULL, NULL }
+};
+
+struct slName *makeTestNameList()
+// Extract just the names of the tests into a list.
+{
+struct slName *testNameList = NULL;
+int i;
+for (i = 0;  testSpecList[i].name != NULL;  i++)
+    {
+    slAddHead(&testNameList, slNameNew(testSpecList[i].name));
+    }
+slReverse(&testNameList);
+return testNameList;
+}
+
+char *makeTestNameUsage(struct slName *testNameList)
+// Make a user-friendly listing of valid test names
+{
+struct dyString *dy = dyStringCreate("testName can be one of the following:\n");
+struct slName *testName;
+for (testName = testNameList;  testName != NULL;  testName = testName->next)
+    {
+    dyStringPrintf(dy, "    %s\n", testName->name);
+    }
+return dyStringCannibalize(&dy);
+}
+
+void usage(struct slName *testNameList)
+/* explain usage and exit */
+{
+errAbort(
+    "annoGratorTester - test program for anno* lib modules\n\n"
+    "usage:\n"
+    "    annoGratorTester db testName\n"
+//    "options:\n"
+   "%s", makeTestNameUsage(testNameList)
+    );
+}
+
+static struct optionSpec optionSpecs[] = {
+    {NULL, 0}
+};
+
+int main(int argc, char *argv[])
+{
+// Check args
+optionInit(&argc, argv, optionSpecs);
+struct slName *testNameList = makeTestNameList();
+if (argc != 3)
+    usage(testNameList);
+char *db = argv[1];
+char *testName = argv[2];
+if (! slNameFind(testNameList, testName))
+    {
+    errAbort("Unrecognized test name '%s'\n"
+             "%s", argv[2], makeTestNameUsage(testNameList));
+    }
+
+// Set up environment
+pushCarefulMemHandler(LIMIT_2or6GB);
+if (udcCacheTimeout() < 300)
+    udcSetCacheTimeout(300);
+udcSetDefaultDir("./udcCache");
+knetUdcInstall();
+
+// Run the specified test
+struct annoAssembly *assembly = getAnnoAssembly(db);
+int i;
+for (i = 0;  testSpecList[i].name != NULL;  i++)
+    {
+    struct testSpec testSpec = testSpecList[i];
+    if (sameString(testName, testSpec.name))
+        testSpec.TestFunc(assembly);
+    }
+
+return 0;
+}