4898794edd81be5285ea6e544acbedeaeb31bf78
max
  Tue Nov 23 08:10:57 2021 -0800
Fixing pointers to README file for license in all source code files. refs #27614

diff --git src/hg/cgilib/tests/annoGratorTester.c src/hg/cgilib/tests/annoGratorTester.c
index 74e4038..a5e98d8 100644
--- src/hg/cgilib/tests/annoGratorTester.c
+++ src/hg/cgilib/tests/annoGratorTester.c
@@ -1,516 +1,516 @@
 /* annoGratorTester -- exercise anno* lib modules (in kent/src as well as kent/src/hg) */
 
 /* Copyright (C) 2014 The Regents of the University of California 
- * See README in this or parent directory for licensing information. */
+ * See kent/LICENSE or http://genome.ucsc.edu/license/ for licensing information. */
 
 #include "annoGratorQuery.h"
 #include "annoGratorGpVar.h"
 #include "annoStreamBigBed.h"
 #include "annoStreamDb.h"
 #include "annoStreamTab.h"
 #include "annoStreamVcf.h"
 #include "annoStreamWig.h"
 #include "annoGrateWigDb.h"
 #include "annoFormatTab.h"
 #include "annoFormatVep.h"
 #include "bigBed.h"
 #include "dystring.h"
 #include "genePred.h"
 #include "hdb.h"
 #include "knetUdc.h"
 #include "memalloc.h"
 #include "pgSnp.h"
 #include "udc.h"
 #include "vcf.h"
 
 //#*** duplicated from hgVarAnnoGrator... libify me!
 struct annoAssembly *getAnnoAssembly(char *db)
 /* Return the annoAssembly for db.  The object is built on the first call and the
  * same cached pointer is returned on every later call, whatever db is passed then. */
 {
 static struct annoAssembly *cachedAssembly = NULL;
 if (cachedAssembly != NULL)
     return cachedAssembly;
 char *seqDir = hDbDbNibPath(db);
 if (seqDir == NULL)
     errAbort("Can't find .2bit for db '%s'", db);
 char twoBitFile[HDB_MAX_PATH_STRING];
 safef(twoBitFile, sizeof(twoBitFile), "%s/%s.2bit", seqDir, db);
 // The string from hReplaceGbdb is released here once the assembly has been made.
 char *resolvedPath = hReplaceGbdb(twoBitFile);
 cachedAssembly = annoAssemblyNew(db, resolvedPath);
 freeMem(resolvedPath);
 return cachedAssembly;
 }
 
 struct streamerInfo
 /* Enough info to create a streamer or grator that gets data from sql, file or URL.
  * Instances are chained through next: the first element describes the primary source
  * and the remaining elements describe the secondary (grator) sources.
  * Field order matters: the tests in this file use positional initializers. */
     {
     struct streamerInfo *next;	// Next source description, or NULL at end of list.
     struct annoAssembly *assembly;	// Reference assembly name and sequence.
     char *sqlDb;		// If non-NULL, then we are using this SQL database
     char *tableFileUrl;		// If sqlDb is non-NULL, table name; else file or URL
     enum annoRowType type;	// Data type (arWords, arWigVec or arWigSingle in this file)
     struct asObject *asObj;	// Ignored by streamerFromInfo when sqlDb is set;
 				// sourcesFromInfoList still checks it for genePred columns.
     };
 
 struct annoStreamer *streamerFromInfo(struct streamerInfo *info)
 /* Pick the streamer constructor that fits info, call it and return what it builds.
  * The checks run in priority order: wig vector, SQL table, VCF, bigBed, plain tab file. */
 {
 // arWigVec goes to the wig-db streamer regardless of the other fields.
 if (info->type == arWigVec)
     return annoStreamWigDbNew(info->sqlDb, info->tableFileUrl, info->assembly, BIGNUM);
 if (info->sqlDb != NULL)
     return annoStreamDbNew(info->sqlDb, info->tableFileUrl, info->assembly, BIGNUM, NULL);
 if (info->asObj && asObjectsMatch(info->asObj, vcfAsObj()))
     {
     //#*** this is kludgey, should test for .tbi file:
     boolean isTabix = endsWith(info->tableFileUrl, ".gz");
     return annoStreamVcfNew(info->tableFileUrl, NULL, isTabix, info->assembly, BIGNUM);
     }
 if (endsWith(info->tableFileUrl, ".bb"))
     return annoStreamBigBedNew(info->tableFileUrl, info->assembly, BIGNUM);
 // Anything else is read as a tab-separated file described by asObj.
 return annoStreamTabNew(info->tableFileUrl, info->assembly, info->asObj, BIGNUM);
 }
 
 void sourcesFromInfoList(struct streamerInfo *infoList, bool doGpFx,
                          struct annoStreamer **retPrimary, struct annoGrator **retGrators)
 /* Translate streamerInfo parameters into primary source and list of secondary sources.
  * The head of infoList becomes *retPrimary; every following element becomes one grator,
  * and *retGrators keeps them in the same order as infoList. */
 {
 assert(infoList && retPrimary && retGrators);
 struct annoStreamer *primaryStreamer = streamerFromInfo(infoList);
 struct annoGrator *grators = NULL;
 struct streamerInfo *info = infoList->next;
 while (info != NULL)
     {
     struct annoGrator *newGrator = NULL;
     boolean isWiggle = (info->type == arWigVec || info->type == arWigSingle);
     if (isWiggle && info->sqlDb == NULL)
         // Wiggle data without a database is a bigWig file or URL.
         newGrator = annoGrateBigWigNew(info->tableFileUrl, info->assembly, agwmAverage);
     else if (isWiggle)
         newGrator = annoGrateWigDbNew(info->sqlDb, info->tableFileUrl, info->assembly,
                                       agwmAverage, BIGNUM);
     else
         {
         struct annoStreamer *source = streamerFromInfo(info);
         // Only sources whose first 10 columns are named like genePred get the gpVar grator.
         boolean useGpVar = (doGpFx && info->asObj
                             && asColumnNamesMatchFirstN(info->asObj, genePredAsObj(), 10));
         if (useGpVar)
             newGrator = annoGratorGpVarNew(source);
         else
             newGrator = annoGratorNew(source);
         }
     slAddHead(&grators, newGrator);
     info = info->next;
     }
 // slAddHead built the list backwards; restore infoList order.
 slReverse(&grators);
 *retPrimary = primaryStreamer;
 *retGrators = grators;
 }
 
 struct asObject *bigBedAsFromFileName(char *fileName)
 /* Open the bigBed file, fetch the autoSql definition stored inside it, close the
  * file again and return the definition. */
 {
 struct bbiFile *bigBed = bigBedFileOpen(fileName);
 struct asObject *storedAs = bigBedAs(bigBed);
 bigBedFileClose(&bigBed);
 return storedAs;
 }
 
 void dbToTabOut(struct streamerInfo *infoList, char *outFile,
                 char *chrom, uint start, uint end, bool doGpFx)
 /* Build the sources described by infoList, run one query over the given region and
  * write all fields as tab-separated text to outFile. */
 {
 struct annoStreamer *primarySource = NULL;
 struct annoGrator *grators = NULL;
 sourcesFromInfoList(infoList, doGpFx, &primarySource, &grators);
 struct annoFormatter *tabFormatter = annoFormatTabNew(outFile);
 struct annoGratorQuery *tabQuery =
     annoGratorQueryNew(primarySource->assembly, primarySource, grators, tabFormatter);
 annoGratorQuerySetRegion(tabQuery, chrom, start, end);
 annoGratorQueryExecute(tabQuery);
 annoGratorQueryFree(&tabQuery);
 }
 
 void pgSnpDbToTabOut(struct annoAssembly *assembly)
 // First test: some rows of a pgSnp table
 {
 char *sqlDb = assembly->name;
 struct streamerInfo pgSnpInfo = { NULL, assembly, sqlDb, "pgNA12878", arWords, pgSnpAsObj() };
 pgSnpInfo.next = NULL;
 dbToTabOut(&pgSnpInfo, "stdout", "chr1", 705881, 752721, FALSE);
 }
 
 void pgSnpKgDbToTabOutShort(struct annoAssembly *assembly)
 // Second test: some rows of a pgSnp table integrated with knownGene
 {
 char *sqlDb = assembly->name;
 struct streamerInfo pgSnpInfo = { NULL, assembly, sqlDb, "pgNA12878", arWords, pgSnpAsObj() };
 struct streamerInfo kgInfo = { NULL, assembly, sqlDb, "knownGene", arWords,
                                asParseFile("../../lib/knownGene.as") };
 pgSnpInfo.next = &kgInfo;
 dbToTabOut(&pgSnpInfo, "stdout", "chr1", 705881, 752721, FALSE);
 }
 
 void pgSnpKgDbToTabOutLong(struct annoAssembly *assembly)
 // Third test: all rows of a pgSnp table integrated with knownGene
 {
 char *sqlDb = assembly->name;
 struct streamerInfo pgSnpInfo = { NULL, assembly, sqlDb, "pgNA12878", arWords, pgSnpAsObj() };
 dbToTabOut(&pgSnpInfo, "stdout", NULL, 0, 0, FALSE);
 }
 
 void snpConsDbToTabOutShort(struct annoAssembly *assembly)
 // Fourth test: some rows of snp135 integrated with phyloP scores
 {
 char *sqlDb = assembly->name;
 struct streamerInfo snp135Info = { NULL, assembly, sqlDb, "snp135", arWords,
                                    asParseFile("../../lib/snp132Ext.as") };
 struct streamerInfo phyloPInfo = { NULL, assembly, sqlDb, "phyloP46wayPlacental", arWigSingle,
                                    NULL };
 snp135Info.next = &phyloPInfo;
 dbToTabOut(&snp135Info, "stdout", "chr1", 737224, 738475, FALSE);
 }
 
 void snpConsDbToTabOutLong(struct annoAssembly *assembly)
 // Long-running!: All rows of snp135 integrated with phyloP scores
 {
 char *sqlDb = assembly->name;
 struct streamerInfo snp135Info = { NULL, assembly, sqlDb, "snp135", arWords,
                                    asParseFile("../../lib/snp132Ext.as") };
 struct streamerInfo phyloPInfo = { NULL, assembly, sqlDb, "phyloP46wayPlacental", arWigSingle,
                                    NULL };
 snp135Info.next = &phyloPInfo;
 dbToTabOut(&snp135Info, "stdout", NULL, 0, 0, FALSE);
 }
 
 void vcfEx1(struct annoAssembly *assembly)
 // Fifth test: VCF with genotypes
 {
 struct streamerInfo vcfEx1 = { NULL, assembly, NULL,
                                "http://genome.ucsc.edu/goldenPath/help/examples/vcfExample.vcf.gz",
                                arWords, vcfAsObj() };
 dbToTabOut(&vcfEx1, "stdout", NULL, 0, 0, FALSE);
 }
 
 void vcfEx2(struct annoAssembly *assembly)
 // VCF with no genotypes
 {
 struct streamerInfo vcfEx2 = { NULL, assembly, NULL,
                                "http://genome.ucsc.edu/goldenPath/help/examples/vcfExampleTwo.vcf",
                                arWords, vcfAsObj() };
 dbToTabOut(&vcfEx2, "stdout", NULL, 0, 0, FALSE);
 }
 
 void pgSnpKgDbToGpFx(struct annoAssembly *assembly)
 // pgSnp + knownGene + gpFx = annotated variants
 {
 char *sqlDb = assembly->name;
 struct streamerInfo pg2SnpInfo = { NULL, assembly, NULL,
                                    "input/annoGrator/pgForTestingGpFx.pgSnp.tab",
                                    arWords, pgSnpAsObj() };
 struct streamerInfo kgInfo = { NULL, assembly, sqlDb, "knownGene", arWords,
                                asParseFile("../../lib/knownGene.as") };
 pg2SnpInfo.next = &kgInfo;
 dbToTabOut(&pg2SnpInfo, "stdout", NULL, 0, 0, TRUE);
 
 /*
   FIXME
   // 3base insertion CDS - chr3:124,646,699-124,646,718
   dbToTabOut(&pg2SnpInfo, "stdout", "chr3",124646699,124646718, TRUE);
 */
 }
 
 void bigBedToTabOut(struct annoAssembly *assembly)
 // like bigBedToBed
 {
 struct streamerInfo bigBedInfo = { NULL, assembly, NULL,
                                "http://genome.ucsc.edu/goldenPath/help/examples/bigBedExample.bb",
                                    arWords, NULL };
 dbToTabOut(&bigBedInfo, "stdout", "chr21", 34716800, 34733700, FALSE);
 }
 
 void snpBigWigToTabOut(struct annoAssembly *assembly)
 // text + scores
 {
 char *sqlDb = assembly->name;
 struct streamerInfo snp135Info = { NULL, assembly, sqlDb, "snp135", arWords,
                                    asParseFile("../../lib/snp132Ext.as") };
 struct streamerInfo bigWigInfo = { NULL, assembly, NULL,
                                "http://genome.ucsc.edu/goldenPath/help/examples/bigWigExample.bw",
                                    arWigSingle, NULL };
 snp135Info.next = &bigWigInfo;
 dbToTabOut(&snp135Info, "stdout", "chr21", 34716800, 34733700, FALSE);
 }
 
 void vepOut(struct annoAssembly *assembly)
 // variants + genes + gpFx + snps + annoFormatVep = annotated variants in VEP format
 {
 char *sqlDb = assembly->name;
 struct streamerInfo vepSamplePgSnp = { NULL, assembly, NULL,
                                        "input/annoGrator/vepSample.pgSnp.tab",
                                        arWords, asParseFile("../../lib/pgSnp.as") };
 struct streamerInfo ensGInfo = { NULL, assembly, sqlDb, "ensGene", arWords,
                                asParseFile("../../lib/genePredExt.as") };
 struct streamerInfo snpInfo = { NULL, assembly, sqlDb, "snp135", arWords,
                                 asParseFile("../../lib/snp132Ext.as") };
 vepSamplePgSnp.next = &ensGInfo;
 ensGInfo.next = &snpInfo;
 // Instead of dbToTabOut, we need to make a VEP config data structure and
 // use it to create an annoFormatVep.
 struct streamerInfo *primaryInfo = &vepSamplePgSnp;
 struct annoStreamer *primary = NULL;
 struct annoGrator *gratorList = NULL;
 sourcesFromInfoList(primaryInfo, TRUE, &primary, &gratorList);
 struct annoStreamer *gpVarSource = (struct annoStreamer *)gratorList;
 struct annoStreamer *snpSource = gpVarSource->next;
 struct annoFormatter *vepOut = annoFormatVepNew("stdout", FALSE, primary, "vepSamplePgSnp",
                                                 gpVarSource, "UCSC Genes ...",
                                                 snpSource, "just dbSNP 135", assembly);
 struct annoGratorQuery *query = annoGratorQueryNew(assembly, primary, gratorList, vepOut);
 annoGratorQuerySetRegion(query, "chr1", 876900, 886920);
 annoGratorQueryExecute(query);
 annoGratorQuerySetRegion(query, "chr5", 135530, 145535);
 annoGratorQueryExecute(query);
 annoGratorQueryFree(&query);
 }
 
 void vepOutIndelTrim(struct annoAssembly *assembly)
 // variants with VCF's awful indel coordinates + ... = VEP
 {
 char *sqlDb = assembly->name;
 struct streamerInfo indelTrimVcf = { NULL, assembly, NULL,
                                      "input/annoGrator/indelTrim.vcf",
                                      arWords, vcfAsObj() };
 struct streamerInfo gencodeInfo = { NULL, assembly, sqlDb, "wgEncodeGencodeBasicV19", arWords,
                                     asParseFile("../../lib/genePredExt.as") };
 indelTrimVcf.next = &gencodeInfo;
 // Instead of dbToTabOut, we need to make a VEP config data structure and
 // use it to create an annoFormatVep.
 struct streamerInfo *primaryInfo = &indelTrimVcf;
 struct annoStreamer *primary = NULL;
 struct annoGrator *gratorList = NULL;
 sourcesFromInfoList(primaryInfo, TRUE, &primary, &gratorList);
 struct annoStreamer *gpVarSource = (struct annoStreamer *)gratorList;
 struct annoFormatter *vepOut = annoFormatVepNew("stdout", FALSE, primary, "indelTrimVcf",
                                                 gpVarSource, "EnsemblGenes ...",
                                                 NULL, NULL, assembly);
 struct annoGratorQuery *query = annoGratorQueryNew(assembly, primary, gratorList, vepOut);
 annoGratorQuerySetRegion(query, "chr11", 0, 0);
 annoGratorQueryExecute(query);
 annoGratorQueryFree(&query);
 }
 
 void gpFx(struct annoAssembly *assembly)
 // Add in dbNsfp data for missense variants
 {
 char *sqlDb = assembly->name;
 struct streamerInfo variants = { NULL, assembly, NULL,
                                  "input/annoGrator/moreVariants.pgSnp.tab",
                                  arWords, asParseFile("../../lib/pgSnp.as") };
 struct streamerInfo kgInfo = { NULL, assembly, sqlDb, "knownGene", arWords,
                                asParseFile("../../lib/knownGene.as") };
 struct streamerInfo snpInfo = { NULL, assembly, sqlDb, "snp137", arWords,
                                 asParseFile("../../lib/snp132Ext.as") };
 struct asObject *dbNsfpSeqChangeAs =
     bigBedAsFromFileName("/gbdb/hg19/dbNsfp/dbNsfpSeqChange.bb");
 struct streamerInfo dbNsfpSeqChange =
     { NULL, assembly, NULL, "/gbdb/hg19/dbNsfp/dbNsfpSeqChange.bb",
       arWords, dbNsfpSeqChangeAs };
 struct asObject *dbNsfpSiftAs = bigBedAsFromFileName("/gbdb/hg19/dbNsfp/dbNsfpSift.bb");
 struct streamerInfo dbNsfpSift = { NULL, assembly, NULL, "/gbdb/hg19/dbNsfp/dbNsfpSift.bb",
                                    arWords, dbNsfpSiftAs };
 variants.next = &kgInfo;
 kgInfo.next = &snpInfo;
 snpInfo.next = &dbNsfpSeqChange;
 dbNsfpSeqChange.next = &dbNsfpSift;
 // Instead of dbToTabOut, we need to make a VEP config data structure and
 // use it to create an annoFormatVep.
 struct streamerInfo *primaryInfo = &variants;
 struct annoStreamer *primary = NULL;
 struct annoGrator *gratorList = NULL;
 sourcesFromInfoList(primaryInfo, TRUE, &primary, &gratorList);
 struct annoStreamer *gpVarSource = (struct annoStreamer *)gratorList;
 struct annoStreamer *snpSource = gpVarSource->next;
 struct annoStreamer *dbNsfpSource = snpSource->next->next;
 struct annoFormatter *vepOut = annoFormatVepNew("stdout", FALSE, primary, "some more variants",
                                                 gpVarSource, "UCSC Genes of course",
                                                 snpSource, "now snp137.", assembly);
 annoFormatVepAddExtraItem(vepOut, dbNsfpSource, "SIFT", "SIFT score from dbNSFP", "", FALSE);
 struct annoGratorQuery *query = annoGratorQueryNew(assembly, primary, gratorList, vepOut);
 annoGratorQuerySetRegion(query, "chr19", 45405960, 45419476);
 annoGratorQueryExecute(query);
 annoGratorQueryFree(&query);
 }
 
 void doInsertionsRegions(struct streamerInfo *infoList)
 /* Run the 'insertions' test queries: for each region in the table below, print a
  * banner line and then the tab-separated query results for infoList on chr1. */
 {
 static const struct
     {
     const char *banner;
     unsigned int start;
     unsigned int end;
     } regions[] =
     {
     // Entire range of features in both primary and secondary:
     { "# region: chr1   0  500",   0, 500 },
     // Region to the left of insLeft:
     { "# region: chr1 100  200", 100, 200 },
     // Region to the right of insLeft and left of insRight:
     { "# region: chr1 200  300", 200, 300 },
     // Region to the right of insRight and left of pi (and insPi):
     { "# region: chr1 300  400", 300, 400 },
     // Region to the right of pi (and insPi):
     { "# region: chr1 400  500", 400, 500 },
     };
 int regionCount = sizeof(regions) / sizeof(regions[0]);
 int ix;
 for (ix = 0;  ix < regionCount;  ix++)
     {
     puts(regions[ix].banner);
     dbToTabOut(infoList, "stdout", "chr1", regions[ix].start, regions[ix].end, FALSE);
     }
 }
 
 void insertions(struct annoAssembly *assembly)
 // Test corner cases of intersection of zero-length insertions with regular items (length > 0)
 // and with different search regions to make sure that insertions at edges are included.
 {
 struct asObject *bed4AS = asParseFile("../../lib/bed.as");
 struct streamerInfo primary = { NULL, assembly, NULL,
                                 "input/annoGrator/insertionsPrimary.bed",
                                 arWords, bed4AS };
 struct streamerInfo secondary = { NULL, assembly, NULL,
                                   "input/annoGrator/insertionsSecondary.bed",
                                   arWords, bed4AS };
 primary.next = &secondary;
 
 // Plain BED files
 puts("# BED files");
 doInsertionsRegions(&primary);
 
 // BigBed versions of same files
 puts("# BigBed files");
 primary.tableFileUrl = "input/annoGrator/insertionsPrimary.bb";
 secondary.tableFileUrl = "input/annoGrator/insertionsSecondary.bb";
 doInsertionsRegions(&primary);
 
 // Mysql tables from BED files
 puts("# BED tables");
 primary.sqlDb = secondary.sqlDb = "test";
 primary.tableFileUrl = "insertionsPrimary";
 secondary.tableFileUrl = "insertionsSecondary";
 doInsertionsRegions(&primary);
 
 // Uncompressed VCF
 puts("# VCF files (uncompressed)");
 primary.sqlDb = secondary.sqlDb = NULL;
 primary.tableFileUrl = "input/annoGrator/insertionsPrimary.vcf";
 secondary.tableFileUrl = "input/annoGrator/insertionsSecondary.vcf";
 primary.asObj = secondary.asObj = vcfAsObj();
 doInsertionsRegions(&primary);
 
 // VCF+tabix
 puts("# VCF files (tabix)");
 primary.tableFileUrl = "input/annoGrator/insertionsPrimary.vcf.gz";
 secondary.tableFileUrl = "input/annoGrator/insertionsSecondary.vcf.gz";
 doInsertionsRegions(&primary);
 }
 
 
 struct testSpec
 /* Pairs a test name accepted on the command line with the function that runs it. */
     {
     char *name;                                       // Test name; NULL in the terminating entry
     void (*TestFunc)(struct annoAssembly *assembly);  // Called with the assembly made for db
     };
 
 /* Every test this program can run.  makeTestNameList and main both walk the table
  * until they reach the entry whose name is NULL, so that entry must stay last. */
 static const struct testSpec testSpecList[] =
 {
     { "pgSnpDbToTabOut", pgSnpDbToTabOut },
     { "pgSnpKgDbToTabOutShort", pgSnpKgDbToTabOutShort },
     { "pgSnpKgDbToTabOutLong", pgSnpKgDbToTabOutLong },
     { "pgSnpKgDbToGpFx", pgSnpKgDbToGpFx },
     { "snpConsDbToTabOutShort", snpConsDbToTabOutShort },
     { "snpConsDbToTabOutLong", snpConsDbToTabOutLong },
     { "vcfEx1", vcfEx1 },
     { "vcfEx2", vcfEx2 },
     { "bigBedToTabOut", bigBedToTabOut },
     { "snpBigWigToTabOut", snpBigWigToTabOut },
     { "vepOut", vepOut },
     { "vepOutIndelTrim", vepOutIndelTrim },
     { "gpFx", gpFx },
     { "insertions", insertions },
     { NULL, NULL }   // terminator
 };
 
 struct slName *makeTestNameList()
 // Extract just the names of the tests into a list.
 {
 struct slName *testNameList = NULL;
 int i;
 for (i = 0;  testSpecList[i].name != NULL;  i++)
     {
     slAddHead(&testNameList, slNameNew(testSpecList[i].name));
     }
 slReverse(&testNameList);
 return testNameList;
 }
 
 char *makeTestNameUsage(struct slName *testNameList)
 // Build the help text that lists each valid test name on its own indented line.
 {
 struct dyString *listing = dyStringCreate("testName can be one of the following:\n");
 struct slName *cursor = testNameList;
 while (cursor != NULL)
     {
     dyStringPrintf(listing, "    %s\n", cursor->name);
     cursor = cursor->next;
     }
 // Hand the accumulated string to the caller.
 return dyStringCannibalize(&listing);
 }
 
 void usage(struct slName *testNameList)
 /* Abort with the usage message followed by the list of valid test names. */
 {
 char *validNames = makeTestNameUsage(testNameList);
 errAbort("annoGratorTester - test program for anno* lib modules\n\n"
          "usage:\n"
          "    annoGratorTester db testName\n"
          "%s",
          validNames);
 }
 
 /* Option table handed to optionInit in main.  It defines no options: the only
  * entry is the {NULL, 0} one. */
 static struct optionSpec optionSpecs[] = {
     {NULL, 0}
 };
 
 int main(int argc, char *argv[])
 {
 // Check args
 optionInit(&argc, argv, optionSpecs);
 struct slName *testNameList = makeTestNameList();
 if (argc != 3)
     usage(testNameList);
 char *db = argv[1];
 char *testName = argv[2];
 if (! slNameFind(testNameList, testName))
     {
     errAbort("Unrecognized test name '%s'\n"
              "%s", argv[2], makeTestNameUsage(testNameList));
     }
 
 // Set up environment
 pushCarefulMemHandler(LIMIT_2or6GB);
 if (udcCacheTimeout() < 300)
     udcSetCacheTimeout(300);
 udcSetDefaultDir("./udcCache");
 knetUdcInstall();
 
 // Run the specified test
 struct annoAssembly *assembly = getAnnoAssembly(db);
 int i;
 for (i = 0;  testSpecList[i].name != NULL;  i++)
     {
     struct testSpec testSpec = testSpecList[i];
     if (sameString(testName, testSpec.name))
         testSpec.TestFunc(assembly);
     }
 
 return 0;
 }