4898794edd81be5285ea6e544acbedeaeb31bf78 max Tue Nov 23 08:10:57 2021 -0800 Fixing pointers to README file for license in all source code files. refs #27614 diff --git src/hg/makeDb/hgLoadWiggle/hgLoadWiggle.c src/hg/makeDb/hgLoadWiggle/hgLoadWiggle.c index 7690e2e..42b2b28 100644 --- src/hg/makeDb/hgLoadWiggle/hgLoadWiggle.c +++ src/hg/makeDb/hgLoadWiggle/hgLoadWiggle.c @@ -1,451 +1,451 @@ /* hgLoadWiggle - Load a Wiggle track "bed" file into database. */ /* Copyright (C) 2013 The Regents of the University of California - * See README in this or parent directory for licensing information. */ + * See kent/LICENSE or http://genome.ucsc.edu/license/ for licensing information. */ #include "common.h" #include "options.h" #include "linefile.h" #include "obscure.h" #include "hash.h" #include "cheapcgi.h" #include "jksql.h" #include "dystring.h" #include "chromInfo.h" #include "wiggle.h" #include "hdb.h" #include "portable.h" #include "hgConfig.h" /* Command line switches. */ static boolean noBin = FALSE; /* Suppress bin field. */ static boolean noLoad = FALSE; /* Do not load table, create tab file */ static boolean noHistory = FALSE; /* Do not add history table comments */ static boolean strictTab = FALSE; /* Separate on tabs. */ static boolean oldTable = FALSE; /* Don't redo table. */ static boolean noChromInfo = FALSE; /* don't do size checks */ static char *pathPrefix = NULL; /* path prefix instead of /gbdb/hg16/wib */ static char *chromInfoDb = NULL; /* DB for chromInfo information */ static int maxChromNameLength = 0; /* specify to avoid chromInfo */ static char *tmpDir = (char *)NULL; /*location to create a temporary file */ static struct hash *chromHash = NULL; /* command line option specifications */ static struct optionSpec optionSpecs[] = { {"smallInsertSize", OPTION_INT}, {"tab", OPTION_BOOLEAN}, {"noBin", OPTION_BOOLEAN}, {"noLoad", OPTION_BOOLEAN}, {"noHistory", OPTION_BOOLEAN}, {"oldTable", OPTION_BOOLEAN}, {"pathPrefix", OPTION_STRING}, {"chromInfoDb", OPTION_STRING}, {"maxChromNameLength", OPTION_INT}, {"tmpDir", OPTION_STRING}, {"noChromInfo", OPTION_BOOLEAN}, {NULL, 0} }; static void usage() /* Explain usage and exit. */ { errAbort( "hgLoadWiggle - Load a wiggle track definition into database\n" "usage:\n" " hgLoadWiggle [options] database track files(s).wig\n" "options:\n" " -noBin\tsuppress bin field\n" " -noLoad\tdo not load table, only create .tab file\n" " -noHistory\tdo not add history table comments (for custom tracks)\n" " -oldTable\tadd to existing table\n" " -tab\t\tSeparate by tabs rather than space\n" " -pathPrefix=<path>\t.wib file path prefix to use " "(default /gbdb/<DB>/wib)\n" " -chromInfoDb=<DB>\tdatabase to extract chromInfo size information\n" " -maxChromNameLength=N - specify max chromName length to avoid\n" " - reference to chromInfo table\n" " -tmpDir=<path> - path to directory for creation of temporary .tab file\n" " - which will be removed after loading\n" " -verbose=N\tN=2 see # of lines input and SQL create statement,\n" "\t\tN=3 see chrom size info, N=4 see details on chrom size info" ); } static struct hash *loadAllChromInfo(char *database) /* Load up all chromosome infos. */ { struct chromInfo *el; struct sqlConnection *conn = NULL; struct sqlResult *sr = NULL; struct hash *ret; char **row; /* be wary of customTrack db loader pipeline which has special * environment variables for HGDB_HOST and so forth */ conn = hAllocConn(database); ret = newHash(0); sr = sqlGetResult(conn, NOSQLINJ "select * from chromInfo"); while ((row = sqlNextRow(sr)) != NULL) { el = chromInfoLoad(row); verbose(4, "Add hash %s value %u (%#lx)\n", el->chrom, el->size, (unsigned long)&el->size); hashAdd(ret, el->chrom, (void *)(& el->size)); } sqlFreeResult(&sr); hFreeConn(&conn); return ret; } static unsigned chromosomeSize(char *chromosome) /* Return full extents of chromosome. Warn and fill in if none. */ { if (noChromInfo) return 0; struct hashEl *el = hashLookup(chromHash,chromosome); if (el == NULL) errAbort("Couldn't find size of chromosome %s (note: chrom names are case sensitive)", chromosome); return *(unsigned *)el->val; } struct wiggleStub /* A line in a wiggle file with chromosome, start, end position parsed out. */ { struct wiggleStub *next; /* Next in list. */ char *chrom; /* Chromosome . */ int chromStart; /* Start position. */ int chromEnd; /* End position. */ char *line; /* Line. */ }; static int wiggleStubCmp(const void *va, const void *vb) /* Compare to sort based on query. */ { const struct wiggleStub *a = *((struct wiggleStub **)va); const struct wiggleStub *b = *((struct wiggleStub **)vb); int dif; dif = strcmp(a->chrom, b->chrom); if (dif == 0) dif = a->chromStart - b->chromStart; return dif; } static void loadOneWiggle(char *fileName, struct wiggleStub **pList) /* Load one wiggle file. Make sure all lines have same number of fields. * The first line is taken as the proper count. * Put results in *pList. */ { struct lineFile *lf = lineFileOpen(fileName, TRUE); char *words[64], *line, *dupe; int wordCount; struct wiggleStub *wiggle; int lineCount = 0; int wiggleSize = 0; while (lineFileNext(lf, &line, NULL)) { char *chrName; int chrStart; int chrEnd; ++lineCount; dupe = cloneString(line); if (strictTab) wordCount = chopTabs(line, words); else wordCount = chopLine(line, words); if (wiggleSize) lineFileExpectWords(lf, wiggleSize, wordCount); else { wiggleSize = wordCount; /* current wiggle standard expects 13 words */ lineFileExpectWords(lf, 13, wiggleSize); } chrName = cloneString(words[0]); chrStart = lineFileNeedNum(lf, words, 1); chrEnd = lineFileNeedNum(lf, words, 2); AllocVar(wiggle); wiggle->chrom = chrName; wiggle->chromStart = chrStart; wiggle->chromEnd = chrEnd; wiggle->line = dupe; slAddHead(pList, wiggle); } lineFileClose(&lf); verbose(2, "Read %d lines from %s\n", lineCount, fileName); } static void writeWiggleTab(char *fileName, struct wiggleStub *wiggleList, char *database) /* Write out wiggle list to tab-separated file. */ { struct wiggleStub *wiggle; FILE *f = mustOpen(fileName, "w"); char *words[64]; int i, wordCount; for (wiggle = wiggleList; wiggle != NULL; wiggle = wiggle->next) { static char *chrom = NULL; static unsigned size = 0; unsigned start; unsigned end; unsigned span; unsigned count; unsigned validCount; boolean valid = TRUE; if (strictTab) wordCount = chopTabs(wiggle->line, words); else wordCount = chopLine(wiggle->line, words); start = sqlUnsigned(words[1]); end = sqlUnsigned(words[2]); span = sqlUnsigned(words[4]); count = sqlUnsigned(words[5]); validCount = sqlUnsigned(words[10]); if (chrom && differentWord(chrom, words[0])) { chrom = words[0]; size = chromosomeSize(chrom); verbose(3, "chrom: %s size: %u\n", chrom, size); } else if (!chrom) { chrom = words[0]; size = chromosomeSize(chrom); verbose(3, "chrom: %s size: %u\n", chrom, size); } valid = TRUE; if (!noChromInfo && (end > size)) { unsigned overrun = 0; unsigned dropCount = 0; overrun = end - size; dropCount = 1 + (overrun / span); warn("WARNING: Exceeded %s size %u > %u. dropping %u data point(s)", chrom, end, size, dropCount); if (dropCount >= count) valid = FALSE; else { count -= dropCount; if (validCount > count) validCount = count; if ((end-(dropCount*span)) > start) end -= dropCount*span; else valid = FALSE; } } if (valid) { if (!noBin) fprintf(f, "%u\t", hFindBin(wiggle->chromStart, wiggle->chromEnd)); for (i=0; i<wordCount; ++i) { switch(i) { case 2: fprintf(f,"%u", end); break; case 5: fprintf(f,"%u", count); break; case 7: if (pathPrefix ) { if (endsWith(pathPrefix, "/")) fprintf(f,"%s", pathPrefix ); else fprintf(f,"%s/", pathPrefix ); } else fprintf(f,"/gbdb/%s/wib/", database ); fputs(words[i], f); break; case 10: fprintf(f,"%u", validCount); break; default: fputs(words[i], f); break; } if (i == wordCount-1) fputc('\n', f); else fputc('\t', f); } } } fclose(f); } static void loadDatabase(char *database, char *track, struct wiggleStub *wiggleList) /* Load database from wiggleList. */ { struct sqlConnection *conn = (struct sqlConnection *)NULL; struct dyString *dy = newDyString(1024); char *tab = (char *)NULL; if (! noLoad) { conn = hAllocConn(database); verbose(1, "Connected to database %s for track %s\n", database, track); } if ((char *)NULL != tmpDir) tab = cloneString(rTempName(tmpDir,"loadWig",".tab")); else tab = cloneString("wiggle.tab"); /* First make table definition. */ if ((!oldTable) && (!noLoad)) { int indexLen = 0; if (maxChromNameLength) indexLen = maxChromNameLength; else indexLen = hGetMinIndexLength(database); verbose(2, "INDEX chrom length: %d\n", indexLen); /* Create definition statement. */ verbose(1, "Creating wiggle table definition in %s.%s\n", database, track); sqlDyStringPrintf(dy, "CREATE TABLE %s (\n", track); if (!noBin) dyStringAppend(dy, " bin smallint unsigned not null,\n"); dyStringAppend(dy, " chrom varchar(255) not null,\n"); dyStringAppend(dy, " chromStart int unsigned not null,\n"); dyStringAppend(dy, " chromEnd int unsigned not null,\n"); dyStringAppend(dy, " name varchar(255) not null,\n"); dyStringAppend(dy, " span int unsigned not null,\n"); dyStringAppend(dy, " count int unsigned not null,\n"); dyStringAppend(dy, " offset int unsigned not null,\n"); dyStringAppend(dy, " file varchar(255) not null,\n"); dyStringAppend(dy, " lowerLimit double not null,\n"); dyStringAppend(dy, " dataRange double not null,\n"); dyStringAppend(dy, " validCount int unsigned not null,\n"); dyStringAppend(dy, " sumData double not null,\n"); dyStringAppend(dy, " sumSquares double not null,\n"); dyStringAppend(dy, "#Indices\n"); if (!noBin) dyStringPrintf(dy, " INDEX(chrom(%d),bin)\n", indexLen); else { dyStringPrintf(dy, " INDEX(chrom(%d),chromStart)\n", indexLen); } dyStringAppend(dy, ")\n"); verbose(2, "%s", dy->string); sqlRemakeTable(conn, track, dy->string); } verbose(1, "Saving %s\n", tab); writeWiggleTab(tab, wiggleList, database); if (! noLoad) { char comment[256]; char pathAdded[192]; verbose(1, "Loading %s\n", database); dyStringClear(dy); sqlDyStringPrintf(dy, "load data local infile '%s' into table %s", tab, track); sqlUpdate(conn, dy->string); if (pathPrefix) safef(pathAdded, sizeof(pathAdded), "%s/", pathPrefix); else safef(pathAdded, sizeof(pathAdded), "/gbdb/%s/wib/", database); if (oldTable) safef(comment, sizeof(comment), "adding to wiggle table %s from %s/%s with wib path %s", track, getCurrentDir(), tab, pathAdded); else safef(comment, sizeof(comment), "new wiggle table %s from %s/%s with wib path %s", track, getCurrentDir(), tab, pathAdded); if (! noHistory) hgHistoryComment(conn, "%s", comment); verbose(2, "#\t%s\n", comment); hFreeConn(&conn); /* if temp dir specified, unlink file to make it disappear */ if ((char *)NULL != tmpDir) unlink(tab); } else verbose(1, "noLoad option requested, see resulting file: %s\n", tab); } static void hgLoadWiggle(char *database, char *track, int wiggleCount, char *wiggleFiles[]) /* hgLoadWiggle - Load a generic wiggle file into database. */ { struct wiggleStub *wiggleList = NULL; int i; if (!noChromInfo) { if (chromInfoDb) chromHash = loadAllChromInfo(chromInfoDb); else chromHash = loadAllChromInfo(database); if (verboseLevel() > 2) { struct hashCookie cookie; struct hashEl *el; cookie = hashFirst(chromHash); verbose(3,"chrom\tsize\n"); while ((el = hashNext(&cookie)) != NULL) { unsigned size; size = chromosomeSize(el->name); verbose(3,"%s\t%u\n", el->name, size); } } } for (i=0; i<wiggleCount; ++i) loadOneWiggle(wiggleFiles[i], &wiggleList); slSort(&wiggleList, wiggleStubCmp); loadDatabase(database, track, wiggleList); } int main(int argc, char *argv[]) /* Process command line. */ { optionInit(&argc, argv, optionSpecs); if (argc < 4) usage(); noBin = optionExists("noBin"); noLoad = optionExists("noLoad"); noHistory = optionExists("noHistory"); strictTab = optionExists("tab"); noChromInfo = optionExists("noChromInfo"); oldTable = optionExists("oldTable"); pathPrefix = optionVal("pathPrefix",NULL); chromInfoDb = optionVal("chromInfoDb",NULL); maxChromNameLength = optionInt("maxChromNameLength",0); tmpDir = optionVal("tmpDir", tmpDir); verbose(2, "noBin: %s, noLoad: %s, noHistory: %s, tab: %s, oldTable: %s\n", noBin ? "TRUE" : "FALSE", noLoad ? "TRUE" : "FALSE", noHistory ? "TRUE" : "FALSE", strictTab ? "TRUE" : "FALSE", oldTable ? "TRUE" : "FALSE"); if (pathPrefix) verbose(2, " pathPrefix: %s\n", pathPrefix); if (chromInfoDb) verbose(2, "chromInfoDb: %s\n", chromInfoDb); hgLoadWiggle(argv[1], argv[2], argc-3, argv+3); if (verboseLevel() > 1) printVmPeak(); return 0; }