23470c9be7d5744390cefef23563e3d1109db14a markd Wed Dec 14 11:13:53 2016 -0800 load of GENCODE VM12 (not being pushed to RR) diff --git src/hg/hgGetAnn/hgGetAnn.c src/hg/hgGetAnn/hgGetAnn.c index 5631c51..1f8769a 100644 --- src/hg/hgGetAnn/hgGetAnn.c +++ src/hg/hgGetAnn/hgGetAnn.c @@ -1,74 +1,79 @@ /* hgGetAnn - get chromosome annotation rows from database tables using * browser-style position specification. */ /* Copyright (C) 2013 The Regents of the University of California * See README in this or parent directory for licensing information. */ #include "common.h" #include "options.h" #include "verbose.h" #include "hdb.h" #include "hgFind.h" #include "jksql.h" #include "cheapcgi.h" +#include "dystring.h" void usage(char *msg) /* Explain usage and exit. */ { errAbort( "%s\n" "hgGetAnn - get chromosome annotation rows from database tables using\n" " browser-style position specification.\n" "\n" "usage:\n" " hgGetAnn [options] db table spec tabfile\n" "\n" "Get chromosome annotation rows from tables. This takes a browser-style\n" "position specification and retrieves matching rows from the database.\n" "table. Output is a tab-separated file with optional header. The bin \n" "column of the table will be not be included in the output. If the spec\n" "contains whitespace or shell meta-characters, it must be quoted.\n" "For split tables, the leading chrN_ should be omitted. Use `mrna' or\n" - "`est' for mRNAs/ESTs. If spec is \"-\", then all rows are retrieved.\n" + "`est' for mRNAs/ESTs. If spec is an empty string, then all rows are retrieved.\n" "This will even work for split tables.\n" "\n" "Options:\n" " -colHeaders - Include column headers with leading #\n" " -tsvHeaders - Include TSV style column headers\n" " -keepBin - don't exclude bin column\n" " -noMatchOk - don't generated an error if nothing is found\n" " -noRandom - Exclude *_random chromsomes\n" + " -where=whereClause - additional clause to get a subset\n" + " of the table.\n" " -verbose=n - 2 is basic info, 3 prints positions found\n", msg); } /* command line */ static struct optionSpec optionSpec[] = { {"colHeaders", OPTION_BOOLEAN}, {"tsvHeaders", OPTION_BOOLEAN}, {"keepBin", OPTION_BOOLEAN}, {"noMatchOk", OPTION_BOOLEAN}, + {"where", OPTION_STRING}, {"noRandom", OPTION_BOOLEAN}, {NULL, 0} }; boolean colHeaders; boolean tsvHeaders; boolean keepBin; boolean noMatchOk; +char *whereClause; boolean noRandom; struct cart *cart = NULL; /* hgFind assumes this global */ void prIndent(int indent, char *format, ...) /* print with indentation */ { va_list args; fprintf(stderr, "%*s", 2*indent, ""); va_start(args, format); vfprintf(stderr, format, args); va_end(args); } @@ -205,31 +210,31 @@ pos->chromStart = 0; pos->chromEnd = hChromSize(db, pos->chrom); slAddHead(&posTab->posList, pos); positions->posCount++; } slReverse(&posTab->posList); return positions; } struct hgPositions *findPositions(char *db, char *spec) /* query database with hgFind algorithm */ { struct hgPositions *positions; verbose(2, "begin position query: %s\n", spec); -if (sameString(spec, "-")) +if (sameString(spec, "")) positions = findAllChroms(db); else positions = hgPositionsFind(db, spec, NULL, "hgGetAnn", NULL, FALSE); verbose(2, "end position query: %d matches\n", countFindMatches(positions)); if (verboseLevel() >= 2) printPositionsList(positions); if ((!noMatchOk) && (countFindMatches(positions) == 0)) errAbort("Error: no matches to find query"); return positions; } void checkTableFields(struct hTableInfo *tableInfo, boolean requireName) /* check that the specified table has the required fields */ { @@ -264,30 +269,44 @@ fputc('\n', outFh); hFreeConn(&conn); } FILE* outputOpen(char *db, char *tabFile, struct hTableInfo *tableInfo) /* open output file and write optional header */ { FILE* outFh = mustOpen(tabFile, "w"); if (colHeaders || tsvHeaders) writeHeaders(db, outFh, tableInfo); return outFh; } +char *getExtraWhereClause(void) +/* return extra where clause to and or an empty string. WARNING: static + * return */ +{ +static struct dyString *buf = NULL; +if (whereClause == NULL) + return ""; +if (buf == NULL) + buf = dyStringNew(1024); +dyStringClear(buf); +dyStringPrintf(buf, " AND (%s)", whereClause); +return dyStringContents(buf); +} + void outputRow(FILE *outFh, char **row, int numCols) /* output a row, which should already be adjusted to include/exclude the bin * column */ { int i; for (i = 0; i < numCols; i++) { if (i > 0) fputc('\t', outFh); fputs(row[i], outFh); } fputc('\n', outFh); } boolean sameStringNull(char *str1, char* str2) @@ -341,91 +360,93 @@ return rowCnt; } int outputByChromRange(char *db, FILE *outFh, struct hTableInfo *tableInfo, struct hgPos *pos) /* output a hgPos by overlaping chrom range */ { struct sqlConnection *conn = hAllocConn(db); struct sqlResult *sr; int rowOff = 0; int rowCnt = 0; /* start query */ if ((pos->chromStart == 0) && (pos->chromEnd >= hChromSize(db, pos->chrom))) { /* optimize full chromosome query */ - sr = hChromQuery(conn, tableInfo->rootName, pos->chrom, NULL, &rowOff); + sr = hChromQuery(conn, tableInfo->rootName, pos->chrom, whereClause, &rowOff); } else { /* chromosome range query */ sr = hRangeQuery(conn, tableInfo->rootName, pos->chrom, pos->chromStart, pos->chromEnd, - NULL, &rowOff); + whereClause, &rowOff); } rowCnt = outputRows(outFh, tableInfo, sr, NULL); sqlFreeResult(&sr); hFreeConn(&conn); return rowCnt; } int outputChromRangeHits(char *db, FILE *outFh, struct hTableInfo *tableInfo, struct hgPosTable *posTab) /* output for a chromosome ranges query, where hgPosTable is for chromInfo */ { struct hgPos *pos; int rowCnt = 0; checkTableFields(tableInfo, FALSE); for (pos = posTab->posList; pos != NULL; pos = pos->next) rowCnt += outputByChromRange(db, outFh, tableInfo, pos); return rowCnt; } int outputByName(char *db, FILE *outFh, struct hTableInfo *tableInfo, char *realTable, struct hgPos *pos) /* Output results where there is a name and no chrom range hgPos. Actual table * name must be supplied, as hgPos does not have a chrom. */ { struct sqlConnection *conn = hAllocConn(db); struct sqlResult *sr; char query[512]; int rowCnt = 0; -sqlSafef(query, sizeof(query), "select * from %s where (%s = '%s')", - realTable, tableInfo->nameField, pos->name); +sqlSafef(query, sizeof(query), "select * from %s where (%s = '%s') %s", + realTable, tableInfo->nameField, pos->name, + getExtraWhereClause()); sr = sqlGetResult(conn, query); rowCnt = outputRows(outFh, tableInfo, sr, NULL); sqlFreeResult(&sr); hFreeConn(&conn); return rowCnt; } int outputByPosition(char *db, FILE *outFh, struct hTableInfo *tableInfo, struct hgPos *pos) /* Output results where there is a name and chrom location hgPos. */ { struct sqlConnection *conn = hAllocConn(db); struct sqlResult *sr; char query[512]; int rowCnt = 0; -sqlSafef(query, sizeof(query), "select * from %s where (%s = '%s') and (%s = %d) and (%s = %d)", +sqlSafef(query, sizeof(query), "select * from %s where (%s = '%s') and (%s = %d) and (%s = %d) %s", getTableName(pos->chrom, tableInfo), tableInfo->chromField, pos->chrom, tableInfo->startField, pos->chromStart, - tableInfo->endField, pos->chromEnd); + tableInfo->endField, pos->chromEnd, + getExtraWhereClause()); sr = sqlGetResult(conn, query); rowCnt = outputRows(outFh, tableInfo, sr, pos); sqlFreeResult(&sr); hFreeConn(&conn); return rowCnt; } int outputTablePosHits(char *db, FILE *outFh, struct hTableInfo *tableInfo, struct hgPos *pos) /* Output results for when query matches requested table. */ { int rowCnt = 0; /* handle different cases */ @@ -516,21 +537,22 @@ outputResults(db, outFh, tableInfo, positions); carefulClose(&outFh); } } int main(int argc, char *argv[]) /* Process command line. */ { optionInit(&argc, argv, optionSpec); if (argc != 5) usage("wrong # of args"); colHeaders = optionExists("colHeaders"); tsvHeaders = optionExists("tsvHeaders"); keepBin = optionExists("keepBin"); noMatchOk = optionExists("noMatchOk"); +whereClause = optionVal("where", NULL); noRandom = optionExists("noRandom"); cgiSpoof(&argc, argv); hgGetAnn(argv[1], argv[2], argv[3], argv[4]); return 0; }