23470c9be7d5744390cefef23563e3d1109db14a
markd
  Wed Dec 14 11:13:53 2016 -0800
load of GENCODE VM12 (not being pushed to RR)

diff --git src/hg/hgGetAnn/hgGetAnn.c src/hg/hgGetAnn/hgGetAnn.c
index 5631c51..1f8769a 100644
--- src/hg/hgGetAnn/hgGetAnn.c
+++ src/hg/hgGetAnn/hgGetAnn.c
@@ -1,74 +1,79 @@
 /* hgGetAnn - get chromosome annotation rows from database tables using
  * browser-style position specification.  */
 
 /* Copyright (C) 2013 The Regents of the University of California 
  * See README in this or parent directory for licensing information. */
 #include "common.h"
 #include "options.h"
 #include "verbose.h"
 #include "hdb.h"
 #include "hgFind.h"
 #include "jksql.h"
 #include "cheapcgi.h"
+#include "dystring.h"
 
 
 void usage(char *msg)
 /* Explain usage and exit. */
 {
 errAbort(
     "%s\n"
     "hgGetAnn - get chromosome annotation rows from database tables using\n"
     "           browser-style position specification.\n"
     "\n"
     "usage:\n"
     "   hgGetAnn [options] db table spec tabfile\n"
     "\n"
     "Get chromosome annotation rows from tables.  This takes a browser-style\n"
     "position specification and retrieves matching rows from the database.\n"
     "table. Output is a tab-separated file with optional header.  The bin \n"
     "column of the table will be not be included in the output. If the spec\n"
     "contains whitespace or shell meta-characters, it must be quoted.\n"
     "For split tables, the leading chrN_ should be omitted.  Use `mrna' or\n"
-    "`est' for mRNAs/ESTs.  If spec is \"-\", then all rows are retrieved.\n"
+    "`est' for mRNAs/ESTs.  If spec is an empty string, then all rows are retrieved.\n"
     "This will even work for split tables.\n"
     "\n"
     "Options:\n"
     "   -colHeaders - Include column headers with leading #\n"
     "   -tsvHeaders - Include TSV style column headers\n"
     "   -keepBin - don't exclude bin column\n"
     "   -noMatchOk - don't generated an error if nothing is found\n"
     "   -noRandom - Exclude *_random chromsomes\n"
+    "   -where=whereClause - additional clause to get a subset\n"
+    "       of the table.\n"
     "   -verbose=n - 2 is basic info, 3 prints positions found\n",
     msg);
 }
 
 
 /* command line */
 static struct optionSpec optionSpec[] = {
     {"colHeaders", OPTION_BOOLEAN},
     {"tsvHeaders", OPTION_BOOLEAN},
     {"keepBin", OPTION_BOOLEAN},
     {"noMatchOk", OPTION_BOOLEAN},
+    {"where", OPTION_STRING},
     {"noRandom", OPTION_BOOLEAN},
     {NULL, 0}
 };
 
 boolean colHeaders;
 boolean tsvHeaders;
 boolean keepBin;
 boolean noMatchOk;
+char *whereClause;
 boolean noRandom;
 
 struct cart *cart = NULL; /* hgFind assumes this global */
 
 void prIndent(int indent, char *format, ...)
 /* print with indentation */
 {
 va_list args;
 
 fprintf(stderr, "%*s", 2*indent, "");
 va_start(args, format);
 vfprintf(stderr, format, args);
 va_end(args);
 }
 
@@ -205,31 +210,31 @@
     pos->chromStart = 0;
     pos->chromEnd = hChromSize(db, pos->chrom);
     slAddHead(&posTab->posList, pos);
     positions->posCount++;
     }
 slReverse(&posTab->posList);
 return positions;
 }
 
 struct hgPositions *findPositions(char *db, char *spec)
 /* query database with hgFind algorithm */
 {
 struct hgPositions *positions;
 verbose(2, "begin position query: %s\n", spec);
 
-if (sameString(spec, "-"))
+if (sameString(spec, ""))
     positions = findAllChroms(db);
 else
     positions = hgPositionsFind(db, spec, NULL, "hgGetAnn", NULL, FALSE);
 
 verbose(2, "end position query: %d matches\n", countFindMatches(positions));
 if (verboseLevel() >= 2)
     printPositionsList(positions);
 if ((!noMatchOk) && (countFindMatches(positions) == 0))
     errAbort("Error: no matches to find query");
 return positions;
 }
 
 void checkTableFields(struct hTableInfo *tableInfo, boolean requireName)
 /* check that the specified table has the required fields */
 {
@@ -264,30 +269,44 @@
 fputc('\n', outFh);
 hFreeConn(&conn);
 }
 
 FILE* outputOpen(char *db, char *tabFile, struct hTableInfo *tableInfo)
 /* open output file and write optional header */
 {
 FILE* outFh = mustOpen(tabFile, "w");
 
 if (colHeaders || tsvHeaders)
     writeHeaders(db, outFh, tableInfo);
 
 return outFh;
 }
 
+char *getExtraWhereClause(void)
+/* Return the -where option text wrapped as " AND (clause)", or an empty string
+ * if no -where was given.  WARNING: static return, refilled on every call. */
+{
+static struct dyString *buf = NULL;  /* created on first use, then reused */
+if (whereClause == NULL)
+    return "";
+if (buf == NULL)
+    buf = dyStringNew(1024);
+dyStringClear(buf);
+dyStringPrintf(buf, " AND (%s)", whereClause);
+return dyStringContents(buf);
+}
+
 void outputRow(FILE *outFh, char **row, int numCols)
 /* output a row, which should already be adjusted to include/exclude the bin
  * column */
 {
 int i;
 for (i = 0; i < numCols; i++)
     {
     if (i > 0)
         fputc('\t', outFh);
     fputs(row[i], outFh);
     }
 fputc('\n', outFh);
 }
 
 boolean sameStringNull(char *str1, char* str2)
@@ -341,91 +360,93 @@
 return rowCnt;
 }
 
 int outputByChromRange(char *db, FILE *outFh, struct hTableInfo *tableInfo, struct hgPos *pos)
 /* output a hgPos by overlaping chrom range */
 {
 struct sqlConnection *conn = hAllocConn(db);
 struct sqlResult *sr;
 int rowOff = 0;
 int rowCnt = 0;
 
 /* start query */
 if ((pos->chromStart == 0) && (pos->chromEnd >= hChromSize(db, pos->chrom)))
     {
     /* optimize full chromosome query */
-    sr = hChromQuery(conn, tableInfo->rootName, pos->chrom, NULL, &rowOff);
+    sr = hChromQuery(conn, tableInfo->rootName, pos->chrom, whereClause, &rowOff);
     }
 else
     {
     /* chromosome range query */
     sr = hRangeQuery(conn, tableInfo->rootName, pos->chrom, 
                      pos->chromStart, pos->chromEnd,
-                     NULL, &rowOff);
+                     whereClause, &rowOff);
     }
 
 rowCnt = outputRows(outFh, tableInfo, sr, NULL);
 
 sqlFreeResult(&sr);
 hFreeConn(&conn);
 return rowCnt;
 }
 
 int outputChromRangeHits(char *db, FILE *outFh, struct hTableInfo *tableInfo, struct hgPosTable *posTab)
 /* output for a chromosome ranges query, where hgPosTable is for chromInfo */
 {
 struct hgPos *pos;
 int rowCnt = 0;
 checkTableFields(tableInfo, FALSE);
 for (pos = posTab->posList; pos != NULL; pos = pos->next)
     rowCnt += outputByChromRange(db, outFh, tableInfo, pos);
 return rowCnt;
 }
 
 int outputByName(char *db, FILE *outFh, struct hTableInfo *tableInfo, char *realTable, struct hgPos *pos)
 /* Output results where there is a name and no chrom range hgPos. Actual table
  * name must be supplied, as hgPos does not have a chrom. */
 {
 struct sqlConnection *conn = hAllocConn(db);
 struct sqlResult *sr;
 char query[512];
 int rowCnt = 0;
 
-sqlSafef(query, sizeof(query), "select * from %s where (%s = '%s')",
-      realTable, tableInfo->nameField, pos->name);
+sqlSafef(query, sizeof(query), "select * from %s where (%s = '%s') %-s",
+         realTable, tableInfo->nameField, pos->name,
+         getExtraWhereClause());  /* %-s: raw SQL fragment from -where, must not be escaped/checked as an identifier */
 
 sr = sqlGetResult(conn, query);
 rowCnt = outputRows(outFh, tableInfo, sr, NULL);
 
 sqlFreeResult(&sr);
 hFreeConn(&conn);
 return rowCnt;
 }
 
 int outputByPosition(char *db, FILE *outFh, struct hTableInfo *tableInfo, struct hgPos *pos)
 /* Output results where there is a name and chrom location hgPos. */
 {
 struct sqlConnection *conn = hAllocConn(db);
 struct sqlResult *sr;
 char query[512];
 int rowCnt = 0;
 
-sqlSafef(query, sizeof(query), "select * from %s where (%s = '%s') and (%s = %d) and (%s = %d)",
+sqlSafef(query, sizeof(query), "select * from %s where (%s = '%s') and (%s = %d) and (%s = %d) %-s",
          getTableName(pos->chrom, tableInfo), 
          tableInfo->chromField, pos->chrom,
          tableInfo->startField, pos->chromStart, 
-      tableInfo->endField, pos->chromEnd);
+         tableInfo->endField, pos->chromEnd,
+         getExtraWhereClause());  /* %-s: raw SQL fragment from -where, must not be escaped/checked as an identifier */
 
 sr = sqlGetResult(conn, query);
 rowCnt = outputRows(outFh, tableInfo, sr, pos);
 
 sqlFreeResult(&sr);
 hFreeConn(&conn);
 return rowCnt;
 }
 
 int outputTablePosHits(char *db, FILE *outFh, struct hTableInfo *tableInfo, struct hgPos *pos)
 /* Output results for when query matches requested table. */
 {
 int rowCnt = 0;
 
 /* handle different cases */
@@ -516,21 +537,22 @@
     outputResults(db, outFh, tableInfo, positions);
     carefulClose(&outFh);
     }
 }
 
 int main(int argc, char *argv[])
 /* Process command line. */
 {
 optionInit(&argc, argv, optionSpec);
 if (argc != 5)
     usage("wrong # of args");
 colHeaders = optionExists("colHeaders");
 tsvHeaders = optionExists("tsvHeaders");
 keepBin = optionExists("keepBin");
 noMatchOk = optionExists("noMatchOk");
+whereClause = optionVal("where", NULL);
 noRandom = optionExists("noRandom");
 
 cgiSpoof(&argc, argv);
 hgGetAnn(argv[1], argv[2], argv[3], argv[4]);
 return 0;
 }