src/hg/hgGetAnn/hgGetAnn.c 1.7

1.7 2010/02/12 17:56:12 angie
Added cgiSpoof so the new cgiOptionalString in hgFind.c doesn't make cgiInit freak out.
Index: src/hg/hgGetAnn/hgGetAnn.c
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/hg/hgGetAnn/hgGetAnn.c,v
retrieving revision 1.6
retrieving revision 1.7
diff -b -B -U 1000000 -r1.6 -r1.7
--- src/hg/hgGetAnn/hgGetAnn.c	3 Sep 2008 19:18:55 -0000	1.6
+++ src/hg/hgGetAnn/hgGetAnn.c	12 Feb 2010 17:56:12 -0000	1.7
@@ -1,532 +1,534 @@
 /* hgGetAnn - get chromosome annotation rows from database tables using
  * browser-style position specification.  */
 #include "common.h"
 #include "options.h"
 #include "verbose.h"
 #include "hdb.h"
 #include "hgFind.h"
 #include "jksql.h"
+#include "cheapcgi.h"
 
 static char const rcsid[] = "$Id$";
 
 void usage(char *msg)
 /* Explain usage and exit. */
 {
 errAbort(
     "%s\n"
     "hgGetAnn - get chromosome annotation rows from database tables using\n"
     "           browser-style position specification.\n"
     "\n"
     "usage:\n"
     "   hgGetAnn [options] db table spec tabfile\n"
     "\n"
     "Get chromosome annotation rows from tables.  This takes a browser-style\n"
     "position specification and retrieves matching rows from the database.\n"
     "table. Output is a tab-separated file with optional header.  The bin \n"
     "column of the table will be not be included in the output. If the spec\n"
     "contains whitespace or shell meta-characters, it must be quoted.\n"
     "For split tables, the leading chrN_ should be omitted.  Use `mrna' or\n"
     "`est' for mRNAs/ESTs.  If spec is \"-\", then all rows are retrieved.\n"
     "This will even work for split tables.\n"
     "\n"
     "Options:\n"
     "   -colHeaders - Include column headers with leading #\n"
     "   -tsvHeaders - Include TSV style column headers\n"
     "   -keepBin - don't exclude bin column\n"
     "   -noMatchOk - don't generated an error if nothing is found\n"
     "   -noRandom - Exclude *_random chromsomes\n"
     "   -verbose=n - 2 is basic info, 3 prints positions found\n",
     msg);
 }
 
 
 /* command line */
 static struct optionSpec optionSpec[] = {
     {"colHeaders", OPTION_BOOLEAN},
     {"tsvHeaders", OPTION_BOOLEAN},
     {"keepBin", OPTION_BOOLEAN},
     {"noMatchOk", OPTION_BOOLEAN},
     {"noRandom", OPTION_BOOLEAN},
     {NULL, 0}
 };
 
 boolean colHeaders;
 boolean tsvHeaders;
 boolean keepBin;
 boolean noMatchOk;
 boolean noRandom;
 
 struct cart *cart = NULL; /* hgFind assumes this global */
 
 void prIndent(int indent, char *format, ...)
 /* print with indentation */
 {
 va_list args;
 
 fprintf(stderr, "%*s", 2*indent, "");
 va_start(args, format);
 vfprintf(stderr, format, args);
 va_end(args);
 }
 
 void printHgPos(int indent, struct hgPos *pos)
 /* print a hgPos struct */
 {
 prIndent(indent, "%s", pos->name);
 if (pos->chrom != NULL)
     fprintf(stderr, "  %s:%d-%d", pos->chrom, pos->chromStart, pos->chromEnd);
 if (pos->browserName != NULL)
     fprintf(stderr, "  %s", pos->browserName);
 fprintf(stderr, "\n");
 }
 
 void printHgPosTable(int indent, struct hgPosTable *posTab)
 /* print a hgPosTable struct */
 {
 struct hgPos *pos;
 prIndent(indent, "name: %s\n", posTab->name);
 prIndent(indent, "desc: %s\n", posTab->description);
 for (pos = posTab->posList; pos != NULL; pos = pos->next)
     printHgPos(indent+1, pos);
 } 
 
 void printHgPositions(int indent, struct hgPositions *pos)
 /* print a hgPositions struct */
 {
 prIndent(indent, "query: %s\n", pos->query);
 indent++;
 prIndent(indent, "database: %s, posCount: %d, useAlias: %s\n",
          pos->database, pos->posCount, (pos->useAlias ? "true" : "false"));
 if (pos->singlePos != NULL)
     {
     prIndent(indent, "singlePos:\n");
     printHgPos(indent+1, pos->singlePos);
     assert(pos->singlePos->next == NULL);
     }
 if (pos->tableList != NULL)
     {
     struct hgPosTable *posTab;
     prIndent(indent, "tableList:\n");
     for (posTab = pos->tableList; posTab != NULL; posTab = posTab->next)
         printHgPosTable(indent+1, posTab);
     }
 } 
 
 void printPositionsList(struct hgPositions *positions)
 /* print a list of positions */
 {
 struct hgPositions *pos;
 fprintf(stderr, "Returned positions:\n");
 for (pos = positions; pos != NULL; pos = pos->next)
     printHgPositions(1, pos);
 } 
 
 boolean inclChrom(char *chrom)
 /* test if chromsome should be included, NULL chrom returns TRUE. */
 {
 if (noRandom)
     return (strstr(chrom, "_random") == NULL);
 else
     return TRUE;
 }
 
 int getResultCol(struct sqlResult *sr, char *colName)
 /* get the column in a result set, or -1 if it's not defined */
 {
 int iCol = -1;
 if ((colName != NULL) && (strlen(colName) > 0))
     {
     iCol = sqlFieldColumn(sr, colName);
     if (iCol < 0)
         errAbort("field %s not in result set", colName);
     }
 return iCol;
 }
 
 int countFindMatches(struct hgPositions *positions)
 /* count number of matches to query */
 {
 int numMatches = 0;
 struct hgPositions *pos;
 for (pos = positions; pos != NULL; pos = pos->next)
     numMatches += pos->posCount;
 return numMatches;
 }
 
 char *getTableName(char *chrom, struct hTableInfo *tableInfo)
 /* get the actual table name, given a chrom.  Note: static return */
 {
 static char tableName[256];
 if (tableInfo->isSplit)
     safef(tableName, sizeof(tableName), "%s_%s", chrom,
           tableInfo->rootName);
 else
     safef(tableName, sizeof(tableName), "%s", tableInfo->rootName);
 return tableName;
 }
 
 char *getTableDesc(struct hTableInfo *tableInfo)
 /* get a description of a table to use in error messages.  This describes
  * split tables as chr*_xxx, to make it clear.  Note: static return */
 {
 static char tableDesc[256];
 if (tableInfo->isSplit)
     safef(tableDesc, sizeof(tableDesc), "chr*_%s", tableInfo->rootName);
 else
     safef(tableDesc, sizeof(tableDesc), "%s", tableInfo->rootName);
 return tableDesc;
 }
 
 struct hgPositions* findAllChroms(char *db)
 /* generate a hgPositions record for the full range of all chromsomes */
 {
 struct hgPositions *positions;
 struct hgPosTable *posTab;
 struct slName *chrom;
 
 /* setup s hgPositions object */
 AllocVar(positions);
 positions->query = cloneString("-");
 positions->database = db;
 
 AllocVar(posTab);
 posTab->name = "chromInfo";
 posTab->description = "all rows";
 positions->tableList = posTab;
 
 for (chrom = hAllChromNames(db); chrom != NULL; chrom = chrom->next)
     {
     struct hgPos *pos;
     AllocVar(pos);
     pos->chrom = chrom->name;
     pos->chromStart = 0;
     pos->chromEnd = hChromSize(db, pos->chrom);
     slAddHead(&posTab->posList, pos);
     positions->posCount++;
     }
 slReverse(&posTab->posList);
 return positions;
 }
 
 struct hgPositions *findPositions(char *db, char *spec)
 /* query database with hgFind algorithm */
 {
 struct hgPositions *positions;
 verbose(2, "begin position query: %s\n", spec);
 
 if (sameString(spec, "-"))
     positions = findAllChroms(db);
 else
     positions = hgPositionsFind(db, spec, NULL, "hgGetAnn", NULL, FALSE);
 
 verbose(2, "end position query: %d matches\n", countFindMatches(positions));
 if (verboseLevel() >= 2)
     printPositionsList(positions);
 if ((!noMatchOk) && (countFindMatches(positions) == 0))
     errAbort("Error: no matches to find query");
 return positions;
 }
 
 void checkTableFields(struct hTableInfo *tableInfo, boolean requireName)
 /* check that the specified table has the required fields */
 {
 if (strlen(tableInfo->chromField) == 0)
     errAbort("Error: table %s doesn't have a chrom name field", getTableDesc(tableInfo));
 if ((strlen(tableInfo->startField) == 0) || (strlen(tableInfo->endField) == 0))
     errAbort("Error: table %s doesn't have a chromosome start or end fields", getTableDesc(tableInfo));
 if (requireName && (strlen(tableInfo->nameField) == 0))
     errAbort("Error: table %s doesn't have a name field", getTableDesc(tableInfo));
 }
 
 void writeHeaders(char *db, FILE *outFh, struct hTableInfo *tableInfo)
 /* write column headers */
 {
 char *tableName = getTableName(hDefaultChrom(db), tableInfo);
 struct sqlConnection *conn = hAllocConn(db);
 struct slName *fields, *fld;
 
 if (colHeaders)
     fputc('#', outFh);
 
 fields = sqlListFields(conn, tableName);
 if (tableInfo->hasBin && !keepBin)
     fields = fields->next;  /* skipBin */
 
 for (fld = fields; fld != NULL; fld = fld->next)
     {
     if (fld != fields)
         fputc('\t', outFh);
     fputs(fld->name, outFh);
     }
 fputc('\n', outFh);
 hFreeConn(&conn);
 }
 
 FILE* outputOpen(char *db, char *tabFile, struct hTableInfo *tableInfo)
 /* open output file and write optional header */
 {
 FILE* outFh = mustOpen(tabFile, "w");
 
 if (colHeaders || tsvHeaders)
     writeHeaders(db, outFh, tableInfo);
 
 return outFh;
 }
 
 void outputRow(FILE *outFh, char **row, int numCols)
 /* output a row, which should already be adjusted to include/exclude the bin
  * column */
 {
 int i;
 for (i = 0; i < numCols; i++)
     {
     if (i > 0)
         fputc('\t', outFh);
     fputs(row[i], outFh);
     }
 fputc('\n', outFh);
 }
 
 boolean sameStringNull(char *str1, char* str2)
 /* compare strings, allow NULLs (which always return false) */
 {
 if ((str1 == NULL)  || (str2 == NULL))
     return FALSE;
 else
     return sameString(str1, str2);
 }
 
 boolean inclRow(char **row, int chromCol, int nameCol, struct hgPos *nameSelect)
 /* check if a row should be included based on check optional criteria
  * of chrom and name */
 {
 if ((chromCol >= 0) && !inclChrom(row[chromCol]))
     return FALSE;
 if ((nameCol >= 0) && (nameSelect != NULL))
     {
     if (!(sameStringNull(row[nameCol], nameSelect->name)
           || sameStringNull(row[nameCol], nameSelect->browserName)))
         return FALSE;
     }
 return TRUE;                                                                            
 }
 
 int outputRows(FILE *outFh, struct hTableInfo *tableInfo, struct sqlResult *sr,
                struct hgPos *nameSelect)
 /* read query resuts and output rows.  Name will be checked against
  * the  names in nameSelect if not null */
 {
 int chromCol =  getResultCol(sr, tableInfo->chromField);
 int nameCol = -1;
 int rowOff = tableInfo->hasBin ? 1 : 0;
 int numCols, rowCnt = 0;
 char **row;
 if (nameSelect != NULL)
     nameCol = getResultCol(sr, tableInfo->nameField);
 if (keepBin)
     rowOff = 0;  /* force bin to be included */
 numCols = sqlCountColumns(sr) - rowOff;
 
 while ((row = sqlNextRow(sr)) != NULL)
     {
     if (inclRow(row, chromCol, nameCol, nameSelect))
         {
         outputRow(outFh, row+rowOff, numCols);
         rowCnt++;
         }
     }
 return rowCnt;
 }
 
 int outputByChromRange(char *db, FILE *outFh, struct hTableInfo *tableInfo, struct hgPos *pos)
 /* output a hgPos by overlaping chrom range */
 {
 struct sqlConnection *conn = hAllocConn(db);
 struct sqlResult *sr;
 int rowOff = 0;
 int rowCnt = 0;
 
 /* start query */
 if ((pos->chromStart == 0) && (pos->chromEnd >= hChromSize(db, pos->chrom)))
     {
     /* optimize full chromosome query */
     sr = hChromQuery(conn, tableInfo->rootName, pos->chrom, NULL, &rowOff);
     }
 else
     {
     /* chromosome range query */
     sr = hRangeQuery(conn, tableInfo->rootName, pos->chrom, 
                      pos->chromStart, pos->chromEnd,
                      NULL, &rowOff);
     }
 
 rowCnt = outputRows(outFh, tableInfo, sr, NULL);
 
 sqlFreeResult(&sr);
 hFreeConn(&conn);
 return rowCnt;
 }
 
 int outputChromRangeHits(char *db, FILE *outFh, struct hTableInfo *tableInfo, struct hgPosTable *posTab)
 /* output for a chromosome ranges query, where hgPosTable is for chromInfo */
 {
 struct hgPos *pos;
 int rowCnt = 0;
 checkTableFields(tableInfo, FALSE);
 for (pos = posTab->posList; pos != NULL; pos = pos->next)
     rowCnt += outputByChromRange(db, outFh, tableInfo, pos);
 return rowCnt;
 }
 
 int outputByName(char *db, FILE *outFh, struct hTableInfo *tableInfo, char *realTable, struct hgPos *pos)
 /* Output results where there is a name and no chrom range hgPos. Actual table
  * name must be supplied, as hgPos does not have a chrom. */
 {
 struct sqlConnection *conn = hAllocConn(db);
 struct sqlResult *sr;
 char query[512];
 int rowCnt = 0;
 
 safef(query, sizeof(query), "select * from %s where (%s = '%s')",
       realTable, tableInfo->nameField, pos->name);
 
 sr = sqlGetResult(conn, query);
 rowCnt = outputRows(outFh, tableInfo, sr, NULL);
 
 sqlFreeResult(&sr);
 hFreeConn(&conn);
 return rowCnt;
 }
 
 int outputByPosition(char *db, FILE *outFh, struct hTableInfo *tableInfo, struct hgPos *pos)
 /* Output results where there is a name and chrom location hgPos. */
 {
 struct sqlConnection *conn = hAllocConn(db);
 struct sqlResult *sr;
 char query[512];
 int rowCnt = 0;
 
 safef(query, sizeof(query), "select * from %s where (%s = '%s') and (%s = %d) and (%s = %d)",
       getTableName(pos->chrom, tableInfo), 
       tableInfo->chromField, pos->chrom,
       tableInfo->startField, pos->chromStart, 
       tableInfo->endField, pos->chromEnd);
 
 sr = sqlGetResult(conn, query);
 rowCnt = outputRows(outFh, tableInfo, sr, pos);
 
 sqlFreeResult(&sr);
 hFreeConn(&conn);
 return rowCnt;
 }
 
 int outputTablePosHits(char *db, FILE *outFh, struct hTableInfo *tableInfo, struct hgPos *pos)
 /* Output results for when query matches requested table. */
 {
 int rowCnt = 0;
 
 /* handle different cases */
 if (pos->chrom != NULL)
     {
     /* have exact location */
     rowCnt = outputByPosition(db, outFh, tableInfo, pos);
     }
 else if (!tableInfo->isSplit)
     {
     /* table not split */
     rowCnt += outputByName(db, outFh, tableInfo, tableInfo->rootName, pos);
     }
 else if (pos->chrom != NULL) 
    {
     /* split table, but we have chrom */
     rowCnt += outputByName(db, outFh, tableInfo, getTableName(pos->chrom, tableInfo), pos);
     }
 else 
     {
     /* got to try each chrom */
     struct slName *chrom;
     for (chrom = hAllChromNames(db); chrom != NULL; chrom = chrom->next)
         {
         char *table = getTableName(chrom->name, tableInfo);
         if (hTableExists(db, table))
             rowCnt += outputByName(db, outFh, tableInfo, table, pos);
         }
     }
 return rowCnt;
 }
 
 int outputTableHits(char *db, FILE *outFh, struct hTableInfo *tableInfo, struct hgPosTable *posTab)
 /* output results where table is requested table hgPos */
 {
 struct hgPos *pos;
 int rowCnt = 0;
 checkTableFields(tableInfo, TRUE);
 for (pos = posTab->posList; pos != NULL; pos = pos->next)
     rowCnt += outputTablePosHits(db, outFh, tableInfo, pos);
 return rowCnt;
 }
 
 int outputPositions(char *db, FILE *outFh, struct hTableInfo *tableInfo, struct hgPositions *positions)
 /* output results for a single hgPositions record  */
 {
 struct hgPosTable *posTab;
 int rowCnt = 0;
 for (posTab = positions->tableList; posTab != NULL; posTab = posTab->next)
     {
     if (sameString(posTab->name, "chromInfo"))
         rowCnt += outputChromRangeHits(db, outFh, tableInfo, posTab);
     else if (sameString(posTab->name, tableInfo->rootName))
         rowCnt += outputTableHits(db, outFh, tableInfo, posTab);
     }
 return rowCnt;
 }
 
 void outputResults(char *db, FILE *outFh, struct hTableInfo *tableInfo, struct hgPositions *positions)
 /* output hits from hgFind in table */
 {
 struct hgPositions *pos;
 int numRows = 0;
 
 for (pos = positions; pos != NULL; pos = pos->next)
     numRows += outputPositions(db, outFh, tableInfo, pos);
 
 if ((!noMatchOk) && (numRows == 0))
     errAbort("Error: no table rows matching query");
 }
 
 void hgGetAnn(char *db, char *table, char *spec, char *tabFile)
 /* get chromosome annotation rows from database tables using
  * browser-style position specification. */
 {
 struct hgPositions *positions;
 struct hTableInfo *tableInfo;
 
 /* get table info upfront so don't have to wait long find for error */
 tableInfo = hFindTableInfo(db, NULL, table);
 if (tableInfo == NULL)
     errAbort("Error: no table: %s or *_%s", table, table);
 
 positions = findPositions(db, spec);
 if (positions != NULL)
     {
     FILE* outFh = outputOpen(db, tabFile, tableInfo);
     outputResults(db, outFh, tableInfo, positions);
     carefulClose(&outFh);
     }
 }
 
 int main(int argc, char *argv[])
 /* Process command line. */
 {
 optionInit(&argc, argv, optionSpec);
 if (argc != 5)
     usage("wrong # of args");
 colHeaders = optionExists("colHeaders");
 tsvHeaders = optionExists("tsvHeaders");
 keepBin = optionExists("keepBin");
 noMatchOk = optionExists("noMatchOk");
 noRandom = optionExists("noRandom");
 
+cgiSpoof(&argc, argv);
 hgGetAnn(argv[1], argv[2], argv[3], argv[4]);
 return 0;
 }