8b49d2145ae719c3b3577de7956f7b9131c385ba
angie
  Wed Jan 7 09:43:45 2015 -0800
Libified code from hgVai/libifyMe.c to lib/hAnno.c in anticipation of sharing code with hgAi.  Also added hAnnoGetAutoSqlForTdb for hgAi.

diff --git src/hg/lib/hAnno.c src/hg/lib/hAnno.c
new file mode 100644
index 0000000..419ff0f
--- /dev/null
+++ src/hg/lib/hAnno.c
@@ -0,0 +1,303 @@
+/* hAnno -- helpers for creating anno{Streamers,Grators,Formatters,Queries} */
+
+#include "common.h"
+#include "hAnno.h"
+#include "basicBed.h"
+#include "customTrack.h"
+#include "grp.h"
+#include "hdb.h"
+#include "hubConnect.h"
+#include "hui.h"
+#include "jksql.h"
+#include "pgSnp.h"
+#include "vcf.h"
+#include "annoGratorQuery.h"
+#include "annoGratorGpVar.h"
+#include "annoStreamBigBed.h"
+#include "annoStreamBigWig.h"
+#include "annoStreamDb.h"
+#include "annoStreamDbFactorSource.h"
+#include "annoStreamTab.h"
+#include "annoStreamVcf.h"
+#include "annoStreamWig.h"
+#include "annoGrateWigDb.h"
+#include "annoFormatTab.h"
+#include "annoFormatVep.h"
+
+//#*** duplicated in hgVarAnnoGrator and annoGratorTester
+struct annoAssembly *hAnnoGetAssembly(char *db)
+/* Return the annoAssembly for db.  It is made on the first call only; later calls return
+ * that same object whatever db they pass.  errAborts if hDbDbNibPath(db) returns NULL. */
+{
+static struct annoAssembly *cachedAssembly = NULL;
+if (cachedAssembly != NULL)
+    return cachedAssembly;
+char *seqDir = hDbDbNibPath(db);
+if (seqDir == NULL)
+    errAbort("Can't find .2bit for db '%s'", db);
+char rawPath[HDB_MAX_PATH_STRING];
+safef(rawPath, sizeof(rawPath), "%s/%s.2bit", seqDir, db);
+char *twoBitFile = hReplaceGbdb(rawPath);
+cachedAssembly = annoAssemblyNew(db, twoBitFile);
+freeMem(twoBitFile);
+return cachedAssembly;
+}
+
+static boolean columnsMatch(struct asObject *asObj, struct sqlFieldInfo *fieldList)
+/* Return TRUE if asObj's column names equal fieldList's field names, in order; NULL asObj never
+ * matches.  A leading SQL "bin" field is skipped unless asObj's first column is named "bin". */
+{
+if (asObj == NULL)
+    return FALSE;
+struct asColumn *col = asObj->columnList;
+struct sqlFieldInfo *fi = fieldList;
+if (sameString("bin", fi->field) && differentString("bin", col->name))
+    fi = fi->next;
+// Walk both lists in step; the first differing name settles it.
+while (fi != NULL && col != NULL)
+    {
+    if (differentString(fi->field, col->name))
+        return FALSE;
+    fi = fi->next;
+    col = col->next;
+    }
+// Leftovers on either side mean the two lists differ in length.
+if (fi != NULL || col != NULL)
+    return FALSE;
+return TRUE;
+}
+
+static struct asObject *asObjectFromFields(char *name, struct sqlFieldInfo *fieldList,
+                                           boolean skipBin)
+/* Make autoSql text for table name from SQL fields and return the asParseText result.
+ * If skipBin, a first field named "bin" is left out.  errAborts on an unmappable SQL type. */
+{
+struct dyString *dy = dyStringCreate("table %s\n"
+                                     "\"Column names grabbed from mysql\"\n"
+                                     "    (\n", name);
+struct sqlFieldInfo *field;
+for (field = fieldList;  field != NULL;  field = field->next)
+    {
+    if (skipBin && field == fieldList && sameString("bin", field->field))
+        continue;
+    // "float unsigned" (e.g. hg19.wgEncodeOpenChromSynthGm12878Pk.pValue) is treated as float.
+    char *sqlType = sameString(field->type, "float unsigned") ? "float" : field->type;
+    char *asType = asTypeNameFromSqlType(sqlType);
+    if (asType == NULL)
+        errAbort("No asTypeInfo for sql type '%s'!", field->type);
+    dyStringPrintf(dy, "    %s %s;\t\"\"\n", asType, field->field);
+    }
+dyStringAppend(dy, "    )\n");
+struct asObject *asObj = asParseText(dy->string);
+dyStringFree(&dy);      // the text buffer was previously never released
+return asObj;
+}
+
+static struct asObject *getAutoSqlForTable(char *dataDb, char *dbTable, struct trackDb *tdb,
+                                           boolean skipBin)
+/* Get autoSql for dataDb.dbTable from tdb and/or db.tableDescriptions;
+ * if it doesn't match columns, make one up from dataDb.table sql fields.
+ * Some subtleties are lost in translation from .as to .sql, that's why
+ * we try tdb & db.tableDescriptions first.  But ultimately we need to return
+ * an asObj whose columns match all fields of the table.
+ * tdb may be NULL, in which case the autoSql always comes from the sql fields.
+ * skipBin is passed on to asObjectFromFields and only affects the made-up autoSql. */
+{
+// One connection serves both lookups; each used to open its own on dataDb.
+struct sqlConnection *conn = hAllocConn(dataDb);
+struct sqlFieldInfo *fieldList = sqlFieldInfoGet(conn, dbTable);
+struct asObject *asObj = NULL;
+if (tdb != NULL)
+    asObj = asForTdb(conn, tdb);
+hFreeConn(&conn);
+if (!columnsMatch(asObj, fieldList))
+    asObj = asObjectFromFields(dbTable, fieldList, skipBin);
+// The field list is needed only for the comparison and synthesis above; it used to leak.
+sqlFieldInfoFreeList(&fieldList);
+return asObj;
+}
+
+static char *getBigDataFileName(char *db, struct trackDb *tdb, char *selTable, char *chrom)
+/* Return the file name from a bigBed/bigWig/BAM/VCF db table, or a custom track's bigDataUrl. */
+{
+struct sqlConnection *dbConn = hAllocConn(db);
+char *bigDataPath = bbiNameFromSettingOrTableChrom(tdb, dbConn, selTable, chrom);
+hFreeConn(&dbConn);
+return bigDataPath;
+}
+
+struct annoStreamer *hAnnoStreamerFromTrackDb(struct annoAssembly *assembly, char *selTable,
+                                              struct trackDb *tdb, char *chrom, int maxOutRows)
+/* Make the annoStreamer that suits tdb->type.  A NULL chrom is replaced by db's default chrom.
+ * BAM is not supported: a warning is issued and NULL is returned. */
+{
+char *db = assembly->name;
+if (chrom == NULL)
+    chrom = hDefaultChrom(db);
+// Custom tracks kept in database tables are found in CUSTOM_TRASH under their dbTableName.
+char *tableDb = db, *tableName = selTable;
+if (isCustomTrack(selTable))
+    {
+    tableName = trackDbSetting(tdb, "dbTableName");
+    if (tableName != NULL)
+        tableDb = CUSTOM_TRASH;
+    }
+char *type = tdb->type;
+if (startsWithWord("wig", type))
+    return annoStreamWigDbNew(tableDb, tableName, assembly, maxOutRows);
+if (sameString("vcfTabix", type))
+    {
+    char *tabixFile = getBigDataFileName(tableDb, tdb, selTable, chrom);
+    return annoStreamVcfNew(tabixFile, TRUE, assembly, maxOutRows);
+    }
+if (sameString("vcf", type))
+    {
+    char *vcfFile = getBigDataFileName(tableDb, tdb, tableName, chrom);
+    return annoStreamVcfNew(vcfFile, FALSE, assembly, maxOutRows);
+    }
+if (sameString("bam", type))
+    {
+    warn("Sorry, BAM is not yet supported");
+    return NULL;
+    }
+if (startsWith("bigBed", type))
+    {
+    char *bigBedFile = getBigDataFileName(tableDb, tdb, selTable, chrom);
+    return annoStreamBigBedNew(bigBedFile, assembly, maxOutRows);
+    }
+if (startsWith("bigWig", type))
+    {
+    char *bigWigFile = getBigDataFileName(tableDb, tdb, selTable, chrom);
+    return annoStreamBigWigNew(bigWigFile, assembly); //#*** no maxOutRows support
+    }
+if (sameString("factorSource", type))
+    {
+    char *sourceTable = trackDbSetting(tdb, "sourceTable");
+    char *inputsTable = trackDbSetting(tdb, "inputTrackTable");
+    return annoStreamDbFactorSourceNew(tableDb, tdb->track, sourceTable, inputsTable, assembly,
+                                       maxOutRows);
+    }
+// Anything else is read as a database table: tableName itself, or else <chrom>_<tableName>.
+struct sqlConnection *conn = hAllocConn(tableDb);
+char maybeSplitTable[1024];
+if (sqlTableExists(conn, tableName))
+    safecpy(maybeSplitTable, sizeof(maybeSplitTable), tableName);
+else
+    safef(maybeSplitTable, sizeof(maybeSplitTable), "%s_%s", chrom, tableName);
+hFreeConn(&conn);
+struct asObject *asObj = getAutoSqlForTable(tableDb, maybeSplitTable, tdb, TRUE);
+return annoStreamDbNew(tableDb, maybeSplitTable, assembly, asObj, maxOutRows);
+}
+
+struct annoGrator *hAnnoGratorFromBigFileUrl(char *fileOrUrl, struct annoAssembly *assembly,
+                                             int maxOutRows, enum annoGratorOverlap overlapRule)
+/* Determine what kind of big data file/url we have and make streamer & grator for it.
+ * errAborts if the type is unknown (NULL or unrecognized) or BAM.  overlapRule is applied. */
+{
+struct annoGrator *grator = NULL;
+char *type = customTrackTypeFromBigFile(fileOrUrl);
+if (type == NULL)       // keep a NULL type away from the sameString tests below
+    errAbort("Unable to determine bigData type of file or url '%s'", fileOrUrl);
+else if (sameString(type, "bigBed"))
+    grator = annoGratorNew(annoStreamBigBedNew(fileOrUrl, assembly, maxOutRows));
+else if (sameString(type, "vcfTabix"))
+    grator = annoGratorNew(annoStreamVcfNew(fileOrUrl, TRUE, assembly, maxOutRows));
+else if (sameString(type, "bigWig"))
+    grator = annoGrateBigWigNew(fileOrUrl, assembly);   // maxOutRows is not passed for bigWig
+else if (sameString(type, "bam"))
+    errAbort("Sorry, BAM is not yet supported");
+else
+    errAbort("Unrecognized bigData type %s of file or url '%s'", type, fileOrUrl);
+grator->setOverlapRule(grator, overlapRule);
+return grator;
+}
+
+struct annoGrator *hAnnoGratorFromTrackDb(struct annoAssembly *assembly, char *selTable,
+                                          struct trackDb *tdb, char *chrom, int maxOutRows,
+                                          struct asObject *primaryAsObj,
+                                          enum annoGratorOverlap overlapRule)
+/* Make an annoStreamer for tdb's source/type & wrap it in an annoGrator (NULL if no streamer).
+ * If not NULL, primaryAsObj is used to determine whether we can make an annoGratorGpVar. */
+{
+struct annoGrator *grator = NULL;
+char *bigDataUrl = trackDbSetting(tdb, "bigDataUrl");
+if (bigDataUrl != NULL)
+    grator = hAnnoGratorFromBigFileUrl(bigDataUrl, assembly, maxOutRows, overlapRule);
+else if (startsWithWord("wig", tdb->type))
+    grator = annoGrateWigDbNew(assembly->name, selTable, assembly, maxOutRows);
+else
+    {
+    struct annoStreamer *streamer = hAnnoStreamerFromTrackDb(assembly, selTable, tdb, chrom,
+                                                             maxOutRows);
+    if (streamer == NULL)   // e.g. BAM, which hAnnoStreamerFromTrackDb only warns about
+        return NULL;
+    boolean useGpVar = (primaryAsObj != NULL
+        && (asObjectsMatch(primaryAsObj, pgSnpAsObj()) || asObjectsMatch(primaryAsObj, vcfAsObj()))
+        && asColumnNamesMatchFirstN(streamer->asObj, genePredAsObj(), 10));
+    grator = useGpVar ? annoGratorGpVarNew(streamer) : annoGratorNew(streamer);
+    }
+grator->setOverlapRule(grator, overlapRule);
+return grator;
+}
+
+static struct asObject *getAutoSqlForType(char *db, char *chrom, struct trackDb *tdb)
+/* Return an asObject for tdb->type if recognized as a hub or custom track type, else NULL
+ * (BAM and maf get a warning and NULL).  db and chrom are only used to find a bigBed's file. */
+{
+struct asObject * asObj = NULL;
+// Match "wig" as a whole word, as hAnnoStreamerFromTrackDb does, so wigMaf is not taken for wig.
+if (startsWithWord("wig", tdb->type) || startsWith("bigWig", tdb->type))
+    asObj = annoStreamBigWigAsObject();
+else if (startsWith("vcf", tdb->type))
+    asObj = vcfAsObj();
+else if (startsWith("bigBed", tdb->type))
+    {
+    char *fileOrUrl = getBigDataFileName(db, tdb, tdb->table, chrom);
+    asObj = bigBedFileAsObjOrDefault(fileOrUrl);
+    }
+else if (sameString("pgSnp", tdb->type))
+    asObj = pgSnpAsObj();
+else if (sameString("bam", tdb->type) || sameString("maf", tdb->type))
+    warn("Sorry, %s is not yet supported", tdb->type);
+else if (startsWithWord("bed", tdb->type) && !strchr(tdb->type, '+'))
+    {
+    // BED with no + fields; parse bed field count out of type line (3 if none is given).
+    char typeCopy[PATH_LEN];
+    safecpy(typeCopy, sizeof(typeCopy), tdb->type);
+    char *words[8];
+    int wordCount = chopLine(typeCopy, words);
+    int bedFieldCount = (wordCount > 1) ? atoi(words[1]) : 3;
+    asObj = asParseText(bedAsDef(bedFieldCount, bedFieldCount));
+    }
+return asObj;
+}
+
+struct asObject *hAnnoGetAutoSqlForTdb(char *db, char *chrom, struct trackDb *tdb)
+/* If possible, return the asObj that a streamer for this track would use, otherwise NULL.
+ * NULL is returned for hub tracks of unrecognized type and for custom tracks that have no
+ * dbTableName setting.  If chrom is NULL, db's default chrom is used to name a split table. */
+{
+struct asObject *asObj = getAutoSqlForType(db, chrom, tdb);
+if (asObj != NULL || isHubTrack(tdb->track))
+    return asObj;
+// If none of the above, it must be a database table; deduce autoSql from sql fields.
+char *dataDb = db, *dbTable = tdb->table;
+if (isCustomTrack(tdb->track))
+    {
+    dbTable = trackDbSetting(tdb, "dbTableName");
+    if (dbTable == NULL)
+        return NULL;
+    dataDb = CUSTOM_TRASH;
+    }
+struct sqlConnection *conn = hAllocConn(dataDb);
+char maybeSplitTable[1024];
+// Use dbTable itself if it exists; otherwise try <chrom>_<dbTable>, never formatting a NULL chrom.
+if (sqlTableExists(conn, dbTable))
+    safecpy(maybeSplitTable, sizeof(maybeSplitTable), dbTable);
+else
+    safef(maybeSplitTable, sizeof(maybeSplitTable), "%s_%s",
+          (chrom != NULL) ? chrom : hDefaultChrom(db), dbTable);
+hFreeConn(&conn);
+return getAutoSqlForTable(dataDb, maybeSplitTable, tdb, TRUE);
+}
+