0564395ec363631f2ff8d295da6f6b50f873fda4
braney
Mon Jan 24 17:01:53 2022 -0800
more chromAlias work: some name changes and support for the new genark
chromAlias format
diff --git src/hg/lib/chromAlias.c src/hg/lib/chromAlias.c
index c5c2a59..d123ccd 100644
--- src/hg/lib/chromAlias.c
+++ src/hg/lib/chromAlias.c
@@ -1,252 +1,362 @@
/* chromAlias.c was originally generated by the autoSql program, which also
* generated chromAlias.h and chromAlias.sql. This module links the database and
* the RAM representation of objects. */
#include "common.h"
#include "linefile.h"
#include "dystring.h"
#include "jksql.h"
#include "chromAlias.h"
#include "hdb.h"
+#include "trackHub.h"
+#include "fieldedTable.h"
char *chromAliasCommaSepFieldNames = "alias,chrom,source"; /* Field names in the same order the loaders below read row[0], row[1], row[2]. */
void chromAliasStaticLoad(char **row, struct chromAlias *ret)
/* Fill in ret from a chromAlias table row without copying anything: the
 * alias, chrom and source fields are pointed straight at row[0], row[1]
 * and row[2].  Whatever ret held before is overwritten, so its contents
 * only last until the next call to this function. */
{
char **column = row;
ret->alias = *column++;
ret->chrom = *column++;
ret->source = *column;
}
struct chromAlias *chromAliasLoad(char **row)
/* Build a freshly allocated chromAlias from a row fetched with
 * select * from chromAlias.  Each of the three strings is cloned, so the
 * result does not depend on row staying alive.
 * Dispose of this with chromAliasFree(). */
{
enum { aliasCol = 0, chromCol = 1, sourceCol = 2 };
struct chromAlias *rec;
AllocVar(rec);
rec->alias = cloneString(row[aliasCol]);
rec->chrom = cloneString(row[chromCol]);
rec->source = cloneString(row[sourceCol]);
return rec;
}
struct chromAlias *chromAliasLoadAll(char *fileName)
/* Load all chromAlias from a whitespace-separated file.
* Dispose of this with chromAliasFreeList(). */
{
struct chromAlias *list = NULL, *el;
struct lineFile *lf = lineFileOpen(fileName, TRUE);
char *row[3];
while (lineFileRow(lf, row))
{
el = chromAliasLoad(row);
slAddHead(&list, el);
}
lineFileClose(&lf);
slReverse(&list);
return list;
}
struct chromAlias *chromAliasLoadAllByChar(char *fileName, char chopper)
/* Load all chromAlias from a chopper separated file.
* Dispose of this with chromAliasFreeList(). */
{
struct chromAlias *list = NULL, *el;
struct lineFile *lf = lineFileOpen(fileName, TRUE);
char *row[3];
while (lineFileNextCharRow(lf, chopper, row, ArraySize(row)))
{
el = chromAliasLoad(row);
slAddHead(&list, el);
}
lineFileClose(&lf);
slReverse(&list);
return list;
}
struct chromAlias *chromAliasCommaIn(char **pS, struct chromAlias *ret)
/* Parse alias, chrom and source, in that order, out of the comma separated
 * string at *pS and advance *pS past what was consumed.  The values go into
 * ret when it is non-null; otherwise a new chromAlias is allocated.  Either
 * way the filled-in record is returned. */
{
char *cursor = *pS;
struct chromAlias *rec = ret;
if (rec == NULL)
    AllocVar(rec);
rec->alias = sqlStringComma(&cursor);
rec->chrom = sqlStringComma(&cursor);
rec->source = sqlStringComma(&cursor);
*pS = cursor;
return rec;
}
void chromAliasFree(struct chromAlias **pEl)
/* Release one dynamically allocated chromAlias, such as chromAliasLoad()
 * returns: its three strings, then the record itself via freez() on pEl.
 * A NULL *pEl is quietly ignored. */
{
struct chromAlias *rec = *pEl;
if (rec != NULL)
    {
    freeMem(rec->alias);
    freeMem(rec->chrom);
    freeMem(rec->source);
    freez(pEl);
    }
}
void chromAliasFreeList(struct chromAlias **pList)
/* Release every element of a list of dynamically allocated chromAlias's
 * and set *pList to NULL. */
{
struct chromAlias *cur = *pList;
while (cur != NULL)
    {
    /* Grab the link first; the element is gone after chromAliasFree(). */
    struct chromAlias *following = cur->next;
    chromAliasFree(&cur);
    cur = following;
    }
*pList = NULL;
}
void chromAliasOutput(struct chromAlias *el, FILE *f, char sep, char lastSep)
/* Write alias, chrom and source to f.  Fields are separated by sep and the
 * last one is followed by lastSep.  When sep is a comma every field is
 * wrapped in double quotes. */
{
char *fields[3] = {el->alias, el->chrom, el->source};
int quoted = (sep == ',');
int i;
for (i = 0; i < 3; ++i)
    {
    if (quoted)
        fputc('"', f);
    fprintf(f, "%s", fields[i]);
    if (quoted)
        fputc('"', f);
    fputc((i < 2) ? sep : lastSep, f);
    }
}
static void chromAliasJsonPair(FILE *f, char *key, char *val)
/* Write "key":"val" to f.  The value is written as is, with no escaping. */
{
fputc('"', f);
fprintf(f, "%s", key);
fputc('"', f);
fputc(':', f);
fputc('"', f);
fprintf(f, "%s", val);
fputc('"', f);
}

void chromAliasJsonOutput(struct chromAlias *el, FILE *f)
/* Write el to f as a single JSON object with the string members
 * alias, chrom and source, in that order, and no trailing newline. */
{
fputc('{', f);
chromAliasJsonPair(f, "alias", el->alias);
fputc(',', f);
chromAliasJsonPair(f, "chrom", el->chrom);
fputc(',', f);
chromAliasJsonPair(f, "source", el->source);
fputc('}', f);
}
/* -------------------------------- End autoSql Generated Code -------------------------------- */
/* our "global" data: the alias lookup state for the single database this
 * module has been set up for, shared by the functions below */
static struct
{
+boolean inited;   /* set by chromAliasSetup() so that loading is attempted only once */
char *database;   /* name recorded by checkDatabase() on its first non-NULL call */
-struct hash *nameHash;
+struct hash *chromToAliasHash;   /* chrom name -> alias name (plain string values) */
+struct hash *aliasToChromHash;   /* alias name -> chrom name (plain string values) */
struct hash *forwardHash;   /* alias name -> struct chromAlias */
struct hash *reverseHash;   /* chrom name -> struct chromAlias, one entry per alias */
} chromHashes;
static boolean checkDatabase(char *database)
/* Make sure we are never asked about two different databases.  A NULL
 * database is always accepted and records nothing.  The first non-NULL
 * name is cloned into chromHashes.database; a later name that differs
 * from it triggers errAbort(). */
{
if (database == NULL)
    return TRUE;

if (chromHashes.database == NULL)
    {
    /* First real database seen: remember it for later comparisons. */
    chromHashes.database = cloneString(database);
    return TRUE;
    }

if (sameString(chromHashes.database, database))
    return TRUE;

errAbort("chromAliasSetup: only works for one db. %s was passed in earlier, now %s.", chromHashes.database, database);
return FALSE;
}
-void chromAliasSetup(char *database)
-/* Read in the chromAlias file/table for this database. */
+static void readOldAlias(struct lineFile *lf)
+/* Load the older alias file format into the chromHashes tables.  Each line is a
+ * chrom name followed by any number of whitespace-separated aliases; lines with
+ * fewer than two words contribute nothing.  The format carries no source column,
+ * so every record gets the dummy source "asmHub".  The four hashes in chromHashes
+ * must already exist.  lf is closed before returning. */
{
-if (!checkDatabase(database))
+char *words[1024];      /* no more than 1,024 words are taken from a line */
+char *line;
+while (lineFileNext(lf, &line, NULL))
+    {
+    int wordCount = chopByWhite(line, words, ArraySize(words));
+    int i;
+    /* words[0] is the chrom name; everything after it is an alias. */
+    for (i = 1; i < wordCount; ++i)
+        {
+        if (!isNotEmpty(words[i]))
+            continue;
+        /* The record is kept in the global hashes, so it gets its own copies. */
+        struct chromAlias *ali;
+        AllocVar(ali);
+        ali->alias = cloneString(words[i]);
+        ali->chrom = cloneString(words[0]);
+        ali->source = cloneString("asmHub");
+        hashAdd(chromHashes.forwardHash, ali->alias, ali);
+        hashAdd(chromHashes.reverseHash, ali->chrom, ali);
+        hashAdd(chromHashes.chromToAliasHash, ali->chrom, ali->alias);
+        hashAdd(chromHashes.aliasToChromHash, ali->alias, ali->chrom);
+        }
+    }
+
+lineFileClose(&lf);
+}
+
+static void readFieldedTable(struct lineFile *lf)
+/* Use the fieldedTable library to read in a chromAlias.txt file that has a header
+ * line.  Column 0 of each row is the chrom name; every other non-empty column is an
+ * alias whose source is that column's field name.  Empty cells are skipped, as
+ * readOldAlias() does for empty words, so that no record is stored under the key "".
+ * The four hashes in chromHashes must already exist.
+ * NOTE(review): lf is not closed here; confirm whether fieldedTableAttach() takes
+ * ownership of it. */
+{
+struct fieldedTable *aliasTable = fieldedTableAttach(lf, NULL, 0);
+
+struct fieldedRow *row;
+for (row = aliasTable->rowList; row != NULL; row = row->next)
+    {
+    char *chrom = row->row[0];
+
+    unsigned field;
+    for (field = 1; field < aliasTable->fieldCount; field++)
+        {
+        char *alias = row->row[field];
+        if (!isNotEmpty(alias))
+            continue;   /* this chrom has no name under this column's scheme */
+
+        /* The strings are not cloned: they keep pointing into aliasTable,
+         * which is why the table is never freed. */
+        struct chromAlias *new;
+        AllocVar(new);
+        new->chrom = chrom;
+        new->alias = alias;
+        new->source = aliasTable->fields[field];
+
+        hashAdd(chromHashes.forwardHash, new->alias, new);
+        hashAdd(chromHashes.reverseHash, new->chrom, new);
+        hashAdd(chromHashes.chromToAliasHash, new->chrom, new->alias);
+        hashAdd(chromHashes.aliasToChromHash, new->alias, new->chrom);
+        }
+    }
+}
+
+static void chromAliasSetupHub(char *database)
+/* Look for a chromAlias text file for a track hub database and load the four
+ * chromHashes tables from it.  Returns with the hashes untouched when
+ * trackHubAliasFile() gives back NULL, and aborts when the file has no lines.
+ * The first line decides the reader: a leading '#' goes to readFieldedTable(),
+ * anything else goes to readOldAlias(). */
+{
+char *aliasFile = trackHubAliasFile(database);
+if (aliasFile == NULL)
return;   /* nothing to read: the hashes are left as they were */
+struct lineFile *lf = udcWrapShortLineFile(aliasFile, NULL, MAX_HUB_TRACKDB_FILE_SIZE);
+
+chromHashes.forwardHash = hashNew(0);
+chromHashes.reverseHash = hashNew(0);
+chromHashes.chromToAliasHash = hashNew(0);
+chromHashes.aliasToChromHash = hashNew(0);
+
+char *line;
+if (!lineFileNext(lf, &line, NULL))
+ errAbort("%s is empty", lf->fileName);
+lineFileReuse(lf);   /* only peeking: hand the line back so the chosen reader starts from it */
+
+if (line[0] == '#')   /* a header line marks the fielded-table format */
+ readFieldedTable(lf);
+else
+ readOldAlias(lf);
+}
+
+
+static void chromAliasSetupSql(char *database)
+/* Look for a chromAlias SQL table in database and load the four chromHashes
+ * tables from all of its rows.  Returns with the hashes untouched when the
+ * table does not exist.  Each row becomes one cloned struct chromAlias that is
+ * entered in all four hashes and never freed here. */
+{
if (!hTableExists(database, "chromAlias"))
return;   /* no table: the hashes are left as they were */
struct sqlConnection *conn = hAllocConn(database);
chromHashes.forwardHash = hashNew(0);
chromHashes.reverseHash = hashNew(0);
-chromHashes.nameHash = hashNew(0);
+chromHashes.chromToAliasHash = hashNew(0);
+chromHashes.aliasToChromHash = hashNew(0);
char query[2048];
sqlSafef(query, sizeof(query), "select * from chromAlias");
struct sqlResult *sr = sqlGetResult(conn, query);
char **row;
while ((row = sqlNextRow(sr)) != NULL)
{
struct chromAlias *new = chromAliasLoad(row);   /* cloned, so it does not depend on the result row */
hashAdd(chromHashes.forwardHash, new->alias, new);
hashAdd(chromHashes.reverseHash, new->chrom, new);
- hashAdd(chromHashes.nameHash, new->chrom, new->alias);
+ hashAdd(chromHashes.chromToAliasHash, new->chrom, new->alias);
+ hashAdd(chromHashes.aliasToChromHash, new->alias, new->chrom);
}
sqlFreeResult(&sr);
hFreeConn(&conn);
}
+void chromAliasSetup(char *database)
+/* Read in the chromAlias file/table for this database.  The load is attempted
+ * only once; later calls return immediately.  Track hub databases are read
+ * from the hub's alias file, all others from the chromAlias SQL table.
+ * A NULL database is a no-op.  Passing a database that differs from the one
+ * seen earlier aborts inside checkDatabase(). */
+{
+/* checkDatabase() lets NULL through, so stop here: otherwise a NULL call would
+ * use up the one-shot inited flag and block a later load for a real database. */
+if (database == NULL)
+    return;
+
+if (!checkDatabase(database))
+    return;
+
+if (chromHashes.inited)
+    return;
+chromHashes.inited = TRUE;
+
+if (trackHubDatabase(database))
+    chromAliasSetupHub(database);
+else
+    chromAliasSetupSql(database);
+}
+
struct hash *chromAliasMakeLookupTable(char *database)
/* Given a database name, run chromAliasSetup() on it and return the lookup table
 * that takes chromosome alias names to a matching struct chromAlias.  The table
 * is the module's own forwardHash, not a fresh copy.  Returns NULL if no alias
 * data was loaded, for instance when the given database does not have a
 * chromAlias table. */
{
-if (!checkDatabase(database))
- return NULL;
+chromAliasSetup(database);
return chromHashes.forwardHash;
}
struct hash *chromAliasMakeReverseLookupTable(char *database)
/* Given a database name, run chromAliasSetup() on it and return the lookup table
 * that takes the actual assembly chromosome names to struct chromAliases.  Because a
 * chromosome name may well have multiple aliases, repeated calls to hashLookupNext
 * may be required to see them all.  The table is the module's own reverseHash, not
 * a fresh copy.  Returns NULL if no alias data was loaded, for instance when the
 * given database does not have a chromAlias table. */
{
-if (!checkDatabase(database))
- return NULL;
+chromAliasSetup(database);
return chromHashes.reverseHash;
}
-struct hash *chromAliasGetHash(char *database)
+struct hash *chromAliasAliasToChromHash(char *database)
/* Get the hash that maps alias names to their chrom names.  Runs chromAliasSetup()
 * first; the result is NULL if no alias data was loaded. */
{
-if (!checkDatabase(database))
- return NULL;
+chromAliasSetup(database);
+return chromHashes.aliasToChromHash;
+}
-return chromHashes.nameHash;
+struct hash *chromAliasChromToAliasHash(char *database)
+/* Get the hash that maps chrom names to their aliases.  Runs chromAliasSetup()
+ * first; the result is NULL if no alias data was loaded. */
+{
+chromAliasSetup(database);
+return chromHashes.chromToAliasHash;
}