198c9b8daecc44fbda6a6494c566c723920f030a
lrnassar
  Wed Mar 11 18:25:21 2026 -0700
Fixing a few hundred clear typos with the help of Claude. Some are less important in code comments, but majority of them are in user-facing places. I manually approved 60%+ of the changes and didn't see any that were an incorrect suggestion, at worst it was potentially unnecessary, like a code comment having cant instead of can't. No RM.

diff --git src/hg/encode3/importEncode2/encode2Meta/encode2Meta.c src/hg/encode3/importEncode2/encode2Meta/encode2Meta.c
index 6aab0f38a7f..3279c595397 100644
--- src/hg/encode3/importEncode2/encode2Meta/encode2Meta.c
+++ src/hg/encode3/importEncode2/encode2Meta/encode2Meta.c
@@ -1,589 +1,589 @@
 /* encode2Meta - Create meta files.. */
 
 /* Copyright (C) 2013 The Regents of the University of California 
  * See kent/LICENSE or http://genome.ucsc.edu/license/ for licensing information. */
 #include "common.h"
 #include "linefile.h"
 #include "hash.h"
 #include "options.h"
 #include "encode/encodeExp.h"
 #include "encode3/encode2Manifest.h"
 #include "mdb.h"
 
 /* Assembly databases this program accepts on the command line. */
 char *metaDbs[] = {"hg19", "mm9"};
 /* Organism names, parallel to metaDbs: organisms[i] is compared against
  * encodeExp.organism while metaDbs[i] is being processed. */
 char *organisms[] = {"human", "mouse"};
 char *metaTable = "metaDb";	/* Table read from the assembly database */
 char *expDb = "hgFixed";	/* Database the experiment table is read from */
 char *expTable = "encodeExp";	/* Name of the experiment table */
 
 /* Command line variables */
 boolean withParent = FALSE;	/* -withParent: also write a parent line in each stanza */
 boolean maniFields = FALSE;	/* -maniFields: keep fields that are also in the manifest */
 
 void usage()
 /* Explain usage and exit.  The whole help text is handed to errAbort(). */
 {
 errAbort(
-  "encode2Meta - Create meta.txt file. This is a hierarchical .ra file with heirarchy defined\n"
+  "encode2Meta - Create meta.txt file. This is a hierarchical .ra file with hierarchy defined\n"
   "by indentation.  You might think of it as a meta tag tree.  It contains the contents of\n"
   "the hg19 and mm9 metaDb tables and the hgFixed.encodeExp table.\n"
   "usage:\n"
   "   encode2Meta database manifest.tab meta.txt\n"
   "options:\n"
   "   -withParent - if set put a parent tag in each stanza in addition to indentation\n"
   "   -maniFields - includes some fields normally suppressed because they are also in manifest\n"
   );
 }
 
 /* Command line validation table, passed to optionInit() in main().
  * Both recognized options are boolean flags; the table ends with a
  * {NULL, 0} entry. */
 static struct optionSpec options[] = {
    {"withParent", OPTION_BOOLEAN},
    {"maniFields", OPTION_BOOLEAN},
    {NULL, 0},
 };
 
 struct metaNode
 /* A node in a metadata tree.  Lookups through metaVal() fall back from a
  * node to its ancestors, so a tag stored on a node also applies to its
  * descendants unless they set it themselves. */
     {
     struct metaNode *next;	/* Next sibling. */
     struct metaNode *children;  /* Children if any */
     struct metaNode *parent;	/* Parent if any */
     char *name;	/* Node's unique symbolic name. */
     struct mdbVar *vars;    /* Variables set directly on this node, if any */
     };
 
 char *mdbVarLookup(struct mdbVar *list, char *var)
 /* Walk list and return the value of the first element whose name is var.
  * Returns NULL if no element on the list has that name. */
 {
 struct mdbVar *el = list;
 while (el != NULL && !sameString(el->var, var))
     el = el->next;
 return (el != NULL) ? el->val : NULL;
 }
 
 char *metaLocalVal(struct metaNode *node, char *var)
 /* Return the value of var as set directly on node, or NULL if node itself
  * does not set it.  Parents are not consulted. */
 {
 struct mdbVar *ownVars = node->vars;
 return mdbVarLookup(ownVars, var);
 }
 
 char *metaVal(struct metaNode *node, char *var)
 /* Return the value of var as seen from node: the node's own setting if it
  * has one, otherwise the nearest ancestor's.  Returns NULL if neither node
  * nor any ancestor defines var (also when node is NULL). */
 {
 struct metaNode *cur;
 for (cur = node; cur != NULL; cur = cur->parent)
     {
     char *found = metaLocalVal(cur, var);
     if (found != NULL)
         return found;
     }
 return NULL;
 }
 
 struct mdbVar *mdbVarNew(char *var, char *val)
 /* Allocate a new mdbVar holding private copies of the var and val strings. */
 {
 struct mdbVar *fresh;
 AllocVar(fresh);
 fresh->val = cloneString(val);
 fresh->var = cloneString(var);
 return fresh;
 }
 
 void metaNodeAddVar(struct metaNode *node, char *var, char *val)
 /* Set var=val on node unless that is redundant.
  *   - A NULL val is ignored.
  *   - If an ancestor already yields the same value for var, nothing is stored.
  *   - If node itself already sets var, abort rather than redefine it.
  * Otherwise a new mdbVar is pushed onto the head of node->vars. */
 {
 if (val == NULL)
     return;
 struct metaNode *up = node->parent;
 if (up != NULL)
     {
     char *inherited = metaVal(up, var);
     if (sameOk(inherited, val))
         return;   /* Ancestors already say the same thing. */
     }
 if (metaLocalVal(node, var) != NULL)
     errAbort("Redefining %s.%s\n", node->name, var);
 /* Keep the new element in a variable: slAddHead is used with a plain
  * variable everywhere else in this file. */
 struct mdbVar *added = mdbVarNew(var, val);
 slAddHead(&node->vars, added);
 }
 
 void metaNodeAddVarVals(struct metaNode *node, char *varVals)
 /* Add each pair from a string of var=val settings to node, using
  * metaNodeAddVar for every pair.  A NULL varVals is a no-op.
  * The parsed pair list is only scratch space: metaNodeAddVar stores its own
  * copies of the strings (mdbVarNew clones them), so the list is freed
  * before returning instead of being leaked. */
 {
 if (varVals == NULL)
     return;
 struct slPair *pair, *pairList = slPairListFromString(varVals, FALSE);
 for (pair = pairList; pair != NULL; pair = pair->next)
     metaNodeAddVar(node, pair->name, pair->val);
 slPairFreeValsAndList(&pairList);
 }
 
 struct metaNode *metaNodeNew(char *name)
 /* Allocate a node that carries a copy of name.  The caller is responsible
  * for linking it to a parent and siblings. */
 {
 struct metaNode *node;
 AllocVar(node);
 node->name = cloneString(name);
 return node;
 }
 
 struct metaNode *metaTreeNew(char *name)
 /* Start a new tree: returns a root node called name, made by metaNodeNew. */
 {
 struct metaNode *root = metaNodeNew(name);
 return root;
 }
 
 struct mdbObj *getMdbList(char *database)
 /* Get list of metaDb objects for a database. */
 {
 struct sqlConnection *conn = sqlConnect(database);
 struct mdbObj *list = mdbObjsQueryAll(conn, metaTable);
 sqlDisconnect(&conn);
 return list;
 }
 
 struct metaNode *wrapNodeAroundExp(struct encodeExp *exp)
 /* Build a node named after exp's accession.  Its tags are the fixed
  * encodeExp columns listed below, followed by whatever var=val pairs are
  * packed into exp->expVars. */
 {
 struct metaNode *wrapped = metaNodeNew(exp->accession);
 /* Parallel arrays: tag name and the column it comes from, in the order
  * they are added. */
 char *tags[] = {"organism", "lab", "dataType", "cellType", "updateTime"};
 char *vals[] = {exp->organism, exp->lab, exp->dataType, exp->cellType, exp->updateTime};
 int i;
 for (i = 0; i < ArraySize(tags); ++i)
     metaNodeAddVar(wrapped, tags[i], vals[i]);
 metaNodeAddVarVals(wrapped, exp->expVars);
 return wrapped;
 }
 
 void metaTreeWrite(int level, int minLevel, int maxLevel, boolean isFile,
     char *parent, struct metaNode *node, struct hash *suppress, FILE *f)
 /* Write node and, recursively, all of its descendants to f.
  * A stanza is written for node only when minLevel <= level < maxLevel; it
  * is indented three spaces per level above minLevel and consists of a
  * "meta" line, an optional "parent" line (only with -withParent and a
  * non-NULL parent), one line per tag not present in the suppress hash, and
  * a blank line.  Children are always visited, at level+1, even when node
  * itself was not written.  isFile is only passed through to the recursive
  * calls. */
 {
 boolean inRange = (level >= minLevel && level < maxLevel);
 if (inRange)
     {
     int pad = 3 * (level - minLevel);
     struct mdbVar *tag;
     spaceOut(f, pad);
     fprintf(f, "meta %s\n", node->name);
     if (parent != NULL && withParent)
         {
         spaceOut(f, pad);
         fprintf(f, "parent %s\n", parent);
         }
     for (tag = node->vars; tag != NULL; tag = tag->next)
         {
         if (hashLookup(suppress, tag->var))
             continue;
         spaceOut(f, pad);
         fprintf(f, "%s %s\n", tag->var, tag->val);
         }
     fprintf(f, "\n");
     }
 struct metaNode *kid = node->children;
 while (kid != NULL)
     {
     metaTreeWrite(level+1, minLevel, maxLevel, isFile, node->name, kid, suppress, f);
     kid = kid->next;
     }
 }
 
 boolean mdbVarRemove(struct mdbVar **pList, char *var)
 /* Unlink the first element named var from *pList.  Returns TRUE if an
  * element was unlinked, FALSE if the name was not on the list.  The
  * unlinked element is not freed. */
 {
 /* link always points at the pointer that leads to the element under
  * inspection, so unlinking is a single store. */
 struct mdbVar **link;
 for (link = pList; *link != NULL; link = &(*link)->next)
     {
     struct mdbVar *el = *link;
     if (sameString(el->var, var))
         {
         *link = el->next;
         return TRUE;
         }
     }
 return FALSE;
 }
 
 
 void hoistOne(struct metaNode *node, char *var, char *val)
 /* Move var=val from the children of node up to node itself.  The caller
  * has already established that the children agree on the value.
  * Any setting of var already on node is replaced, then var is unlinked
  * from every child. */
 {
 if (mdbVarLookup(node->vars, var) != NULL)
     mdbVarRemove(&node->vars, var);
 /* metaNodeAddVar stores a copy of val, and mdbVarRemove does not free what
  * it unlinks, so val stays readable even if it points into a child's tag. */
 metaNodeAddVar(node, var, val);
 struct metaNode *kid = node->children;
 while (kid != NULL)
     {
     mdbVarRemove(&kid->vars, var);
     kid = kid->next;
     }
 }
 
 struct slName *varsInAnyNode(struct metaNode *nodeList)
 /* Return a list holding each distinct variable name that is set directly
  * on at least one node of nodeList.  A temporary hash is used to skip
  * names that were already collected. */
 {
 struct slName *names = NULL;
 struct hash *seen = hashNew(6);
 struct metaNode *node;
 for (node = nodeList; node != NULL; node = node->next)
     {
     struct mdbVar *v;
     for (v = node->vars; v != NULL; v = v->next)
         {
         if (hashLookup(seen, v->var) != NULL)
             continue;
         struct slName *added = slNameAddHead(&names, v->var);
         hashAdd(seen, added->name, added);
         }
     }
 hashFree(&seen);
 return names;
 }
 
 char *allSameVal(char *var, struct metaNode *nodeList)
 /* If every node on nodeList sets var directly and all of them use the same
  * value, return that value (as stored on the first node).  Returns NULL if
  * any node lacks var, if two nodes disagree, or if the list is empty. */
 {
 char *common = NULL;
 struct metaNode *node;
 for (node = nodeList; node != NULL; node = node->next)
     {
     char *cur = mdbVarLookup(node->vars, var);
     if (cur == NULL)
         return NULL;
     if (common == NULL)
         common = cur;
     else if (!sameString(cur, common))
         return NULL;
     }
 return common;
 }
 
 char *allSameValWithDataMostWithData(char *var, struct metaNode *nodeList, double minProportion)
 /* Like allSameVal, but tolerant of nodes that do not set var at all.
  * Returns the shared value when all nodes that do set var agree on it and
  * the number of such nodes is at least round(nodeCount * minProportion).
  * Returns NULL on any disagreement or when too few nodes have the tag. */
 {
 char *common = NULL;
 int total = 0, withData = 0;
 struct metaNode *node;
 for (node = nodeList; node != NULL; node = node->next)
     {
     ++total;
     char *cur = mdbVarLookup(node->vars, var);
     if (cur == NULL)
         continue;
     ++withData;
     if (common == NULL)
         common = cur;
     else if (!sameString(cur, common))
         return NULL;
     }
 int needed = round(total * minProportion);
 return (withData < needed) ? NULL : common;
 }
 
 
 void metaTreeHoist(struct metaNode *node, struct hash *closeEnoughTags)
 /* Move variables that are the same in all children up to parent.
  * Works bottom-up: the children are hoisted first, then each variable seen
  * on any child is tested.  Variables listed in closeEnoughTags (a hash of
  * double pointers) only need to be present on that proportion of the
  * children, see allSameValWithDataMostWithData; all others must be present
  * and identical on every child.  "fileName" is never hoisted. */
 {
 /* Do depth first recursion, but get early return if we're a leaf. */
 struct metaNode *child;
 if (node->children == NULL)
     return;
 for (child = node->children; child != NULL; child = child->next)
     metaTreeHoist(child, closeEnoughTags);
 
 /* Build up list of variables used in any child. */
 struct slName *var, *varList = varsInAnyNode(node->children);
 
 /* Go through list and figure out ones that are same in all children. */
 for (var = varList; var != NULL; var = var->next)
     {
     char *val;
     double *closeEnough = hashFindVal(closeEnoughTags, var->name);
     if (closeEnough)
         val = allSameValWithDataMostWithData(var->name, node->children, *closeEnough);
     else
         val = allSameVal(var->name, node->children);
     if (val != NULL && !sameString(var->name, "fileName"))
         hoistOne(node, var->name, val);
     }
 
 /* The name list was only needed for the loop above; hoistOne keeps its own
  * copies of the names, so release it rather than leaking one list per
  * interior node. */
 slFreeList(&varList);
 }
 
 double *cloneDouble(double x)
 /* Return a pointer to a newly allocated double holding the value x. */
 {
 double *copy;
 AllocVar(copy);
 *copy = x;
 return copy;
 }
 
 struct hash *makeCloseEnoughTags()
 /* Build the hash of tags that may be hoisted even when some children lack
  * them.  Keys are tag names; each value points to a double giving the
  * minimum proportion of children that must carry the tag. */
 {
 static struct
     {
     char *tag;             /* Tag name */
     double minProportion;  /* Fraction of children that must have it */
     } table[] =
     {
     {"organism", 0.8},
     {"lab", 0.8},
     {"age", 0.8},
     {"grant", 0.8},
     {"dateSubmitted", 0.8},
     {"dateUnrestricted", 0.8},
     {"softwareVersion", 0.8},
     {"control", 0.9},
     {"geoSampleAccession", 0.7},
     };
 struct hash *closeEnoughTags = hashNew(5);
 int i;
 for (i = 0; i < ArraySize(table); ++i)
     hashAdd(closeEnoughTags, table[i].tag, cloneDouble(table[i].minProportion));
 return closeEnoughTags;
 }
 
 struct hash *makeSuppress()
 /* Build the hash of tag names that metaTreeWrite should leave out.  Only
  * the keys matter; every value is NULL.  The second group is suppressed
  * only when -maniFields was not given. */
 {
 static char *always[] =
     {
     "objType",      // Inherent in hierarchy or ignored
     "subId",        // Submission ID not worth carrying forward
     "tableName",    // We aren't interested in tables, just files
     "project",      // Always wgEncode
     "expId",        // Redundant with dccAccession
     "cell",         // Completely redundant with cellType - I checked
     "sex",          // This should be implied in cellType
     };
 static char *unlessManiFields[] =
     {
     "dccAccession", // Redundant with meta object name
     "composite",    // Inherent in hierarchy now
     "view",         // This is in manifest
     "replicate",    // This is in manifest
     "md5sum",       // Also in manifest
     };
 struct hash *suppress = hashNew(4);
 int i;
 for (i = 0; i < ArraySize(always); ++i)
     hashAdd(suppress, always[i], NULL);
 if (!maniFields)
     {
     for (i = 0; i < ArraySize(unlessManiFields); ++i)
         hashAdd(suppress, unlessManiFields[i], NULL);
     }
 return suppress;
 }
 
 boolean originalData(char *symbol)
 /* Return TRUE if it's not just a repackaging. */
 {
 return (symbol != NULL && !startsWith("wgEncodeAwg", symbol) && !startsWith("wgEncodeReg", symbol));
 }
 
 int metaNodeCmp(const void *va, const void *vb)
 // Compare metaNode to sort on var name, case-insensitive.
 {
 const struct metaNode *a = *((struct metaNode **)va);
 const struct metaNode *b = *((struct metaNode **)vb);
 return strcasecmp(a->name, b->name);
 }
 
 void metaTreeSortChildrenSortTags(struct metaNode *node)
 /* Sort node's tags with mdbVarCmp and its children with metaNodeCmp, then
  * do the same for every descendant. */
 {
 slSort(&node->vars, mdbVarCmp);
 slSort(&node->children,  metaNodeCmp);
 struct metaNode *kid = node->children;
 while (kid != NULL)
     {
     metaTreeSortChildrenSortTags(kid);
     kid = kid->next;
     }
 }
 
 void encode2Meta(char *database, char *manifestIn, char *outMetaRa)
 /* encode2Meta - Create meta files.
  *   database   - must be one of metaDbs; anything else aborts.
  *   manifestIn - manifest file; only metaDb objects whose rewritten
  *                fileName appears in it become file nodes.
  *   outMetaRa  - output file, written as indented stanzas by metaTreeWrite.
  * Steps: load the manifest and the encodeExp table, build a tree of
  * composite -> experiment -> file nodes from metaDb, hoist shared tags
  * towards the root, sort, and write everything below the root.  Tags that
  * ended up on the root itself are not written, only reported. */
 {
 int dbIx = stringArrayIx(database, metaDbs, ArraySize(metaDbs));
 if (dbIx < 0)
     errAbort("Unrecognized database %s", database);
 
 /* Create a three level meta.ra format file based on hgFixed.encodeExp
  * and database.metaDb tables. The levels are composite, experiment, file */
 struct metaNode *metaTree = metaTreeNew("encode2");
 
 /* Load up the manifest. */
 struct encode2Manifest *mi, *miList = encode2ManifestShortLoadAll(manifestIn);
 struct hash *miHash = hashNew(18);
 for (mi = miList; mi != NULL; mi = mi->next)
     hashAdd(miHash, mi->fileName, mi);
 verbose(1, "%d files in %s\n", miHash->elCount, manifestIn);
 
 /* Load up encodeExp info. */
 struct sqlConnection *expConn = sqlConnect(expDb);
 char query[1024];
 sqlSafef(query, sizeof query, "select * from encodeExp");
 struct encodeExp *expList = encodeExpLoadByQuery(expConn, query);
 sqlDisconnect(&expConn);
 verbose(1, "%d experiments in encodeExp\n", slCount(expList));
 
 struct hash *compositeHash = hashNew(0);
 
 /* Go through each organism database in turn.  Only the one matching the
  * database argument is processed; i also selects the organism name. */
 int i;
 for (i=0; i<ArraySize(metaDbs); ++i)
     {
     char *db = metaDbs[i];
     if (!sameString(database, db))
         continue;
 
     verbose(1, "exploring %s\n", db);
     struct mdbObj *mdb, *mdbList = getMdbList(db);
     verbose(1, "%d meta objects in %s\n", slCount(mdbList), db);
 
     /* Get info on all composites. */
     for (mdb = mdbList; mdb != NULL; mdb = mdb->next)
         {
         char *objType = mdbVarLookup(mdb->vars, "objType");
         if (objType != NULL && sameString(objType, "composite"))
             {
             char compositeName[256];
             safef(compositeName, sizeof(compositeName), "%s", mdb->obj);
             struct metaNode *compositeNode = metaNodeNew(compositeName);
             slAddHead(&metaTree->children, compositeNode);
             compositeNode->parent = metaTree;
             struct mdbVar *v;
             for (v=mdb->vars; v != NULL; v = v->next)
                 {
                 metaNodeAddVar(compositeNode, v->var, v->val);
                 }
             metaNodeAddVar(compositeNode, "assembly", db);
             hashAdd(compositeHash, mdb->obj, compositeNode);
             }
         }
 
     /* Make up one more for experiments with no composite. */
     char *noCompositeName = "wgEncodeZz";
     struct metaNode *noCompositeNode = metaNodeNew(noCompositeName);
     slAddHead(&metaTree->children, noCompositeNode);
     noCompositeNode->parent = metaTree;
     hashAdd(compositeHash, noCompositeName, noCompositeNode);
 
 
     /* Now go through objects trying to tie experiments to composites.
      * The first original-data composite seen for an accession wins. */
     struct hash *expToComposite = hashNew(16);
     for (mdb = mdbList; mdb != NULL; mdb = mdb->next)
         {
         char *composite = mdbVarLookup(mdb->vars, "composite");
         if (originalData(composite))
             {
             char *dccAccession = mdbVarLookup(mdb->vars, "dccAccession");
             if (dccAccession != NULL)
                 {
                 char *oldComposite = hashFindVal(expToComposite, dccAccession);
                 if (oldComposite != NULL)
                     {
                     /* Newline-terminated like every other verbose message
                      * here, so consecutive reports do not run together. */
                     if (!sameString(oldComposite, composite))
                         verbose(2, "%s maps to %s ignoring mapping to %s\n", dccAccession, oldComposite, composite);
                     }
                 else
                     {
                     hashAdd(expToComposite, dccAccession, composite);
                     }
                 }
             }
         }
     /* Now get info on all experiments in this organism. */
     struct hash *expHash = hashNew(0);
     struct encodeExp *exp;
     for (exp = expList; exp != NULL; exp = exp->next)
         {
         if (sameString(exp->organism, organisms[i]))
             {
             if (exp->accession != NULL)
                 {
                 char *composite = hashFindVal(expToComposite,  exp->accession);
                 struct metaNode *compositeNode;
                 if (composite != NULL)
                     {
                     compositeNode = hashMustFindVal(compositeHash, composite);
                     }
                 else
                     {
                     compositeNode = noCompositeNode;
                     }
                 struct metaNode *expNode = wrapNodeAroundExp(exp);
                 hashAdd(expHash, expNode->name, expNode);
                 slAddHead(&compositeNode->children, expNode);
                 expNode->parent = compositeNode;
                 }
             }
         }
 
     /* Finally hang file nodes under their experiments. */
     for (mdb = mdbList; mdb != NULL; mdb = mdb->next)
         {
         char *fileName = NULL, *dccAccession = NULL;
         char *objType = mdbVarLookup(mdb->vars, "objType");
         if (objType != NULL && sameString(objType, "composite"))
             continue;
         dccAccession = mdbVarLookup(mdb->vars, "dccAccession");
         if (dccAccession == NULL)
             continue;
         char *composite = hashFindVal(expToComposite,  dccAccession);
         if (composite == NULL)
             errAbort("Can't find composite for %s", mdb->obj);
         struct mdbVar *v;
         for (v = mdb->vars; v != NULL; v = v->next)
             {
             char *var = v->var, *val = v->val;
             if (sameString("fileName", var))
                 {
                 /* Rewrite the tag in place as db/composite/firstFile so
                  * it can be looked up in the manifest hash. */
                 fileName = val;
                 char path[PATH_LEN];
                 char *comma = strchr(fileName, ',');
                 if (comma != NULL)
                      *comma = 0;	/* Cut off comma separated list. */
                 safef(path, sizeof(path), "%s/%s/%s", db,
                     composite, fileName);  /* Add database path */
                 fileName = val = v->val = cloneString(path);
                 }
             }
         if (fileName != NULL)
             {
             if (hashLookup(miHash, fileName))
                 {
                 struct metaNode *expNode = hashFindVal(expHash, dccAccession);
                 if (expNode != NULL)
                     {
                     struct metaNode *fileNode = metaNodeNew(mdb->obj);
                     slAddHead(&expNode->children, fileNode);
                     fileNode->parent = expNode;
                     struct mdbVar *v;
                     for (v=mdb->vars; v != NULL; v = v->next)
                         {
                         metaNodeAddVar(fileNode, v->var, v->val);
                         }
                     }
                 }
             }
         }
     }
 
 struct hash *suppress = makeSuppress();
 struct hash *closeEnoughTags = makeCloseEnoughTags();
 
 metaTreeHoist(metaTree, closeEnoughTags);
 metaTreeSortChildrenSortTags(metaTree);
 FILE *f = mustOpen(outMetaRa, "w");
 struct metaNode *node;
 for (node = metaTree->children; node != NULL; node = node->next)
     metaTreeWrite(0, 0, BIGNUM, FALSE, NULL, node, suppress, f);
 carefulClose(&f);
 
 /* Write warning about tags in highest parent. */
 struct mdbVar *v;
 for (v = metaTree->vars; v != NULL; v = v->next)
     verbose(1, "Omitting universal %s %s\n", v->var, v->val);
 }
 
 int main(int argc, char *argv[])
 /* Parse options, insist on exactly three positional arguments, record the
  * two boolean flags and run encode2Meta. */
 {
 optionInit(&argc, argv, options);
 if (argc != 4)
     usage();
 withParent = optionExists("withParent");
 maniFields = optionExists("maniFields");
 char *database = argv[1], *manifestIn = argv[2], *outMetaRa = argv[3];
 encode2Meta(database, manifestIn, outMetaRa);
 return 0;
 }