4898794edd81be5285ea6e544acbedeaeb31bf78
max
  Tue Nov 23 08:10:57 2021 -0800
Fixing pointers to README file for license in all source code files. refs #27614

diff --git src/hg/encode3/encodeDataWarehouse/edwMetaManiToTdb/edwMetaManiToTdb.c src/hg/encode3/encodeDataWarehouse/edwMetaManiToTdb/edwMetaManiToTdb.c
index b52e3b7..7462ea7 100644
--- src/hg/encode3/encodeDataWarehouse/edwMetaManiToTdb/edwMetaManiToTdb.c
+++ src/hg/encode3/encodeDataWarehouse/edwMetaManiToTdb/edwMetaManiToTdb.c
@@ -1,567 +1,567 @@
 /* edwMetaManiToTdb - Create a trackDb file based on input from meta file and manifest file. */
 
 /* Copyright (C) 2013 The Regents of the University of California 
- * See README in this or parent directory for licensing information. */
+ * See kent/LICENSE or http://genome.ucsc.edu/license/ for licensing information. */
 
 #include "common.h"
 #include "linefile.h"
 #include "hash.h"
 #include "options.h"
 #include "net.h"
 #include "meta.h"
 #include "hmmstats.h"
 #include "bigBed.h"
 #include "bigWig.h"
 #include "bamFile.h"
 #include "encode3/encode2Manifest.h"
 #include "encodeDataWarehouse.h"
 #include "edwLib.h"
 
 void usage()
 /* Explain usage and exit.  The whole help text is handed to errAbort, so
  * this function reports the message and does not return normally. */
 {
 errAbort(
   "edwMetaManiToTdb - Create a trackDb file based on input from meta file and manifest file.\n"
   "usage:\n"
   "   edwMetaManiToTdb metaInput manifestInput list,of,vars tdbOutput\n"
   "Where metaInput is metadata in tag-storm format,  manifestInput is an ENCODE submission manifest,\n"
   "list,of,vars is a comma separated list of variables (tags in meta file) that users can use\n"
   "to select which experiments to view,  and tdbOutput is a trackDb.txt format file\n"
   );
 }
 
 /* Command line validation table.  This program defines no options of its
  * own, so the table holds only the {NULL, 0} entry (presumably the list
  * terminator expected by optionInit - confirm against options.h). */
 static struct optionSpec options[] = {
    {NULL, 0},
 };
 
 char *tagVal(struct encode2Manifest *man, struct hash *metaHash, char *tag)
 /* Find the meta node for the experiment named in man and hand back what
  * metaTagVal reports for tag on that node.  Callers in this file treat a
  * NULL result as "tag not present".  Aborts if the experiment itself is
  * missing from metaHash. */
 {
 struct meta *expMeta = hashFindVal(metaHash, man->experiment);
 if (!expMeta)
     errAbort("experiment %s is in manifest but not meta", man->experiment);
 return metaTagVal(expMeta, tag);
 }
 
 struct composite
 /* Info on a composite track. */
     {
     struct composite *next;
     char *name;	    /* Name is either from composite tag, or if missing a concat of other vals. */
     struct slRef *manRefList;	/* Manifest items associated with this one. */
     };
 
 struct composite *makeCompositeList(struct encode2Manifest *manList, struct hash *metaHash)
 /* Sort every item on manList into a composite and return the composites in
  * order of first appearance.  An item's composite is named by its "composite"
  * tag when it has one; otherwise the name is "comp" followed by the lab and
  * dataType tags (each treated as empty when missing).  Each composite keeps
  * references to its manifest items in manifest order. */
 {
 struct composite *allComposites = NULL;
 struct hash *compositeByName = hashNew(0);
 struct encode2Manifest *item;
 for (item = manList; item != NULL; item = item->next)
     {
     char nameBuf[256];
     char *explicitName = tagVal(item, metaHash, "composite");
     if (explicitName == NULL)
         {
         /* No composite tag, so synthesize a name from lab and data type. */
         char *lab = emptyForNull(tagVal(item, metaHash, "lab"));
         char *dataType = emptyForNull(tagVal(item, metaHash, "dataType"));
         safef(nameBuf, sizeof(nameBuf), "comp%s%s", lab, dataType);
         }
     else
         {
         safef(nameBuf, sizeof(nameBuf), "%s", explicitName);
         }
     struct composite *owner = hashFindVal(compositeByName, nameBuf);
     if (owner == NULL)
         {
         /* First item seen for this name: create the composite. */
         AllocVar(owner);
         owner->name = cloneString(nameBuf);
         hashAdd(compositeByName, nameBuf, owner);
         slAddTail(&allComposites, owner);
         }
     struct slRef *itemRef = slRefNew(item);
     slAddTail(&owner->manRefList, itemRef);
     }
 hashFree(&compositeByName);
 return allComposites;
 }
 
 /* Running counters that keep generated track names unique across the whole
  * output: trackId feeds the "t%d" leaf track names and viewId feeds the
  * "v%d" view track names. */
 int trackId = 0, viewId = 0;
 
 struct view
 /* A view aka output_type */
     {
     struct view *next;
     char *name;			/* Name of view */
     char *format;	        /* Format associated with view */
     char *trackName;		/* Name when used as a track. */
     struct slRef *manRefList;	/* Files associated with view */
     };
 
 struct view *makeViewList(struct slRef *manRefList)
 /* Return a list of views with everything on manList */
 {
 struct hash *hash = hashNew(0);
 struct view *view, *viewList = NULL;
 struct slRef *ref;
 for (ref = manRefList; ref != NULL; ref = ref->next)
     {
     struct encode2Manifest *man = ref->val;
     char *outputType = man->outputType;
     char *format = man->format;
     view = hashFindVal(hash,  outputType);
     if (view == NULL)
         {
 	AllocVar(view);
 	view->name = cloneString(outputType);
 	view->format = cloneString(format);
 	char trackName[64];
 	safef(trackName, sizeof(trackName), "v%d", ++viewId);
 	view->trackName = cloneString(trackName);
 	hashAdd(hash, outputType, view);
 	slAddTail(&viewList, view);
 	}
     if (!sameString(format, view->format))
         errAbort("Multiple formats (%s and %s) in output_type %s", 
 	    format, view->format, view->name);
     struct slRef *newRef = slRefNew(man);
     slAddTail(&view->manRefList, newRef);
     }
 hashFree(&hash);
 return viewList;
 }
 
 /* NOTE(review): nothing in the code visible here reads this global; the
  * print functions below take their own 'indent' string parameter, which
  * shadows it.  Possibly a leftover - confirm before removing. */
 int indent = 4;
 
 boolean viewableFormat(char *format)
 /* Return TRUE if it's a format we can visualize. */
 {
 return sameString(format, "bigWig") || sameString(format, "bam") 
        || edwIsSupportedBigBedFormat(format);
 }
 
 struct taggedFile
 /* A bunch of tags mostly */
     {
     struct taggedFile *next;
     struct metaTagVal *tagList;	/* All tags. */
     struct encode2Manifest *manifest;  /* File in manifest */
     };
 
 struct expVal
 /* A value seen for a tag along with a list of all files with that value */
     {
     struct expVal *next;	/* Next value in list. */
     char *val;	    /* Value seen here */
     struct slRef *tfList;  /* References to the tagged files this value is found in. */
     };
 
 struct expVal *expValFind(struct expVal *list, char *val)
 /* Walk list and return the first element whose val string equals val,
  * or NULL when no element matches. */
 {
 struct expVal *cur = list;
 while (cur != NULL && !sameString(cur->val, val))
     cur = cur->next;
 return cur;
 }
 
 struct expVar
 /* An experimental variable: a tag name plus every value seen for it. */
     {
     struct expVar *next;    /* Next variable in list. */
     char *name;             /* Name of variable. */
     struct expVal *valList; /* All values seen */
     int useCount;           /* Number of times used */
     double priority;        /* Overall priority score - smaller is better. */
     };
 
 int expVarCmpQuery(const void *va, const void *vb)
 /* Sort comparator taking pointers to (struct expVar *) elements.  Orders by
  * ascending priority, so the smallest (best) priority comes first; equal
  * priorities fall back to strcmp on the names. */
 {
 const struct expVar *left = *((struct expVar **)va);
 const struct expVar *right = *((struct expVar **)vb);
 if (left->priority > right->priority)
     return 1;
 if (left->priority < right->priority)
     return -1;
 return strcmp(left->name, right->name);
 }
 
 struct expVar *expVarsFromTaggedFiles(struct taggedFile *tfList)
 /* Build up a list of vars and their values and what file they are found in. */
 {
 struct hash *tagHash = hashNew(0);
 struct expVar *varList = NULL, *var;
 struct taggedFile *tf;
 for (tf = tfList; tf != NULL; tf = tf->next)
     {
     struct metaTagVal *mtv;
     for (mtv = tf->tagList; mtv != NULL; mtv = mtv->next)
         {
 	if (sameString(mtv->val, "n/a") || isEmpty(mtv->tag))
 	    continue;
 	var = hashFindVal(tagHash, mtv->tag);
 	if (var == NULL)
 	    {
 	    AllocVar(var);
 	    var->name = cloneString(mtv->tag);
 	    slAddHead(&varList, var);
 	    hashAdd(tagHash, var->name, var);
 	    }
 	struct expVal *val = expValFind(var->valList, mtv->val);
 	if (val == NULL)
 	    {
 	    AllocVar(val);
 	    val->val = cloneString(mtv->val);
 	    slAddHead(&var->valList, val);
 	    }
 	var->useCount += 1;
 	refAdd(&val->tfList, tf);
 	}
     }
 hashFree(&tagHash);
 return varList;
 }
 
 struct taggedFile *taggedFileForComposite(struct composite *composite, struct hash *metaHash)
 /* Build a taggedFile for each manifest item in the composite.  Each one holds
  * copies of all tags from the item's experiment meta node plus a "replicate"
  * tag taken from the manifest.  Both the result list and each tag list are
  * assembled with slAddHead.  The experiment must be in metaHash
  * (hashMustFindVal). */
 {
 struct taggedFile *result = NULL;
 struct slRef *cur;
 for (cur = composite->manRefList; cur != NULL; cur = cur->next)
     {
     struct encode2Manifest *item = cur->val;
     struct meta *expMeta = hashMustFindVal(metaHash, item->experiment);
     struct taggedFile *wrapped;
     AllocVar(wrapped);
     wrapped->manifest = item;
     slAddHead(&result, wrapped);
     /* Copy the experiment's tags so this file owns its own list. */
     struct metaTagVal *src;
     for (src = expMeta->tagList; src != NULL; src = src->next)
         {
         struct metaTagVal *copy = metaTagValNew(src->tag, src->val);
         slAddHead(&wrapped->tagList, copy);
         }
     /* Bonus tag that lives in the manifest rather than the meta file. */
     struct metaTagVal *replicateTag = metaTagValNew("replicate", item->replicate);
     slAddHead(&wrapped->tagList, replicateTag);
     }
 return result;
 }
 
 struct metaTagVal *metaTagValLookup(struct metaTagVal *list, char *tag)
 /* Search list for the first element whose tag equals the given tag name.
  * Returns that element, or NULL when the tag is not on the list. */
 {
 struct metaTagVal *cur = list;
 while (cur != NULL)
     {
     if (sameString(cur->tag, tag))
         return cur;
     cur = cur->next;
     }
 return NULL;
 }
 
 char *metaTagValFindVal(struct metaTagVal *list, char *tag)
 /* Look tag up on list and return the value stored with it, or NULL when the
  * list has no such tag. */
 {
 struct metaTagVal *hit = metaTagValLookup(list, tag);
 return (hit != NULL) ? hit->val : NULL;
 }
 
 struct slName *valsForVar(char *varName, struct taggedFile *tfList)
 /* Collect the distinct values that the tag varName takes across the files on
  * tfList.  Files lacking the tag are ignored.  The result is passed through
  * slNameSort before being returned. */
 {
 struct slName *distinctVals = NULL;
 struct hash *seen = hashNew(7);
 struct taggedFile *file;
 for (file = tfList; file != NULL; file = file->next)
     {
     char *val = metaTagValFindVal(file->tagList, varName);
     if (val == NULL || hashLookup(seen, val) != NULL)
         continue;
     hashAdd(seen, val, NULL);
     slNameAddHead(&distinctVals, val);
     }
 hashFree(&seen);
 slNameSort(&distinctVals);
 return distinctVals;
 }
 
 char *printBigWigViewInfo(FILE *f, char *indent, struct view *view, 
     struct composite *comp, struct taggedFile *tfList)
 /* Print out the format-specific settings of a bigWig view stanza to f, each
  * line prefixed with indent.  Display limits are derived from the pooled
  * statistics of every file on tfList whose output type is this view: the
  * upper limit is mean + 6 standard deviations.  Returns a freshly allocated
  * copy of the "bigWig <min> <max>" type string, which the caller also uses
  * for the subtracks.  comp is not used here. */
 {
 /* Look at all tracks in this view and calculate overall limits. */
 double sumOfSums = 0, sumOfSumSquares = 0;
 bits64 sumOfN = 0;
 struct taggedFile *tf;
 for (tf = tfList; tf != NULL; tf = tf->next)
     {
     if (sameString(tf->manifest->outputType, view->name))
 	{
 	struct bbiFile *bbi = bigWigFileOpen(tf->manifest->fileName);
 	struct bbiSummaryElement sum = bbiTotalSummary(bbi);
 	sumOfSums += sum.sumData;
 	sumOfSumSquares += sum.sumSquares;
 	/* Accumulate the count too; keeping only the last file's count would
 	 * divide the pooled sums by the wrong N. */
 	sumOfN += sum.validCount;
 	bigWigFileClose(&bbi);
 	}
     }
 /* With no valid data points the mean would be 0/0; fall back to a zero
  * limit rather than writing "nan" into the trackDb. */
 double clipMax = 0;
 if (sumOfN > 0)
     {
     double mean = sumOfSums/sumOfN;
     double std = calcStdFromSums(sumOfSums, sumOfSumSquares, sumOfN);
     clipMax = mean + 6*std;
     }
 /* Output view stanza. */
 char type[64];
 safef(type, sizeof(type), "bigWig %g %g", 0.0, clipMax);
 fprintf(f, "%stype %s\n", indent, type);
 fprintf(f, "%sviewLimits 0:%g\n", indent, clipMax);
 fprintf(f, "%sminLimit 0\n", indent);
 fprintf(f, "%smaxLimit %g\n", indent, clipMax);
 fprintf(f, "%sautoScale off\n", indent);
 fprintf(f, "%smaxHeightPixels 100:32:16\n", indent);
 fprintf(f, "%swindowingFunction mean+whiskers\n", indent);
 return cloneString(type);
 }
 
 char *printBigBedViewInfo(FILE *f, char *indent, struct view *view, 
     struct composite *comp, struct taggedFile *tfList)
 /* Print out info for a bigBed view. */
 {
 /* Get defined fields and total fields, and make sure they are the same for everyone. */
 int defFields = 0, fields = 0;
 struct taggedFile *tf, *bigBedTf = NULL;
 for (tf = tfList; tf != NULL; tf = tf->next)
     {
     if (sameString(view->name, tf->manifest->outputType))
 	{
 	struct bbiFile *bbi = bigBedFileOpen(tf->manifest->fileName);
 	if (defFields == 0)
 	    {
 	    fields = bbi->fieldCount;
 	    defFields = bbi->definedFieldCount;
 	    bigBedTf = tf;
 	    }
 	else
 	    {
 	    if (fields != bbi->fieldCount || defFields != bbi->definedFieldCount)
 		errAbort("Different formats for bigBeds in %s vs %s", bigBedTf->manifest->fileName,
 		    tf->manifest->fileName);
 	    }
 	bigBedFileClose(&bbi);
 	}
     }
 char type[32];
 safef(type, sizeof(type), "bigBed %d%s", defFields, (fields > defFields ? " +" : ""));
 fprintf(f, "%stype %s\n", indent, type); 
 return cloneString(type);
 }
 
 char *printBamViewInfo(FILE *f, char *indent, struct view *view, 
     struct composite *comp, struct taggedFile *tfList)
 /* Write the fixed block of settings for a BAM view stanza to f, each line
  * prefixed with indent, and return the type string "bam".  Unlike the bigWig
  * and bigBed variants the result is a string constant, not an allocated
  * copy.  view, comp and tfList are not used here. */
 {
 char *bamType = "bam";
 fprintf(f, "%stype bam\n", indent);
 fprintf(f, "%sshowNames off\n", indent);
 fprintf(f, "%sbamColorMode gray\n", indent);
 fprintf(f, "%sindelDoubleInsert on\n", indent);
 fprintf(f, "%sindelQueryInsert on\n", indent);
 fprintf(f, "%smaxWindowToDraw 100000\n", indent);
 return bamType;
 }
 
 char *printFormatSpecificViewInfo(FILE *f, char *indent, struct view *view, 
     struct composite *comp, struct taggedFile *tfList)
 /* Dispatch on the view's format to the matching bigWig, bam or bigBed
  * printer and pass its type string back to the caller.  Aborts on a format
  * none of them handles. */
 {
 char *format = view->format;
 if (sameString(format, "bigWig"))
     return printBigWigViewInfo(f, indent, view, comp, tfList);
 if (sameString(format, "bam"))
     return printBamViewInfo(f, indent, view, comp, tfList);
 if (edwIsSupportedBigBedFormat(format))
     return printBigBedViewInfo(f, indent, view, comp, tfList);
 errAbort("Unrecognized format in printFormatSpecificViewInfo");
 return NULL;
 }
 
 void printLeafTrackList(FILE *f, char *indent, struct view *view, struct composite *comp,
     struct slName *varList, struct taggedFile *tfList, char *type)
 /* Write one leaf track stanza to f for every file on tfList whose output
  * type is this view's name.  Each stanza gets a generated "t<N>" track name
  * from the global trackId counter and is parented to the view's track.  Its
  * subGroups, shortLabel and longLabel lines are built from whichever
  * variables on varList the file has a tag for, and it ends with a bigDataUrl
  * line holding the manifest file name.  comp is not used here. */
 {
 struct taggedFile *file;
 for (file = tfList; file != NULL; file = file->next)
     {
     if (!sameString(file->manifest->outputType, view->name))
         continue;
     struct metaTagVal *tags = file->tagList;
     struct slName *var;
     char *val;
     /* Identity lines. */
     trackId += 1;
     fprintf(f, "%strack t%d\n", indent, trackId);
     fprintf(f, "%sparent %s\n", indent, view->trackName);
     fprintf(f, "%stype %s\n", indent, type);
     /* Subgroup membership: the view plus each variable the file has. */
     fprintf(f, "%ssubGroups view=%s", indent, view->name);
     for (var = varList; var != NULL; var = var->next)
         {
         val = metaTagValFindVal(tags, var->name);
         if (val != NULL)
             fprintf(f, " %s=%s", var->name, val);
         }
     fprintf(f, "\n");
     /* Short label is just the variable values in order. */
     fprintf(f, "%sshortLabel", indent);
     for (var = varList; var != NULL; var = var->next)
         {
         val = metaTagValFindVal(tags, var->name);
         if (val != NULL)
             fprintf(f, " %s", val);
         }
     fprintf(f, "\n");
     /* Long label: lab, data type, then comma separated "name value" pairs. */
     char *lab = emptyForNull(metaTagValFindVal(tags, "lab"));
     char *dataType = emptyForNull(metaTagValFindVal(tags, "dataType"));
     fprintf(f, "%slongLabel %s %s", indent, lab, dataType);
     boolean wroteOne = FALSE;
     for (var = varList; var != NULL; var = var->next)
         {
         val = metaTagValFindVal(tags, var->name);
         if (val == NULL)
             continue;
         if (wroteOne)
             fprintf(f, ",");
         wroteOne = TRUE;
         fprintf(f, " %s %s", var->name, val);
         }
     fprintf(f, "\n");
     fprintf(f, "%sbigDataUrl %s\n", indent, file->manifest->fileName);
     fprintf(f, "\n");
     }
 }
 
 void outputComposite(struct composite *comp, struct slName *varList, struct hash *metaHash, FILE *f)
 /* Write the full set of trackDb stanzas for one composite to f.  The
  * composite stanza comes first: labels from the first file's lab and dataType
  * tags ("unknown" when missing), subGroup1 for views, subGroup2 and up for
  * the variables on varList, then dimensions and sortOrder.  After that each
  * view with a viewable format gets a view stanza followed by its leaf
  * tracks.  At most three variables are supported, mapped to dimensions Y, X
  * and Z in that order; more than three aborts. */
 {
 struct encode2Manifest *firstItem = comp->manRefList->val;
 struct view *views = makeViewList(comp->manRefList);
 struct view *v;
 struct slName *var;
 /* Composite header. */
 fprintf(f, "track %s\n", comp->name);
 fprintf(f, "compositeTrack on\n");
 char *lab = tagVal(firstItem, metaHash, "lab");
 char *dataType = tagVal(firstItem, metaHash, "dataType");
 if (lab == NULL)
     lab = "unknown";
 if (dataType == NULL)
     dataType = "unknown";
 fprintf(f, "shortLabel %s %s\n", lab, dataType);
 fprintf(f, "longLabel %s %s\n", lab, dataType);
 fprintf(f, "type bed 3\n");
 /* subGroup1 lists the views we are able to display. */
 fprintf(f, "subGroup1 view Views");
 for (v = views; v != NULL; v = v->next)
     {
     if (viewableFormat(v->format))
         fprintf(f, " %s=%s", v->name, v->name);
     }
 fprintf(f, "\n");
 struct taggedFile *files = taggedFileForComposite(comp, metaHash);
 uglyf("%d tfList\n", slCount(files));
 /* One further subGroup per variable, numbered from 2, with all its values. */
 int groupNumber = 2;
 for (var = varList; var != NULL; var = var->next, ++groupNumber)
     {
     fprintf(f, "subGroup%d %s %s", groupNumber, var->name, var->name);
     struct slName *choices = valsForVar(var->name, files);
     struct slName *choice;
     for (choice = choices; choice != NULL; choice = choice->next)
         fprintf(f, " %s=%s", choice->name, choice->name);
     fprintf(f, "\n");
     }
 /* First three variables become dimensions Y, X and Z. */
 fprintf(f, "dimensions");
 int varIx = 0;
 for (var = varList; var != NULL; var = var->next)
     {
     static const char dimLetters[] = "YXZ";
     if (varIx >= 3)
         errAbort("Too many dimensions in list,of,vars");
     fprintf(f, " dimension%c=%s", dimLetters[varIx], var->name);
     varIx += 1;
     }
 fprintf(f, "\n");
 fprintf(f, "sortOrder");
 for (var = varList; var != NULL; var = var->next)
     fprintf(f, " %s=+", var->name);
 fprintf(f, " view=+\n");
 fprintf(f, "dragAndDrop subtracks\n");
 fprintf(f, "\n");
 /* View stanzas, each followed by its leaf tracks. */
 for (v = views; v != NULL; v = v->next)
     {
     if (!viewableFormat(v->format))
         continue;
     fprintf(f, "    track %s\n", v->trackName);
     fprintf(f, "    parent %s\n", comp->name);
     fprintf(f, "    view %s\n", v->name);
     fprintf(f, "    shortLabel %s\n", v->name);
     fprintf(f, "    longLabel %s\n", v->name);
     fprintf(f, "    visibility dense\n");
     char *leafType = printFormatSpecificViewInfo(f, "    ", v, comp, files);
     fprintf(f, "\n");
     printLeafTrackList(f, "\t", v, comp, varList, files, leafType);
     }
 }
 
 void edwMetaManiToTdb(char *metaInput, char *manifestInput, char *varCommaList, char *tdbOutput)
 /* edwMetaManiToTdb - Create a trackDb file based on input from meta file and manifest file.
  * Loads the manifest and the meta forest, splits varCommaList on commas,
  * groups the manifest into composites and writes each composite's stanzas
  * to tdbOutput. */
 {
 struct encode2Manifest *manifestItems = encode2ManifestShortLoadAll(manifestInput);
 struct meta *forest = metaLoadAll(metaInput, "meta", "parent", TRUE, FALSE);
 struct slName *vars = slNameListFromComma(varCommaList);
 struct hash *metaLookup = metaHash(forest);
 struct composite *composites = makeCompositeList(manifestItems, metaLookup);
 uglyf("%d elements in manList, %d in metaForest top level, %d total, %d vars, %d composites\n", slCount(manifestItems), slCount(forest), metaLookup->elCount, slCount(vars), slCount(composites));
 FILE *out = mustOpen(tdbOutput, "w");
 struct composite *cur;
 for (cur = composites; cur != NULL; cur = cur->next)
     outputComposite(cur, vars, metaLookup, out);
 carefulClose(&out);
 }
 
 int main(int argc, char *argv[])
 /* Process command line: after option parsing exactly four positional
  * arguments must remain (program name plus metaInput, manifestInput,
  * list,of,vars and tdbOutput); anything else shows the usage message. */
 {
 enum { expectedArgCount = 5 };
 optionInit(&argc, argv, options);
 if (argc != expectedArgCount)
     usage();
 char *metaInput = argv[1];
 char *manifestInput = argv[2];
 char *varCommaList = argv[3];
 char *tdbOutput = argv[4];
 edwMetaManiToTdb(metaInput, manifestInput, varCommaList, tdbOutput);
 return 0;
 }