4898794edd81be5285ea6e544acbedeaeb31bf78
max
  Tue Nov 23 08:10:57 2021 -0800
Fixing pointers to README file for license in all source code files. refs #27614

diff --git src/hg/encode3/eap/lib/eapGraph.c src/hg/encode3/eap/lib/eapGraph.c
index 44601b0..359c132 100644
--- src/hg/encode3/eap/lib/eapGraph.c
+++ src/hg/encode3/eap/lib/eapGraph.c
@@ -1,418 +1,418 @@
 /* eapGraph - stuff to help traverse the graph defined by the eapRun, eapInput, and eapOutput
  * tables that define what files were used to produce what other files */
 
 /* Copyright (C) 2014 The Regents of the University of California 
- * See README in this or parent directory for licensing information. */
+ * See kent/LICENSE or http://genome.ucsc.edu/license/ for licensing information. */
 
 
 #include "common.h"
 #include "hash.h"
 #include "linefile.h"
 #include "jksql.h"
 #include "portable.h"
 #include "intValTree.h"
 #include "longToList.h"
 #include "../../encodeDataWarehouse/inc/encodeDataWarehouse.h"
 #include "../../encodeDataWarehouse/inc/edwLib.h"
 #include "eapDb.h"
 #include "eapLib.h"
 #include "eapGraph.h"
 
 
 struct eapGraph *eapGraphNew(struct sqlConnection *conn)
 /* Return new eapGraph made by querying database */
 {
 struct eapGraph *eg;
 AllocVar(eg);
 char query[256];
 
 /* Load up everything from database, logging time maybe */
 sqlSafef(query, sizeof(query), "select * from eapRun");
 eg->runList = eapRunLoadByQuery(conn, query);
 sqlSafef(query, sizeof(query), "select * from eapInput");
 eg->inputList = eapInputLoadByQuery(conn, query);
 sqlSafef(query, sizeof(query), "select * from eapOutput");
 eg->outputList = eapOutputLoadByQuery(conn, query);
 sqlSafef(query, sizeof(query), "select * from edwFile");
 eg->fileList = edwFileLoadByQuery(conn, query);
 sqlSafef(query, sizeof(query), "select * from edwValidFile");
 eg->validList = edwValidFileLoadByQuery(conn, query);
 
 /* Set up runByExperiment */
 struct hash *hash;
 eg->runByExperiment = hash = hashNew(0);
 struct eapRun *run;
 for (run = eg->runList; run != NULL; run = run->next)
      hashAdd(hash, run->experiment, run);
 
 /* Set up runById */
 struct rbTree *ivt;
 eg->runById = ivt = intValTreeNew();
 for (run = eg->runList; run != NULL; run = run->next)
      intValTreeAdd(ivt, run->id, run);
 
 /* Set up inputByFile */
 struct longToList *lToL;
 eg->inputByFile = lToL = longToListNew();
 struct eapInput *in;
 for (in = eg->inputList; in != NULL; in = in->next)
     longToListAdd(lToL, in->fileId, in);
 
 /* Set up inputByRun */
 eg->inputByRun = lToL = longToListNew();
 for (in = eg->inputList; in != NULL; in = in->next)
     longToListAdd(lToL, in->runId, in);
 
 /* Set up outputByFile - this one is singly-valued so easier */
 eg->outputByFile = ivt = intValTreeNew();
 struct eapOutput *out;
 for (out = eg->outputList; out != NULL; out = out->next)
     intValTreeAdd(ivt, out->fileId, out);
 
 /* Set up outputByRun */
 eg->outputByRun = lToL = longToListNew();
 for (out = eg->outputList; out != NULL; out = out->next)
     longToListAdd(lToL, out->runId, out);
 
 /* Set up fileById */
 eg->fileById = ivt = intValTreeNew();
 struct edwFile *file;
 for (file = eg->fileList; file != NULL; file = file->next)
     intValTreeAdd(ivt, file->id, file);
 
 /* Set up validByFileId */
 eg->validByFileId = ivt = intValTreeNew();
 struct edwValidFile *valid;
 for (valid = eg->validList; valid != NULL; valid = valid->next)
     intValTreeAdd(ivt, valid->fileId, valid);
     
 /* Set up validByExperiment */
 eg->validByExperiment = hash = hashNew(0);
 for (valid = eg->validList; valid != NULL; valid = valid->next)
     hashAdd(hash, valid->experiment, valid);
 
 return eg;
 }
 
 
 void eapGraphFree(struct eapGraph **pEg)
 /* Release everything held by *pEg: the five row lists, the nine lookup containers,
  * and finally the eapGraph itself, leaving *pEg NULL.  Does nothing if *pEg is
  * already NULL. */
 {
 struct eapGraph *graph = *pEg;
 if (graph != NULL)
     {
     /* The five row lists owned by the graph. */
     eapRunFreeList(&graph->runList);
     eapInputFreeList(&graph->inputList);
     eapOutputFreeList(&graph->outputList);
     edwFileFreeList(&graph->fileList);
     edwValidFileFreeList(&graph->validList);
 
     /* The nine lookup containers, in field declaration order. */
     hashFree(&graph->runByExperiment);
     rbTreeFree(&graph->runById);
     longToListFree(&graph->inputByFile);
     longToListFree(&graph->inputByRun);
     rbTreeFree(&graph->outputByFile);
     longToListFree(&graph->outputByRun);
     rbTreeFree(&graph->fileById);
     rbTreeFree(&graph->validByFileId);
     hashFree(&graph->validByExperiment);
 
     /* The graph struct itself; freez also NULLs the caller's pointer. */
     freez(pEg);
     }
 }
 
 /* Routines to fetch out files given fileIds */
 
 struct edwValidFile *eapGraphValidFromId(struct eapGraph *eg, unsigned fileId)
 /* Fetch the edwValidFile stored under fileId in the graph's validByFileId index.
  * Uses the MustFind flavor of lookup, so presumably a missing id is fatal -
  * confirm against intValTree. */
 {
 struct edwValidFile *valid = intValTreeMustFind(eg->validByFileId, fileId);
 return valid;
 }
 
 struct edwFile *eapGraphFileFromId(struct eapGraph *eg, unsigned fileId)
 /* Fetch the edwFile stored under fileId in the graph's fileById index.
  * Uses the MustFind flavor of lookup, so presumably a missing id is fatal -
  * confirm against intValTree. */
 {
 struct edwFile *file = intValTreeMustFind(eg->fileById, fileId);
 return file;
 }
 
 struct slRef *eapGraphRunInputs(struct eapGraph *eg, unsigned runId)
 /* Return the inputs recorded for run runId as a list of slRefs whose vals are
  * eapInputs.  The list belongs to the graph - do not free it. */
 {
 struct slRef *inputRefs = longToListFindVal(eg->inputByRun, runId);
 return inputRefs;
 }
 
 struct slRef *eapGraphRunOutputs(struct eapGraph *eg, unsigned runId)
 /* Return the outputs recorded for run runId as a list of slRefs whose vals are
  * eapOutputs.  The list belongs to the graph - do not free it. */
 {
 struct slRef *outputRefs = longToListFindVal(eg->outputByRun, runId);
 return outputRefs;
 }
 
 struct slRef *eapGraphParentList(struct eapGraph *eg, unsigned fileId)
 /* Return the parents of fileId: the inputs of the run that has fileId as an output.
  * Result is a list of slRefs with eapInput vals, or NULL if no run lists fileId as
  * an output.  The list belongs to the graph - do not free it. */
 {
 struct eapOutput *producedAs = intValTreeFind(eg->outputByFile, fileId);
 if (producedAs != NULL)
     return longToListFindVal(eg->inputByRun, producedAs->runId);
 return NULL;
 }
 
 
 unsigned eapGraphAnyParent(struct eapGraph *eg, unsigned fileId)
 /* Return the fileId of whichever parent comes first in the parent list, or zero
  * when fileId has no parents. */
 {
 struct slRef *parents = eapGraphParentList(eg, fileId);
 if (parents != NULL)
     {
     struct eapInput *first = parents->val;
     return first->fileId;
     }
 return 0;
 }
 
 unsigned eapGraphSingleParent(struct eapGraph *eg, unsigned fileId)
 /* Return the fileId of the only parent of fileId, or zero when there is none.
  * Calls errAbort if there is more than one parent. */
 {
 unsigned parentId = 0;
 struct slRef *parents = eapGraphParentList(eg, fileId);
 if (parents != NULL)
     {
     if (parents->next != NULL)
         errAbort("Multiple parents for %u in eapGraphSingleParent", fileId);
     struct eapInput *only = parents->val;
     parentId = only->fileId;
     }
 return parentId;
 }
 
 unsigned eapGraphOneSingleParent(struct eapGraph *eg, unsigned fileId)
 /* Get file ID of parent, guaranteeing that it exists and there is just one.
  * Calls errAbort when fileId has no parent; eapGraphSingleParent takes care of
  * the more-than-one case. */
 {
 unsigned id = eapGraphSingleParent(eg, fileId);
 if (id == 0)
     /* It is the parent that is missing, not fileId itself, so say so. */
     errAbort("Can't find parent for %u in eapGraphOneSingleParent", fileId);
 return id;
 }
 
 unsigned eapGraphAnyAncestorOfFormat(struct eapGraph *eg, unsigned fileId, char *format)
 /* Return the fileId of the first ancestor found whose edwValidFile format equals
  * format, or zero if the search runs out of ancestors.  All direct parents are
  * checked before any of them is searched recursively. */
 {
 struct slRef *parents = eapGraphParentList(eg, fileId);
 struct slRef *el;
 
 /* Pass one: a direct parent in the right format wins immediately. */
 for (el = parents; el != NULL; el = el->next)
     {
     struct eapInput *parent = el->val;
     struct edwValidFile *vf = eapGraphValidFromId(eg, parent->fileId);
     if (sameString(vf->format, format))
         return vf->fileId;
     }
 
 /* Pass two: search above each parent in turn, stopping at the first hit.
  * If nothing turns up, found is still zero when we fall out of the loop. */
 unsigned found = 0;
 for (el = parents; el != NULL && found == 0; el = el->next)
     {
     struct eapInput *parent = el->val;
     found = eapGraphAnyAncestorOfFormat(eg, parent->fileId, format);
     }
 return found;
 }
 
 
 void eapGraphAncestorsOfFormat(struct eapGraph *eg, unsigned fileId, 
     char *format, int maxGenerations, struct slRef **retList)
 /* Add to *retList every ancestor of fileId whose edwValidFile format equals format.
  * maxGenerations limits how far back to look: 1 stops at parents, 2 at grandparents,
  * and so on; pass -1 for no limit.  Vals on the list are eapInputs.  The caller
  * should slFreeList() the list when done with it. */
 {
 /* Out of generations.  A negative count only moves further from zero as it is
  * decremented, so it does not stop here. */
 if (maxGenerations == 0)
     return;
 int generationsLeft = maxGenerations - 1;
 
 /* Visit each parent: record it if the format matches, then keep climbing. */
 struct slRef *el;
 for (el = eapGraphParentList(eg, fileId); el != NULL; el = el->next)
     {
     struct eapInput *parent = el->val;
     struct edwValidFile *vf = eapGraphValidFromId(eg, parent->fileId);
     if (sameString(vf->format, format))
         refAdd(retList, parent);
     eapGraphAncestorsOfFormat(eg, parent->fileId, format, generationsLeft, retList);
     }
 }
 
 unsigned eapGraphAnyChild(struct eapGraph *eg, unsigned fileId)
 /* Return fileId of a child chosen at convenience, zero if none */
 {
 struct slRef *refList = eapGraphChildList(eg, fileId);
 if (refList == NULL)
     return 0;
 struct eapInput *in = refList->val;
 unsigned child = in->fileId;
 slFreeList(&refList);
 return child;
 }
 
 unsigned eapGraphSingleChild(struct eapGraph *eg, unsigned fileId)
 /* Return fileId of a child, and make sure no more than one.  Returns zero if none. */
 {
 struct slRef *refList = eapGraphChildList(eg, fileId);
 if (refList == NULL)
    return 0;
 if (refList->next != NULL)
    errAbort("Multiple children for %u in eapGraphSingleChild", fileId);
 struct eapInput *in = refList->val;
 unsigned child = in->fileId;
 slFreeList(&refList);
 return child;
 }
 
 unsigned eapGraphOneSingleChild(struct eapGraph *eg, unsigned fileId)
 /* Get file ID of child, guaranteeing that it exists and there is just one.
  * Calls errAbort when fileId has no child; eapGraphSingleChild takes care of
  * the more-than-one case. */
 {
 unsigned id = eapGraphSingleChild(eg, fileId);
 if (id == 0)
     errAbort("Can't find child for %u in eapGraphOneSingleChild", fileId);
 return id;
 }
 
 struct slRef *eapGraphChildList(struct eapGraph *eg, unsigned fileId)
 /* Return the children of fileId: every output of every run that takes fileId as an
  * input.  Result is a freshly built list of slRefs with eapOutput vals, NULL if there
  * are none.  The caller may slFreeList() it when done. */
 {
 struct slRef *children = NULL;
 struct slRef *inRef;
 for (inRef = longToListFindVal(eg->inputByFile, fileId); inRef != NULL; inRef = inRef->next)
     {
     /* Each use of the file as an input names a run; collect that run's outputs. */
     struct eapInput *usedAs = inRef->val;
     struct slRef *outRef = longToListFindVal(eg->outputByRun, usedAs->runId);
     for ( ; outRef != NULL; outRef = outRef->next)
         {
         struct eapOutput *produced = outRef->val;
         refAdd(&children, produced);
         }
     }
 return children;
 }
 
 unsigned eapGraphAnyDescendantOfFormat(struct eapGraph *eg, unsigned fileId, char *format)
 /* Return the fileId of the first descendant found whose edwValidFile format equals
  * format, or zero if the search runs out of descendants.  All direct children are
  * checked before any of them is searched recursively. */
 {
 struct slRef *children = eapGraphChildList(eg, fileId);
 struct slRef *el;
 unsigned found = 0;
 
 /* Pass one: look for a direct child in the right format. */
 for (el = children; el != NULL; el = el->next)
     {
     struct eapOutput *child = el->val;
     struct edwValidFile *vf = eapGraphValidFromId(eg, child->fileId);
     if (sameString(vf->format, format))
         {
         found = vf->fileId;
         break;
         }
     }
 
 /* Pass two, skipped entirely once found is set: search below each child in turn,
  * stopping at the first hit. */
 for (el = children; el != NULL && found == 0; el = el->next)
     {
     struct eapOutput *child = el->val;
     found = eapGraphAnyDescendantOfFormat(eg, child->fileId, format);
     }
 
 /* The child list was built for us, so it is ours to free. */
 slFreeList(&children);
 return found;
 }
 
 void eapGraphDescendantsOfFormat(struct eapGraph *eg, unsigned fileId, 
     char *format, int maxGenerations, struct slRef **retList)
 /* Add to *retList every descendant of fileId whose edwValidFile format equals format.
  * maxGenerations limits how far forward to look: 1 stops at children, 2 at
  * grandchildren, and so on; pass -1 for no limit.  Vals on the list are eapOutputs.
  * The caller should slFreeList() the list when done with it. */
 {
 /* Out of generations.  A negative count only moves further from zero as it is
  * decremented, so it does not stop here. */
 if (maxGenerations == 0)
     return;
 int generationsLeft = maxGenerations - 1;
 
 /* Visit each child: record it if the format matches, then keep descending. */
 struct slRef *children = eapGraphChildList(eg, fileId);
 struct slRef *el;
 for (el = children; el != NULL; el = el->next)
     {
     struct eapOutput *child = el->val;
     struct edwValidFile *vf = eapGraphValidFromId(eg, child->fileId);
     if (sameString(vf->format, format))
         refAdd(retList, child);
     eapGraphDescendantsOfFormat(eg, child->fileId, format, generationsLeft, retList);
     }
 
 /* Unlike the parent list, the child list is allocated per call and must be freed. */
 slFreeList(&children);
 }
 
 void rEapGraphAllAncestors(struct eapGraph *eg, unsigned fileId, struct slRef **retList)
 /* Recursively add all ancestors. */
 {
 struct slRef *parent, *parentList = eapGraphParentList(eg, fileId);
 for (parent = parentList; parent != NULL; parent = parent->next)
     {
     struct eapOutput *out = parent->val;
     refAdd(retList, out);
     rEapGraphAllAncestors(eg, out->fileId, retList);
     }
 }
 
 struct slRef *eapGraphAllAncestors(struct eapGraph *eg, unsigned fileId)
 /* Return list of all ancestors.  Values of slRef are eapInputs (the records on the
  * parent lists), from which you can harvest either ancestral fileIds or the runIds
  * of the analysis runs that consumed them.  Returns NULL if there are no ancestors. */
 {
 /* In implementation this is just a minor wrapper around recursive routine. */
 struct slRef *ancestors = NULL;
 rEapGraphAllAncestors(eg, fileId, &ancestors);
 return ancestors;
 }
 
 
 void rEapGraphAllDescendants(struct eapGraph *eg, unsigned fileId, struct slRef **retList)
 /* Recursively add all descendants. */
 {
 struct slRef *child, *childList = eapGraphChildList(eg, fileId);
 for (child = childList; child != NULL; child = child->next)
     {
     struct eapInput *in = child->val;
     refAdd(retList, in);
     rEapGraphAllDescendants(eg, in->fileId, retList);
     }
 slFreeList(&childList);
 }
 
 struct slRef *eapGraphAllDescendants(struct eapGraph *eg, unsigned fileId)
 /* Return list of all descendants.  Values of slRef are eapOutputs from which you can
  * harvest either descendant fileIds or the runIds of the analysis runs that produced
  * them.  Returns NULL if there are no descendants. */
 {
 /* In implementation this is just a minor wrapper around recursive routine. */
 struct slRef *descendants = NULL;
 rEapGraphAllDescendants(eg, fileId, &descendants);
 return descendants;
 }