4898794edd81be5285ea6e544acbedeaeb31bf78 max Tue Nov 23 08:10:57 2021 -0800 Fixing pointers to README file for license in all source code files. refs #27614 diff --git src/hg/encode3/eap/lib/eapGraph.c src/hg/encode3/eap/lib/eapGraph.c index 44601b0..359c132 100644 --- src/hg/encode3/eap/lib/eapGraph.c +++ src/hg/encode3/eap/lib/eapGraph.c @@ -1,418 +1,418 @@ /* eapGraph - stuff to help traverse the graph defined by the eapRun, eapInput, and eapOutput * tables that define what files were used to produce what other files */ /* Copyright (C) 2014 The Regents of the University of California - * See README in this or parent directory for licensing information. */ + * See kent/LICENSE or http://genome.ucsc.edu/license/ for licensing information. */ #include "common.h" #include "hash.h" #include "linefile.h" #include "jksql.h" #include "portable.h" #include "intValTree.h" #include "longToList.h" #include "../../encodeDataWarehouse/inc/encodeDataWarehouse.h" #include "../../encodeDataWarehouse/inc/edwLib.h" #include "eapDb.h" #include "eapLib.h" #include "eapGraph.h" struct eapGraph *eapGraphNew(struct sqlConnection *conn) /* Return new eapGraph made by querying database */ { struct eapGraph *eg; AllocVar(eg); char query[256]; /* Load up everything from database, logging time maybe */ sqlSafef(query, sizeof(query), "select * from eapRun"); eg->runList = eapRunLoadByQuery(conn, query); sqlSafef(query, sizeof(query), "select * from eapInput"); eg->inputList = eapInputLoadByQuery(conn, query); sqlSafef(query, sizeof(query), "select * from eapOutput"); eg->outputList = eapOutputLoadByQuery(conn, query); sqlSafef(query, sizeof(query), "select * from edwFile"); eg->fileList = edwFileLoadByQuery(conn, query); sqlSafef(query, sizeof(query), "select * from edwValidFile"); eg->validList = edwValidFileLoadByQuery(conn, query); /* Set up runByExperiment */ struct hash *hash; eg->runByExperiment = hash = hashNew(0); struct eapRun *run; for (run = eg->runList; run != NULL; run = run->next) hashAdd(hash, run->experiment, run); /* Set up runById */ struct rbTree *ivt; eg->runById = ivt = intValTreeNew(); for (run = eg->runList; run != NULL; run = run->next) intValTreeAdd(ivt, run->id, run); /* Set up inputByFile */ struct longToList *lToL; eg->inputByFile = lToL = longToListNew(); struct eapInput *in; for (in = eg->inputList; in != NULL; in = in->next) longToListAdd(lToL, in->fileId, in); /* Set up inputByRun */ eg->inputByRun = lToL = longToListNew(); for (in = eg->inputList; in != NULL; in = in->next) longToListAdd(lToL, in->runId, in); /* Set up outputByFile - this one is singly-valued so easier */ eg->outputByFile = ivt = intValTreeNew(); struct eapOutput *out; for (out = eg->outputList; out != NULL; out = out->next) intValTreeAdd(ivt, out->fileId, out); /* Set up outputByRun */ eg->outputByRun = lToL = longToListNew(); for (out = eg->outputList; out != NULL; out = out->next) longToListAdd(lToL, out->runId, out); /* Set up fileById */ eg->fileById = ivt = intValTreeNew(); struct edwFile *file; for (file = eg->fileList; file != NULL; file = file->next) intValTreeAdd(ivt, file->id, file); /* Set up validByFileId */ eg->validByFileId = ivt = intValTreeNew(); struct edwValidFile *valid; for (valid = eg->validList; valid != NULL; valid = valid->next) intValTreeAdd(ivt, valid->fileId, valid); /* Set up validByExperiment */ eg->validByExperiment = hash = hashNew(0); for (valid = eg->validList; valid != NULL; valid = valid->next) hashAdd(hash, valid->experiment, valid); return eg; } void eapGraphFree(struct eapGraph **pEg) /* Free up resources associated with graph */ { struct eapGraph *eg = *pEg; if (eg == NULL) return; /* Free up the 5 lists we own */ eapRunFreeList(&eg->runList); eapInputFreeList(&eg->inputList); eapOutputFreeList(&eg->outputList); edwFileFreeList(&eg->fileList); edwValidFileFreeList(&eg->validList); /* Free up all 9 subcontainers, in same order as the corresponding fields are declared. */ hashFree(&eg->runByExperiment); rbTreeFree(&eg->runById); longToListFree(&eg->inputByFile); longToListFree(&eg->inputByRun); rbTreeFree(&eg->outputByFile); longToListFree(&eg->outputByRun); rbTreeFree(&eg->fileById); rbTreeFree(&eg->validByFileId); hashFree(&eg->validByExperiment); /* Finally free up self and set to NULL */ freez(pEg); } /* Routines to fetch out files given fileIds */ struct edwValidFile *eapGraphValidFromId(struct eapGraph *eg, unsigned fileId) /* Return edwValidFile given fileId */ { return intValTreeMustFind(eg->validByFileId, fileId); } struct edwFile *eapGraphFileFromId(struct eapGraph *eg, unsigned fileId) /* Return edwFile given ID */ { return intValTreeMustFind(eg->fileById, fileId); } struct slRef *eapGraphRunInputs(struct eapGraph *eg, unsigned runId) /* Fetch all inputs to this run. Vals on slRef are eapInputs. * Do not free this list, it is owned by graph. */ { return longToListFindVal(eg->inputByRun, runId); } struct slRef *eapGraphRunOutputs(struct eapGraph *eg, unsigned runId) /* Fetch all outputs to this run. Vals on slRef are eapOutputs. * Do not free this list, it is owned by graph. */ { return longToListFindVal(eg->outputByRun, runId); } struct slRef *eapGraphParentList(struct eapGraph *eg, unsigned fileId) /* Return list of all parents, possibly NULL. List is in form of slRefs with eapInput vals. * Do not free this list, it is owned by graph. */ { struct eapOutput *out = intValTreeFind(eg->outputByFile, fileId); if (out == NULL) return NULL; return longToListFindVal(eg->inputByRun, out->runId); } unsigned eapGraphAnyParent(struct eapGraph *eg, unsigned fileId) /* Return fileId of a parent chosen at convenience, zero if none */ { struct slRef *ref = eapGraphParentList(eg, fileId); if (ref == NULL) return 0; struct eapInput *in = ref->val; return in->fileId; } unsigned eapGraphSingleParent(struct eapGraph *eg, unsigned fileId) /* Return fileId of a parent, and make sure no more than one. Returns zero if none. */ { struct slRef *ref = eapGraphParentList(eg, fileId); if (ref == NULL) return 0; if (ref->next != NULL) errAbort("Multiple parents for %u in eapGraphSingleParent", fileId); struct eapInput *in = ref->val; return in->fileId; } unsigned eapGraphOneSingleParent(struct eapGraph *eg, unsigned fileId) /* Get file ID of parent gauranteeing that it exists and there is just one or aborting. */ { unsigned id = eapGraphSingleParent(eg, fileId); if (id == 0) errAbort("%u doesn't exist in eapGraphOneSingleParent", fileId); return id; } unsigned eapGraphAnyAncestorOfFormat(struct eapGraph *eg, unsigned fileId, char *format) /* Return fileId of first convenient ancestor of given format */ { struct slRef *ref, *refList = eapGraphParentList(eg, fileId); /* We do breadth first searching */ for (ref = refList; ref != NULL; ref = ref->next) { struct eapInput *in = ref->val; struct edwValidFile *valid = eapGraphValidFromId(eg, in->fileId); if (sameString(valid->format, format)) return valid->fileId; } /* If we got to here then we recurse */ for (ref = refList; ref != NULL; ref = ref->next) { struct eapInput *in = ref->val; unsigned id = eapGraphAnyAncestorOfFormat(eg, in->fileId, format); if (id != 0) return id; } /* Got to here, nothing left to search, just have to return 0 */ return 0; } void eapGraphAncestorsOfFormat(struct eapGraph *eg, unsigned fileId, char *format, int maxGenerations, struct slRef **retList) /* Return list of all ancestors of format. Set maxGenerations to -1 for any number of * generations back, otherwise 1 will stop at parents, 2 at grandparents, etc. The vals * on the returned list are eapInputs. This returned value should be slFreeList()'d when * done. */ { /* Guard against excessive depth. If they give us a negative maxGeneration will not be true ever */ if (maxGenerations == 0) return; maxGenerations -= 1; /* Look through parents. */ struct slRef *ref, *refList = eapGraphParentList(eg, fileId); for (ref = refList; ref != NULL; ref = ref->next) { struct eapInput *in = ref->val; struct edwValidFile *valid = eapGraphValidFromId(eg, in->fileId); if (sameString(valid->format, format)) { refAdd(retList, in); } eapGraphAncestorsOfFormat(eg, in->fileId, format, maxGenerations, retList); } } unsigned eapGraphAnyChild(struct eapGraph *eg, unsigned fileId) /* Return fileId of a child chosen at convenience, zero if none */ { struct slRef *refList = eapGraphChildList(eg, fileId); if (refList == NULL) return 0; struct eapInput *in = refList->val; unsigned child = in->fileId; slFreeList(&refList); return child; } unsigned eapGraphSingleChild(struct eapGraph *eg, unsigned fileId) /* Return fileId of a child, and make sure no more than one. Returns zero if none. */ { struct slRef *refList = eapGraphChildList(eg, fileId); if (refList == NULL) return 0; if (refList->next != NULL) errAbort("Multiple children for %u in eapGraphSingleChild", fileId); struct eapInput *in = refList->val; unsigned child = in->fileId; slFreeList(&refList); return child; } unsigned eapGraphOneSingleChild(struct eapGraph *eg, unsigned fileId) /* Get file ID of child gauranteeing that it exists and there is just one or aborting. */ { unsigned id = eapGraphSingleChild(eg, fileId); if (id == 0) errAbort("Can't find child for %u in eapGraphOnSingleChild", fileId); return id; } struct slRef *eapGraphChildList(struct eapGraph *eg, unsigned fileId) /* Return list of all children, possibly NULL. List is in form of slRefs with eapOutput vals. * You can slFreeList result when done. */ { struct slRef *outList = NULL; struct slRef *in, *inList = longToListFindVal(eg->inputByFile, fileId); for (in = inList; in != NULL; in = in->next) { struct eapInput *input = in->val; unsigned runId = input ->runId; struct slRef *runOut, *runOutList = longToListFindVal(eg->outputByRun, runId); for (runOut = runOutList; runOut != NULL; runOut = runOut->next) { struct eapOutput *output = runOut->val; refAdd(&outList, output); } } return outList; } unsigned eapGraphAnyDescendantOfFormat(struct eapGraph *eg, unsigned fileId, char *format) /* Return first convenient descendant of given format */ { unsigned result = 0; struct slRef *ref, *refList = eapGraphChildList(eg, fileId); /* We do breadth first searching */ for (ref = refList; ref != NULL; ref = ref->next) { struct eapOutput *out = ref->val; struct edwValidFile *valid = eapGraphValidFromId(eg, out->fileId); if (sameString(valid->format, format)) { result = valid->fileId; break; } } /* If we got to here then we recurse */ if (result == 0) { for (ref = refList; ref != NULL; ref = ref->next) { struct eapOutput *out = ref->val; unsigned id = eapGraphAnyDescendantOfFormat(eg, out->fileId, format); if (id != 0) { result = id; break; } } } /* Clean up and return result */ slFreeList(&refList); return result; } void eapGraphDescendantsOfFormat(struct eapGraph *eg, unsigned fileId, char *format, int maxGenerations, struct slRef **retList) /* Return list of all descendants of format. Set maxGenerations to -1 for any number of * generations back, otherwise 1 will stop at children, 2 at grandchildren, etc. The vals * on the returned list are eapOutputs. This returned value should be slFreeList()'d when * done. */ { /* Guard against excessive depth. If they give us a negative maxGeneration will not be true ever */ if (maxGenerations == 0) return; maxGenerations -= 1; /* Look through children. */ struct slRef *ref, *refList = eapGraphChildList(eg, fileId); for (ref = refList; ref != NULL; ref = ref->next) { struct eapOutput *out = ref->val; struct edwValidFile *valid = eapGraphValidFromId(eg, out->fileId); if (sameString(valid->format, format)) { refAdd(retList, out); } eapGraphDescendantsOfFormat(eg, out->fileId, format, maxGenerations, retList); } slFreeList(&refList); // This little bit of cleanup not needed for ancestors } void rEapGraphAllAncestors(struct eapGraph *eg, unsigned fileId, struct slRef **retList) /* Recursively add all ancestors. */ { struct slRef *parent, *parentList = eapGraphParentList(eg, fileId); for (parent = parentList; parent != NULL; parent = parent->next) { struct eapOutput *out = parent->val; refAdd(retList, out); rEapGraphAllAncestors(eg, out->fileId, retList); } } struct slRef *eapGraphAllAncestors(struct eapGraph *eg, unsigned fileId) /* Return list of all ancestors. Values of slRef are eapOutputs from which you can * harvest either ancestral fileIds or analysis runIds. */ { /* In implementation this is just a minor wrapper around recursive routine. */ struct slRef *list = NULL; rEapGraphAllAncestors(eg, fileId, &list); return list; } void rEapGraphAllDescendants(struct eapGraph *eg, unsigned fileId, struct slRef **retList) /* Recursively add all descendants. */ { struct slRef *child, *childList = eapGraphChildList(eg, fileId); for (child = childList; child != NULL; child = child->next) { struct eapInput *in = child->val; refAdd(retList, in); rEapGraphAllDescendants(eg, in->fileId, retList); } slFreeList(&childList); } struct slRef *eapGraphAllDescendants(struct eapGraph *eg, unsigned fileId) /* Return list of all ancestors. Values of slRef are eapOutputs from which you can * harvest either ancestral fileIds or analysis runIds. */ { /* In implementation this is just a minor wrapper around recursive routine. */ struct slRef *list = NULL; rEapGraphAllDescendants(eg, fileId, &list); return list; }