be4311c07e14feb728abc6425ee606ffaa611a58 markd Fri Jan 22 06:46:58 2021 -0800 merge with master diff --git src/hg/txGraph/txBedToGraph/makeGraph.c src/hg/txGraph/txBedToGraph/makeGraph.c index 873ce59..0c9e6f8 100644 --- src/hg/txGraph/txBedToGraph/makeGraph.c +++ src/hg/txGraph/txBedToGraph/makeGraph.c @@ -231,95 +231,95 @@ /* Add final exon */ struct vertex *end = matchingVertex(vertexTree, bed->chromEnd, ggSoftEnd); addUniqueEdge(edgeTree, start, end, lb); /* If there's another bed to go, add a soft intron connecting it. */ if (nextBed != NULL) { start = matchingVertex(vertexTree, nextBed->chromStart, ggSoftStart); addUniqueEdge(edgeTree, end, start, lb); } } } return edgeTree; } -static void incVertexUses(void *item) +void incVertexUses(void *item) /* Callback to clear edge->start->useCount and edge->end->useCount. */ { struct edge *edge = item; edge->start->count += 1; edge->end->count += 1; } -static void removeUnusedVertices(struct rbTree *vertexTree, struct rbTree *edgeTree) +void removeUnusedVertices(struct rbTree *vertexTree, struct rbTree *edgeTree) /* Remove vertices not connected to any edges. */ { /* Get vertex list and clear counts. */ struct slRef *vRef, *vRefList = rbTreeItems(vertexTree); for (vRef = vRefList; vRef != NULL; vRef = vRef->next) { struct vertex *v = vRef->val; v->count = 0; } /* Inc counts of vertices connected to edges. */ rbTreeTraverse(edgeTree, incVertexUses); /* Remove unused vertices. */ for (vRef = vRefList; vRef != NULL; vRef = vRef->next) { struct vertex *v = vRef->val; if (v->count == 0) rbTreeRemove(vertexTree, v); } slFreeList(&vRefList); } -static void removeEmptyEdges(struct rbTree *vertexTree, struct rbTree *edgeTree) +void removeEmptyEdges(struct rbTree *vertexTree, struct rbTree *edgeTree) /* Remove edges that are zero or negative in length. */ { int removeCount = 0; struct slRef *edgeRef, *edgeRefList = rbTreeItems(edgeTree); for (edgeRef = edgeRefList; edgeRef != NULL; edgeRef = edgeRef->next) { struct edge *edge = edgeRef->val; if (edge->start->position >= edge->end->position) { removeCount += 1; rbTreeRemove(edgeTree, edge); } } if (removeCount) removeUnusedVertices(vertexTree, edgeTree); slFreeList(&edgeRefList); } static struct dlList *sortedListFromTree(struct rbTree *tree) /* Create a double-linked list from tree. List will be sorted. */ { struct slRef *ref, *refList = rbTreeItems(tree); struct dlList *list = dlListNew(); for (ref = refList; ref != NULL; ref = ref->next) dlAddValTail(list, ref->val); slFreeList(&refList); return list; } -static void addWaysInAndOut(struct rbTree *vertexTree, struct rbTree *edgeTree, +void addWaysInAndOut(struct rbTree *vertexTree, struct rbTree *edgeTree, struct lm *lm) /* Put a bunch of ways in and out of the graph onto the edges. */ { struct slRef *edgeRef, *edgeRefList = rbTreeItems(edgeTree); for (edgeRef = edgeRefList; edgeRef != NULL; edgeRef = edgeRef->next) { struct edge *edge = edgeRef->val; struct slRef *inRef, *outRef; lmAllocVar(lm, inRef); lmAllocVar(lm, outRef); inRef->val = outRef->val = edge; slAddHead(&edge->start->waysOut, outRef); slAddHead(&edge->end->waysIn, inRef); } slFreeList(&edgeRefList); @@ -454,71 +454,71 @@ if (dif > maxSnapSize) break; if (v->type == ggHardStart) { if (checkSnapOk(vOld, v, TRUE, dif, maxUncheckedSnapSize, seqCache, chromName)) { vOld->movedTo = v; return TRUE; } } } } return FALSE; } -static void mergeOrAddEdge(struct rbTree *edgeTree, struct edge *edge) +void mergeOrAddEdge(struct rbTree *edgeTree, struct edge *edge) /* Add edge back if it is still unique, otherwise move evidence from * edge into existing edge. */ { struct edge *existing = rbTreeFind(edgeTree, edge); if (existing) { existing->evList = slCat(existing->evList, edge->evList); edge->evList = NULL; } else rbTreeAdd(edgeTree, edge); } -static void updateForwardedEdges(struct rbTree *edgeTree) +void updateForwardedEdges(struct rbTree *edgeTree) /* Go through edges, following the movedTo's in the * vertices if need be. */ { struct slRef *ref, *refList = rbTreeItems(edgeTree); int forwardCount = 0; for (ref = refList; ref != NULL; ref = ref->next) { struct edge *edge = ref->val; struct vertex *start = edge->start, *end = edge->end; if (start->movedTo || end->movedTo) { ++forwardCount; rbTreeRemove(edgeTree, edge); if (start->movedTo) edge->start = start->movedTo; if (end->movedTo) edge->end = end->movedTo; mergeOrAddEdge(edgeTree, edge); } } if (forwardCount > 0) verbose(3, "Forwarded %d edges.\n", forwardCount); slFreeList(&refList); } -static void snapSoftToCloseHard(struct rbTree *vertexTree, struct rbTree *edgeTree, int maxSnapSize, +void snapSoftToCloseHard(struct rbTree *vertexTree, struct rbTree *edgeTree, int maxSnapSize, int maxUncheckedSnapSize, struct nibTwoCache *seqCache, char *chromName) /* Snap hard vertices to nearby soft vertices of same type. */ { struct lm *lm = lmInit(0); addWaysInAndOut(vertexTree, edgeTree, lm); struct dlList *vList = sortedListFromTree(vertexTree); struct dlNode *node; int snapCount = 0; for (node = vList->head; !dlEnd(node); node = node->next) { if (snapVertex(node, maxSnapSize, maxUncheckedSnapSize, seqCache, chromName)) { rbTreeRemove(vertexTree, node->val); ++snapCount; } @@ -669,31 +669,31 @@ } else if (currentType == ggHardEnd) { softType = ggSoftStart; hardType = ggHardStart; } else return 0; int snapCount = 0; snapCount += snapHalfHardForward(v, edgeTree, softType, hardType); snapCount += snapHalfHardBackward(v, edgeTree, softType, hardType); return snapCount; } -static void snapHalfHards(struct rbTree *vertexTree, struct rbTree *edgeTree) +void snapHalfHards(struct rbTree *vertexTree, struct rbTree *edgeTree) /* Snap edges that are half hard to edges that are fully hard and that * share the original hard end */ { struct lm *lm = lmInit(0); addWaysInAndOut(vertexTree, edgeTree, lm); struct slRef *vRef, *vRefList = rbTreeItems(vertexTree); for (vRef = vRefList; vRef != NULL; vRef = vRef->next) { struct vertex *v = vRef->val; snapHalfHard(v, edgeTree); v->waysIn = v->waysOut = NULL; /* These are trashed so remove them. */ } slFreeList(&vRefList); removeUnusedVertices(vertexTree, edgeTree); lmCleanup(&lm); @@ -727,31 +727,31 @@ int listSize, enum ggVertexType softType) /* Return first trusted (refSeq) vertex, or if no trusted one, then * a soft vertex 1/4 of way (rounded down) through list. */ { struct sourceAndPos *el; for (el = list; el != NULL; el = el->next) { if (el->trustedSource) break; } if (el == NULL) el = slElementFromIx(list, listSize/4); return matchingVertex(vertexTree, el->position, softType); } -static void halfConsensusForward(struct vertex *v, +void halfConsensusForward(struct vertex *v, struct rbTree *vertexTree, struct rbTree *edgeTree, enum ggVertexType softType, struct lm *lm) /* Figure out consensus end of all edges beginning at v that have soft end. */ { /* Collect a list of all attached softies. */ struct sourceAndPos *list = NULL, *el; struct slRef *edgeRef; int softCount = 0; for (edgeRef = v->waysOut; edgeRef != NULL; edgeRef = edgeRef->next) { struct edge *edge = edgeRef->val; struct vertex *v = edge->end; if (v->type == softType) { struct evidence *ev; @@ -777,31 +777,31 @@ struct edge *edge = edgeRef->val; struct vertex *v = edge->end; if (v != end && v->type == softType) { rbTreeRemove(edgeTree, edge); verbose(3, "Performing half-hard consensus: moving edge end from %d to %d\n", edge->end->position, end->position); edge->end = end; mergeOrAddEdge(edgeTree, edge); // Will always merge. } } } } -static void halfConsensusBackward(struct vertex *v, +void halfConsensusBackward(struct vertex *v, struct rbTree *vertexTree, struct rbTree *edgeTree, enum ggVertexType softType, struct lm *lm) /* Figure out consensus start of all edges end at v that have soft start. */ { /* Collect a list of all attached softies. */ struct sourceAndPos *list = NULL, *el; struct slRef *edgeRef; int softCount = 0; for (edgeRef = v->waysIn; edgeRef != NULL; edgeRef = edgeRef->next) { struct edge *edge = edgeRef->val; struct vertex *v = edge->start; if (v->type == softType) { struct evidence *ev; @@ -826,55 +826,55 @@ { struct edge *edge = edgeRef->val; struct vertex *v = edge->start; if (v != start && v->type == softType) { rbTreeRemove(edgeTree, edge); verbose(3, "Performing half-hard consensus: moving edge start from %d to %d\n", edge->start->position, start->position); edge->start = start; mergeOrAddEdge(edgeTree, edge); // Will always merge. } } } } -static void halfHardConsensus(struct vertex *v, +void halfHardConsensus(struct vertex *v, struct rbTree *vertexTree, struct rbTree *edgeTree, struct lm *lm) /* Form consensus of soft vertices connected to hard vertex v. * Consensus is: * 1) The largest refSeq. * 2) If no refSeq, something 3/4 of the way to the largest. */ { enum ggVertexType currentType = v->type; enum ggVertexType softType; /* Figure out what types look for at the other end. If we're * a soft vertex we can't do anything. */ if (currentType == ggHardStart) softType = ggSoftEnd; else if (currentType == ggHardEnd) softType = ggSoftStart; else return; halfConsensusForward(v, vertexTree, edgeTree, softType, lm); halfConsensusBackward(v, vertexTree, edgeTree, softType, lm); } -static void halfHardConsensuses(struct rbTree *vertexTree, struct rbTree *edgeTree) +void halfHardConsensuses(struct rbTree *vertexTree, struct rbTree *edgeTree) /* Collect soft edges that share a hard edge. Move all of them to a consensus * vertex, which is either: * 1) The largest refSeq. * 2) If no refSeq, something 3/4 of the way to the largest. */ { struct lm *lm = lmInit(0); addWaysInAndOut(vertexTree, edgeTree, lm); struct slRef *vRef, *vRefList = rbTreeItems(vertexTree); for (vRef = vRefList; vRef != NULL; vRef = vRef->next) { struct vertex *v = vRef->val; halfHardConsensus(v, vertexTree, edgeTree, lm); v->waysIn = v->waysOut = NULL; /* These are trashed so remove them. */ } slFreeList(&vRefList); @@ -920,31 +920,31 @@ edge->start = start; edge->end = end; edge->next = NULL; /* Add overlapping evidence to edge. */ for (ev = evList; ev != NULL; ev = nextEv) { nextEv = ev->next; if (rangeIntersection(ev->start, ev->end, start->position, end->position) > 0) slAddHead(&edge->evList, ev); } return edge; } -static void removeEnclosedDoubleSofts(struct rbTree *vertexTree, struct rbTree *edgeTree, +void removeEnclosedDoubleSofts(struct rbTree *vertexTree, struct rbTree *edgeTree, int maxBleedOver, double singleExonMaxOverlap) /* Move double-softs that overlap spliced things to a very great extent into * the spliced things. Also remove tiny double-softs (no more than 2*maxBleedOver). */ { /* Traverse graph and build up range tree covering spliced exons. For each * range of overlapping exons, assemble a singly-linked list of all exons in * the range */ struct rbTree *rangeTree = rangeTreeNew(0); struct slRef *edgeRef, *edgeRefList = rbTreeItems(edgeTree); int removedCount = 0; for (edgeRef = edgeRefList; edgeRef != NULL; edgeRef = edgeRef->next) { struct edge *edge = edgeRef->val; struct vertex *start = edge->start; struct vertex *end = edge->end; @@ -1016,31 +1016,31 @@ removeUnusedVertices(vertexTree, edgeTree); for (edgeRef = edgeRefList; edgeRef != NULL; edgeRef = edgeRef->next) { struct edge *nextEdge, *edge = edgeRef->val; while (edge != NULL) { nextEdge = edge->next; edge->next = NULL; edge = nextEdge; } } slFreeList(&edgeRefList); rbTreeFree(&rangeTree); } -static void mergeDoubleSofts(struct rbTree *vertexTree, struct rbTree *edgeTree) +void mergeDoubleSofts(struct rbTree *vertexTree, struct rbTree *edgeTree) /* Merge together overlapping edges with soft ends. */ { struct mergedEdge /* Hold together info on a merged edge. */ { struct evidence *evidence; }; /* Traverse graph and build up range tree. Each node in the range tree * will represent the bounds of coordinates of overlapping double softs */ struct rbTree *rangeTree = rangeTreeNew(0); struct slRef *edgeRef, *edgeRefList = rbTreeItems(edgeTree); for (edgeRef = edgeRefList; edgeRef != NULL; edgeRef = edgeRef->next) { struct edge *edge = edgeRef->val; @@ -1090,47 +1090,50 @@ if (edge != NULL) rbTreeAdd(edgeTree, edge); verbose(3, "Deriving edge (%d,%d) from all the double softs in range (%d,%d)\n", edge->start->position, edge->end->position, r->start, r->end); } /* Clean up and go home. */ lmCleanup(&lm); removeUnusedVertices(vertexTree, edgeTree); slFreeList(&edgeRefList); rbTreeFree(&rangeTree); } #ifdef DEBUG -static void dumpVertices(struct rbTree *vertexTree) +oid dumpVertices(struct rbTree *vertexTree) { struct slRef *vRef, *vRefList = rbTreeItems(vertexTree); for (vRef = vRefList; vRef != NULL; vRef = vRef->next) { struct vertex *v = vRef->val; printf(" %d(%d)", v->position, v->type); } printf("\n"); slFreeList(&vRefList); } #endif /* DEBUG */ struct txGraph *treeTreeToTxg(struct rbTree *vertexTree, struct rbTree *edgeTree, char *name, struct linkedBeds *lbList) /* Convert from vertexTree/edgeTree representation to txGraph */ { +if (vertexTree->root == NULL || edgeTree->root == NULL) // Nothing to do really + return NULL; + /* Allocate txGraph, and fill in some of the relatively easy fields. */ struct txGraph *txg; AllocVar(txg); struct bed *bed = lbList->bedList; txg->name = cloneString(name); txg->tName = cloneString(bed->chrom); txg->strand[0] = bed->strand[0]; txg->vertexCount = vertexTree->n; txg->edgeCount = edgeTree->n; /* Get vertex list and number sequentially. Fill in vertex array */ int i; struct slRef *vRef, *vRefList = rbTreeItems(vertexTree); struct txVertex *tv = NULL; if (vertexTree->n)