4898794edd81be5285ea6e544acbedeaeb31bf78
max
  Tue Nov 23 08:10:57 2021 -0800
Fixing pointers to README file for license in all source code files. refs #27614

diff --git src/hg/txGraph/txBedToGraph/makeGraph.c src/hg/txGraph/txBedToGraph/makeGraph.c
index 0c9e6f8..7fbd1e7 100644
--- src/hg/txGraph/txBedToGraph/makeGraph.c
+++ src/hg/txGraph/txBedToGraph/makeGraph.c
@@ -1,1263 +1,1263 @@
 /* Copyright (C) 2011 The Regents of the University of California 
- * See README in this or parent directory for licensing information. */
+ * See kent/LICENSE or http://genome.ucsc.edu/license/ for licensing information. */
 
 /* makeGraph - this module creates a txGraph from a list of 
  * linkedBeds.  The main steps to this process are:
  *   1) Create list of all unique start and end points in
  *      beds.  These are 'hard' if they are at a real splice
  *      site and 'soft' otherwise.
  *   2) Create list of all unique edges. Edges include evidence
  *      if any.
  *   3) Snap soft ends to nearby hard ends of compatible type.
  *   4) For half-soft edges, examine all other edges that share
  *      the hard end, and and that are hard on both ends.  Merge
  *      the half-soft edge with the most similar such double-hard
  *      edge, favoring extensions of edge over truncations. 
  *   5) For edges that are half soft that can't yet be snapped, 
  *      merge all soft ends to a single point that is a consensus favoring 
  *      larger transcripts.
  *    6) Merge together all overlapping edges that are soft on both sides.  Make
  *     the edge boundaries equal to the median value of all the merged boundaries. 
  * Through most of the algorithm the graph is represented as two binary trees, one
  * for vertices and one for edges. At the end this is converted to the txGraph
  * structure. */
 
 #include "common.h"
 #include "hash.h"
 #include "localmem.h"
 #include "rbTree.h"
 #include "rangeTree.h"
 #include "dlist.h"
 #include "dnautil.h"
 #include "bed.h"
 #include "ggTypes.h"
 #include "nibTwo.h"
 #include "txGraph.h"
 #include "txBedToGraph.h"
 
 struct evidence
 /* One piece of evidence. */
     {
     struct evidence *next;
     struct linkedBeds *lb;	/* Associated input data. */
     int start,end;		/* Bounds of evidence. */
     };
 
 struct vertex
 /* A point in our graph */
     {
     int position;		/* Position in genome. */
     enum ggVertexType type;	/* hard/soft? start/end? */
     struct vertex *movedTo;	/* Forwarding address (temporary) */
     struct slRef *waysIn;	/* References to preceeding edges (temporary). */
     struct slRef *waysOut;	/* References to following edges (temporary). */
     int count;			/* Number of times used, or index of vertex (temporary). */
     };
 
 struct edge
 /* An edge in our graph */
     {
     struct edge *next;          /* for forming a singly-linked list of edges */
     struct vertex *start;	/* Starting vertex. */
     struct vertex *end;		/* Ending vertex. */
     struct evidence *evList;	/* List of evidence. */
     };
 
 
 static int vertexCmp(void *va, void *vb)
 /* Return: if a before b, return negative.  If a after b, return positive.  
  * else if a and b have the same position and same type, return 0.
  * else if a and b have same position but different types, return 
  * a nonzero value reflecting the difference in their types. */
 {
 struct vertex *a = va;
 struct vertex *b = vb;
 int diff = a->position - b->position;
 if (diff == 0)
     diff = a->type - b->type;
 return diff;
 }
 
 static int edgeCmp(void *va, void *vb)
 /* Return -1 if a before b,  0 if a and b overlap,
  * and 1 if a after b. */
 {
 struct edge *a = va;
 struct edge *b = vb;
 int diff = vertexCmp(a->start, b->start);
 if (diff == 0)
     diff = vertexCmp(a->end, b->end);
 return diff;
 }
 
 static boolean encloses(struct edge *a, struct edge *b)
 /* Return TRUE if the first edge encloses the second, false otherwise */
 {
 if (a->start->position <= b->start->position
     && a->end->position >= b->end->position)
     {
     return(TRUE);
     } 
 else 
     {
     return(FALSE);
     }
 }
 
 static boolean trustedEdge(struct edge *edge)
 /* Return TRUE if any member of edge evidence is from a trusted source. */
 {
 struct evidence *ev;
 for (ev = edge->evList; ev != NULL; ev = ev->next)
     {
     if (trustedSource(ev->lb->sourceType))
         return TRUE;
     }
 return FALSE;
 }
 
 
 static struct vertex *matchingVertex(struct rbTree *tree, int position, enum ggVertexType type)
 /* Find matching vertex.  Return NULL if none. */
 {
 struct vertex temp;
 temp.position = position;
 temp.type = type;
 return rbTreeFind(tree, &temp);
 }
 
 static struct vertex *addUniqueVertex(struct rbTree *tree, int position, enum ggVertexType type)
 /* Find existing vertex if it exists, otherwise create and return new one. */
 {
 struct vertex *v = matchingVertex(tree, position, type);
 if (v == NULL)
     {
     lmAllocVar(tree->lm, v);
     v->position = position;
     v->type = type;
     rbTreeAdd(tree, v);
     }
 return v;
 }
 
 static struct edge *matchingEdge(struct rbTree *tree, struct vertex *start, struct vertex *end)
 /* Find matching edge. Return NULL if none. */
 {
 struct edge temp;
 temp.start = start;
 temp.end = end;
 return rbTreeFind(tree, &temp);
 }
 
 static struct edge *addUniqueEdge(struct rbTree *tree, struct vertex *start, struct vertex *end,
 	struct linkedBeds *lb)
 /* Find existing edge if it exists.  Otherwise create and return new one. 
  * Regardless add lb as evidence to edge. */
 {
 struct edge *e = matchingEdge(tree, start, end);
 if (e == NULL)
     {
     lmAllocVar(tree->lm, e);
     e->start = start;
     e->end = end;
     e->next = NULL;
     rbTreeAdd(tree, e);
     }
 struct evidence *ev;
 lmAllocVar(tree->lm, ev);
 ev->lb = lb;
 ev->start = start->position;
 ev->end = end->position;
 slAddHead(&e->evList, ev);
 return e;
 }
 
 static struct rbTree *makeVertexTree(struct linkedBeds *lbList)
 /* Make tree of unique vertices. */
 {
 struct rbTree *vertexTree = rbTreeNew(vertexCmp);
 struct linkedBeds *lb;
 for (lb = lbList; lb != NULL; lb = lb->next)
     {
     struct bed *bed;
     for (bed = lb->bedList; bed != NULL; bed = bed->next)
         {
 	/* Add very beginning and end, they'll be soft. */
 	addUniqueVertex(vertexTree, bed->chromStart, ggSoftStart);
 	addUniqueVertex(vertexTree, bed->chromEnd, ggSoftEnd);
 
 	/* Add internal hard ends. */
 	int i, lastBlock = bed->blockCount-1;
 	for (i=0; i<lastBlock; ++i)
 	    {
 	    addUniqueVertex(vertexTree, 
 	    	bed->chromStart + bed->chromStarts[i] + bed->blockSizes[i], ggHardEnd);
 	    addUniqueVertex(vertexTree, 
 	    	bed->chromStart + bed->chromStarts[i+1], ggHardStart);
 	    }
 	}
     }
 return vertexTree;
 }
 
 static struct rbTree *makeEdgeTree(struct linkedBeds *lbList, struct rbTree *vertexTree)
 /* Make tree of unique edges. */
 {
 struct rbTree *edgeTree = rbTreeNew(edgeCmp);
 struct linkedBeds *lb;
 for (lb = lbList; lb != NULL; lb = lb->next)
     {
     struct bed *bed, *nextBed;
     for (bed = lb->bedList; bed != NULL; bed = nextBed)
         {
 	nextBed = bed->next;
 
 	/* Loop to add all introns and all but last exon. */
 	struct vertex *start = matchingVertex(vertexTree, bed->chromStart, ggSoftStart);
 	int i, lastBlock = bed->blockCount-1;
 	for (i=0; i<lastBlock; ++i)
 	    {
 	    /* Add exon */
 	    struct vertex *end = matchingVertex(vertexTree,
 		start->position + bed->blockSizes[i], ggHardEnd);
 	    addUniqueEdge(edgeTree, start, end, lb);
 
 	    /* Add intron */
 	    start = matchingVertex(vertexTree, 
 		    bed->chromStart + bed->chromStarts[i+1], ggHardStart);
 	    addUniqueEdge(edgeTree, end, start, lb);
 	    }
 
 	/* Add final exon */
 	struct vertex *end = matchingVertex(vertexTree, bed->chromEnd, ggSoftEnd);
 	addUniqueEdge(edgeTree, start, end, lb);
 
 	/* If there's another bed to go, add a soft intron connecting it. */
 	if (nextBed != NULL)
 	    {
 	    start = matchingVertex(vertexTree, nextBed->chromStart, ggSoftStart);
 	    addUniqueEdge(edgeTree, end, start, lb);
 	    }
 	}
     }
 return edgeTree;
 }
 
 void incVertexUses(void *item)
 /* Callback to clear edge->start->useCount and edge->end->useCount. */
 {
 struct edge *edge = item;
 edge->start->count += 1; 
 edge->end->count += 1;
 }
 
 void removeUnusedVertices(struct rbTree *vertexTree, struct rbTree *edgeTree)
 /* Remove vertices not connected to any edges. */
 {
 /* Get vertex list and clear counts. */
 struct slRef *vRef, *vRefList = rbTreeItems(vertexTree);
 for (vRef = vRefList; vRef != NULL; vRef = vRef->next)
     {
     struct vertex *v = vRef->val;
     v->count = 0;
     }
 
 /* Inc counts of vertices connected to edges. */
 rbTreeTraverse(edgeTree, incVertexUses);
 
 /* Remove unused vertices. */
 for (vRef = vRefList; vRef != NULL; vRef = vRef->next)
     {
     struct vertex *v = vRef->val;
     if (v->count == 0)
         rbTreeRemove(vertexTree, v);
     }
 
 slFreeList(&vRefList);
 }
 
 void removeEmptyEdges(struct rbTree *vertexTree, struct rbTree *edgeTree)
 /* Remove edges that are zero or negative in length. */
 {
 int removeCount = 0;
 struct slRef *edgeRef, *edgeRefList = rbTreeItems(edgeTree);
 for (edgeRef = edgeRefList; edgeRef != NULL; edgeRef = edgeRef->next)
     {
     struct edge *edge = edgeRef->val;
     if (edge->start->position >= edge->end->position)
         {
 	removeCount += 1;
 	rbTreeRemove(edgeTree, edge);
 	}
     }
 if (removeCount)
     removeUnusedVertices(vertexTree, edgeTree);
 slFreeList(&edgeRefList);
 }
 
 
 static struct dlList *sortedListFromTree(struct rbTree *tree)
 /* Create a double-linked list from tree. List will be sorted.  */
 {
 struct slRef *ref, *refList = rbTreeItems(tree);
 struct dlList *list = dlListNew();
 for (ref = refList; ref != NULL; ref = ref->next)
     dlAddValTail(list, ref->val);
 slFreeList(&refList);
 return list;
 }
 
 void addWaysInAndOut(struct rbTree *vertexTree, struct rbTree *edgeTree,
 	struct lm *lm)
 /* Put a bunch of ways in and out of the graph onto the edges. */
 {
 struct slRef *edgeRef, *edgeRefList = rbTreeItems(edgeTree);
 for (edgeRef = edgeRefList; edgeRef != NULL; edgeRef = edgeRef->next)
     {
     struct edge *edge = edgeRef->val;
     struct slRef *inRef, *outRef;
     lmAllocVar(lm, inRef);
     lmAllocVar(lm, outRef);
     inRef->val = outRef->val = edge;
     slAddHead(&edge->start->waysOut, outRef);
     slAddHead(&edge->end->waysIn, inRef);
     }
 slFreeList(&edgeRefList);
 }
 
 static boolean checkSeqSimilar(struct dnaSeq *a, struct dnaSeq *b, int minScore)
 /* Check that two sequences are similar.  Assumes sequences are same size. */
 {
 int size = a->size;
 int size1 = size-1;
 return dnaScoreMatch(a->dna, b->dna, size) >= minScore 
 	|| dnaScoreMatch(a->dna+1, b->dna, size1) >= minScore
 	|| dnaScoreMatch(a->dna, b->dna+1, size1) >= minScore;
 }
 
 static boolean uncuttableEdge(struct edge *e)
 /* Return TRUE if any evidence for edge is uncuttable (ccds) */
 {
 struct evidence *ev;
 for (ev = e->evList; ev != NULL; ev = ev->next)
     if (noCutSource(ev->lb->sourceType))
         return TRUE;
 return FALSE;
 }
 
 static boolean vertexPartOfUncuttableEdge(struct vertex *v, boolean isStart)
 /* See if vertex is part of an uncuttable edge. */
 {
 struct slRef *eRef, *eRefList = (isStart ? v->waysOut : v->waysIn);
 assert(eRefList != NULL);
 for (eRef = eRefList; eRef != NULL; eRef = eRef->next)
     if (uncuttableEdge(eRef->val))
 	return TRUE;
 return FALSE;
 }
 
 static boolean checkSnapOk(struct vertex *vOld, struct vertex *vNew, boolean isRev, 
 	int bleedSize, int maxUncheckedSize, struct nibTwoCache *seqCache, char *chromName)
 /* Load sequence that corresponds to bleed-over, and  make sure that sequence of next
  * exon is similar. */
 {
 if (vertexPartOfUncuttableEdge(vOld, isRev))
     return FALSE;
 if (bleedSize <= maxUncheckedSize)
     return TRUE;
 int minScore = bleedSize-2;
 boolean similar = FALSE;
 if (isRev)
     {
     int oldStart = vOld->position;
     struct slRef *eRef;
     struct dnaSeq *oldSeq = nibTwoCacheSeqPartExt(seqCache, chromName, oldStart, bleedSize, FALSE, NULL);
     for (eRef = vNew->waysIn; eRef != NULL; eRef = eRef->next)
         {
 	struct edge *edge = eRef->val;
 	struct vertex *vRest = edge->start;
 	int newStart = vRest->position - bleedSize;
 	struct dnaSeq *newSeq = nibTwoCacheSeqPartExt(seqCache, chromName, newStart, bleedSize, FALSE, NULL);
 	similar = checkSeqSimilar(oldSeq, newSeq, minScore);
 	dnaSeqFree(&newSeq);
 	if (similar)
 	    break;
 	}
     dnaSeqFree(&oldSeq);
     }
 else
     {
     int oldStart = vOld->position - bleedSize;
     struct slRef *eRef;
     struct dnaSeq *oldSeq = nibTwoCacheSeqPartExt(seqCache, chromName, oldStart, bleedSize, FALSE, NULL);
     for (eRef = vNew->waysOut; eRef != NULL; eRef = eRef->next)
         {
 	struct edge *edge = eRef->val;
 	struct vertex *vRest = edge->end;
 	int newStart = vRest->position;
 	struct dnaSeq *newSeq = nibTwoCacheSeqPartExt(seqCache, chromName, newStart, bleedSize, FALSE, NULL);
 	similar = checkSeqSimilar(oldSeq, newSeq, minScore);
 	dnaSeqFree(&newSeq);
 	if (similar)
 	    break;
 	}
     }
 return similar;
 }
 
 static boolean snapVertex(struct dlNode *oldNode, int maxSnapSize, int maxUncheckedSnapSize,
 	struct nibTwoCache *seqCache, char *chromName)
 /* Snap vertex to nearby previous hard vertex. */
 {
 /* Figure out hard type we want to snap to, and return if not
  * soft to begin with. */
 struct vertex *vOld = oldNode->val;
 int oldPos = vOld->position;
 enum ggVertexType currentType = vOld->type;
 
 /* If no seqCache, then set up things so not bothering to check it. */
 if (seqCache == NULL)
     maxUncheckedSnapSize = maxSnapSize;
 
 /* Search backwards */
 if (currentType == ggSoftEnd)
     {
     struct dlNode *node;
     for (node = oldNode->prev; !dlStart(node); node = node->prev)
 	{
 	struct vertex *v = node->val;
 	int newPos = v->position;
 	int dif = oldPos - newPos;
 	if (dif > maxSnapSize)
 	    break;
 	if (v->type == ggHardEnd)
 	    {
 	    if (checkSnapOk(vOld, v, FALSE, dif, maxUncheckedSnapSize, seqCache, chromName))
 		{
 		vOld->movedTo = v;
 		verbose(3, "snapping vertex %d to %d\n", oldPos, newPos);
 		return TRUE;
 		}
 	    }
 	}
     }
 
 /* Search forward */
 else if (currentType == ggSoftStart)
     {
     struct dlNode *node;
     for (node = oldNode->next; !dlEnd(node); node = node->next)
 	{
 	struct vertex *v = node->val;
 	int newPos = v->position;
 	int dif = newPos - oldPos;
 	if (dif > maxSnapSize)
 	    break;
 	if (v->type == ggHardStart)
 	    {
 	    if (checkSnapOk(vOld, v, TRUE, dif, maxUncheckedSnapSize, seqCache, chromName))
 		{
 		vOld->movedTo = v;
 		return TRUE;
 		}
 	    }
 	}
     }
 return FALSE;
 }
 
 void mergeOrAddEdge(struct rbTree *edgeTree, struct edge *edge)
 /* Add edge back if it is still unique, otherwise move evidence from
  * edge into existing edge. */
 {
 struct edge *existing = rbTreeFind(edgeTree, edge);
 if (existing)
     {
     existing->evList = slCat(existing->evList, edge->evList);
     edge->evList = NULL;
     }
 else
     rbTreeAdd(edgeTree, edge);
 }
 
 void updateForwardedEdges(struct rbTree *edgeTree)
 /* Go through edges, following the movedTo's in the
  * vertices if need be. */
 {
 struct slRef *ref, *refList = rbTreeItems(edgeTree);
 int forwardCount = 0;
 for (ref = refList; ref != NULL; ref = ref->next)
     {
     struct edge *edge = ref->val;
     struct vertex *start = edge->start, *end = edge->end;
     if (start->movedTo || end->movedTo)
         {
 	++forwardCount;
 	rbTreeRemove(edgeTree, edge);
 	if (start->movedTo)
 	    edge->start = start->movedTo;
 	if (end->movedTo)
 	    edge->end = end->movedTo;
 	mergeOrAddEdge(edgeTree, edge);
 	}
     }
 if (forwardCount > 0)
     verbose(3, "Forwarded %d edges.\n", forwardCount);
 slFreeList(&refList);
 }
 
 void snapSoftToCloseHard(struct rbTree *vertexTree, struct rbTree *edgeTree, int maxSnapSize,
 	int maxUncheckedSnapSize, struct nibTwoCache *seqCache, char *chromName)
 /* Snap hard vertices to nearby soft vertices of same type. */
 {
 struct lm *lm = lmInit(0);
 addWaysInAndOut(vertexTree, edgeTree, lm);
 struct dlList *vList = sortedListFromTree(vertexTree);
 struct dlNode *node;
 int snapCount = 0;
 for (node = vList->head; !dlEnd(node); node = node->next)
     {
     if (snapVertex(node, maxSnapSize, maxUncheckedSnapSize, seqCache, chromName))
 	{
 	rbTreeRemove(vertexTree, node->val);
         ++snapCount;
 	}
     }
 /* Clean up ways in and out since have removed some nodes. */
 for (node = vList->head; !dlEnd(node); node = node->next)
     {
     struct vertex *v = node->val;
     v->waysIn = v->waysOut = NULL;
     }
 if (snapCount > 0)
     {
     verbose(3, "Snapped %d close edges, now have %d vertices\n", snapCount, vertexTree->n);
     updateForwardedEdges(edgeTree);
     }
 dlListFree(&vList);
 lmCleanup(&lm);
 }
 
 int edgeRefCmpEnd(const void *va, const void *vb)
 /* Comparison to sort a slRef list of edges
  * by the edge end location. */
 {
 const struct slRef *a = *((struct slRef **)va);
 const struct slRef *b = *((struct slRef **)vb);
 struct edge *aEdge = a->val;
 struct edge *bEdge = b->val;
 return aEdge->end->position - bEdge->end->position;
 }
 
 int edgeRefCmpStartRev(const void *va, const void *vb)
 /* Comparison to sort a slRef list of edges
  * by the edge start location, with biggest biggest
  * starts first. */
 {
 const struct slRef *a = *((struct slRef **)va);
 const struct slRef *b = *((struct slRef **)vb);
 struct edge *aEdge = a->val;
 struct edge *bEdge = b->val;
 return bEdge->end->position - aEdge->end->position;
 }
 
 
 
 int snapHalfHardForward(struct vertex *v, struct rbTree *edgeTree,
 	enum ggVertexType softType, enum ggVertexType hardType)
 /* V is a hard vertex. Try to snap soft end vertices connected to v
  * to nearest hard vertex connected to v. */
 {
 int snapCount = 0;
 // enum ggVertex otherHardType = (hardType == ggHardStart ? ggHardEnd : ggHardStart);
 slSort(&v->waysOut, edgeRefCmpEnd); 
 struct slRef *hardRef = v->waysOut, *softRef = v->waysOut;
 for (;;)
     {
     /* Advance softRef to next soft ended edge. */
     for (;softRef != NULL; softRef = softRef->next)
         {
 	struct edge *edge = softRef->val;
 	if (edge->end->type == softType)
 	    break;
 	}
     if (softRef == NULL)
         break;
     struct edge *softEdge = softRef->val;
 
     /* If hardRef is before softRef (or it's not hard) advance it */
     for (;hardRef != NULL; hardRef = hardRef->next)
         {
 	struct edge *edge = hardRef->val;
 	if (edge->end->type == hardType && edge->end->position >= softEdge->end->position)
 	   break;
 	}
     if (hardRef == NULL)
         break;
     rbTreeRemove(edgeTree, softEdge);
     struct edge *hardEdge = hardRef->val;
     verbose(3, "Snapping half-hard edge ending at %d to %d\n", softEdge->end->position, 
 	    hardEdge->end->position); 
     softEdge->end = hardEdge->end;
     ++snapCount;
     mergeOrAddEdge(edgeTree, softEdge);
     softRef = softRef->next;
     }
 if (snapCount > 0)
     verbose(3, "Snapped %d forward\n", snapCount);
 return snapCount;
 }
 
 int snapHalfHardBackward(struct vertex *v, struct rbTree *edgeTree,
 	enum ggVertexType softType, enum ggVertexType hardType)
 /* V is a hard vertex. Try to snap soft start vertices connected to v
  * to nearest hard vertex connected to v. */
 {
 int snapCount = 0;
 // enum ggVertex otherHardType = (hardType == ggHardStart ? ggHardEnd : ggHardStart);
 slSort(&v->waysIn, edgeRefCmpStartRev); 
 struct slRef *hardRef = v->waysIn, *softRef = v->waysIn;
 for (;;)
     {
     /* Advance softRef to next soft ended edge. */
     for (;softRef != NULL; softRef = softRef->next)
         {
 	struct edge *edge = softRef->val;
 	if (edge->start->type == softType)
 	    break;
 	}
     if (softRef == NULL)
         break;
     struct edge *softEdge = softRef->val;
 
     /* If hardRef is before softRef (or it's not hard) advance it */
     for (;hardRef != NULL; hardRef = hardRef->next)
         {
 	struct edge *edge = hardRef->val;
 	if (edge->start->type == hardType && edge->start->position <= softEdge->start->position)
 	   break;
 	}
     if (hardRef == NULL)
         break;
     rbTreeRemove(edgeTree, softEdge);
     struct edge *hardEdge = hardRef->val;
     verbose(3, "Snapping half-hard edge starting at %d to %d\n", softEdge->start->position, 
 	    hardEdge->start->position); 
     softEdge->start = hardEdge->start;
     ++snapCount;
     mergeOrAddEdge(edgeTree, softEdge);
     softRef = softRef->next;
     }
 if (snapCount > 0)
     verbose(3, "Snapped %d reverse\n", snapCount);
 return snapCount;
 }
 
 static int snapHalfHard(struct vertex *v, struct rbTree *edgeTree)
 /* Snap soft ends of half-hard edges connected to v to 
  * compatible hard end.  Returns number of snaps it's made. */
 {
 enum ggVertexType currentType = v->type;
 enum ggVertexType softType, hardType;
 
 /* Figure out what types look for at the other end.  If we're
  * a soft vertex we can't do anything. */
 if (currentType == ggHardStart)
     {
     softType = ggSoftEnd;
     hardType = ggHardEnd;
     }
 else if (currentType == ggHardEnd)
     {
     softType = ggSoftStart;
     hardType = ggHardStart;
     }
 else
     return 0;
 
 int snapCount = 0;
 snapCount += snapHalfHardForward(v, edgeTree, softType, hardType);
 snapCount += snapHalfHardBackward(v, edgeTree, softType, hardType);
 return snapCount;
 }
 
 void snapHalfHards(struct rbTree *vertexTree, struct rbTree *edgeTree)
 /* Snap edges that are half hard to edges that are fully hard and that
  * share the original hard end */
 {
 struct lm *lm = lmInit(0);
 addWaysInAndOut(vertexTree, edgeTree, lm);
 struct slRef *vRef, *vRefList = rbTreeItems(vertexTree);
 for (vRef = vRefList; vRef != NULL; vRef = vRef->next)
     {
     struct vertex *v = vRef->val;
     snapHalfHard(v, edgeTree);
     v->waysIn = v->waysOut = NULL;	/* These are trashed so remove them. */
     }
 slFreeList(&vRefList);
 removeUnusedVertices(vertexTree, edgeTree);
 lmCleanup(&lm);
 }
 
 struct sourceAndPos
 /* A vertex source and position. */
     {
     struct sourceAndPos *next;
     int position;		/* Position in genome. */
     bool trustedSource;		/* If true this is trusted source (refSeq) */
     };
 
 int sourceAndPosCmp(const void *va, const void *vb)
 /* Compare two sourceAndPos by position. */
 {
 const struct sourceAndPos *a = *((struct sourceAndPos **)va);
 const struct sourceAndPos *b = *((struct sourceAndPos **)vb);
 return a->position - b->position;
 }
 
 int sourceAndPosCmpRev(const void *va, const void *vb)
 /* Compare two sourceAndPos in reverse direction. */
 {
 const struct sourceAndPos *a = *((struct sourceAndPos **)va);
 const struct sourceAndPos *b = *((struct sourceAndPos **)vb);
 return b->position - a->position;
 }
 
 static struct vertex *consensusVertex(struct rbTree *vertexTree, struct sourceAndPos *list, 
 	int listSize, enum ggVertexType softType)
 /* Return first trusted (refSeq) vertex, or if no trusted one, then 
  * a soft vertex 1/4 of way (rounded down) through list. */
 {
 struct sourceAndPos *el;
 for (el = list; el != NULL; el = el->next)
     {
     if (el->trustedSource)
         break;
     }
 if (el == NULL)
     el = slElementFromIx(list, listSize/4);
 return matchingVertex(vertexTree, el->position, softType);
 }
 
 void halfConsensusForward(struct vertex *v, 
 	struct rbTree *vertexTree, struct rbTree *edgeTree,
 	enum ggVertexType softType, struct lm *lm)
 /* Figure out consensus end of all edges beginning at v that have soft end. */
 {
 /* Collect a list of all attached softies. */
 struct sourceAndPos *list = NULL, *el;
 struct slRef *edgeRef;
 int softCount = 0;
 for (edgeRef = v->waysOut; edgeRef != NULL; edgeRef = edgeRef->next)
     {
     struct edge *edge = edgeRef->val;
     struct vertex *v = edge->end;
     if (v->type == softType)
         {
 	struct evidence *ev;
 	for (ev = edge->evList; ev != NULL; ev = ev->next)
 	    {
 	    lmAllocVar(lm, el);
 	    el->position = ev->end;
 	    el->trustedSource = trustedSource(ev->lb->sourceType);
 	    slAddHead(&list, el);
 	    ++softCount;
 	    }
 	}
     }
 
 /* See if have enough elements to make consensus forming
  * worthwhile. */
 if (softCount > 1)
     {
     slSort(&list, sourceAndPosCmpRev);
     struct vertex *end = consensusVertex(vertexTree, list, softCount, softType);
     for (edgeRef = v->waysOut; edgeRef != NULL; edgeRef = edgeRef->next)
 	{
 	struct edge *edge = edgeRef->val;
 	struct vertex *v = edge->end;
 	if (v != end && v->type == softType)
 	    {
 	    rbTreeRemove(edgeTree, edge);
 	    verbose(3, "Performing half-hard consensus: moving edge end from %d to %d\n",
 		    edge->end->position, end->position);
 	    edge->end = end;
 	    mergeOrAddEdge(edgeTree, edge);  // Will always merge. 
 	    }
 	}
     }
 }
 
 
 void halfConsensusBackward(struct vertex *v, 
 	struct rbTree *vertexTree, struct rbTree *edgeTree,
 	enum ggVertexType softType, struct lm *lm)
 /* Figure out consensus start of all edges end at v that have soft start. */
 {
 /* Collect a list of all attached softies. */
 struct sourceAndPos *list = NULL, *el;
 struct slRef *edgeRef;
 int softCount = 0;
 for (edgeRef = v->waysIn; edgeRef != NULL; edgeRef = edgeRef->next)
     {
     struct edge *edge = edgeRef->val;
     struct vertex *v = edge->start;
     if (v->type == softType)
         {
 	struct evidence *ev;
 	for (ev = edge->evList; ev != NULL; ev = ev->next)
 	    {
 	    lmAllocVar(lm, el);
 	    el->position = ev->start;
 	    el->trustedSource = trustedSource(ev->lb->sourceType);
 	    slAddHead(&list, el);
 	    ++softCount;
 	    }
 	}
     }
 
 /* See if have enough elements to make consensus forming
  * worthwhile. */
 if (softCount > 1)
     {
     slSort(&list, sourceAndPosCmp);
     struct vertex *start = consensusVertex(vertexTree, list, softCount, softType);
     for (edgeRef = v->waysIn; edgeRef != NULL; edgeRef = edgeRef->next)
 	{
 	struct edge *edge = edgeRef->val;
 	struct vertex *v = edge->start;
 	if (v != start && v->type == softType)
 	    {
 	    rbTreeRemove(edgeTree, edge);
 	    verbose(3, "Performing half-hard consensus: moving edge start from %d to %d\n",
 		    edge->start->position, start->position);
 	    edge->start = start;
 	    mergeOrAddEdge(edgeTree, edge);  // Will always merge. 
 	    }
 	}
     }
 }
 
 void halfHardConsensus(struct vertex *v, 
 	struct rbTree *vertexTree, struct rbTree *edgeTree,
 	struct lm *lm)
 /* Form consensus of soft vertices connected to hard vertex v. 
  * Consensus is:
  *    1) The largest refSeq.
  *    2) If no refSeq, something 3/4 of the way to the largest. */
 {
 enum ggVertexType currentType = v->type;
 enum ggVertexType softType;
 
 /* Figure out what types look for at the other end.  If we're
  * a soft vertex we can't do anything. */
 if (currentType == ggHardStart)
     softType = ggSoftEnd;
 else if (currentType == ggHardEnd)
     softType = ggSoftStart;
 else
     return;
 
 halfConsensusForward(v, vertexTree, edgeTree, softType, lm);
 halfConsensusBackward(v, vertexTree, edgeTree, softType, lm);
 }
 
 void halfHardConsensuses(struct rbTree *vertexTree, struct rbTree *edgeTree)
 /* Collect soft edges that share a hard edge. Move all of them to a consensus
  * vertex, which is either:
  *    1) The largest refSeq.
  *    2) If no refSeq, something 3/4 of the way to the largest. */
 {
 struct lm *lm = lmInit(0);
 addWaysInAndOut(vertexTree, edgeTree, lm);
 struct slRef *vRef, *vRefList = rbTreeItems(vertexTree);
 for (vRef = vRefList; vRef != NULL; vRef = vRef->next)
     {
     struct vertex *v = vRef->val;
     halfHardConsensus(v, vertexTree, edgeTree, lm);
     v->waysIn = v->waysOut = NULL;	/* These are trashed so remove them. */
     }
 slFreeList(&vRefList);
 removeUnusedVertices(vertexTree, edgeTree);
 lmCleanup(&lm);
 }
 
 struct edge *edgeFromConsensusOfEvidence(struct rbTree *vertexTree, struct evidence *evList,
 	struct lm *lm)
 /* Attempt to create a single edge from a list of overlapping evidence ranges.
  * The start will be the consensus of all evidence starts.  Likewise
  * the end will be the consensus of all evidence ends.  The evidence that
  * overlaps this edge will be included in the edge. */
 {
 /* Gather up lists of starts and ends. */
 struct sourceAndPos *startList = NULL, *endList = NULL;
 struct evidence *ev, *nextEv;
 int listSize = 0;
 for (ev = evList; ev != NULL; ev = ev->next)
     {
     struct sourceAndPos *x;
     boolean trusted = trustedSource(ev->lb->sourceType);
     lmAllocVar(lm, x);
     x->position = ev->start;
     x->trustedSource = trusted;
     slAddHead(&startList, x);
     lmAllocVar(lm, x);
     x->position = ev->end;
     x->trustedSource = trusted;
     slAddHead(&endList, x);
     ++listSize;
     }
 
 /* Get consensus starts and ends. */
 slSort(&startList, sourceAndPosCmp);
 struct vertex *start = consensusVertex(vertexTree, startList, listSize, ggSoftStart);
 slSort(&endList, sourceAndPosCmpRev);
 struct vertex *end = consensusVertex(vertexTree, endList, listSize, ggSoftEnd);
 
 /* Make edge */
 struct edge *edge;
 AllocVar(edge);
 edge->start = start;
 edge->end = end;
 edge->next = NULL;
 
 /* Add overlapping evidence to edge. */
 for (ev = evList; ev != NULL; ev = nextEv)
     {
     nextEv = ev->next;
     if (rangeIntersection(ev->start, ev->end, start->position, end->position) > 0)
         slAddHead(&edge->evList, ev);
     }
 
 return edge;
 }
 
 void removeEnclosedDoubleSofts(struct rbTree *vertexTree, struct rbTree *edgeTree, 
 	int maxBleedOver, double singleExonMaxOverlap)
 /* Move double-softs that overlap spliced things to a very great extent into
  * the spliced things. Also remove tiny double-softs (no more than 2*maxBleedOver). */
 {
 /* Traverse graph and build up range tree covering spliced exons.  For each 
  * range of overlapping exons, assemble a singly-linked list of all exons in 
  * the range */
 struct rbTree *rangeTree = rangeTreeNew(0);
 struct slRef *edgeRef, *edgeRefList = rbTreeItems(edgeTree);
 int removedCount = 0;
 for (edgeRef = edgeRefList; edgeRef != NULL; edgeRef = edgeRef->next)
     {
     struct edge *edge = edgeRef->val;
     struct vertex *start = edge->start;
     struct vertex *end = edge->end;
     if (start->type == ggHardStart || end->type == ggHardEnd)
 	{
 	rangeTreeAddValList(rangeTree, start->position, end->position, edge);
 	}
     }
 
 /* Traverse graph yet one more time looking for doubly-soft exons
  * that are overlapping the spliced exons. */
 for (edgeRef = edgeRefList; edgeRef != NULL; edgeRef = edgeRef->next)
     {
     struct edge *edge = edgeRef->val;
     struct vertex *start = edge->start;
     struct vertex *end = edge->end;
     if (start->type == ggSoftStart && end->type == ggSoftEnd)
         {
 	int s = start->position;
 	int e = end->position;
 	int size = e - s;
 	if (size <= maxBleedOver+maxBleedOver)
 	     {
 	     /* Tiny case, just remove edge and forget it. */
 	     verbose(3, "Removing tiny double-soft edge from %d to %d\n", s, e);
 	     rbTreeRemove(edgeTree, edge);
 	     ++removedCount;
 	     }
 	else
 	     {
 	     /* Normal case, look for exon list that encloses us, and
 	      * if any single exon in that list encloses us, merge into it. */
 	     int splicedOverlap = rangeTreeOverlapSize(rangeTree, s, e);
 	     if (splicedOverlap > 0 && splicedOverlap > singleExonMaxOverlap*size)
 	         {
 		 if (!trustedEdge(edge))
 		     {
 		     /* Once we find a range that overlaps the doubly-soft edge, find 
 		      * (half-hard or better) edge from that range that encloses the 
 		      * doubly soft edge. */
 		     struct range *r = rangeTreeMaxOverlapping(rangeTree, s, e);
 		     struct edge *nextEdge, *edgeList = r->val;
 		     struct edge *enclosingEdge = NULL;
 		     for (nextEdge = edgeList; edgeList != NULL; edgeList = edgeList->next)
 			 {
 			 if (encloses(nextEdge, edge))
 			     {
 			     enclosingEdge = nextEdge;
 			     }
 			 }
 		     if (enclosingEdge != NULL) 
 			 {
 			 enclosingEdge->evList = slCat(enclosingEdge->evList, edge->evList);
 			 edge->evList = NULL;
 			 verbose(3, "Removing doubly-soft edge %d-%d, reassigning to %d-%d\n",
 				 s, e, enclosingEdge->start->position, 
 				 enclosingEdge->end->position);
 			 rbTreeRemove(edgeTree, edge);
 			 ++removedCount;
 			 }
 		     }
 		 }
 	     }
 	}
     }
 
 /* Clean up and go home. */
 if (removedCount > 0)
     removeUnusedVertices(vertexTree, edgeTree);
 for (edgeRef = edgeRefList; edgeRef != NULL; edgeRef = edgeRef->next)
     {
     struct edge *nextEdge, *edge = edgeRef->val;
     while (edge != NULL) 
 	{
 	nextEdge = edge->next;
 	edge->next = NULL;
 	edge = nextEdge;
 	}
     }
 slFreeList(&edgeRefList);
 rbTreeFree(&rangeTree);
 }
 
 void mergeDoubleSofts(struct rbTree *vertexTree, struct rbTree *edgeTree)
 /* Merge together overlapping edges with soft ends. */
 {
 struct mergedEdge
 /* Hold together info on a merged edge. */
     {
     struct evidence *evidence;
     };
 
 /* Traverse graph and build up range tree.  Each node in the range tree
  * will represent the bounds of coordinates of overlapping double softs */
 struct rbTree *rangeTree = rangeTreeNew(0);
 struct slRef *edgeRef, *edgeRefList = rbTreeItems(edgeTree);
 for (edgeRef = edgeRefList; edgeRef != NULL; edgeRef = edgeRef->next)
     {
     struct edge *edge = edgeRef->val;
     struct vertex *start = edge->start;
     struct vertex *end = edge->end;
     if (start->type == ggSoftStart && end->type == ggSoftEnd)
         rangeTreeAdd(rangeTree, start->position, end->position);
     }
 
 /* Traverse graph again merging edges */
 for (edgeRef = edgeRefList; edgeRef != NULL; edgeRef = edgeRef->next)
     {
     struct edge *edge = edgeRef->val;
     struct vertex *start= edge->start;
     struct vertex *end = edge->end;
     if (start->type == ggSoftStart && end->type == ggSoftEnd)
         {
 	struct range *r = rangeTreeFindEnclosing(rangeTree,
 		start->position, end->position);
 	assert(r != NULL);
 	/* At this point, r represents the bounds of a double-soft
 	 * region that encompasses this edge.  Collect the set of
 	 * evidence of edges overlapping this range */
         struct mergedEdge *mergeEdge = r->val;
         if (mergeEdge == NULL)
             {
             lmAllocVar(rangeTree->lm, mergeEdge);
             r->val = mergeEdge;
             }
         mergeEdge->evidence = slCat(edge->evList, mergeEdge->evidence);
 	verbose(3, "Merging doubly-soft edge (%d,%d) into range (%d,%d)\n", 
 		start->position, end->position, r->start, r->end);
         edge->evList = NULL;
         rbTreeRemove(edgeTree, edge);
 	}
     }
 
 /* Traverse merged edge list, making a single edge from each range. At this point,
  * each range will have some evidence attached to it, from each of the double softs
  * that fall within the range.  From all of this evidence, make a single consensus edge */
 struct range *r;
 struct lm *lm = lmInit(0);
 for (r = rangeTreeList(rangeTree); r != NULL; r = r->next)
     {
     struct mergedEdge *mergedEdge = r->val;
     struct edge *edge = edgeFromConsensusOfEvidence(vertexTree, mergedEdge->evidence, lm);
     if (edge != NULL)
         rbTreeAdd(edgeTree, edge);
     verbose(3, "Deriving edge (%d,%d) from all the double softs in range (%d,%d)\n", 
 	    edge->start->position, edge->end->position, r->start, r->end);
     }
 
 
 /* Clean up and go home. */
 lmCleanup(&lm);
 removeUnusedVertices(vertexTree, edgeTree);
 slFreeList(&edgeRefList);
 rbTreeFree(&rangeTree);
 }
 
 #ifdef DEBUG
 oid dumpVertices(struct rbTree *vertexTree)
 {
 struct slRef *vRef, *vRefList = rbTreeItems(vertexTree);
 for (vRef = vRefList; vRef != NULL; vRef = vRef->next)
     {
     struct vertex *v = vRef->val;
     printf(" %d(%d)", v->position, v->type);
     }
 printf("\n");
 slFreeList(&vRefList);
 }
 #endif /* DEBUG */
 
 struct txGraph *treeTreeToTxg(struct rbTree *vertexTree, struct rbTree *edgeTree, char *name,
 	struct linkedBeds *lbList)
 /* Convert from vertexTree/edgeTree representation to txGraph */
 {
 if (vertexTree->root == NULL || edgeTree->root == NULL)  // Nothing to do really
     return NULL;
 
 /* Allocate txGraph, and fill in some of the relatively easy fields. */
 struct txGraph *txg;
 AllocVar(txg);
 struct bed *bed = lbList->bedList;
 txg->name = cloneString(name);
 txg->tName = cloneString(bed->chrom);
 txg->strand[0] = bed->strand[0];
 txg->vertexCount = vertexTree->n;
 txg->edgeCount = edgeTree->n;
 
 /* Get vertex list and number sequentially. Fill in vertex array */
 int i;
 struct slRef *vRef, *vRefList = rbTreeItems(vertexTree);
 struct txVertex *tv = NULL;
 if (vertexTree->n)
     tv = AllocArray(txg->vertices, vertexTree->n);
 int tStart = BIGNUM, tEnd = -BIGNUM;
 for (vRef = vRefList, i=0; vRef != NULL; vRef = vRef->next, i++)
     {
     struct vertex *v = vRef->val;
     int position = v->position;
     v->count = i;
     tv->position = position;
     tv->type = v->type;
     tStart = min(tStart, position);
     tEnd = max(tEnd, position);
     ++tv;
     }
 slFreeList(&vRefList);
 
 /* Fill in overall bounds. */
 txg->tStart = tStart;
 txg->tEnd = tEnd;
 
 /* Deal with sources. */
 int sourceCount = 0;
 struct linkedBeds *lb;
 for (lb = lbList; lb != NULL; lb = lb->next)
     lb->id = sourceCount++;
 struct txSource *source = AllocArray(txg->sources, sourceCount);
 for (lb = lbList; lb != NULL; lb = lb->next)
     {
     source->accession = cloneString(lb->bedList->name);
     source->type = cloneString(lb->sourceType);
     ++source;
     }
 txg->sourceCount = sourceCount;
 
 /* Convert edges */
 struct slRef *edgeRef, *edgeRefList = rbTreeItems(edgeTree);
 for (edgeRef = edgeRefList; edgeRef != NULL; edgeRef = edgeRef->next)
     {
     struct edge *edge = edgeRef->val;
 
     /* Allocate edge and fill in start and end. */
     struct txEdge *te;
     AllocVar(te);
     struct vertex *start = edge->start;
     struct vertex *end = edge->end;
     te->startIx = start->count;
     te->endIx = end->count;
 
     /* Figure out whether it's an intron or exon. */
     enum ggVertexType startType = edge->start->type;
     if (startType == ggHardStart || startType == ggSoftStart)
         te->type = ggExon;
     else
         te->type = ggIntron;
 
     /* Convert the evidence. */
     struct evidence *ev;
     for (ev = edge->evList; ev != NULL; ev = ev->next)
         {
 	struct txEvidence *tev;
 	AllocVar(tev);
 	tev->sourceId = ev->lb->id;
 	tev->start = ev->start;
 	tev->end = ev->end;
 	slAddHead(&te->evList, tev);
 	te->evCount += 1;
 	}
     slReverse(&te->evList);
 
     slAddHead(&txg->edgeList, te);
     }
 slReverse(&txg->edgeList);
 return txg;
 }
 
 struct txGraph *makeGraph(struct linkedBeds *lbList, int maxBleedOver, 
 	int maxUncheckedBleed, struct nibTwoCache *seqCache,
 	double singleExonMaxOverlap, char *name)
 /* Create a graph corresponding to linkedBedsList.
  * The maxBleedOver parameter controls how much of a soft edge that
  * can be cut off when snapping to a hard edge.  The singleExonMaxOverlap
  * controls what ratio of a single exon transcript can overlap spliced 
  * transcripts */
 {
 char *chromName = lbList->bedList->chrom;
 
 /* Create tree of all unique vertices. */
 struct rbTree *vertexTree = makeVertexTree(lbList);
 verbose(2, "%d unique vertices\n", vertexTree->n);
 
 /* Create tree of all unique edges */
 struct rbTree *edgeTree = makeEdgeTree(lbList, vertexTree);
 verbose(2, "%d unique edges\n", edgeTree->n);
 
 snapSoftToCloseHard(vertexTree, edgeTree, maxBleedOver, maxUncheckedBleed, seqCache, chromName);
 verbose(2, "%d edges, %d vertices after snapSoftToCloseHard\n", 
 	edgeTree->n, vertexTree->n);
 
 removeEmptyEdges(vertexTree, edgeTree);
 verbose(2, "%d edges, %d vertices after removeEmptyEdges\n",
 	edgeTree->n, vertexTree->n);
 
 snapHalfHards(vertexTree, edgeTree);
 verbose(2, "%d edges, %d vertices after snapHalfHards\n", 
 	edgeTree->n, vertexTree->n);
 
 halfHardConsensuses(vertexTree, edgeTree);
 verbose(2, "%d edges, %d vertices after medianHalfHards\n", 
 	edgeTree->n, vertexTree->n);
 
 removeEnclosedDoubleSofts(vertexTree, edgeTree, maxBleedOver, singleExonMaxOverlap);
 verbose(2, "%d edges, %d vertices after mergeEnclosedDoubleSofts\n",
 	edgeTree->n, vertexTree->n);
 
 mergeDoubleSofts(vertexTree, edgeTree);
 verbose(2, "%d edges, %d vertices after mergeDoubleSofts\n",
 	edgeTree->n, vertexTree->n);
 
 struct txGraph *txg = treeTreeToTxg(vertexTree, edgeTree, name, lbList);
 
 /* Clean up and go home. */
 rbTreeFree(&vertexTree);
 rbTreeFree(&edgeTree);
 return txg;
 }