4898794edd81be5285ea6e544acbedeaeb31bf78
max
  Tue Nov 23 08:10:57 2021 -0800
Fixing pointers to README file for license in all source code files. refs #27614

diff --git src/hg/txCds/txCdsCluster/txCdsCluster.c src/hg/txCds/txCdsCluster/txCdsCluster.c
index fe0050e..edb43e0 100644
--- src/hg/txCds/txCdsCluster/txCdsCluster.c
+++ src/hg/txCds/txCdsCluster/txCdsCluster.c
@@ -1,351 +1,351 @@
 /* txCdsCluster - Cluster transcripts purely in the CDS regions, only putting things together 
  * if they share same frame as well as a genomic region. */
 
 /* Copyright (C) 2007 The Regents of the University of California 
- * See README in this or parent directory for licensing information. */
+ * See kent/LICENSE or http://genome.ucsc.edu/license/ for licensing information. */
 #include "common.h"
 #include "memalloc.h"
 #include "linefile.h"
 #include "hash.h"
 #include "options.h"
 #include "bed.h"
 #include "binRange.h"
 #include "rangeTree.h"
 
 void usage()
 /* Explain usage and exit. */
 {
 errAbort(
   "txCdsCluster - Cluster transcripts purely in the CDS regions, only putting things\n"
   "together if they share same frame as well as a genomic region.\n"
   "usage:\n"
   "   txCdsCluster input.bed output.clusters\n"
   "options:\n"
   "   -xxx=XXX\n"
   );
 }
 
 static struct optionSpec options[] = {
    {NULL, 0},
 };
 
 struct txCdsCluster
 /* A cluster of overlapping (at the block level on the same strand)
  * alignments. */
     {
     struct txCdsCluster *next;
     int start,end;
     char strand;
     struct bed *bedList;
     struct rbTree *frameTrees[3];
     };
 
 void txCdsClusterFree(struct txCdsCluster **pCluster)
 /* Free up txCdsCluster */
 {
 struct txCdsCluster *cluster = *pCluster;
 if (cluster != NULL)
     {
     bedFreeList(&cluster->bedList);
     rbTreeFree(&cluster->frameTrees[0]);
     rbTreeFree(&cluster->frameTrees[1]);
     rbTreeFree(&cluster->frameTrees[2]);
     freez(pCluster);
     }
 }
 
 void txCdsClusterFreeList(struct txCdsCluster **pList)
 /* Free a list of dynamically allocated txCdsClusters */
 {
 struct txCdsCluster *el, *next;
 
 for (el = *pList; el != NULL; el = next)
     {
     next = el->next;
     txCdsClusterFree(&el);
     }
 *pList = NULL;
 }
 
 void txCdsClusterSave(struct txCdsCluster *cluster, int id, FILE *f)
 /* Write out info on cluster to file. */
 {
 struct bed *bed = cluster->bedList;
 fprintf(f, "%s\t%d\t%d\t", bed->chrom, cluster->start, cluster->end);
 fprintf(f, "tcc%d\t", id);
 fprintf(f, "0\t%c\t", cluster->strand);
 fprintf(f, "%d\t", slCount(cluster->bedList));
 for (bed=cluster->bedList; bed != NULL; bed = bed->next)
     fprintf(f, "%s,", bed->name);
 fprintf(f, "\n");
 }
 
 struct txCdsCluster *txCdsClusterNew(struct bed *bed, 
 	struct lm *lm, struct rbTreeNode *rbStack[128])
 /* Create cluster around a single bed. */
 {
 /* Allocate and fill in basic fields. */
 struct txCdsCluster *cluster;
 AllocVar(cluster);
 cluster->start = bed->thickStart;
 cluster->end = bed->thickEnd;
 cluster->bedList = bed;
 bed->next = NULL;
 
 /* Allocate frame trees. */
 int i;
 for (i=0; i<3; ++i)
     cluster->frameTrees[i] = rangeTreeNewDetailed(lm, rbStack);
 
 /* Fill in frame trees with coding exons. */
 int cdsPos = 0;
 int block, blockCount = bed->blockCount;
 for (block = 0; block < blockCount; ++block)
     {
     int start = bed->chromStart + bed->chromStarts[block];
     int end = start + bed->blockSizes[block];
     start = max(start, bed->thickStart);
     end = min(end, bed->thickEnd);
     if (start < end)
 	{
 	int size = end - start;
 	int frame = (start - cdsPos)%3;
 	rangeTreeAdd(cluster->frameTrees[frame], start, end);
 	cdsPos += size;
 	}
     }
 return cluster;
 }
 
 void txCdsClusterMerge(struct txCdsCluster *a, struct txCdsCluster **pB)
 /* Merge b into a.  Destroys b. */
 {
 struct txCdsCluster *b = *pB;
 a->bedList = slCat(a->bedList, b->bedList);
 b->bedList = NULL;
 a->start = min(a->start, b->start);
 a->end = max(a->end, b->end);
 struct range *range;
 int frame;
 for (frame=0; frame<3; ++frame)
     {
     struct rbTree *aTree = a->frameTrees[frame];
     struct rbTree *bTree = b->frameTrees[frame];
     for (range = rangeTreeList(bTree); range != NULL; range = range->next)
 	{
 	rangeTreeAdd(aTree, range->start, range->end);
 	}
     freez(&b->frameTrees[frame]);  /* Needed here, so txCdsClusterFree doesn't do it. */
     }
 txCdsClusterFree(pB);
 }
 
 boolean bedIntersectsCluster(struct txCdsCluster *cluster, struct bed *bed)
 /* Return TRUE if any block in bed intersects with cluster. */
 {
 int block, blockCount = bed->blockCount;
 int cdsPos = 0;
 for (block = 0; block < blockCount; ++block)
     {
     struct range tempR;
     int start = bed->chromStarts[block] + bed->chromStart;
     int end = start + bed->blockSizes[block];
     start = max(start, bed->thickStart);
     end = min(end, bed->thickEnd);
     if (start < end)
 	{
 	int size = end - start;
 	int frame = (start - cdsPos)%3;
 	tempR.start = start;
 	tempR.end = end;
 	if (rbTreeFind(cluster->frameTrees[frame], &tempR))
 	    return TRUE;
 	cdsPos += size;
 	}
     }
 return FALSE;
 }
 
 void flipBed(struct bed *bed, int chromSize)
 /* Flip bed to seem to be on other strand. */
 {
 int bedSize = bed->chromEnd - bed->chromStart;
 bed->strand[0] = (bed->strand[0] == '+' ? '-' : '+');
 reverseUnsignedRange(&bed->chromStart, &bed->chromEnd, chromSize);
 reverseUnsignedRange(&bed->thickStart, &bed->thickEnd, chromSize);
 
 /* Convert blockSizes for moment to hold ends. */
 int i, blockCount = bed->blockCount;
 for (i=0; i<blockCount; ++i)
     bed->blockSizes[i] += bed->chromStarts[i];
 
 /* Flip start and end. */
 for (i=0; i<blockCount; ++i)
     reverseIntRange(&bed->chromStarts[i], &bed->blockSizes[i], bedSize);
 
 /* Convert blockSizes from ends back to sizes. */
 for (i=0; i<blockCount; ++i)
     bed->blockSizes[i] -= bed->chromStarts[i];
 
 /* Reverse arrays */
 reverseInts(bed->blockSizes, blockCount);
 reverseInts(bed->chromStarts, blockCount);
 }
 
 void flipBedList(struct bed *bedList, int chromSize)
 /* Flip a list of beds. */
 {
 struct bed *bed;
 for (bed = bedList; bed != NULL; bed = bed->next)
      flipBed(bed, chromSize);
 }
 
 void bedIntoBinsOfClusters(struct bed *bed, struct binKeeper *bins, 
 	struct lm *lm, struct rbTreeNode *rbStack[128])
 /* Add lb into a binKeeper full of txCdsClusters.  This will create and merge 
  * clusters as need be. */
 {
 /* Create new cluster around bed. */
 struct txCdsCluster *newCluster = txCdsClusterNew(bed, lm, rbStack);
 
 /* Merge in any existing overlapping clusters.. */
 struct binElement *bel, *belList = binKeeperFind(bins, bed->thickStart, bed->thickEnd);
 for (bel = belList; bel != NULL; bel = bel->next)
     {
     struct txCdsCluster *oldCluster = bel->val;
     if (bedIntersectsCluster(oldCluster, bed))
 	{
 	binKeeperRemove(bins, oldCluster->start, oldCluster->end, oldCluster);
 	txCdsClusterMerge(oldCluster, &newCluster);
 	newCluster = oldCluster;
 	}
     }
 slFreeList(&belList);
 
 /* Add to binKeeper. */
 binKeeperAdd(bins, newCluster->start, newCluster->end, newCluster);
 }
 
 struct txCdsCluster *clusterOneStrand(struct bed *bedList)
 /* Make clusters of overlapping beds on a single chromosome. 
  * Return a list of such clusters. */
 {
 /* Set up local memory pool and stack for all the rangeTrees we'll be using. */
 struct lm *lm = lmInit(0);
 struct rbTreeNode *rbStack[128];
 boolean minusStrand = (bedList->strand[0] == '-');
 
 
 /* Figure out maximum size needed for binKeeper */
 struct bed *bed, *nextBed;
 int strandSizeLimit = 0;
 for (bed = bedList; bed != NULL; bed = bed->next)
     strandSizeLimit = max(strandSizeLimit, bed->chromEnd);
 
 /* Flip around beds if on minus strand. */
 if (minusStrand)
     flipBedList(bedList, strandSizeLimit);
 
 /* Create binKeeper full of clusters. */
 struct binKeeper *bins = binKeeperNew(0, strandSizeLimit);
 for (bed = bedList; bed != NULL; bed = nextBed)
     {
     nextBed = bed->next;
     if (bed->thickStart < bed->thickEnd)
 	bedIntoBinsOfClusters(bed, bins, lm, rbStack);
     }
 
 /* Convert from binKeeper of clusters to simple list of clusters. */
 struct txCdsCluster *clusterList = NULL;
 struct binElement *binList, *bin;
 binList = binKeeperFindAll(bins);
 for (bin = binList; bin != NULL; bin = bin->next)
     {
     struct txCdsCluster *cluster = bin->val;
     if (minusStrand)
 	{
 	cluster->strand = '-';
         reverseIntRange(&cluster->start, &cluster->end, strandSizeLimit);
 	}
     else
         cluster->strand = '+';
     slAddHead(&clusterList, cluster);
     }
 slReverse(&clusterList);
 
 /* Flip around beds if on minus strand. */
 if (minusStrand)
     flipBedList(bedList, strandSizeLimit);
 
 /* Clean up and go home. */
 struct txCdsCluster *cluster;
 for (cluster = clusterList; cluster != NULL; cluster = cluster->next)
     {
     freez(&cluster->frameTrees[0]);
     freez(&cluster->frameTrees[1]);
     freez(&cluster->frameTrees[2]);
     }
 lmCleanup(&lm);
 binKeeperFree(&bins);
 return clusterList;
 }
 
 void txCdsCluster(char *inFile, char *outFile)
 /* txCdsCluster - Cluster transcripts purely in the CDS regions, only putting things 
  * together if they share same frame as well as a genomic region. */
 {
 /* Load input and sort by chromosome. */
 struct bed *bedList = bedLoadNAll(inFile, 12);
 slSort(&bedList, bedCmpChromStrandStart);
 
 /* Open output and initialize unique ID generator. */
 FILE *f = mustOpen(outFile, "w");
 int clusterId = 0;
 
 /* Go through input one chromosome strand at a time. */
 struct bed *chromStart, *chromEnd = NULL;
 for (chromStart = bedList; chromStart != NULL; chromStart = chromEnd)
     {
     verbose(2, "chrom %s %s\n", chromStart->chrom, chromStart->strand); 
 
     /* Find chromosome end. */
     char *chromName = chromStart->chrom;
     char strand = chromStart->strand[0];
     struct bed *bed, *dummy, **endAddress = &dummy;
     for (bed = chromStart; bed != NULL; bed = bed->next)
         {
 	if (!sameString(bed->chrom, chromName))
 	    break;
 	if (bed->strand[0] != strand)
 	    break;
 	endAddress = &bed->next;
 	}
     chromEnd = bed;
 
     /* Terminate list before next chromosome */
     *endAddress = NULL;
 
     /* Create clusters. */
     struct txCdsCluster *clusterList = clusterOneStrand(chromStart);
 
     /* Write clusters to file. */
     struct txCdsCluster *cluster;
     for (cluster = clusterList; cluster != NULL; cluster = cluster->next)
          txCdsClusterSave(cluster, ++clusterId, f);
 
     txCdsClusterFreeList(&clusterList);	/* Note free's beds as well! */
     }
 
 carefulClose(&f);
 }
 
 int main(int argc, char *argv[])
 /* Process command line. */
 {
 pushCarefulMemHandler(100000000);
 optionInit(&argc, argv, options);
 if (argc != 3)
     usage();
 txCdsCluster(argv[1], argv[2]);
 return 0;
 }