4898794edd81be5285ea6e544acbedeaeb31bf78
max
  Tue Nov 23 08:10:57 2021 -0800
Fixing pointers to README file for license in all source code files. refs #27614

diff --git src/hg/oneShot/testIntersect/testIntersect.c src/hg/oneShot/testIntersect/testIntersect.c
index dbedaed..7fa1dab 100644
--- src/hg/oneShot/testIntersect/testIntersect.c
+++ src/hg/oneShot/testIntersect/testIntersect.c
@@ -1,503 +1,503 @@
 /* testIntersect - Test some ideas on intersections. */
 
 /* Copyright (C) 2011 The Regents of the University of California 
- * See README in this or parent directory for licensing information. */
+ * See kent/LICENSE or http://genome.ucsc.edu/license/ for licensing information. */
 #include "common.h"
 #include "linefile.h"
 #include "hash.h"
 #include "options.h"
 #include "localmem.h"
 #include "featureBits.h"
 #include "hdb.h"
 #include "binRange.h"
 
 
 void usage()
 /* Explain usage and exit. */
 {
 errAbort(
   "testIntersect - Test some ideas on intersections\n"
   "usage:\n"
   "   testIntersect database track1 track2\n"
   "options:\n"
   "   -chrom=chrN - Restrict to a single chromosome\n"
   );
 }
 
 static struct optionSpec options[] = {
    {"chrom", OPTION_STRING},
    {NULL, 0},
 };
 
 boolean htiIsPsl(struct hTableInfo *hti)
 /* Return TRUE if table looks to be in psl format. */
 {
 return sameString("tStarts", hti->startsField);
 }
 
 void bedSqlFieldsExceptForChrom(struct hTableInfo *hti,
 	int *retFieldCount, char **retFields)
 /* Given tableInfo figure out what fields are needed to
  * add to a database query to have information to create
  * a bed. The chromosome is not one of these fields - we
  * assume that is already known since we're processing one
  * chromosome at a time.   Return a comma separated (no last
  * comma) list of fields that can be freeMem'd, and the count
  * of fields (this *including* the chromosome). */
 {
 struct dyString *fields = dyStringNew(128);
 int fieldCount = fieldCount = hTableInfoBedFieldCount(hti);
 
 dyStringPrintf(fields, "%s,%s", hti->startField, hti->endField);
 if (fieldCount >= 4)
     {
     if (hti->nameField[0] != 0)
 	dyStringPrintf(fields, ",%s", hti->nameField);
     else /* Put in . as placeholder. */
 	dyStringPrintf(fields, ",'.'");  
     }
 if (fieldCount >= 5)
     {
     if (hti->scoreField[0] != 0)
 	dyStringPrintf(fields, ",%s", hti->scoreField);
     else
 	dyStringPrintf(fields, ",0", hti->startField);  
     }
 if (fieldCount >= 6)
     {
     if (hti->strandField[0] != 0)
 	dyStringPrintf(fields, ",%s", hti->strandField);
     else
 	dyStringPrintf(fields, ",'.'");  
     }
 if (fieldCount >= 8)
     {
     if (hti->cdsStartField[0] != 0)
 	dyStringPrintf(fields, ",%s,%s", hti->cdsStartField, hti->cdsEndField);
     else
 	dyStringPrintf(fields, ",%s,%s", hti->startField, hti->endField);  
     }
 if (fieldCount >= 12)
     {
     dyStringPrintf(fields, ",%s,%s,%s", hti->countField, 
         hti->endsSizesField, hti->startsField);
     }
 if (htiIsPsl(hti))
     {
     /* For psl format we need to know chrom size as well
      * to handle reverse strand case. */
     dyStringPrintf(fields, ",tSize");
     }
 *retFieldCount = fieldCount;
 *retFields = dyStringCannibalize(&fields);
 }
 
 struct bed *bedFromRow(
 	char *chrom, 		  /* Chromosome bed is on. */
 	char **row,  		  /* Row with other data for bed. */
 	int fieldCount,		  /* Number of fields in final bed. */
 	boolean isPsl, 		  /* True if in PSL format. */
 	boolean isGenePred,	  /* True if in GenePred format. */
 	boolean isBedWithBlocks,  /* True if BED with block list. */
 	boolean *pslKnowIfProtein,/* Have we figured out if psl is protein? */
 	boolean *pslIsProtein,    /* True if we know psl is protien. */
 	struct lm *lm)		  /* Local memory pool */
 /* Create bed from a database row when we already understand
  * the format pretty well.  The bed is allocated inside of
  * the local memory pool lm.  Generally use this in conjunction
  * with the results of a SQL query constructed with the aid
  * of the bedSqlFieldsExceptForChrom function. */
 {
 char *strand, tStrand, qStrand;
 struct bed *bed;
 int i, blockCount;
 
 lmAllocVar(lm, bed);
 bed->chrom = chrom;
 bed->chromStart = sqlUnsigned(row[0]);
 bed->chromEnd = sqlUnsigned(row[1]);
 
 if (fieldCount < 4)
     return bed;
 bed->name = lmCloneString(lm, row[2]);
 if (fieldCount < 5)
     return bed;
 bed->score = atoi(row[3]);
 if (fieldCount < 6)
     return bed;
 strand = row[4];
 qStrand = strand[0];
 tStrand = strand[1];
 if (tStrand == 0)
     bed->strand[0] = qStrand;
 else
     {
     /* psl: use XOR of qStrand,tStrand if both are given. */
     if (tStrand == qStrand)
 	bed->strand[0] = '+';
     else
 	bed->strand[0] = '-';
     }
 if (fieldCount < 8)
     return bed;
 bed->thickStart = sqlUnsigned(row[5]);
 bed->thickEnd   = sqlUnsigned(row[6]);
 if (fieldCount < 12)
     return bed;
 bed->blockCount = blockCount = sqlUnsigned(row[7]);
 lmAllocArray(lm, bed->blockSizes, blockCount);
 sqlUnsignedArray(row[8], bed->blockSizes, blockCount);
 lmAllocArray(lm, bed->chromStarts, blockCount);
 sqlUnsignedArray(row[9], bed->chromStarts, blockCount);
 if (isGenePred)
     {
     /* Translate end coordinates to sizes. */
     for (i=0; i<bed->blockCount; ++i)
 	bed->blockSizes[i] -= bed->chromStarts[i];
     }
 else if (isPsl)
     {
     if (!*pslKnowIfProtein)
 	{
 	/* Figure out if is protein using a rather elaborate but
 	 * working test I think Angie or Brian must have figured out. */
 	if (tStrand == '-')
 	    {
 	    int tSize = sqlUnsigned(row[10]);
 	    *pslIsProtein = 
 		   (bed->chromStart == 
 		    tSize - (3*bed->blockSizes[bed->blockCount - 1]  + 
 		    bed->chromStarts[bed->blockCount - 1]));
 	    }
 	else
 	    {
 	    *pslIsProtein = (bed->chromEnd == 
 		    3*bed->blockSizes[bed->blockCount - 1]  + 
 		    bed->chromStarts[bed->blockCount - 1]);
 	    }
 	*pslKnowIfProtein = TRUE;
 	}
     if (*pslIsProtein)
 	{
 	/* if protein then blockSizes are in protein space */
 	for (i=0; i<blockCount; ++i)
 	    bed->blockSizes[i] *= 3;
 	}
     if (tStrand == '-')
 	{
 	/* psl: if target strand is '-', flip the coords.
 	 * (this is the target part of pslRcBoth from src/lib/psl.c) */
 	int tSize = sqlUnsigned(row[10]);
 	for (i=0; i<blockCount; ++i)
 	    {
 	    bed->chromStarts[i] = tSize - 
 		    (bed->chromStarts[i] + bed->blockSizes[i]);
 	    }
 	reverseInts(bed->chromStarts, bed->blockCount);
 	reverseInts(bed->blockSizes, bed->blockCount);
 	}
     }
 if (!isBedWithBlocks)
     {
     /* non-bed: translate absolute starts to relative starts */
     for (i=0;  i < bed->blockCount;  i++)
 	bed->chromStarts[i] -= bed->chromStart;
     }
 return bed;
 }
 
 
 static struct bed *getChromAsBed(
 	struct sqlConnection *conn, 
 	char *db, char *table, 	/* Database and table. */
 	char *chrom,
 	struct lm *lm,
 	int *retFieldCount)		/* Where to allocate memory. */
 /* Return a bed list of all items in the given range in table.
  * Cleanup result via lmCleanup(&lm) rather than bedFreeList.  */
 {
 char *fields = NULL;
 struct sqlResult *sr;
 struct hTableInfo *hti;
 struct bed *bedList=NULL, *bed;
 char **row;
 int fieldCount;
 boolean isPsl, isGenePred, isBedWithBlocks;
 boolean pslKnowIfProtein = FALSE, pslIsProtein = FALSE;
 
 hti = hFindTableInfoDb(db, chrom, table);
 if (hti == NULL)
     errAbort("Could not find table info for table %s", table);
 
 bedSqlFieldsExceptForChrom(hti, &fieldCount, &fields);
 isPsl = htiIsPsl(hti);
 isGenePred = sameString("exonEnds", hti->endsSizesField);
 isBedWithBlocks = (
     (sameString("chromStarts", hti->startsField) ||
      sameString("blockStarts", hti->startsField))
 	 && sameString("blockSizes", hti->endsSizesField));
 
 /* All beds have at least chrom,start,end.  We omit the chrom
  * from the query since we already know it. */
 sr = hExtendedChromQuery(conn, table, chrom, NULL, FALSE, fields, NULL);
 while (sr != NULL && (row = sqlNextRow(sr)) != NULL)
     {
     bed = bedFromRow(chrom, row, fieldCount, isPsl, isGenePred,
 		     isBedWithBlocks, &pslKnowIfProtein, &pslIsProtein, lm);
     slAddHead(&bedList, bed);
     }
 freez(&fields);
 sqlFreeResult(&sr);
 slReverse(&bedList);
 *retFieldCount = fieldCount;
 return(bedList);
 }
 
 void scanChromTable(struct sqlConnection *conn, char *chrom, char *table)
 /* Scan chromosome table, don't do anything with data. */
 {
 struct sqlResult *sr;
 int rowOffset;
 char **row;
 
 sr = hChromQuery(conn, table, chrom, NULL, &rowOffset);
 while ((row = sqlNextRow(sr)) != NULL)
     ;
 sqlFreeResult(&sr);
 }
 
 struct featureBits *fbList(char *db, char *chrom, char *table, 
 	struct bed *bedList, int chromSize)
 /* Get feature bits list from bed list. */
 {
 struct hTableInfo *hti;
 hti = hFindTableInfoDb(db, chrom, table);
 return fbFromBed(table, hti, bedList, 0, chromSize, FALSE, FALSE);
 }
 
 static struct bed *bitsToBed4List(Bits *bits, int bitSize, 
 	char *chrom, int minSize, int rangeStart, int rangeEnd,
 	struct lm *lm)
 /* Translate ranges of set bits to bed 4 items. */
 {
 struct bed *bedList = NULL, *bed;
 boolean thisBit, lastBit;
 int start = 0;
 int end = 0;
 int id = 0;
 char name[128];
 
 if (rangeStart < 0)
     rangeStart = 0;
 if (rangeEnd > bitSize)
     rangeEnd = bitSize;
 end = rangeStart;
 
 /* We depend on extra zero BYTE at end in case bitNot was used on bits. */
 for (;;)
     {
     start = bitFindSet(bits, end, rangeEnd);
     if (start >= rangeEnd)
         break;
     end = bitFindClear(bits, start, rangeEnd);
     if (end - start >= minSize)
 	{
 	lmAllocVar(lm, bed);
 	bed->chrom = chrom;
 	bed->chromStart = start;
 	bed->chromEnd = end;
 	snprintf(name, sizeof(name), "%s.%d", chrom, ++id);
 	bed->name = lmCloneString(lm, name);
 	slAddHead(&bedList, bed);
 	}
     }
 slReverse(&bedList);
 return(bedList);
 }
 
 static int bitCountBasesOverlap(struct bed *bedItem, Bits *bits, boolean hasBlocks)
 /* Return the number of bases belonging to bedItem covered by bits. */
 {
 int count = 0;
 int i, j;
 if (hasBlocks)
     {
     for (i=0;  i < bedItem->blockCount;  i++)
 	{
 	int start = bedItem->chromStart + bedItem->chromStarts[i];
 	int end   = start + bedItem->blockSizes[i];
 	for (j=start;  j < end;  j++)
 	    if (bitReadOne(bits, j))
 		count++;
 	}
     }
 else
     {
     for (i=bedItem->chromStart;  i < bedItem->chromEnd;  i++)
 	if (bitReadOne(bits, i))
 	    count++;
     }
     return(count);
 }
 
 int bitCountAllOverlaps(struct bed *bedList, Bits *bits, int fieldCount)
 /* Count all overlapping. */
 {
 struct bed *bed;
 int total = 0, one;
 boolean hasBlocks = (fieldCount >= 12);
 
 for (bed = bedList;  bed != NULL; bed = bed->next)
      {
      one = bitCountBasesOverlap(bed, bits, hasBlocks);
      total += one;
      }
 return total;
 }
 
 struct binKeeper *fbToBinKeeper(struct featureBits *fbList, int chromSize)
 /* Make a binKeeper filled with fbList. */
 {
 struct binKeeper *bk = binKeeperNew(0, chromSize);
 struct featureBits *fb;
 for (fb = fbList; fb != NULL; fb = fb->next)
     binKeeperAdd(bk, fb->start, fb->end, fb);
 return bk;
 }
 
 int bkCountOverlappingRange(struct binKeeper *bk, int start, int end)
 /* Return biggest overlap of anything in binKeeper with given range. */
 {
 struct binElement *el, *list = binKeeperFind(bk, start, end);
 int overlap, bestOverlap = 0;
 
 for (el = list; el != NULL; el = el->next)
     {
     overlap = rangeIntersection(el->start, el->end, start, end);
     if (overlap > bestOverlap)
         bestOverlap = overlap;
     }
 return bestOverlap;
 }
 
 static int bkCountBasesOverlap(struct bed *bedItem, struct binKeeper *bk, boolean hasBlocks)
 /* Return the number of bases belonging to bedItem covered by bits. */
 {
 int count = 0;
 int i;
 if (hasBlocks)
     {
     for (i=0;  i < bedItem->blockCount;  i++)
 	{
 	int start = bedItem->chromStart + bedItem->chromStarts[i];
 	int end   = start + bedItem->blockSizes[i];
 	count += bkCountOverlappingRange(bk, start, end);
 	}
     }
 else
     {
     count += bkCountOverlappingRange(bk, bedItem->chromStart, bedItem->chromEnd);
     }
 return(count);
 }
 
 int bkCountAllOverlaps(struct bed *bedList, struct binKeeper *bk, int fieldCount)
 /* Count all overlapping. */
 {
 struct bed *bed;
 int total = 0, one;
 boolean hasBlocks = (fieldCount >= 12);
 
 for (bed = bedList;  bed != NULL; bed = bed->next)
      {
      one = bkCountBasesOverlap(bed, bk, hasBlocks);
      total += one;
      }
 return total;
 }
 
 
 void intersectOnChrom(char *db, struct sqlConnection *conn, char *chrom, 
 	char *track1, char *track2)
 /* Do intersection on one chromosome. */
 {
 int chromSize = hChromSize(chrom);
 struct lm *lm = lmInit(0);
 struct bed *bedList1, *bedList2, *andBed;
 struct featureBits *fb1, *fb2;
 Bits *bit1, *bit2;
 int fieldCount1, fieldCount2;
 struct binKeeper *bk2;
 
 uglyTime(NULL);
 scanChromTable(conn, chrom, track1);
 scanChromTable(conn, chrom, track2);
 uglyTime("Scan tracks");
 bedList1 = getChromAsBed(conn, db, track1, chrom, lm, &fieldCount1);
 bedList2 = getChromAsBed(conn, db, track2, chrom, lm, &fieldCount2);
 uglyTime("Tracks as bed");
 uglyf("%d items with %d fields in %s, ", slCount(bedList1), fieldCount1, track1);
 uglyf("%d items with %d fields in %s\n", slCount(bedList2), fieldCount2, track2);
 bit1 = bitAlloc(chromSize+8);
 bit2 = bitAlloc(chromSize+8);
 uglyTime("bitAlloc");
 
 fb1 = fbList(db, chrom, track1,  bedList1, chromSize);
 fb2 = fbList(db, chrom, track1,  bedList1, chromSize);
 uglyTime("bed to featureBits list");
 
 fbOrBits(bit1, chromSize, fb1, 0);
 fbOrBits(bit2, chromSize, fb2, 0);
 uglyTime("or into bits");
 
 bitAnd(bit1, bit2, chromSize);
 uglyTime("Anding bitfields");
 
 andBed = bitsToBed4List(bit1, chromSize, chrom, 0, 0, chromSize, lm);
 uglyTime("Converting bitfield to bed 4");
 
 bitCountAllOverlaps(bedList1, bit2, fieldCount2);
 uglyTime("Counting overlaps in track1 with bitfield of track2");
 
 bk2 = fbToBinKeeper(fb2, chromSize);
 uglyTime("Adding featureBits list from track 2 into binKeeper.");
 
 bkCountAllOverlaps(bedList1, bk2, fieldCount2);
 uglyTime("Count overlaps in track1 with binKeeper of track2");
 
 featureBitsFreeList(&fb1);
 featureBitsFreeList(&fb2);
 uglyTime("free featureBits");
 
 
 bitFree(&bit1);
 bitFree(&bit2);
 uglyTime("bitFree");
 }
 
 void testIntersect(char *db, char *track1, char *track2)
 /* testIntersect - Test some ideas on intersections. */
 {
 struct slName *chromList = NULL, *chrom;
 struct sqlConnection *conn;
 
 hSetDb(db);
 if (optionExists("chrom"))
     chromList = slNameNew(optionVal("chrom", NULL));
 else
     chromList = hAllChromNames();
 conn = hAllocConn();
 for (chrom = chromList; chrom != NULL; chrom = chrom->next)
      intersectOnChrom(db, conn, chrom->name, track1, track2);
 hFreeConn(&conn);
 }
 
 int main(int argc, char *argv[])
 /* Process command line. */
 {
 optionInit(&argc, argv, options);
 if (argc != 4)
     usage();
 testIntersect(argv[1], argv[2], argv[3]);
 return 0;
 }