src/hg/regulate/companion/regCompanionChia/regCompanionChia.c 4898794edd81be5285ea6e544acbedeaeb31bf78

4898794edd81be5285ea6e544acbedeaeb31bf78
max
  Tue Nov 23 08:10:57 2021 -0800
Fixing pointers to README file for license in all source code files. refs #27614

diff --git src/hg/regulate/companion/regCompanionChia/regCompanionChia.c src/hg/regulate/companion/regCompanionChia/regCompanionChia.c
index 410fca5..18dcb3c 100644
--- src/hg/regulate/companion/regCompanionChia/regCompanionChia.c
+++ src/hg/regulate/companion/regCompanionChia/regCompanionChia.c
@@ -1,357 +1,357 @@
 /* regCompanionChia - Analyse chia pet data against promoters and enhancers.. */
 
 /* Copyright (C) 2011 The Regents of the University of California 
- * See README in this or parent directory for licensing information. */
+ * See kent/LICENSE or http://genome.ucsc.edu/license/ for licensing information. */
 #include "common.h"
 #include "linefile.h"
 #include "hash.h"
 #include "options.h"
 #include "basicBed.h"
 #include "bigWig.h"
 #include "obscure.h"
 #include "localmem.h"
 #include "verbose.h"
 #include "bits.h"
 #include "genomeRangeTree.h"
 
 
 void usage()
 /* Explain usage and exit. */
 {
 errAbort(
   "regCompanionChia - Analyse chia pet data against promoters and enhancers.\n"
   "usage:\n"
   "   regCompanionChia input.bed output.tab\n"
   "where input.bed has to be all two-block items.  There's alas 7 other input files\n"
   "which you'll have to recompile the C source to change.  The output is also complex\n"
   "a table with 23 fields:\n"
   "  1 - chromosome\n"
   "  2 - chromosome start - 0 based\n"
   "  3 - chromosome end - 1 based\n"
   "  4 - name - unique id for this pair\n"
   "  5 - score - 0-1000 browser style score\n"
   "  6 - always the word \"block1\"\n"
   "  7 - size of block 1\n"
   "  8 - average DNAse level in block 1\n"
   "  9 - average H3K4Me3 level in block 1\n"
   " 10 - average H3K27Ac level in block 1\n"
   " 11 - average H3K4Me1 level in block 1\n"
   " 12 - average CTCF level in block 1\n"
   " 13 - average coverage by +-500 base from txStart promoters in block 1\n"
   " 14 - average coverage by enhancer in block 1\n"
   " 15 - always the word \"block2\"\n"
   " 16 - size of block 2\n"
   " 17 - average DNAse level in block 2\n"
   " 18 - average H3K4Me3 level in block 2\n"
   " 19 - average H3K27Ac level in block 2\n"
   " 20 - average H3K4Me1 level in block 2\n"
   " 21 - average CTCF level in block 2\n"
   " 22 - average coverage by +-500 base from txStart promoters in block 2\n"
   " 23 - average coverage by enhancer in block 2\n"
   );
 }
 
 static struct optionSpec options[] = {
    {NULL, 0},
 };
 
 enum inType {itBlockedBed, itBigWig, itPromoterBed, itUnstrandedBed};
 
 struct inInfo
 /* Help classify input types. */
     {
     char *name;			/* Name of input */
     enum inType type;		/* Type of input */
     char *fileName;			/* Path of file with input */
     struct lineFile *lf;	/* Open lineFile if a bed */
     struct bbiFile *bbi;	/* Open bbiFile if a bigWig */
     double *out[2];	/* Average of signal for each of two blocks. */
     };
 
 struct inInfo inArray[] =
     {
     {"pairs", itBlockedBed, NULL},
     // {"CHIApet", itBlockedBed, "/hive/groups/encode/dcc/analysis/ftp/pipeline/hg19/wgEncodeGisChiaPet/wgEncodeGisChiaPetGm12878D000005628.bed.gz"},
     {"DNAse", itBigWig, "/hive/users/kent/regulate/companion/dnase/normalized/Gm12878.bw"},
     {"H3K4Me3", itBigWig, "/hive/users/kent/regulate/companion/histones/normalized/wgEncodeBroadHistoneGm12878H3k4me3NormSig.bw"},
     {"H3K27Ac", itBigWig, "/hive/users/kent/regulate/companion/histones/normalized/wgEncodeBroadHistoneGm12878H3k27acNormSig.bw"},
     {"H3K4Me1", itBigWig, "/hive/users/kent/regulate/companion/histones/normalized/wgEncodeBroadHistoneGm12878H3k4me1NormSig.bw"},
     {"CTCF", itBigWig, "/hive/users/kent/regulate/companion/histones/normalized/wgEncodeBroadHistoneGm12878CtcfNormSig.bw"},
     {"UCSCgenes", itPromoterBed, "/hive/data/genomes/hg19/bed/ucsc.12/ucscGenes.bed"},
     {"UCSCenh", itUnstrandedBed, "/hive/users/kent/regulate/companion/enhPicks/stringent/enh7.bed"},
     };
 
 #ifdef SOON
 #endif /* SOON */
 
 void checkInputOpenFiles(struct inInfo *array, int count)
 /* Make sure all of the input is there and of right format before going forward. Since
  * this is going to take a while we want to fail fast. */
 {
 int i;
 for (i=0; i<count; ++i)
     {
     struct inInfo *in = &array[i];
     switch (in->type)
         {
 	case itBigWig:
 	    {
 	    /* Just open and close, it will abort if any problem. */
 	    in->bbi = bigWigFileOpen(in->fileName);
 	    break;
 	    }
 	case itPromoterBed:
 	case itUnstrandedBed:
 	case itBlockedBed:
 	    {
 	    struct lineFile *lf = in->lf = lineFileOpen(in->fileName, TRUE);
 	    char *line;
 	    lineFileNeedNext(lf, &line, NULL);
 	    char *dupe = cloneString(line);
 	    char *row[256];
 	    int wordCount = chopLine(dupe, row);
 	    struct bed *bed = NULL;
 	    switch (in->type)
 	        {
 		case itPromoterBed:
 		    lineFileExpectAtLeast(lf, 6, wordCount);
 		    bed = bedLoadN(row, 6);
 		    char strand = bed->strand[0];
 		    if (strand != '+' && strand != '-')
 		        errAbort("%s must be stranded, got %s in that field", lf->fileName, row[6]);
 		    break;
 		case itUnstrandedBed:
 		    lineFileExpectAtLeast(lf, 4, wordCount);
 		    bed = bedLoadN(row, 4);
 		    break;
 		case itBlockedBed:
 		    lineFileExpectAtLeast(lf, 4, wordCount);
 		    bed = bedLoadN(row, 12);
 		    break;
 		default:
 		    internalErr();
 		    break;
 		}
 	    bedFree(&bed);
 	    freez(&dupe);
 	    lineFileReuse(lf);
 	    break;
 	    }
 	default:
 	    internalErr();
 	    break;
 	}
     }
 }
 
 struct bed *bedLoadTwoBlocks(struct lineFile *lf)
 /* Load a bed12, and make sure each item is 1 or 2 blocks.  Return list of
  * the two block ones. */
 {
 struct bed *bedList = NULL, *bed;
 char *row[12];
 int totalCount = 0, pairCount = 0;
 while (lineFileRow(lf, row))
     {
     bed = bedLoad12(row);
     if (bed->blockCount != 1 && bed->blockCount != 2)
        errAbort("Line %d of %s got blockCount of %d,  expecting 1 or 2", 
        	lf->lineIx, lf->fileName, bed->blockCount);
     ++totalCount;
     if (bed->blockCount == 2)
         {
 	slAddHead(&bedList, bed);
 	++pairCount;
 	}
     }
 slReverse(&bedList);
 verbose(1, "Got %d items including %d pairs in %s\n", totalCount, pairCount, lf->fileName);
 return bedList;
 }
 
 struct genomeRangeTree *grtFromOpenBed(struct lineFile *lf, int size, boolean doPromoter)
 /* Read an open bed file into a genomeRangeTree and return it. */
 {
 struct genomeRangeTree *grt = genomeRangeTreeNew();
 char *row[size];
 while (lineFileRow(lf, row))
     {
     struct bed *bed = bedLoadN(row, size);
     if (doPromoter)
         {
 	if (bed->strand[0] == '+')
 	    genomeRangeTreeAdd(grt, bed->chrom, bed->chromStart - 500, bed->chromEnd + 500);
 	else
 	    genomeRangeTreeAdd(grt, bed->chrom, bed->chromEnd - 500, bed->chromEnd + 500);
 	}
     else
 	genomeRangeTreeAdd(grt,  bed->chrom, bed->chromStart, bed->chromEnd);
     }
 return grt;
 }
 
 void outputOverlappingGrt(struct bed *chiaList, struct genomeRangeTree *grt,
 	double *out1, double *out2)
 {
 int chiaIx = 0;
 struct bed *chia;
 for (chia = chiaList; chia != NULL; chia = chia->next)
     {
     int blockStart = chia->chromStart;
     int blockSize = chia->blockSizes[0];
     int blockEnd = blockStart + blockSize;
     int overlap = genomeRangeTreeOverlapSize(grt, chia->chrom, blockStart, blockEnd);
     out1[chiaIx] = (double)overlap/blockSize;
 
     blockStart = chia->chromStart + chia->chromStarts[1];
     blockSize = chia->blockSizes[1];
     blockEnd = blockStart + blockSize;
     overlap = genomeRangeTreeOverlapSize(grt, chia->chrom, blockStart, blockEnd);
     out2[chiaIx] = (double)overlap/blockSize;
 
     ++chiaIx;
     }
 }
 
 void doItPromoterBed(struct inInfo *in, struct bed *chiaList, double *out1, double *out2)
 /* Process overlaps with promoters defined by a gene bed file. */
 {
 struct genomeRangeTree *grt = grtFromOpenBed(in->lf, 12, TRUE);
 outputOverlappingGrt(chiaList, grt, out1, out2);
 uglyf("done %s\n", in->fileName);
 }
 
 void doItUnstrandedBed(struct inInfo *in, struct bed *chiaList, double *out1, double *out2)
 /* Process overlaps with unstranded bed into output */
 {
 struct genomeRangeTree *grt = grtFromOpenBed(in->lf, 3, FALSE);
 outputOverlappingGrt(chiaList, grt, out1, out2);
 }
 
 double averageInRegion(struct bigWigValsOnChrom *chromVals, int start, int size)
 /* Return average value in region where there is data. */
 {
 int n = bitCountRange(chromVals->covBuf, start, size);
 if (n == 0)
     return 0.0;
 else
     {
     double *val = chromVals->valBuf + start;
     double sum = 0;
     int i = size;
     while (--i >= 0)
         sum += *val++;
     return sum/n;
     }
 }
 
 void doItBigWig(struct inInfo *in, struct bed *chiaList, struct bigWigValsOnChrom *chromVals,
 	double *out1, double *out2)
 {
 struct bed *chromStart, *chromEnd, *chia;
 int chiaIx = 0;
 for (chromStart = chiaList; chromStart != NULL; chromStart = chromEnd)
     {
     chromEnd = bedListNextDifferentChrom(chromStart);
     if (bigWigValsOnChromFetchData(chromVals, chromStart->chrom, in->bbi))
         {
 	for (chia = chromStart; chia != chromEnd; chia = chia->next)
 	    {
 	    int blockStart = chia->chromStart;
 	    int blockSize = chia->blockSizes[0];
 	    out1[chiaIx] = averageInRegion(chromVals, blockStart, blockSize);
 
 	    blockStart = chia->chromStart + chia->chromStarts[1];
 	    blockSize = chia->blockSizes[1];
 	    out2[chiaIx] = averageInRegion(chromVals, blockStart, blockSize);
 
 	    ++chiaIx;
 	    }
 	}
     else
 	{
 	/* No data on this chrom, just output zero everywhere. */
 	for (chia = chromStart; chia != chromEnd; chia = chia->next)
 	    {
 	    out1[chiaIx] = out2[chiaIx] = 0;
 	    ++chiaIx;
 	    }
 	}
     verboseDot();
     }
 }
 
 
 void regCompanionChia(char *inBedPairs, char *output)
 /* regCompanionChia - Analyse chia pet data against promoters and enhancers.. */
 {
 inArray[0].fileName = inBedPairs;
 checkInputOpenFiles(inArray, ArraySize(inArray));
 verbose(1, "Opened all %d inputs successfully\n", (int)ArraySize(inArray));
 struct bed *chiaList = bedLoadTwoBlocks(inArray[0].lf);
 slSort(&chiaList, bedCmp);
 int chiaCount = slCount(chiaList);
 struct bigWigValsOnChrom *chromVals = bigWigValsOnChromNew();
 
 int inIx;
 for (inIx=1; inIx < ArraySize(inArray); ++inIx)
     {
     /* Allocate output arrays. */
     struct inInfo *in = &inArray[inIx];
     double *out1 = AllocArray(in->out[0], chiaCount);
     double *out2 = AllocArray(in->out[1], chiaCount);
 
     /* Process input depending on type. */
     verbose(1, "Processing %s", in->fileName);
     switch(in->type)
         {
 	case itPromoterBed:
 	    doItPromoterBed(in, chiaList, out1, out2);
 	    break;
 	case itUnstrandedBed:
 	    doItUnstrandedBed(in, chiaList, out1, out2);
 	    break;
 	case itBigWig:
 	    doItBigWig(in, chiaList, chromVals, out1, out2);
 	    break;
 	default:
 	    internalErr();
 	    break;
 	}
     verbose(1, "\n");
     }
 
 /* do output */
 FILE *f = mustOpen(output, "w");
 struct bed *chia;
 int chiaIx = 0;
 for (chia = chiaList; chia != NULL; chia = chia->next, ++chiaIx)
     {
     // fprintf(f, "%s\t%d\t%d\tchia%d\t%d", chia->chrom, chia->chromStart, chia->chromEnd, chiaIx+1, chia->score);
     fprintf(f, "%s\t%d\t%d\t%s\t%d", chia->chrom, chia->chromStart, chia->chromEnd, chia->name, chia->score);
     int blockIx;
     for (blockIx=0; blockIx < 2; ++blockIx)
 	{
 	fprintf(f, "\tblock%d\t%d", blockIx+1, chia->blockSizes[blockIx]);
 	for (inIx=1; inIx < ArraySize(inArray); ++inIx)
 	    {
 	    struct inInfo *in = &inArray[inIx];
 	    double *out = in->out[blockIx];
 	    fprintf(f, "\t%g", out[chiaIx]);
 	    }
 	}
     fprintf(f, "\n");
     }
 
 carefulClose(&f);
 }
 
 int main(int argc, char *argv[])
 /* Process command line. */
 {
 optionInit(&argc, argv, options);
 if (argc != 3)
     usage();
 regCompanionChia(argv[1], argv[2]);
 return 0;
 }