4898794edd81be5285ea6e544acbedeaeb31bf78
max
  Tue Nov 23 08:10:57 2021 -0800
Fixing pointers to README file for license in all source code files. refs #27614

diff --git src/hg/ratStuff/mafBreak/mafBreak.c src/hg/ratStuff/mafBreak/mafBreak.c
index 98b08d0..b5aa9ad 100644
--- src/hg/ratStuff/mafBreak/mafBreak.c
+++ src/hg/ratStuff/mafBreak/mafBreak.c
@@ -1,452 +1,452 @@
 /* Split MAF file based on positions in a bed file */
 
 /* Copyright (C) 2011 The Regents of the University of California 
- * See README in this or parent directory for licensing information. */
+ * See kent/LICENSE or http://genome.ucsc.edu/license/ for licensing information. */
 
 #include "common.h"
 #include "portable.h"
 #include "hash.h"
 #include "options.h"
 #include "sqlNum.h"
 #include "maf.h"
 #include "bed.h"
 #include "dlist.h"
 
 
 void usage()
 /* Explain usage and exit. */
 {
 errAbort(
   "mafBreak - calculate distance to chrom breaks for each bed for each species\n"
   "usage:\n"
   "    mafBreak regions.bed species.lst outStats *.maf\n"
   );
 }
 
 static struct optionSpec options[] = {
     {"outDir", OPTION_BOOLEAN},
     {"ignoreDups", OPTION_BOOLEAN},
     {NULL, 0},
 };
 
 /* a structure to keep the lists of bed's for each chromosome */
 struct bedHead
 {
     struct bed *head;
 };
 
 struct breakPoint 
 {
 int refAddress;
 int nestLevel;
 int type;
 }; 
 
 struct speciesBreaks
 {
     int currentNestLevel;
     int numBreaks;
     int allocedBreaks;
     struct breakPoint *breaks;
 };
 
 boolean outDir = FALSE;
 char *dir = NULL;
 char *scoring = NULL;
 boolean ignoreDups = FALSE;
 
 void checkBeds(struct bed *bed)
 {
 char *chrom = NULL;
 struct bed *prev = NULL;
 
 for(; bed; prev = bed, bed = bed->next)
     {
     if ((chrom == NULL) || !sameString(chrom, bed->chrom))
 	chrom = bed->chrom;
     else
 	{
 	if ((bed->chromStart == prev->chromStart) && (bed->chromEnd == prev->chromEnd))
 	    {
 	    if (ignoreDups)
 		{
 		prev->next = bed->next;
 		bed = prev;
 		}
 	    else
 		errAbort("dup'ed bed starting at %d end at %d on chrom %s with name %s",
 		    bed->chromStart, bed->chromEnd, bed->chrom, bed->name);
 	    }
 	else if (bed->chromStart < prev->chromEnd)
 	    {
 	    errAbort("bed starting at %d overlaps previous end at %d on chrom %s",
 	    	bed->chromStart, prev->chromEnd, chrom);
 	    }
 	}
     }
 }
 
 
 struct hash *loadSpecies(char *file)
 {
 struct lineFile *lf = lineFileOpen(file, TRUE);
 char *row[1];
 struct hash *cHash = newHash(0);
 while (lineFileRow(lf, row))
     {
     struct speciesBreaks *sb;
     char *name = row[0];
     //struct breakPoint *aBreak;
     AllocVar(sb);
     sb->allocedBreaks = 0;
 
     hashAdd(cHash, name, sb);
     }
 lineFileClose(&lf);
 return cHash;
 }
 
 struct hash *loadRegions(char *file)
 /* load regions into a hash of lists by chrom */
 {
 struct bed *bed = NULL,  *nextBed = NULL;
 struct hash *regionHash = newHash(6);
 struct bed *regions;
 char *chrom = NULL;
 struct bedHead *bHead = NULL;
 
 regions = bedLoadNAll(file, 4);
 /* order by chrom, start */
 slSort(&regions, bedCmp);
 verbose(2, "found %d regions\n", slCount(regions));
 
 /* make sure the bed regions are in the right order */
 slSort(regions, bedCmp);
 checkBeds(regions);
 
 for (bed = regions; bed != NULL; bed = nextBed)
     {
     verbose(3, "region %s:%d-%d\n", bed->chrom, bed->chromStart+1, bed->chromEnd);
     nextBed = bed->next;
     if ((chrom == NULL) || (!sameString(chrom, bed->chrom)))
 	{
 	if ((bHead = hashFindVal(regionHash, bed->chrom)) != NULL)
 	    errAbort("bed file not sorted correctly");
 	
 	AllocVar(bHead);
 	verbose(3, "adding %s to hash\n", bed->chrom);
 	hashAdd(regionHash, bed->chrom, bHead);
 	chrom = bed->chrom;
         }
 
     slAddHead(&bHead->head, bed);
     }
 
 if (bHead)
     slReverse(&bHead->head);
 
 return regionHash;
 }
 
 char *chromFromSrc(char *src)
 /* get chrom name from <db>.<chrom> */
 {
 char *p;
 if ((p = strchr(src, '.')) == NULL)
     errAbort("Can't find chrom in MAF component src: %s\n", src);
 return ++p;
 }
 
 FILE *startOutFile(char *outFile)
 {
 static FILE *f = NULL;
 
 f = mustOpen(outFile, "w");
 verbose(3, "creating %s\n", outFile);
 return f;
 }
 
 void endOutFile(FILE *f)
 {
 if (f && outDir)
     {
     mafWriteEnd(f);
     carefulClose(&f);
     }
 }
 
 void clearBreaks(struct hash *speciesHash)
 {
 struct hashCookie cookie = hashFirst(speciesHash);
 struct hashEl *hel;
 
 while ((hel = hashNext(&cookie)) != NULL)
     {
     struct speciesBreaks *sb = hel->val;
 
     //printf("clearing %s\n",hel->name);
     sb->numBreaks = 0;
     sb->currentNestLevel = 0;
     freez(&sb->breaks);
     sb->allocedBreaks = 0;
     }
 }
 
 struct breakPoint *getBreak(struct speciesBreaks *sb)
 {
 struct breakPoint *aBreak;
 
 if (sb->numBreaks == sb->allocedBreaks)
     {
     int oldSize = sb->allocedBreaks * sizeof(struct breakPoint);
 
     //printf("allocating more \n");
     sb->allocedBreaks += 1000;
     if (sb->breaks == NULL)
 	sb->breaks = needMem(sb->allocedBreaks * sizeof(struct breakPoint));
     else
 	sb->breaks = needMoreMem(sb->breaks, oldSize, 
 		sb->allocedBreaks * sizeof(struct breakPoint));
     }
 
 aBreak = &sb->breaks[sb->numBreaks++];
 
 return aBreak;
 }
 
 void recordBreaks(struct hash *speciesHash, struct mafAli *maf)
 {
 struct hashCookie cookie = hashFirst(speciesHash);
 struct hashEl *hel;
 struct mafComp *masterMc = maf->components;
 
 while ((hel = hashNext(&cookie)) != NULL)
     {
     struct speciesBreaks *sb = hel->val;
     struct mafComp *mc = mafMayFindCompSpecies(maf, hel->name, '.');
 
     if (mc != NULL)
 	{
 	struct breakPoint *aBreak;
 
 	/* check for leftStatus */
 	if ((mc->leftStatus == 'N') || (mc->leftStatus == 'n'))
 	    {
 	    int numDashes = 0;
 	    char *ptr = mc->text;
 
 	    if (mc->text)
 		while (*ptr++ == '-')
 		    numDashes++;
 
 	    aBreak = getBreak(sb);
 
 	    if (mc->leftStatus == 'n')
 		{
 		aBreak->type = 1;
 		sb->currentNestLevel++;
 		}
 	    aBreak->refAddress = masterMc->start + numDashes;
 	    aBreak->nestLevel = sb->currentNestLevel;
 
 	    //printf("adding break %s %d %d\n",hel->name,aBreak->refAddress, aBreak->nestLevel);
 	    }
 
 	if ((mc->rightStatus == 'N') || (mc->rightStatus == 'n'))
 	    {
 	    int numDashes = 0;
 	    char *ptr = &mc->text[maf->textSize - 1];
 
 	    if (mc->text)
 		while (*ptr-- == '-')
 		    numDashes++;
 
 	    aBreak = getBreak(sb);
 	    if (mc->rightStatus == 'n')
 		{
 		aBreak->type = -1;
 		sb->currentNestLevel--;
 		}
 	    aBreak->refAddress = masterMc->start + masterMc->size - numDashes;
 	    aBreak->nestLevel = sb->currentNestLevel;
 
 	    //printf("adding break %s %d %d\n",hel->name,aBreak->refAddress, aBreak->nestLevel);
 	    if (mc->rightStatus == 'N')
 		assert(sb->currentNestLevel== 0);
 	    }
 	}
     }
 }
 
 
 void outItems(FILE *f, struct bedHead *bHead, struct hash *speciesHash)
 {
 struct bed *bed;
 
 bed = bHead->head;
 for(; bed;  bed = bed->next)
     {
     struct hashCookie cookie = hashFirst(speciesHash);
     struct hashEl *hel;
     int nestLevel;
 
     fprintf(f, ">%s\n",bed->name);
     while ((hel = hashNext(&cookie)) != NULL)
 	{
 	struct speciesBreaks *sb = hel->val;
 	struct breakPoint *aBreak;
 
 	fprintf(f, "%s\t",hel->name);
 
 	/* find break before bed chromStart */
 	for(aBreak = sb->breaks; aBreak < &sb->breaks[sb->numBreaks] ;  aBreak++)
 	    {
 	    if (bed->chromStart < aBreak->refAddress)
 		{
 		struct breakPoint *nestBreak = aBreak - 1;
 		int countNestLevel = 0;
 		nestLevel = -1;
 
 		for (; nestBreak >= sb->breaks; nestBreak--)
 		    {
 		    if (nestBreak->type == -1)
 			countNestLevel++;
 		    else if (nestBreak->type == 1)
 			{
 			if (countNestLevel == 0)
 			    {
 			    nestLevel = nestBreak->nestLevel;
 			    break;
 			    }
 			countNestLevel--;
 			}
 		    else if (nestBreak->type == 0)
 			{
 			assert(countNestLevel == 0);
 			nestLevel = 0;
 			break;
 			}
 		    }
 
 		if (nestLevel == -1)
 		    fprintf(f, "%d\t", -1);
 		else
 		    fprintf(f, "%d\t", bed->chromStart - nestBreak->refAddress);
 		break;
 		}
 	    }
 	assert(aBreak < &sb->breaks[sb->numBreaks]);
 	nestLevel = -1000000000;
 	for(; aBreak < &sb->breaks[sb->numBreaks] ;  aBreak++)
 	    {
 	    int countNestLevel = 0;
 
 	    if ((aBreak->refAddress >= bed->chromStart) &&
 		(aBreak->refAddress < bed->chromEnd))
 		{
 		nestLevel = aBreak->nestLevel;
 	//	fprintf(f, "-1\n");
 	//	nestLevel = -2;
 		break;
 		}
 	    if (bed->chromEnd < aBreak->refAddress)
 		{
 		if (aBreak->type == 1)
 		    countNestLevel++;
 		else if (aBreak->type == -1)
 		    {
 		    if (countNestLevel == 0)
 			{
 			nestLevel = aBreak->nestLevel;
 			break;
 			}
 		    countNestLevel--;
 		    }
 		else if (aBreak->type == 0)
 		    {
 		    assert(countNestLevel == 0);
 		    nestLevel = 0;
 		    break;
 		    }
 		}
 	    }
 
 	if (nestLevel == -1000000000)
 	    errAbort("didn't find after nest level %s",hel->name);
 
 	//if (nestLevel != -2)
 	    fprintf(f, "%d\n",  aBreak->refAddress - bed->chromEnd);
 	}
     }
 }
 
 void extractMafs(char *file, FILE *f, 
 	struct hash *regionHash, struct hash *speciesHash, struct hash *statsHash)
 /* extract MAFs in a file from regions specified in hash */
 {
 char *chrom = NULL, *thisChrom;
 struct bedHead *bHead = NULL;
 struct mafFile *mf = mafOpen(file);
 struct mafAli *maf = NULL;
 
 verbose(1, "extracting from %s\n", file);
 for(maf = mafNext(mf); maf; mafAliFree(&maf), maf = mafNext(mf) )
     {
     struct mafComp *mc = maf->components;
 
     thisChrom = chromFromSrc(mc->src);
     if ((chrom == NULL) || (!sameString(chrom, thisChrom)))
 	{
 	if (chrom)
 	    {
 	    if (bHead)
 		outItems(f, bHead, speciesHash);
 	    clearBreaks(speciesHash);
 	    }
 	bHead = hashFindVal(regionHash, thisChrom);
 	chrom = cloneString(thisChrom);
 	}
 
     if (bHead)
 	recordBreaks(speciesHash, maf);
     }
 
 if (bHead)
     outItems(f, bHead, speciesHash);
 
 clearBreaks(speciesHash);
 mafFileFree(&mf);
 }
 
 void mafBreak(char *regionFile, char *speciesFile, char *out, int mafCount, char *mafFiles[])
 /* Extract MAFs in regions listed in regin file */
 {
 int i = 0;
 struct hash *bedHash = NULL;
 struct hash *speciesHash = NULL;
 struct hash *statsHash = newHash(5);
 FILE *f = NULL;
 
 verbose(1, "Extracting from %d files to %s\n", mafCount, out);
 bedHash = loadRegions(regionFile);
 speciesHash = loadSpecies(speciesFile);
 
 f = startOutFile(out);
 
 for (i = 0; i < mafCount; i++)
     extractMafs(mafFiles[i], f, bedHash, speciesHash, statsHash);
 
 endOutFile(f);
 }
 
 int main(int argc, char *argv[])
 /* Process command line. */
 {
 optionInit(&argc, argv, options);
 outDir = optionExists("outDir");
 ignoreDups = optionExists("ignoreDups");
 if (argc < 5)
     usage();
 mafBreak(argv[1], argv[2], argv[3], argc - 4, &argv[4]);
 return 0;
 }