4898794edd81be5285ea6e544acbedeaeb31bf78
max
  Tue Nov 23 08:10:57 2021 -0800
Fixing pointers to README file for license in all source code files. refs #27614

diff --git src/hg/snp/pbToAlleleCounts/pbToAlleleCounts.c src/hg/snp/pbToAlleleCounts/pbToAlleleCounts.c
index 67d206f..862874a 100644
--- src/hg/snp/pbToAlleleCounts/pbToAlleleCounts.c
+++ src/hg/snp/pbToAlleleCounts/pbToAlleleCounts.c
@@ -1,245 +1,245 @@
 /* pbToAlleleCounts.c -- reformat Seattel SNP data from prettybase format to bed with alleles and counts */
 
 /* Copyright (C) 2011 The Regents of the University of California 
- * See README in this or parent directory for licensing information. */
+ * See kent/LICENSE or http://genome.ucsc.edu/license/ for licensing information. */
 #include "common.h"
 #include "errAbort.h"
 #include "linefile.h"
 #include "obscure.h"
 #include "dystring.h"
 #include "cheapcgi.h"
 #include "dnaseq.h"
 #include "hdb.h"
 #include "memalloc.h"
 #include "hash.h"
 #include "jksql.h"
 #include "dystring.h"
 #include "options.h"
 #include "sqlNum.h"
 
 
 static int ssnpId=0;
 boolean strictSnp=FALSE;
 boolean strictBiallelic=FALSE;
 
 static struct optionSpec optionSpecs[] =
 /* command line option specifications */
 {
     {"strictBiallelic", OPTION_BOOLEAN},
     {"strictSnp", OPTION_BOOLEAN},
     {NULL, 0}
 };
 
 void usage()
 /* Explain usage and exit. */
 {
 errAbort("pbToAlleleCounts -- reformat Seattle SNP data from prettybase format to bed with alleles and counts\n"
 	 "Usage:   pbToAlleleCounts prettybaseFile strandFile bedFile\n"
 	 "Options: strictSnp - ignore loci with non-SNP mutations\n"
 	 "         strictBiallelic - ignore loci with more than two alleles \n");
 }
 
 struct alleleInfo
 /* */
 {
     struct alleleInfo *next;
     char  *allele;
     int    count;
 };
 
 struct locus
 /*  */
 {
     struct locus *next;
     char   *chrom;
     int     chromStart;
     int     chromEnd;
     char   *name; /* unique locus id */
     char   *strand;
     char   *hugoId;
     int     sampleSize;
     int     alleleCount;
     boolean strictSnp;
     struct  alleleInfo *alleles;
 };
 
 struct strand
 {
     struct strand *next;
     char *name;
     char *strand;
 };
 
 void convertToUppercase(char *ptr)
 /* convert and entire string to uppercase */
 {
 for( ; *ptr !='\0'; ptr++)
     *ptr=toupper(*ptr);
 }
 
 void printAlleles(FILE *f, struct alleleInfo *ai)
 /* print alleles and counts for this locus */
 {
 struct alleleInfo *i = NULL;
 
 for (i=ai; i!=NULL; i=i->next)
     fprintf(f,"\t%s\t%d", i->allele, i->count);
 fprintf(f,"\n");
 }
 
 void printLocus(FILE *f, struct locus *l)
 /* print positional information for this locus */
 {
 if (strictSnp&&!l->strictSnp)
     return;
 if (strictBiallelic&&l->alleleCount!=2)
     return;
 fprintf(f,"%s\t%d\t%d\t%s\t0\t%s\t%d\t%d", l->chrom, l->chromStart, l->chromEnd, l->name, l->strand, l->sampleSize, l->alleleCount);
 printAlleles(f, l->alleles);
 }
 
 void printLoci(char *output, struct locus *head)
 /* print all information for this locus */
 {
 FILE  *f        = mustOpen(output, "w");
 struct locus *l = NULL;
 
 for (l=head; l!=NULL; l=l->next)
     printLocus(f, l);
 }
 
 struct hash *readStrand(char *strandFile)
 /* read the strands from a file */
 {
 struct hash *strandHash = newHash(16);
 struct lineFile  *lf = lineFileOpen(strandFile, TRUE); /* input file */
 char             *row[2]; /* number of fields in input file */
 
 while (lineFileRow(lf, row)) /* process one snp at a time */
     {
     struct strand *strand;
 
     AllocVar(strand);
     strand->name   = cloneString(row[0]);
     strand->strand = cloneString(row[1]);
     hashAddSaveName(strandHash, strand->name, strand, &strand->name);
     }
 return strandHash;
 }
 
 struct locus *readSs(char *pbFile, char *strandFile)
 /* determine which allele matches assembly and store in details file */
 {
 struct hash *strandHash = readStrand(strandFile);
 struct strand *strand = NULL;
 struct hash *missingHugoIdHash = newHash(16);
 struct hashCookie hashPtr;
 char *missingName;
 struct locus     *l  = NULL, *lPtr = NULL;
 struct alleleInfo *aPtr = NULL;
 struct lineFile  *lf = lineFileOpen(pbFile, TRUE); /* input file */
 char             *row[4], *row2[3]; /* number of fields in input file */
 char  *pbName;
 char   chrom[32];
 int    chromStart;
 int    chromEnd;
 char   name[32];
 char  *allele;
 
 while (lineFileRow(lf, row)) /* process one snp at a time */
     {
     struct alleleInfo *ai1 = NULL, *ai2 = NULL, *aiPtr;
     struct locus *m        = NULL;
 
     chopString(row[0], "-", row2, 3);
     chromStart = sqlUnsigned(row2[0]);
     chromEnd   = chromStart+1;
     safef(chrom, sizeof(chrom), "chr%s", row2[2]);
 
     if(l==NULL||l->chrom==NULL||l->chromStart!=chromStart||!(sameString(l->chrom,chrom)))
 	{
 	AllocVar(m);
 	safef(name, sizeof(name), "%s_%d", row2[1], ++ssnpId);
 	m->chrom       = cloneString(chrom);
 	m->chromStart  = chromStart;
 	m->chromEnd    = chromEnd;
 	m->name        = cloneString(name);
 	m->hugoId      = cloneString(row2[1]);
 	m->strictSnp   = TRUE;
 	slAddHead(&l, m);
 	}
 
     allele=cloneString(row[2]);
     convertToUppercase(allele);
     if ( sameString(allele,"A") || sameString(allele,"C") || 
 	 sameString(allele,"G") || sameString(allele,"T") )
 	{
 	for (aiPtr=l->alleles; aiPtr!=NULL; aiPtr=aiPtr->next)
 	    if (sameString(aiPtr->allele, allele))
 		break;
 	if (aiPtr==NULL)
 	    {
 	    AllocVar(ai1);
 	    ai1->allele=cloneString(allele);
 	    slAddHead(&(l->alleles), ai1);
 	    l->alleleCount++;
 	    aiPtr=l->alleles;
 	    }
 	aiPtr->count++;
 	l->sampleSize++;
 	}
 
     allele=cloneString(row[3]);
     convertToUppercase(allele);
     if ( sameString(allele,"A") || sameString(allele,"C") || 
 	 sameString(allele,"G") || sameString(allele,"T") )
 	{
 	for (aiPtr=l->alleles; aiPtr!=NULL; aiPtr=aiPtr->next)
 	    if (sameString(aiPtr->allele, allele))
 		break;
 	if (aiPtr==NULL)
 	    {
 	    AllocVar(ai2);
 	    ai2->allele=cloneString(allele);
 	    slAddHead(&(l->alleles), ai2);
 	    l->alleleCount++;
 	    aiPtr=l->alleles;
 	    }
 	aiPtr->count++;
 	l->sampleSize++;
 	}
     }
 slReverse(&l);
 for(lPtr=l; lPtr!=NULL; lPtr=lPtr->next)
     {
     strand = hashFindVal(strandHash, lPtr->hugoId);
     if (strand == NULL)
 	{
 	hashStore(missingHugoIdHash, lPtr->hugoId);
 	slRemoveEl(l, lPtr);
 	continue;
 	}
     lPtr->strand = cloneString(strand->strand);
     }
 freeHash(&strandHash);
 hashPtr = hashFirst(missingHugoIdHash);
 while ( (missingName=hashNextName(&hashPtr)) != NULL )
     printf("HUGO ID was not found in strand.txt (usually from proteome.hgncXref): %s\n", missingName);
 freeHash(&missingHugoIdHash);
 return l;
 }
 
 int main(int argc, char *argv[])
 /* error check, process command line input, and call getSnpDetails */
 {
 struct locus *l = NULL;
 
 optionInit(&argc, argv, optionSpecs);
 strictSnp = optionExists("strictSnp");
 strictBiallelic = optionExists("strictBiallelic");
 if (argc != 4)
     usage();
 l = readSs(argv[1], argv[2]);
 printLoci(argv[3], l);
 return 0;
 }