4898794edd81be5285ea6e544acbedeaeb31bf78
max
  Tue Nov 23 08:10:57 2021 -0800
Fixing pointers to README file for license in all source code files. refs #27614

diff --git src/hg/affyTransciptome/affyPairsToSample.c src/hg/affyTransciptome/affyPairsToSample.c
index 41054ca..9a432fd 100644
--- src/hg/affyTransciptome/affyPairsToSample.c
+++ src/hg/affyTransciptome/affyPairsToSample.c
@@ -1,257 +1,257 @@
 /* Copyright (C) 2011 The Regents of the University of California 
- * See README in this or parent directory for licensing information. */
+ * See kent/LICENSE or http://genome.ucsc.edu/license/ for licensing information. */
 
 #include "common.h"
 #include "affyPairs.h"
 #include "affyOffset.h"
 #include "sample.h"
 #include "hash.h"
 #include "linefile.h"
 
 
 void usage()
 /* Give the user a hint about how to use this program. */
 {
 errAbort("affyPairsToSample - Takes a 'pairs' format file from the Affy transcriptome\n"
 	 "data set and combines it with the Affy offset.txt file to output a 'sample' file\n"
 	 "which has the contig coordinates of the result.\n"
 	 "usage:\n\t"
 	 "affyPairsToSample <offset.txt> <grouping amount> <pairs files....>\n");
 }
 
 struct liftSpec
 /* How to lift coordinates. */
     {
     struct liftSpec *next;	/* Next in list. */
     char *oldName;		/* Name in source file. */
     int offset;			/* Offset to add. */
     char *newName;		/* Name in dest file. */
     int size;                   /* Size of new sequence. */
     };
 
 struct liftSpec *readLifts(char *fileName)
 /* Read in lift file. */
 {
 struct lineFile *lf = lineFileOpen(fileName, TRUE);
 int wordCount;
 char *words[16];
 struct liftSpec *list = NULL, *el;
 
 while ((wordCount = lineFileChop(lf, words)) != 0)
     {
     char *offs;
     lineFileExpectWords(lf, 5, wordCount);
     offs = words[0];
     if (!isdigit(offs[0]) && !(offs[0] == '-' && isdigit(offs[1])))
 	errAbort("Expecting number in first field line %d of %s", lf->lineIx, lf->fileName);
     if (!isdigit(words[4][0]))
 	errAbort("Expecting number in fifth field line %d of %s", lf->lineIx, lf->fileName);
     AllocVar(el);
     el->oldName = cloneString(words[1]);
     el->offset = atoi(offs);
     el->newName = cloneString(words[3]);
     el->size = atoi(words[4]);
     slAddHead(&list, el);
     }
 slReverse(&list);
 lineFileClose(&lf);
 printf("Got %d lifts in %s\n", slCount(list), fileName);
 if (list == NULL)
     errAbort("Empty liftSpec file %s", fileName);
 return list;
 }
 
 char *rmChromPrefix(char *s)
 /* Remove chromosome prefix if any. */
 {
 char *e = strchr(s, '/');
 if (e != NULL)
     return e+1;
 else
     return s;
 }
 
 struct hash *hashLift(struct liftSpec *list)
 /* Return a hash of the lift spec. */
 {
 struct hash *hash = newHash(0);
 struct liftSpec *el;
 for (el = list; el != NULL; el = el->next)
     hashAdd(hash, el->oldName, el);
 return hash;
 }
 
 void loadAoHash(struct hash *aoHash, struct affyOffset *aoList)
 /* put the aoList into the hash */
 {
 struct affyOffset *ao = NULL;
 for(ao = aoList; ao != NULL; ao = ao->next)
     {
     hashAddUnique(aoHash, ao->piece, ao);
     }
 }
 
 char *contigFromAffyPairName(char *probeSet)
 /* parse 21/ctg21fin1 from ctg21fin1piece100 */
 {
 char *ret = NULL, *tmp = NULL;
 char buff[128];
 assert(probeSet);
 ret = cloneString(probeSet);
 tmp = strstr(ret, "piece");
 if(tmp == NULL)
     {
     freez(&ret);
     return NULL;
     }
 //    errAbort("affyPairsToSample.c::contigFromAffyPairName() - Couldn't find 'piece' in %s", probeSet);
 tmp[0] = '\0';
 snprintf(buff, sizeof(buff), "%c%c/%s", ret[3],ret[4],ret);
 freez(&ret);
 return cloneString(buff);
 }
 
 struct sample *sampFromAffyPair(struct affyPairs *ap, struct hash *liftHash)
 /* Use the data in the affy pair and the offset info in the hash to make a
    sample data type. */
 {
 struct sample *samp = NULL;
 struct liftSpec *lf = NULL;
 char *name = contigFromAffyPairName(ap->probeSet);
 float score = 0;
 if(name != NULL)
     {
     lf = hashFindVal(liftHash, name);
     freez(&name);
     }
 if(lf != NULL)
     {
     AllocVar(samp);
     samp->chrom = cloneString(lf->newName);
     samp->chromStart = samp->chromEnd = ap->pos + lf->offset;
     samp->name = cloneString("a");
     score = (ap->pm) - (ap->mm);
     if(score < 1)
 	score = 1;
     samp->score = (int)score;
     sprintf(samp->strand, "+");
     samp->sampleCount = 1;
     AllocArray(samp->samplePosition, samp->sampleCount);
     samp->samplePosition[0] = 0;
     AllocArray(samp->sampleHeight, samp->sampleCount);
     samp->sampleHeight[0] = samp->score;
     }
 return samp;
 }
 
 int sampleCoordCmp(const void *va, const void *vb)
 /* Compare to sort based on query. */
 {
 const struct sample *a = *((struct sample **)va);
 const struct sample *b = *((struct sample **)vb);
 int diff;
 diff = strcmp(a->chrom, b->chrom);
 if(diff == 0)
     diff = a->chromStart - b->chromStart;
 return diff;
 }
 
 void addSampleToCurrent(struct sample *target, struct sample *samp, int grouping)
 {
 int i;
 int base = target->sampleCount;
 for(i=0;i<samp->sampleCount; i++)
     {
     assert(base+i < grouping);
     target->score += samp->score;
     target->samplePosition[base + i] = samp->samplePosition[i] + samp->chromStart - target->chromStart;
     target->sampleHeight[base + i] = samp->sampleHeight[i];
     target->sampleCount++;
     }
 }
 
 struct sample *groupByPosition(int grouping , struct sample *sampList)
 {
 struct sample *groupedList = NULL, *samp = NULL, *currSamp = NULL, *sampNext=NULL;
 int count = 0;
 for(samp = sampList; samp != NULL; samp = sampNext)
     {
     sampNext = samp->next;
     AllocVar(currSamp);
     currSamp->chrom = cloneString(samp->chrom);
     currSamp->chromStart = samp->chromStart;
     currSamp->name = cloneString(samp->name);
     snprintf(currSamp->strand, sizeof(currSamp->strand), "%s", samp->strand);
     AllocArray(currSamp->samplePosition, grouping);
     AllocArray(currSamp->sampleHeight, grouping);
     count = 0;
     while(samp != NULL && count < grouping && sameString(samp->chrom,currSamp->chrom))
 	{
 	addSampleToCurrent(currSamp, samp, grouping);
 	count += samp->sampleCount;
 	samp = sampNext = samp->next;
 	}
     if(count != 0)
 	currSamp->score = currSamp->score / count;
     currSamp->chromEnd = currSamp->chromStart + currSamp->samplePosition[count -1];
     slAddHead(&groupedList, currSamp);
     }
 slReverse(&groupedList);
 return groupedList;
 }
 
 void affyPairsToSample(struct hash *liftHash, int grouping, char *pairsIn)
 /* Top level function to run combine pairs and offset files to give sample. */
 {
 struct affyPairs *apList = NULL, *ap = NULL;
 struct sample *sampList = NULL, *samp = NULL;
 struct sample *groupedList = NULL;
 char buff[10+strlen(pairsIn)];
 FILE *out = NULL;
 fprintf(stderr, ".");
 fflush(stderr);
 //warn("Loading Affy Pairs.");
 apList = affyPairsLoadAll(pairsIn);
 //warn("Creating samples.");
 for(ap = apList; ap != NULL; ap = ap->next)
     {
     samp = sampFromAffyPair(ap, liftHash);
     if(samp != NULL)
 	{
 	slAddHead(&sampList, samp);
 	}
     }
 //warn("Sorting Samples");
 slSort(&sampList, sampleCoordCmp);
 groupedList = groupByPosition(grouping, sampList);
 //warn("Saving Samples.");
 snprintf(buff, sizeof(buff), "%s.sample", pairsIn);
 out = mustOpen(buff, "w");
 for(samp = groupedList; samp != NULL; samp = samp->next)
     {
     sampleTabOut(samp, out);
     }
 //warn("Cleaning up.");
 carefulClose(&out);
 sampleFreeList(&sampList);
 sampleFreeList(&groupedList);
 affyPairsFreeList(&apList);
 }
 
 
 int main(int argc, char *argv[])
 {
 int i;
 struct liftSpec *lsList = NULL;
 struct hash *liftHash = NULL;
 int grouping;
 if(argc < 4)
     usage();
 lsList = readLifts(argv[1]);
 liftHash = hashLift(lsList);
 grouping = atoi(argv[2]);
 for(i=3; i<argc; i++)
     affyPairsToSample(liftHash, grouping , argv[i]);
 fprintf(stderr,"\nfinished.\n");
 return 0;
 }