a44421a79fb36cc2036fe116b97ea3bc9590cd0c braney Fri Dec 2 09:34:39 2011 -0800 removed rcsid (#295) diff --git src/hg/sage/createSageSummary.c src/hg/sage/createSageSummary.c index ebd6ae1..2b6ba8a 100644 --- src/hg/sage/createSageSummary.c +++ src/hg/sage/createSageSummary.c @@ -1,257 +1,256 @@ /** \page createSageSummary.doxp createSageSummary Doc Page
createSageSummary takes the tagList file
(i.e. SAGEmap_ug_tag-rel-Nla3-Hs) and the file produced by the
createArraysForTags.pl script in ~/cc/sage/sage/extr
(i.e. tagExpArrays.tab). The output is a file of sage records from
sage.h which have precalculate./createSageSummary SAGEmap_ug_tag-rel-Nla3-Hs tagExpArrays.tab out.tabd unigene entries mean,median, and stdev
of tag counts for each experiment.
*/
#include "common.h"
#include "hash.h"
#include "linefile.h"
#include "sage.h"
#include "sageCounts.h"
-static char const rcsid[] = "$Id: createSageSummary.c,v 1.3 2008/09/03 19:21:18 markd Exp $";
struct sage *createNewSage(int numExp)
{
struct sage *sg = NULL;
AllocVar(sg);
sg->numExps = numExp;
sg->exps = needMem(sizeof(int) * sg->numExps);
sg->meds = needMem(sizeof(float) * sg->numExps);
sg->aves = needMem(sizeof(float) * sg->numExps);
sg->stdevs = needMem(sizeof(float) * sg->numExps);
return sg;
}
struct sage *loadSageTags(char *fileName, int numExps)
{
struct sage *sgList=NULL, *sg=NULL;
char *words[3];
struct lineFile *lf = lineFileOpen(fileName, TRUE);
while(lineFileNextRow(lf, words,3))
{
if(sg == NULL || sg->uni != atoi(words[0]))
{
if(sg != NULL)
slSafeAddHead(&sgList,sg);
sg = createNewSage(numExps);
sg->uni = atoi(words[0]);
snprintf(sg->gb, sizeof(sg->gb), "unknown");
snprintf(sg->gi, sizeof(sg->gb), "unknown");
sg->description = cloneString(words[1]);
sg->numTags =1;
assert(strlen(words[2]) <= 10);
sg->tags = needMem(sizeof(char*) * 1);
sg->tags[0] = needMem(sizeof(char) * 11);
strcpy(sg->tags[0],words[2]);
}
else
{
sg->tags = needMoreMem(sg->tags, (sg->numTags*sizeof(char*)), ((sg->numTags+1)*sizeof(char*)));
sg->tags[sg->numTags] = needMem(sizeof(char) * 11);
strcpy(sg->tags[sg->numTags],words[2]);
sg->numTags++;
}
}
return(sgList);
/*for(sg=sgList; sg != NULL; sg = sg->next)
{
sageTabOut(sg,stdout);
}*/
}
void putTic()
{
printf(".");
fflush(stdout);
}
void loadTagHash(struct hash *h, struct sageCounts *scList)
{
struct sageCounts *sc =NULL;
int count=0;
for(sc=scList;sc!=NULL;sc=sc->next)
{
if(count++ % 10000 == 0)
{
putTic();
}
hashAddUnique(h,sc->tag,sc);
}
printf("\tDone.\n");
}
struct sortable {
struct sortable *next;
int val;
};
int compSortable(const void *e1, const void *e2)
{
const struct sortable *v1 = *((struct sortable**)e1);
const struct sortable *v2 = *((struct sortable**)e2);
return v1->val - v2->val;
}
/*
if N is odd, the median is the element in the middle of the
rearranged sequence, and if N is even, the median is the average of
the two elements in the middle of the rearranged sequence.
*/
float scMedian(struct sageCounts *scList, int index)
{
struct sageCounts *sc = NULL;
struct sortable *s=NULL, *sList = NULL, *next = NULL, *s2 = NULL;
boolean odd = slCount(scList) %2;
int half = slCount(scList)/2;
float median = -1;
for(sc = scList; sc != NULL; sc = sc->next)
{
AllocVar(s);
s->val = sc->expCounts[index];
slAddHead(&sList,s);
}
slSort(&sList,compSortable);
if(odd)
{
s = slElementFromIx(sList,half);
if(s != NULL)
median = s->val;
}
else
{
s = slElementFromIx(sList,half);
s2 = slElementFromIx(sList,half-1);
if(s != NULL && s2 != NULL)
median = (float)((float)s->val + (float)s2->val)/2;
}
for(s=sList; s != NULL; s = next)
{
next = s->next;
freez(&s);
}
return median;
}
float scAverage(struct sageCounts *scList, int index)
{
struct sageCounts *sc = NULL;
int sum=0,count=0;
if(slCount(scList)==0)
return -1.0;
for(sc=scList;sc!=NULL;sc=sc->next)
{
count++;
sum+= sc->expCounts[index];
}
return (float)sum/count;
}
float scStdev(struct sageCounts *scList, int index, float ave)
{
struct sageCounts *sc = NULL;
float sum =0;
int count =0;
if(ave == -1)
return -1;
if(slCount(scList)==0)
return -1.0;
if(slCount(scList)==1)
return 0.0;
for(sc=scList;sc!=NULL;sc=sc->next)
{
sum += (sc->expCounts[index]-ave) * (sc->expCounts[index]-ave);
count++;
}
return (float) sqrt(sum/(count-1));
}
void printListVals(struct sageCounts *scList, int index)
{
struct sageCounts *sc = NULL;
struct sortable *s=NULL, *sList = NULL, *next = NULL;
for(sc = scList; sc != NULL; sc = sc->next)
{
AllocVar(s);
s->val = sc->expCounts[index];
slAddHead(&sList,s);
}
slSort(&sList,compSortable);
for(s = sList; s!= NULL; s=s->next)
uglyf("%d,",s->val);
uglyf("\n");
for(s=sList; s != NULL; s = next)
{
next = s->next;
freez(&s);
}
}
void createSageSummary(char *tagList, char *tagCounts, char *outFile)
{
struct sage *sgList = NULL,*sg = NULL;
struct sageCounts *scList = NULL, *sc;
struct hash *scHash = newHash(16);
FILE *out = mustOpen(outFile,"w");
int i,count=0;
printf("Loading tagCounts.\n");
scList = sageCountsLoadAll(tagCounts);
printf("Loading SageTags.\n");
sgList = loadSageTags(tagList, scList->numExps);
printf("Inserting scList into hash, %d elements.\n",slCount(scList));
loadTagHash(scHash,scList);
printf("Calculating avgs and stdevs");
fflush(stdout);
for(sg=sgList; sg != NULL; sg=sg->next)
{
if(count++ %1000 == 0)
putTic();
scList = NULL;
for(i=0;i