src/hg/instinct/lib/hgHeatmapLib.c 1.59

1.59 2009/06/04 03:42:49 jsanborn
added copyright notices, removed cluster library
Index: src/hg/instinct/lib/hgHeatmapLib.c
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/hg/instinct/lib/hgHeatmapLib.c,v
retrieving revision 1.58
retrieving revision 1.59
diff -b -B -U 4 -r1.58 -r1.59
--- src/hg/instinct/lib/hgHeatmapLib.c	6 May 2009 21:20:13 -0000	1.58
+++ src/hg/instinct/lib/hgHeatmapLib.c	4 Jun 2009 03:42:49 -0000	1.59
@@ -1,4 +1,8 @@
+/********************************************************************************/
+/* Copyright 2007-2009 -- The Regents of the University of California           */
+/********************************************************************************/
+
 /* hgHeatmap is a CGI script that produces a web page containing
  * a graphic with all chromosomes in genome, and a heatmap or two
  * on top of them. This module just contains the main routine,
  * the routine that dispatches to various page handlers, and shared
@@ -1849,177 +1853,8 @@
 result->max = max;
 return result;	
 }
 
-
-/* Calculate the differnce of means between two subsets
- * Clean memory of returned pointer after use */
-struct analysisResultHash *diffPCASubgroupHash (struct genoHeatmap *gh,int subsetNum, char *raName,
-                                                struct geneSet *geneSets, char *tableName,
-                                                boolean (*func)(float data1[], unsigned long n1,
-                                                                float data2[], unsigned long n2,
-                                                                float *r, float *prob) )
-{
-if (!gh || !func || !subsetNum || (subsetNum !=2) )
-    return NULL;
-
-struct hash **subHash = getSubgroupChromHeatmapHash(gh, subsetNum,raName, geneSets);
-
-char *probeAliases = gh->probeTable;
-
-struct hash *geneHash = NULL;
-getChromHeatmapHash(&geneHash, database, probeAliases,tableName, NULL, geneSets); 
-if (!subHash || !geneHash)
-    return NULL;
-
-int i;
-for (i=0; i< subsetNum; i++)
-    if (!subHash[i])
-        return NULL;
-
-struct hashEl **ptHashStart=NULL;
-AllocArray(ptHashStart, subsetNum);
-struct hashEl **ptHashEl=NULL;
-AllocArray(ptHashEl, subsetNum);
-
-for (i=0; i< subsetNum; i++)
-    {
-    ptHashEl[i] = hashElListHash(subHash[i]);
-    ptHashStart[i] = ptHashEl[i];
-    }
-
-struct hash *newBed5Hash =newHash(0);
-struct hgStats *stats=NULL;
-double min = DBL_MAX, max = -DBL_MAX;
-
-char *name;
-struct bed *bed = NULL;
-struct bed *bed0 = NULL, *bed1 =NULL;
-
-struct geneSet *gs;
-for (gs = geneSets; gs; gs = gs->next)
-    {
-    struct hash *statHash = performPCA(geneHash, gs->genes);
-    if (statHash == NULL)
-	continue;
-
-    int compIndex;
-    struct hashCookie hc = hashFirst(statHash);
-    struct hashEl *el = hashNext(&hc);
-    struct pcaData *pd = el->val;
-    int totalComps = pd->numComponents;
-
-    for (compIndex = 0; compIndex < totalComps; compIndex++)
-	{
-	struct bed *bed0PCA = AllocA(struct bed);
-	struct bed *bed1PCA = AllocA(struct bed);
-	
-	bed0PCA->name = cloneString(gs->name);
-	bed1PCA->name = cloneString(gs->name);
-	bed0PCA->chrom = cloneString("chrXXX"); // put dummy chromosomes, never used
-	bed1PCA->chrom = cloneString("chrXXX");
-	
-	for (i = 0; i < subsetNum; i++)
-	    ptHashEl[i] = ptHashStart[i];
-	
-	boolean shouldSkip = FALSE;
-	for (i = 0; i < subsetNum; i++)
-	    if (!ptHashEl[i])
-		shouldSkip = TRUE;    
-
-	if (shouldSkip)
-	    {
-	    hashFree(&statHash);
-	    bedFree(&bed0PCA);
-	    bedFree(&bed1PCA);
-	    continue;
-	    }
-
-	bed0 = (struct bed*) ptHashEl[0]->val; 
-	bed1 = (struct bed*) ptHashEl[1]->val;
-	
-	bed0PCA->expCount = bed0->expCount;
-	AllocArray(bed0PCA->expScores, bed0PCA->expCount);
-	bed1PCA->expCount = bed1->expCount;
-	AllocArray(bed1PCA->expScores, bed1PCA->expCount);
-	
-	for (i = 0; i < bed0->expCount; i++)
-	    bed0PCA->expScores[i] = 0.0;
-	for (i = 0; i < bed1->expCount; i++)
-	    bed1PCA->expScores[i] = 0.0;
-	
-	while (ptHashEl[0] && ptHashEl[1])
-	    {
-	    name = ptHashEl[0]->name;
-	    el = hashLookup(statHash, name);
-
-	    if (el)
-		{
-		pd = el->val;
-		double factor = 0.0;
-		if (compIndex < pd->numComponents)
-		    factor = pd->components[compIndex];
-
-		bed  = (struct bed*) ptHashEl[0]->val;
-		bed0 = (struct bed*) ptHashEl[0]->val; 
-		bed1 = (struct bed*) ptHashEl[1]->val;
-		
-		for (i = 0; i < bed0->expCount; i++)
-		    bed0PCA->expScores[i] += bed0->expScores[i] * factor;
-		
-		for (i = 0; i < bed1->expCount; i++)
-		    bed1PCA->expScores[i] += bed1->expScores[i] * factor;
-		}
-	    for (i = 0; i < subsetNum; i++)
-		ptHashEl[i] = ptHashEl[i]->next;
-	    }
-	
-	float r,p ;
-	if (func(bed0PCA->expScores, bed0PCA->expCount, bed1PCA->expScores, bed1PCA->expCount, &r, &p))
-	    {
-	    if (fabs(p) > max)
-		max =fabs(p);
-	    if (fabs(p) < min)
-		min =fabs(p);
-	    AllocVar(stats);
-	    stats->chrom = cloneString(bed0PCA->chrom);
-	    stats->chromStart = 0;
-	    stats->chromEnd = 0;
-	    stats->name = cloneString(bed0PCA->name);
-	    stats->stats = r;
-	    stats->prob = p;
-	    stats->compIndex = compIndex;
-	    stats->totalComps = totalComps;
-	    if (stats->stats < 0)
-		stats->outputVal = -p;
-	    else
-		stats->outputVal = p;
-	    hashAdd(newBed5Hash, bed0PCA->name, stats);
-	    }
-	
-	bedFree(&bed0PCA);
-	bedFree(&bed1PCA);
-	}
-    hashFree(&statHash);
-    }
-
-//free memory
-for (i=0; i<subsetNum; i++)
-    {
-    hashFree(&subHash[i]);
-    hashElFreeList(&ptHashStart[i]);
-    }
-free(subHash);
-
-if (min == DBL_MAX || max == -DBL_MAX) //no data
-    return NULL;
-
-struct analysisResultHash *result = AllocVar(result);
-result->hash = newBed5Hash;
-result->min = min;
-result->max = max;
-return result;
-}
 /* Calculate the differnce of means between two subsets
  * Clean memory of returned pointer after use */
 struct analysisResultHash *diffMetaSubgroupHash (struct genoHeatmap *gh,
                                                 int subsetNum, char *raName,
@@ -2207,14 +2042,11 @@
 					     boolean (*func)(float data1[], unsigned long n1,
 							     float data2[], unsigned long n2,
 							     float *r, float *prob) )
 {
-if (blockStatFunc 
-    && sameWord(blockStatFunc,blockStatTest[pca])) // block level PCA on geneset -> t-test
-    return diffPCASubgroupHash(gh, subsetNum, raName, geneSets, gh->name, func);
-else if (blockStatFunc && 
+if (blockStatFunc && 
 	 (sameWord(blockStatFunc,blockStatTest[fisher]) 
-	  || sameWord(blockStatFunc,blockStatTest[weightedZ]))) // block-level PCA -> t-test
+     || sameWord(blockStatFunc,blockStatTest[weightedZ])))
     return diffMetaSubgroupHash(gh, subsetNum, raName, geneSets, gh->name, func,blockStatFunc);
 else
     return diffAveSubgroupHash(gh, subsetNum, raName, geneSets, func);
 }
@@ -2323,44 +2155,8 @@
 
 return newGeneSets;
 }
 
-void rankPCADiffAvePathways ( struct genoHeatmap *gh, int subsetNum, char *raName, char *chromHeatmap, 
-			      struct geneSet *geneSets, FILE *output,
-			      boolean (*func)(float data1[], unsigned long n1, float data2[], 
-					      unsigned long n2, float *r, float *prob),
-			      boolean useAccess)
-{
-if (!gh || !func || !subsetNum || (subsetNum !=2) )
-    return;
-
-struct analysisResultHash *ar =
-    diffPCASubgroupHash(gh, subsetNum, raName, geneSets, chromHeatmap, func);
-
-if (!ar)
-    return;
-
-struct hgStats *bd, *bedList = NULL;
-struct hashCookie hc = hashFirst(ar->hash);
-struct hashEl *el;
-while ((el = hashNext(&hc)) != NULL)                                                            
-    {
-    bd = el->val;
-    bd->outputVal = fabs(bd->prob);
-    slAddHead(&bedList, bd);
-    }
-
-slSort(&bedList, hgStatsCmpScore);
-slReverse(&bedList);
-
-int rank=0;
-for (bd = bedList; bd; bd = bd->next)
-    {
-    rank++;
-    if(output)
-	fprintf(output,"%s\t%d\t%f\t%d\n", bd->name,bd->compIndex, bd->outputVal, rank);
-    }
-}
 
 /*Rank genes based on the differnce of between two subsets */
 void rankDiffAveGenes ( struct genoHeatmap *gh,int subsetNum, char *raName, char *chromHeatmap, 
 			struct slName *genes, FILE *output,