src/hg/instinct/lib/hgStats.c 1.5

1.5 2009/06/04 03:42:50 jsanborn
added copyright notices, removed cluster library
Index: src/hg/instinct/lib/hgStats.c
===================================================================
RCS file: src/hg/instinct/lib/hgStats.c
diff -N src/hg/instinct/lib/hgStats.c
--- src/hg/instinct/lib/hgStats.c	21 Jan 2009 23:28:19 -0000	1.4
+++ /dev/null	1 Jan 1970 00:00:00 -0000
@@ -1,1206 +0,0 @@
-/* 
- * hgStats -- wrappers that set up hgHeatmap data for cluster statistical routines
- */
-
-#include "common.h"
-#include "bed.h"
-#include "memgfx.h"
-#include "vGfx.h" 
-#include "cart.h"
-#include "errCatch.h"
-#include "hash.h"
-#include "hdb.h"
-#include "web.h"
-#include "htmshell.h"
-#include "featuresLib.h"
-#include "hPrint.h"
-#include "trashDir.h"
-#include "hgStats.h"
-
-#define MAX_ROWS 2000    // Maximum number of rows to cluster
-#define MAX_COLUMNS 1000 // Maximum number of columns to cluster
-
-#define CLIP(p,limit) if (p < 0) p = 0; if (p >= (limit)) p = (limit)-1; 
-
-
-void getNodeOrderFromTree(Node* tree, int nNodes, const double* order, 
-			  double *nodeorder, int *nodecounts, char metric)
-{
-if (tree == NULL || nodeorder == NULL || nodecounts == NULL) 
-    return;
-
-int i;
-
-if (metric=='e' || metric=='b')
-    /* Scale all distances such that they are between 0 and 1 */
-    { 
-    double scale = 0.0;
-    for (i = 0; i < nNodes; i++)
-	if (tree[i].distance > scale) 
-	    scale = tree[i].distance;
-    
-    if (scale) 
-	for (i = 0; i < nNodes; i++) 
-	    tree[i].distance /= scale;
-    }
-
-/* Now we join nodes */
-for (i = 0; i < nNodes; i++)
-    { 
-    int min1 = tree[i].left;
-    int min2 = tree[i].right;
-
-    /* min1 and min2 are the elements that are to be joined */
-    double order1;
-    double order2;
-    int counts1;
-    int counts2;
-
-    if (min1 < 0)
-	{ 
-	int index1 = -min1-1;
-	order1 = nodeorder[index1];
-	counts1 = nodecounts[index1];
-	
-	tree[i].distance = max(tree[i].distance, tree[index1].distance);
-	}
-    else
-	{ 
-	order1 = order[min1];
-	counts1 = 1;
-	}
-
-    if (min2 < 0)
-	{ 
-	int index2 = -min2-1;
-	order2 = nodeorder[index2];
-	counts2 = nodecounts[index2];
-	
-	tree[i].distance = max(tree[i].distance, tree[index2].distance);
-	}
-    else
-	{ 
-	order2 = order[min2];
-	counts2 = 1;
-	}
- 
-    nodecounts[i] = counts1 + counts2;
-    nodeorder[i] = (counts1*order1 + counts2*order2) / (counts1 + counts2);
-    }
-}
-
-
-static void TreeSort(const int nNodes, const int nData, int *dataindex, const double* order,
-		     const double* nodeorder, const int* nodecounts, Node* tree)
-{ 
-const int nElements = nNodes + 1;
-int i;
-double* neworder = needMem(nElements*sizeof(double)); /* initialized to 0.0 */
-int* clusterids = needMem(nElements*sizeof(int));
-
-for (i = 0; i < nElements; i++) 
-    clusterids[i] = i;
-
-for (i = 0; i < nNodes; i++)
-    { 
-    const int i1 = tree[i].left;
-    const int i2 = tree[i].right;
-    const double order1 = (i1<0) ? nodeorder[-i1-1] : order[i1];
-    const double order2 = (i2<0) ? nodeorder[-i2-1] : order[i2];
-    const int count1 = (i1<0) ? nodecounts[-i1-1] : 1;
-    const int count2 = (i2<0) ? nodecounts[-i2-1] : 1;
-    /* If order1 and order2 are equal, their order is determined by 
-     * the order in which they were clustered */
-    if (i1<i2)
-	{ 
-	const double increase = (order1<order2) ? count1 : count2;
-	int j;
-	for (j = 0; j < nElements; j++)
-	    { 
-	    const int clusterid = clusterids[j];
-	    if (clusterid==i1 && order1>=order2) 
-		neworder[j] += increase;
-	    if (clusterid==i2 && order1<order2) 
-		neworder[j] += increase;
-	    if (clusterid==i1 || clusterid==i2) 
-		clusterids[j] = -i-1;
-	    }
-	}
-    else
-	{ 
-	const double increase = (order1<=order2) ? count1 : count2;
-	int j;
-	for (j = 0; j < nElements; j++)
-	    { 
-	    const int clusterid = clusterids[j];
-	    if (clusterid==i1 && order1>order2) 
-		neworder[j] += increase;
-	    if (clusterid==i2 && order1<=order2) 
-		neworder[j] += increase;
-	    if (clusterid==i1 || clusterid==i2) 
-		clusterids[j] = -i-1;
-	    }
-	}
-    }
-freez(&clusterids);
-
-for (i = 0; i < nData; i++)
-    dataindex[i] = i;
-
-sort(nData, neworder, dataindex);
-
-freez(&neworder);
-return;
-}
-
-
-void initializeData(int rows, int columns, struct hash *geneHash, struct slName *genes, 
-		    double **data, int **mask, double *arrayweight, 
-		    double *geneorder, char **genename)
-/* 
- * Code was adapted from LOAD function found in ../cluster/data.c to prepare 
- * data for input to treecluster program. 
- * Copyright (C) 2002 Michiel Jan Laurens de Hoon.
- */
-{
-if (geneHash == NULL || data == NULL || mask == NULL || arrayweight == NULL ||
-    geneorder == NULL || genename == NULL)
-    return;
-
-int column, row;
-struct slName *sl;
-
-/* Set up array weights and array order (not used since we're ordering by gene) */
-for (column = 0; column < columns; column++)
-    arrayweight[column] = 1.0;
-
-/* Fill data, make sure mask is set to one for all data so that
- * data is used in clustering */
-for (row = 0; row < rows; row++)
-    { 
-    data[row] = needMem(columns*sizeof(double));
-    mask[row] = needMem(columns*sizeof(int));
-    }
-
-/* Fill the genename with gene names */
-row = 0;
-for (sl = genes; sl; sl = sl->next)
-    {
-    genename[row] = cloneString(sl->name); 
-    row++;
-    }
-
-/* Fill the gene weights with the default value of 1.0 */
-for (row = 0; row < rows; row++) 
-    geneorder[row] = row;
-
-row = 0;
-for (sl = genes; sl; sl = sl->next)
-    {
-    struct hashEl *el = hashLookup(geneHash, sl->name);
-    if (!el)
-	{
-//	uglyf("<BR>%s not found<BR>", sl->name);
-	for (column = 0; column < columns; column++)
-	    {
-	    data[row][column] = 0.0;
-	    mask[row][column] = 0;
-	    }
-	}
-    else
-	{
-//	uglyf("<BR>%s FOUND<BR>", sl->name);
-	struct bed *nb = el->val;
-	for(column = 0; column < nb->expCount; column++)
-	    {
-	    if (column < columns)
-		{
-		data[row][column] = nb->expScores[column];
-		mask[row][column] = 1;
-		}
-	    }
-	}
-    row++;
-    }
-}
-
-Node *clusterSamplesByGeneSet(struct hash *geneHash, struct slName *allGenes, 
-			      char method, char metric, int transpose, int *nRows, int *nCols)
-/* Set up data structures for cluster software, perform hierarchical clustering, and 
- * return a new sorted list of gene names according to clustering */
-{
-if (geneHash == NULL || allGenes == NULL)
-    return NULL;
-
-if (method == 'x')  // should never happen ('x' = don't cluster)
-    return NULL;
-
-struct bed *nb = NULL;
-struct slName *sl, *genes = NULL;
-struct hashEl *el;
-
-for (sl = allGenes; sl ; sl = sl->next)
-    {
-    el = hashLookup(geneHash, sl->name);
-    if (el != NULL)
-        {
-        slNameAddHead(&genes, sl->name);
-        nb = el->val;
-        }
-    }
-slReverse(&genes);
-  
-if (!nb)
-    return NULL;
-
-int columns = nb->expCount;
-int rows = slCount(genes);
-
-/* Allocate space for array weights */
-double *arrayweight = needMem(columns*sizeof(double));
-
-/* Allocate space for data */
-double **data = needMem(rows*sizeof(double*));
-int **mask = needMem(rows*sizeof(int*));
- 
-/* Allocate space for gene quantities */
-double *geneorder = needMem(rows*sizeof(double));
-int *geneindex = needMem(rows*sizeof(int));
-char **genename = needMem(rows*sizeof(char*));
-
-initializeData(rows, columns, geneHash, genes, data, mask, arrayweight, geneorder, genename); 
-
-Node* tree = treecluster(rows, columns, data, mask, arrayweight, transpose, metric, method, NULL);
-
-freez(&geneorder);
-freez(&geneindex);
-freez(&genename);
-freez(&arrayweight);
-freez(&data);
-freez(&mask);
-
-*nRows = rows;
-*nCols = columns;
-return tree;
-}
-
-struct slName *clusterGeneSet(struct hash *geneHash, struct slName *allGenes, 
-			      char method, char metric)
-/* Set up data structures for cluster software, perform hierarchical clustering, and 
- * return a new sorted list of gene names according to clustering */
-{
-if (geneHash == NULL || allGenes == NULL)
-    return NULL;
-
-if (method == 'x')  // should never happen ('x' = don't cluster)
-    return NULL;
-
-struct bed *nb = NULL;
-struct slName *sl, *genes = NULL;
-struct hashEl *el;
-
-for (sl = allGenes; sl ; sl = sl->next)
-    {
-    el = hashLookup(geneHash, sl->name);
-    if (el != NULL)
-        {
-        slNameAddHead(&genes, sl->name);
-        nb = el->val;
-        }
-    }
-slReverse(&genes);
-  
-if (nb == NULL)
-    return NULL;
-
-int columns = nb->expCount;
-int rows = slCount(genes);
-
-if (rows > MAX_ROWS || columns > MAX_COLUMNS)
-    return NULL;
-
-/* Allocate space for array weights */
-double *arrayweight = needMem(columns*sizeof(double));
-
-/* Allocate space for data */
-double **data = needMem(rows*sizeof(double*));
-int **mask = needMem(rows*sizeof(int*));
- 
-/* Allocate space for gene quantities */
-double *geneorder = needMem(rows*sizeof(double));
-int *geneindex = needMem(rows*sizeof(int));
-char **genename = needMem(rows*sizeof(char*));
-
-initializeData(rows, columns, geneHash, genes, data, mask, arrayweight, geneorder, genename); 
-
-/* Perform hierarchical clustering. */
-Node* tree = treecluster(rows, columns, data, mask, arrayweight, 0, metric, method, NULL);
-
-if (tree == NULL)
-    return NULL;
-
-
-const int nNodes = rows - 1;
-double *nodeorder = needMem(nNodes*sizeof(double));
-int *nodecounts = needMem(nNodes*sizeof(int));
-getNodeOrderFromTree(tree, nNodes, geneorder, nodeorder, nodecounts, metric); 
-
-TreeSort(nNodes, rows, geneindex, geneorder, nodeorder, nodecounts, tree);
-
-int i;
-struct slName *newGeneOrder = NULL;
-for (i = 0; i < nNodes; i++)
-    slNameAddHead(&newGeneOrder, genename[geneindex[i]]);
-
-slReverse(&newGeneOrder);
-
-freez(&nodecounts);
-freez(&nodeorder);
-freez(&tree);
-freez(&geneorder);
-freez(&geneindex);
-freez(&genename);
-freez(&arrayweight);
-freez(&data);
-freez(&mask);
-
-return newGeneOrder;
-}
-
-void outputGenePCA(int rows, int columns, struct slName *genes, 
-		   double eigenvalues[], double** eigenvectors, int order[])
-{ 
-int row, column;
-
-/* Fill the genename with gene names */
-row = 0;
-struct slName *sl;
-char **genename = needMem(rows*sizeof(char*)); 
-for (sl = genes; sl; sl = sl->next)
-    {
-    genename[row] = cloneString(sl->name);
-    row++;
-    }
-   
-uglyf("\n"); 
-for (column = 0; column < min(rows, columns); column++)
-    uglyf("%f,", eigenvalues[order[column]]);
-
-uglyf("\n");
-if (rows > columns)
-    { 
-    for (row = 0; row < rows; row++)
-	{ 
-	uglyf("%s   ", genename[row]); 
-	for (column = 0; column < columns; column++)
-	    uglyf("%f,", eigenvectors[row][order[column]]);
-	uglyf("\n");
-	}
-    }
-else
-    { 
-    for (row = 0; row < rows; row++)
-	{ 
-	uglyf("%s   ", genename[order[row]]);
-	for (column = 0; column < rows; column++)
-	    uglyf("%f,", eigenvectors[column][order[row]]);
-	uglyf("\n");
-	}
-    }
-
-freez(&genename); 
-}
-
-
-void pcaGeneSet(struct hash *geneHash, struct slName *genes, 
-		int rows, int columns, double **data, char **genename, double **u, double **v, 
-		double *w, int *svdorder)      
-/* Set up data structures for cluster software and perform PCA on gene set
- * adapted from PerformPCA routine in "data.c" in Cluster 3.0 package */
-{
-if (geneHash == NULL || genes == NULL)
-    return;
-
-/* Allocate space for array weights */
-double *arrayweight = needMem(columns*sizeof(double));
-
-/* Allocate space for data */
-//double **data = needMem(rows*sizeof(double*));
-int **mask = needMem(rows*sizeof(int*));
-
-/* Allocate space for gene quantities */
-double *geneorder = needMem(rows*sizeof(double));
-
-initializeData(rows, columns, geneHash, genes, data, mask, arrayweight, geneorder, genename);    
-/* free data that is not needed for PCA (should i have a specialized initializeData 
- * routines for PCA and hierarchical clustering? */ 
-freez(&arrayweight);
-freez(&geneorder);
-       
-int row, column;
-//int* svdorder= NULL;
-double* svdsortval= NULL;
-int ierr = 0;
-const int nvals = min(rows,columns);
-
-if (!u || !v || !w)
-    { 
-    if (u) 
-	freez(&u);
-    if (v) 
-	freez(&v);
-    if (w) 
-	freez(&w);
-    errAbort("Memory allocation error in pcaGeneSet");
-    }
-
-if (rows > columns)
-    { 
-    /* [u] = [row, columns]
-     * [v] = [columns, columns]
-     * [w] = [columns, columns] diagonal
-     */
-    for (row = 0; row < rows; row++)
-	{ 
-	double mag = 0.0;
-	u[row] = needMem(columns*sizeof(double));
-
-	if (!u[row])
-	    { 
-	    while(--row >= 0) 
-		freez(&u[row]);
-	    freez(&u);
-	    freez(&v);
-	    freez(&w);
-	    errAbort("Memory allocation error in pcaGeneSet");
-	    }
-
-	for (column = 0; column < columns; column++)
-	    { 
-	    if (mask[row][column])
-		{ 
-		const double val = data[row][column];
-		mag += val*val;
-		}
-	    }
-	mag = sqrt(mag);
-
-	if (mag == 0.0) 
-	    mag = 1.0;
-
-	for (column = 0; column < columns; column++)
-	    { 
-	    if (mask[row][column]) 
-		u[row][column] = data[row][column] / mag;
-	    else 
-		u[row][column] = 0.0;
-	    }
-	}
-
-    for (row = 0; row < columns; row++)
-	{ 
-	v[row] = needMem(columns*sizeof(double));
-
-	if (!v[row])
-	    { 
-	    while (--row >= 0) 
-		freez(&v[row]);
-	    for (row = 0; row < rows; row++) 
-		freez(&u[row]);
-	    freez(&u);
-	    freez(&v);
-	    freez(&w);
-	    errAbort("Memory allocation error in pcaGeneSet");
-	    }
-	}
-
-    svd(rows, columns, u, w, v, &ierr);
-    }
-else /* rows < columns */
-    { 
-    /* [u] = [rows, rows]
-     * [v] = [column, rows]
-     * [w] = [rows, rows] diagonal
-     */ 
-    for (column = 0; column < columns; column++)
-	{ 
-	v[column] = needMem(rows*sizeof(double));
-	if (!v[column])
-	    { 
-	    while (--column >= 0) 
-		freez(&v[column]);
-	    freez(&u);
-	    freez(&v);
-	    freez(&w);
-	    errAbort("Memory allocation error in pcaGeneSet");
-	    }
-	}
-    for (row = 0; row < rows; row++)
-	{ 
-	double mag = 0.0;
-	for (column = 0; column < columns; column++)
-	    { 
-	    if (mask[row][column])
-		{ 
-		const double val = data[row][column];
-		mag += val*val;
-		}
-	    }
-
-	mag = sqrt(mag);
-	if (mag == 0.0) 
-	    mag = 1.0;
-
-	for (column = 0; column < columns; column++)
-	    { 
-	    if (mask[row][column]) 
-		v[column][row] = data[row][column] / mag;
-	    else 
-		v[column][row] = 0.0;
-	    }
-	}
-    for (row = 0; row < rows; row++)
-	{ 
-	u[row] = needMem(rows*sizeof(double));
-	if(!u[row])
-	    { 
-	    while(--row >= 0) 
-		freez(&u[row]);
-	    for (column = 0; column < columns; column++) 
-		freez(&v[column]);
-	    freez(&u);
-	    freez(&v);
-	    freez(&w);
-	    errAbort("Memory allocation error in pcaGeneSet");
-	    }
-	}
-
-    svd(columns, rows, v, w, u, &ierr);
-    }
-
-if(!ierr)
-    svdsortval= needMem(nvals*sizeof(double));
-
-if (!svdorder || !svdsortval)
-    { 
-    if(svdorder) 
-	freez(&svdorder);
-    if(svdsortval) 
-	freez(&svdsortval);
-    
-    for (row = 0; row < rows; row++) 
-	freez(&u[row]);
-    for (column = 0; column < columns; column++) 
-	freez(&v[column]);
-    freez(&u);
-    freez(&v);
-    freez(&w); 
-    errAbort("Memory allocation error in pcaGeneSet");
-    }
-
-for (column = 0; column < nvals; column++)
-    { 
-    svdorder[column] = column;
-    svdsortval[column] = -w[column];
-    }
-
-sort(nvals,svdsortval,svdorder);
-
-/* Free all used space */
-//for (row = 0; row < rows; row++) 
-//    freez(&u[row]);
-//for (column = 0; column < columns; column++) 
-//    freez(&v[column]);
-//freez(&u);
-//freez(&v);
-//freez(&w); 
-//freez(&svdorder);
-freez(&svdsortval);
-}
-
-static void verticalTextCentered(struct vGfx *vg, int x, int y,
-				 int width, int height, int colorIx, MgFont *font, char *string)
-/* Draw a vertical line of text in middle of the box.
- *      The string will read from bottom to top
- */
-{
-/*      don't let this run wild */
-CLIP(width, vg->width);
-CLIP(height, vg->height);
-if ((width > 0) && (height > 0))
-    {
-    struct vGfx *vgHoriz;
-    int i, j;
-    /*  reversed meanings of width and height here since this is going
-     *  to rotate 90 degrees
-     */
-    vgHoriz = vgOpenGif(height, width, "/dev/null");
-    vgTextCentered(vgHoriz, 0, 0, height, width, colorIx, font, string);
-    /*  now, blit from the horizontal to the vertical, rotate -90 (CCW) */
-    for (i = 0; i < height; ++i)        /* xSrc -> yDest */
-        {
-        int yDest = height - i;
-        for (j = 0; j < width; ++j)     /* ySrc -> xDest */
-            {
-            vgDot(vg,j+x,yDest+y, vgGetDot(vgHoriz, i, j));
-            }
-        }
-    vgClose(&vgHoriz);
-    }
-}     
-
-static void addLabels(struct vGfx *vg, MgFont *font, char *minXStr,
-		      char *maxXStr, char *minYStr, char *maxYStr, char *shortLabel,
-		      char *longLabel, int totalWidth, int totalHeight, 
-		      int graphWidth, int graphHeight, int startX, int startY, 
-		      int leftMargin, int fontHeight, char *vertLabel)
-{
-int x1 = 0;
-int y1 = 0;
-int x2 = 0;
-int y2 = 0;
-int shortLabelSize = 0;
-int longLabelSize = 0;
-int textWidth = 0;
-
-/*      border around the whole thing   */
-vgLine(vg, startX, startY, totalWidth-1+startX, startY, MG_MAGENTA);  /*      top     */
-vgLine(vg, startX, startY, startX, totalHeight-1+startY, MG_MAGENTA); /*      left    */
-vgLine(vg, totalWidth-1+startX, startY, 
-       totalWidth-1+startX, totalHeight-1+startY, MG_MAGENTA); /* right */
-vgLine(vg, startX, totalHeight-1+startY, 
-       totalWidth-1+startX, totalHeight-1+startY, MG_MAGENTA);/* bottom */
-
-/*      border just around the graph    */
-vgLine(vg, leftMargin-PLOT_MARGIN+startX, PLOT_MARGIN+startY, 
-       leftMargin-PLOT_MARGIN+startX,PLOT_MARGIN+graphHeight+startY, MG_CYAN); /* left */
-
-vgLine(vg, leftMargin+startX, PLOT_MARGIN+graphHeight+1+startY,
-       totalWidth-PLOT_MARGIN+startX, PLOT_MARGIN+graphHeight+1+startY, MG_CYAN);
-/*      bottom  */
-
-x1 = leftMargin+startX;
-y1 = totalHeight-PLOT_MARGIN-fontHeight+startY;
-x2 = totalWidth-PLOT_MARGIN+startX;
-y2 = totalHeight-PLOT_MARGIN+startY;
-
-if (shortLabel)
-    shortLabelSize = mgFontStringWidth(font, shortLabel);
-
-if (longLabel)
-    longLabelSize = mgFontStringWidth(font, longLabel);
-
-if ((longLabel != NULL) && (longLabelSize < (x2-x1)))
-    vgTextCentered(vg, x1, y1, x2-x1, y2-y1, MG_BLACK, font, longLabel);
-else if (shortLabel != NULL)
-    vgTextCentered(vg, x1, y1, x2-x1, y2-y1, MG_BLACK, font, shortLabel);
-
-x1 = leftMargin+startX;
-y1 = totalHeight-PLOT_MARGIN-fontHeight-fontHeight+2+startY;
-
-vgText(vg, x1, y1, MG_BLACK, font, minXStr);
-textWidth = mgFontStringWidth(font, maxXStr);
-x1 = totalWidth - PLOT_MARGIN - textWidth;
-vgText(vg, x1, y1, MG_BLACK, font, maxXStr);
-
-x1 = startX;
-x2 = leftMargin - PLOT_MARGIN+startX;
-y1 = PLOT_MARGIN+startY;
-y2 = PLOT_MARGIN + fontHeight+startY;
-
-if (maxYStr)
-    vgTextRight(vg, x1, y1, x2-x1, y2-y1, MG_BLACK, font, maxYStr);
-
-y1 = PLOT_MARGIN+graphHeight-fontHeight+startY;
-y2 = PLOT_MARGIN+graphHeight+startY;
-if (minYStr)
-    vgTextRight(vg, x1, y1, x2-x1, y2-y1, MG_BLACK, font, minYStr);
-
-x1 = PLOT_MARGIN+startX;
-y1 = PLOT_MARGIN + fontHeight+startY;
-x2 = leftMargin - PLOT_MARGIN - PLOT_MARGIN+startX;
-y2 = graphHeight - fontHeight+startY;
-if (vertLabel)
-    verticalTextCentered(vg, x1, y1, x2-x1, y2-y1, MG_BLACK, font, vertLabel);
-}       
-
-static MgFont *fontSetup(int *height)
-{
-MgFont *font = mgSmallFont();
-if (height)
-    {
-    *height = mgFontLineHeight(font);
-    }
-return (font);
-}
-
-     
-static void ordinaryPlot(int **densityCounts, Color **dataColors, struct vGfx *vg,
-			 int totalWidth, int totalHeight, 
-			 int graphWidth, int graphHeight, 
-			 int startX, int startY, int leftMargin)
-/* a simple point plot, not density, the inversion of the Y axis takes
- * place here.  leftMargin is what has been passed in, the top margin is
- * fixed at PLOT_MARGIN 
- *      Function borrowed from hgTables/correlatePlot.c and adjusted. */
-{
-int i, j;
-
-/*      the dots are going to overlap because of DOT_SIZE, but that is
- *      OK for this situation, they are all MG_BLACK
- */
-for (j = 0; j < graphHeight; ++j)
-    for (i = 0; i < graphWidth; ++i)
-        if (densityCounts[j][i])
-            {
-            vgBox(vg, i+leftMargin+startX, PLOT_MARGIN+(graphHeight-j)+startY,
-		  DOT_SIZE+2, DOT_SIZE+2, dataColors[j][i]);
-            }
-
-/*  the (totalHeight-j) is the inversion of the Y axis */
-}
-
-struct tempName *scatterPlot(struct vGfx *vg, struct dataVector *xList,
-			     struct dataVector *yList, int totalWidth, int totalHeight, 
-			     int startX, int startY)
-/*      create scatter plot gif file in trash, return path name 
- *      Function borrowed from hgTables/correlatePlot.c and adjusted. */
-{
-//static struct tempName gifFileName;
-MgFont *font = NULL;
-struct dataVector *x;
-struct dataVector *y; 
-double yMin = INFINITY;
-double xMin = INFINITY;
-double yMax = -INFINITY;
-double xMax = -INFINITY;
-double yRange = 0.0;
-double xRange = 0.0;
-int **densityCounts;    /*      densityCounts[GRAPH_HEIGHT] [GRAPH_WIDTH] */
-Color **dataColors;
-int i, j;
-int pointsPlotted = 0;
-int fontHeight = 0;
-char minXStr[32];
-char maxXStr[32];
-char minYStr[32];
-char maxYStr[32];
-char xLabel[256];
-char yLabel[256];
-int leftLabelSize = 0;
-int bottomLabelSize = 0;
-int leftMargin = 0;
-int bottomMargin = 0;
-
-font = fontSetup(&fontHeight);
-
-/*      find overall min and max        */
-for (x = xList, y = yList; (y != NULL) && (x !=NULL); y = y->next, x=x->next)
-    {
-    yMin = min(yMin,y->min);
-    xMin = min(xMin,x->min);
-    yMax = max(yMax,y->max);
-    xMax = max(xMax,x->max);
-    }
-
-yRange = yMax - yMin;
-xRange = xMax - xMin;
-if (xRange == 0.0)
-    xRange = 1.0;                                                                               
-if (yRange == 0.0)
-    yRange = 1.0;
-
-safef(minXStr,ArraySize(minXStr), "%.4g", xMin);
-safef(maxXStr,ArraySize(maxXStr), "%.4g", xMax);
-safef(minYStr,ArraySize(minYStr), "%.4g", yMin);
-safef(maxYStr,ArraySize(maxYStr), "%.4g", yMax);
-
-/*      the axes labels will be horizontal, thus the widest establishes
- *      the left margin.
- */
-leftLabelSize = max(mgFontStringWidth(font, minYStr),
-		    mgFontStringWidth(font, maxYStr));
-
-/*      the vertical text may be even wider than the numbers    */
-leftLabelSize = max(leftLabelSize,fontHeight);
-
-/*      The bottom label is one line for the axes labels, and a second
- *      line for the X axis table longLabel
- */
-bottomLabelSize = fontHeight * 2;
-
-/*      y data is the Y axis, x data is the X axis      */
-/*      Do not worry about the fact that the Y axis is 0 at the top
- *      here, that will get translated during the actual plot.
- */
-
-leftMargin = PLOT_MARGIN + leftLabelSize + PLOT_MARGIN + PLOT_MARGIN;
-bottomMargin = PLOT_MARGIN + bottomLabelSize + PLOT_MARGIN;
-
-int graphWidth = totalWidth - leftMargin - PLOT_MARGIN;
-int graphHeight = totalHeight - PLOT_MARGIN - bottomMargin;
-
-if (graphWidth <= 0 || graphHeight <= 0)
-    return NULL;
-
-struct lm *lm = lmInit(graphWidth);
-/*      Initialize density plot count array     */
-/*      space for the row pointers, first       */
-lmAllocArray(lm, densityCounts, graphHeight);
-lmAllocArray(lm, dataColors, graphHeight); 
-
-/*      then space for each row */
-for (j = 0; j < graphHeight; ++j)
-    {
-    lmAllocArray(lm, densityCounts[j], graphWidth);
-    lmAllocArray(lm, dataColors[j], graphWidth);
-    }
-
-for (j = 0; j < graphHeight; ++j)
-    for (i = 0; i < graphWidth; ++i)
-	{
-        densityCounts[j][i] = 0;
-	dataColors[j][i] = MG_GRAY;
-	}
-for (x = xList, y = yList; (y != NULL) && (x !=NULL); y = y->next, x=x->next)
-    {
-    pointsPlotted++;
-
-    double yValue = y->value;
-    double xValue = x->value;
-    int x1 = ((xValue - xMin)/xRange) * graphWidth;
-    int y1 = ((yValue - yMin)/yRange) * graphHeight;
-    
-    CLIP(x1,graphWidth);
-    CLIP(y1,graphHeight);
-    
-    densityCounts[y1][x1]++;
-    dataColors[y1][x1] = x->color;
-    }
-	
-struct tempName *gifFileName = NULL;  
-if (vg == NULL)
-    {
-    gifFileName = AllocA(struct tempName);
-    trashDirFile(gifFileName, "hgtData", "hgtaScatter", ".gif");
-
-    vg = vgOpenGif(totalWidth, totalHeight, gifFileName->forCgi);
-    }
-
-vgSetClip(vg, leftMargin+startX, PLOT_MARGIN+startY, graphWidth+startX, graphHeight+startY);
-
-ordinaryPlot(densityCounts, dataColors, vg, totalWidth, totalHeight, 
-	     graphWidth, graphHeight, startX, startY, leftMargin);
-
-safef(xLabel, ArraySize(xLabel), "PCA Component 1"); 
-safef(yLabel, ArraySize(yLabel), "PCA Component 2");
-
-/*      allow the entire area to be drawn into for the labels   */
-vgSetClip(vg, startX, startY, totalWidth+startX, totalHeight+startY);
-
-addLabels(vg, font, minXStr, maxXStr, minYStr, maxYStr,
-	  NULL, xLabel, totalWidth, totalHeight, graphWidth, graphHeight,
-	  startX, startY, leftMargin, fontHeight, yLabel);
-
-vgUnclip(vg);
-
-if (gifFileName)
-    vgClose(&vg);
-
-lmCleanup(&lm);
-
-return gifFileName;
-}     
-
-
-struct hash *getPCAComponentsXY(int rows, int columns, 
-				struct dataVector **x, struct dataVector **y,
-				struct slName **list1, struct slName **list2, 
-				double **data, char **genename, double **u, double **v, double *w, 
-				int *svdorder, Color *colors)
-{
-if (rows < 2 || columns < 2)
-    return NULL;
-
-int row, column;
-struct dataVector *newX, *newY;
-int c1 = 0;
-int c2 = 1;
-
-double mult = 10000.0;
-
-int* geneorder1 = needMem(rows*sizeof(double));
-int* geneorder2 = needMem(rows*sizeof(double));
-double** weight = needMem(rows*sizeof(double*)); 
-double** weightAbs = needMem(rows*sizeof(double*));
-
-int nvals = columns;
-if (rows < columns)
-    nvals = rows;
-
-for (row = 0; row < rows; row++)
-    {
-    weight[row] = needMem(nvals*sizeof(double));
-    weightAbs[row] = needMem(nvals*sizeof(double));
-    }    
-
-if (rows > columns)
-    {
-    for (row = 0; row < rows; row++)
-	{
-	geneorder1[row] = row;
-	geneorder2[row] = row;
-	for (column = 0; column < nvals; column++)
-	    {
-	    weight[row][column] = u[row][svdorder[column]];
-	    weightAbs[row][column] = -fabs(weight[row][column]);
-	    }
-	}
-
-    for (column = 0; column < columns; column++)
-	{
-	newX = AllocA(struct dataVector);
-	newY = AllocA(struct dataVector);
-	newX->value = 0;
-	newY->value = 0;
-	for (row = 0; row < rows; row++)
-	    {
-	    newX->value += data[row][column] * u[row][svdorder[c1]];
-	    newY->value += data[row][column] * u[row][svdorder[c2]];
-	    }
-
-	newX->min = newX->value;
-	newX->max = newX->value;
-	newY->min = newY->value;
-	newY->max = newY->value;
-
-	newX->count = 1;
-	newY->count = 1;
-	
-	newX->color = MG_GRAY;
-	newY->color = MG_GRAY;
-	if (colors != NULL)
-	    {
-	    newX->color = colors[column];
-	    newY->color = colors[column];
-	    }
-
-	slAddHead(x, newX);
-	slAddHead(y, newY);
-	}
-    }
-else
-    {
-    for (row = 0; row < rows; row++)                                                            
-	{
-	geneorder1[row] = row;
-	geneorder2[row] = row;
-	for (column = 0; column < nvals; column++)
-	    {
-	    weight[row][column] = u[column][row];
-	    weightAbs[row][column] = -fabs(weight[row][column]);
-	    }
-	}
-
-    for (column = 0; column < columns; column++)
-	{
-        newX = AllocA(struct dataVector);
-        newY = AllocA(struct dataVector); 
-	newX->value = 0;
-	newY->value = 0;
-	for (row = 0; row < rows; row++)
-	    {
-	    newX->value += data[svdorder[row]][column] * u[c1][svdorder[row]];
-	    newY->value += data[svdorder[row]][column] * u[c2][svdorder[row]];
-	    }
-
-	newX->min = newX->value;
-        newX->max = newX->value;
-        newY->min = newY->value;
-        newY->max = newY->value;
-
-        newX->count = 1;
-        newY->count = 1;
-
-        newX->color = MG_GRAY;
-        newY->color = MG_GRAY;
-        if (colors != NULL)
-            {
-            newX->color = colors[column];
-            newY->color = colors[column];
-            }
- 
-        slAddHead(x, newX);
-        slAddHead(y, newY);
-	}
-    }
-
-double* w1Abs = needMem(rows*sizeof(double));
-double* w2Abs = needMem(rows*sizeof(double));
-for (row = 0; row < rows; row++)
-    {
-    w1Abs[row] = weightAbs[row][c1];  // get first component, for scatter plot
-    w2Abs[row] = weightAbs[row][c2];  // get second component, for scatter plot
-    }
-sort(rows,w1Abs,geneorder1);
-sort(rows,w2Abs,geneorder2);
-
-struct hash *statHash = NULL;
-if (rows > 0)
-    statHash = hashNew(0);
-
-for (row = 0; row < rows; row++)
-    {
-    int index1 = geneorder1[row];
-    int index2 = geneorder2[row];
-
-    if (ceil(w1Abs[index1]*mult) != 0)
-	slNameAddHead(list1, genename[index1]);
-
-    if (ceil(w2Abs[index2]*mult) != 0)
-	slNameAddHead(list2, genename[index2]);
-
-    struct pcaData *pd = AllocA(struct pcaData);
-    pd->numComponents = nvals;
-    AllocArray(pd->components, pd->numComponents);
-    for (column = 0; column < nvals; column++)
-	pd->components[column] = weight[row][column];
-
-    hashAdd(statHash, genename[row], pd);
-    }
-slReverse(list1);
-slReverse(list2);
-
-return statHash;
-}
-
-void printComponents(char *name, struct slName *list1, struct slName *list2)
-{
-if (list1 == NULL || list2 == NULL)
-    return;
-
-struct slName *sl;
-int count = 0;
-int numGenesPerLine = 20;
-webNewSection("PCA Components: %s", name);
-
-hPrintf("<TABLE border = \"0\">");
-hPrintf("<TR><TH>First:<TD>");
-for (sl = list1; sl; sl = sl->next)
-    {
-    hPrintf("%s,", sl->name);
-    count++;
-    if (count == numGenesPerLine)
-	{
-	hPrintf("...");
-	break;
-	}
-    }
-count = 0;
-hPrintf("<TR><TH>Second:<TD>");
-for (sl = list2; sl; sl = sl->next)
-    {
-    hPrintf("%s,", sl->name);
-    count++;
-    if (count == numGenesPerLine)
-	{
-	hPrintf("...");
-	break;
-	}
-    }
-hPrintf("</TABLE>");
-
-hPrintf("\n</TD><TD WIDTH=15></TD></TR></TABLE>\n");
-hPrintf("</TD></TR></TABLE>\n");
-hPrintf("</TD></TR></TABLE>\n");
-}
-
-struct hash *performPCAandPlot(struct vGfx *vg, int totalWidth, int totalHeight, 
-		    int startX, int startY, struct hash *geneHash, 
-		    struct slName *allGenes, struct featureColor *fcList)
-{
-if (geneHash == NULL || allGenes == NULL)
-    return NULL;
-
-struct bed *nb = NULL;
-struct slName *sl, *genes = NULL;
-struct hashEl *el;
-
-for (sl = allGenes; sl ; sl = sl->next)
-    {
-    el = hashLookup(geneHash, sl->name);
-    if (el != NULL)
-	{
-	slNameAddHead(&genes, sl->name);
-	nb = el->val;
-	}
-    }
-slReverse(&genes);
-
-if (nb == NULL)
-    return NULL;
-
-int row, column;
-int columns = nb->expCount; 
-int rows = slCount(genes);
-int nvals = min(rows, columns); 
-
-if (rows > MAX_ROWS || columns > MAX_COLUMNS)
-    return NULL;
-
-double **data = needMem(rows*sizeof(double*));
-char **genename = needMem(rows*sizeof(char*));
-double** u = needMem(rows*sizeof(double*));
-double** v = needMem(columns*sizeof(double*));
-double* w = needMem(nvals*sizeof(double));
-int *svdorder = needMem(nvals*sizeof(int));
-
-pcaGeneSet(geneHash, genes, rows, columns, 
-	   data, genename, u, v, w, svdorder);
-
-Color colors[columns];
-for (column = 0; column < columns; column++)
-    colors[column] = MG_GRAY;
-
-struct featureColor *fc;
-for (fc = fcList; fc; fc = fc->next)
-    {
-    if (fc->index < columns)
-	colors[fc->index] = fc->color;
-    }
-
-struct dataVector *x = NULL; 
-struct dataVector *y = NULL;
-struct slName *list1 = NULL;
-struct slName *list2 = NULL;
- 
-struct hash *statHash = getPCAComponentsXY(rows, columns, &x, &y, &list1, &list2, 
-					   data, genename, u, v, w, svdorder, colors);
-
-struct tempName *scatterPlotGif = NULL; 
-if (vg)
-    scatterPlotGif = scatterPlot(vg, x, y, totalWidth, totalHeight, startX, startY);    
-
-//#if 0
-outputGenePCA(rows, columns, genes, w, u, svdorder);
-//#endif
-  
-
-/* Free all used space */
-for (row = 0; row < rows; row++)
-    freez(&u[row]);
-for (column = 0; column < columns; column++)
-    freez(&v[column]);                                                                            
-freez(&u);
-freez(&v);
-freez(&w);
-
-return statHash;
-}
-
-struct hash *performPCA(struct hash *geneHash, struct slName *allGenes)
-/* use plotting function, but pass NULL vGfx ptr to avoid drawing plot, just returning statHash */
-{
-return performPCAandPlot(NULL, 0, 0, 0, 0, geneHash, allGenes, NULL);
-}
-