src/hg/instinct/inc/hgHeatmapLib.h 1.38

1.38 2009/10/22 23:59:02 cszeto
Added local_url function
Index: src/hg/instinct/inc/hgHeatmapLib.h
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/hg/instinct/inc/hgHeatmapLib.h,v
retrieving revision 1.37
retrieving revision 1.38
diff -b -B -U 1000000 -r1.37 -r1.38
--- src/hg/instinct/inc/hgHeatmapLib.h	4 Jun 2009 03:47:23 -0000	1.37
+++ src/hg/instinct/inc/hgHeatmapLib.h	22 Oct 2009 23:59:02 -0000	1.38
@@ -1,353 +1,353 @@
 /********************************************************************************/
 /* Copyright 2007-2009 -- The Regents of the University of California           */
 /********************************************************************************/
 
 /* hgHeatmapLib.h - Include file containing the
  * heatmap data structure "struct genoHeatmap",
  * and functions to access the data structure.
  */
 
 #ifndef HGHEATMAPLIB_H
 #define HGHEATMAPLIB_H
 
 #include "microarray.h"
 
 
 #define hghBedScore 10000
 
 /*** External vars declared in hgHeatmap.c ***/
 extern char *database;	                /* Name of the selected database - hg15, mm3, or the like. */
 extern char *genome;	                /* Name of the selected genome - mouse, human, etc. */
 extern char *theDataset;                /* Name of the selected dataset - UCSF breast cancer etc. */
 extern struct slRef *ghList;	        /* List of active heatmaps */
 extern struct hash *ghHash;	        /* Hash of active heatmaps */
 extern struct geneSetGroup *gsList;     /* List of available gene set groups */
 extern struct hash *gsHash;             /* Hash of available gene set groups */
 extern struct geneSet *allGeneSets;     /* List of current pathway collections */
 
 /*** Info on a single heatmap. ***/
 
 struct genoHeatmap
 /* A genomic heatmap: labels, data sources, display settings, sample ordering and analysis results */
 {
     void *next;			/* Next in list. */
     char *name;                 /* Graph name. tableName in database */
     char *shortLabel;           /* Short label. */
     char *longLabel;            /* Long label. */
-    char *url;                  /* URL to Pubmed entry for dataset */
+    char *local_url;            /* URL to description entry for dataset */
     char *accessTable;          /* isoform table, usually much smaller than table "name" for 
 				   fast display */
     boolean private;            /* private or public access to data */
 	boolean custom;				/* is this a custom track? */
 
     char *group;                /* Dataset's group */
     char *database;             /* database host genomic data */
     char *dataType;             /* type of dataset */
     char *platform;             /* platform (expression, SNP, etc.) for display */
 
     char *raFile;               /* XX.ra file to configure features  */
     char *patDb;                /* database host clinical information */
     char *patTable;             /* table in patDb that hosts patient to sample mapping information */
     char *patField;             /* field in patTable with patient identifier */ 
     char *sampleField;          /* field in patTable with sample identifier */ 
     
     char *setsTable;            /* geneset table in patDb , todo: move out of patDb to an independent db */
     char *probeTable;           /* probe to geneSymbol mapping table in local database such as hg18 */
     char *displayNameTable;     /* probe to geneSymbol mapping table to use for genesets consisting of probe ids */
 
     int height;                 /* height of the heatmap */
     double expScale;            /* display color saturation cutoff value */
     float gainFull;             /* display gain to emphasize color in whole genome view */
     float gainSet;              /* display gain to emphasize color in gene set view */
 
     int expCount;		/* number of experiments */
     struct slName *sampleList;  /* List of names of samples, sampleList position matches sampleOrder hash */
     struct hash *sampleOrder;	/* Hash of orders for samples, 
 				   keyed on sampleName, values are sample positions (integer) */
     int *expIdOrder;            /* ordering of expIds for display of bed15 format */
     struct trackDb *tDb;	/* the track database */
 	struct customTrack *ct; /* the custom track (ct) entry */
 
     struct analysisResult *anaResult;               /* analysis result for the chromosome view */
     struct analysisResultHash *anaResultHash;       /* analysis result for the gene set view */
 };
 
 struct geneSet {
     struct geneSet *next;
     struct slName *genes;
     char *name;
     char *displayName;
     int x;
     int y;
     int width;
     int numGenes;
     int numGenesActive;
     double pixelsPerGene;
 };
 
 struct geneSetGroup {           /* A group of gene sets, kept in a singly linked list */
     struct geneSetGroup *next;  /* Next in list */
     struct hash *settings;      /* Hash of settings -- NOTE(review): source of the settings not visible here; confirm */
 
     char *name;                 /* Group name */
     char *shortLabel;           /* Short label */
     char *longLabel;            /* Long label */
     char *members;              /* NOTE(review): format of the members string is not visible here -- confirm */
 };
 
 struct hgStats
 /* Statistics extensible data */
 {
     struct hgStats *next;     /* Next in singly linked list. */
     char *chrom;	      /* Human chromosome */
     unsigned chromStart;      /* Start position in chromosome */
     unsigned chromEnd;	      /* End position in chromosome */
     char *name;	              /* Name of item */
     float stats;              /* float number, for now it is the t or z statistic
 			         or the difference in mean values
 			      */
     float prob;               /* float number, it is -log10(probability) */
     float outputVal;          /* output value, for now it is -log10(p) * direction,
 				 direction = 1 if stats is positive,
 				 direction = -1 if stats is negative */
     int compIndex;            /* if storing a PCA component, store its index */ 
     int totalComps;           /* the total number of PCA components to be stored, for plotting */
 };
 
 
 struct analysisResult
 {
     void *next;                 /* Next in list */
     struct hgStats *stats;     /* Results, list of struct hgStats */
     double min;                /* Minimum value in results */
     double max;                /* Maximum value in results */
 };
 
 
 struct analysisResultHash
 {
     void *next;                 /* Next in list */
     struct hash *hash;         /* Results, hash of struct hgStats */
     double min;                /* Minimum value in results */
     double max;                /* Maximum value in results */
 };
 
 struct analysisResultHashList
 {
     struct analysisResultHashList *next;    /* Next in list */
     struct analysisResultHash *val;         /* This element's results (an analysisResultHash) */
 };
 
 struct featureColor             /* A color entry, kept in a singly linked list */
 {
     struct featureColor *next;  /* Next in list */
     char *name;                 /* NOTE(review): presumably the feature name -- confirm */
     char *id;                   /* NOTE(review): what this id identifies is not visible here -- confirm */
     int index;                  /* NOTE(review): index and order semantics are not visible in this header -- confirm */
     int order;
     Color color;                /* The color for this entry */
 };
                 
 /*** Routines from hgHeatmapLib.h ***/
 
 struct microarrayGroups *maGroupings(char *database, char *table);
 
 struct genoHeatmap *getHeatmap(struct sqlConnection *conn, char *database, char *tableName, 
 			       struct hash *raHash);
 /* Get heatmap using database, tableName */
 
 struct genoHeatmap *getCustomHeatmap(struct customTrack *ct);
 /* Get heatmap for the custom track ct */
 
 struct genoHeatmap *cloneHeatmap(struct genoHeatmap *gh);
 /* Deep copy of gh, clean up memory after return pointer */
 
 void setSampleOrder(struct genoHeatmap* gh, struct slName* sampleList);
 /* Set the sampleOrder and sampleList of a specific heatmap to posStr; 
  * posStr is a comma separated string of sample ids.
  * if posStr is null, then check the configuration file 
  * if the setting is not set in the configuration file, then the orders 
  * are set to default in sampleList and sampleOrder */
 
 void sortPersonOrder (struct genoHeatmap *gh);
 /*  Sort and set the sample orders according to feature configuration */
 
 void defaultOrder(struct genoHeatmap *gh);
 /* reset the default order of samples to be displayed */ 
 
 int *getBedOrder(struct genoHeatmap *gh);
 /* Return an array for reordering the experiments
  * If the order has not been set, then use function setBedOrder to set */
 
 void setPersonOrder(struct genoHeatmap* gh, struct slName *personList);
 /* Set the sampleOrder and sampleList of a specific heatmap to patientList;
  * patientList is a list of patient names
  * if patientList is null, set to default */
 
 struct slName *samplesSortedByChromPos(struct genoHeatmap *gh, char *chrom,
                                        int start, int stop, int direction);
 /* Sort samples (starting in default order) according to chrom:start-stop and direction (1 = ASC)
  * (-1 = DESC) */
 
 struct slName *samplesSortedByGene(struct genoHeatmap *gh, char *gene, int direction); 
 /* Sort samples according to value of data in gene (averages of all probes that map to
  * that gene) */
 
 struct slName *getAllSamples(struct genoHeatmap *gh);
 /* Return all the samples belonging to a heatmap
  * free return list after use */
 
 struct slName *getAllSamplesInDb(struct genoHeatmap *gh, struct slName *list);
 /* Return all the samples in list that belong to a heatmap.
  * free return list after use   
  */
 
 struct slName *getAllPatients (struct genoHeatmap *gh);
 /* Return all the patients, including those without data in gh
  * Free returned list after use */
 
 struct slName *getAllPatientsInDb (struct genoHeatmap *gh);
 /* Return only the patients with data in gh
  * Free returned list after use */
 
 struct slName *getSamplesInPatList (struct genoHeatmap *gh, struct slName *patList);
 /* Return samples that belong to patients in patList and also have data in the heatmap
  * Free returned list after use */
 
 struct hash *readRa(char *rootName);
 /* Read in ra in root, root/org, and root/org/database. */
 
 char *mustFindInRaHash(char *fileName, struct hash *raHash, char *name);
 /* Look up in ra hash or die trying. */ 
 
 extern char *getColumnDbRaName();
 /* defined in hgHeatmap.h */
  
 struct geneSet *getPathways (char *db, char *pathwayNames);
 /* Get gene sets from database name is the gene set group name */
 
 struct slName *getAllGenes (char *db, char *tableName);
 /* Get genes from database */
 
 struct bed *getChromHeatmap(struct genoHeatmap *gh, char *chromName, boolean useAccessTable);
 /* Get the bed 15 data for heatmap gh, 
    if useAccessTable, only retrieve data from the down-sampled table */
 
 struct bed *getChromHeatmapRange(struct genoHeatmap *gh, char *chromName,
                                  int chromStart, int chromStop, boolean useAccessTable);
 /* Get range of bed 15 data, not entire chromosome. TODO: Merge function with above */
 
 struct bed *getBedGraph(struct genoHeatmap *gh, char* chrom, int nField);
 
 struct bed *getBedGraphRange(struct genoHeatmap *gh, char* chrom,
 			     int start, int stop, int nField);
 
 struct hash **getSubgroupChromHeatmapHash(struct genoHeatmap *gh, int subsetNum, char *raName, struct geneSet *geneSets);
 /* Get the subset of bed 15 data from database for heatmap gh
  * Clean up return pointer after use */
 
 struct slName **getSubsets(struct genoHeatmap *gh, int subsetNum,  char *raName );
 /* Get the subset sample lists as an array of lists of samples
  * Returned array is created on the heap, clean up memory after use 
  */
 
 int getSubsetsIfAny(struct genoHeatmap *gh, int subsetNum, char *raName, struct slName ***subsets);
 /* Get the subset sample lists as an array of lists of samples
  * Returned integer is 0 if there are no subsets, otherwise 1 if there are subsets 
  * Subsets are returned by reference.
  */
 
 struct bed **getSubgroupChromHeatmap(struct genoHeatmap *gh, int subsetNum, 
 				     char *raName, char *chromName, boolean useAccess);
 /* Get the subset of bed 15 data from database for heatmap gh 
  * Clean up return pointer after use */
 
 void getChromHeatmapHash(struct hash **geneHash, char *geneDb, char *probeTable, 
 			 char *tableName, char *chromName, struct geneSet *geneSets);
 /* Get the bed15 data of geneSets
  * You can add probe hash information to geneHash accumulatively 
  */ 
 
 struct analysisResult *diffAveSubgroup (struct genoHeatmap *gh, int subsetNum, char *raName,
 					boolean (*func)(float data1[], unsigned long n1, float data2[], unsigned long n2, float *r, float *prob),
 					char *chromName, boolean useAccess);
 /* Calculate the difference of means between two subsets
  * Clean memory of returned pointer after use */
 
 struct analysisResultHash *diffAveSubgroupHash (struct genoHeatmap *gh, int subsetNum, char *raName, struct geneSet *geneSets,
 						boolean (*func)(float data1[], unsigned long n1, float data2[], unsigned long n2, float *r, float *prob)
 						);
 /* Calculate the difference of means between two subsets
  * Clean memory of returned pointer after use */
 
 struct analysisResult *runMetaAnalysisChrom (int binSize, struct slRef **resultList, 
 											boolean (*func)(struct slDouble *data, 
 											float *r, float *prob));
 
 struct analysisResultHash *runMetaAnalysisGS (struct geneSet *geneSets, struct slRef **resultList, 
 											boolean (*func)(struct slDouble *data, 
 											float *r, float *prob));
  
 /*************** statistics functions ************************/
 
 struct analysisResultHash *diffSubgroupHash (struct genoHeatmap *gh,int subsetNum, char *raName, 
 					     struct geneSet *geneSets, char *blockStatFunc,
 					     boolean (*func)(float data1[], unsigned long n1, 
 							     float data2[], unsigned long n2, 
 							     float *r, float *prob)
 					     );
 /* Calculate the difference of means between two subsets
  * Clean memory of returned pointer after use */
     
 /*************** statistics functions ************************/
 
 void rankPCADiffAvePathways ( struct genoHeatmap *gh,
                               int subsetNum, char *raName, char *chromHeatmap,
                               struct geneSet *geneSets, FILE *output,
                               boolean (*func)(float data1[], unsigned long n1, float data2[],
                                               unsigned long n2, float *r, float *prob),
                               boolean useAccess);
 
 struct geneSet *rankDiffAvePathways (struct genoHeatmap *gh, int subsetNum, char *raName,
 				     char *chromHeatmap,  struct geneSet *geneSets,  FILE *fp, 
 				     boolean (*func)(float data1[], unsigned long n1, float data2[],
 						     unsigned long n2, float *r, float *prob), char* funcName,
 				     boolean useAccess);
 /* Rank pathways based on the difference of means between two subsets */
 
 void rankDiffAveGenes ( struct genoHeatmap *gh, 
 			int subsetNum, char *raName, char *chromHeatmap, 
 			struct slName *genes, FILE *output,
 			boolean (*func)(float data1[], unsigned long n1, float data2[], 
 					unsigned long n2, float *r, float *prob),char *funcName,
 			boolean useAccess);
 /* Rank genes based on the difference between two subsets */
 
 /*************** hgStats related functions *********************/
 void hgStatsFree(struct hgStats *pEl);
 /* Free a single dynamically allocated hgStats */
 
 void hgStatsFreeList(struct hgStats *pList);
 /* Free a list of dynamically allocated hgStats's */
 
 int hgStatsCmpScore(const void *va, const void *vb);
 /* Compare to sort based on score - lowest first. */
 
 void analysisResultFree(struct analysisResult *pt);
 /* Free analysisResult */
 
 void analysisResultHashFree(struct analysisResultHash *pt);
 /* Free analysisResultHash */
 
 struct geneSet *getCompGeneSet(int subsetNum, int numberOfGenes,
 								boolean (*func)(float data1[], unsigned long n1, float data2[], 
 					unsigned long n2, float *r, float *prob), char *metaMethod);
 
 //todo void analysisResultHashFree(struct analysisResultHash *pt);
 double zScore(double p);
 
 struct analysisResultHash *makePathwayStatsHash(struct geneSet *geneSets,struct hash *geneHash, struct analysisResultHash *stats, char *metaMethod);
 
 void adjustBonferroni(struct genoHeatmap *gh);
 
 #endif /* HGHEATMAPLIB_H */