src/hg/instinct/inc/hgHeatmapLib.h 1.37
1.37 2009/06/04 03:47:23 jsanborn
added copyright notices, removed cluster library
Index: src/hg/instinct/inc/hgHeatmapLib.h
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/hg/instinct/inc/hgHeatmapLib.h,v
retrieving revision 1.36
retrieving revision 1.37
diff -b -B -U 1000000 -r1.36 -r1.37
--- src/hg/instinct/inc/hgHeatmapLib.h 5 May 2009 22:43:59 -0000 1.36
+++ src/hg/instinct/inc/hgHeatmapLib.h 4 Jun 2009 03:47:23 -0000 1.37
@@ -1,349 +1,353 @@
+/********************************************************************************/
+/* Copyright 2007-2009 -- The Regents of the University of California */
+/********************************************************************************/
+
/* hgHeatmapLib.h - Include file containing the
* heatmap data structure "struct genoHeatmap"
* and functions to access the data structure.
*/
#ifndef HGHEATMAPLIB_H
#define HGHEATMAPLIB_H
#include "microarray.h"
/* NOTE(review): the meaning of hghBedScore is not documented in this header;
 * presumably a scale or limit applied to bed scores - confirm at the use sites. */
#define hghBedScore 10000
/*** External vars (per the original note, these live in hgHeatmap.c) ***/
extern char *database; /* Name of the selected database - hg15, mm3, or the like. */
extern char *genome; /* Name of the selected genome - mouse, human, etc. */
extern char *theDataset; /* Name of the selected dataset - UCSF breast cancer etc. */
extern struct slRef *ghList; /* List of active heatmaps */
extern struct hash *ghHash; /* Hash of active heatmaps */
extern struct geneSetGroup *gsList; /* List of available gene set groups */
extern struct hash *gsHash; /* Hash of available gene set groups */
extern struct geneSet *allGeneSets; /* List of current pathway collections */
/*** Info on a single heatmap. ***/
struct genoHeatmap
/* A genomic heatmap: labels, backing tables, display settings, sample
 * ordering and analysis results for one dataset. */
{
void *next; /* Next in list (declared as an untyped pointer). */
char *name; /* Graph name; the tableName in the database */
char *shortLabel; /* Short label. */
char *longLabel; /* Long label. */
char *url; /* URL to Pubmed entry for dataset */
char *accessTable; /* isoform table, usually much smaller than table "name", for
fast display */
boolean private; /* private or public access to data */
boolean custom; /* is this a custom track? */
char *group; /* Dataset's group */
char *database; /* database hosting the genomic data */
char *dataType; /* type of dataset */
char *platform; /* platform (expression, SNP, etc.) for display */
char *raFile; /* XX.ra file to configure features */
char *patDb; /* database hosting the clinical information */
char *patTable; /* table in patDb that hosts patient to sample mapping information */
char *patField; /* field in patTable with patient identifier */
char *sampleField; /* field in patTable with sample identifier */
char *setsTable; /* geneset table in patDb. TODO: move out of patDb to an independent db */
char *probeTable; /* probe to geneSymbol mapping table in local database such as hg18 */
char *displayNameTable; /* probe to geneSymbol mapping table to use for genesets consisting of probe ids */
int height; /* height of the heatmap */
double expScale; /* display color saturation cutoff value */
float gainFull; /* display gain to emphasize color in whole genome view */
float gainSet; /* display gain to emphasize color in gene set view */
int expCount; /* number of experiments */
struct slName *sampleList; /* List of names of samples; sampleList position matches sampleOrder hash */
struct hash *sampleOrder; /* Hash of orders for samples,
keyed on sampleName; values are sample positions (integer) */
int *expIdOrder; /* ordering of expIds for display of bed15 format */
struct trackDb *tDb; /* the track database */
struct customTrack *ct; /* the custom track (ct) entry */
struct analysisResult *anaResult; /* analysis result for the chromosome view */
struct analysisResultHash *anaResultHash; /* analysis result for the gene set view */
};
/* A named set of genes, stored as a singly linked list node.
 * NOTE(review): the fields were undocumented in the original; comments marked
 * "presumably" are inferred from names and types only - confirm against the
 * code that fills them in. */
struct geneSet {
struct geneSet *next; /* Next in list */
struct slName *genes; /* presumably the names of the genes in this set */
char *name; /* presumably the set's identifier */
char *displayName; /* presumably the label shown to the user */
int x; /* presumably a layout coordinate for drawing - confirm units */
int y; /* presumably a layout coordinate for drawing - confirm units */
int width; /* presumably the drawn width - confirm units */
int numGenes; /* presumably the number of genes in the set */
int numGenesActive; /* presumably the number of genes currently in use - confirm */
double pixelsPerGene; /* presumably the horizontal scale used when drawing */
};
/* A group of gene sets, stored as a singly linked list node.
 * NOTE(review): the fields were undocumented in the original; comments marked
 * "presumably" are inferred from names and types only - confirm. */
struct geneSetGroup {
struct geneSetGroup *next; /* Next in list */
struct hash *settings; /* presumably the group's configuration settings */
char *name; /* presumably the group's identifier */
char *shortLabel; /* Short label */
char *longLabel; /* Long label */
char *members; /* presumably names the gene sets in this group - format not shown here */
};
struct hgStats
/* Statistics extensible data */
{
struct hgStats *next; /* Next in singly linked list. */
char *chrom; /* Human chromosome */
unsigned chromStart; /* Start position in chromosome */
unsigned chromEnd; /* End position in chromosome */
char *name; /* Name of item */
float stats; /* For now, the t or z statistic,
or the difference in mean values
*/
float prob; /* -log10(probability) */
float outputVal; /* Output value; for now it is -log10(p) * direction, where
direction = 1 if stats is positive and
direction = -1 if stats is negative */
int compIndex; /* if storing a PCA component, store its index */
int totalComps; /* the total number of PCA components to be stored, for plotting */
};
/* Analysis results held as a list of hgStats, with the value range. */
struct analysisResult
{
void *next; /* Next in list */
struct hgStats *stats; /* Results */
double min; /* Minimum value in results */
double max; /* Maximum value in results */
};
/* Analysis results held in a hash of hgStats, with the value range. */
struct analysisResultHash
{
void *next; /* Next in list */
struct hash *hash; /* Results, hash of struct hgStats */
double min; /* Minimum value in results */
double max; /* Maximum value in results */
};
/* Singly linked list node wrapping one analysisResultHash. */
struct analysisResultHashList
{
struct analysisResultHashList *next; /* Next in list */
struct analysisResultHash *val; /* The wrapped result set (its hash holds struct hgStats) */
};
/* A color assigned to a named feature, stored as a singly linked list node.
 * NOTE(review): the fields were undocumented in the original; comments marked
 * "presumably" are inferred from names and types only - confirm. */
struct featureColor
{
struct featureColor *next; /* Next in list */
char *name; /* presumably the feature name */
char *id; /* presumably an identifier for the feature or sample - confirm */
int index; /* presumably a position or lookup index - confirm */
int order; /* presumably the display order - confirm */
Color color; /* Color value (Color type is declared outside this header) */
};
/*** Routines declared in this header (hgHeatmapLib.h) ***/
struct microarrayGroups *maGroupings(char *database, char *table);
/* NOTE(review): undocumented in the original; presumably returns the
 * microarray groupings for table in database - confirm. */
struct genoHeatmap *getHeatmap(struct sqlConnection *conn, char *database, char *tableName,
struct hash *raHash);
/* Get heatmap using database, tableName */
struct genoHeatmap *getCustomHeatmap(struct customTrack *ct);
/* Get heatmap for the custom track ct */
struct genoHeatmap *cloneHeatmap(struct genoHeatmap *gh);
/* Deep copy of gh. Clean up the memory of the returned pointer after use. */
void setSampleOrder(struct genoHeatmap* gh, struct slName* sampleList);
/* Set the sampleOrder and sampleList of a specific heatmap from sampleList.
 * NOTE(review): the original comment described a parameter "posStr", a comma
 * separated string of sample ids, with this fallback: if posStr is null, check
 * the configuration file; if the setting is not set there, the orders are set
 * to default in sampleList and sampleOrder. No posStr parameter exists in this
 * signature - confirm which of that behavior still applies. */
void sortPersonOrder (struct genoHeatmap *gh);
/* Sort and set the sample orders according to feature configuration */
void defaultOrder(struct genoHeatmap *gh);
/* Reset the default order of samples to be displayed */
int *getBedOrder(struct genoHeatmap *gh);
/* Return an array for reordering the experiments.
 * If the order has not been set, then use function setBedOrder to set it.
 * NOTE(review): setBedOrder is not declared in this header. */
void setPersonOrder(struct genoHeatmap* gh, struct slName *personList);
/* Set the sampleOrder and sampleList of a specific heatmap to personList;
 * personList is a list of patient names.
 * If personList is null, set to default */
struct slName *samplesSortedByChromPos(struct genoHeatmap *gh, char *chrom,
int start, int stop, int direction);
/* Sort samples (starting in default order) according to chrom:start-stop and
 * direction (1 = ASC, -1 = DESC) */
struct slName *samplesSortedByGene(struct genoHeatmap *gh, char *gene, int direction);
/* Sort samples according to value of data in gene (averages of all probes that
 * map to that gene) */
struct slName *getAllSamples(struct genoHeatmap *gh);
/* Return all the samples that belong to a heatmap.
 * Free returned list after use */
struct slName *getAllSamplesInDb(struct genoHeatmap *gh, struct slName *list);
/* Return all the samples in list that belong to a heatmap.
 * Free returned list after use
 */
struct slName *getAllPatients (struct genoHeatmap *gh);
/* Return all the patients, including those without data in gh.
 * Free returned list after use */
struct slName *getAllPatientsInDb (struct genoHeatmap *gh);
/* Return only the patients with data in gh.
 * Free returned list after use */
struct slName *getSamplesInPatList (struct genoHeatmap *gh, struct slName *patList);
/* Return samples that belong to patients in patList and also have data in the heatmap.
 * Free returned list after use */
struct hash *readRa(char *rootName);
/* Read in ra in root, root/org, and root/org/database. */
char *mustFindInRaHash(char *fileName, struct hash *raHash, char *name);
/* Look up in ra hash or die trying. */
extern char *getColumnDbRaName();
/* Original note: "defined in hgHeatmap.h".
 * NOTE(review): the empty parameter list means the compiler does not check
 * arguments at call sites (C before C23); confirm where this is defined. */
struct geneSet *getPathways (char *db, char *pathwayNames);
/* Get gene sets from database.
 * NOTE(review): the original comment says "name is the gene set group name";
 * presumably that refers to pathwayNames - confirm. */
struct slName *getAllGenes (char *db, char *tableName);
/* Get genes from database */
struct bed *getChromHeatmap(struct genoHeatmap *gh, char *chromName, boolean useAccessTable);
/* Get the bed 15 data for heatmap gh;
if useAccessTable, only retrieve data from the down sampled table */
struct bed *getChromHeatmapRange(struct genoHeatmap *gh, char *chromName,
int chromStart, int chromStop, boolean useAccessTable);
/* Get range of bed 15 data, not entire chromosome. TODO: Merge function with above */
struct bed *getBedGraph(struct genoHeatmap *gh, char* chrom, int nField);
/* NOTE(review): undocumented in the original; presumably returns bedGraph
 * style data for chrom from heatmap gh - the meaning of nField is not shown
 * here, confirm. */
struct bed *getBedGraphRange(struct genoHeatmap *gh, char* chrom,
int start, int stop, int nField);
/* NOTE(review): undocumented in the original; presumably the start-stop range
 * variant of getBedGraph - confirm. */
struct hash **getSubgroupChromHeatmapHash(struct genoHeatmap *gh, int subsetNum, char *raName, struct geneSet *geneSets);
/* Get the subset of bed 15 data from database for heatmap gh.
 * Clean up return pointer after use */
struct slName **getSubsets(struct genoHeatmap *gh, int subsetNum, char *raName );
/* Get the subset sample lists as an array of lists of samples.
 * Returned array is created on the heap; clean up memory after use
 */
int getSubsetsIfAny(struct genoHeatmap *gh, int subsetNum, char *raName, struct slName ***subsets);
/* Get the subset sample lists as an array of lists of samples.
 * Returned integer is 0 if there are no subsets, otherwise 1 if there are subsets.
 * Subsets are returned by reference.
 */
struct bed **getSubgroupChromHeatmap(struct genoHeatmap *gh, int subsetNum,
char *raName, char *chromName, boolean useAccess);
/* Get the subset of bed 15 data from database for heatmap gh.
 * Clean up return pointer after use */
void getChromHeatmapHash(struct hash **geneHash, char *geneDb, char *probeTable,
char *tableName, char *chromName, struct geneSet *geneSets);
/* Get the bed15 data of geneSets.
 * You can add probe hash information to geneHash cumulatively
 */
struct analysisResult *diffAveSubgroup (struct genoHeatmap *gh, int subsetNum, char *raName,
boolean (*func)(float data1[], unsigned long n1, float data2[], unsigned long n2, float *r, float *prob),
char *chromName, boolean useAccess);
/* Calculate the difference of means between two subsets.
 * func takes two float arrays with their lengths and has two float output
 * pointers (r, prob); presumably the statistical test to apply - confirm.
 * Clean memory of returned pointer after use */
struct analysisResultHash *diffAveSubgroupHash (struct genoHeatmap *gh, int subsetNum, char *raName, struct geneSet *geneSets,
boolean (*func)(float data1[], unsigned long n1, float data2[], unsigned long n2, float *r, float *prob)
);
/* Calculate the difference of means between two subsets.
 * Clean memory of returned pointer after use */
struct analysisResult *runMetaAnalysisChrom (int binSize, struct slRef **resultList,
boolean (*func)(struct slDouble *data,
float *r, float *prob));
/* NOTE(review): undocumented in the original; presumably combines the results
 * in resultList per chromosome bin of binSize using func - confirm. */
struct analysisResultHash *runMetaAnalysisGS (struct geneSet *geneSets, struct slRef **resultList,
boolean (*func)(struct slDouble *data,
float *r, float *prob));
/* NOTE(review): undocumented in the original; presumably the gene set
 * counterpart of runMetaAnalysisChrom - confirm. */
/*************** statistics functions ************************/
struct analysisResultHash *diffSubgroupHash (struct genoHeatmap *gh,int subsetNum, char *raName,
struct geneSet *geneSets, char *blockStatFunc,
boolean (*func)(float data1[], unsigned long n1,
float data2[], unsigned long n2,
float *r, float *prob)
);
/* Calculate the difference of means between two subsets.
 * Clean memory of returned pointer after use */
/*************** ranking functions (banner originally repeated "statistics functions") ***/
void rankPCADiffAvePathways ( struct genoHeatmap *gh,
int subsetNum, char *raName, char *chromHeatmap,
struct geneSet *geneSets, FILE *output,
boolean (*func)(float data1[], unsigned long n1, float data2[],
unsigned long n2, float *r, float *prob),
boolean useAccess);
/* NOTE(review): undocumented in the original; presumably a PCA based variant
 * of rankDiffAvePathways that writes to output - confirm. */
struct geneSet *rankDiffAvePathways (struct genoHeatmap *gh, int subsetNum, char *raName,
char *chromHeatmap, struct geneSet *geneSets, FILE *fp,
boolean (*func)(float data1[], unsigned long n1, float data2[],
unsigned long n2, float *r, float *prob), char* funcName,
boolean useAccess);
/* Rank pathways based on the difference of means between two subsets */
void rankDiffAveGenes ( struct genoHeatmap *gh,
int subsetNum, char *raName, char *chromHeatmap,
struct slName *genes, FILE *output,
boolean (*func)(float data1[], unsigned long n1, float data2[],
unsigned long n2, float *r, float *prob),char *funcName,
boolean useAccess);
/* Rank genes based on the difference between two subsets */
/*************** hgStats related functions *********************/
void hgStatsFree(struct hgStats *pEl);
/* Free a single dynamically allocated hgStats */
void hgStatsFreeList(struct hgStats *pList);
/* Free a list of dynamically allocated hgStats's */
int hgStatsCmpScore(const void *va, const void *vb);
/* Compare to sort based on score - lowest first. */
void analysisResultFree(struct analysisResult *pt);
/* Free analysisResult */
void analysisResultHashFree(struct analysisResultHash *pt);
/* Free analysisResultHash */
struct geneSet *getCompGeneSet(int subsetNum, int numberOfGenes,
boolean (*func)(float data1[], unsigned long n1, float data2[],
unsigned long n2, float *r, float *prob), char *metaMethod);
/* NOTE(review): undocumented in the original - confirm purpose against the
 * implementation. */
/* Original note: "todo void analysisResultHashFree(struct analysisResultHash *pt);"
 * NOTE(review): that function is already declared a few lines up, so this
 * todo looks stale. */
double zScore(double p);
/* NOTE(review): undocumented in the original; presumably converts a
 * probability p to a z score - confirm direction and tail convention. */
struct analysisResultHash *makePathwayStatsHash(struct geneSet *geneSets,struct hash *geneHash, struct analysisResultHash *stats, char *metaMethod);
/* NOTE(review): undocumented in the original; presumably builds per-pathway
 * statistics for geneSets from the per-gene stats using metaMethod - confirm. */
void adjustBonferroni(struct genoHeatmap *gh);
/* NOTE(review): undocumented in the original; presumably applies a Bonferroni
 * correction to the analysis results stored in gh - confirm. */
#endif /* HGHEATMAPLIB_H */