4898794edd81be5285ea6e544acbedeaeb31bf78
max
  Tue Nov 23 08:10:57 2021 -0800
Fixing pointers to README file for license in all source code files. refs #27614

diff --git src/hg/cgilib/gtexBoxplot.c src/hg/cgilib/gtexBoxplot.c
index 145d16e..58d23bd 100644
--- src/hg/cgilib/gtexBoxplot.c
+++ src/hg/cgilib/gtexBoxplot.c
@@ -1,161 +1,161 @@
 /* Create a PNG file with boxplot of gene expression 
  *      for GTEx (Genotype Tissue Expression) data. */
 
 /* Copyright (C) 2016 The Regents of the University of California 
- * See README in this or parent directory for licensing information. */
+ * See kent/LICENSE or http://genome.ucsc.edu/license/ for licensing information. */
 
 #include "common.h"
 #include "hash.h"
 #include "trashDir.h"
 #include "gtexInfo.h"
 #include "gtexTissue.h"
 #include "gtexSampleData.h"
 #include "pipeline.h"
 
 struct tissueSampleVals
 /* RPKM expression values for multiple samples */
 /* (Data structure shared by different implementations of boxplot (e.g. C, JS),
  * later abandoned) */
     {
     struct tissueSampleVals *next;
     char *name;         /* GTEx tissue name */
     char *description;  /* GTEx tissue description */
     int color;          /* GTEx tissue color */
     int count;          /* number of samples */
     double *vals;       /* RPKM values */
     double min, max;    /* minimum, maximum value */
     double q1, median, q3;      /* quartiles */
     struct slDouble *valList;   /* used to create val array */
     };
 
 static struct tissueSampleVals *getTissueSampleVals(char *geneId, boolean doLogTransform,
                                                 char *version, double *maxValRet)
 /* Get sample data for the gene.  Optionally log10 it. Return maximum value seen */
 {
 struct hash *tsHash = hashNew(0);
 struct tissueSampleVals *tsv;
 struct hashEl *hel;
 struct slDouble *val;
 double maxVal = 0;
 struct gtexSampleData *sd = NULL;
 char query[256];
 char **row;
 char buf[256];
 char *sampleDataTable = "gtexSampleData";
 safef(buf, sizeof(buf), "%s%s", sampleDataTable, gtexVersionSuffixFromVersion(version));
 struct sqlConnection *conn = hAllocConn("hgFixed");
 assert(sqlTableExists(conn, buf));
 sqlSafef(query, sizeof(query), "select * from %s where geneId='%s'", buf, geneId);
 struct sqlResult *sr = sqlGetResult(conn, query);
 while ((row = sqlNextRow(sr)) != NULL)
     {
     sd = gtexSampleDataLoad(row);
     if ((hel = hashLookup(tsHash, sd->tissue)) == NULL)
         {
         AllocVar(tsv);
         hashAdd(tsHash, sd->tissue, tsv);
         }
     else
         tsv = (struct tissueSampleVals *)hel->val;
     maxVal = max(maxVal, sd->score);
     val = slDoubleNew(sd->score);
     slAddHead(&tsv->valList, val);
     }
 /*  Fill in tissue descriptions, fill values array and calculate stats for plotting
         Then make a list, suitable for sorting by tissue or score
     NOTE: Most of this not needed for R implementation */
 struct gtexTissue *tis = NULL, *tissues = gtexGetTissues(version);
 struct tissueSampleVals *tsList = NULL;
 int i;
 if (doLogTransform)
     maxVal = log10(maxVal+1.0);
 for (tis = tissues; tis != NULL; tis = tis->next)
     {
     tsv = hashFindVal(tsHash, tis->name);
     if (tsv == NULL)
         {
         /* no non-zero values for this tissue/gene */
         AllocVar(tsv);
         val = slDoubleNew(0.0);
         slAddHead(&tsv->valList, val);
         }
     tsv->name = tis->name;
     tsv->description = tis->description;
     tsv->color = tis->color;
     int count = tsv->count = slCount(tsv->valList);
     double *vals = AllocArray(tsv->vals, count);
     for (i=0; i<count; i++)
         {
         val = slPopHead(&tsv->valList);
         if (doLogTransform)
             vals[i] = log10(val->val+1.0);
         else
             vals[i] = val->val;
         }
     doubleBoxWhiskerCalc(tsv->count, tsv->vals, 
                                 &tsv->min, &tsv->q1, &tsv->median, &tsv->q3, &tsv->max);
     slAddHead(&tsList, tsv);
     }
 if (maxValRet != NULL)
     *maxValRet = maxVal;
 return tsList;
 }
 
 /********************************************************/
 /* R implementation of GTEx boxplot.  This invokes R interpreter on an R script */
 
 static boolean drawGtexRBoxplot(char *geneName, struct tissueSampleVals *tsvList,
                         boolean doLogTransform, char *version, struct tempName *pngTn)
 /* Draw a box-and-whiskers plot from GTEx sample data, using R boxplot */
 {
 /* Create R data frame.  This is a tab-sep file, one row per sample, 
  * with columns for sample, tissue, rpkm */
 struct tempName dfTn;
 trashDirFile(&dfTn, "hgc", "gtexGene", ".df.txt");
 FILE *f = fopen(dfTn.forCgi, "w");
 if (f == NULL)
     errAbort("can't create temp file %s", dfTn.forCgi);
 fprintf(f, "sample\ttissue\trpkm\n");
 struct tissueSampleVals *tsv;
 int sampleId=1;
 int i;
 for (tsv = tsvList; tsv != NULL; tsv = tsv->next)
     {
     int count = tsv->count;
     // remove trailing parenthesized phrases as not worth label length
     chopSuffixAt(tsv->description, '(');
     for (i=0; i<count; i++)
         fprintf(f, "%d\t%s\t%0.3f\n", sampleId++, tsv->description, tsv->vals[i]);
     }
 fclose(f);
 
 // Plot to PNG file
 if (!pngTn)
     return FALSE;
 trashDirFile(pngTn, "hgc", "gtexGene", ".png");
 
 /* Exec R in quiet mode, without reading/saving environment or workspace */
 char *pipeCmd[] = {"Rscript","--vanilla","--slave","hgcData/gtexBoxplot.R", 
     geneName, dfTn.forCgi, pngTn->forHtml, 
     doLogTransform ? "log=TRUE" : "log=FALSE", "order=alpha", version, NULL};
 struct pipeline *pl = pipelineOpen1(pipeCmd, pipelineWrite | pipelineNoAbort, "/dev/null", NULL, 0);
 int ret = pipelineWait(pl);
 //NOTE: use "order=score" to order bargraph by median RPKM, descending
 
 if (ret == 0)
     return TRUE;
 return FALSE;
 }
 
 boolean gtexGeneBoxplot(char *geneId, char *geneName, char *version, 
                                 boolean doLogTransform, struct tempName *pngTn)
 /* Create a png temp file with boxplot of GTEx expression values for this gene. 
  * GeneId is the Ensembl gene ID.  GeneName is the HUGO name, used for graph title;
  * If NULL, label with the Ensembl gene ID */
 {
 struct tissueSampleVals *tsvs;
 tsvs  = getTissueSampleVals(geneId, doLogTransform, version, NULL);
 char *label = geneName ? geneName : geneId;
 return drawGtexRBoxplot(label, tsvs, doLogTransform, version, pngTn);
 }