dab89c1682e091dfa3780ce95359ebea9f6b7d52
braney
  Thu May 18 09:49:17 2017 -0700
add licensing info to optimalLeaf library

diff --git src/optimalLeaf/readFile.cc src/optimalLeaf/readFile.cc
index dc68de6..c2839e3 100644
--- src/optimalLeaf/readFile.cc
+++ src/optimalLeaf/readFile.cc
@@ -1,199 +1,205 @@
+/* This code provided by Ziv Bar-Joseph with the explicit understanding that 
+ * no licensing is required for it to be used in the UCSC Genome Browser
+ * as long as reference is made to this paper:
+ * https://www.ncbi.nlm.nih.gov/pubmed/12801867
+ */
+
 //----------------------------------------------------
 //                   readFile.cc
 // reads the input file. 
 // -----------------------------------------------------
 #include <stdlib.h>
 #include <iostream>
 #include <fstream>
 #include <string.h>
 #include <stdio.h>
 #include <ctype.h>
 #include <math.h>
 #include "readFile.hh"
 
 using namespace std;
 // Size of the scratch buffer that holds the text of a single value field;
 // it is also added to orfL+descL to size the per-line buffers used by the
 // file-reading constructor.
 const int MAXSIZE = 200000;
 // Missing-value threshold as a fraction of the experiments: a gene with more
 // than ceil(missing*expNum) missing values is dropped while reading the file.
 const float missing = 0.75;
 
 // isspace() is only defined for unsigned char values and EOF, so every plain
 // char read from the file goes through this wrapper.
 static inline bool rfIsSpace(char c) {
   return isspace(static_cast<unsigned char>(c)) != 0;
 }
 
 // Releases `count` line buffers and the pointer array that holds them.
 static void rfFreeLines(char **lines, int count) {
   for(int i=0;i<count;i++) {
     delete []lines[i];
   }
   delete []lines;
 }
 
 // Reads the file, generates the expression matrix and retrieves the gene
 // names, gene descriptions and experiment names.
 //
 // Layout, as parsed below:
 //   - line 1 is a header: a first column, an optional second column titled
 //     exactly "name", then one whitespace-delimited title per experiment;
 //   - each following line is one gene: its identifier, its description
 //     (only when the "name" column is present, otherwise the identifier is
 //     reused), then one tab-separated value per experiment.  An empty field
 //     is a missing value and is stored as 110.
 //
 // fileName - path of the file to read
 // orfL     - bytes allocated for each gene name
 // descL    - bytes allocated for each gene description and experiment name
 //
 // Genes with more than ceil(missing*expNum) missing values, or with exactly
 // one value present, are dropped; numGenes reflects the genes kept.
 // If the file cannot be opened or holds no lines, a message is written to
 // cerr and the object is left as an empty data set (zero counts, NULL arrays).
 //
 // NOTE: field lengths are not checked against orfL, descL or MAXSIZE, and
 // every gene line is expected to hold a field for every experiment.
 readFile::readFile(char *fileName,int orfL, int descL) {
   char findName[5];
   int isName=0;
   // fgets() returns at most lineLen-1 characters plus the terminator
   const int lineLen = orfL+descL+MAXSIZE;
   // one spare byte so that a '\n' can be appended to a full-length last line
   const int lineCap = lineLen+1;
   int i, j, k, len, numT, place;
   int curLines = 100; // initial guess for number of genes
   int nlines=0; // actual number of lines
   int numMiss,removeGenes = 0;
   int redI = 0; // genes dropped so far; kept genes are stored at i-redI
 
   // start from an empty data set so that the early exits leave it consistent
   numGenes = 0;
   expNum = 0;
   vals = NULL;
   geneNames = NULL;
   geneDesc = NULL;
   expNames = NULL;
 
   FILE *from=fopen(fileName,"r");
   if(from == NULL) {
     std::cerr<<"cannot open file "<<fileName<<'\n';
     return;
   }
 
   char *buffer = new char[lineLen];
   char **lines = new char*[curLines];
   for(i=0;i<curLines;i++) {
     lines[i] = new char[lineCap];
   }
   while(fgets(buffer,lineLen,from)) {
     strcpy(lines[nlines],buffer);
     nlines++;
     if(nlines == curLines) { // overflow, double the number of line buffers
       char **grown = new char*[curLines*2];
       for(i=0;i<curLines;i++) {
         grown[i] = lines[i]; // keep the filled buffers, only the table moves
       }
       for(i=curLines;i<curLines*2;i++) {
         grown[i] = new char[lineCap];
       }
       delete []lines;
       lines = grown;
       curLines = curLines * 2;
     }
   }
   fclose(from);
 
   if(nlines == 0) { // nothing to parse, not even a header line
     std::cerr<<"no lines read from file "<<fileName<<'\n';
     rfFreeLines(lines,curLines);
     delete []buffer;
     return;
   }
 
   /* add '\n' at the end of last line if needed */
   if(!strchr(lines[nlines-1],'\n')) {
     len=strlen(lines[nlines-1]);
     lines[nlines-1][len]='\n';
     lines[nlines-1][len+1]=0;
   }
 
   // compute the number of values in the first line, and find if 'name' was
   // used
   strcpy(findName,"name");
 
   len=strlen(lines[0]);
   j = 0;
   numT = 0;
   while(j < len && numT == 0) { // move past the first tab
     if(lines[0][j]=='\t')
       numT++;
     j++;
   }
   i = 0;
   // check if the 'name' column is present in the input file: copy the second
   // header field, including the delimiter that ends it, into buffer
   while(j < len && numT == 1) {
     buffer[i] = lines[0][j];
     if(lines[0][j]=='\t')
       numT++;
     j++;
     i++;
   }
   if(i > 0)
     buffer[i-1] = '\0'; // overwrite the trailing delimiter
   else
     buffer[0] = '\0';   // the header has no second field
   if(strcmp(buffer,findName) == 0)
     isName = 1;
   numT = 0;
   j=0;
   while(lines[0][j]!='\n') {
     if(lines[0][j]=='\t')
       numT++;
     j++;
   }
   numT++; // end of line
   numGenes = nlines - 1;
   expNum = numT - 1- isName;
   geneNames = new char*[numGenes];
   geneDesc = new char*[numGenes];
   vals = new float*[numGenes];
   expNames = new char*[expNum];
   for(i=0;i<numGenes;i++) {
     geneNames[i] = new char[orfL];
     geneDesc[i] = new char[descL];
     vals[i] = new float[expNum];
   }
   for(i=0;i<expNum;i++) {
     expNames[i] = new char[descL];
   }
   // skip the header columns that precede the experiment names
   j = 0;
   numT = 0;
   while(j<len && numT < (1+isName)) {
     if(rfIsSpace(lines[0][j])) {
       numT++;
     }
     j++;
   }
   // find experiment names
   for(i=0;i<expNum;i++) {
     place = 0;
     while(!rfIsSpace(lines[0][j])) {
       expNames[i][place] = lines[0][j];
       j++;
       place++;
     }
     j++; // step over the delimiter
     expNames[i][place] = '\0';
   }
   // get the gene values
   char *geneVal = new char[MAXSIZE]; // text of the value being parsed
   for(i=0;i<numGenes;i++) {
     const char *line = lines[i+1];
     const int slot = i-redI; // a dropped gene's slot is reused by the next gene
     j=0;
     place = 0;
     while(!rfIsSpace(line[j])) {
       geneNames[slot][place] = line[j];
       place++;
       j++;
     }
     geneNames[slot][place] = '\0';
     j++; // step over the delimiter that follows the gene name
     if(isName) {
       place = 0;
       while(!rfIsSpace(line[j])) {
         geneDesc[slot][place] = line[j];
         place++;
         j++;
       }
       geneDesc[slot][place] = '\0';
       // Step over the delimiter that follows the description.  This must
       // only happen when a description column exists, otherwise the first
       // character of the first value would be skipped.
       j++;
     }
     else {
       strcpy(geneDesc[slot],geneNames[slot]);
     }
     numMiss = 0; // the number of missing values
     for(k=0;k<expNum;k++) {
       place = 0;
       while(!rfIsSpace(line[j])) {
         geneVal[place] = line[j];
         place++;
         j++;
       }
       geneVal[place] = '\0';
       if(geneVal[0] == '\0') {
         vals[slot][k] = 110; // missing values are replaced by a number
                              // greater than 100, however when writing the
                              // output file, they are restored.
         numMiss++;
       }
       else
         vals[slot][k] = atof(geneVal);
       j++;
     }
     if(numMiss > (int)ceil(missing*expNum) || (numMiss + 1) == expNum) { // too many missing values
       removeGenes++;
       redI++;
     }
   }
   delete []geneVal;
   delete []buffer;
   rfFreeLines(lines,curLines);
   numGenes = numGenes - removeGenes;
   std::cerr<<"removed "<<removeGenes<<" genes due to missing values"<<'\n';
 }
 
 // Builds a readFile around data that is already in memory instead of parsing
 // a file.  The pointers are stored as given; nothing is copied.
 //
 // inumGenes  - number of genes
 // iexpNum    - number of experiments
 // ivals      - expression values
 // igeneNames - gene names
 // igeneDesc  - gene descriptions
 // iexpNames  - experiment names
 readFile::readFile( int inumGenes, int iexpNum, float **ivals, char **igeneNames,char **igeneDesc,char **iexpNames)
 {
   numGenes = inumGenes;
   expNum = iexpNum;
   vals = ivals;
   geneNames = igeneNames;
   geneDesc = igeneDesc;
   // was "expNames = expNames", a self-assignment that dropped the argument
   expNames = iexpNames;
 }