src/hg/instinct/flatfileToBED15/flatfileToBED15.c 1.6

1.6 2010/05/28 20:47:53 cszeto
Removed some unnecessary printlines
Index: src/hg/instinct/flatfileToBED15/flatfileToBED15.c
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/hg/instinct/flatfileToBED15/flatfileToBED15.c,v
retrieving revision 1.5
retrieving revision 1.6
diff -b -B -U 1000000 -r1.5 -r1.6
--- src/hg/instinct/flatfileToBED15/flatfileToBED15.c	28 May 2010 20:10:13 -0000	1.5
+++ src/hg/instinct/flatfileToBED15/flatfileToBED15.c	28 May 2010 20:47:53 -0000	1.6
@@ -1,216 +1,214 @@
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
 #include <limits.h>
 #include <math.h>
 
 #include "common.h"
 #include "options.h"
 #include "bed.h"
 
 /* Capacity, in bytes, of the buffer that holds one line read from the flat file. */
 #define MAX_LINE 100000
 /* Capacity, in bytes, of small scratch buffers meant for a single short field. */
 #define MAX_TOKEN 50
 /* Capacity, in bytes, of a buffer meant to hold a generated output file name. */
 #define MAX_FILENAME 200
 
 
 /* Command-line option table passed to optionInit() in main().  All three
  * options are declared as OPTION_STRING; the final {NULL, 0} entry is
  * presumably the end-of-table sentinel expected by options.h (confirm there). */
 static struct optionSpec optionSpecs[] =
 {
 	{"prefix", OPTION_STRING},
     {"flatFile", OPTION_STRING},
     {"bedFile", OPTION_STRING},
     {NULL, 0}
 };
 
 /*
 ** Prototypes
 */
 
 void usage();	/* prints the help text to stderr, then exits with status 1 */
 
 /*
 ** printMicroarrayGroups - write "<prefix>_microarrayGroups.ra" from the
 ** header line of the flat file.
 **
 ** prefix: stem used for the output file name and for the group names
 **         written inside it.
 ** line:   NUL-terminated header line (naming column followed by one
 **         tab-separated sample name per column).  It is tokenised in place
 **         with strtok(), so the caller's buffer is modified.
 **
 ** Returns the number of tab characters found before the first newline or
 ** the end of the string; this is used as the experiment count.
 ** Exits with status 1 if the file name cannot be built or the file cannot
 ** be opened or completely written.
 */
 int printMicroarrayGroups(char *prefix, char line[]){
 	static const char suffix[] = "_microarrayGroups.ra";
 	char *outfile, *tok;
 	/* Size the name from the prefix itself so a long prefix cannot overrun a fixed buffer. */
 	outfile = malloc(strlen(prefix) + sizeof suffix);
 	if(outfile == NULL){
 		fprintf(stderr, "ERROR: Out of memory while building the output file name.\n");
 		exit(1);
 	}
 	strcpy(outfile, prefix);
 	strcat(outfile, suffix);
 	FILE * fp = fopen(outfile, "w");
 	if(fp == NULL){
 		fprintf(stderr, "ERROR: Couldn't open %s for writing.\n", outfile);
 		free(outfile);
 		exit(1);
 	}
 	int i = 0,expCount = 0;
 	/* One tab per sample column; the scan is bounded by the string itself. */
 	for(i = 0; line[i] != '\0' && line[i] != '\n'; i++){
 		if(line[i] == '\t')
 			expCount++;
 	}
 	fprintf(fp, "name %sGroups\n", prefix);
 	fprintf(fp, "type groupings\n");
 	fprintf(fp, "all %sAll\n\n", prefix);
 	fprintf(fp, "name %sAll\n", prefix);
 	fprintf(fp, "type all\n");
 	fprintf(fp, "description All Arrays\n");
 	fprintf(fp, "expIds ");
 	for(i = 0; i < expCount; i++){
 		fprintf(fp, "%d,", i);
 	}
 	fprintf(fp, "\n");
 	fprintf(fp, "groupSizes ");
 	for(i = 0; i < expCount; i++){
 		fprintf(fp, "1,");
 	}
 	fprintf(fp, "\n");
 	fprintf(fp, "names ");
 	strtok(line, "\t\n"); //discard the naming-column entry
 	tok = strtok(NULL, "\t\n");
 	
-	fprintf(stderr, "EERE\n");
 	while(tok != NULL){
 		fprintf(fp,"%s,", tok);
 		tok=strtok(NULL, "\t\n");
 	}
-	fprintf(stderr, "never HERE\n");
 	fprintf(fp, "\n");
 	
 	/* fclose() flushes buffered output, so a failure here means the file is incomplete. */
 	if(fclose(fp) != 0){
 		fprintf(stderr, "ERROR: Couldn't finish writing %s.\n", outfile);
 		free(outfile);
 		exit(1);
 	}
 	free(outfile);
 	return expCount;
 }
 
 /*
 ** main - convert a tab-delimited flat file (one probe per line, one sample
 ** per column) into "<prefix>.bed15", using a bed file to look up each
 ** probe's coordinates.
 **
 ** Required options: -prefix, -flatFile, -bedFile (usage() is called when one
 ** is missing or the flat file cannot be opened).
 **
 ** The first line of the flat file is treated as the header and is handed to
 ** printMicroarrayGroups(); its return value becomes the expected number of
 ** scores per line.  Every later line whose first column is found in the bed
 ** file produces one output row; names that are not found are written to
 ** "error.log".  Scores spelled exactly "NaN" are emitted as empty values.
 **
 ** Returns 0 on success; exits with status 1 on any file error or when a
 ** line's score count differs from the expected count.
 */
 int main(int argc, char *argv[])
 {
     static const char bedSuffix[] = ".bed15";
     char *prefix, *bedFileName, *flatFileName, *outfile;
     FILE *flatFile, *fp, *error;

     optionInit(&argc, argv, optionSpecs);
     prefix = optionVal("prefix", NULL);
     if(!prefix)
         {
         fprintf(stderr, "ERROR: missing prefix.\n");
         usage();
         }
     bedFileName = optionVal("bedFile", NULL);
     if(!bedFileName)
         {
         fprintf(stderr, "ERROR: missing bedFile name.\n");
         usage();
         }
     flatFileName = optionVal("flatFile", NULL);
     if(!flatFileName)
         {
         fprintf(stderr, "ERROR: missing flatFile name.\n");
         usage();
         }

     flatFile = fopen(flatFileName, "r");
     if(!flatFile)
         {
         fprintf(stderr, "ERROR: can't open flatFile '%s'\n", flatFileName);
         usage();
         }

     error = fopen("error.log", "w");
     if(!error){
         printf("Couldn't open the error log file. Exiting...\n");
         exit(1);
     }

     /* Size the name from the prefix so a long prefix cannot overrun a fixed buffer. */
     outfile = malloc(strlen(prefix) + sizeof bedSuffix);
     if(!outfile){
         fprintf(stderr, "ERROR: Out of memory while building the output file name.\n");
         exit(1);
     }
     strcpy(outfile, prefix);
     strcat(outfile, bedSuffix);
     fp = fopen(outfile, "w");
     /* Check the stream that was just opened (the old code re-tested 'error' here). */
     if(!fp){
         printf("Couldn't open the output file %s. Exiting...\n", outfile);
         exit(1);
     }

     /* Index every bed record by name for per-line lookup. */
     struct bed *b;
     b = bedLoadAll(bedFileName);
     struct hash *probeMap = newHash(0);
     for(;b;b=b->next) {
         hashAdd(probeMap, b->name, b);
     }

     /* probeName is as large as the line buffer, so the first column always fits. */
     char line[MAX_LINE], probeName[MAX_LINE], expScores[MAX_LINE];
     int lineCount=0, lastExpCount=0;

     while(fgets(line, MAX_LINE, flatFile)){
         /* The probe name is everything before the first tab or newline. */
         size_t nameLen = strcspn(line, "\t\n");
         memcpy(probeName, line, nameLen);
         probeName[nameLen] = '\0';

         struct hashEl *el = hashLookup(probeMap, probeName);
         if(lineCount == 0){
             lastExpCount = printMicroarrayGroups(prefix, line);
         }else if(el == NULL){
             fprintf(error, "%s not in bedFile\n", probeName);
         }else{
             struct bed *probe = (struct bed *) el->val;
             char *scoreEnd = expScores, *tok;
             const char *dir;
             int expCount = 0, i;

             /* Copy the scores into a comma-terminated list and count them.
              * Each copied token plus its comma replaces a token plus the
              * delimiter before it in 'line', and the first token is dropped,
              * so the list cannot outgrow the MAX_LINE-sized buffer. */
             strtok(line, "\t\n");          /* discard the probe name column */
             tok = strtok(NULL, "\t\n");
             while(tok != NULL){
                 if(strcmp(tok, "NaN") != 0){
                     size_t tokLen = strlen(tok);
                     memcpy(scoreEnd, tok, tokLen);
                     scoreEnd += tokLen;
                 }
                 *scoreEnd++ = ',';
                 tok = strtok(NULL, "\t\n");
                 expCount++;
             }
             *scoreEnd = '\0';

             /* Every line must carry the expected number of scores; adopt
              * this line's count if none has been established yet. */
             if(expCount != lastExpCount && lastExpCount != 0){
                 fprintf(stderr,"ERROR: %s line has a different number of expScores than expected (%d vs. %d). Unparsable file! Exiting...\n", probeName, expCount, lastExpCount);
                 exit(1);
             }else if(lastExpCount == 0) lastExpCount = expCount;

             dir = (probe->chromStart < probe->chromEnd) ? "+" : "-";

             fprintf(fp, "%s\t", probe->chrom);
             fprintf(fp, "%u\t", probe->chromStart);
             fprintf(fp, "%u\t", probe->chromEnd);
             fprintf(fp, "%s\t", probeName);
             fprintf(fp, "0\t%s\t0\t0\t0\t1\t0\t0\t", dir);
             fprintf(fp, "%d\t", expCount);
             /* Stream the ids 0..expCount-1 directly; no fixed-size string to overflow. */
             for(i = 0; i < expCount; i++)
                 fprintf(fp, "%d,", i);
             fprintf(fp, "\t%s\n", expScores);
         }
         lineCount++;
     }
     fclose(flatFile);
     fclose(error);
     /* fclose() flushes buffered output, so a failure here means the file is incomplete. */
     if(fclose(fp) != 0){
         fprintf(stderr, "ERROR: Couldn't finish writing %s.\n", outfile);
         free(outfile);
         exit(1);
     }
     free(outfile);
     return 0;
 }
 
 /*
 ** Grunt Functions:
 */
 
 void usage()
 /* Explain usage on stderr and exit with status 1.  The text names the files
  * the program writes: <prefix>_microarrayGroups.ra, <prefix>.bed15 and
  * error.log. */
 {
     fprintf(stderr, "Usage: ./flatfileToBED15 -prefix=name -flatFile=filename -bedFile=filename\n");
     fprintf(stderr, "\t-prefix should be the name you want on the results files (prefix_microarrayGroups.ra and prefix.bed15)\n");
     fprintf(stderr, "\t-flatFile should be tab delimited, one probe per line, one sample per column, beginning with a naming column\n");
     fprintf(stderr, "\t-bedFile should be a BED4 file containing coordinate information for each probe expected for the platform.\n");
     fprintf(stderr, "In addition to the bed15 file and microarrayGroups.ra file, this program will create an 'error.log'\n");
     fprintf(stderr, "file for all probes it couldn't find coordinates for in the provided BED4.\n");
     fprintf(stderr, "NOTE: NaN vals MUST be marked 'NaN' or the program will put them into your bed file as is, rather than as empty vals.\n");
     exit(1);
 }