src/hg/instinct/flatfileToBED15/flatfileToBED15.c 1.6
1.6 2010/05/28 20:47:53 cszeto
Removed some unnecessary printlines
Index: src/hg/instinct/flatfileToBED15/flatfileToBED15.c
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/hg/instinct/flatfileToBED15/flatfileToBED15.c,v
retrieving revision 1.5
retrieving revision 1.6
diff -b -B -U 1000000 -r1.5 -r1.6
--- src/hg/instinct/flatfileToBED15/flatfileToBED15.c 28 May 2010 20:10:13 -0000 1.5
+++ src/hg/instinct/flatfileToBED15/flatfileToBED15.c 28 May 2010 20:47:53 -0000 1.6
@@ -1,216 +1,214 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <limits.h>
#include <math.h>
#include "common.h"
#include "options.h"
#include "bed.h"
#define MAX_LINE 100000
#define MAX_TOKEN 50
#define MAX_FILENAME 200
/* Command-line options recognised by this program; handed to optionInit()
 * in main().  Each of the three options is declared as OPTION_STRING.
 * The {NULL, 0} row is presumably the end-of-table marker expected by the
 * options library -- confirm against options.h. */
static struct optionSpec optionSpecs[] =
{
{"prefix", OPTION_STRING},
{"flatFile", OPTION_STRING},
{"bedFile", OPTION_STRING},
{NULL, 0}
};
/*
** Prototypes
*/
void usage();
int printMicroarrayGroups(char *prefix, char line[])
/* Write "<prefix>_microarrayGroups.ra", describing one group that contains
 * every experiment named in the flat file's header line.
 *
 *   prefix - name stem used for the output file and for the group names.
 *   line   - NUL-terminated header line: a label column followed by one
 *            tab-separated column per experiment.  It is tokenised in
 *            place with strtok(), so its contents are destroyed.
 *
 * Returns the number of experiments, counted as the number of tabs that
 * appear before the end of the line.  Exits with status 1 if the output
 * file cannot be created.
 *
 * NOTE: the count is tab-based while the names are produced by strtok(),
 * which merges adjacent delimiters; a header with empty columns therefore
 * yields fewer names than expIds. */
{
    static const char suffix[] = "_microarrayGroups.ra";
    size_t nameSize = strlen(prefix) + sizeof suffix; /* sizeof counts the NUL */
    char *outfile = malloc(nameSize);
    FILE *fp;
    char *tok;
    int i, expCount = 0;

    /* The file name is sized from the prefix so that an arbitrarily long
     * prefix can never overrun a fixed buffer. */
    if (outfile == NULL) {
        fprintf(stderr, "ERROR: Out of memory building the output file name.\n");
        exit(1);
    }
    snprintf(outfile, nameSize, "%s%s", prefix, suffix);

    fp = fopen(outfile, "w");
    if (fp == NULL) {
        fprintf(stderr, "ERROR: Couldn't open %s for writing.\n", outfile);
        free(outfile);
        exit(1);
    }
    free(outfile);

    /* One tab precedes every experiment column. */
    for (i = 0; line[i] != '\0' && line[i] != '\n'; i++) {
        if (line[i] == '\t')
            expCount++;
    }

    fprintf(fp, "name %sGroups\n", prefix);
    fprintf(fp, "type groupings\n");
    fprintf(fp, "all %sAll\n\n", prefix);
    fprintf(fp, "name %sAll\n", prefix);
    fprintf(fp, "type all\n");
    fprintf(fp, "description All Arrays\n");

    fprintf(fp, "expIds ");
    for (i = 0; i < expCount; i++)
        fprintf(fp, "%d,", i);
    fprintf(fp, "\n");

    fprintf(fp, "groupSizes ");
    for (i = 0; i < expCount; i++)
        fprintf(fp, "1,");
    fprintf(fp, "\n");

    fprintf(fp, "names ");
    tok = strtok(line, "\t\n");   /* the label column -- not an experiment */
    tok = strtok(NULL, "\t\n");
    while (tok != NULL) {
        fprintf(fp, "%s,", tok);
        tok = strtok(NULL, "\t\n");
    }
    fprintf(fp, "\n");

    fclose(fp);
    return expCount;
}
int main(int argc, char *argv[])
{
int i=0, j=0, k=0;
char *prefix, *bedFileName, *flatFileName, outfile[100];
FILE *flatFile, *fp, *error;
optionInit(&argc, argv, optionSpecs);
prefix = optionVal("prefix", NULL);
if(!prefix)
{
fprintf(stderr, "ERROR: missing prefix.\n");
usage();
}
bedFileName = optionVal("bedFile", NULL);
if(!bedFileName)
{
fprintf(stderr, "ERROR: missing bedFile name.\n");
usage();
}
flatFileName = optionVal("flatFile", NULL);
if(!flatFileName)
{
fprintf(stderr, "ERROR: missing flatFile name.\n");
usage();
}
flatFile = fopen(flatFileName, "r");
if(!flatFile)
{
fprintf(stderr, "ERROR: can't open flatFile '%s'\n", flatFileName);
usage();
}
error = fopen("error.log", "w");
if(!error){
printf("Couldn't open the error log file. Exiting...\n");
exit(1);
}
sprintf(outfile, "%s.bed15", prefix);
fp = fopen(outfile, "w");
if(!error){
printf("Couldn't open the output file %s. Exiting...\n", outfile);
exit(1);
}
char line[MAX_LINE], *tok;
struct bed *b;
b = bedLoadAll(bedFileName);
struct hash *probeMap = newHash(0);
for(;b;b=b->next) {
hashAdd(probeMap, b->name, b);
}
//grab the info out for each probe
char probeName[MAX_TOKEN], expIds[MAX_LINE], expScores[MAX_LINE], dir[MAX_TOKEN], tmp[MAX_TOKEN];
int lineCount=0, expCount=0, lastExpCount=0;
while(fgets(line, MAX_LINE, flatFile)){
//init
expIds[0]= expScores[0] = tmp[0] = dir[0] = probeName[0] = '\0';
i = j = k = expCount = 0;
//copy the probe name from the line
while(line[j] != '\t' && line[j] != '\0' && line[j] != '\n')
{
probeName[j] = line[j];
j++;
}
probeName[j] = '\0';
struct hashEl *el = hashLookup(probeMap, probeName);
if(lineCount == 0){
lastExpCount=printMicroarrayGroups(prefix,line);
}else if(el == NULL) {
fprintf(error, "%s not in bedFile\n", probeName);
} else{
// print info to outfile
struct bed *b = (struct bed *) el->val;
//copy the expScores out of the flatFile line, and count expCount
tok=strtok(line, "\t\n");
tok=strtok(NULL, "\t\n");//get rid of the probename one
i=0;
while(tok != NULL){
if(strcmp(tok,"NaN") != 0) strcat(expScores, tok);
strcat(expScores, ",");
tok=strtok(NULL, "\t\n");
expCount++;
}
//check the expCount is ok, or if it's not set yet set it.
if(expCount != lastExpCount && lastExpCount!= 0){
fprintf(stderr,"ERROR: %s line has fewer expScores than the last line (%d vs. %d). Unparsable file! Exiting...\n", line, expCount, lastExpCount);
exit(1);
}else if(lastExpCount == 0) lastExpCount = expCount;
//make a string for the expIds based on expCount
for(i=0; i < expCount; i++){
sprintf(tmp,"%d,",i);
strcat(expIds, tmp);
}
if(b->chromStart < b->chromEnd) strcpy(dir, "+");
else strcpy(dir, "-");
fprintf(fp, "%s\t", b->chrom);
fprintf(fp, "%u\t", b->chromStart);
fprintf(fp, "%u\t", b->chromEnd);
fprintf(fp, "%s\t", probeName);
fprintf(fp, "0\t%s\t0\t0\t0\t1\t0\t0\t",dir);
fprintf(fp, "%d\t", expCount);
fprintf(fp, "%s\t", expIds);
fprintf(fp, "%s\n",expScores);
}
lineCount++;
}
fclose(flatFile);
fclose(error);
fclose(fp);
return 0;
}
/*
** Grunt Functions:
*/
void usage(void)
/* Print the command-line help to stderr and exit with status 1. */
{
    fprintf(stderr, "Usage: ./flatfileToBED15 -prefix=name -flatFile=filename -bedFile=filename\n");
    /* The groups file is written as <prefix>_microarrayGroups.ra, so the
     * help text names it that way. */
    fprintf(stderr, "\t-prefix should be the name you want on the results files (prefix_microarrayGroups.ra and prefix.bed15)\n");
    fprintf(stderr, "\t-flatFile should be tab delimited, one probe per line, one sample per column, beginning with a naming column\n");
    fprintf(stderr, "\t-bedFile should be a BED4 file containing coordinate information for each probe expected for the platform.\n");
    fprintf(stderr, "In addition to the bed15 file and microarrayGroups.ra file, this program will create an 'error.log'\n");
    fprintf(stderr, "file for all probes it couldn't find coordinates for in the provided BED4.\n");
    fprintf(stderr, "NOTE: NaN vals MUST be marked 'NaN' or the program will put them into your bed file as is, rather than as empty vals.\n");
    exit(1);
}