src/hg/instinct/flatfileToBED15/flatfileToBED15.c 1.5
1.5 2010/05/28 20:10:13 cszeto
Added writing of the microarrayGroups file
Index: src/hg/instinct/flatfileToBED15/flatfileToBED15.c
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/hg/instinct/flatfileToBED15/flatfileToBED15.c,v
retrieving revision 1.4
retrieving revision 1.5
diff -b -B -U 4 -r1.4 -r1.5
--- src/hg/instinct/flatfileToBED15/flatfileToBED15.c 18 Oct 2009 22:11:30 -0000 1.4
+++ src/hg/instinct/flatfileToBED15/flatfileToBED15.c 28 May 2010 20:10:13 -0000 1.5
@@ -9,11 +9,14 @@
#include "bed.h"
#define MAX_LINE 100000
#define MAX_TOKEN 50
+#define MAX_FILENAME 200
+
+/* Command-line options handed to optionInit() in main(); every option takes a string value.
+ * The trailing {NULL, 0} entry presumably marks the end of the table - confirm against options.h. */
static struct optionSpec optionSpecs[] =
{
+ {"prefix", OPTION_STRING},
{"flatFile", OPTION_STRING},
{"bedFile", OPTION_STRING},
{NULL, 0}
};
@@ -23,15 +26,68 @@
*/
void usage();
+/* printMicroarrayGroups - write "<prefix>_microarrayGroups.ra" from the flat file's header line.
+ *   prefix - prepended to the output file name and to the group names written into the file.
+ *   line   - NUL-terminated header line: a naming column followed by tab-separated sample
+ *            names. The buffer is only read, never modified.
+ * Returns the number of tab characters before the end of the line, i.e. the number of
+ * columns that follow the naming column. Exits with status 1 if the output file name does
+ * not fit in MAX_FILENAME bytes or the file cannot be opened or completely written.
+ */
+int printMicroarrayGroups(char *prefix, char line[]){
+    char outfile[MAX_FILENAME];
+    int i = 0, expCount = 0;
+    // snprintf rather than sprintf: a long prefix must not overrun outfile
+    int len = snprintf(outfile, sizeof(outfile), "%s_microarrayGroups.ra", prefix);
+    if(len < 0 || (size_t)len >= sizeof(outfile)){
+        fprintf(stderr, "ERROR: prefix '%s' is too long for an output file name.\n", prefix);
+        exit(1);
+    }
+    FILE * fp = fopen(outfile, "w");
+    if(fp == NULL){
+        fprintf(stderr, "ERROR: Couldn't open %s for writing.\n", outfile);
+        exit(1);
+    }
+    // every tab introduces one more column after the naming column
+    for(i = 0; i < MAX_LINE; i++){
+        if(line[i]=='\0' || line[i] == '\n')
+            break;
+        if(line[i] == '\t')
+            expCount++;
+    }
+    fprintf(fp, "name %sGroups\n", prefix);
+    fprintf(fp, "type groupings\n");
+    fprintf(fp, "all %sAll\n\n", prefix);
+    fprintf(fp, "name %sAll\n", prefix);
+    fprintf(fp, "type all\n");
+    fprintf(fp, "description All Arrays\n");
+    fprintf(fp, "expIds ");
+    for(i = 0; i < expCount; i++){
+        fprintf(fp, "%d,", i);
+    }
+    fprintf(fp, "\n");
+    fprintf(fp, "groupSizes ");
+    for(i = 0; i < expCount; i++){
+        fprintf(fp, "1,");
+    }
+    fprintf(fp, "\n");
+    fprintf(fp, "names ");
+    // Walk the fields by hand instead of using strtok(): strtok() skips a leading tab and
+    // merges consecutive tabs, so an empty naming column or sample name would yield fewer
+    // names than expIds (and would discard the first sample instead of the naming column).
+    const char *field = line;
+    while(*field != '\0' && *field != '\n' && *field != '\t')
+        field++; // skip the naming column
+    while(*field == '\t'){
+        field++; // step over the tab that introduces this field
+        size_t nameLen = strcspn(field, "\t\n");
+        fprintf(fp, "%.*s,", (int)nameLen, field);
+        field += nameLen;
+    }
+    fprintf(fp, "\n");
+
+    // fclose flushes buffered output, so a failure here means the file is incomplete
+    if(fclose(fp) != 0){
+        fprintf(stderr, "ERROR: Couldn't finish writing %s.\n", outfile);
+        exit(1);
+    }
+    return expCount;
+}
+
+
+/* Convert a tab-delimited flat file of per-probe values into <prefix>.bed15, taking probe
+ * coordinates from the BED file. The first line of the flat file is treated as the header
+ * and handed to printMicroarrayGroups(); probes that are not found in probeMap are listed
+ * in error.log instead of being written to the bed15 file. */
int main(int argc, char *argv[])
{
int i=0, j=0, k=0;
- char *bedFileName, *flatFileName;
- FILE *flatFile = NULL;
+ char *prefix, *bedFileName, *flatFileName, outfile[MAX_FILENAME];
+ FILE *flatFile, *fp, *error;
optionInit(&argc, argv, optionSpecs);
+ prefix = optionVal("prefix", NULL);
+ if(!prefix)
+ {
+ fprintf(stderr, "ERROR: missing prefix.\n");
+ usage();
+ }
bedFileName = optionVal("bedFile", NULL);
if(!bedFileName)
{
fprintf(stderr, "ERROR: missing bedFile name.\n");
@@ -52,14 +106,22 @@
fprintf(stderr, "ERROR: can't open flatFile '%s'\n", flatFileName);
usage();
}
- FILE * error = fopen("error.log", "w");
+ error = fopen("error.log", "w");
if(!error){
printf("Couldn't open the error log file. Exiting...\n");
exit(1);
}
+ // snprintf so an over-long prefix is reported instead of overrunning outfile;
+ // a negative (error) return becomes a huge size_t and is rejected as well
+ if((size_t)snprintf(outfile, sizeof(outfile), "%s.bed15", prefix) >= sizeof(outfile)){
+     fprintf(stderr, "ERROR: prefix '%s' is too long for an output file name.\n", prefix);
+     exit(1);
+ }
+ fp = fopen(outfile, "w");
+ // test fp here, not error: error was already checked above, so testing it again
+ // would let a failed open of the bed15 file go unnoticed
+ if(!fp){
+ printf("Couldn't open the output file %s. Exiting...\n", outfile);
+ exit(1);
+ }
+
+
char line[MAX_LINE], *tok;
struct bed *b;
b = bedLoadAll(bedFileName);
@@ -69,15 +131,14 @@
}
//grab the info out for each probe
char probeName[MAX_TOKEN], expIds[MAX_LINE], expScores[MAX_LINE], dir[MAX_TOKEN], tmp[MAX_TOKEN];
- int expCount=0, lastExpCount=0;
+ int lineCount=0, expCount=0, lastExpCount=0;
while(fgets(line, MAX_LINE, flatFile)){
//init
expIds[0]= expScores[0] = tmp[0] = dir[0] = probeName[0] = '\0';
i = j = k = expCount = 0;
-
//copy the probe name from the line
while(line[j] != '\t' && line[j] != '\0' && line[j] != '\n')
{
probeName[j] = line[j];
@@ -85,9 +146,11 @@
}
probeName[j] = '\0';
struct hashEl *el = hashLookup(probeMap, probeName);
- if(el == NULL) {
+ // the first line is the header: it names the samples and is never written as a probe
+ if(lineCount == 0){
+ lastExpCount=printMicroarrayGroups(prefix,line);
+ }else if(el == NULL) {
fprintf(error, "%s not in bedFile\n", probeName);
} else{
// print info to outfile
struct bed *b = (struct bed *) el->val;
@@ -117,20 +180,22 @@
if(b->chromStart < b->chromEnd) strcpy(dir, "+");
else strcpy(dir, "-");
- printf("%s\t", b->chrom);
- printf("%u\t", b->chromStart);
- printf("%u\t", b->chromEnd);
- printf("%s\t", probeName);
- printf("0\t%s\t0\t0\t0\t1\t0\t0\t",dir);
- printf("%d\t", expCount);
- printf("%s\t", expIds);
- printf("%s\n",expScores);
+ fprintf(fp, "%s\t", b->chrom);
+ fprintf(fp, "%u\t", b->chromStart);
+ fprintf(fp, "%u\t", b->chromEnd);
+ fprintf(fp, "%s\t", probeName);
+ fprintf(fp, "0\t%s\t0\t0\t0\t1\t0\t0\t",dir);
+ fprintf(fp, "%d\t", expCount);
+ fprintf(fp, "%s\t", expIds);
+ fprintf(fp, "%s\n",expScores);
}
+ lineCount++;
}
fclose(flatFile);
fclose(error);
+ fclose(fp);
return 0;
}
/*
@@ -139,12 +204,13 @@
void usage()
/* Explain usage and exit. */
+/* The text is written to stderr and the process exits with status 1, so this never returns. */
{
- fprintf(stderr, "Usage: ./flatfileToBED15 -flatFile=filename -bedFile=filename > [outputBED15_filename]\n");
+ fprintf(stderr, "Usage: ./flatfileToBED15 -prefix=name -flatFile=filename -bedFile=filename\n");
+ fprintf(stderr, "\t-prefix should be the name you want on the results files (prefix_microarrayGroups.ra and prefix.bed15)\n");
fprintf(stderr, "\t-flatFile should be tab delimited, one probe per line, one sample per column, beginning with a naming column\n");
fprintf(stderr, "\t-bedFile should be a BED4 file containing coordinate information for each probe expected for the platform.\n");
- fprintf(stderr, "In addition to the bed15 file produced in stdout, this program will create an 'error.log'\n");
+ fprintf(stderr, "In addition to the bed15 file and microarrayGroups.ra file, this program will create an 'error.log'\n");
- fprintf(stderr, "file for all probes it coulnd't find coordinates for in the provided BED4.\n");
+ fprintf(stderr, "file for all probes it couldn't find coordinates for in the provided BED4.\n");
fprintf(stderr, "NOTE: NaN vals MUST be marked 'NaN' or the program will put them into your bed file as is, rather than as empty vals.\n");
exit(1);
}