src/hg/utils/rgdGeneXref2/rgdGeneXref2.c 1.1
1.1 2010/02/05 22:33:20 fanhsu
Created rgdGeneXref2.c.
Index: src/hg/utils/rgdGeneXref2/rgdGeneXref2.c
===================================================================
RCS file: src/hg/utils/rgdGeneXref2/rgdGeneXref2.c
diff -N src/hg/utils/rgdGeneXref2/rgdGeneXref2.c
--- /dev/null 1 Jan 1970 00:00:00 -0000
+++ src/hg/utils/rgdGeneXref2/rgdGeneXref2.c 5 Feb 2010 22:33:20 -0000 1.1
@@ -0,0 +1,115 @@
+/* rgdGeneXref2 - from all mRNAs in a genome (e.g. rn3) referenced by SWISS-PROT
+ generate a list of proteins and a list of protein/mRNA pairs */
+#include "common.h"
+#include "hCommon.h"
+#include "hdb.h"
+
+void usage()
+/* Explain usage and exit. */
+{
+errAbort(
+ "rgdGeneXref2 - parse out Dbxref field of the raw RGD Gene GFF file to generate data for the rgdGeneXref22 table\n"
+ " generate a list of proteins and a list of protein/mRNA pairs.\n"
+ "usage:\n"
+ " rgdGeneXref2 databas outfile\n"
+ " database is the genome database\n"
+ " outfile is the output file name\n"
+ "example: rgdGeneXref2 rn4 rgdGeneXref2.tab\n");
+}
+
+int main(int argc, char *argv[])
+{
+struct sqlConnection *conn;
+
+char query[512];
+struct sqlResult *sr;
+char **row;
+
+char *dataBase;
+char *chp;
+char *chp9;
+char *feature, *xrefStr;
+char *Dbxref;
+char *DbxrefEnd = NULL;
+char *rgdGeneId;
+char *rest = NULL;
+FILE *outf;
+char *outfileName;
+boolean more;
+
+if (argc != 3) usage();
+dataBase = argv[1];
+outfileName = argv[2];
+
+outf = mustOpen(outfileName, "w");
+
+conn= hAllocConn(dataBase);
+
+sprintf(query,"select feature, rgdId from rgdGeneRaw0 where feature = 'gene'");
+sr = sqlMustGetResult(conn, query);
+row = sqlNextRow(sr);
+while (row != NULL)
+ {
+ feature = row[0];
+ xrefStr = row[1];
+
+ Dbxref = row[1];
+ chp9 = strstr(xrefStr, ";");
+ if (chp9 != NULL)
+ {
+ *chp9 = '\0';
+ DbxrefEnd = chp9;
+ }
+
+ chp = Dbxref;
+
+ /* get start of "RGD:" */
+ chp = strstr(chp, ",");
+ chp ++;
+ rgdGeneId = chp;
+
+ /* check if there are other references beside the RGD: entry */
+ more = FALSE;
+ chp = strstr(rgdGeneId, ",");
+ if (chp != NULL)
+ {
+ more = TRUE;
+ *chp = '\0';
+ chp++;
+ rest = chp;
+ }
+
+ if (more)
+ {
+ chp9 = strstr(rest, ",");
+ while (chp9 != NULL)
+ {
+ *chp9 = '\0';
+ fprintf(outf, "%s\t%s\n", rgdGeneId, chp); fflush(stdout);
+ if (DbxrefEnd == chp9)
+ {
+ /* if end is reached, break */
+ break;
+ }
+ else
+ {
+ /* keep looking for next entry */
+ chp9++;
+ chp = chp9;
+ chp9 = strstr(chp, ",");
+ }
+ }
+
+ /* print last entry */
+ fprintf(outf, "%s\t%s\n", rgdGeneId, chp); fflush(stdout);
+ }
+
+ row = sqlNextRow(sr);
+ }
+sqlFreeResult(&sr);
+hFreeConn(&conn);
+fclose(outf);
+
+return(0);
+}
+