src/hg/near/hprdXmlToTab/hprdXmlToTab.c 1.1

1.1 2009/10/15 21:58:38 kent
Utility to convert an HPRD interaction XML file to a tab-separated list of protein/protein interactions; it seems to work. This is based on an autoXml parser and a snippet of code Galt wrote that now lives in hprdXmlToTab.c.
Index: src/hg/near/hprdXmlToTab/hprdXmlToTab.c
===================================================================
RCS file: src/hg/near/hprdXmlToTab/hprdXmlToTab.c
diff -N src/hg/near/hprdXmlToTab/hprdXmlToTab.c
--- /dev/null	1 Jan 1970 00:00:00 -0000
+++ src/hg/near/hprdXmlToTab/hprdXmlToTab.c	15 Oct 2009 21:58:38 -0000	1.1
@@ -0,0 +1,63 @@
+#include "common.h"
+#include "hprd.h"
+
+void usage(char *self)
+/* Report the expected command line through errAbort; self is printed as the program name. */
+{
+errAbort("Syntax error. Correct usage:\n%s input.xml output.P2P.tab complex.tab\n",self);
+}
+
+int main(int argc, char *argv[])
+/* Load the HPRD XML named by argv[1]; write one line per participant pair to
+ * argv[2] and the members of every >2-participant interaction to argv[3]. */
+{
+if (argc != 4)
+    usage(argv[0]);
+
+struct hprdEntrySet *entrySet = hprdEntrySetLoad(argv[1]);
+struct hprdInteraction *i=NULL, *iL = entrySet->hprdEntry->hprdInteractionList->hprdInteraction;
+int iCount = 0;
+FILE *fp = mustOpen(argv[2], "w");
+FILE *fc = mustOpen(argv[3], "w");
+
+for(i=iL;i;i=i->next)
+    {
+    ++iCount;
+    /* A missing participant list is treated as empty rather than dereferenced. */
+    struct hprdParticipant *q=NULL, *p=NULL, *pL = i->hprdParticipantList ? i->hprdParticipantList->hprdParticipant : NULL;
+    int pCount = slCount(pL);
+    if (pCount < 2)
+	{
+	/* pL may be NULL here (no participants), so only read its id if it exists. */
+	warn("participant count=%d which is < 2 for participant id = %s\n", pCount, pL ? pL->id : "(none)");
+	}
+    else
+	{
+	/* Two participants: distance 1.0; any larger group: 1.5 for every pair. */
+	double distance = pCount==2 ? 1.0 : 1.5;
+	/* Emit each unordered pair of participants exactly once. */
+	for(p=pL;p;p=p->next)
+	    for(q=p->next;q;q=q->next)
+		fprintf(fp, "%s\t%s\t%f\n",
+		    p->hprdInteractorRef->text,
+		    q->hprdInteractorRef->text,
+		    distance);
+	/* Complex members are keyed by the 1-based interaction number iCount. */
+	if (pCount > 2)  /* complex */
+	    for(p=pL;p;p=p->next)
+		{
+		fprintf(fc,"%d\t%s\n", iCount, p->hprdInteractorRef->text );
+		}
+	}
+    }
+
+uglyf("interaction count = %d\n", iCount);
+
+carefulClose(&fp);
+carefulClose(&fc);
+
+hprdEntrySetFree(&entrySet);
+
+return 0;
+}
+