src/hg/encode/hgEncodeVocab/hgEncodeVocab.c 1.31
1.31 2010/03/24 20:35:48 tdreszer
Added link for antibody validation documents. Now if we can just get some.
Index: src/hg/encode/hgEncodeVocab/hgEncodeVocab.c
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/hg/encode/hgEncodeVocab/hgEncodeVocab.c,v
retrieving revision 1.30
retrieving revision 1.31
diff -b -B -U 1000000 -r1.30 -r1.31
--- src/hg/encode/hgEncodeVocab/hgEncodeVocab.c 10 Feb 2010 20:03:57 -0000 1.30
+++ src/hg/encode/hgEncodeVocab/hgEncodeVocab.c 24 Mar 2010 20:35:48 -0000 1.31
@@ -1,429 +1,442 @@
/* hgEncodeVocab - print table of controlled vocabulary from ENCODE configuration files */
#include "common.h"
#include "hash.h"
#include "linefile.h"
#include "cheapcgi.h"
#include "hCommon.h"
#include "htmshell.h"
#include "ra.h"
#include "hui.h"
/* hgEncodeVocab - A CGI script to display the different types of encode controlled vocabulary.
* usage:
* hgEncodeVocab [ra=cv.ra] type=[Antibody|"Cell Line"|localization|rnaExtract|"Gene Type"] [tier=(1|2|3)]
* options:
* ra=cv.ra : Path to cv.ra file (default cv_file())
* type=TypeName : Type to display
* tier=N : If type="Cell Line" then this is the tier to display
* bgcolor=RRGGBB : Change background color (hex digits)
* organism=Human|Mouse : If type="Cell Line", then set 'Mouse' to override default Human
* term=a : Display row for a single term
* TODO: terms=a,b,c : Display rows for listed terms. Must use with 'type'.
* tag=a : Display row for a single term, using tag as identifier
* TODO: tags=a,b,c : Display rows for listed terms, using tags as identifiers. Must use with 'type'.
*/
static char const rcsid[] = "$Id$";
// Options that apply to all vocab types.  main() fills each one from the CGI
// variable named in its comment.
static char *termOpt = NULL;      // "term": only show the row whose term matches
static char *tagOpt = NULL;       // "tag": only show the row whose tag matches; checked before termOpt
static char *typeOpt = NULL;      // "type": vocabulary type to display; when NULL it is looked up via tag/term
static char *organismOpt = "Human"; // default, uses naming convention from dbDb table
static char *cv_file()
{
/* return default location of cv.ra (can specify as cgi var: ra=cv.ra) */
static char filePath[PATH_LEN];
safef(filePath, sizeof(filePath), "%s/encode/cv.ra", hCgiRoot());
if(!fileExists(filePath))
errAbort("Error: can't locate cv.ra; %s doesn't exist\n", filePath);
return filePath;
}
+void documentLink(struct hash *ra, char *term, char *docTerm,char *dir,char *title,boolean genericDoc)
+/* Print HTML links (<A> tags) to the documents named by setting docTerm of stanza ra.
+ *   ra         - settings of one vocabulary term
+ *   term       - term name; only used to build the generic file name
+ *   docTerm    - name of the setting that lists the document file(s)
+ *   dir        - URL directory prefix put in front of every file name
+ *   title      - link text for entries that carry no title of their own;
+ *                NULL means use docTerm as the link text
+ *   genericDoc - when TRUE and the setting is unusable, fall back to
+ *                <dir><term>_protocol.pdf, linked only if that file exists
+ * The setting is split into words with nextWord(); a word written as
+ * title:file supplies its own link text.  A setting that is absent, or that
+ * differentWord() reports as equal to "missing", takes the genericDoc path. */
+{
+char *s;
+if(title == NULL)
+ title = docTerm;
+
+// docUrl is the link target; docFile is the same path under hDocumentRoot()
+char docUrl[PATH_LEN];
+char docFile[PATH_LEN];
+// look up the setting and walk its words on a private copy (the words are cut up in place)
+s = hashFindVal(ra,docTerm);
+if(s != NULL && differentWord(s,"missing"))
+ {
+ char *docSetting = cloneString(s);
+ char *settings=docSetting;
+ while((s = nextWord(&settings)) != NULL)
+ {
+ char *docTitle = NULL;
+ char *fileName = NULL;
+ if(strchr(s,':')) // lab Specific setting "title:file": ':' is overwritten with 0 so title and file become separate strings (assumes strSwapChar returns s -- TODO confirm)
+ {
+ docTitle = strSwapChar(s,':',0);
+ fileName = docTitle + strlen(docTitle) + 1;
+ }
+ else
+ {
+ docTitle = title;
+ fileName = s;
+ }
+ safef(docUrl, sizeof(docUrl), "%s%s", dir, fileName);
+ safef(docFile, sizeof(docFile), "%s%s", hDocumentRoot(), docUrl);
+ // NOTE(review): the fileExists() check is disabled here, so docFile is built but unused and the link is printed unconditionally
+ printf(" <A TARGET=_BLANK HREF=%s>%s</A>\n", docUrl,docTitle);
+ }
+ freeMem(docSetting);
+ }
+else if(genericDoc)
+ { // generate a standard name; unlike the branch above, this link is only printed when the file exists
+ safef(docUrl, sizeof(docUrl), "%s%s_protocol.pdf", dir, term);
+ safef(docFile, sizeof(docFile), "%s%s", hDocumentRoot(), docUrl);
+ if (fileExists(docFile))
+ printf(" <A TARGET=_BLANK HREF=%s>%s</A>\n", docUrl,title);
+ }
+}
int termCmp(const void *va, const void *vb)
/* Sort comparator for a list of ra stanzas: va and vb each point at a
 * (struct hash *) element pointer.  Orders the two stanzas by their "term"
 * setting, ignoring case. */
{
struct hash *left = *((struct hash **)va);
struct hash *right = *((struct hash **)vb);
char *leftTerm = hashMustFindVal(left, "term");
char *rightTerm = hashMustFindVal(right, "term");
int order = strcasecmp(leftTerm, rightTerm);
return order;
}
void doTypeHeader(char *type)
/* Print the <TH> header cells for the table of the given vocabulary type.
 * Only the cells are printed; the surrounding <TR> tags come from the caller.
 * The column order of each layout corresponds to the <TD> cells that
 * doTypeRow() prints for the same type.  Types without a dedicated layout get
 * the two-column Term/Description header. */
{
if (sameString(type,"Antibody"))
{
- puts(" <TH>Term</TH><TH>Target Description</TH><TH>Antibody Description</TH><TH>Vendor ID</TH><TH>Lab</TH><TH>Lots</TH><TH>Target Link</TH>");
+ puts(" <TH>Term</TH><TH>Target Description</TH><TH>Antibody Description</TH><TH>Vendor ID</TH><TH>Lab</TH><TH>Documents</TH><TH>Lots</TH><TH>Target Link</TH>");
}
else if (sameString(type,"ripAntibody"))
{
puts(" <TH>Term</TH><TH>Antibody Description</TH><TH>Target Description</TH><TH>Vendor ID</TH>");
}
else if (sameString(type,"ripTgtProtein"))
{
puts(" <TH>Term</TH><TH>Alternative Symbols</TH><TH>Description</TH>");
}
else if (sameString(type,"localization"))
{
puts(" <TH>Term</TH><TH>Description</TH><TH>GO ID</TH>");
}
else if (sameString(type,"Cell Line"))
{
puts(" <TH>Term</TH><TH>Tier</TH><TH>Description</TH><TH>Lineage</TH><TH>Karyotype</TH><TH>Sex</TH><TH>Documents</TH><TH>Vendor ID</TH><TH>Term ID</TH>");
}
else
// any other type: generic two-column layout
puts(" <TH>Term</TH><TH>Description</TH>");
}
void doTypeRow(struct hash *ra, char *type, int *total)
/* Print one <TR> table row for stanza ra, laid out according to type, and
 * increment *total for every row that is counted.
 *   ra    - settings of one vocabulary term
 *   type  - vocabulary type selecting the column layout
 *   total - running count of rows printed
 * Returns without printing a row when a term/tag option is set and this stanza
 * does not match it, or (type "Cell Line") when the organism, tier or tiers
 * filters exclude the stanza. */
{
char *term;
char *s, *t, *u;
// Skip all rows that do not match term or tag if specified
// (tag takes precedence over term when both are given)
char *optVal = termOpt;
char *optType = "term";
if (tagOpt)
{
optVal = tagOpt;
optType = "tag";
}
if (optVal)
{
// quotes are removed from the option string itself, not from a copy
(void)stripChar(optVal,'\"');
// NOTE(review): hashMustFindVal presumably aborts when the stanza has no such setting -- confirm for stanzas without "tag"
if (differentWord(optVal, hashMustFindVal(ra, optType)))
return;
}
term = (char *)hashMustFindVal(ra, "term");
if (sameString(type,"Antibody"))
{
++(*total);
puts("<TR>");
printf(" <TD>%s</TD>\n", term);
s = hashFindVal(ra, "targetDescription");
printf(" <TD>%s</TD>\n", s ? s : " ");
s = hashFindVal(ra, "antibodyDescription");
printf(" <TD>%s</TD>\n", s ? s : " ");
// vendor cell: name and id, wrapped in a link when orderUrl is set
s = hashFindVal(ra, "vendorName");
t = hashFindVal(ra, "vendorId");
u = hashFindVal(ra, "orderUrl");
printf(" <TD>");
if (u)
printf("<A TARGET=_BLANK HREF=%s>", u);
printf("%s %s", s ? s : " ", t ? t : " ");
if (u)
printf("</A>");
puts("</TD>");
s = hashFindVal(ra, "lab");
printf(" <TD>%s</TD>\n", s ? s : " ");
+ // add links to the documents listed in the "validation" setting (no generic fallback)
+ printf(" <TD>");
+ documentLink(ra,term,"validation","/ENCODE/protocols/antibody/",NULL,FALSE);
+ printf(" </TD>\n");
+
s = hashFindVal(ra, "lots");
printf(" <TD>%s</TD>\n", s ? s : " ");
// target cell: targetId, wrapped in a link when targetUrl is set
t = hashFindVal(ra, "targetId");
u = hashFindVal(ra, "targetUrl");
printf(" <TD>");
if (u)
printf("<A TARGET=_BLANK HREF=%s>", u);
printf("%s", t ? t : " ");
if (u)
printf("</A>");
puts("</TD>");
puts("</TR>");
}
else if (sameString(type,"ripAntibody"))
{
++(*total);
puts("<TR>");
printf(" <TD>%s</TD>\n", term);
s = hashFindVal(ra, "antibodyDescription");
printf(" <TD>%s</TD>\n", s ? s : " ");
s = hashFindVal(ra, "targetDescription");
printf(" <TD>%s</TD>\n", s ? s : " ");
// vendor cell: name and id, wrapped in a link when orderUrl is set
s = hashFindVal(ra, "vendorName");
t = hashFindVal(ra, "vendorId");
u = hashFindVal(ra, "orderUrl");
printf(" <TD>");
if (u)
printf("<A TARGET=_BLANK HREF=%s>", u);
printf("%s %s", s ? s : " ", t ? t : " ");
if (u)
printf("</A>");
puts("</TD>");
puts("</TR>");
}
else if (sameString(type,"ripTgtProtein"))
{
++(*total);
puts("<TR>");
// the term itself is the link text when a url setting exists
s = hashFindVal(ra, "url");
if (s)
printf(" <TD><A TARGET=_BLANK HREF=\"%s\">%s</A></TD>\n", s, term);
else
printf(" <TD>%s</TD>\n", term);
s = hashFindVal(ra, "alternativeSymbols");
printf(" <TD>%s</TD>\n", s ? s : " ");
s = hashFindVal(ra, "description");
printf(" <TD>%s</TD>\n", s ? s : " ");
puts("</TR>");
}
else if (sameString(type,"localization"))
{
++(*total);
puts("<TR>");
printf(" <TD>%s</TD>\n", term);
// description is looked up with hashMustFindVal here, unlike the optional lookups used for the other types
s = hashMustFindVal(ra, "description");
printf(" <TD>%s</TD>\n", s);
s = hashFindVal(ra, "termId");
u = hashFindVal(ra, "termUrl");
printf(" <TD>");
if (u)
printf("<A TARGET=_BLANK HREF=%s>", u);
printf("%s", s ? s : " ");
if (u)
printf("</A>");
puts("</TD>");
puts("</TR>");
}
else if (sameString(type,"Cell Line"))
{
// NOTE(review): this HTML comment is printed for every Cell Line stanza that reaches this point, before the filters below run
printf("<!-- Cell Line table: contains links to protocol file and vendor description page -->");
// a stanza without an organism setting passes the organism filter
s = hashFindVal(ra, "organism");
if (s && differentString(s, organismOpt))
return;
// tier=N filter; an absent or zero tier option means no filtering
if (cgiOptionalInt("tier",0))
{
if (hashFindVal(ra,"tier") == NULL)
return;
if (atoi(hashFindVal(ra,"tier"))!=cgiOptionalInt("tier",0))
return;
}
// tiers=a,b,c filter: keep the stanza when its tier equals any listed value
if (cgiOptionalString("tiers"))
{
if (hashFindVal(ra,"tier") == NULL)
return;
boolean found=FALSE;
// NOTE(review): nextWord() moves the tiers pointer along the clone and the clone is not freed in this function
char *tiers=cloneString(cgiOptionalString("tiers"));
char *tier;
(void)strSwapChar(tiers,',',' ');
while((tier=nextWord(&tiers)))
{
if (atoi(hashFindVal(ra,"tier"))==atoi(tier))
{
found=TRUE;
break;
}
}
if(!found)
return;
}
++(*total);
puts("<TR>");
printf(" <TD>%s</TD>\n", term);
s = hashFindVal(ra, "tier");
printf(" <TD>%s</TD>\n", s ? s : " " );
s = hashFindVal(ra, "description");
printf(" <TD>%s</TD>\n", s ? s : " " );
s = hashFindVal(ra, "lineage");
printf(" <TD>%s</TD>\n", s ? s : " " );
s = hashFindVal(ra, "karyotype");
printf(" <TD>%s</TD>\n", s ? s : " " );
s = hashFindVal(ra, "sex");
printf(" <TD>%s</TD>\n", s ? s : " " );
-#define PROTOCOL_DIR "/ENCODE/protocols/cell/"
-#define PROTOCOL_TITLE "protocol"
// add links to protocol doc if it exists
printf(" <TD>");
- char protocolUrl[PATH_LEN];
- char protocolFile[PATH_LEN];
- // parse Protocol setting
- s = hashFindVal(ra,"protocol");
- if(s != NULL)
- {
- char *protocolSetting = cloneString(s);
- char *settings=protocolSetting;
- while((s = nextWord(&settings)) != NULL)
- {
- char *title = NULL;
- char *fileName = NULL;
- if(strchr(s,':')) // lab Specific setting
- {
- title = strSwapChar(s,':',0);
- fileName = title + strlen(title) + 1;
- }
- else
- {
- title = PROTOCOL_TITLE;
- fileName = s;
- }
- safef(protocolUrl, sizeof(protocolUrl), "%s%s", PROTOCOL_DIR, fileName);
- safef(protocolFile, sizeof(protocolFile), "%s%s", hDocumentRoot(), protocolUrl);
- if (fileExists(protocolFile))
- printf(" <A TARGET=_BLANK HREF=%s>%s</A>\n", protocolUrl,title);
- }
- freeMem(protocolSetting);
- }
- else
- { // generate a standard name
- safef(protocolUrl, sizeof(protocolUrl), "%s%s_protocol.pdf", PROTOCOL_DIR, term);
- safef(protocolFile, sizeof(protocolFile), "%s%s", hDocumentRoot(), protocolUrl);
- if (fileExists(protocolFile))
- printf(" <A TARGET=_BLANK HREF=%s>%s</A>\n", protocolUrl,PROTOCOL_TITLE);
- }
+ documentLink(ra,term,"protocol","/ENCODE/protocols/cell/",NULL,TRUE);
printf(" </TD>\n");
// vendor cell: name and id, wrapped in a link when orderUrl is set
s = hashFindVal(ra, "vendorName");
t = hashFindVal(ra, "vendorId");
u = hashFindVal(ra, "orderUrl");
printf(" <TD>");
if (u)
printf("<A TARGET=_BLANK HREF=%s>", u);
printf("%s %s", s ? s : " ", t ? t : " ");
if (u)
printf("</A>");
puts("</TD>");
// term id cell: termId, wrapped in a link when termUrl is set
s = hashFindVal(ra, "termId");
u = hashFindVal(ra, "termUrl");
printf(" <TD>");
if (u)
printf("<A TARGET=_BLANK HREF=%s>", u);
printf("%s", s ? s : " ");
if (u)
printf("</A>");
puts("</TD>");
puts("</TR>");
}
else
{
// any other type: term plus description; a stanza without a description is reported and then aborts the page
s = hashFindVal(ra, "description");
if(s != NULL)
{
++(*total);
puts("<TR>");
printf(" <TD>%s</TD>\n", term);
printf(" <TD>%s</TD>\n", s);
puts("</TR>");
}
else
{
// this row is printed but not counted in *total
printf("<TR>\n <TD>%s</TD>\n <TD>Unrecognised term</TD>\n</TR>\n", term);
errAbort("Error: Unrecognised type (%s)\n", type);
}
}
}
static char *normalizeType(char *type)
/* Strip double quotes from type and map the accepted synonyms onto the
 * canonical type names.  A synonym comes back as a cloned copy of the
 * canonical name; anything else comes back as the input pointer itself. */
{
(void)stripChar(type,'\"');

/* All spellings of the cell line vocabulary collapse to "Cell Line". */
if (sameWord(type,"Cell Line") || sameWord(type,"cellLine" ))
    return cloneString("Cell Line");
if (sameWord(type,"Cell Type") || sameWord(type,"cellType" ))
    return cloneString("Cell Line");

/* "Factor" is another name for the antibody vocabulary. */
if (sameWord(type,"Factor"))
    return cloneString("Antibody");

return type;
}
static char *findType(struct hash *cvHash)
/* returns the type that was requested or else the type associated with the term requested */
{
struct hashCookie hc = hashFirst(cvHash);
struct hashEl *hEl;
struct hash *ra;
char *type = typeOpt;
if (type == NULL) // If not type, but term (or tag), then search for match and use its type
{
char *optType = "tag";
char *optVal = tagOpt;
if (optVal == NULL)
{
optVal = termOpt;
if (optVal == NULL)
errAbort("Error: Required 'term', 'tag', or 'type' optument not found\n");
optType = "term";
}
(void)stripChar(optVal,'\"');
while ((hEl = hashNext(&hc)) != NULL)
{
ra = (struct hash *)hEl->val;
char *val = hashMustFindVal(ra, optType);
if (sameWord(val, optVal))
{
type = hashMustFindVal(ra, "type");
break;
}
}
}
if (type == NULL) // Still not type? abort
errAbort("Error: Required 'type', 'tag', or 'term' argument not found\n");
return normalizeType(type);
}
void doMiddle()
{
struct hash *cvHash = raReadAll(cgiUsualString("ra", cv_file()), "term");
struct hashCookie hc = hashFirst(cvHash);
struct hashEl *hEl;
struct slList *termList = NULL;
struct hash *ra;
char *type;
int total = 0;
puts("<TABLE BORDER=1 BGCOLOR=#FFFEE8 CELLSPACING=0 CELLPADDING=2>");
puts("<TR style=\"background:#D9E4F8\">");
type = findType(cvHash);
doTypeHeader(type);
puts("</TR>");
while ((hEl = hashNext(&hc)) != NULL)
{
ra = (struct hash *)hEl->val;
if (differentString(hashMustFindVal(ra, "type"), type))
continue;
slAddTail(&termList, ra);
}
slSort(&termList, termCmp);
while((ra = slPopHead(&termList)) != NULL)
{
// TODO: Add check for unknown tags in cv.ra
doTypeRow(ra, type, &total);
}
puts("</TABLE><BR>");
if(total > 1)
printf("Total = %d\n", total);
}
int main(int argc, char *argv[])
/* CGI entry point: read the options into the file-level option variables,
 * apply the optional background colour, and render the page via doMiddle. */
{
cgiSpoof(&argc, argv);

termOpt = cgiOptionalString("term");
tagOpt = cgiOptionalString("tag");
typeOpt = cgiOptionalString("type");
organismOpt = cgiUsualString("organism", organismOpt);

/* bgcolor is given as hex digits (RRGGBB). */
char *bgHex = cgiOptionalString("bgcolor");
if (bgHex != NULL)
    htmlSetBgColor(strtol(bgHex, NULL, 16));

htmlSetStyle(htmlStyleUndecoratedLink);
htmShell("ENCODE Controlled Vocabulary", doMiddle, "get");
return 0;
}