c8d8db67358f26a4150328fdc1d3e6849eaa01ee max Thu Oct 5 16:13:34 2017 -0700 changing pslSelect for uniprot track, refs #20174 diff --git src/hg/pslSelect/pslSelect.c src/hg/pslSelect/pslSelect.c index f02123e..ee7308a 100644 --- src/hg/pslSelect/pslSelect.c +++ src/hg/pslSelect/pslSelect.c @@ -5,61 +5,70 @@ #include "common.h" #include "options.h" #include "linefile.h" #include "dystring.h" #include "hash.h" #include "localmem.h" #include "psl.h" /* command line option specifications */ static struct optionSpec optionSpecs[] = { {"qtPairs", OPTION_STRING}, {"queries", OPTION_STRING}, {"qtStart", OPTION_STRING}, {"queryPairs", OPTION_STRING}, + {"qDelim", OPTION_STRING}, + {"qPass", OPTION_BOOLEAN}, {NULL, 0} }; #define QT_PAIRS_MODE 1 #define QUERY_MODE 2 #define QUERY_PAIRS_MODE 3 #define QT_START 4 static int mode = 0; static int isPairs = TRUE; +static char qDelim = '\0'; + +static boolean isQPass = FALSE; + /* global data from command line */ static char *selectFile; void usage() /* Explain usage and exit. */ { errAbort( "pslSelect - select records from a PSL file.\n" "\n" "usage:\n" " pslSelect [options] inPsl outPsl\n" "\n" "Must specify a selection option\n" "\n" "Options:\n" " -qtPairs=file - file is tab-separated qName and tName pairs to select\n" + " -qPass - pass all PSLs with queries that do not appear in qtPairs file at all\n" + " (default is to remove all PSLs for queries that are not in file)\n" " -queries=file - file has qNames to select\n" - " -queryPairs=file - file is tab-separated paris of qNames to select\n" + " -queryPairs=file - file is tab-separated pairs of qNames to select\n" " with new qName to substitute in output file\n" - " -qtStart=file - file is tab-seperate rows of qName,tName,tStart\n" + " -qtStart=file - file is tab-separate rows of qName,tName,tStart\n" + " -qDelim=char - use only the part of the query name before this character\n" ); } struct hash *loadSelect(char *selectFile) /* load select file. */ { struct hash *hash = hashNew(20); char *row[2]; struct lineFile *lf = lineFileOpen(selectFile, TRUE); int wordCount = isPairs ? 2 : 1; while (lineFileNextRowTab(lf, row, wordCount)) { char *value = isPairs ? row[1] : ""; hashAdd(hash, row[0], lmCloneString(hash->lm, value)); } @@ -71,77 +80,102 @@ /* load select file. */ { struct hash *hash = hashNew(20); char *row[3]; char buff[128]; struct lineFile *lf = lineFileOpen(selectFile, TRUE); while (lineFileNextRowTab(lf, row, ArraySize(row))) { sprintf(buff, "%s%s",lmCloneString(hash->lm, row[1]),lmCloneString(hash->lm, row[2])); hashAdd(hash, row[0], lmCloneString(hash->lm, buff)); } lineFileClose(&lf); return hash; } +char* firstPart(char* name, char delim) +/* return only a clone of the part of name before the split character. */ +{ +int i; +for (i=0; name[i]!='\0'; i++) + if (name[i]==delim) + break; +char *ret = cloneStringZ(name, i); +return ret; +} + boolean pairSelected3(struct hash* selectHash, char *qName, char *tName, int tStart) /* determine if the query/target/tStart triple is selected. Handle the query * being paired to multiple targets */ { char buff[128]; -struct hashEl *hel = hashLookup(selectHash, qName); +char *qPrefix = firstPart(qName, qDelim); +struct hashEl *hel = hashLookup(selectHash, qPrefix); while (hel != NULL) { char *target = hel->val; sprintf(buff,"%s%d",tName, tStart); if (sameString(target, buff)) return TRUE; hel = hashLookupNext(hel); } +freeMem(qPrefix); return FALSE; } struct hashEl *selectedItem(struct hash* selectHash, char *qName, char *tName) /* determine if the item is selected. Handle the query * being paired to multiple query */ { -struct hashEl *hel = hashLookup(selectHash, qName); +char *qPrefix = firstPart(qName, qDelim); +struct hashEl *hel = hashLookup(selectHash, qPrefix); +freeMem(qPrefix); + +//boolean notInTable = (hel==NULL); +//boolean foundOneQuery = FALSE; + +if (isQPass && hel==NULL) + return (struct hashEl*)1; + while (hel != NULL) { + //foundOneQuery = TRUE; char *target = hel->val; if (mode == QUERY_MODE || mode == QUERY_PAIRS_MODE) return hel; if (mode == QT_PAIRS_MODE && sameString(target, tName)) return hel; hel = hashLookupNext(hel); } + return NULL; } void pslSelect(char *inPsl, char *outPsl) /* select psl */ { struct hash *selectHash = NULL; struct lineFile *inPslLf = pslFileOpen(inPsl); FILE *outPslFh = mustOpen(outPsl, "w"); struct psl* psl; struct hashEl *hel; if (mode != QT_START) selectHash = loadSelect(selectFile); else selectHash = loadSelect3(selectFile); + while ((psl = pslNext(inPslLf)) != NULL) { if (mode == QT_START) { if (pairSelected3(selectHash, psl->qName, psl->tName, psl->tStart)) pslTabOut(psl, outPslFh); } else if ((hel = selectedItem(selectHash, psl->qName, psl->tName)) != NULL) { if (mode == QUERY_PAIRS_MODE) { freeMem(psl->qName); psl->qName = cloneString(hel->val); } pslTabOut(psl, outPslFh); @@ -161,25 +195,34 @@ usage(); if ((selectFile = optionVal("qtPairs", NULL)) != NULL) mode = QT_PAIRS_MODE; else if ((selectFile = optionVal("queries", NULL)) != NULL) { mode = QUERY_MODE; isPairs = FALSE; } else if ((selectFile = optionVal("queryPairs", NULL)) != NULL) mode = QUERY_PAIRS_MODE; else if ((selectFile = optionVal("qtStart", NULL)) != NULL) mode = QT_START; else errAbort("must specify option"); +char* delim = NULL; +delim = optionVal("qDelim", NULL); +if (delim) + qDelim = delim[0]; + +isQPass = optionExists("qPass"); +if (isQPass && (mode!=QT_PAIRS_MODE)) + errAbort("-qPass can only be used with -qtPairs"); + pslSelect(argv[1], argv[2]); return 0; } /* * Local Variables: * c-file-style: "jkent-c" * End: */