85e28d7dd56a748e795fdc5c85a61d8f11fd1613 markd Fri Jun 21 10:44:57 2019 -0700 added program to merge adjacent blocks in BED 12 files diff --git src/hg/getRna/getRna.c src/hg/getRna/getRna.c index 7811929..8786776 100644 --- src/hg/getRna/getRna.c +++ src/hg/getRna/getRna.c @@ -5,63 +5,69 @@ #include "common.h" #include "options.h" #include "hdb.h" #include "jksql.h" #include "genbank.h" #include "linefile.h" #include "fa.h" /* command line option specifications; seqTbl and extFileTbl take a table name, the others are boolean flags */ static struct optionSpec optionSpecs[] = { {"cdsUpper", OPTION_BOOLEAN}, {"cdsUpperAll", OPTION_BOOLEAN}, {"inclVer", OPTION_BOOLEAN}, {"peptides", OPTION_BOOLEAN}, + {"seqTbl", OPTION_STRING}, + {"extFileTbl", OPTION_STRING}, {NULL, 0} }; /* command line options */ static boolean cdsUpper = FALSE; static boolean cdsUpperAll = FALSE; static boolean inclVer = FALSE; static boolean peptides = FALSE; /* seqTbl and extFileTbl are assigned from -seqTbl and -extFileTbl in main(); when seqTbl is non-NULL getRna() calls getAccSeqTable() instead of getAccMrna() */ +char *seqTbl = NULL; +char *extFileTbl = NULL; /* derived from the command line; kept as separate flags because -cdsUpperAll and -peptides each * affect more than one behavior */ boolean warnOnNoCds = FALSE; boolean skipNoCds = FALSE; static int errCnt = 0; static void usage() /* Explain usage and exit. */ { errAbort( "getRna - Get mrna for GenBank or RefSeq sequences found in a database\n" "usage:\n" " getRna [options] database accFile outfa\n" "\n" "Get mrna for all accessions in accFile, writing to a fasta file. If accession\n" " has a version, that version is returned or an error generated\n" "\n" "Options:\n" " -cdsUpper - lookup CDS and output it as upper case. If CDS annotation\n" " can't be obtained, the sequence is skipped with a warning.\n" " -cdsUpperAll - like -cdsUpper, except keep sequeneces without CDS\n" " -inclVer - include version with sequence id.\n" " -peptides - translate mRNAs to peptides\n" + " -seqTbl=tbl - use this table instead of gbSeq and seq. 
Many other options don't work if this is used.\n" + " -extFileTbl=tbl - use this table instead of gbExtFile and extFile\n" "\n"); } static void parseAccVersion(char* requestedAcc, char acc[GENBANK_ACC_BUFSZ], char ver[GENBANK_ACC_BUFSZ]) /* parse accession and optional version */ { char* verDot = strchr(requestedAcc, '.'); if (verDot != NULL) { genbankDropVer(acc, requestedAcc); safecpy(ver, GENBANK_ACC_BUFSZ, verDot+1); } else @@ -204,64 +210,87 @@ } if (cdsOk && cdsUpper) upperCaseCds(dna, &cds); if (inclVer || (strlen(ver) > 0)) processVersion(conn, acc, ver); if (peptides) writePeptide(outFa, acc, dna, &cds); else faWriteNext(outFa, acc, dna->dna, dna->size); dnaSeqFree(&dna); } +static void getAccSeqTable(char *acc, struct sqlConnection *conn, FILE *outFa) +/* get mrna for an accession from a seqTable */ +{ +struct dnaSeq *dna = hDnaSeqMustGetConn(conn, acc, seqTbl, extFileTbl); +faWriteNext(outFa, acc, dna->dna, dna->size); +dnaSeqFree(&dna); +} + static void getRna(char *database, char *accFile, char *outFaFile) /* obtain mrna for a list of accessions */ { struct sqlConnection *conn = sqlConnect(database); struct lineFile *accLf = lineFileOpen(accFile, TRUE); FILE *outFa = mustOpen(outFaFile, "w"); char *line; int lineSize; while (lineFileNext(accLf, &line, &lineSize)) { + if (seqTbl == NULL) getAccMrna(trimSpaces(line), conn, outFa); + else + getAccSeqTable(trimSpaces(line), conn, outFa); } if (ferror(outFa)) errAbort("error writing %s", outFaFile); carefulClose(&outFa); lineFileClose(&accLf); sqlDisconnect(&conn); } int main(int argc, char *argv[]) /* Process command line. 
*/ { char *database, *accFile, *outFaFile; optionInit(&argc, argv, optionSpecs); if (argc != 4) usage(); database = argv[1]; accFile = argv[2]; outFaFile = argv[3]; /* -seqTbl and -extFileTbl are accepted only as a pair: both given or both absent */ +if ((optionExists("seqTbl") && !optionExists("extFileTbl")) + || (!optionExists("seqTbl") && optionExists("extFileTbl"))) + errAbort("must specified both or neither of -seqTbl and -extFileTbl"); +seqTbl = optionVal("seqTbl", seqTbl); +extFileTbl = optionVal("extFileTbl", extFileTbl); + cdsUpper = optionExists("cdsUpper"); cdsUpperAll = optionExists("cdsUpperAll"); /* getAccSeqTable() only fetches and writes the sequence, so the CDS options are rejected together with -seqTbl */ +if ((seqTbl != NULL) && (cdsUpper || cdsUpperAll)) + errAbort("-cdsUpper and -cdsUpperAll not support with -seqTbl"); /* skipNoCds is copied from -cdsUpper before -cdsUpperAll forces cdsUpper on, so -cdsUpperAll by itself leaves skipNoCds FALSE and also clears warnOnNoCds */ warnOnNoCds = !cdsUpperAll; skipNoCds = cdsUpper; if (cdsUpperAll) cdsUpper = TRUE; inclVer = optionExists("inclVer"); +if ((seqTbl != NULL) && inclVer) + errAbort("-inclVer not support with -seqTbl, version is always included"); peptides = optionExists("peptides"); +if ((seqTbl != NULL) && peptides) + errAbort("-peptides not support with -seqTbl"); /* -peptides also turns on skipNoCds and cannot be combined with either CDS upper-casing option */ if (peptides) skipNoCds = TRUE; if (peptides && (cdsUpper || cdsUpperAll)) errAbort("can't specify -peptides with -cdsUpper or -cdsUpperAll"); getRna(database, accFile, outFaFile); /* exit status is 1 when errCnt is non-zero; errCnt is updated in code outside the hunks shown here */ return (errCnt == 0) ? 0 : 1; }