85e28d7dd56a748e795fdc5c85a61d8f11fd1613
markd
  Fri Jun 21 10:44:57 2019 -0700
added program to merge adjacent blocks in BED 12 files

diff --git src/hg/getRna/getRna.c src/hg/getRna/getRna.c
index 7811929..8786776 100644
--- src/hg/getRna/getRna.c
+++ src/hg/getRna/getRna.c
@@ -5,63 +5,69 @@
 #include "common.h"
 #include "options.h"
 #include "hdb.h"
 #include "jksql.h"
 #include "genbank.h"
 #include "linefile.h"
 #include "fa.h"
 
 
 /* command line option specifications */
 static struct optionSpec optionSpecs[] = {
     {"cdsUpper", OPTION_BOOLEAN},
     {"cdsUpperAll", OPTION_BOOLEAN},
     {"inclVer", OPTION_BOOLEAN},
     {"peptides", OPTION_BOOLEAN},
+    {"seqTbl", OPTION_STRING},
+    {"extFileTbl", OPTION_STRING},
     {NULL, 0}
 };
 
 /* command line options */
 static boolean cdsUpper = FALSE;
 static boolean cdsUpperAll = FALSE;
 static boolean inclVer = FALSE;
 static boolean peptides = FALSE;
+char *seqTbl = NULL;
+char *extFileTbl = NULL;
 
 /* derived from command line, it clearer as -cdsUpperAll and -peptides defines
  * multiple behaviors */
 boolean warnOnNoCds = FALSE;
 boolean skipNoCds = FALSE;
 
 static int errCnt = 0;
 
 static void usage()
 /* Explain usage and exit. */
 {
 errAbort(
   "getRna - Get mrna for GenBank or RefSeq sequences found in a database\n"
   "usage:\n"
   "   getRna [options] database accFile outfa\n"
   "\n"
   "Get mrna for all accessions in accFile, writing to a fasta file. If accession\n"
   " has a version, that version is returned or an error generated\n"
   "\n"
   "Options:\n"
   "  -cdsUpper - lookup CDS and output it as upper case. If CDS annotation\n"
   "    can't be obtained, the sequence is skipped with a warning.\n"
   "  -cdsUpperAll - like -cdsUpper, except keep sequeneces without CDS\n"
   "  -inclVer - include version with sequence id.\n"
   "  -peptides - translate mRNAs to peptides\n"
+  "  -seqTbl=tbl - use this table instead of gbSeq and seq. Many other options don't work if this is used.\n"
+  "  -extFileTbl=tbl - use this table instead of gbExtFile and extFile\n"
   "\n");
 }
 
 static void parseAccVersion(char* requestedAcc,
                             char acc[GENBANK_ACC_BUFSZ],
                             char ver[GENBANK_ACC_BUFSZ])
 /* parse accession and optional version */
 {
 char* verDot = strchr(requestedAcc, '.');
 if (verDot != NULL)
     {
     genbankDropVer(acc, requestedAcc);
     safecpy(ver, GENBANK_ACC_BUFSZ, verDot+1);
     }
 else
@@ -204,64 +210,87 @@
     }
 
 if (cdsOk && cdsUpper)
     upperCaseCds(dna, &cds);
 if (inclVer || (strlen(ver) > 0))
     processVersion(conn, acc, ver);
 
 if (peptides)
     writePeptide(outFa, acc, dna, &cds);
 else
     faWriteNext(outFa, acc, dna->dna, dna->size);
 
 dnaSeqFree(&dna);
 }
 
+static void getAccSeqTable(char *acc, struct sqlConnection *conn, FILE *outFa)
+/* get mrna for an accession from a seqTable */
+{
+struct dnaSeq *dna =  hDnaSeqMustGetConn(conn, acc, seqTbl, extFileTbl);
+faWriteNext(outFa, acc, dna->dna, dna->size);
+dnaSeqFree(&dna);
+}
+
 static void getRna(char *database, char *accFile, char *outFaFile)
 /* obtain mrna for a list of accessions */
 {
 struct sqlConnection *conn = sqlConnect(database);
 struct lineFile *accLf = lineFileOpen(accFile, TRUE); 
 FILE *outFa = mustOpen(outFaFile, "w"); 
 char *line;
 int lineSize;
 
 while (lineFileNext(accLf, &line, &lineSize))
     {
+    if (seqTbl == NULL)
         getAccMrna(trimSpaces(line), conn, outFa);
+    else
+        getAccSeqTable(trimSpaces(line), conn, outFa);
     }
 
 if (ferror(outFa))
     errAbort("error writing %s", outFaFile);
 carefulClose(&outFa);
 lineFileClose(&accLf);
 sqlDisconnect(&conn);
 
 }
 
 int main(int argc, char *argv[])
 /* Process command line. */
 {
 char *database, *accFile, *outFaFile;
 
 optionInit(&argc, argv, optionSpecs);
 if (argc != 4)
     usage();
 database = argv[1];
 accFile = argv[2];
 outFaFile = argv[3];
+if ((optionExists("seqTbl") && !optionExists("extFileTbl"))
+    || (!optionExists("seqTbl") && optionExists("extFileTbl")))
+    errAbort("must specified both or neither of -seqTbl and -extFileTbl");
+seqTbl = optionVal("seqTbl", seqTbl);
+extFileTbl = optionVal("extFileTbl", extFileTbl);
+
 cdsUpper = optionExists("cdsUpper");
 cdsUpperAll = optionExists("cdsUpperAll");
+if ((seqTbl != NULL) && (cdsUpper || cdsUpperAll))
+    errAbort("-cdsUpper and -cdsUpperAll not support with -seqTbl");
 warnOnNoCds = !cdsUpperAll;
 skipNoCds = cdsUpper;
 if (cdsUpperAll)
     cdsUpper = TRUE;
 inclVer = optionExists("inclVer");
+if ((seqTbl != NULL) && inclVer)
+    errAbort("-inclVer not support with -seqTbl, version is always included");
 peptides = optionExists("peptides");
+if ((seqTbl != NULL) && peptides)
+    errAbort("-peptides not support with -seqTbl");
 if (peptides)
     skipNoCds = TRUE;
 if (peptides && (cdsUpper || cdsUpperAll))
     errAbort("can't specify -peptides with -cdsUpper or -cdsUpperAll");
 getRna(database, accFile, outFaFile);
 return (errCnt == 0) ? 0 : 1;
 }