src/gfClient/gfClient.c a2819277576ffc91069261a03b358213afe2cb3b

a2819277576ffc91069261a03b358213afe2cb3b
markd
  Wed Feb 3 15:30:27 2021 -0800
allow gfPcr to query dynamic servers

diff --git src/gfClient/gfClient.c src/gfClient/gfClient.c
index 94942ec..9dba8b9 100644
--- src/gfClient/gfClient.c
+++ src/gfClient/gfClient.c
@@ -1,189 +1,189 @@
 /* gfClient - A client for the genomic finding program that produces a .psl file. */
 /* Copyright 2001-2003 Jim Kent.  All rights reserved. */
 #include "common.h"
 #include "linefile.h"
 #include "aliType.h"
 #include "fa.h"
 #include "genoFind.h"
 #include "psl.h"
 #include "options.h"
 #include "fuzzyFind.h"
 
 
 /* Table of command line options this program accepts; handed to optionInit()
  * in main().  Each entry pairs an option name with an OPTION_* type constant.
  * The trailing {NULL, 0} entry presumably marks the end of the table for the
  * option parser - confirm against options.h. */
 static struct optionSpec optionSpecs[] = {
     {"prot", OPTION_BOOLEAN},
     {"q", OPTION_STRING},
     {"t", OPTION_STRING},
     {"minIdentity", OPTION_FLOAT},
     {"minScore", OPTION_INT},
     {"dots", OPTION_INT},
     {"out", OPTION_STRING},
     {"maxIntron", OPTION_INT},
     {"nohead", OPTION_BOOLEAN},
     {"genome", OPTION_STRING},
     {"genomeDataDir", OPTION_STRING},
     {NULL, 0}
 };
 
 /* Variables that can be overridden by command line.  The values here are
  * the defaults; main() replaces them from the matching options. */
 int dots = 0;			/* Emit a progress dot every 'dots' queries; 0 turns dots off. */
 int minScore = 30;		/* Minimum score handed to the gfAlign* calls. */
 double minIdentity = 90;	/* Minimum identity; main() lowers the default to 25 for prot/dnax/rnax targets. */
 char *outputFormat = "psl";	/* Output format name passed to gfOutputAny(). */
 char *qType = "dna";		/* Query type name, set by -q or -prot. */
 char *tType = "dna";		/* Target (database) type name, set by -t or -prot. */
 char *genome = NULL;		/* -genome value, passed to gfConnect(); NULL when not given. */
 char *genomeDataDir = NULL;	/* -genomeDataDir value, passed to gfConnect(); main() defaults it to "." when -genome is given alone. */
 
 void usage()
 /* Print the usage message to stdout and exit with status -1.
  * The two format arguments fill in the program version (gfVersion) and
  * the default maximum intron size (ffIntronMaxDefault).  Does not return. */
 {
 printf(
   "gfClient v. %s - A client for the genomic finding program that produces a .psl file\n"
   "usage:\n"
   "   gfClient host port seqDir in.fa out.psl\n"
   "where\n"
   "   host is the name of the machine running the gfServer\n"
   "   port is the same port that you started the gfServer with\n"
   "   seqDir is the path of the .2bit or .nib files relative to the current dir\n"
   "       (note these are needed by the client as well as the server)\n"
   "   in.fa is a fasta format file.  May contain multiple records\n"
   "   out.psl is where to put the output\n"
   "options:\n"
   "   -t=type       Database type. Type is one of:\n"
   "                   dna - DNA sequence\n"
   "                   prot - protein sequence\n"
   "                   dnax - DNA sequence translated in six frames to protein\n"
   "                 The default is dna.\n"
   "   -q=type       Query type. Type is one of:\n"
   "                   dna - DNA sequence\n"
   "                   rna - RNA sequence\n"
   "                   prot - protein sequence\n"
   "                   dnax - DNA sequence translated in six frames to protein\n"
   "                   rnax - DNA sequence translated in three frames to protein\n"
   "   -prot         Synonymous with -t=prot -q=prot.\n"
   "   -dots=N       Output a dot every N query sequences.\n"
   "   -nohead       Suppresses 5-line psl header.\n"
   "   -minScore=N   Sets minimum score.  This is twice the matches minus the \n"
   "                 mismatches minus some sort of gap penalty.  Default is 30.\n"
   "   -minIdentity=N   Sets minimum sequence identity (in percent).  Default is\n"
   "                 90 for nucleotide searches, 25 for protein or translated\n"
   "                 protein searches.\n"
   "   -out=type     Controls output file format.  Type is one of:\n"
   "                   psl - Default.  Tab-separated format without actual sequence\n"
   "                   pslx - Tab-separated format with sequence\n"
   "                   axt - blastz-associated axt format\n"
   "                   maf - multiz-associated maf format\n"
   "                   sim4 - similar to sim4 format\n"
   "                   wublast - similar to wublast format\n"
   "                   blast - similar to NCBI blast format\n"
   "                   blast8- NCBI blast tabular format\n"
   "                   blast9 - NCBI blast tabular format with comments\n"
   "   -maxIntron=N   Sets maximum intron size. Default is %d.\n"
   "   -genome=name  When using a dynamic gfServer, The genome name is used to \n"
   "                 find the data files relative to the dynamic gfServer root, named \n"
-  "                 in the form $genome.2bit, $genome.untrans.gfidx, and $genome.trans.gfidx, \n"
+  "                 in the form $genome.2bit, $genome.untrans.gfidx, and $genome.trans.gfidx\n"
   "   -genomeDataDir=path\n"
   "                 When using a dynamic gfServer, this is the dynamic gfServer root directory\n"
   "                 that contained the genome data files.  Defaults to being the root directory.\n"
   "                \n",
   gfVersion, ffIntronMaxDefault);
 exit(-1);
 }
 
 
 /* Output handler for the current run.  Assigned in gfClient() from
  * gfOutputAny() and then passed to the header, alignment and per-query
  * output calls there. */
 struct gfOutput *gvo;
 
 void gfClient(char *hostName, char *portName, char *tSeqDir, char *inName, 
 	char *outName, char *tTypeName, char *qTypeName)
 /* Read each sequence from inName, align it through a connection to the
  * gfServer at hostName:portName using the target sequence in tSeqDir, and
  * write the results to outName in the format named by outputFormat.
  * Aborts via errAbort() on the first query if the query/target type
  * pairing is not one of the supported combinations. */
 {
 /* The calls below keep their original order: input is opened before the
  * output file is created, and type names are parsed after both. */
 struct lineFile *lf = lineFileOpen(inName, TRUE);
 static bioSeq seq;
 FILE *out = mustOpen(outName, "w");
 enum gfType qType = gfTypeFromName(qTypeName);
 enum gfType tType = gfTypeFromName(tTypeName);
 struct hash *tFileCache = gfFileCacheNew();
 char databaseName[256];
 int dotMod = 0;
 
 /* Type classifications are loop invariant, so work them out once. */
 int qIsProt = (qType == gftProt);
 int tIsProt = (tType == gftProt);
 int qIsTranslated = (qType == gftRnaX || qType == gftDnaX);
 int tIsTranslated = (tType == gftDnaX || tType == gftRnaX);
 int qIsNucleotide = (qType == gftDna || qType == gftRna);
 int tIsNucleotide = (tType == gftDna || tType == gftRna);
 
 snprintf(databaseName, sizeof(databaseName), "%s:%s", hostName, portName);
 
 /* NOTE(review): 23 and 3.0e9 are passed through as in the original; their
  * meaning is defined by gfOutputAny() - confirm against genoFind.h. */
 gvo = gfOutputAny(outputFormat,  round(minIdentity*10), qIsProt, tIsProt,
 	optionExists("nohead"), databaseName, 23, 3.0e9, minIdentity, out);
 gfOutputHead(gvo, out);
 
 struct gfConnection *conn = gfConnect(hostName, portName, genome, genomeDataDir);
 while (faSomeSpeedReadNext(lf, &seq.dna, &seq.size, &seq.name, !qIsProt))
     {
     /* Progress dots: one per 'dots' queries, none when dots is 0. */
     if (dots != 0 && ++dotMod >= dots)
         {
         dotMod = 0;
         verboseDot();
         }
 
     if (qIsProt && tIsTranslated)
         {
         /* Protein query against a translated target. */
         gvo->reportTargetStrand = TRUE;
         gfAlignTrans(conn, tSeqDir, &seq, minScore, tFileCache, gvo);
         }
     else if (qIsTranslated && tIsTranslated)
         {
         /* Translated query against a translated target.  Only a dnax query
          * gets the second pass on its reverse complement. */
         gvo->reportTargetStrand = TRUE;
         gfAlignTransTrans(conn, tSeqDir, &seq, FALSE, minScore, tFileCache, gvo,
                 qType == gftRnaX);
         if (qType == gftDnaX)
             {
             reverseComplement(seq.dna, seq.size);
             gfAlignTransTrans(conn, tSeqDir, &seq, TRUE, minScore, tFileCache, gvo, FALSE);
             }
         }
     else if (tIsNucleotide && qIsNucleotide)
         {
         /* Nucleotide against nucleotide: align as read, then again after
          * reverse-complementing the query in place. */
         gfAlignStrand(conn, tSeqDir, &seq, FALSE, minScore, tFileCache, gvo);
         reverseComplement(seq.dna, seq.size);
         gfAlignStrand(conn, tSeqDir, &seq, TRUE,  minScore, tFileCache, gvo);
         }
     else
         {
         errAbort("Comparisons between %s queries and %s databases not yet supported",
                 qTypeName, tTypeName);
         }
 
     gfOutputQuery(gvo, out);
     }
 gfDisconnect(&conn);
 
 /* Only announce the file name when results did not go to stdout. */
 if (out != stdout)
     printf("Output is in %s\n", outName);
 gfFileCacheFree(&tFileCache);
 }
 
 int main(int argc, char *argv[])
 /* Parse the command line, settle option defaults, and run gfClient().
  * Expects exactly five positional arguments (host, port, seqDir, in.fa,
  * out.psl); anything else prints the usage message and exits. */
 {
 double identityDefault;
 
 optionInit(&argc, argv, optionSpecs);
 if (argc != 6)
     usage();
 
 /* -prot only changes the defaults; an explicit -q or -t still wins. */
 if (optionExists("prot"))
     {
     qType = "prot";
     tType = "prot";
     }
 qType = optionVal("q", qType);
 tType = optionVal("t", tType);
 
 /* Protein and translated targets get the lower identity default. */
 identityDefault = minIdentity;
 if (sameWord(tType, "prot") || sameWord(tType, "dnax") || sameWord(tType, "rnax"))
     identityDefault = 25;
 minIdentity = optionFloat("minIdentity", identityDefault);
 
 minScore = optionInt("minScore", minScore);
 dots = optionInt("dots", 0);
 outputFormat = optionVal("out", outputFormat);
 
 /* Dynamic server options: -genomeDataDir is only valid together with
  * -genome, and -genome alone implies the current directory. */
 genome = optionVal("genome", NULL);
 genomeDataDir = optionVal("genomeDataDir", NULL);
 if (genome == NULL)
     {
     if (genomeDataDir != NULL)
         errAbort("-genomeDataDir requires the -genome option");
     }
 else if (genomeDataDir == NULL)
     {
     genomeDataDir = ".";
     }
 
 /* set global for fuzzy find functions */
 setFfIntronMax(optionInt("maxIntron", ffIntronMaxDefault));
 
 gfClient(argv[1], argv[2], argv[3], argv[4], argv[5], tType, qType);
 return 0;
 }