40b7e2a3e86f9633d0800d1ae5fed25c38a4e342
markd
  Mon Mar 25 06:36:58 2019 -0700
removed pslToBigPsl warning about missing CDS, as this is an expected occurance for non-coding genes which can generate tens of thousands of warnings

diff --git src/hg/utils/pslToBigPsl/pslToBigPsl.c src/hg/utils/pslToBigPsl/pslToBigPsl.c
index a6fc4f4..96fcd4f 100644
--- src/hg/utils/pslToBigPsl/pslToBigPsl.c
+++ src/hg/utils/pslToBigPsl/pslToBigPsl.c
@@ -1,184 +1,182 @@
 /* pslToBigPsl - converts psl to bigPsl. */
 #include "common.h"
 #include "linefile.h"
 #include "hash.h"
 #include "options.h"
 #include "genePred.h"
 #include "psl.h"
 #include "bigPsl.h"
 #include "fa.h"
 #include "genbank.h"
 #include "net.h"
 
 void usage()
 /* Explain usage and exit. */
 {
 errAbort(
   "pslToBigPsl - converts psl to bigPsl input (bed format with extra fields)\n"
   "usage:\n"
   "  pslToBigPsl file.psl stdout | sort -k1,1 -k2,2n > file.bigPslInput\n"
   "options:\n"
   "  -cds=file.cds\n"
   "  -fa=file.fasta\n"
   "NOTE: to build bigBed:\n"
   "   bedToBigBed -type=bed12+13 -tab -as=bigPsl.as file.bigPslInput chrom.sizes output.bb\n"
   );
 }
 
 boolean snakeMode = FALSE;
 
 /* Command line validation table. */
 static struct optionSpec options[] = {
    {"cds", OPTION_STRING},
    {"fa", OPTION_STRING},
    {NULL, 0},
 };
 
 #define MAX_BLOCKS 10000
 unsigned blockSizes[MAX_BLOCKS];
 unsigned blockStarts[MAX_BLOCKS];
 unsigned oBlockStarts[MAX_BLOCKS];
 
 void outBigPsl(FILE *fp, struct psl *psl, struct hash *fastaHash, struct hash *cdsHash)
 {
 struct bigPsl bigPsl;
 
 if (psl->blockCount > MAX_BLOCKS)
     errAbort("psl has more than %d blocks, make MAX_BLOCKS bigger in source", MAX_BLOCKS);
 
 // make sure blocks are represented on reference's positive strand as required by BED format
 boolean didRc = FALSE;
 if (psl->strand[1] == '-')
     {
     didRc = TRUE;
     pslRc(psl); 
     }
 
 bigPsl.chrom = psl->tName;
 bigPsl.chromSize = psl->tSize;
 bigPsl.match = psl->match;
 bigPsl.misMatch = psl->misMatch;
 bigPsl.repMatch = psl->repMatch;
 bigPsl.nCount = psl->nCount;
 bigPsl.oChromStart = psl->qStart;
 bigPsl.oChromEnd = psl->qEnd;
 bigPsl.oChromSize = psl->qSize;
 bigPsl.chromStart = psl->tStart;
 bigPsl.chromEnd = psl->tEnd;
 bigPsl.thickStart = psl->tStart;
 bigPsl.thickEnd = psl->tEnd;
 bigPsl.name = psl->qName;
 bigPsl.score = 1000;
 bigPsl.strand[0] = psl->strand[0];
 bigPsl.strand[1] = 0;
 if (didRc)
     bigPsl.oStrand[0] = '-';
 else
     bigPsl.oStrand[0] = '+';
 bigPsl.oStrand[1] = 0;
 bigPsl.reserved = 0; // BLACK
 bigPsl.blockCount = psl->blockCount;
 bigPsl.blockSizes = (int *)blockSizes;
 bigPsl.chromStarts = (int *)blockStarts;
 bigPsl.oChromStarts = (int *)oBlockStarts;
 bigPsl.oSequence = "";
 bigPsl.oCDS = "";
 int ii;
 boolean isProt = pslIsProtein(psl);
 int mult = pslIsProtein(psl) ? 3 : 1;
 for(ii=0; ii < bigPsl.blockCount; ii++)
     {
     blockStarts[ii] = psl->tStarts[ii] - bigPsl.chromStart;
     oBlockStarts[ii] = psl->qStarts[ii] ;
     blockSizes[ii] = psl->blockSizes[ii] * mult;
     }
     
 bigPsl.seqType = PSL_SEQTYPE_EMPTY;
 if (fastaHash)
     {
     struct dnaSeq *seq = hashFindVal(fastaHash, psl->qName);
 
     if (seq != NULL)
         {
 	bigPsl.oSequence = seq->dna;
         bigPsl.seqType = isProt ? PSL_SEQTYPE_PROTEIN : PSL_SEQTYPE_NUCLEOTIDE;
         }
     else
 	{
 	warn("Cannot find sequence for %s. Dropping", psl->qName);
 	return;
 	}
     }
 
 if (cdsHash)
     {
     char *cds = hashFindVal(cdsHash, psl->qName);
 
     if (cds != NULL)
 	{
 	bigPsl.oCDS = cds;
 
 	struct genbankCds mrnaCds;
 	genbankCdsParse(cds, &mrnaCds); 
 	struct genbankCds genomeCds = genbankCdsToGenome(&mrnaCds, psl);
 	bigPsl.thickStart = genomeCds.start;
 	bigPsl.thickEnd = genomeCds.end;
 	}
-    else
-    	warn("CDS missing for %s\n", psl->qName);
     }
 
 bigPslOutput(&bigPsl, fp, '\t', '\n');
 }
 
 
 void pslToBigPsl(char *pslFile, char *bigPslOutput, char *fastaFile, char *cdsFile)
 /* pslToBigPsl - converts psl to bigPsl. */
 {
 struct psl *psl = pslLoadAll(pslFile);
 struct hash *fastHash = NULL;
 struct hash *cdsHash = NULL;
 
 if (fastaFile != NULL)
     {
     struct dnaSeq  *seqs = pslIsProtein(psl) ?faReadAllPep(fastaFile) : faReadAllDna(fastaFile); 
     fastHash = newHash(10);
     for(; seqs; seqs = seqs->next)
 	hashAdd(fastHash, seqs->name, seqs);
     }
 
 if (cdsFile != NULL)
     {
     cdsHash = newHash(10);
     struct lineFile *lf = netLineFileOpen(cdsFile);
     char *row[2];
     while (lineFileRow(lf, row))
 	{
         hashAdd(cdsHash, row[0], cloneString(row[1]));
 	}
 	  
     lineFileClose(&lf);
     }
 
 FILE *fp = mustOpen(bigPslOutput, "w");
 
 for(; psl ; psl = psl->next)
     {
     outBigPsl(fp, psl, fastHash, cdsHash);
     }
 
 fclose(fp);
 }
 
 int main(int argc, char *argv[])
 /* Process command line. */
 {
 optionInit(&argc, argv, options);
 if (argc != 3)
     usage();
 char *fastaFile = optionVal("fa", NULL);
 char *cdsFile = optionVal("cds", NULL);
 
 pslToBigPsl(argv[1], argv[2], fastaFile, cdsFile);
 return 0;
 }