44ccfacbe3a3d4b300f80d48651c77837a4b571e
galt
  Tue Apr 26 11:12:02 2022 -0700
SQL INJECTION Prevention Version 2 - this improves our methods by making subclauses of SQL that get passed around be both easy and correct to use. The way that was achieved was by getting rid of the obscure and not well used functions sqlSafefFrag and sqlDyStringPrintfFrag and replacing them with the plain versions of those functions, since these are not needed anymore. The new version checks for NOSQLINJ in unquoted %-s which is used to include SQL clauses, and will give an error the NOSQLINJ clause is not present, and this will automatically require the correct behavior by developers. sqlDyStringPrint is a very useful function, however because it was not enforced, users could use various other dyString functions and they operated without any awareness or checking for SQL correct use. Now those dyString functions are prohibited and it will produce an error if you try to use a dyString function on a SQL string, which is simply detected by the presence of the NOSQLINJ prefix.

diff --git src/hg/mouseStuff/lavToAxt/lavToAxt.c src/hg/mouseStuff/lavToAxt/lavToAxt.c
index 8a4ddb7..0b7d77e 100644
--- src/hg/mouseStuff/lavToAxt/lavToAxt.c
+++ src/hg/mouseStuff/lavToAxt/lavToAxt.c
@@ -1,524 +1,524 @@
 /* lavToAxt - Convert blastz lav file to an axt file (which includes sequence). */
 
 /* Copyright (C) 2014 The Regents of the University of California 
  * See kent/LICENSE or http://genome.ucsc.edu/license/ for licensing information. */
 #include "lav.h"
 #include "hash.h"
 #include "options.h"
 #include "dlist.h"
 #include "dystring.h"
 #include "dnaseq.h"
 #include "nib.h"
 #include "twoBit.h"
 #include "fa.h"
 
 struct dnaSeq *qFaList;
 struct dnaSeq *tFaList;
 
 void usage()
 /* Explain usage and exit. */
 {
 errAbort(
   "lavToAxt - Convert blastz lav file to an axt file (which includes sequence)\n"
   "usage:\n"
   "   lavToAxt in.lav tNibDir qNibDir out.axt\n"
   "Where tNibDir/qNibDir are either directories full of nib files, or a single\n"
   "twoBit file\n"
   "options:\n"
   "   -fa  qNibDir is interpreted as a fasta file of multiple dna seq instead of directory of nibs\n"
   "   -tfa tNibDir is interpreted as a fasta file of multiple dna seq instead of directory of nibs\n"
   "   -dropSelf  drops alignment blocks on the diagonal for self alignments\n"
   "   -scoreScheme=fileName Read the scoring matrix from a blastz-format file.\n"
   "                (only used in conjunction with -dropSelf, to rescore \n"
   "                alignments when blocks are dropped)\n"
   );
 }
 
 boolean qIsFa = FALSE;	/* Corresponds to -fa flag. */
 boolean tIsFa = FALSE;	/* Corresponds to -tfa flag. */
 boolean dropSelf = FALSE;	/* Corresponds to -dropSelf flag. */
 struct axtScoreScheme *scoreScheme = NULL;  /* -scoreScheme flag. */
 
 struct cachedSeqFile
 /* File in cache. */
     {
     struct cachedSeqFile *next;	/* next in list. */
     char *fileName;     /* File name (allocated here) */
     struct twoBitFile *tbf;	/* Two bit file if any. */
     char *name;		/* Sequence name (allocated here) for nib file */
     FILE *f;		/* Open nib file. */
     int size;		/* Size of sequence. */
     };
 
 void cachedSeqFileFree(struct cachedSeqFile **pCn)
 /* Free up resources associated with cached sequence file. */
 {
 struct cachedSeqFile *cn = *pCn;
 if (cn != NULL)
     {
     carefulClose(&cn->f);
     twoBitClose(&cn->tbf);
     freeMem(cn->fileName);
     freeMem(cn->name);
     freez(pCn);
     }
 }
 
 struct cachedSeqFile *openNibFromCache(struct dlList *cache, char *dirName, char *seqName)
 /* Return open file handle via cache.  */
 {
 static int maxCacheSize=32;
 int cacheSize = 0;
 struct dlNode *node;
 struct cachedSeqFile *cn;
 char fileName[512];
 
 /* First loop through trying to find it in cache, counting
  * cache size as we go. */
 for (node = cache->head; !dlEnd(node); node = node->next)
     {
     ++cacheSize;
     cn = node->val;
     if (sameString(seqName, cn->name))
         {
 	dlRemove(node);
 	dlAddHead(cache, node);
 	return cn;
 	}
     }
 
 /* If cache has reached max size free least recently used. */
 if (cacheSize >= maxCacheSize)
     {
     node = dlPopTail(cache);
     cn = node->val;
     cachedSeqFileFree(&cn);
     freeMem(node);
     }
 
 /* Cache new file. */
 AllocVar(cn);
 cn->name = cloneString(seqName);
 snprintf(fileName, sizeof(fileName), "%s/%s.nib", dirName, seqName);
 cn->fileName = cloneString(fileName);
 nibOpenVerify(fileName, &cn->f, &cn->size);
 dlAddValHead(cache, cn);
 return cn;
 }
 
 struct cachedSeqFile *openTwoBitFromCache(struct dlList *cache, char *fileName)
 /* Return open file handle via cache.  In this case it's just a cache of one.  */
 {
 struct cachedSeqFile *cn;
 if (dlEmpty(cache))
     {
     AllocVar(cn);
     cn->fileName = cloneString(fileName);
     cn->tbf = twoBitOpen(fileName);
     dlAddValHead(cache, cn);
     }
 else
     cn = cache->head->val;
 return cn;
 }
 
 struct cachedSeqFile *openFromCache(struct dlList *cache, char *dirName, char *seqName, 
 	boolean isTwoBit)
 /* Return open file handle via cache.  */
 {
 if (isTwoBit)
     return openTwoBitFromCache(cache, dirName);
 else
     return openNibFromCache(cache, dirName, seqName);
 }
 
 struct dnaSeq *readFromCache(struct dlList *cache, char *dirName, char *seqName,
 	int start, int size, int seqSize, boolean isTwoBit)
 /* Return dnaSeq read from the appropriate nib file.  
  * You need to dnaSeqFree this when done (it is the nib
  * file that is cached, not the sequence). */
 {
 struct cachedSeqFile *cn = openFromCache(cache, dirName, seqName, isTwoBit);
 if (isTwoBit)
     {
     return twoBitReadSeqFrag(cn->tbf, seqName, start, start+size);
     }
 else
     {
     if (seqSize != cn->size)
 	errAbort("%s/%s is %d bases in .lav file and %d in .nib file\n",
 	   dirName, seqName, seqSize, cn->size); 
     if ((start+size) > cn->size )
 	printf("%s/%s is %d bases in .lav file and %d in .nib file start %d size %d end %d\n",
 	   dirName, seqName, seqSize, cn->size, start, size, start+size); 
     return nibLdPartMasked(NIB_MASK_MIXED, cn->fileName, cn->f, cn->size, start, size);
     }
 }
 
 void outputBlocks(struct lineFile *lf,
 	struct block *blockList, int score, FILE *f, boolean isRc, 
 	char *qName, int qSize, char *qNibDir, struct dlList *qCache,
 	char *tName, int tSize, char *tNibDir, struct dlList *tCache,
 	boolean rescore)
 /* Output block list as an axt to file f. */
 {
 int qStart = BIGNUM, qEnd = 0, tStart = BIGNUM, tEnd = 0;
 struct block *lastBlock = NULL;
 struct block *block;
-struct dyString *qSym = newDyString(16*1024);
-struct dyString *tSym = newDyString(16*1024);
+struct dyString *qSym = dyStringNew(16*1024);
+struct dyString *tSym = dyStringNew(16*1024);
 struct dnaSeq *qSeq = NULL, *tSeq = NULL, *seq = NULL;
 struct axt axt;
 boolean qIsTwoBit = twoBitIsFile(qNibDir);
 boolean tIsTwoBit = twoBitIsFile(tNibDir);
 
 if (blockList == NULL)
     return;
 
 /* Figure overall dimensions. */
 for (block = blockList; block != NULL; block = block->next)
     {
     if (qStart > block->qStart) qStart = block->qStart;
     if (qEnd < block->qEnd) qEnd = block->qEnd;
     if (tStart > block->tStart) tStart = block->tStart;
     if (tEnd < block->tEnd) tEnd = block->tEnd;
     }
 
 /* Load sequence covering alignment from nib files. */
 if (isRc)
     {
     reverseIntRange(&qStart, &qEnd, qSize);
     if (qIsFa)
         {
         for (seq = qFaList ; seq != NULL ; seq = seq->next)
             if (sameString(qName, seq->name))
                 break;
         if (seq != NULL)
             {
             AllocVar(qSeq);
             qSeq->size = qEnd - qStart;
             qSeq->name = cloneString(qName);
             qSeq->dna = cloneMem((seq->dna)+qStart, qSeq->size);
             }
         else
             errAbort("sequence not found %s\n",qName);
         }
     else
         qSeq = readFromCache(qCache, qNibDir, qName, qStart, qEnd - qStart, qSize, qIsTwoBit);
     reverseIntRange(&qStart, &qEnd, qSize);
     reverseComplement(qSeq->dna, qSeq->size);
     }
 else
     {    
     if (qIsFa)
         {
         for (seq = qFaList ; seq != NULL ; seq = seq->next)
 	    {
             if (sameString(qName, seq->name))
                 break;
 	    }
 	if (seq != NULL)
 	    {
 	    AllocVar(qSeq);
 	    qSeq->size = qEnd - qStart;
 	    qSeq->name = cloneString(qName);
 	    qSeq->dna = (seq->dna)+qStart;
 	    }
 	else
 	    errAbort("sequence not found %s\n",qName);
         }
     else
         qSeq = readFromCache(qCache, qNibDir, qName, qStart, qEnd - qStart, qSize, qIsTwoBit);
     }
     if (tIsFa)
         {
         for (seq = tFaList ; seq != NULL ; seq = seq->next)
             if (sameString(tName, seq->name))
                 break;
         if (seq != NULL)
             {
             AllocVar(tSeq);
             tSeq->size = tEnd - tStart;
             tSeq->name = cloneString(tName);
             tSeq->dna = cloneMem((seq->dna)+tStart, tSeq->size);
             }
         else
             errAbort("sequence not found %s\n",tName);
         }
     else
         tSeq = readFromCache(tCache, tNibDir, tName, tStart, tEnd - tStart, tSize, tIsTwoBit);
 
 /* Loop through blocks copying sequence into dynamic strings. */
 for (block = blockList; block != NULL; block = block->next)
     {
     if (lastBlock != NULL)
         {
 	int qGap = block->qStart - lastBlock->qEnd;
 	int tGap = block->tStart - lastBlock->tEnd;
 	if (qGap != 0 && tGap != 0)
 	    {
 	    errAbort("Gaps in both strand on alignment ending line %d of %s",
 	    	lf->lineIx, lf->fileName);
 	    }
 	if (qGap > 0)
 	    {
 	    dyStringAppendMultiC(tSym, '-', qGap);
 	    dyStringAppendN(qSym, qSeq->dna + lastBlock->qEnd - qStart, qGap);
 	    }
 	if (tGap > 0)
 	    {
 	    dyStringAppendMultiC(qSym, '-', tGap);
 	    dyStringAppendN(tSym, tSeq->dna + lastBlock->tEnd - tStart, tGap);
 	    }
 	}
     if (qSeq->size < block->qStart - qStart)
         {
         errAbort("read past end of sequence %s size =%d block->qStart-qstart=%d block->qStart=%d qEnd=%d \n", qName, qSeq->size, block->qStart-qStart,block->qStart, block->qEnd );
         }
     dyStringAppendN(qSym, qSeq->dna + block->qStart - qStart,
     	block->qEnd - block->qStart);
     if (tSeq->size < block->tStart - tStart)
         {
         errAbort("read past end of sequence %s size =%d block->tStart-tstart=%d\n", tName, tSeq->size, block->tStart-tStart);
         }
     dyStringAppendN(tSym, tSeq->dna + block->tStart - tStart,
     	block->tEnd - block->tStart);
     lastBlock = block;
     }
 if (qSym->stringSize != tSym->stringSize)
     errAbort("qSize and tSize don't agree in alignment ending line %d of %s",
 	    lf->lineIx, lf->fileName);
 
 if (rescore)
     score = axtScoreSym(scoreScheme, qSym->stringSize,
 			qSym->string, tSym->string);
 
 /* Fill in an axt and write it to output. */
 ZeroVar(&axt);
 axt.qName = qName;
 axt.qStart = qStart;
 axt.qEnd = qEnd;
 axt.qStrand = (isRc ? '-' : '+');
 axt.tName = tName;
 axt.tStart = tStart;
 axt.tEnd = tEnd;
 axt.tStrand = '+';
 axt.score = score;
 axt.symCount = qSym->stringSize;
 axt.qSym = qSym->string;
 axt.tSym = tSym->string;
 axtWrite(&axt, f);
 
 /* Clean up. */
 if (!qIsFa)
     freeDnaSeq(&qSeq);
 freeDnaSeq(&tSeq);
 dyStringFree(&qSym);
 dyStringFree(&tSym);
 }
 
 void parseA(struct lineFile *lf, struct block **retBlockList, 
 	int *retScore)
 /* Parse an alignment stanza into a block list. */
 {
 struct block *block, *blockList = NULL;
 char *line, *words[6], typeChar;
 int wordCount;
 int score = -666;
 boolean gotScore = FALSE;
 
 while (lineFileNext(lf, &line, NULL))
     {
     if (line[0] == '#')
 	continue;
     if (line[0] == '}')
 	break;
     wordCount = chopLine(line, words);
     if (wordCount == 0)
 	continue;
     typeChar = words[0][0];
     if (typeChar == 'l')
 	{
 	lineFileExpectWords(lf, 6, wordCount);
 	AllocVar(block);
 	block->tStart = lineFileNeedNum(lf, words, 1) - 1;
 	block->tEnd = lineFileNeedNum(lf, words, 3);
 	block->qStart = lineFileNeedNum(lf, words, 2) - 1;
 	block->qEnd = lineFileNeedNum(lf, words, 4);
 	if (block->qEnd - block->qStart != block->tEnd - block->tStart)
 	    errAbort("Block size mismatch line %d of %s", lf->lineIx, lf->fileName);
 	block->percentId = lineFileNeedNum(lf, words, 5);
 	slAddHead(&blockList, block);
 	}
     else if (typeChar == 's')
         {
 	gotScore = TRUE;
 	score = lineFileNeedNum(lf, words, 1);
 	}
     }
 if (!gotScore)
     {
     errAbort("'a' stanza missing score line %d of %s", 
     	lf->lineIx, lf->fileName);
     }
 slReverse(&blockList);
 blockList = removeFrayedEnds(blockList);
 *retBlockList = blockList;
 *retScore = score;
 }
 
 static boolean breakUpIfOnDiagonal(struct block *blockList, boolean isRc,
 	char *qName, char *tName, int qSize, int tSize,
 	struct block *retBlockLists[], int maxBlockLists, int *retCount) 
 /* If any blocks are on diagonal, remove the blocks and separate the lists 
  * of blocks before and after the diagonal. Store block list pointers in 
  * retBlockLists, the number of lists in retCount, and return TRUE if 
  * we found any blocks on diagonal so we know to rescore afterwards. */
 {
 int blockListIndex = 0;
 boolean brokenUp = FALSE;
 
 retBlockLists[blockListIndex] = blockList;
 if (sameString(qName, tName))
     {
     struct block *block = NULL, *lastBlock = NULL;
     int i = 0;
     for (block = blockList;  block != NULL;  block = block->next)
 	{
 	int qStart = block->qStart;
 	int qEnd   = block->qEnd;
 	if (lastBlock != NULL && block == retBlockLists[blockListIndex])
 	    freez(&lastBlock);
 	if (isRc)
 	    reverseIntRange(&qStart, &qEnd, qSize);
 	if (rangeIntersection(block->tStart, block->tEnd, qStart, qEnd) > 0)
 	    {
 	    brokenUp = TRUE;
 	    if (block != retBlockLists[blockListIndex])
 		{
 		assert(lastBlock != NULL);
 		lastBlock->next = NULL;
 		blockListIndex++;
 		if (blockListIndex >= maxBlockLists)
 		    errAbort("breakUpIfOnDiagonal: Too many fragmented block lists!");
 		}
 	    retBlockLists[blockListIndex] = block->next;
 	    }
 	lastBlock = block;
 	}
     if (retBlockLists[blockListIndex] == NULL)
 	{
 	blockListIndex--;
 	if (lastBlock != NULL)
 	    freez(&lastBlock);
 	}
     for (i=0;  i <= blockListIndex; i++)
 	{
 	retBlockLists[i] = removeFrayedEnds(retBlockLists[i]);
 	}
     }
 *retCount = blockListIndex + 1;
 return brokenUp;
 }
 
 
 void parseIntoAxt(char *lavFile, FILE *f, 
 	char *tNibDir, struct dlList *tCache, 
 	char *qNibDir, struct dlList *qCache)
 /* Parse a blastz lav file and put it an axt. */
 {
 struct lineFile *lf = lineFileOpen(lavFile, TRUE);
 char *line;
 struct block *blockList = NULL;
 boolean isRc = FALSE;
 char *tName = NULL, *qName = NULL;
 char *matrix = NULL, *command = NULL;
 int qSize = 0, tSize = 0;
 int score = 0;
 
 /* Check header. */
 if (!lineFileNext(lf, &line, NULL))
    errAbort("%s is empty", lf->fileName);
 if (!startsWith("#:lav", line))
    errAbort("%s is not a lav file\n", lf->fileName);
 
 while (lineFileNext(lf, &line, NULL))
     {
     if (startsWith("s {", line))
         {
 	parseS(lf, &tSize, &qSize);
 	}
     else if (startsWith("h {", line))
         {
 	parseH(lf, &tName, &qName, &isRc);
 	}
     else if (startsWith("d {", line))
         {
 	parseD(lf, &matrix, &command, f);
 	}
     else if (startsWith("a {", line))
         {
 	parseA(lf, &blockList, &score);
         if (optionExists("dropSelf"))
 	    {
 	    struct block *bArr[256];
 	    int numBLs = 0, i = 0;
 	    boolean rescore = FALSE;
 	    rescore = breakUpIfOnDiagonal(blockList, isRc, qName, tName,
 					  qSize, tSize, bArr, ArraySize(bArr),
 					  &numBLs);
 	    for (i=0;  i < numBLs;  i++)
 		{
 		outputBlocks(lf, bArr[i], score, f, isRc, 
 			     qName, qSize, qNibDir, qCache,
 			     tName, tSize, tNibDir, tCache, rescore);
 		slFreeList(&bArr[i]);
 		}
 	    }
 	else
 	    {
 	    outputBlocks(lf, blockList, score, f, isRc, 
 			 qName, qSize, qNibDir, qCache,
 			 tName, tSize, tNibDir, tCache, FALSE);
 	    slFreeList(&blockList);
 	    }
 	}
     }
 lineFileClose(&lf);
 }
 
 
 void lavToAxt(char *inLav, char *tNibDir, char *qNibDir, char *outAxt)
 /* lavToAxt - Convert blastz lav file to an axt file (which includes sequence). */
 {
 FILE *f = mustOpen(outAxt, "w");
 struct dlList *qCache = newDlList(), *tCache = newDlList();
 parseIntoAxt(inLav, f, tNibDir, tCache, qNibDir, qCache);
 carefulClose(&f);
 }
 
 int main(int argc, char *argv[])
 /* Process command line. */
 {
 char *scoreSchemeFile = NULL;
 optionHash(&argc, argv);
 if (argc != 5)
     usage();
 if (optionExists("fa"))
     {
     qIsFa = TRUE;
     qFaList = faReadAllMixed(argv[3]);
     }
 if (optionExists("tfa"))
     {
     tIsFa = TRUE;
     tFaList = faReadAllMixed(argv[2]);
     }
 scoreSchemeFile = optionVal("scoreScheme", scoreSchemeFile);
 if (scoreSchemeFile != NULL)
     scoreScheme = axtScoreSchemeRead(scoreSchemeFile);
 else
     scoreScheme = axtScoreSchemeDefault();
 lavToAxt(argv[1], argv[2], argv[3], argv[4]);
 return 0;
 }