e70152e44cc66cc599ff6b699eb8adc07f3e656a
kent
  Sat May 24 21:09:34 2014 -0700
Adding Copyright NNNN Regents of the University of California to all files I believe with reasonable certainty were developed under UCSC employ or as part of Genome Browser copyright assignment.
diff --git src/hg/lib/ggMrnaAli.c src/hg/lib/ggMrnaAli.c
index 18717f3..08e8008 100644
--- src/hg/lib/ggMrnaAli.c
+++ src/hg/lib/ggMrnaAli.c
@@ -1,508 +1,511 @@
 /* ggMrnaAli.c was originally generated by the autoSql program, which also 
  * generated ggMrnaAli.h and ggMrnaAli.sql.  This module links the database and
  * the RAM representation of objects. */
 
+/* Copyright (C) 2014 The Regents of the University of California 
+ * See README in this or parent directory for licensing information. */
+
 #include "common.h"
 #include "linefile.h"
 #include "dystring.h"
 #include "jksql.h"
 #include "ggMrnaAli.h"
 #include "dnautil.h"
 
 
 void ggMrnaBlockStaticLoad(char **row, struct ggMrnaBlock *ret)
 /* Fill the caller-supplied ret from a four column row laid out as
  * qStart, qEnd, tStart, tEnd.  Each column is parsed with sqlSigned()
  * and overwrites the corresponding field of ret. */
 {
 char **col = row;
 ret->qStart = sqlSigned(*col++);
 ret->qEnd = sqlSigned(*col++);
 ret->tStart = sqlSigned(*col++);
 ret->tEnd = sqlSigned(*col);
 }
 
 struct ggMrnaBlock *ggMrnaBlockLoad(char **row)
 /* Allocate a new ggMrnaBlock and fill it from a row of the form
  * qStart, qEnd, tStart, tEnd (as fetched with select * from ggMrnaBlock).
  * The caller owns the result; release it with ggMrnaBlockFree(). */
 {
 struct ggMrnaBlock *block;
 AllocVar(block);
 /* Field parsing is identical to the static loader, so reuse it. */
 ggMrnaBlockStaticLoad(row, block);
 return block;
 }
 
 struct ggMrnaBlock *ggMrnaBlockLoadAll(char *fileName) 
 /* Load all ggMrnaBlock from a tab-separated file.
  * Dispose of this with ggMrnaBlockFreeList(). */
 {
 struct ggMrnaBlock *list = NULL, *el;
 struct lineFile *lf = lineFileOpen(fileName, TRUE);
 char *row[4];
 
 while (lineFileRow(lf, row))
     {
     el = ggMrnaBlockLoad(row);
     slAddHead(&list, el);
     }
 lineFileClose(&lf);
 slReverse(&list);
 return list;
 }
 
 struct ggMrnaBlock *ggMrnaBlockLoadByQuery(struct sqlConnection *conn, char *query)
 /* Run query on conn and convert every returned row into a ggMrnaBlock.
  * The query should select whole rows, for example
  * 'select * from example where something=something' or
  * 'select example.* from example, anotherTable where example.something = 
  * anotherTable.something'.  Results are returned in the order fetched.
  * Dispose of the result with ggMrnaBlockFreeList(). */
 {
 struct ggMrnaBlock *blockList = NULL;
 struct sqlResult *sr = sqlGetResult(conn, query);
 for (;;)
     {
     char **row = sqlNextRow(sr);
     if (row == NULL)
         break;
     struct ggMrnaBlock *block = ggMrnaBlockLoad(row);
     slAddHead(&blockList, block);
     }
 /* Elements were pushed on the front, so flip back to fetch order. */
 slReverse(&blockList);
 sqlFreeResult(&sr);
 return blockList;
 }
 
 void ggMrnaBlockSaveToDb(struct sqlConnection *conn, struct ggMrnaBlock *el, char *tableName, int updateSize)
 /* Insert el as one row of tableName using conn.
  * updateSize is the initial size given to the string buffer that holds
  * the insert statement; all four integer fields are written in the
  * order qStart, qEnd, tStart, tEnd. */
 {
 struct dyString *insertSql = newDyString(updateSize);
 sqlDyStringPrintf(insertSql,
 	"insert into %s values ( %d,%d,%d,%d)", 
 	tableName,
 	el->qStart, el->qEnd,
 	el->tStart, el->tEnd);
 sqlUpdate(conn, insertSql->string);
 freeDyString(&insertSql);
 }
 
 
 struct ggMrnaBlock *ggMrnaBlockCommaIn(char **pS, struct ggMrnaBlock *ret)
 /* Parse qStart, qEnd, tStart, tEnd from the comma separated text at *pS.
  * Fills in ret when it is non-NULL, otherwise allocates a new
  * ggMrnaBlock.  On return *pS is advanced to wherever parsing stopped. */
 {
 struct ggMrnaBlock *block = ret;
 char *cursor = *pS;
 if (block == NULL)
     AllocVar(block);
 block->qStart = sqlSignedComma(&cursor);
 block->qEnd = sqlSignedComma(&cursor);
 block->tStart = sqlSignedComma(&cursor);
 block->tEnd = sqlSignedComma(&cursor);
 /* Only publish the new position once all four fields were read. */
 *pS = cursor;
 return block;
 }
 
 void ggMrnaBlockFree(struct ggMrnaBlock **pEl)
 /* Release one dynamically allocated ggMrnaBlock, such as returned by
  * ggMrnaBlockLoad(), via freez() on the caller's pointer.
  * Does nothing when *pEl is already NULL. */
 {
 if (*pEl != NULL)
     freez(pEl);
 }
 
 void ggMrnaBlockFreeList(struct ggMrnaBlock **pList)
 /* Free every element of a ggMrnaBlock list and set *pList to NULL. */
 {
 struct ggMrnaBlock *cur = *pList;
 while (cur != NULL)
     {
     /* Grab the successor first: freeing cur invalidates cur->next. */
     struct ggMrnaBlock *following = cur->next;
     ggMrnaBlockFree(&cur);
     cur = following;
     }
 *pList = NULL;
 }
 
 void ggMrnaBlockOutput(struct ggMrnaBlock *el, FILE *f, char sep, char lastSep) 
 /* Write qStart, qEnd, tStart, tEnd of el to f as decimal numbers.
  * sep is written after each of the first three fields, lastSep after
  * the final one. */
 {
 const int fieldCount = 4;
 int values[4];
 int ix;
 values[0] = el->qStart;
 values[1] = el->qEnd;
 values[2] = el->tStart;
 values[3] = el->tEnd;
 for (ix = 0; ix < fieldCount; ++ix)
     {
     fprintf(f, "%d", values[ix]);
     fputc((ix == fieldCount - 1) ? lastSep : sep, f);
     }
 }
 
 struct ggMrnaAli *ggMrnaAliLoad(char **row)
 /* Load a ggMrnaAli from row fetched with select * from ggMrnaAli
  * from database.  Dispose of this with ggMrnaAliFree().
  * Row layout: 0 tName, 1 tStart, 2 tEnd, 3 strand, 4 qName, 5 qStart,
  * 6 qEnd, 7 baseCount, 8 orientation, 9 hasIntrons, 10 milliScore,
  * 11 blockCount, 12 block list text of the form {q,q,t,t},{q,q,t,t},...
  * The strand column is copied with a bounds check against the fixed
  * size strand buffer, so an over-long value is rejected by safef()
  * instead of writing past the end of the struct field. */
 {
 struct ggMrnaAli *ret;
 int i;
 char *s;
 
 AllocVar(ret);
 /* blockCount is read first because it drives parsing of row[12] below. */
 ret->blockCount = sqlSigned(row[11]);
 ret->tName = cloneString(row[0]);
 ret->tStart = sqlSigned(row[1]);
 ret->tEnd = sqlSigned(row[2]);
 safef(ret->strand, sizeof(ret->strand), "%s", row[3]);
 ret->qName = cloneString(row[4]);
 ret->qStart = sqlSigned(row[5]);
 ret->qEnd = sqlSigned(row[6]);
 ret->baseCount = sqlUnsigned(row[7]);
 ret->orientation = sqlSigned(row[8]);
 ret->hasIntrons = sqlSigned(row[9]);
 ret->milliScore = sqlSigned(row[10]);
 s = row[12];
 for (i=0; i<ret->blockCount; ++i)
     {
     /* Each block is wrapped in braces and followed by a comma. */
     s = sqlEatChar(s, '{');
     slSafeAddHead(&ret->blocks, ggMrnaBlockCommaIn(&s, NULL));
     s = sqlEatChar(s, '}');
     s = sqlEatChar(s, ',');
     }
 /* Blocks were pushed on the head of the list; restore input order. */
 slReverse(&ret->blocks);
 return ret;
 }
 
 struct ggMrnaAli *ggMrnaAliLoadAll(char *fileName) 
 /* Load all ggMrnaAli from a tab-separated file.
  * Dispose of this with ggMrnaAliFreeList(). */
 {
 struct ggMrnaAli *list = NULL, *el;
 struct lineFile *lf = lineFileOpen(fileName, TRUE);
 char *row[13];
 
 while (lineFileRow(lf, row))
     {
     el = ggMrnaAliLoad(row);
     slAddHead(&list, el);
     }
 lineFileClose(&lf);
 slReverse(&list);
 return list;
 }
 
 struct ggMrnaAli *ggMrnaAliLoadByQuery(struct sqlConnection *conn, char *query)
 /* Run query on conn and convert every returned row into a ggMrnaAli.
  * The query should select whole rows, for example
  * 'select * from example where something=something' or
  * 'select example.* from example, anotherTable where example.something = 
  * anotherTable.something'.  Results are returned in the order fetched.
  * Dispose of the result with ggMrnaAliFreeList(). */
 {
 struct ggMrnaAli *aliList = NULL;
 struct sqlResult *sr = sqlGetResult(conn, query);
 for (;;)
     {
     char **row = sqlNextRow(sr);
     if (row == NULL)
         break;
     struct ggMrnaAli *ali = ggMrnaAliLoad(row);
     slAddHead(&aliList, ali);
     }
 /* Elements were pushed on the front, so flip back to fetch order. */
 slReverse(&aliList);
 sqlFreeResult(&sr);
 return aliList;
 }
 
 void ggMrnaAliSaveToDb(struct sqlConnection *conn, struct ggMrnaAli *el, char *tableName, int updateSize)
 /* Insert el as one row of tableName using conn.
  * updateSize is the initial size given to the string buffer that holds
  * the insert statement.  The scalar and string fields are written in
  * column order; the block list column is inserted as NULL. */
 {
 struct dyString *insertSql = newDyString(updateSize);
 sqlDyStringPrintf(insertSql,
 	"insert into %s values ( '%s',%d,%d,'%s','%s',%d,%d,%u,%d,%d,%d,%d, NULL )", 
 	tableName,
 	el->tName, el->tStart, el->tEnd, el->strand,
 	el->qName, el->qStart, el->qEnd,
 	el->baseCount, el->orientation, el->hasIntrons,
 	el->milliScore, el->blockCount);
 sqlUpdate(conn, insertSql->string);
 freeDyString(&insertSql);
 }
 
 
 struct ggMrnaAli *ggMrnaAliCommaIn(char **pS, struct ggMrnaAli *ret)
 /* Parse a ggMrnaAli from the comma separated text at *pS.
  * Fills in ret when it is non-NULL, otherwise allocates a new
  * ggMrnaAli.  The scalar fields come first, then a brace-enclosed
  * list of blockCount brace-enclosed blocks.  On return *pS is advanced
  * past everything consumed. */
 {
 struct ggMrnaAli *ali = ret;
 char *cursor = *pS;
 int blockIx = 0;
 if (ali == NULL)
     AllocVar(ali);
 ali->tName = sqlStringComma(&cursor);
 ali->tStart = sqlSignedComma(&cursor);
 ali->tEnd = sqlSignedComma(&cursor);
 sqlFixedStringComma(&cursor, ali->strand, sizeof(ali->strand));
 ali->qName = sqlStringComma(&cursor);
 ali->qStart = sqlSignedComma(&cursor);
 ali->qEnd = sqlSignedComma(&cursor);
 ali->baseCount = sqlUnsignedComma(&cursor);
 ali->orientation = sqlSignedComma(&cursor);
 ali->hasIntrons = sqlSignedComma(&cursor);
 ali->milliScore = sqlSignedComma(&cursor);
 ali->blockCount = sqlSignedComma(&cursor);
 /* Outer brace opens the block list; each block has its own braces. */
 cursor = sqlEatChar(cursor, '{');
 while (blockIx < ali->blockCount)
     {
     cursor = sqlEatChar(cursor, '{');
     slSafeAddHead(&ali->blocks, ggMrnaBlockCommaIn(&cursor,NULL));
     cursor = sqlEatChar(cursor, '}');
     cursor = sqlEatChar(cursor, ',');
     ++blockIx;
     }
 /* Blocks were pushed on the head of the list; restore input order. */
 slReverse(&ali->blocks);
 cursor = sqlEatChar(cursor, '}');
 cursor = sqlEatChar(cursor, ',');
 *pS = cursor;
 return ali;
 }
 
 void ggMrnaAliFree(struct ggMrnaAli **pEl)
 /* Release one dynamically allocated ggMrnaAli, such as returned by
  * ggMrnaAliLoad(): its tName and qName strings, its block list, and
  * finally the struct itself.  Does nothing when *pEl is NULL. */
 {
 struct ggMrnaAli *ali = *pEl;
 if (ali != NULL)
     {
     freeMem(ali->tName);
     freeMem(ali->qName);
     ggMrnaBlockFreeList(&ali->blocks);
     freez(pEl);
     }
 }
 
 void ggMrnaAliFreeList(struct ggMrnaAli **pList)
 /* Free every element of a ggMrnaAli list and set *pList to NULL. */
 {
 struct ggMrnaAli *cur = *pList;
 while (cur != NULL)
     {
     /* Grab the successor first: freeing cur invalidates cur->next. */
     struct ggMrnaAli *following = cur->next;
     ggMrnaAliFree(&cur);
     cur = following;
     }
 *pList = NULL;
 }
 
 void ggMrnaAliOutput(struct ggMrnaAli *el, FILE *f, char sep, char lastSep) 
 /* Print out ggMrnaAli.  Separate fields with sep. Follow last field with lastSep. */
 {
 int i;
 if (sep == ',') fputc('"',f);
 fprintf(f, "%s", el->tName);
 if (sep == ',') fputc('"',f);
 fputc(sep,f);
 fprintf(f, "%d", el->tStart);
 fputc(sep,f);
 fprintf(f, "%d", el->tEnd);
 fputc(sep,f);
 if (sep == ',') fputc('"',f);
 fprintf(f, "%s", el->strand);
 if (sep == ',') fputc('"',f);
 fputc(sep,f);
 if (sep == ',') fputc('"',f);
 fprintf(f, "%s", el->qName);
 if (sep == ',') fputc('"',f);
 fputc(sep,f);
 fprintf(f, "%d", el->qStart);
 fputc(sep,f);
 fprintf(f, "%d", el->qEnd);
 fputc(sep,f);
 fprintf(f, "%u", el->baseCount);
 fputc(sep,f);
 fprintf(f, "%d", el->orientation);
 fputc(sep,f);
 fprintf(f, "%d", el->hasIntrons);
 fputc(sep,f);
 fprintf(f, "%d", el->milliScore);
 fputc(sep,f);
 fprintf(f, "%d", el->blockCount);
 fputc(sep,f);
 /* Loading ggMrnaBlock list. */
     {
     struct ggMrnaBlock *it = el->blocks;
     if (sep == ',') fputc('{',f);
     for (i=0; i<el->blockCount; ++i)
         {
         fputc('{',f);
         ggMrnaBlockCommaOut(it,f);
         it = it->next;
         fputc('}',f);
         fputc(',',f);
         }
     if (sep == ',') fputc('}',f);
     }
 fputc(lastSep,f);
 }
 
 /* -------------------------------- End autoSql Generated Code -------------------------------- */
 
 
 
 struct ggMrnaAli *pslToGgMrnaAli(struct psl *psl, char *chrom, unsigned int chromStart,
 				 unsigned int chromEnd, struct dnaSeq *genoSeq)
 /* Convert from psl format of alignment to ma format.  Always returns a
  * newly allocated ggMrnaAli; alignments without introns are returned
  * with hasIntrons set to FALSE rather than being dropped.
  * psl is shifted by -chromStart while it is being read and shifted
  * back before returning, so coordinates stored in the result are
  * relative to chromStart.  chrom and chromEnd are not used here.
  * The blocks of the result are allocated as one array of blockCount
  * elements, and sourceType points at a string literal. */
 {
 struct ggMrnaAli *ma;
 int i;
 int blockCount;
 struct ggMrnaBlock *blocks;
 int iOrientation; 
 char *strand;
 /* Negating the unsigned chromStart directly would be done in unsigned
  * arithmetic, so convert to a signed offset first. */
 int offset = (int)chromStart;
 
 /* convert psl to our local genoSeq coordinates */
 pslTargetOffset(psl, -offset);
 
 /* Figure out orientation and direction based on introns.  When the
  * introns give no answer fall back to the strand recorded in the psl. */
 iOrientation = pslIntronOrientation(psl, genoSeq, 0);
 strand = psl->strand;
 if (iOrientation < 0)
     strand = "-";
 else if (iOrientation > 0)
     strand = "+";
 
 AllocVar(ma);
 ma->orientation = iOrientation;
 ma->qName = cloneString(psl->qName);
 ma->qStart = psl->qStart;
 ma->qEnd = psl->qEnd;
 ma->baseCount = psl->qSize;
 ma->milliScore = psl->match + psl->repMatch - psl->misMatch - (psl->blockCount-1)*2;
 safef(ma->strand, sizeof(ma->strand), "%s", strand);
 ma->hasIntrons = (iOrientation == 0 ? FALSE : TRUE);
 ma->tName = cloneString(psl->tName);
 ma->tStart = psl->tStart;
 ma->tEnd = psl->tEnd;
 ma->blockCount = blockCount = psl->blockCount;
 ma->blocks = AllocArray(blocks, blockCount);
 ma->sourceType = "psl";
 
 for (i = 0; i<blockCount; ++i)
     {
     struct ggMrnaBlock *block = blocks+i;
     int bSize = psl->blockSizes[i];
     int qStart = psl->qStarts[i];
     int tStart = psl->tStarts[i];
     block->qStart = qStart;
     block->qEnd = qStart + bSize;
     block->tStart = tStart;
     block->tEnd = tStart + bSize;
     }
 /* Put the caller's psl back into its original coordinates. */
 pslTargetOffset(psl, offset);
 return ma;
 }
 
 boolean ggMrnaAliMergeBlocks(struct ggMrnaAli *ma, int maxGap)
 /* Merge blocks that looks to be separated by small amounts
  * of sequencing noise only. 2 is a good value for maxGap.
  * Two neighbouring blocks are merged when both the query gap and the
  * target gap between them are at most maxGap in absolute value.
  * Merging is done in place: ma->blocks is treated as an array of
  * ma->blockCount elements, surviving blocks are compacted to the front
  * and ma->blockCount is updated.  Returns TRUE if anything was merged.
  * An alignment with no blocks is left untouched and returns FALSE. */
 {
 struct ggMrnaBlock *readBlock, *writeBlock;
 int mergedCount = 1;
 int i;
 boolean mergedSome = FALSE;
 
 /* Without this guard an empty alignment would come out claiming to
  * have one block. */
 if (ma->blockCount <= 0)
     return FALSE;
 
 readBlock = writeBlock = ma->blocks;
 for (i=1; i<ma->blockCount; ++i)
     {
     ++readBlock;
     if (intAbs(readBlock->qStart - writeBlock->qEnd) <= maxGap &&
         intAbs(readBlock->tStart - writeBlock->tEnd) <= maxGap)
 	{
 	ma->baseCount += readBlock->tStart - writeBlock->tEnd; /* If we've added bases keep track. */
 	/* Extend the block being written to cover the one just read. */
 	writeBlock->qEnd = readBlock->qEnd;
 	writeBlock->tEnd = readBlock->tEnd;
 	mergedSome = TRUE;
 	}
     else
 	{
 	/* Gap too large: start a new output block. */
 	++writeBlock;
 	*writeBlock = *readBlock;
 	++mergedCount;
 	}
     }
 ma->blockCount = mergedCount;
 return mergedSome;
 }
 
 int cmpGgMrnaAliTargetStart(const void *va, const void *vb)
 /* Sort comparison for ggMrnaAli pointers: order by tName, then by
  * strand, then by tStart.  va and vb each point at a pointer to a
  * ggMrnaAli. */
 {
 const struct ggMrnaAli *a = *((struct ggMrnaAli **)va);
 const struct ggMrnaAli *b = *((struct ggMrnaAli **)vb);
 int diff = strcmp(a->tName, b->tName);
 if (diff != 0)
     return diff;
 diff = strcmp(a->strand, b->strand);
 if (diff != 0)
     return diff;
 return (a->tStart - b->tStart);
 }
 
 void ggMrnaAliBedOut(struct ggMrnaAli *ma, FILE *f)
 /* write out the target blocks in simple bed format, one bed per block */
 {
 int i;
 for(i=0; i < ma->blockCount; i++)
     {
     fprintf(f, "%s\t%d\t%d\t%s\t%d\t%s\n", ma->tName, ma->blocks[i].tStart + ma->tStart, ma->blocks[i].tEnd + ma->tStart, 
 	    ma->qName, (ma->hasIntrons == TRUE ? 1000 : 500 ), ma->strand);
     }
 }
 
 void ggMrnaAliBed12Out(struct ggMrnaAli *ma, FILE *f)
 /* Write out the target blocks as a linked feature bed format. */
 {
 int i;
 fprintf(f, "%s\t%d\t%d\t%s\t%d\t%s\t%d\t%d\t0\t%d\t", 
 	ma->tName, ma->tStart, ma->tEnd, ma->qName, 1000, ma->strand,
 	ma->tStart, ma->tEnd, ma->blockCount);
 for(i=0; i< ma->blockCount; i++)
     fprintf(f, "%d,", (ma->blocks[i].tEnd - ma->blocks[i].tStart));
 fprintf(f,"\t");
 for(i=0; i< ma->blockCount; i++)
     fprintf(f, "%d,", (ma->blocks[i].tStart - ma->tStart));
 fprintf(f,"\n");
 }
 
 struct ggMrnaAli *pslListToGgMrnaAliList(struct psl *pslList, char *chrom, unsigned int chromStart, 
 					 unsigned int chromEnd, struct dnaSeq *genoSeq, int maxGap)
 /* Convert each psl in pslList with pslToGgMrnaAli(), merge blocks that
  * are separated by gaps of at most maxGap, and return the conversions
  * as a list in the same order as pslList.  Conversions that come back
  * NULL are skipped. */
 {
 struct ggMrnaAli *converted = NULL;
 struct psl *cur = pslList;
 while (cur != NULL)
     {
     struct ggMrnaAli *ali = pslToGgMrnaAli(cur, chrom, chromStart, chromEnd, genoSeq);
     if (ali != NULL)
 	{
 	ggMrnaAliMergeBlocks(ali, maxGap);
 	slAddHead(&converted, ali);
 	}
     cur = cur->next;
     }
 /* Elements were pushed on the front, so flip back to input order. */
 slReverse(&converted);
 return converted;
 }
 
 struct ggMrnaInput *ggMrnaInputFromAlignments(struct ggMrnaAli *maList, struct dnaSeq *genoSeq)
 /* Allocate a ggMrnaInput describing maList.  Target name, range and
  * strand are taken from the first alignment in the list; tName, maList
  * and genoSeq are stored as pointers, not copied.  maList must not be
  * NULL. */
 {
 struct ggMrnaInput *input = NULL;
 struct ggMrnaAli *first = maList;
 assert(maList);
 AllocVar(input);
 input->maList = maList;
 input->genoSeq = genoSeq;
 input->tName = first->tName;
 input->tStart = first->tStart;
 input->tEnd = first->tEnd;
 safef(input->strand, sizeof(input->strand), "%s", first->strand);
 return input;
 }