e70152e44cc66cc599ff6b699eb8adc07f3e656a
kent
  Sat May 24 21:09:34 2014 -0700
Adding Copyright NNNN Regents of the University of California to all files I believe with reasonable certainty were developed under UCSC employ or as part of Genome Browser copyright assignment.
diff --git src/hg/lib/hgMaf.c src/hg/lib/hgMaf.c
index 7549420..1ee9666 100644
--- src/hg/lib/hgMaf.c
+++ src/hg/lib/hgMaf.c
@@ -1,443 +1,446 @@
 /* hgMaf.c - Stuff to load up mafs from the browser database. */
 
+/* Copyright (C) 2014 The Regents of the University of California 
+ * See README in this or parent directory for licensing information. */
+
 #include "common.h"
 #include "linefile.h"
 #include "hash.h"
 #include "dystring.h"
 #include "options.h"
 #include "jksql.h"
 #include "dnautil.h"
 #include "dnaseq.h"
 #include "maf.h"
 #include "hdb.h"
 #include "scoredRef.h"
 #include "hgMaf.h"
 
 
 int mafCmp(const void *va, const void *vb)
 /* Compare to sort based on start of first component. */
 {
 const struct mafAli *a = *((struct mafAli **)va);
 const struct mafAli *b = *((struct mafAli **)vb);
 return a->components->start - b->components->start;
 }
 
 
 struct mafAli *mafLoadInRegion2(struct sqlConnection *conn,
     struct sqlConnection *conn2, char *table, char *chrom,
     int start, int end, char *file)
 /* Return list of alignments in region. */
 {
 char **row;
 unsigned int extFileId = 0;
 struct mafAli *maf, *mafList = NULL;
 struct mafFile *mf = NULL;
 int rowOffset;
 
 if (file != NULL)
     mf = mafOpen(file);
 
 struct sqlResult *sr = hRangeQuery(conn, table, chrom,
     start, end, NULL, &rowOffset);
 
 while ((row = sqlNextRow(sr)) != NULL)
     {
     struct scoredRef ref;
     scoredRefStaticLoad(row + rowOffset, &ref);
     if ((file != NULL) && (ref.extFile != 0))
 	errAbort("expect extFile to be zero if file specified\n");
     if ((file == NULL) && (ref.extFile == 0))
 	errAbort("expect extFile to be not zero or file specified\n");
 
     if (ref.extFile != extFileId)
 	{
 	char *path = hExtFileNameC(conn2, "extFile", ref.extFile);
 	mafFileFree(&mf);
 	mf = mafOpen(path);
 	extFileId = ref.extFile;
 	}
     lineFileSeek(mf->lf, ref.offset, SEEK_SET);
     maf = mafNext(mf);
     if (maf == NULL)
         internalErr();
     slAddHead(&mafList, maf);
     }
 sqlFreeResult(&sr);
 mafFileFree(&mf);
 slReverse(&mafList);
 /* hRangeQuery may return items out-of-order when bin is used in the query,
  * so sort here in order to avoid trouble at base-level view: */
 slSort(&mafList, mafCmp);
 return mafList;
 }
 
 struct mafAli *mafLoadInRegion(struct sqlConnection *conn, char *table,
 	char *chrom, int start, int end)
 {
 struct sqlConnection *conn2 = hAllocConn(sqlGetDatabase(conn));
 struct mafAli *ret = mafLoadInRegion2(conn, conn2, table, chrom,
     start, end,NULL);
 hFreeConn(&conn2);
 return ret;
 }
 
 struct mafAli *axtLoadAsMafInRegion(struct sqlConnection *conn, char *table,
         char *chrom, int start, int end,
         char *tPrefix, char *qPrefix, int tSize,  struct hash *qSizeHash)
 /* Return list of alignments in region from axt external file as a maf. */
 {
 char **row;
 unsigned int extFileId = 0;
 struct lineFile *lf = NULL;
 struct mafAli *maf, *mafList = NULL;
 struct axt *axt;
 int rowOffset;
 struct sqlResult *sr = hRangeQuery(conn, table, chrom,
     start, end, NULL, &rowOffset);
 
 while ((row = sqlNextRow(sr)) != NULL)
     {
     struct scoredRef ref;
     scoredRefStaticLoad(row + rowOffset, &ref);
     if (ref.extFile != extFileId)
 	{
 	char *path = hExtFileName(sqlGetDatabase(conn),"extFile", ref.extFile);
 	lf = lineFileOpen(path, TRUE);
 	extFileId = ref.extFile;
 	}
     lineFileSeek(lf, ref.offset, SEEK_SET);
     axt = axtRead(lf);
     if (axt == NULL)
         internalErr();
     maf = mafFromAxt(axt, tSize, tPrefix, hashIntVal(qSizeHash, axt->qName), qPrefix);
     axtFree(&axt);
     slAddHead(&mafList, maf);
     }
 sqlFreeResult(&sr);
 lineFileClose(&lf);
 slReverse(&mafList);
 return mafList;
 }
 
 static void mafCheckFirstComponentSrc(struct mafAli *mafList, char *src)
 /* Check that the first component of each maf has given src. */
 {
 struct mafAli *maf;
 for (maf = mafList; maf != NULL; maf = maf->next)
     {
     if (!sameString(maf->components->src, src))
         errAbort("maf first component isn't %s", src);
     }
 }
 
 static void mafCheckFirstComponentStrand(struct mafAli *mafList, char strand)
 /* Check that the first component of each maf has given strand. */
 {
 struct mafAli *maf;
 for (maf = mafList; maf != NULL; maf = maf->next)
     {
     if (maf->components->strand != strand)
         errAbort("maf first component isn't %c", strand);
     }
 }
 
 struct oneOrg
 /* Info on one organism. */
     {
     struct oneOrg *next;
     char *name;		/* Name - allocated in hash */
     int order;		/* Help sort organisms. */
     struct dyString *dy;	/* Associated alignment for this organism. */
     boolean hit;	/* Flag to see if hit this time around. */
     };
 
 static int oneOrgCmp(const void *va, const void *vb)
 /* Compare to sort based on order. */
 {
 const struct oneOrg *a = *((struct oneOrg **)va);
 const struct oneOrg *b = *((struct oneOrg **)vb);
 return a->order - b->order;
 }
 
 static void fillInMissing(struct oneOrg *nativeOrg, struct oneOrg *orgList,
 	struct dnaSeq *native, int seqStart, int curPos, int aliStart)
 /* Fill in alignment strings in orgList with native sequence
  * for first organism, and dots for rest. */
 {
 int fillSize = aliStart - curPos;
 int offset = curPos - seqStart;
 struct oneOrg *org;
 if (nativeOrg == NULL)
     return;
 dyStringAppendN(nativeOrg->dy, native->dna + offset, fillSize);
 for (org = orgList; org != NULL; org = org->next)
     {
     if (org != nativeOrg)
 	dyStringAppendMultiC(org->dy, '.', fillSize);
     }
 }
 
 static int countAlpha(char *s)
 /* Count up number of alphabetic characters in string */
 {
 int count = 0;
 char c;
 while ((c = *s++) != 0)
     if (isalpha(c))
         ++count;
 return count;
 }
 
 struct mafAli *hgMafFrag(
 	char *database,     /* Database, must already have hSetDb to this */
 	char *track,        /* Name of MAF track */
 	char *chrom,        /* Chromosome (in database genome) */
 	int start, int end, /* start/end in chromosome */
 	char strand,        /* Chromosome strand. */
 	char *outName,      /* Optional name to use in first component */
 	struct slName *orderList /* Optional order of organisms. */
 	)
 /* mafFrag- Extract maf sequences for a region from database.
  * This creates a somewhat unusual MAF that extends from start
  * to end whether or not there are actually alignments.  Where
  * there are no alignments (or alignments missing a species)
  * a . character fills in.   The score is always zero, and
  * the sources just indicate the species.  You can mafFree this
  * as normal. */
 {
 int chromSize = hChromSize(database, chrom);
 struct sqlConnection *conn = hAllocConn(database);
 struct dnaSeq *native = hChromSeq(database, chrom, start, end);
 struct mafAli *maf, *mafList = mafLoadInRegion(conn, track, chrom, start, end);
 char masterSrc[128];
 struct hash *orgHash = newHash(10);
 struct oneOrg *orgList = NULL, *org, *nativeOrg = NULL;
 int curPos = start, symCount = 0;
 struct slName *name;
 int order = 0;
 
 /* Check that the mafs are really copacetic, the particular
  * subtype we think is in the database that this (relatively)
  * simple code can handle. */
 safef(masterSrc, sizeof(masterSrc), "%s.%s", database, chrom);
 mafCheckFirstComponentSrc(mafList, masterSrc);
 mafCheckFirstComponentStrand(mafList, '+');
 slSort(&mafList, mafCmp);
 
 /* Prebuild organisms if possible from input orderList. */
 for (name = orderList; name != NULL; name = name->next)
     {
     AllocVar(org);
     slAddHead(&orgList, org);
     hashAddSaveName(orgHash, name->name, org, &org->name);
     org->dy = dyStringNew(native->size*1.5);
     org->order = order++;
     if (nativeOrg == NULL)
         nativeOrg = org;
     }
 if (orderList == NULL)
     {
     AllocVar(org);
     slAddHead(&orgList, org);
     hashAddSaveName(orgHash, database, org, &org->name);
     org->dy = dyStringNew(native->size*1.5);
     if (nativeOrg == NULL)
         nativeOrg = org;
     }
 
 /* Go through all mafs in window, mostly building up
  * org->dy strings. */
 for (maf = mafList; maf != NULL; maf = maf->next)
     {
     struct mafComp *mc, *mcMaster = maf->components;
     struct mafAli *subMaf = NULL;
     order = 0;
     if (curPos < mcMaster->start)
 	{
         fillInMissing(nativeOrg, orgList, native, start,
 		curPos, mcMaster->start);
 	symCount += mcMaster->start - curPos;
 	}
     if (curPos < mcMaster->start + mcMaster->size) /* Prevent worst
                                                     * backtracking */
 	{
 	if (mafNeedSubset(maf, masterSrc, curPos, end))
 	    {
 	    subMaf = mafSubset(maf, masterSrc, curPos, end);
 	    if (subMaf == NULL)
 	        continue;
 	    }
 	else
 	    subMaf = maf;
 	for (mc = subMaf->components; mc != NULL; mc = mc->next, ++order)
 	    {
 	    /* Extract name up to dot into 'orgName' */
 	    char buf[128], *e, *orgName;
 
 	    if ((mc->size == 0) || (mc->srcSize == 0)) /* skip over components without sequence */
 		continue;
 
 	    mc->leftStatus = mc->rightStatus = 0; /* squash annotation */
 
 	    e = strchr(mc->src, '.');
 	    if (e == NULL)
 		orgName = mc->src;
 	    else
 		{
 		int len = e - mc->src;
 		if (len >= sizeof(buf))
 		    errAbort("organism/database name %s too long", mc->src);
 		memcpy(buf, mc->src, len);
 		buf[len] = 0;
 		orgName = buf;
 		}
 
 	    /* Look up dyString corresponding to  org, and create a
 	     * new one if necessary. */
 	    org = hashFindVal(orgHash, orgName);
 	    if (org == NULL)
 		{
 		if (orderList != NULL)
 		   errAbort("%s is not in orderList", orgName);
 		AllocVar(org);
 		slAddHead(&orgList, org);
 		hashAddSaveName(orgHash, orgName, org, &org->name);
 		org->dy = dyStringNew(native->size*1.5);
 		dyStringAppendMultiC(org->dy, '.', symCount);
 		if (nativeOrg == NULL)
 		    nativeOrg = org;
 		}
 	    if (orderList == NULL && order > org->order)
 		org->order = order;
 	    org->hit = TRUE;
 
 	    /* Fill it up with alignment. */
 	    dyStringAppendN(org->dy, mc->text, subMaf->textSize);
 	    }
 	for (org = orgList; org != NULL; org = org->next)
 	    {
 	    if (!org->hit)
 		dyStringAppendMultiC(org->dy, '.', subMaf->textSize);
 	    org->hit = FALSE;
 	    }
 	symCount += subMaf->textSize;
 	curPos = mcMaster->start + mcMaster->size;
 	if (subMaf != maf)
 	    mafAliFree(&subMaf);
 	}
     }
 if (curPos < end)
     {
     fillInMissing(nativeOrg, orgList, native, start, curPos, end);
     symCount += end - curPos;
     }
 mafAliFreeList(&mafList);
 
 slSort(&orgList, oneOrgCmp);
 if (strand == '-')
     {
     for (org = orgList; org != NULL; org = org->next)
 	reverseComplement(org->dy->string, org->dy->stringSize);
     }
 
 /* Construct our maf */
 AllocVar(maf);
 maf->textSize = symCount;
 for (org = orgList; org != NULL; org = org->next)
     {
     struct mafComp *mc;
     AllocVar(mc);
     if (org == orgList)
         {
 	if (outName != NULL)
 	    {
 	    mc->src = cloneString(outName);
 	    mc->srcSize = native->size;
 	    mc->strand = '+';
 	    mc->start = 0;
 	    mc->size = native->size;
 	    }
 	else
 	    {
 	    mc->src = cloneString(masterSrc);
 	    mc->srcSize = chromSize;
 	    mc->strand = strand;
 	    if (strand == '-')
 	       reverseIntRange(&start, &end, chromSize);
 	    mc->start = start;
 	    mc->size = end-start;
 	    }
 	}
     else
         {
 	int size = countAlpha(org->dy->string);
 	mc->src = cloneString(org->name);
 	mc->srcSize = size;
 	mc->strand = '+';
 	mc->start = 0;
 	mc->size = size;
 	}
     mc->text = cloneString(org->dy->string);
     dyStringFree(&org->dy);
     slAddHead(&maf->components, mc);
     }
 slReverse(&maf->components);
 
 slFreeList(&orgList);
 freeHash(&orgHash);
 hFreeConn(&conn);
 return maf;
 }
 
 struct consWiggle *wigMafWiggles(char *db, struct trackDb *tdb)
 /* get conservation wiggle table names and labels from trackDb setting,
    ignoring those where table doesn't exist */
 {
 char *fields[20];
 int fieldCt;
 int i;
 char *wigTable, *wigLabel;
 struct consWiggle *wig, *wigList = NULL;
 char *setting = trackDbSetting(tdb, CONS_WIGGLE);
 if (!setting)
     return NULL;
 fieldCt = chopLine(cloneString(setting), fields);
 for (i = 0; i < fieldCt; i += 3)
     {
     wigTable = fields[i];
     if (hTableExists(db, wigTable))
         {
         AllocVar(wig);
         wig->table = cloneString(wigTable);
         wigLabel = (i+1 == fieldCt ? DEFAULT_CONS_LABEL : fields[i+1]);
         wig->leftLabel = cloneString(wigLabel);
         wigLabel = (i+2 >= fieldCt ? wig->leftLabel : fields[i+2]);
         wig->uiLabel = cloneString(wigLabel);
         slAddTail(&wigList, wig);
         }
     }
 return wigList;
 }
 
 char *wigMafWiggleVar(char *prefix, struct consWiggle *wig,char **suffix)
 // Return name of cart variable (and optionally the suffix) for this cons wiggle
 {
 char option[128];
 safef(option, sizeof option, "%s.cons.%s", prefix, wig->leftLabel);
 if (suffix != NULL)
     *suffix = option + strlen(prefix) + 1;
 return (cloneString(option));
 }
 
 struct consWiggle *consWiggleFind(char *db,struct trackDb *parent,char *table)
 /* Return conservation wig if it is found in the parent. */
 {
 if (parent == NULL || !startsWith("wigMaf", parent->type))
     return NULL;
 
 struct consWiggle *wig, *wiggles = wigMafWiggles(db, parent);
 for (wig = wiggles;
      wig != NULL && differentString(wig->table,table);
      wig = wig->next) {}
 return wig;
 }