src/hg/ratStuff/mafFrags/mafFrags.c 1.10
1.10 2009/06/23 16:40:19 hiram
The meFirst stuff was broken when the input maf does not have the separator
Index: src/hg/ratStuff/mafFrags/mafFrags.c
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/hg/ratStuff/mafFrags/mafFrags.c,v
retrieving revision 1.9
retrieving revision 1.10
diff -b -B -U 1000000 -r1.9 -r1.10
--- src/hg/ratStuff/mafFrags/mafFrags.c 31 Oct 2008 00:38:35 -0000 1.9
+++ src/hg/ratStuff/mafFrags/mafFrags.c 23 Jun 2009 16:40:19 -0000 1.10
@@ -1,250 +1,252 @@
/* mafFrags - Collect MAFs from regions specified in a 6 column bed file. */
#include "common.h"
#include "linefile.h"
#include "hash.h"
#include "options.h"
#include "obscure.h"
#include "jksql.h"
#include "maf.h"
#include "hdb.h"
#include "hgMaf.h"
static char const rcsid[] = "$Id$";
-void usage()
+static void usage()
/* Explain usage and exit. */
{
errAbort(
"mafFrags - Collect MAFs from regions specified in a 6 column bed file\n"
"usage:\n"
" mafFrags database track in.bed out.maf\n"
"options:\n"
" -orgs=org.txt - File with list of databases/organisms in order\n"
" -bed12 - If set, in.bed is a bed 12 file, including exons\n"
" -thickOnly - Only extract subset between thickStart/thickEnd\n"
" -meFirst - Put native sequence first in maf\n"
" -txStarts - Add MAF txstart region definitions ('r' lines) using BED name\n"
" and output actual reference genome coordinates in MAF.\n"
);
}
static struct optionSpec options[] = {
{"orgs", OPTION_STRING},
{"bed12", OPTION_BOOLEAN},
{"thickOnly", OPTION_BOOLEAN},
{"meFirst", OPTION_BOOLEAN},
{"txStarts", OPTION_BOOLEAN},
{NULL, 0},
};
-boolean bed12 = FALSE;
-boolean thickOnly = FALSE;
-boolean meFirst = FALSE;
-boolean txStarts = FALSE;
+static boolean bed12 = FALSE;
+static boolean thickOnly = FALSE;
+static boolean meFirst = FALSE;
+static boolean txStarts = FALSE;
-struct mafAli *mafFromBed12(char *database, char *track, struct bed *bed,
- struct slName *orgList)
+static struct mafAli *mafFromBed12(char *database, char *track,
+ struct bed *bed, struct slName *orgList)
/* Construct a maf out of exons in bed. */
{
/* Loop through all block in bed, collecting a list of mafs, one
* for each block. While we're at make a hash of all species seen. */
struct hash *speciesHash = hashNew(0);
struct mafAli *mafList = NULL, *maf, *bigMaf;
struct mafComp *comp, *bigComp;
int totalTextSize = 0;
int i;
for (i=0; i<bed->blockCount; ++i)
{
int start = bed->chromStart + bed->chromStarts[i];
int end = start + bed->blockSizes[i];
if (thickOnly)
{
start = max(start, bed->thickStart);
end = min(end, bed->thickEnd);
}
if (start < end)
{
maf = hgMafFrag(database, track, bed->chrom, start, end, '+',
database, NULL);
slAddHead(&mafList, maf);
for (comp = maf->components; comp != NULL; comp = comp->next)
hashStore(speciesHash, comp->src);
totalTextSize += maf->textSize;
}
}
slReverse(&mafList);
/* Add species in order list too */
struct slName *org;
for (org = orgList; org != NULL; org = org->next)
hashStore(speciesHash, org->name);
/* Allocate memory for return maf that contains all blocks concatenated together.
* Also fill in components with any species seen at all. */
AllocVar(bigMaf);
bigMaf->textSize = totalTextSize;
struct hashCookie it = hashFirst(speciesHash);
struct hashEl *hel;
while ((hel = hashNext(&it)) != NULL)
{
AllocVar(bigComp);
bigComp->src = cloneString(hel->name);
bigComp->text = needLargeMem(totalTextSize + 1);
memset(bigComp->text, '.', totalTextSize);
bigComp->text[totalTextSize] = 0;
bigComp->strand = '+';
bigComp->srcSize = totalTextSize; /* It's safe if a bit of a lie. */
hel->val = bigComp;
slAddHead(&bigMaf->components, bigComp);
}
/* Loop through maf list copying in data. */
int textOffset = 0;
for (maf = mafList; maf != NULL; maf = maf->next)
{
for (comp = maf->components; comp != NULL; comp = comp->next)
{
bigComp = hashMustFindVal(speciesHash, comp->src);
memcpy(bigComp->text + textOffset, comp->text, maf->textSize);
bigComp->size += comp->size;
}
textOffset += maf->textSize;
}
/* Cope with strand of darkness. */
if (bed->strand[0] == '-')
{
for (comp = bigMaf->components; comp != NULL; comp = comp->next)
reverseComplement(comp->text, bigMaf->textSize);
}
/* If got an order list then reorder components according to it. */
if (orgList != NULL)
{
struct mafComp *newList = NULL;
for (org = orgList; org != NULL; org = org->next)
{
comp = hashMustFindVal(speciesHash, org->name);
slAddHead(&newList, comp);
}
slReverse(&newList);
bigMaf->components = newList;
}
/* Rename our own component to bed name */
comp = hashMustFindVal(speciesHash, database);
freeMem(comp->src);
comp->src = cloneString(bed->name);
/* Clean up and go home. */
hashFree(&speciesHash);
mafAliFreeList(&mafList);
return bigMaf;
}
-void moveMeToFirst(struct mafAli *maf, char *myName)
+static void moveMeToFirst(struct mafAli *maf, char *myName)
/* Find component matching myName, and move it to first. */
{
-struct mafComp *comp = mafFindCompPrefix(maf, myName, ".");
+struct mafComp *comp = mafMayFindCompPrefix(maf, myName, ".");
+if (NULL == comp)
+ comp = mafFindCompPrefix(maf, myName, NULL);
slRemoveEl(&maf->components, comp);
slAddHead(&maf->components, comp);
}
static void processBed6(char *database, char *track, FILE *f, struct bed *bed,
struct slName *orgList)
/* generate MAF alignment for a bed6 */
{
struct mafAli *maf;
if (txStarts)
{
maf = hgMafFrag(database, track,
bed->chrom, bed->chromStart, bed->chromEnd, bed->strand[0],
NULL, orgList);
maf->regDef = mafRegDefNew(mafRegDefTxUpstream,
bed->chromEnd-bed->chromStart,
bed->name);
if (meFirst)
moveMeToFirst(maf, database);
}
else
{
maf = hgMafFrag(database, track,
bed->chrom, bed->chromStart, bed->chromEnd, bed->strand[0],
bed->name, orgList);
if (meFirst)
moveMeToFirst(maf, bed->name);
}
mafWrite(f, maf);
mafAliFree(&maf);
}
-void mafFrags(char *database, char *track, char *bedFile, char *mafFile)
+static void mafFrags(char *database, char *track, char *bedFile, char *mafFile)
/* mafFrags - Collect MAFs from regions specified in a 6 column bed file. */
{
struct slName *orgList = NULL;
struct lineFile *lf = lineFileOpen(bedFile, TRUE);
FILE *f = mustOpen(mafFile, "w");
if (optionExists("orgs"))
{
char *orgFile = optionVal("orgs", NULL);
char *buf;
readInGulp(orgFile, &buf, NULL);
orgList = stringToSlNames(buf);
/* Ensure that org list starts with database. */
struct slName *me = slNameFind(orgList, database);
if (me == NULL)
errAbort("Need to have reference database '%s' in %s", database, orgFile);
if (me != orgList)
{
slRemoveEl(&orgList, me);
slAddHead(&orgList, me);
}
}
mafWriteStart(f, "zero");
if (bed12)
{
char *row[12];
while (lineFileRow(lf, row))
{
struct bed *bed = bedLoadN(row, ArraySize(row));
struct mafAli *maf = mafFromBed12(database, track, bed, orgList);
if (meFirst)
moveMeToFirst(maf, bed->name);
mafWrite(f, maf);
mafAliFree(&maf);
bedFree(&bed);
}
}
else
{
char *row[6];
while (lineFileRow(lf, row))
{
struct bed *bed = bedLoadN(row, ArraySize(row));
processBed6(database, track, f, bed, orgList);
bedFree(&bed);
}
}
mafWriteEnd(f);
carefulClose(&f);
}
int main(int argc, char *argv[])
/* Process command line. */
{
optionInit(&argc, argv, options);
if (argc != 5)
usage();
bed12 = optionExists("bed12");
thickOnly = optionExists("thickOnly");
meFirst = optionExists("meFirst");
txStarts = optionExists("txStarts");
mafFrags(argv[1], argv[2], argv[3], argv[4]);
return 0;
}