src/hg/gigAssembler/goldToAgp/goldToAgp.c 1.4
1.4 2009/07/07 18:45:01 hiram
Fixup broken build on Solaris
Index: src/hg/gigAssembler/goldToAgp/goldToAgp.c
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/hg/gigAssembler/goldToAgp/goldToAgp.c,v
retrieving revision 1.3
retrieving revision 1.4
diff -b -B -U 1000000 -r1.3 -r1.4
--- src/hg/gigAssembler/goldToAgp/goldToAgp.c 6 May 2003 07:22:19 -0000 1.3
+++ src/hg/gigAssembler/goldToAgp/goldToAgp.c 7 Jul 2009 18:45:01 -0000 1.4
@@ -1,195 +1,193 @@
/* goldToAgp - convert from ooGreedy "gold" format to the official
* ".agp" format for golden paths. */
#include "common.h"
#include "linefile.h"
#include "hash.h"
#include "portable.h"
static char const rcsid[] = "$Id$";
void usage(void)
/* Print usage instructions and exit (via errAbort). */
{
errAbort(
    "goldToAgp - convert from ooGreedy 'gold' format to the official\n"
    "'.agp' format for golden paths through a contig. \n"
    "usage:\n"
    " goldToAgp gsDir version chromDir(s)\n"
    "This will look for gsDir/fin/trans gsDir/draft/trans gsDir/predraft/trans\n"
    "and gsDir/extras/trans to define name translations. It will then translate\n"
    "chromDir/ctg*/gold.version into chromDir/ctg*/ctg*.agp\n");
}
struct fragData
/* Info on fragment */
{
char *ucscName; /* Name on UCSC side. */
char *ffaName; /* Greg Schuler name. */
char *accVer; /* Genbank accession.version. */
int subStart; /* Start in Genbank submission coordinates. (1 based) */
int subEnd; /* End in Genbank submission coordinates. (inclusive) */
char *type; /* Phase in agp sense "F" "D" or "P" */
};
struct fragData *parseFragData(char *words[], char *phase, int whereIx)
/* Parse frag data from one chopped line of a trans file.
 *   words   - fields of the line.  words[0] and words[1] are copied;
 *             words[whereIx] is parsed and modified in place (the ':'
 *             and the first '.' are overwritten with string terminators).
 *   phase   - stored as-is (not copied) in the type field of the result.
 *   whereIx - index of the location field, which has the form
 *             "(accVer:start..end".
 * Returns a newly allocated fragData.  Calls errAbort if the location
 * field does not have the expected shape. */
{
struct fragData *fd;
char *s, *e;

AllocVar(fd);
fd->ucscName = cloneString(words[0]);
fd->ffaName = cloneString(words[1]);

/* Location field must open with '(' followed by "accVer:". */
s = words[whereIx];
if (s[0] != '(')
    errAbort("Format error 1 in trans file: %s\n", s);
s += 1;
if ((e = strchr(s, ':')) == NULL)
    errAbort("Format error 2 in trans file: %s\n", s);
*e++ = 0;
fd->accVer = cloneString(s);

/* After the colon comes "start..end".  The cast to unsigned char keeps
 * isdigit() well defined for bytes with the high bit set. */
s = e;
if ((e = strchr(s, '.')) == NULL || e[1] != '.' || !isdigit((unsigned char)s[0]))
    errAbort("Format error 3 in trans file: %s\n", s);
*e++ = 0;
fd->subStart = atoi(s);

/* e now sits on the second '.'; the end coordinate starts just past it. */
s = e+1;
if (!isdigit((unsigned char)s[0]))
    errAbort("Format error 4 in trans file: %s\n", s);
fd->subEnd = atoi(s);

fd->type = phase;
return fd;
}
void hashTrans(char *dir, char *subDir, char *type, struct hash *hash)
/* Read trans file in dir/subDir/trans into hash.
 *   dir, subDir - path components; the file opened is dir/subDir/trans.
 *   type        - phase string stored (uncopied) in every fragData read
 *                 from this file.
 *   hash        - receives one fragData per line, keyed by its ucscName.
 * Calls errAbort if the path does not fit the file name buffer or if a
 * line has neither 3 nor 7 words. */
{
char fileName[512];
struct lineFile *lf;
int lineSize;
char *line;
int wordCount;
char *words[16];
struct fragData *fd;
int nameLen;

/* Bounded formatting: dir comes from the command line, so refuse a path
 * that would overflow fileName rather than writing past its end. */
nameLen = snprintf(fileName, sizeof(fileName), "%s/%s/trans", dir, subDir);
if (nameLen < 0 || (size_t)nameLen >= sizeof(fileName))
    errAbort("File name too long: %s/%s/trans\n", dir, subDir);

printf("Reading in %s\n", fileName);
lf = lineFileOpen(fileName, TRUE);
while (lineFileNext(lf, &line, &lineSize))
    {
    wordCount = chopLine(line, words);
    if (wordCount != 7 && wordCount != 3)
        errAbort("Bad line %d of %s\n", lf->lineIx, lf->fileName);
    /* In both accepted layouts the location field is the last word. */
    fd = parseFragData(words, type, wordCount-1);
    hashAdd(hash, fd->ucscName, fd);
    }
lineFileClose(&lf);
}
void translateOne(char *goldName, char *agpName, char *contig, struct hash *hash)
/* Translate one gold file to agp.
 *   goldName - input file in gold format.
 *   agpName  - output file, created or overwritten.
 *   contig   - text written as the first column of every output line.
 *   hash     - fragData values keyed by fragment name, used to map
 *              fragments to accession.version and submission coordinates.
 * Every input line must have 8 words (an "N" line) or 9 words (a fragment
 * line).  Calls errAbort on a malformed line, on a fragment that is not in
 * hash, or if the output file cannot be closed cleanly. */
{
struct lineFile *lf = lineFileOpen(goldName, TRUE);
FILE *f = mustOpen(agpName, "w");
int lineSize, wordCount;
char *line, *words[16];

printf("Translating %s to %s\n", goldName, agpName);
while (lineFileNext(lf, &line, &lineSize))
    {
    wordCount = chopLine(line, words);
    if (wordCount != 8 && wordCount != 9)
        errAbort("Bad line %d of %s\n", lf->lineIx, lf->fileName);

    /* Columns 2-4 are copied through; column 1 is replaced by contig. */
    fprintf(f, "%s\t%s\t%s\t%s\t", contig, words[1], words[2], words[3]);
    if (wordCount == 8)
        {
        if (!sameWord(words[4], "N"))
            errAbort("Bad N line %d of %s\n", lf->lineIx, lf->fileName);
        fprintf(f, "N\t%s\t%s\t%s\n", words[5], words[6], words[7]);
        }
    else
        {
        struct fragData *fd;
        char *frag = words[5];
        int fragStartInClone;

        if (startsWith("NT_", frag))
            {
            /* NT_ fragments are not looked up in hash: they are written
             * with type "F", the name after chopSuffix() plus ".0", and
             * the coordinates from the gold line unchanged. */
            chopSuffix(frag);
            fprintf(f, "%s\t%s.0\t%s\t%s\t%s\n", "F",
                    frag, words[6], words[7], words[8]);
            }
        else
            {
            fd = hashFindVal(hash, frag);
            if (fd == NULL)
                errAbort("%s isn't in trans files\n", frag);
            /* subStart is 1 based, so subStart-1 is the offset to add to
             * the fragment-relative coordinates from the gold line. */
            fragStartInClone = fd->subStart - 1;
            fprintf(f, "%s\t%s\t%d\t%d\t%s\n",
                    fd->type, fd->accVer,
                    fragStartInClone + atoi(words[6]),
                    fragStartInClone + atoi(words[7]),
                    words[8]);
            }
        }
    }
lineFileClose(&lf);

/* fclose flushes buffered output; a failure here means the agp file is
 * incomplete, so do not let it pass silently. */
if (fclose(f) != 0)
    errAbort("Error closing %s\n", agpName);
}
void goldToAgp(char *gsDir, int version, char *chromDirs[], int chromCount)
/* goldToAgp - convert from ooGreedy "gold" format to the official
 * ".agp" format for golden paths.
 *   gsDir      - directory holding the fin, draft, predraft and extras
 *                subdirectories, each with a trans file.
 *   version    - numeric suffix of the gold.<version> files to translate.
 *   chromDirs  - chromosome directories to scan for ctg* subdirectories.
 *   chromCount - number of entries in chromDirs.
 * For every chromDir/ctg* directory that contains gold.<version>, writes
 * chromDir/ctgX/ctgX.agp; warns when the gold file is missing.  Calls
 * errAbort if a generated name does not fit its buffer. */
{
struct hash *seqHash = newHash(18);
int i;
char *chrom;
char goldFileName[512];
char agpFileName[512];
char contigName[256];
char *ctgName;
struct slName *dirList, *dirEl;
/* NOTE(review): buffer sizes passed to splitPath() are assumed to match
 * what that helper expects - confirm against its declaration. */
char chromRoot[128], chromSuperDir[256], chromExt[64];
int len;

/* Load every name translation table; the third argument is the phase
 * recorded for fragments found in that table. */
hashTrans(gsDir, "fin", "F", seqHash);
hashTrans(gsDir, "draft", "D", seqHash);
hashTrans(gsDir, "predraft", "P", seqHash);
hashTrans(gsDir, "extras", "D", seqHash);
/* Disabled: hashTrans(gsDir, "missing", "O", seqHash); */

for (i=0; i<chromCount; ++i)
    {
    chrom = chromDirs[i];
    dirList = listDir(chrom, "ctg*");
    splitPath(chrom, chromSuperDir, chromRoot, chromExt);
    for (dirEl = dirList; dirEl != NULL; dirEl = dirEl->next)
        {
        ctgName = dirEl->name;

        /* All three names are built from caller-supplied paths, so each
         * is formatted with a bound and checked for truncation. */
        len = snprintf(contigName, sizeof(contigName), "%s/%s", chromRoot, ctgName);
        if (len < 0 || (size_t)len >= sizeof(contigName))
            errAbort("Contig name too long: %s/%s\n", chromRoot, ctgName);

        len = snprintf(goldFileName, sizeof(goldFileName), "%s/%s/gold.%d",
                       chrom, ctgName, version);
        if (len < 0 || (size_t)len >= sizeof(goldFileName))
            errAbort("File name too long: %s/%s/gold.%d\n", chrom, ctgName, version);

        len = snprintf(agpFileName, sizeof(agpFileName), "%s/%s/%s.agp",
                       chrom, ctgName, ctgName);
        if (len < 0 || (size_t)len >= sizeof(agpFileName))
            errAbort("File name too long: %s/%s/%s.agp\n", chrom, ctgName, ctgName);

        if (fileExists(goldFileName))
            {
            translateOne(goldFileName, agpFileName, contigName, seqHash);
            }
        else
            {
            warn("%s doesn't exist\n", goldFileName);
            }
        }
    slFreeList(&dirList);
    }
}
int main(int argc, char *argv[])
/* Process command line: goldToAgp gsDir version chromDir(s).
 * Prints usage and exits unless there are at least three arguments and
 * the version argument starts with a digit. */
{
/* Cast keeps isdigit() well defined when the first byte of argv[2] has
 * the high bit set and plain char is signed. */
if (argc < 4 || !isdigit((unsigned char)argv[2][0]))
    usage();
goldToAgp(argv[1], atoi(argv[2]), argv+3, argc-3);
return 0;
}