src/hg/updateStsInfo/updateStsInfo.c 1.10
1.10 2009/08/20 21:53:41 hiram
Take care of faAcc strings being (null)
Index: src/hg/updateStsInfo/updateStsInfo.c
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/hg/updateStsInfo/updateStsInfo.c,v
retrieving revision 1.9
retrieving revision 1.10
diff -b -B -U 1000000 -r1.9 -r1.10
--- src/hg/updateStsInfo/updateStsInfo.c 31 Mar 2007 19:38:16 -0000 1.9
+++ src/hg/updateStsInfo/updateStsInfo.c 20 Aug 2009 21:53:41 -0000 1.10
@@ -1,882 +1,885 @@
/*
File: updateStsInfo.c
Author: Terry Furey
Date: 7/6/2004
Description: Update stsInfo file with new information from NCBI
*/
#include "common.h"
#include "linefile.h"
#include "memalloc.h"
#include "hash.h"
#include "jksql.h"
#include "hdb.h"
#include "fuzzyFind.h"
#include "portable.h"
#include "dnautil.h"
#include "dnaseq.h"
#include "psl.h"
#include "fa.h"
#include "psl.h"
#include "options.h"
#include "hgConfig.h"
#include "stsInfo2.h"
/* over time these listings and numbers are growing. */
/* MAX_ID_LIST: size in bytes of the scratch buffers in which addElement() and
 * addElementInt() rebuild a comma-separated id list. */
#define MAX_ID_LIST 262144
/* MAX_STS_ID: a dbSTS id at or above this value is handled the same way as a
 * missing (0) id by the readers and by writeOut(). */
#define MAX_STS_ID 9000000
/* command line option specifications */
static struct optionSpec optionSpecs[] = {
{"gb", OPTION_STRING},          /* file of genbank accessions that have sequences */
{"verbose", OPTION_STRING},     /* verbosity level, read with optionInt() in main() */
{NULL, 0}
};
/* One STS marker tracked while the update runs. */
struct sts
{
    struct sts *next;        /* next marker in sList */
    struct stsInfo2 *si;     /* stsInfo record for this marker */
    struct dnaSeq *fa;       /* sequence attached to the marker, NULL if none */
    char *faAcc;             /* accession recorded for the sequence, may be NULL */
    boolean mapped;          /* result of isMapped() for records read from the stsInfo file */
    boolean dbstsIdExists;   /* TRUE once the marker's dbSTS id has been matched in the dbSTS input */
};

/* All markers, built with slAddHead() and reversed in writeOut() */
struct sts *sList = NULL;
/* Primer information for one dbSTS id. */
struct primer
{
    struct primer *next;   /* always set to NULL by the code in this file */
    unsigned dbStsId;      /* dbSTS id the primers belong to */
    char *left;            /* left primer */
    char *right;           /* right primer */
    char *dist;            /* distance field, kept as text */
    unsigned ucscId;       /* identNo of the linked marker, 0 when not linked */
};
/* One genbank accession known to the program. */
struct gb
{
    struct gb *next;   /* always set to NULL by the code in this file */
    char *acc;         /* accession string (cloned) */
    struct sts *s;     /* marker the accession belongs to, NULL if not linked */
    boolean gbSeq;     /* TRUE when the accession was listed in the -gb file */
};
/* Lookup tables shared by all readers.  Keys are strings; numeric ids are
 * formatted as decimal text before being used as keys. */
struct hash *stsHash;       /* identNo -> struct sts */
struct hash *primerHash;    /* dbSTS id -> struct primer */
struct hash *nameHash;      /* marker name, genbank, GDB or other alias -> struct sts */
struct hash *dbStsIdHash;   /* identNo -> dbSTS id (stored with hashAddInt) */
struct hash *gbAccHash;     /* genbank accession -> struct gb */
struct hash *ucscIdHash;    /* dbSTS id (primary or "other") -> struct sts */
struct hash *orgHash;       /* dbSTS id -> organism string from dbSTS.sts */
struct stsInfo2 *siList = NULL;   /* not referenced by the code visible in this file */
int nextUcscId = 0;         /* next identNo to hand out; kept above every identNo read */
boolean isMapped(struct stsInfo2 *si)
/* Determine whether a stsInfo2 record contains mapping information */
{
if (differentString(si->genethonName, "\0"))
return(1);
if (differentString(si->marshfieldName, "\0"))
return(1);
if (differentString(si->decodeName, "\0"))
return(1);
if (differentString(si->wiyacName, "\0"))
return(1);
if (differentString(si->wirhName, "\0"))
return(1);
if (differentString(si->gm99gb4Name, "\0"))
return(1);
if (differentString(si->gm99g3Name, "\0"))
return(1);
if (differentString(si->tngName, "\0"))
return(1);
return(0);
}
boolean checkGb(char *name)
/* Return TRUE when name is a key of gbAccHash, i.e. a known genbank accession */
{
    return (hashLookup(gbAccHash, name) != NULL) ? TRUE : FALSE;
}
boolean checkGdb(char *name)
/* Return the result of testing whether name begins with the GDB id prefix "GDB" */
{
    boolean hasGdbPrefix = startsWith("GDB", name);

    return hasGdbPrefix;
}
boolean inArray(char *el, char **array, int size)
/* Return TRUE when one of the first size strings of array equals el */
{
    int pos = 0;

    while (pos < size)
    {
        if (sameString(el, array[pos]))
            return(TRUE);
        pos++;
    }
    return(FALSE);
}
boolean inArrayInt(unsigned el, unsigned *array, int size)
/* Return TRUE when one of the first size values of array equals el */
{
    int pos = 0;

    while (pos < size)
    {
        if (array[pos] == el)
            return(TRUE);
        pos++;
    }
    return(FALSE);
}
void addElement(char *el, char ***array, unsigned *count)
/* Append el to the string array *array (of *count entries) unless an equal
 * string is already present.  On append, *array is replaced by a newly
 * allocated array and *count is incremented. */
{
    char joined[MAX_ID_LIST];
    char *current;
    char **parsed, **copy = NULL;
    int parsedCount, wanted;

    /* Nothing to do when the element is already listed */
    if (inArray(el, *array, *count))
        return;

    wanted = *count;
    wanted++;

    /* Rebuild the list as comma-separated text with the new element last,
     * then parse it back into an array */
    current = sqlStringArrayToString(*array, *count);
    safef(joined, ArraySize(joined), "%s%s,", current, el);
    sqlStringDynamicArray(joined, &parsed, &parsedCount);
    assert(parsedCount == wanted);

    *count = wanted;
    AllocArray(copy, wanted);
    CopyArray(parsed, copy, wanted);
    *array = copy;
}
void removeElement(char *el, char ***array, unsigned *count)
/* Remove el from the string array *array (of *count entries).
 * Only entries that equal el exactly are removed; the previous implementation
 * deleted the text "el," from the joined list, which also damaged any entry
 * that merely ended in el.
 * On return *array is a newly allocated array holding the remaining entries
 * (NULL when none remain) and *count is the number of remaining entries.
 * Does nothing when *count is 0. */
{
    unsigned oldCount = *count, keptCount = 0, i;
    char **oldArray = *array;
    char **kept = NULL, **parsed = NULL, **result = NULL;
    char *joined;
    int parsedCount = 0;

    if (oldCount == 0)
        return;

    /* Collect the entries that survive, in their original order */
    AllocArray(kept, oldCount);
    for (i = 0; i < oldCount; i++)
    {
        if (differentString(oldArray[i], el))
            kept[keptCount++] = oldArray[i];
    }

    if (keptCount > 0)
    {
        /* Round-trip through the comma-separated form, as addElement() does,
         * so the resulting array is built the same way as before */
        joined = sqlStringArrayToString(kept, keptCount);
        sqlStringDynamicArray(joined, &parsed, &parsedCount);
        assert(parsedCount == (int)keptCount);
        AllocArray(result, keptCount);
        CopyArray(parsed, result, keptCount);
    }

    *array = result;
    *count = keptCount;
    freeMem(kept);
}
void addElementInt(unsigned el, unsigned **array, unsigned *count)
/* Append el to the unsigned array *array (of *count entries) unless the value
 * is already present.  On append, *array is replaced by a newly parsed array
 * and *count is incremented. */
{
    char *arrayCurr, arrayNew[MAX_ID_LIST];
    int sizeOne, size;
    unsigned *uArray;

    if (inArrayInt(el, *array, *count))
        return;

    size = *count;
    arrayCurr = sqlUnsignedArrayToString(*array, *count);
    /* el is unsigned: %u keeps values above INT_MAX from being written as
     * negative numbers, which the unsigned parser below could not read back */
    safef(arrayNew, ArraySize(arrayNew), "%s%u,", arrayCurr, el);
    size++;
    sqlUnsignedDynamicArray(arrayNew, &uArray, &sizeOne);
    assert(sizeOne == size);
    *count = size;
    *array = uArray;
}
void addName(struct sts *s, char *name)
/* Add a name to a sts record.  The name goes into the genbank list when it is
 * a known genbank accession, into the gdb list when it starts with "GDB", and
 * into otherNames otherwise.  For a genbank accession the gb record is also
 * linked to the marker. */
{
    struct gb *gb;

    if (checkGb(name))
    {
        addElement(name, &s->si->genbank, &s->si->gbCount);
        /* checkGb() has just shown that the accession is in gbAccHash, so the
         * lookup cannot fail; the former fallback branch was unreachable. */
        gb = hashMustFindVal(gbAccHash, name);
        gb->s = s;
    }
    else if (checkGdb(name))
    {
        addElement(name, &s->si->gdb, &s->si->gdbCount);
    }
    else
    {
        addElement(name, &s->si->otherNames, &s->si->nameCount);
    }
}
void readStsInfo(struct lineFile *sif)
/* Read the current stsInfo file.  Creates the lookup hashes, keeps
 * nextUcscId above every identNo seen, and for each "Homo sapiens" row builds
 * a struct sts and registers it (and its names, genbank accessions and
 * primers) in the hashes.  Rows for other organisms are freed and skipped. */
{
    struct stsInfo2 *info;
    struct sts *marker;
    struct primer *pr;
    struct gb *acc;
    char key[16], *row[52];
    int n;

    stsHash = newHash(20);
    dbStsIdHash = newHash(20);
    primerHash = newHash(20);
    nameHash = newHash(24);
    gbAccHash = newHash(20);
    ucscIdHash = newHash(20);

    while (lineFileChopTab(sif, row))
    {
        info = stsInfo2Load(row);

        /* Determine next ucsc id to be used */
        if (info->identNo >= nextUcscId)
            nextUcscId = info->identNo + 1;

        /* Only human records are kept */
        if (!sameString(info->organism, "Homo sapiens\0"))
        {
            stsInfo2Free(&info);
            continue;
        }

        /* Create the marker and put it on the global list */
        AllocVar(marker);
        marker->next = NULL;
        marker->si = info;
        marker->fa = NULL;
        marker->mapped = isMapped(info);
        marker->dbstsIdExists = FALSE;
        slAddHead(&sList, marker);

        /* Index by ucsc id */
        safef(key, ArraySize(key), "%d", info->identNo);
        hashAdd(stsHash, key, marker);

        /* Index the primary dbSTS id both ways */
        if (info->dbSTSid)
        {
            hashAddInt(dbStsIdHash, key, info->dbSTSid);
            safef(key, ArraySize(key), "%d", info->dbSTSid);
            hashAdd(ucscIdHash, key, marker);
        }

        /* Secondary dbSTS ids point at the marker unless already claimed */
        for (n = 0; n < info->otherDbstsCount; n++)
        {
            safef(key, ArraySize(key), "%d", info->otherDbSTS[n]);
            if (hashLookup(ucscIdHash, key) == NULL)
                hashAdd(ucscIdHash, key, marker);
        }

        /* Every name of the marker goes into the name hash; genbank
         * accessions additionally get a gb record */
        hashAdd(nameHash, info->name, marker);
        for (n = 0; n < info->gbCount; n++)
        {
            hashAdd(nameHash, info->genbank[n], marker);
            AllocVar(acc);
            acc->next = NULL;
            acc->acc = cloneString(info->genbank[n]);
            acc->s = marker;
            acc->gbSeq = FALSE;
            hashAdd(gbAccHash, acc->acc, acc);
        }
        for (n = 0; n < info->gdbCount; n++)
            hashAdd(nameHash, info->gdb[n], marker);
        for (n = 0; n < info->nameCount; n++)
            hashAdd(nameHash, info->otherNames[n], marker);

        /* Record primers when the row has a left primer */
        if (differentString(info->leftPrimer, "\0"))
        {
            AllocVar(pr);
            pr->next = NULL;
            pr->dbStsId = info->dbSTSid;
            pr->left = cloneString(info->leftPrimer);
            pr->right = cloneString(info->rightPrimer);
            pr->dist = cloneString(info->distance);
            pr->ucscId = info->identNo;
            safef(key, ArraySize(key), "%d", pr->dbStsId);
            hashAdd(primerHash, key, pr);
        }
    }
}
void readGbAcc(struct lineFile *gaf)
/* Read in and record all genbank accessions that have sequences */
{
struct gb *gb;
char *acc[1];
struct sts *s;
while (lineFileNextRow(gaf, acc, 1))
{
if (!hashLookup(gbAccHash, acc[0]))
{
AllocVar(gb);
gb->next = NULL;
gb->acc = cloneString(acc[0]);
gb->s = NULL;
gb->gbSeq = TRUE;
hashAdd(gbAccHash, acc[0], gb);
if (hashLookup(nameHash, acc[0]))
{
s = hashMustFindVal(nameHash, acc[0]);
addElement(acc[0], &s->si->genbank, &s->si->gbCount);
removeElement(acc[0], &s->si->otherNames, &s->si->nameCount);
}
}
else
{
gb = hashMustFindVal(gbAccHash, acc[0]);
gb->gbSeq = TRUE;
}
}
}
void updatePrimerInfo(struct primer *p, struct sts *s)
/* Update primer information for an STS marker from a primer record */
{
s->si->dbSTSid = p->dbStsId;
s->si->leftPrimer = cloneString(p->left);
s->si->rightPrimer = cloneString(p->right);
s->si->distance = cloneString(p->dist);
s->dbstsIdExists = TRUE;
p->ucscId = s->si->identNo;
}
void readDbstsPrimers(struct lineFile *dsf)
/* Read in primer and organism info from dbSTS.sts.
 * Columns used: 0 = dbSTS id, 1 = left primer, 2 = right primer,
 * 3 = distance, 4 = marker name, 7 = organism.
 * Records the organism of every id in orgHash and, for human rows, creates or
 * refreshes the primer record and links it to a marker found by dbSTS id or
 * by name. */
{
    struct primer *p;
    struct sts *s;
    char *words[8], *org;
    int dbStsId, newId;

    orgHash = newHash(20);
    while (lineFileChopNextTab(dsf, words, 8))
    {
        /* Check that the organism is human, or at least that a human record
           has not already been read in for this dbSTS id.  The organism is
           cloned only when it is actually stored. */
        org = words[7];
        if (!hashLookup(orgHash, words[0]) || (sameString(org, "Homo sapiens\0")))
            hashAdd(orgHash, words[0], cloneString(org));

        /* If not human, then don't process any further */
        if (differentString(org, "Homo sapiens\0"))
            continue;

        /* See if this dbSTS id is currently in use by a STS marker */
        dbStsId = sqlUnsigned(words[0]);
        if (hashLookup(ucscIdHash, words[0]))
            s = hashMustFindVal(ucscIdHash, words[0]);
        else
            s = NULL;

        /* See if primers already recorded for this dbSTS id from STS Info file */
        if (hashLookup(primerHash, words[0]))
        {
            p = hashMustFindVal(primerHash, words[0]);
            /* If STS marker not mapped, update primer information.
               The marker must exist before its mapped flag can be read, and
               each string goes back into the field that was just freed. */
            if (s != NULL && !s->mapped)
            {
                freez(&(p->left));
                p->left = cloneString(words[1]);
                freez(&(p->right));
                p->right = cloneString(words[2]);
                freez(&(p->dist));
                p->dist = cloneString(words[3]);
            }
        }
        /* If no record of this primer, create one */
        else
        {
            AllocVar(p);
            p->next = NULL;
            p->dbStsId = dbStsId;
            p->left = cloneString(words[1]);
            p->right = cloneString(words[2]);
            p->dist = cloneString(words[3]);
            if (s != NULL)
                p->ucscId = s->si->identNo;
            else
                p->ucscId = 0;
            hashAdd(primerHash, words[0], p);
        }

        /* If dbSTS id linked to a STS marker already */
        if (s != NULL)
        {
            /* If linked to ucsc record and record not mapped or doesn't have
               primer information, update primer info */
            if (((!s->mapped) || (sameString(s->si->leftPrimer, "\0"))) && (s->si->dbSTSid == dbStsId))
                updatePrimerInfo(p, s);
        }
        /* If not linked to a ucsc record and human, check if the name is already in use */
        else if ((sameString(org, "Homo sapiens\0")) && (hashLookup(nameHash, words[4])))
        {
            s = hashMustFindVal(nameHash, words[4]);
            /* Update the marker record with the dbSTS id and primer info if none exists */
            if ((s->si->dbSTSid == 0) || (s->si->dbSTSid >= MAX_STS_ID) ||
                (sameString(s->si->leftPrimer, "\0")))
                updatePrimerInfo(p, s);
            /* If the record is already linked to another dbSTS id, add this to other list */
            else
            {
                newId = sqlUnsigned(words[0]);
                addElementInt(newId, &s->si->otherDbSTS, &s->si->otherDbstsCount);
            }
            hashAdd(ucscIdHash, words[0], s);
        }
    }
} /* void readDbstsPrimers(struct lineFile *dsf) */
void readDbstsNames(struct lineFile *daf)
/* Read in dbSTS names and create new stsInfo record, if necessary */
{
struct sts *s;
struct stsInfo2 *si;
struct primer *p;
char *words[4], *names[64], name[64], *org;
int dbstsId, nameCount, i;
while (lineFileChopNext(daf, words, 2))
{
/* Make sure this is a human marker */
org = hashFindVal(orgHash, words[0]);
if (hashLookup(orgHash, words[0]) && !sameString(org, "Homo sapiens\0") && !sameString(org, "\0"))
continue;
dbstsId = sqlUnsigned(words[0]);
/* Find the primers for this dbSTS id */
if (hashLookup(primerHash, words[0]))
p = hashMustFindVal(primerHash, words[0]);
/* Determine if this id is already being used */
if (hashLookup(ucscIdHash, words[0]))
{
s = hashMustFindVal(ucscIdHash, words[0]);
}
else
{
s = NULL;
}
/* If the id has not been assigned, see any of the names are being used */
if (s == NULL)
{
nameCount = chopByChar(words[1], ';', names, ArraySize(names));
for (i = 0; i < nameCount; i++)
{
touppers(names[i]);
/* See if this name associated with a ucsc record already */
if (hashLookup(nameHash, names[i]))
{
s = hashMustFindVal(nameHash, names[i]);
/* See if this record needs an dbSTS id */
if ((s->si->dbSTSid == 0) || (s->si->dbSTSid >= MAX_STS_ID) ||
(sameString(s->si->leftPrimer, "\0")))
{
s->si->dbSTSid = dbstsId;
/* If no primer info recorded, add it if possible */
if (((!s->mapped) || (sameString(s->si->leftPrimer, "\0")))
&& (hashLookup(primerHash, words[0])))
{
p = hashMustFindVal(primerHash, words[0]);
s->si->leftPrimer = cloneString(p->left);
s->si->rightPrimer = cloneString(p->right);
s->si->distance = cloneString(p->dist);
}
i = nameCount;
}
else
{
addElementInt(dbstsId, &s->si->otherDbSTS, &s->si->otherDbstsCount);
}
}
}
}
if (s != NULL)
{
/* Determine if all of the names are recorded */
if (s->si->dbSTSid == dbstsId)
s->dbstsIdExists = TRUE;
nameCount = chopByChar(words[1], ';', names, ArraySize(names));
for (i = 0; i < nameCount; i++)
{
touppers(names[i]);
if (!hashLookup(nameHash, names[i]))
{
subChar(names[i],',',':');
addName(s, names[i]);
hashAdd(nameHash, names[i], s);
}
}
}
else
{
/* If valid primers exist, then add record */
if (hashLookup(primerHash, words[0]))
p = hashMustFindVal(primerHash, words[0]);
else
p = NULL;
if (p != NULL)
{
nameCount = chopByChar(words[1], ';', names, ArraySize(names));
AllocVar(s);
AllocVar(si);
si->next = NULL;
s->si = si;
s->mapped = FALSE;
s->dbstsIdExists = TRUE;
s->fa = NULL;
si->next = NULL;
si->identNo = nextUcscId;
nextUcscId++;
touppers(names[0]);
si->name = cloneString(names[0]);
si->gbCount = 0;
si->genbank = NULL;
si->gdbCount = 0;
si->gdb = NULL;
si->nameCount = 0;
si->otherNames = NULL;
if (checkGb(names[0]) || checkGdb(names[0]))
addName(s, names[0]);
hashAdd(nameHash, names[0], s);
for (i = 1; i < nameCount; i++)
{
subChar(names[i], ',', ':');
touppers(names[i]);
addName(s, names[i]);
hashAdd(nameHash, names[i], s);
}
si->dbSTSid = dbstsId;
si->otherDbstsCount = 0;
si->otherDbSTS = NULL;
si->leftPrimer = cloneString(p->left);
si->rightPrimer = cloneString(p->right);
si->distance = cloneString(p->dist);
si->organism = cloneString("Homo sapiens");
si->sequence = 0;
si->otherUCSCcount = 0;
si->otherUCSC = NULL;
si->mergeUCSCcount = 0;
si->mergeUCSC = NULL;
si->genethonName = cloneString("");
si->genethonChr = cloneString("");
si->marshfieldName = cloneString("");
si->marshfieldChr = cloneString("");
si->wiyacName = cloneString("");
si->wiyacChr = cloneString("");
si->wirhName = cloneString("");
si->wirhChr = cloneString("");
si->gm99gb4Name = cloneString("");
si->gm99gb4Chr = cloneString("");
si->gm99g3Name = cloneString("");
si->gm99g3Chr = cloneString("");
si->tngName = cloneString("");
si->tngChr = cloneString("");
si->decodeName = cloneString("");
si->decodeChr = cloneString("");
slAddHead(&sList, s);
hashAdd(ucscIdHash, words[0], s);
safef(name, ArraySize(name), "%d", s->si->identNo);
hashAdd(stsHash, name, s);
hashAddInt(dbStsIdHash, name, dbstsId);
p->ucscId = s->si->identNo;
}
}
}
}
void readAllSts(FILE *asf)
/* Read in current sequences for sts markers.
 * Each fasta record is looked up in stsHash by its sequence name; when found
 * the sequence is attached to the marker together with the accession, which
 * is taken from the third word of the header line when the header has exactly
 * three words.  Records for unknown ids are freed. */
{
    struct dnaSeq *ds;
    struct sts *s;
    char *words[8], *acc = NULL, *line;
    int wordCount;

    while (faReadMixedNext(asf, 0, "default", TRUE, &line, &ds))
    {
        /* Determine the UCSC id */
        wordCount = chopByWhite(line, words, ArraySize(words));
        stripString(words[0], ">");
        if (wordCount == 3)
            acc = cloneString(words[2]);
        else
            acc = NULL;

        /* The header line is ours to free and is no longer needed once the
           accession has been cloned; previously it was released only for
           unknown ids and leaked for every attached sequence. */
        freez(&line);

        /* Find the record and attach */
        if (hashLookup(stsHash, ds->name))
        {
            s = hashMustFindVal(stsHash, ds->name);
            s->fa = ds;
            s->faAcc = acc;
            s->si->sequence = 1;
        }
        else
        {
            dnaSeqFree(&ds);
            freez(&acc);
        }
    }
}
void readDbstsFa(FILE *dff)
/* Read in sequences from dbSTS.fa and add, if possible.
 * A sequence is used only when its name is a genbank accession linked to a
 * marker.  It is attached when the marker has no sequence, used to fill in a
 * missing accession when the sequences are identical, or used to replace the
 * stored sequence when the accession matches.  Every sequence that is not
 * attached is freed. */
{
    struct dnaSeq *ds;
    struct sts *s;
    struct gb *gb;
    char name[256], *line, *keptName;

    while (faReadMixedNext(dff, 0, "default", TRUE, &line, &ds))
    {
        /* The header line is not used here; release it instead of leaking it */
        freez(&line);

        /* Only accessions linked to a marker are of interest */
        gb = NULL;
        if (hashLookup(gbAccHash, ds->name))
            gb = hashMustFindVal(gbAccHash, ds->name);
        if (gb == NULL || gb->s == NULL)
        {
            freeDnaSeq(&ds);
            continue;
        }

        s = gb->s;
        /* If no recorded sequence, then add */
        if (s->fa == NULL)
        {
            s->faAcc = cloneString(ds->name);
            /* The stored sequence is named after the ucsc id */
            safef(name, ArraySize(name), "%d", s->si->identNo);
            freeMem(ds->name);
            ds->name = cloneString(name);
            s->fa = ds;
            s->si->sequence = 1;
        }
        /* If no accession recorded, see if sequences are the same */
        else if (s->faAcc == NULL)
        {
            if (sameString(s->fa->dna, ds->dna))
            {
                s->faAcc = cloneString(ds->name);
                s->si->sequence = 1;
            }
            freeDnaSeq(&ds);
        }
        /* If same accession as recorded, the update sequence */
        else if (sameString(s->faAcc, ds->name))
        {
            /* Keep the old sequence name on the replacement */
            keptName = cloneString(s->fa->name);
            freeMem(ds->name);
            ds->name = keptName;
            freeDnaSeq(&s->fa);
            s->fa = ds;
            s->si->sequence = 1;
        }
        else
            freeDnaSeq(&ds);
    }
}
void writeOut(FILE *of, FILE *opf, FILE *oaf, FILE *off)
/* Write out update files for info, primers, and sequences */
{
struct sts *s;
struct stsInfo2 *si;
char name[256];
int i;
slReverse(&sList);
for (s = sList; s != NULL; s = s->next)
{
if (s->fa != NULL)
s->si->sequence = 1;
else
s->si->sequence = 0;
si = s->si;
if ((s->dbstsIdExists)||(si->dbSTSid == 0)||(si->dbSTSid >= MAX_STS_ID))
{
fprintf(of, "%d\t%s\t%d\t", si->identNo, si->name, si->gbCount);
for (i = 0; i < si->gbCount; i++)
fprintf(of, "%s,", si->genbank[i]);
fprintf(of, "\t%d\t", si->gdbCount);
for (i = 0; i < si->gdbCount; i++)
fprintf(of, "%s,", si->gdb[i]);
fprintf(of, "\t%d\t", si->nameCount);
for (i = 0; i < si->nameCount; i++)
fprintf(of, "%s,", si->otherNames[i]);
fprintf(of, "\t%d\t%d\t", si->dbSTSid, si->otherDbstsCount);
for (i = 0; i < si->otherDbstsCount; i++)
fprintf(of, "%d,", si->otherDbSTS[i]);
fprintf(of, "\t%s\t%s\t%s\t%s\t%d\t%d\t", si->leftPrimer, si->rightPrimer,
si->distance, si->organism, si->sequence, si->otherUCSCcount);
for (i = 0; i < si->otherUCSCcount; i++)
fprintf(of, "%d,", si->otherUCSC[i]);
fprintf(of, "\t%d\t", si->mergeUCSCcount);
for (i = 0; i < si->mergeUCSCcount; i++)
fprintf(of, "%d,", si->mergeUCSC[i]);
fprintf(of, "\t%s\t%s\t%.2f\t%.2f", si->genethonName, si->genethonChr,
si->genethonPos, si->genethonLOD);
fprintf(of, "\t%s\t%s\t%.2f\t%.2f", si->marshfieldName, si->marshfieldChr,
si->marshfieldPos, si->marshfieldLOD);
fprintf(of, "\t%s\t%s\t%.2f\t%.2f", si->wiyacName, si->wiyacChr,
si->wiyacPos, si->wiyacLOD);
fprintf(of, "\t%s\t%s\t%.2f\t%.2f", si->wirhName, si->wirhChr,
si->wirhPos, si->wirhLOD);
fprintf(of, "\t%s\t%s\t%.2f\t%.2f", si->gm99gb4Name, si->gm99gb4Chr,
si->gm99gb4Pos, si->gm99gb4LOD);
fprintf(of, "\t%s\t%s\t%.2f\t%.2f", si->gm99g3Name, si->gm99g3Chr,
si->gm99g3Pos, si->gm99g3LOD);
fprintf(of, "\t%s\t%s\t%.2f\t%.2f", si->tngName, si->tngChr,
si->tngPos, si->tngLOD);
fprintf(of, "\t%s\t%s\t%.2f\t%.2f\n", si->decodeName, si->decodeChr,
si->decodePos, si->decodeLOD);
/* Write out to primers file */
if (differentString(si->leftPrimer, "\0"))
fprintf(opf, "%d\t%s\t%s\t%s\t%d\n", si->dbSTSid, si->leftPrimer,
si->rightPrimer, si->distance, si->identNo);
/* Write out to alias file */
fprintf(oaf, "%s\t%d\t%s\n", si->name, si->identNo, si->name);
for (i = 0; i < si->gbCount; i++)
if (differentString(si->genbank[i], si->name))
fprintf(oaf, "%s\t%d\t%s\n", si->genbank[i], si->identNo, si->name);
for (i = 0; i < si->gdbCount; i++)
if (differentString(si->gdb[i], si->name))
fprintf(oaf, "%s\t%d\t%s\n", si->gdb[i], si->identNo, si->name);
for (i = 0; i < si->nameCount; i++)
if (differentString(si->otherNames[i], si->name))
fprintf(oaf, "%s\t%d\t%s\n", si->otherNames[i], si->identNo, si->name);
/* Write out to fa file */
if (s->fa != NULL)
{
/* gb = sqlStringArrayToString(si->genbank, si->gbCount); */
+ if (s->faAcc && differentWord(s->faAcc,"(null)"))
safef(name, ArraySize(name), "%s %s %s", s->fa->name, si->name, s->faAcc);
+ else
+ safef(name, ArraySize(name), "%s %s %s", s->fa->name, si->name, s->si->name);
faWriteNext(off, name, s->fa->dna, s->fa->size);
}
}
else
verbose(1, "%d\t%s\t%d\t(%d) not in dbSTS anymore\n", si->identNo, si->name, si->dbSTSid, s->dbstsIdExists);
}
}
int main(int argc, char *argv[])
/* Parse the command line, read all input files, and write the updated
 * <prefix>.info, <prefix>.primers, <prefix>.alias and <prefix>.fa files.
 * Returns 1 after printing the usage message when arguments are missing. */
{
    struct lineFile *sif, *dsf, *daf, *gbf = NULL;
    FILE *of, *opf, *oaf, *off, *asf, *dff;
    char filename[256], *gbName;
    int verb = 0;

    verboseSetLevel(0);
    optionInit(&argc, argv, optionSpecs);
    /* Six positional arguments are used below (argv[1] .. argv[6]) */
    if (argc < 7)
    {
        fprintf(stderr, "USAGE: updateStsInfo [-verbose=<level> -gb=<file>] <stsInfo file> <all.STS.fa> <dbSTS.sts> <dbSTS.aliases> <dbSTS.convert.fa> <outfile prefix>\n");
        return 1;
    }
    verb = optionInt("verbose", 0);
    verboseSetLevel(verb);
    gbName = optionVal("gb", NULL);

    /* Open all inputs and outputs up front */
    if (gbName)
        gbf = lineFileOpen(gbName, TRUE);
    sif = lineFileOpen(argv[1], TRUE);
    asf = mustOpen(argv[2], "r");
    dsf = lineFileOpen(argv[3], TRUE);
    daf = lineFileOpen(argv[4], TRUE);
    dff = mustOpen(argv[5], "r");
    safef(filename, ArraySize(filename), "%s.info", argv[6]);
    of = mustOpen(filename, "w");
    safef(filename, ArraySize(filename), "%s.primers", argv[6]);
    opf = mustOpen(filename, "w");
    safef(filename, ArraySize(filename), "%s.alias", argv[6]);
    oaf = mustOpen(filename, "w");
    safef(filename, ArraySize(filename), "%s.fa", argv[6]);
    off = mustOpen(filename, "w");

    /* Read in current stsInfo file */
    verbose(1, "Reading current stsInfo file: %s\n", argv[1]);
    readStsInfo(sif);

    /* Read in genbank accessions that have sequences */
    if (gbName)
    {
        verbose(1, "Reading genbank accession file: %s\n", gbName);
        readGbAcc(gbf);
    }

    /* Read in primer and organism information from dbSTS.sts */
    verbose(1, "Reading current dbSTS.sts file: %s\n", argv[3]);
    readDbstsPrimers(dsf);

    /* Read in names from dbSTS.alias and create new stsInfo records if needed */
    verbose(1, "Reading current dbSTS.aliases file: %s\n", argv[4]);
    readDbstsNames(daf);

    /* Read in current sequences for sts markers */
    verbose(1, "Reading current all.STS file: %s\n", argv[2]);
    readAllSts(asf);

    /* Read in new sequences from dbSTS.fa */
    verbose(1, "Reading dbSTS.fa file: %s\n", argv[5]);
    readDbstsFa(dff);

    /* Print out the new files */
    verbose(1, "Creating output files: %s .info .primers .alias .fa\n", argv[6]);
    writeOut(of, opf, oaf, off);

    /* Close everything that was opened, including the stsInfo input */
    lineFileClose(&sif);
    fclose(asf);
    lineFileClose(&dsf);
    lineFileClose(&daf);
    fclose(dff);
    if (gbName)
        lineFileClose(&gbf);
    fclose(of);
    fclose(opf);
    fclose(oaf);
    fclose(off);
    return(0);
}