7642d8f313d523d2bc7f054928a82969d2e745f8
galt
  Tue Dec 31 23:45:26 2024 -0800
minor compilter fix for agpToFa.

diff --git src/hg/agpToFa/agpToFa.c src/hg/agpToFa/agpToFa.c
index ac9bd52..f26867a 100644
--- src/hg/agpToFa/agpToFa.c
+++ src/hg/agpToFa/agpToFa.c
@@ -1,379 +1,379 @@
 /* agpToFa - Convert a .agp file to a .fa file. */
 
 /* Copyright (C) 2011 The Regents of the University of California 
  * See kent/LICENSE or http://genome.ucsc.edu/license/ for licensing information. */
 #include "common.h"
 #include "linefile.h"
 #include "hash.h"
 #include "cheapcgi.h"
 #include "fa.h"
 #include "agpFrag.h"
 #include "agpGap.h"
 #include "options.h"
 
 
 static void usage()
 /* Explain usage and exit. */
 {
 errAbort(
   "agpToFa - Convert a .agp file to a .fa file\n"
   "usage:\n"
   "   agpToFa in.agp agpSeq out.fa freezeDir\n"
   "Where agpSeq matches a sequence name in in.agp (or is \"all\")\n"
   "and seqDir is where program looks for sequence.\n"
   "\n"
   "This is currently a fairly limited implementation.\n"
   "It only works on finished clones\n"
   "\n"
   "options:\n"
   "   -simple - treat freezeDir as a simple directory full of\n"
   "             .fa files.  In this case .fa files must be named\n"
   "             accession.fa and only have one record\n"
   "   -subDirs=predraft,draft,fin,extras  Explicitly specify comma separated\n"
   "             list of freeze subdirectories.\n"
   "   -simpleMulti - treat freezeDir as a single file, multi-record FASTA\n"
   "                  which contains all fragment records.\n"
   "   -simpleMultiMixed - same as simpleMulti, but preserves \n"
   "                  mixed-case sequence\n"
   "   -verbose=N - N=2 display illegal coordinates in agp files\n"
   );
 }
 
 /* command line option specifications */
 static struct optionSpec optionSpecs[] = {
     {"simple", OPTION_BOOLEAN},
     {"subDirs", OPTION_STRING},
     {"simpleMulti", OPTION_BOOLEAN},
     {"simpleMultiMixed", OPTION_BOOLEAN},
     {NULL, 0}
 };
 
 /* Overridable options. */
 static char *subDirs = "extras,fin,draft,predraft";
 
 static void simpleMultiFillInSequence(boolean preserveCase, char *seqFile, 
                         struct agpFrag *agpList, DNA *dna, int dnaSize)
 /* Fill in DNA array with sequences from one multi-record fasta file. */
 {
 static struct hash *fragHash = NULL;
 struct agpFrag *agp = NULL;
 struct dnaSeq *seq = NULL;
 int illegals = 0;
 
 if (fragHash == NULL)
     {
     struct lineFile *lf = lineFileOpen(seqFile, TRUE);
     struct dnaSeq *seqList = faReadAllMixedInLf(lf);
     fragHash = newHash(17);
     for (seq = seqList;  seq != NULL;  seq = seq->next)
 	{
 	if (!preserveCase)
 	    tolowers(seq->dna);
 	hashAdd(fragHash, seq->name, seq);
 	}
     lineFileClose(&lf);
     }
 
 for (agp = agpList; agp != NULL; agp = agp->next)
     {
     int size;
     seq = hashFindVal(fragHash, agp->frag);
     if (seq == NULL)
 	errAbort("Couldn't find %s in %s", agp->frag, seqFile);
     size = agp->fragEnd - agp->fragStart;
     if (agp->strand[0] == '-')
 	reverseComplement(seq->dna + agp->fragStart, size);
     verbose(3, "dna: %lu, start: %u, size: %d, seqDna: %lu, start: %u, "
 	    "size: %d %s %d\n",
 	    (unsigned long) dna, agp->chromStart, dnaSize,
 	    (unsigned long) seq->dna, agp->fragStart,
 	    size, agp->frag, seq->size);
     if ((agp->chromStart + size) > dnaSize)
 	{
 	warn("frag size %d + dest chromStart %u  = %d > %d dest dna size\n",
 		size, agp->chromStart, size+agp->chromStart, dnaSize);
 	errAbort("fragment copy size exceeds destination dna size");
 	}
     if ((agp->fragStart + size) > seq->size)
 	{
 	verbose(2, "#\t%s start:%u end:%u seqSize: %d\n",
 		agp->frag, agp->fragStart, agp->fragEnd, seq->size);
 	++illegals;
 	}
     else
 	memcpy(dna + agp->chromStart, seq->dna + agp->fragStart, size);
     }
 if (illegals > 0)
     {
     warn("%d illegal coordinates found in agp files.\n", illegals);
     warn("sequence file: %s\n", seqFile);
     errAbort("Fragment copy is more than available fragment sequence.");
     }
 }
 
 static void simpleFillInSequence(char *seqDir, struct agpFrag *agpList,
     DNA *dna, int dnaSize)
 /* Fill in DNA array with sequences from simple clones. */
 {
 struct agpFrag *agp;
 char underline = '_';
 
 for (agp = agpList; agp != NULL; agp = agp->next)
     {
     char clone[128];
     char path[512];
     struct dnaSeq *seq;
     int size;
     strcpy(clone, agp->frag);
     chopSuffixAt(clone,underline);
     sprintf(path, "%s/%s.fa", seqDir, clone);
     seq = faReadAllDna(path);
     if (slCount(seq) != 1)
 	errAbort("Can only handle exactly one clone in %s.", path);
     size = agp->fragEnd - agp->fragStart;
     if (agp->strand[0] == '-')
 	reverseComplement(seq->dna + agp->fragStart, size);
     memcpy(dna + agp->chromStart, seq->dna + agp->fragStart, size);
     freeDnaSeq(&seq);
     }
 }
 
 struct clone 
 /* Info about a clone. */
     {
     struct clone *next;	/* Next in list. */
     char *name;		/* Clone name including version. */
     struct frag *fragList;	/* List of fragments. */
     char *faFile;	/* Location of .fa file. */
     int size;		/* Total size. */
     DNA *dna;		/* Bases, just filled in as needed. */
     };
 
 struct frag
 /* Info about a fragment. */
     {
     struct frag *next;	/* Next in list. */
     char *name;		/* Frag name, UCSC format. */
     int start;		/* Start position, 0 based. */
     int end;		/* One past end. */
     struct clone *clone; /* Clone this belongs to. */
     };
 
 static void getCloneDna(struct clone *clone, struct hash *fragHash)
 /* Read in clone DNA from file in format with one record per
  * clone contig.   Make clone->dna so that it is same as
  * non-fragmented clone file. */
 {
 struct dnaSeq *seqList = faReadAllDna(clone->faFile), *seq;
 int fragSize;
 clone->dna = needLargeMem(clone->size+1);
 clone->dna[clone->size] = 0;
 uglyf("GetCloneDna %s\n", clone->faFile);
 for (seq = seqList; seq != NULL; seq = seq->next)
     {
     struct frag *frag = hashFindVal(fragHash, seq->name);
     if (frag == NULL)
         errAbort("Couldn't find %s from %s in trans files", seq->name, clone->faFile);
     assert(frag->end <= clone->size);
     fragSize = frag->end - frag->start;
     assert(fragSize >= 0);
     if (fragSize != seq->size)
         errAbort("Size mismatch (%d vs %d) between trans and .ffa files on %s", 
 		fragSize, seq->size, frag->name);
     memcpy(clone->dna + frag->start,  seq->dna,  fragSize);
     }
 freeDnaSeqList(&seqList);
 }
 
 static void readTrans(char *transFile, char *faDir, struct hash *cloneHash, struct hash *fragHash)
 /* Read in transFile into hashes. */
 {
 struct lineFile *lf = lineFileOpen(transFile, TRUE);
 char *row[3];
 char *parts[3], *subParts[2];
 int partCount, subCount;
-char faName[512];
+char faName[1024];
 struct clone *clone;
 struct frag *frag;
 
 printf("Reading %s\n", transFile);
 while (lineFileRow(lf, row))
     {
     char *cloneName = row[1];
     char *e = strchr(cloneName, '~');
     if (e == NULL)
         errAbort("Missing ~ line %d of %s", lf->lineIx, lf->fileName);
     *e++ = 0;
     if ((clone = hashFindVal(cloneHash, cloneName)) == NULL)
         {
 	AllocVar(clone);
 	hashAddSaveName(cloneHash, cloneName, clone, &clone->name);
 	chopSuffix(cloneName);
 	sprintf(faName, "%s/%s.fa", faDir, cloneName);
 	cloneName = NULL;
 	clone->faFile = cloneString(faName);
 	}
     AllocVar(frag);
     hashAddSaveName(fragHash, row[0], frag, &frag->name);
     partCount = chopString(row[2], "(:)", parts, ArraySize(parts));
     if (partCount != 2)
         errAbort("Expecting (ACCESSION.VER:START..STOP) line %d of %s", lf->lineIx, lf->fileName);
     subCount = chopString(parts[1], ".", subParts, ArraySize(subParts));
     if (subCount != 2)
         errAbort("Expecting START..STOP line %d of %s", lf->lineIx, lf->fileName);
     frag->start = atoi(subParts[0])-1;
     frag->end = atoi(subParts[1]);
     frag->clone = clone;
     if (clone->size < frag->end)
         clone->size = frag->end;
     slAddTail(&clone->fragList, frag);
     }
 lineFileClose(&lf);
 }
 
 static void gsFillInSequence(char *gsDir, struct agpFrag *agpList, DNA *dna, int dnaSize)
 /* Fill in DNA array with sequences from 'freeze' */
 {
 struct agpFrag *agp;
 int subCount;
 char *sub[64];
 char transFile[512], faDir[512];
 struct hash *cloneHash = newHash(15), *fragHash = newHash(17);
 int i;
 struct clone *clone;
 
 /* Read in trans-files to hash */
 subCount = chopString(subDirs, ",", sub, ArraySize(sub));
 if (subCount >= ArraySize(sub))
     errAbort("Too many subDirs, limit 64.");
 for (i=0; i<subCount; ++i)
     {
     sprintf(faDir, "%s/%s/fa", gsDir, sub[i]);
     sprintf(transFile, "%s/%s/trans", gsDir, sub[i]);
     readTrans(transFile, faDir, cloneHash, fragHash);
     }
 
 /* Read sequences and copy them to output DNA array. */
 printf("Reading source sequences from %s\n", gsDir);
 for (agp = agpList; agp != NULL; agp = agp->next)
     {
     DNA *dest;
     int size;
     if ((clone = hashFindVal(cloneHash, agp->frag)) == NULL)
         errAbort("Couldn't find %s in %s", agp->frag, gsDir);
     if (clone->dna == NULL)
 	getCloneDna(clone, fragHash);
     dest = dna + agp->chromStart;
     size = agp->chromEnd - agp->chromStart;
     memcpy(dest, clone->dna + agp->fragStart, size);
     if (agp->strand[0] == '-')
         reverseComplement(dest, size);
     }
 }
 
 void agpToFaOne(struct agpFrag **pAgpList, char *agpFile, char *agpSeq,
 		char *seqDir, int lastPos, FILE *f)
 /* Given one sequence's worth of AGP in pAgpList, process it into FASTA
  * and write to f. */
 {
 DNA *dna = NULL;
 
 slReverse(pAgpList);
 if (lastPos == 0)
     errAbort("%s not found in %s\n", agpSeq, agpFile);
 dna = needHugeMem(lastPos+1);
 memset(dna, 'n', lastPos);
 dna[lastPos] = 0;
 if (optionExists("simpleMulti"))
     {
     simpleMultiFillInSequence(0, seqDir, *pAgpList, dna, lastPos);
     }
 else if (optionExists("simpleMultiMixed"))
     {
     simpleMultiFillInSequence(1, seqDir, *pAgpList, dna, lastPos);
     }
 else if (optionExists("simple"))
     {
     simpleFillInSequence(seqDir, *pAgpList, dna, lastPos);
     }
 else
     {
     gsFillInSequence(seqDir, *pAgpList, dna, lastPos);
     }
 verbose(2,"Writing %s (%d bases)\n", agpSeq, lastPos);
 faWriteNext(f, agpSeq, dna, lastPos);
 agpFragFreeList(pAgpList);
 }
 
 
 static void agpToFa(char *agpFile, char *agpSeq, char *faOut, char *seqDir)
 /* agpToFa - Convert a .agp file to a .fa file. */
 {
 struct lineFile *lf = lineFileOpen(agpFile, TRUE);
 char *line, *words[16];
 int lineSize, wordCount;
 int lastPos = 0;
 struct agpFrag *agpList = NULL, *agp;
 FILE *f = mustOpen(faOut, "w");
 char *prevChrom = NULL;
 
 verbose(2,"#\tprocessing AGP file: %s\n", agpFile);
 while (lineFileNext(lf, &line, &lineSize))
     {
     if (line[0] == 0 || line[0] == '#' || line[0] == '\n')
         continue;
     wordCount = chopLine(line, words);
     if (wordCount < 5)
         errAbort("Bad line %d of %s: need at least 5 words, got %d\n",
 		 lf->lineIx, lf->fileName, wordCount);
     if (! (sameWord("all", agpSeq) || sameWord(words[0], agpSeq)))
 	continue;
     if (prevChrom != NULL && !sameString(prevChrom, words[0]))
 	{
 	agpToFaOne(&agpList, agpFile, prevChrom, seqDir, lastPos, f);
 	lastPos = 0;
 	}
     if (words[4][0] != 'N' && words[4][0] != 'U')
 	{
 	lineFileExpectAtLeast(lf, 9, wordCount);
 	agp = agpFragLoad(words);
 	/* file is 1-based but agpFragLoad() now assumes 0-based: */
 	agp->chromStart -= 1;
 	agp->fragStart  -= 1;
 	if (agp->chromStart != lastPos)
 	    errAbort("Start doesn't match previous end line %d of %s\n",
 		     lf->lineIx, lf->fileName);
 	if (agp->chromEnd - agp->chromStart != agp->fragEnd - agp->fragStart)
 	    errAbort("Sizes don't match in %s and %s line %d of %s\n",
 		     agp->chrom, agp->frag, lf->lineIx, lf->fileName);
 	slAddHead(&agpList, agp);
 	lastPos = agp->chromEnd;
 	}
     else
         {
 	lastPos = lineFileNeedNum(lf, words, 2);
 	}
     if (prevChrom == NULL || !sameString(prevChrom, words[0]))
 	{
 	freeMem(prevChrom);
 	prevChrom = cloneString(words[0]);
 	}
     }
 agpToFaOne(&agpList, agpFile, prevChrom, seqDir, lastPos, f);
 }
 
 int main(int argc, char *argv[])
 /* Process command line. */
 {
 optionInit(&argc, argv, optionSpecs);
 
 subDirs = optionVal("subDirs", cloneString(subDirs));
 if (verboseLevel() > 1)
     {
     verbose(2,"#\tsubDirs: %s\n", subDirs);
     }
 if (argc != 5)
     usage();
 agpToFa(argv[1], argv[2], argv[3], argv[4]);
 return 0;
 }