e70152e44cc66cc599ff6b699eb8adc07f3e656a
kent
  Sat May 24 21:09:34 2014 -0700
Adding Copyright NNNN Regents of the University of California to all files I believe with reasonable certainty were developed under UCSC employ or as part of Genome Browser copyright assignment.
diff --git src/hg/fishClones/fishClones.c src/hg/fishClones/fishClones.c
index afb19ec..8d061fd 100644
--- src/hg/fishClones/fishClones.c
+++ src/hg/fishClones/fishClones.c
@@ -1,1339 +1,1342 @@
 /*
   File: fishClones.c
   Author: Terry Furey
   Date: 6/3/2003
   Description: Create Fish Clones track from FISH info file and clone 
                placement information
 */
 
+/* Copyright (C) 2013 The Regents of the University of California 
+ * See README in this or parent directory for licensing information. */
+
 #include "common.h"
 #include "linefile.h"
 #include "memalloc.h"
 #include "hash.h"
 #include "jksql.h"
 #include "hdb.h"
 #include "fuzzyFind.h"
 #include "portable.h"
 #include "psl.h"
 #include "options.h"
 #include "binRange.h"
 #include "hgConfig.h"
 #include "clonePos.h"
 #include "stsAlias.h"
 #include "stsMap.h"
 #include "lfs.h"
 
 /* command line option specifications */
 static struct optionSpec optionSpecs[] = {
   {"fhcrc", OPTION_STRING},
   {"psl", OPTION_STRING},
   {"verbose", OPTION_STRING},
   {"noRandom", OPTION_BOOLEAN},
   {"noBin", OPTION_BOOLEAN},
   {"endInfo", OPTION_BOOLEAN},
   {NULL, 0}
 };
 
 int verb = 0;
 boolean NORANDOM = FALSE;
 boolean NOBIN = FALSE;
 boolean ENDINFO = FALSE;
 
 struct position
 {
   struct position *next;
   char *name;
   char *chrom;
   int start;
   int end;
   char orien;
   char *type;
   int phase;
 };
 
 struct name
 {
   struct name *next;
   char *name;
   int phase;
 };
 
 struct place
 {
   struct place *next;
   char *chrom;
   int start;
   int end;
   char *type;
   int numAcc;
   struct name *acc;
   int numSts;
   struct name *sts;
   int numBacEndPair;
   struct name *bePair;
   int numBacEnd;
   struct name *bacEnd;
   int score;
   int startBE;
   int endBE;
 };
 
 struct map
 {
   struct map *next;
   char *chrom;
   char *band1;
   char *band2;
   char *center;
 };
 
 struct fishClone
 {
   struct fishClone *next;
   char *cloneName;
   int numGood;
   struct place *good;
   struct position *acc;
   struct position *sts;
   struct position *endPair;
   struct position *end;
   int numCyto;
   struct map *cyto;
   struct name *missingAcc;
 } *fcList = NULL;
 
 struct hash *fishClones = NULL;
 struct hash *positions = NULL;
 struct hash *posClone = NULL;
 
 void mapFree(struct map **map)
 /* Free a dynamically allocated map */
 {
   struct map *m;
   if ((m = *map) == NULL) return;
   free(m->chrom);
   free(m->band1);
   free(m->band2);
   free(m->center);
   freez(map);
 }
 
 void mapFreeList(struct map **mapList)
 /* Free a dynamically allocated map list */
 {
   struct map *m, *next;
   
   for (m = *mapList; m != NULL; m = next)
     {
       next = m->next;
       mapFree(&m);
     }
   *mapList = NULL;
 }
 
 void positionFree(struct position **pos)
 /* Free a dynamically allocated position */
 {
   struct position *p;
 
   if ((p = *pos) == NULL) return;
   free(p->name);
   free(p->chrom);
   freez(pos);
 }
 
 void positionFreeList(struct position **posList)
 /* Free a dynamically allocated position list */
 {
   struct position *p, *next;
   
   for (p = *posList; p != NULL; p = next)
     {
       next = p->next;
       positionFree(&p);
     }
   *posList = NULL;
 }
 
 void nameFree(struct name **name)
 /* Free a dynamically allocated name */
 {
   struct name *n;
 
   if ((n = *name) == NULL) return;
   free(n->name);
   freez(name);
 }
 
 void nameFreeList(struct name **nameList)
 /* Free a dynamically allocated name list */
 {
   struct name *n, *next;
   
   for (n = *nameList; n != NULL; n = next)
     {
       next = n->next;
       nameFree(&n);
     }
   *nameList = NULL;
 }
 
 void placeFree(struct place **place)
 /* Free a dynamically allocated place */
 {
   struct place *p;
 
   if ((p = *place) == NULL) return;
   free(p->chrom);
   free(p->type);
   nameFreeList(&p->acc);
   nameFreeList(&p->bePair);
   nameFreeList(&p->sts);
   nameFreeList(&p->bacEnd);
   freez(place);
 }
 
 void placeFreeList(struct place **placeList)
 /* Free a dynamically allocated place list */
 {
   struct place *p, *next;
   
   for (p = *placeList; p != NULL; p = next)
     {
       next = p->next;
       placeFree(&p);
     }
   *placeList = NULL;
 }
 
 void fishCloneFree(struct fishClone **fc)
 /* Free a dynamically allocated fishClone */
 {
   struct fishClone *f;
   if ((f = *fc) == NULL) return;
   mapFreeList(&(f->cyto));
   positionFreeList(&(f->acc));
   positionFreeList(&(f->sts));
   positionFreeList(&(f->endPair));
   positionFreeList(&(f->end));
   nameFreeList(&(f->missingAcc));
   free(f->cloneName);
   freez(fc);
 }
 
 struct fishClone *createFishClone(char *name)
 /* Create a fishClone structure */
 {
   struct fishClone *ret;
 
   AllocVar(ret);
   ret->next = NULL;
   ret->cloneName = cloneString(name);
   touppers(ret->cloneName);
   ret->numGood = 0;
   ret->good = NULL;
   ret->acc = NULL;
   ret->sts = NULL;
   ret->endPair = NULL;
   ret->end = NULL;
   ret->numCyto = 0;
   ret->cyto = NULL;
   ret->missingAcc = NULL;
 
   return(ret);
 }
 
 struct map *createMap(char *mapInfo, char *center, char *chr)
 /* Create a map record */
 {
   struct map *ret;
   char *bands[16], band[16];
   int wordCount;
   char chrom[16];
   char *mi, *words[10];
 
   AllocVar(ret);
   ret->next = NULL;
 
   /* Determine chromosome */
   mi = cloneString(mapInfo);
   if (stringIn("p", mapInfo))
     wordCount = chopByChar(mi, 'p', words, ArraySize(words));      
   else if (stringIn("q", mapInfo))
     wordCount = chopByChar(mi, 'q', words, ArraySize(words));      
   else if (stringIn("cen", mapInfo))
     wordCount = chopByChar(mi, 'c', words, ArraySize(words));
   else
     errAbort("Couldn't determine chrom for %s\n", mapInfo);
   safef(chrom, sizeof(chrom), "chr%s", words[0]);
   verbose(4, "\tchromosome for %s is %s\n", mapInfo, chrom);
   ret->chrom = cloneString(chrom);
   free(mi);
 
   if (stringIn("~\0", mapInfo))
     {
       wordCount = chopByChar(mapInfo, '~', bands, ArraySize(bands));
       if ((wordCount == 2) && stringIn("p\0", bands[0]))
 	{
 	  safef(band, sizeof(band), "%sp%s", chr, bands[1]); 
 	  bands[1] = cloneString(band);
 	}
       else if ((wordCount == 2) && stringIn("q\0", bands[0]))
 	{
 	  safef(band, sizeof(band), "%sq%s", chr, bands[1]); 
 	  bands[1] = cloneString(band);
 	}
     }
   else
     wordCount = chopByChar(mapInfo, '-', bands, ArraySize(bands));
   ret->band1 = cloneString(bands[0]);
   if (wordCount == 1 || *bands[1] == 0) 
     /* check for empty end band safeguards against bad input data (e.g. "Yp0")*/
     ret->band2 = cloneString(bands[0]);
   else 
     ret->band2 = cloneString(bands[1]);
   ret->center = cloneString(center);
 
   return(ret);
 }
 
 struct position *createPosition(char *name, char *type, char orien)
 /* Create a position record */
 {
   struct position *ret;
   
   AllocVar(ret);
   ret->next = NULL;
   ret->name = cloneString(name);
   touppers(ret->name);
   ret->chrom = NULL;
   ret->start = 0;
   ret->end = 0;
   ret->orien = orien;
   ret->type = type;
   ret->phase = 0;
 
   return(ret);
 }
 
 struct name *createName(char *n)
 /* create a name record */
 {
   struct name *ret;
 
   AllocVar(ret);
   ret->next = NULL;
   ret->name = cloneString(n);
   touppers(ret->name);
   ret->phase = 0;
   return(ret);
 }
 
 struct place *createPlace(struct position *pos)
 /* Create a place record */
 {
   struct place *ret;
   char *names, *words[2];
   struct name *n;
   int wordCount;
 
   AllocVar(ret);
   ret->next = NULL;
   ret->chrom = cloneString(pos->chrom);
   ret->start = pos->start;
   ret->end = pos->end;
   ret->type = cloneString(pos->type);
   ret->numAcc = 0;
   ret->acc = NULL;
   ret->numSts = 0;
   ret->sts = NULL;
   ret->numBacEndPair = 0;
   ret->bePair = NULL;
   ret->numBacEnd = 0;
   ret->bacEnd = NULL;
   ret->startBE = -1;
   ret->endBE = -1;
   if (sameString(ret->type, "Accession"))
     {
       ret->numAcc++;
       ret->acc = createName(pos->name);
       ret->acc->phase = pos->phase;
     } 
   if (sameString(ret->type, "BAC End Pair"))
     {
       names = cloneString(pos->name);
       wordCount = chopString(names, ",",words,2);
       ret->bePair = createName(words[0]);
       n = createName(words[1]);
       slAddHead(&ret->bePair, n);
       ret->numBacEndPair++;
       ret->startBE = 0;
       ret->endBE = 0;
     } 
   if (sameString(ret->type, "STS Marker"))
     {
       ret->numSts++;
       ret->sts = createName(pos->name);
     } 
   if (sameString(ret->type, "BAC End"))
     {
       if (pos->orien == '+') 
 	ret->startBE = 0;
       else if (pos->orien == '-') 
 	ret->endBE = 0;
       else 
 	errAbort("BAC end without orientation, %s\n", pos->name);
       ret->numBacEnd++;
       ret->bacEnd = createName(pos->name);
     } 
   ret->score = 0;
 
   return(ret);
 }
 
 boolean sameName(struct name *n1, struct name *n2)
 /* Determine if two names the same */
 {
   if ((n1 != NULL) && (n2 != NULL) && (sameString(n1->name, n2->name)))
     return(TRUE);
   
   return(FALSE);
 }
 
 boolean inNameList(struct name *n, struct name *nList)
 /* Determine if n is in nList */
 {
   struct name *n1;
 
   for (n1 = nList; n1 != NULL; n1 = n1->next)
     if (sameName(n, n1))
       return(TRUE);
   
   return(FALSE);
 }
 
 boolean samePos(struct position *p1, struct position *p2)
 /* Determine of p1 and p2 are the same */
 {
   if ((sameString(p1->name, p2->name))
       && (p1->start == p2->start) && (p1->end == p2->end) 
       && ((p1->chrom == NULL && p2->chrom == NULL) || 
 	  ((p1->chrom != NULL) && (p2->chrom != NULL) && sameString(p1->chrom, p2->chrom))))
     return(TRUE);
 
   return(FALSE);
 }
 
 boolean inPosList(struct position *p, struct position *pList)
 /* Determine if p is in pList */
 {
   struct position *p1;
 
   for (p1 = pList; p1 != NULL; p1 = p1->next)
     if (samePos(p, p1))
       return(TRUE);
   
   return(FALSE);
 }
 
 void readFishInfo(struct lineFile *ff)
 /* Read in FISH information from NCBI */
 {
   int wordCount, wordCount1, i, j;
   char *words[7], *chr, *clone, *map, *acc, *sts, *bacEnd, name[32];
   char *accs[16], *stss[16], *bacEnds[16], *stsName[16], *maps[16], *mapInfo[16], *mapInfoCen[16];
   char *p, *center;
   struct fishClone *fc;
   struct position *pos;
   struct map *m;
   int lineCount = 0;
 
   fishClones = newHash(16);
   positions = newHash(16);
   posClone = newHash(16);
 
   /* Read in line and create fishClone record */
   verbose(1, "\treading fishInfo file %s\n", ff->fileName);
   while (lineFileChopTab(ff, words))
     {
       ++lineCount;
       chr = cloneString(words[0]);
       clone = cloneString(words[1]);
 
       /* Create and add to hash */
       verbose(4,"\tadding %s to hash for %s, line %d\n", clone, chr, lineCount);
       fc = createFishClone(clone);
       slAddHead(&fcList, fc);
       hashAdd(fishClones, clone, fc);
 
       /* Record cytogenetic map positions */
       map = cloneString(words[3]);
       verbose(4, "\tprocessing map info %s\n", map);
       wordCount = chopByChar(map, ';', maps, ArraySize(maps));
       for (i = 0; i < wordCount; i++)
 	{
 	  p = strrchr(maps[i], ')');
 	  *p = '\0';
 	  wordCount1 = chopByChar(maps[i], '(', mapInfoCen, ArraySize(mapInfo)); 
 	  center = cloneString(mapInfoCen[1]);
 	  wordCount1 = chopByChar(mapInfoCen[0], ',', mapInfo, ArraySize(mapInfo));
 	  for (j = 0; j < wordCount1; j++)
 	    {
 	      m =  createMap(mapInfo[j], center, chr);
 	      slAddHead(&fc->cyto, m);
 	      fc->numCyto++;
 	    }
 	}
 
       /* Record accessions assigned to clone */
       acc = cloneString(words[4]);
       verbose(4, "\tprocessing acc info %s\n", acc);
       wordCount = chopByChar(acc, ';', accs, ArraySize(accs));
       for (i = 0; i < wordCount; i++)
 	{
 	  /* Remove version */
 	  p = strrchr(accs[i], '.');
 	  if (p != NULL)
 	    *p = '\0';
 	  verbose(4, "\tprocessing acc info for %s\n", accs[i]);
 	  pos = createPosition(accs[i], "Accession", ' ');
 	  if (!hashLookup(positions, accs[i]))
 	      hashAdd(positions, accs[i], pos);
 	  if (!inPosList(pos, fc->acc))
 	    {
 	      slAddHead(&fc->acc, pos);
 	      hashAdd(posClone, accs[i], fc);
 	    }
 	}
 
       /* Record sts markers assigned to clone */
       sts = cloneString(words[5]);
       verbose(4, "\tprocessing sts info %s\n", sts);
       wordCount = chopByChar(sts, ';', stss, ArraySize(stss));
       for (i = 0; i < wordCount; i++)
 	{
 	  /* Split off UniSTS id */
 	  wordCount1 = chopByWhite(stss[i], stsName, ArraySize(stsName));
 	  verbose(4, "\tprocessing sts info for %s\n", stsName[0]);
 	  safef(name, sizeof(name), "%s_%s", clone, stsName[0]);
 	  pos = createPosition(stsName[0], "STS Marker", ' ');
 	  if (!hashLookup(positions, name))
 	      hashAdd(positions, name, pos);
 	  if (!inPosList(pos, fc->sts))
 	    {
 	      slAddHead(&fc->sts, pos);
 	      hashAdd(posClone, name, fc);
 	    }
 	}
 
       /* Record bac ends assigned to clone */
       bacEnd = cloneString(words[6]);
       verbose(4, "\tprocessing bacend info %s\n", bacEnd);
       wordCount = chopByChar(bacEnd, ';', bacEnds, ArraySize(bacEnds));
       for (i = 0; i < wordCount; i++)
 	{
 	  /* Split off version */
 	  p = strrchr(bacEnds[i], '.');
 	  *p = '\0';
 	  verbose(4, "\tprocessing bacEnd info for %s\n", bacEnds[i]);
 	  pos = createPosition(bacEnds[i], "BAC End", ' ');
 	  if (!hashLookup(positions, bacEnds[i]))
 	      hashAdd(positions, bacEnds[i], pos);
 	  if (!inPosList(pos, fc->end))
 	    {
 	      slAddHead(&fc->end, pos);
 	      hashAdd(posClone, bacEnds[i], fc);
 	    }
 	}
     }
 }
 
 void readCloneAcc(struct lineFile *cf)
 /* Read in clone/acc information from NCBI (Clone Registry) */
 {
   char *words[4], *clone, *acc, *p;
   struct position *pos;
   struct fishClone *fc;
 
   /* Read in clone/acc pairings */
   lineFileChopNext(cf, words, 3);
   lineFileChopNext(cf, words, 3);
   while (lineFileChopNext(cf, words, 3))
     {
       clone = cloneString(words[0]);
       acc = cloneString(words[1]);
       p = strrchr(acc, '.');
       if (p != NULL) 
 	*p = '\0';
       /* Check if clone a fish clone, and that accession not previously recorded */
       if (hashLookup(fishClones, clone)) 
 	{
 	  fc = hashMustFindVal(fishClones, clone);
 	  pos = createPosition(acc, "Accession", ' ');
 	  if (!hashLookup(positions, acc))
 	      hashAdd(positions, acc, pos);
 	  if (!inPosList(pos, fc->acc))
 	    {
 	      hashAdd(posClone, acc, fc);
 	      slAddHead(&fc->acc, pos);	  
 	    }
 	}
       free(clone);
     }
 }
 
 void readBacEnds(struct lineFile *bef)
 /* Read in clone/bac ends information from NCBI */
 {
   char *words[8], *clone, *be;
   struct position *pos;
   struct fishClone *fc;
 
   /* Read in clone/bac ends pairings */
   while (lineFileChopNext(bef, words, 6))
     {
       clone = cloneString(words[0]);
       be = cloneString(words[1]);
       /* Check if clone a fish clone, and that accession not previously recorded */
       if (hashLookup(fishClones, clone))
 	{
 	  fc = hashMustFindVal(fishClones, clone);
 	  pos = createPosition(be, "BAC End", ' ');
 	  if (!hashLookup(positions, be))
 	      hashAdd(positions, be, pos);
 	  if (!inPosList(pos, fc->end))
 	    {
 	      hashAdd(posClone, be, fc);
 	      slAddHead(&fc->end, pos);
 	    }
 	}
       free(clone);
     }
 }
 
 void readStsMarkers(struct lineFile *sf)
 /* Read in STS Marker information from FHCRC */
 {
   char *words[4], *clone, *sts, name[32];
   struct position *pos;
   struct fishClone *fc;
 
   /* Read in clone/STS pairings */
   while (lineFileChopNext(sf, words, 2))
     {
       clone = cloneString(words[0]);
       sts = cloneString(words[1]);
       safef(name, sizeof(name), "%s_%s", clone, sts);
       /* Check if clone a fish clone, and that accession not previously recorded */
       if (hashLookup(fishClones, clone)) 
 	{
 	  fc = hashMustFindVal(fishClones, clone);
 	  pos = createPosition(sts, "STS Marker", ' ');
 	  /* Make sure it isn't an accession or end sequence */
 	  if (!hashLookup(positions, sts))
 	    {
 	      if (!hashLookup(positions, name))
 		hashAdd(positions, name, pos);
 	      verbose(4, "\tchecking sts marker %s for %s\n", sts, clone);
 	      if (!inPosList(pos, fc->sts)) 
 		{
 		  verbose(4, "\tadding sts marker %s for %s\n", sts, clone);
 		  hashAdd(posClone, name, fc);
 		  slAddHead(&fc->sts, pos);
 		}
 	    }
 	}
     }
 }
 
 void readBacEndsPsl(struct lineFile *bpf)
 /* Read in bac ends pslFile */
 {
   int lineSize, wordCount;
   char *line, *words[32];
   struct psl *psl;
   struct position *pos;
   struct fishClone *fc;
 
   while (lineFileNext(bpf, &line, &lineSize))
     {
     wordCount = chopTabs(line, words);
     if (wordCount != 21)
 	errAbort("Bad line %d of %s\n", bpf->lineIx, bpf->fileName);
     psl = pslLoad(words);
     if (hashLookup(positions, psl->qName))
       {
 	pos = createPosition(psl->qName, "BAC End", psl->strand[0]);
 	pos->chrom = cloneString(psl->tName);
 	pos->start = psl->tStart;
 	pos->end = psl->tEnd;
 	fc = hashMustFindVal(posClone, psl->qName);
 	/* Check if this is a duplicate */
 	if (!inPosList(pos, fc->end))
 	  slAddHead(&fc->end, pos);
 	else
 	  positionFree(&pos);
       }
     }
 }
 
 void readClonePsl(struct lineFile *cpf)
 /* Read in clone psl file */
 {
   int lineSize, wordCount;
   char *line, *words[32];
   char *acc, *p;
   struct psl *psl;
   struct position *pos;
   struct fishClone *fc;
 
   while (lineFileNext(cpf, &line, &lineSize))
     {
     wordCount = chopTabs(line, words);
     if (wordCount != 21)
 	errAbort("Bad line %d of %s\n", cpf->lineIx, cpf->fileName);
     psl = pslLoad(words);
     acc = cloneString(psl->qName);
     p = strrchr(acc, '.');
     *p = '\0';    
     if (hashLookup(positions, acc))
       {
 	pos = createPosition(acc, "Accession", psl->strand[0]);
 	pos->chrom = cloneString(psl->tName);
 	pos->start = psl->tStart;
 	pos->end = psl->tEnd;
 	fc = hashMustFindVal(posClone, acc);
 	/* Check if this is a duplicate */
 	if (!inPosList(pos, fc->acc))
 	  slAddHead(&fc->acc, pos);
 	else
 	  positionFree(&pos);
       }
     }
 }
 
 void findAccPosition(struct sqlConnection *conn, struct position *pos, struct fishClone *fc)
 /* Determine the position of an accession */
 {
   char query[256];
   struct sqlResult *sr;
   char **row, *name;
   struct clonePos *cp;
   struct position *newPos;
   struct name *missing;
 
   name = cloneString(pos->name);
   strcat(name, "%"); 
   sqlSafef(query, sizeof(query), "select * from clonePos where name like '%s'", name);
   sr = sqlGetResult(conn, query);
   if ((row = sqlNextRow(sr)) != NULL) 
     {
       cp = clonePosLoad(row);
       free(pos->name);
       pos->name = cloneString(cp->name);
       pos->chrom = cloneString(cp->chrom);
       pos->start = cp->chromStart;
       pos->end = cp->chromEnd;
       pos->phase = cp->phase;
       clonePosFree(&cp);
       while ((row = sqlNextRow(sr)) != NULL) 
 	{
 	  cp = clonePosLoad(row);
 	  newPos = createPosition(cp->name, "Accession", ' ');
 	  newPos->chrom = cloneString(cp->chrom);
 	  newPos->start = cp->chromStart;
 	  newPos->end = cp->chromEnd;
 	  newPos->phase = cp->phase;
 	  slAddHead(&fc->acc, newPos);
 	  clonePosFree(&cp);
 	}
     }
   else
     {
       missing = createName(pos->name);
       slAddHead(&fc->missingAcc, missing);
     }
   
   sqlFreeResult(&sr);
 }
 
 void findStsPosition(struct sqlConnection *conn, struct position *pos, struct fishClone *fc)
 /* Determine the position of an sts marker */
 {
   struct sqlConnection *conn1 = hAllocConn(sqlGetDatabase(conn));
   char query[256];
   struct sqlResult *sr, *sr1;
   char **row, **row1;
   struct stsAlias *a;
   struct stsMap *s;
   struct position *newPos;
   boolean found = FALSE;
 
   sqlSafef(query, sizeof(query), "select * from stsMap where name = '%s'", pos->name);
   sr = sqlGetResult(conn, query);
   if ((row = sqlNextRow(sr)) == NULL)
     {
       sqlFreeResult(&sr);
       sqlSafef(query, sizeof(query), "select * from stsAlias where alias = '%s'", pos->name);
       sr1 = sqlGetResult(conn1, query);
       if ((row1 = sqlNextRow(sr1)) != NULL)
 	{
 	  a = stsAliasLoad(row1);
 	  sqlSafef(query, sizeof(query), "select * from stsMap where name = '%s'", a->trueName);
 	  stsAliasFree(&a);
 	  sr = sqlGetResult(conn, query);      
 	  if ((row = sqlNextRow(sr)) != NULL)
 	    found = TRUE;
 	}
       sqlFreeResult(&sr1);
     }
   else 
     found = TRUE;
   
   if (found)
     {
       s = stsMapLoad(row);
       pos->chrom = cloneString(s->chrom);
       pos->start = s->chromStart;
       pos->end = s->chromEnd;
       stsMapFree(&s);
       while ((row = sqlNextRow(sr)) != NULL) 
 	{
 	  newPos = createPosition(pos->name, "STS Marker", ' ');
 	  s = stsMapLoad(row);
 	  newPos->chrom = cloneString(s->chrom);
 	  newPos->start = s->chromStart;
 	  newPos->end = s->chromEnd;
 	  slAddHead(&fc->sts, newPos);
 	  stsMapFree(&s);
 	}
     }
   sqlFreeResult(&sr);
   hFreeConn(&conn1);
 }
 
 void findBacEndPairPosition(struct sqlConnection *conn, struct fishClone *fc)
 /* Determine the positions of bac end pairs for a clone */
 {
   char query[256], names[256];
   struct sqlResult *sr;
   char **row;
   struct lfs *be;
   struct position *newPos;
 
   sqlSafef(query, sizeof(query), "select * from bacEndPairs where name = '%s' order by (chromEnd - chromStart)", fc->cloneName);
   sr = sqlGetResult(conn, query);
   while ((row = sqlNextRow(sr)) != NULL)
     {
       be = lfsLoad(row+1);
       safef(names, sizeof(names), "%s,%s", be->lfNames[0], be->lfNames[1]);
       newPos = createPosition(names, "BAC End Pair", ' ');
       newPos->chrom = cloneString(be->chrom);
       newPos->start = be->chromStart;
       newPos->end = be->chromEnd;
       lfsFree(&be);
       slAddHead(&fc->endPair, newPos);
     }
   sqlFreeResult(&sr);
 }
 
 int combinePos(struct place *p, struct position *pos)
 /* See if current position can be added to placement */
 {
   int ret = 0;
   int d1 = abs(p->start - pos->start);
   int d2 = abs(p->end - pos->end);
   struct name *n;
   char *words[2], *names;
   int wordCount;
   struct name *pos2;
   boolean badBE = FALSE, sameEnd = FALSE;
 
   /* Check if BAC end and facing the wrong way */
   n = createName(pos->name);
   if ((!inNameList(n, p->bacEnd)) &&
       (!inNameList(n, p->bePair)))    
     {
     if ((sameString(pos->type, "BAC End")) &&
 	( ((pos->orien == '+') && ((pos->start - p->start) > 25000)) || 
 	((pos->orien == '-') && ((p->end - pos->end) > 25000)) )) 
       badBE = TRUE;
     else if ((((p->startBE != -1) && ((p->start - pos->start) > 25000)) || 
 	      ((p->endBE != -1) && ((pos->end - p->end) > 25000))) &&
 	     (((!sameString(pos->type, "BAC End Pair")) || (p->bePair == NULL)) && (p->acc == NULL)))
       badBE = TRUE;
     }
   nameFree(&n);
 
   /* Check if BAC end and all positions in placement are same accession */
   if ((sameString(pos->type, "BAC End")) && (p->acc == NULL) &&
       (p->bePair == NULL) && (p->sts == NULL) && (p->bacEnd !=NULL))
     {
       sameEnd = TRUE;
       for (pos2 = p->bacEnd; pos2 != NULL; pos2 = pos2->next)
 	if (!sameString(pos->name, pos2->name)) 
 	  sameEnd = FALSE;
     }
 
   if ((sameString(p->chrom, pos->chrom)) && (!badBE) && 
       ((d1 < 200000) || ((pos->start >= p->start) && (pos->start <= p->end))) && 
       ((d2 < 200000) || ((pos->end >= p->start) && (pos->end <= p->end))))
     {
       if (pos->start < p->start)
 	{
 	  if (sameEnd) 
 	    p->end = pos->end;
 	  p->start = pos->start;
 	  if ((sameString(pos->type, "BAC End Pair")) || 
 	      ((sameString(pos->type, "BAC End")) && (pos->orien == '+')))
 	      p->startBE = 0;
 	  else 
 	    {
 	      if ((p->start - pos->start) > 25000)
 		p->startBE = -1;
 	    }
 	}
       else if ((sameString(pos->type, "BAC End Pair")) || 
 	       ( ((sameString(pos->type, "BAC End")) && (pos->orien == '+')) &&
 	       ((pos->start - p->start) <= 25000) && 
 	       ((p->startBE < 0) || ((pos->start - p->start) <= p->startBE))) )
 	p->startBE = pos->start - p->start;
 
       if (pos->end > p->end)
 	{
 	  if (sameEnd) 
 	    p->start = pos->start;
 	  p->end = pos->end;
 	  if ((sameString(pos->type, "BAC End Pair")) || 
 	      ((sameString(pos->type, "BAC End")) && (pos->orien == '-')))
 	    p->endBE = 0;
 	  else 
 	    {
 	      if ((pos->end - p->end) > 25000)
 		p->endBE = -1;
 	    }
 	}
       else if ((sameString(pos->type, "BAC End Pair")) || 
 	       ( ((sameString(pos->type, "BAC End")) && (pos->orien == '-')) &&
 	       ((p->end - pos->end) <= 25000) && 
 	       ((p->endBE < 0) || ((p->end - pos->end) < p->endBE))) )
 	p->endBE = p->end - pos->end;
 
       if (sameString(pos->type, "Accession"))
 	{
 	  p->numAcc++;
 	  n = createName(pos->name);
 	  n->phase = pos->phase;
 	  slAddHead(&p->acc, n);
 	} 
       if (sameString(pos->type, "BAC End Pair"))
 	{
 	  names = cloneString(pos->name);
 	  wordCount = chopString(names, ",",words,2);
 	  n = createName(words[0]);
 	  slAddHead(&p->bePair, n);
 	  n = createName(words[1]);
 	  slAddHead(&p->bePair, n);
 	  p->numBacEndPair++;
 	} 
       if (sameString(pos->type, "STS Marker"))
 	{
 	  /* Check if used for STS Marker */
 	  n = createName(pos->name);
 	  if (!inNameList(n, p->sts))
 	    {
 	      p->numSts++;
 	      n = createName(pos->name);
 	      slAddHead(&p->sts, n);
 	    } 
 	  else 
 	    nameFree(&n);
 	} 
       if (sameString(pos->type, "BAC End"))
 	{
 	  /* Check if used for end pair */
 	  n = createName(pos->name);
 	  if ((!inNameList(n, p->bePair)) && (!inNameList(n, p->bacEnd)))
 	    {
 	      p->numBacEnd++;
 	      slAddHead(&p->bacEnd, n);
 	    }
 	  else
 	    nameFree(&n);
 	} 
       ret = 1;
     }
 
   return(ret);
 }
 
 int scorePlace(struct place *p, struct map *m)
 /* Score a placement */
 {
   int ret = 0, wordCount;
   struct map *m1;
   struct name *n, *nList=NULL, *new;  
   char *chrom, *words[2];
   
   /* Check if chromosome agress with any fish mapping */  
   verbose(3, "\tdetermining if in fish chrom\n");
   chrom = cloneString(p->chrom);
   /* Get rid of "random" */
   wordCount = chopString(chrom, "_",words, 2);
   for (m1 = m; m1 != NULL; m1 = m1->next)
     {
       verbose(3, "\tchecking %s vs. %s\n", m1->chrom, words[0]);
       if (sameString(m1->chrom, words[0]))
 	{
 	  if (wordCount == 1)
 	    ret += 3;
 	  else
 	    ret += 2;
 	  break;
 	}
     }
 
   /* Score accessions based on phase of sequencing */
   verbose(3, "\tscoring accessions\n");
   for (n = p->acc; n != NULL; n = n->next)
     if (!inNameList(n, nList))
       {
 	if (n->phase == 3)
 	  ret += 5;
 	else if (n->phase == 2)
 	  ret += 3;
 	else if (n->phase == 1)
 	  ret += 2;
 	else
 	  ret += 1;
 	new = createName(n->name);
 	slAddHead(&nList, new);
       }
 
   ret += p->numBacEndPair * 3;
 
   /* Make sure each name only counted once */
   verbose(3, "\tscoring sts markers\n");
   for (n = p->sts; n != NULL; n = n->next)
     if (!inNameList(n, nList))
       {
 	ret += 2;
 	new = createName(n->name);
 	slAddHead(&nList, new);
       }
   verbose(3, "\tscoring bac ends\n");
   for (n = p->bacEnd; n != NULL; n = n->next)
     if (!inNameList(n, nList))
       {
 	ret += 1;
 	new = createName(n->name);
 	slAddHead(&nList, new);
       }
   nameFreeList(&nList);
 
   return(ret);
 }
 
 void filterGoodPlace(struct fishClone *fc)
 /* Filter placements to only retain the best */
 {
   int bestScore = 0;
   struct place *p, *lastp = NULL;
   
   /* Determine best score */
   for (p = fc->good; p != NULL; p = p->next)
     if (p->score > bestScore)
       bestScore = p->score;
 
   /* Filter out anyone not within 1 or best score */
   lastp = NULL;
   p = fc->good;
   while (p != NULL)
     {
       if (p->score < (bestScore - 1))
 	{
 	  if (lastp == NULL) 
 	    {
 	      fc->good = fc->good->next;
 	      placeFree(&p);
 	      fc->numGood--;
 	      p = fc->good;
 	    } 
 	  else
 	    {
 	      lastp->next = p->next;
 	      placeFree(&p);
 	      fc->numGood--;
 	      p = lastp->next;
 	    }
 	}
       else 
 	{
 	  lastp = p;
 	  p = p->next;
 	}
     }
 }
 
 void addOrCombinePos(struct fishClone *fc, struct position *pos)
 /* Determine if position can be combined or a new possible good placement added */
 {
   boolean combined = FALSE;
   struct place *p, *newP;
 
   verbose(3, "\tchecking if position can be combined\n");
   if (pos->chrom)
     {
       combined = FALSE;
       for (p = fc->good; p != NULL; p = p->next)
 	if (!combined)
 	  if (combinePos(p, pos))
 	    combined = TRUE;
       if (!combined)
 	{
 	  newP = createPlace(pos);
 	  slAddHead(&fc->good, newP);
 	  fc->numGood++;
 	}
     }
 }
 
 void findGoodPlace(struct fishClone *fc)
 /* Determine the good positions for a clone */
 {
   struct place *p;
   struct position *pos;
 
   /* Check clone acc positions */
   verbose(3, "\tchecking if can combine for %s\n", fc->cloneName);
   for (pos = fc->acc; pos != NULL; pos = pos->next)
     addOrCombinePos(fc, pos);
   /* Check bac end pair positions */
   for (pos = fc->endPair; pos != NULL; pos = pos->next)
     addOrCombinePos(fc, pos);
   /* Check clone sts marker positions */
   for (pos = fc->sts; pos != NULL; pos = pos->next)
     addOrCombinePos(fc, pos);
   /* Check clone end positions */
   for (pos = fc->end; pos != NULL; pos = pos->next)
     addOrCombinePos(fc, pos);
 
   /* Score each of the placements */
   verbose(3, "\tscoring placements for %s\n", fc->cloneName);
   for (p = fc->good; p != NULL; p = p->next)
     {
       p->score = scorePlace(p, fc->cyto);
       verbose(3, "\tscore on %s is %d\n", p->chrom,p->score);
     }
   /* Filter to retain only best placements */
   filterGoodPlace(fc);
   
 }
 
 void findClonePos(char *db)
 /* Determine the best positions for each of the FISH clones */
 {
   struct sqlConnection *conn = hAllocConn(db);
   struct fishClone *fc;
   struct position *pos;
 
   /* Process each clone */
   verbose(2, "\tfindClonePos: determining positions of fish clones\n");
   for (fc = fcList; fc != NULL; fc=fc->next)
     {
       verbose(3, "\tfinding pos of %s\n", fc->cloneName);
       /* Determine positions of clone accessions */
       for (pos = fc->acc; pos != NULL; pos = pos->next)
 	{
 	  /* Check if position already set from psl file */
 	  if (!pos->chrom)
 	    {
 	      verbose(3, "\tfinding acc pos of %s\n", pos->name);
 	      findAccPosition(conn, pos, fc);
 	    }
 	}
       /* Determing positions of sts markers */
       for (pos = fc->sts; pos != NULL; pos = pos->next)
 	{
 	  verbose(3, "\tfinding sts pos of %s\n", pos->name);
 	  findStsPosition(conn, pos, fc);
 	}
       /* Find positions of bac end pairs */
       verbose(3, "\tfinding bac end pairs of %s\n", fc->cloneName);
       findBacEndPairPosition(conn, fc);
       /* Determine good placements, if any */
       findGoodPlace(fc);
     }
   hFreeConn(&conn);
 }
 
 void writeOut(FILE *of, FILE *af)
 /* Print out the fishClones.bed file */
 {
   int score=0;
   struct map *m;
   struct fishClone *fc;
   struct place *p;
   struct name *n;
     int linesOut = 0;
 
   for (fc = fcList; fc != NULL; fc = fc->next)
     {
       if (fc->numGood > 10) 
 	continue;
       if (fc->numGood == 1)
 	score = 1000;
       else 
 	if (fc->numGood > 0)
 	  score = 1500/(fc->numGood);
 	else
 	  if (fc->missingAcc != NULL)
 	    for (n = fc->missingAcc; n != NULL; n = n->next)
 	      fprintf(af, "%s\n",n->name);
       for (p = fc->good; p != NULL; p=p->next)
 	{
 	    ++linesOut;
 	    if (NULL != fc->cyto)
 	    {
 	  fprintf(of, "%s\t%d\t%d\t%s\t%d\t%d\t",
 	      p->chrom, p->start, p->end, fc->cloneName, score, fc->numCyto);
 	  m = fc->cyto;
 	  fprintf(of, "%s", m->band1); 
 	  for (m = fc->cyto->next; m != NULL; m=m->next)
 	      fprintf(of, ",%s", m->band1);
 	  fprintf(of, "\t"); 
 	  m = fc->cyto;
 	  fprintf(of, "%s", m->band2); 
 	  for (m = fc->cyto->next; m != NULL; m=m->next)
 	      fprintf(of, ",%s", m->band2);
 	  fprintf(of, "\t"); 
 	  m = fc->cyto;
 	  fprintf(of, "%s", m->center); 
 	  for (m = fc->cyto->next; m != NULL; m=m->next)
 	      fprintf(of, ",%s", m->center);
 	  fprintf(of, "\t"); 
 	  fprintf(of, "%s\t", p->type);
 	  fprintf(of, "%d\t", p->numAcc);
 	  if (p->numAcc) 
 	    {
 	      fprintf(of, "%s", p->acc->name);
 	      for (n = p->acc->next; n != NULL; n=n->next)
 		fprintf(of, ",%s", n->name);
 	    }
 	  fprintf(of, "\t");
 	  fprintf(of, "%d\t", p->numSts);
 	  if (p->numSts) 
 	    {
 	      fprintf(of, "%s", p->sts->name);
 	      for (n = p->sts->next; n != NULL; n=n->next)
 		fprintf(of, ",%s", n->name);
 	    }
 	  fprintf(of, "\t");
 	  fprintf(of, "%d\t", p->numBacEnd + p->numBacEndPair*2);
 	  if (p->numBacEndPair) 
 	    {
 	      fprintf(of, "%s", p->bePair->name);
 	      for (n = p->bePair->next; n != NULL; n=n->next)
 		fprintf(of, ",%s", n->name);
 	    }
 	  if (p->numBacEnd) 
 	    {
 	      if (p->numBacEndPair)
 		fprintf(of, ",%s", p->bacEnd->name);
 	      else
 		fprintf(of, "%s", p->bacEnd->name);
 	      for (n = p->bacEnd->next; n != NULL; n=n->next)
 		fprintf(of, ",%s", n->name);
 	    }
 	  if (ENDINFO) {
 	    fprintf(of, "\t%d\t%d", p->startBE, p->endBE);
 	  }
 	  fprintf(of, "\n");
 	    } else {
 verbose(1, "# ERROR: at line # %d, no cytoband info for %s:%d-%d\t%s\n",
 	      linesOut, p->chrom, p->start, p->end, fc->cloneName);
 	    }
 	}	/*	for (p = fc->good; p != NULL; p=p->next)	*/
     }
 }
 
 int main(int argc, char *argv[])
 {
   struct lineFile *ff, *cf, *bef, *bpf, *sf, *cpf;
   FILE *of, *af;
   char filename[PATH_LEN], *db, *stsName=NULL, *pslName=NULL;
 
   optionInit(&argc, argv, optionSpecs);
   if (argc < 4)
     {
       fprintf(stderr, "USAGE: fishClones <database> <hbrc> <clac.out> <cl_acc_gi_len> <bacEnds.psl> <out file prefix>\n");
       fprintf(stderr, "Options:\n");
       fprintf(stderr, "\t-fhcrc=<file>\tSTS marker associations from FHCRC\n");
       fprintf(stderr, "\t-psl=<psl_file>\tpsl file of clone placements\n");
       fprintf(stderr, "\t-verbose=<level>\tdisplay all messages\n");
       fprintf(stderr, "\t-noBin\t\tdo not include bin column in output file\n");
       fprintf(stderr, "\t-noRandom\tdo not include placements on random portions\n");
       fprintf(stderr, "\t-endInfo\tprint out whether start/end determined by BAC end\n");
       return 1;
     }
   db = argv[1];
 
   ff = lineFileOpen(argv[2], FALSE);
   cf = lineFileOpen(argv[3], FALSE);
   bef = lineFileOpen(argv[4], FALSE);
   bpf = pslFileOpen(argv[5]);
   safef(filename, sizeof(filename), "%s.bed", argv[6]);
   of = mustOpen(filename, "w");
   safef(filename, sizeof(filename), "%s.acc", argv[6]);
   af = mustOpen(filename, "w");
 
   verb = optionInt("verbose", 0);
   verboseSetLevel(verb);
   NORANDOM = optionExists("noRandom");
   NOBIN = optionExists("noBin");
   ENDINFO = optionExists("endInfo");
   stsName = optionVal("fhcrc", NULL);
   if (stsName) 
     sf = lineFileOpen(stsName, FALSE);
   pslName = optionVal("psl", NULL);
   if (pslName) 
     cpf = pslFileOpen(pslName);
 
   verbose(1, "Reading Fish Clones file %s\n", argv[2]);
   readFishInfo(ff);
   lineFileClose(&ff);
   writeOut(of, af);
   verbose(1, "Reading Clone/Acc (clac.out) file %s\n", argv[3]);
   readCloneAcc(cf);
   lineFileClose(&cf);
   writeOut(of, af);
   verbose(1, "Reading BAC Ends file %s\n", argv[4]);
   readBacEnds(bef);
   lineFileClose(&bef);
   writeOut(of, af);
   verbose(1, "Reading BAC Ends psl file %s\n", argv[5]);
   readBacEndsPsl(bpf);
   lineFileClose(&bpf);
   writeOut(of, af);
   if (stsName)
     {
       verbose(1, "Reading additional STS Marker links %s\n", stsName);
       readStsMarkers(sf);
       lineFileClose(&sf);
     }
   writeOut(of, af);
   if (pslName)
     {
       verbose(1, "Reading additional clone psl positions %s\n", pslName);
       readClonePsl(cpf);
       lineFileClose(&cpf);
     }
   verbose(1, "Determining good positions\n");
   findClonePos(db);
   verbose(1, "Writing output file\n");
   writeOut(of, af);
   fclose(of);
   fclose(af);
 
   return 0;
 }