src/hg/gigAssembler/mendMap/mendMap.c 1.11
1.11 2009/07/07 18:45:02 hiram
Fixup broken build on Solaris
Index: src/hg/gigAssembler/mendMap/mendMap.c
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/hg/gigAssembler/mendMap/mendMap.c,v
retrieving revision 1.10
retrieving revision 1.11
diff -b -B -U 1000000 -r1.10 -r1.11
--- src/hg/gigAssembler/mendMap/mendMap.c 5 Sep 2003 21:30:43 -0000 1.10
+++ src/hg/gigAssembler/mendMap/mendMap.c 7 Jul 2009 18:45:02 -0000 1.11
@@ -1,3043 +1,3013 @@
/* mm - Map mender. */
#undef VERBOSE_LOG
#include "common.h"
#include "linefile.h"
#include "obscure.h"
#include "errabort.h"
#include "memalloc.h"
#include "portable.h"
#include "hash.h"
#include "fa.h"
#include "dlist.h"
#include "diGraph.h"
#include "localmem.h"
#include "hCommon.h"
#include "psl.h"
#include "qaSeq.h"
static char const rcsid[] = "$Id$";
int version = 23; /* Current version number. */
int maxMapDeviation = 700000; /* No map deviations further than this allowed. */
boolean isPlaced; /* TRUE if want to really follow map. */
FILE *logFile; /* File to write decision steps to. */
char *contigName; /* Name of contig from info file. */
char *contigType; /* Contig type from info file. */
enum htgPhase
{
htgSurvey = 0,
htgDraft = 1,
htgOrdered = 2,
htgFinished = 3,
};
struct cloneEnd
/* Information about the end of a clone. */
{
struct cloneEnd *next;
struct mmClone *clone; /* Which clone they are part of. */
char *frag; /* Associated fragment. NULL if unknown. */
int orientation; /* Orientation. */
bool isStart; /* True if this is start fragment. */
int pos; /* Approximate position. Only used when faking it....*/
};
struct mmClone
/* Describes a clone (BAC) */
{
struct mmClone *next; /* Next in clone list. */
char *name; /* Name of clone (accession). */
char *bacName; /* Name of BAC (usually like RP11-something) */
int mapPos; /* Relative position in contig according to map. */
int flipTendency; /* Tendency of clone to flip. */
char *seqCenter; /* Sequencing center if known. */
char *placeInfo; /* Placement info. */
enum htgPhase phase; /* HTG phase. */
int size; /* Size in bases. */
struct dlList *fragList; /* List of frags in clone. */
struct barge *barge; /* Barge this is part of. */
struct cloneEnd *sEnd; /* One end frag . */
struct cloneEnd *tEnd; /* Other end frag . */
struct bepRef *bepList; /* Bac end pairs that hit this. */
struct mmClone *bestFriend; /* Clone this overlaps with most. */
int bestOverlap; /* Overlap with best friend. */
int mmPos; /* Relative position in contig according to mm. */
struct mmClone *enclosed; /* Clone enclosed by another?. */
};
struct cloneRef
/* A reference to a clone. */
{
struct cloneRef *next; /* Next in list. */
struct mmClone *clone; /* Clone (not allocated here). */
};
struct overlappingClonePair
/* Info on a pair of clones which overlaps. */
{
struct overlappingClonePair *next; /* Next in list. */
struct mmClone *a, *b; /* Pair of clones. */
int overlap; /* Size of strict overlap. */
int sloppyOverlap; /* Size of sloppy overlap. */
int obliviousOverlap; /* Size of truly oblivious overlap. */
/* ------ Stuff added to ooMapMend ------ */
char aHitS; /* Values Y, N, and ? for corresponding end hitting, not hitting and unknown. */
char aHitT; /* Values Y, N, and ? for corresponding end hitting, not hitting and unknown. */
char bHitS; /* Values Y, N, and ? for corresponding end hitting, not hitting and unknown. */
char bHitT; /* Values Y, N, and ? for corresponding end hitting, not hitting and unknown. */
int externalPriority; /* External priority value - from hand edited parts of map. */
int score; /* Overall score. */
};
struct barge
/* A barge - a set of welded together clones. */
{
struct barge *next; /* Next in list. */
int id; /* Barge id. */
int offset; /* Offset within contig. */
int mapPos; /* Position within map. */
int size; /* Total size. */
/* ---- ooMapMend ---- */
struct dlList *cloneEndList; /* Ordered list of clone ends. */
struct dlList *cloneList; /* List of clones on barge. Order not significant. */
struct dlNode *dlNode; /* Node in barge list. */
};
struct bep
/* A BAC end pair. */
{
struct bep *next; /* Next in list. */
char *a; /* Name of one read. */
char *b; /* Name of other read. */
char *cloneName; /* Name of clone. */
struct psl *aList; /* List of alignments a is in. */
struct psl *bList; /* List of alignments b is in. */
struct cloneRef *cloneList; /* List of clones this hits. */
};
struct bepRef
/* A reference to a BAC end pair. */
{
struct bepRef *next; /* Next in list. */
struct bep *bep; /* bep - not allocated here. */
};
void warnHandler(char *format, va_list args)
/* Default error message handler. */
{
if (format != NULL)
{
fprintf(stderr, "[warning] ");
fprintf(logFile, "[warning] ");
vfprintf(stderr, format, args);
vfprintf(logFile, format, args);
fprintf(stderr, "\n");
fprintf(logFile, "\n");
#ifdef SOMETIMES
fflush(logFile);
uglyfBreak();
#endif
}
}
void status(char *format, ...)
/* Print status message to stdout and to log file. */
{
va_list args;
va_start(args, format);
vfprintf(logFile, format, args);
vfprintf(stdout, format, args);
va_end(args);
}
void logIt(char *format, ...)
/* Print message to log file. */
{
va_list args;
va_start(args, format);
vfprintf(logFile, format, args);
va_end(args);
#ifdef SOMETIMES
fflush(logFile);
#endif /* SOMETIMES */
}
/* Make uglyf debugging statements go to log file too. */
#undef uglyf
#define uglyf status
char orientChar(int orient)
/* Return character representing orientation. */
{
if (orient > 0) return '+';
else if (orient < 0) return '-';
else return '.';
}
char otherStrandChar(char c)
/* Return orientation char for opposite strand. */
{
if (c == '-') return '+';
else return '-';
}
int maxMapDif(struct mmClone *a, struct mmClone *b)
/* Return max distance between clones in map coordinates. */
{
-int fDif, rDif;
int aStart, aEnd, bStart, bEnd;
aStart = a->mapPos;
aEnd = a->mapPos + a->size;
bStart = b->mapPos;
bEnd = b->mapPos + b->size;
if (rangeIntersection(aStart, aEnd, bStart, bEnd) > 0)
return 0;
if (aEnd <= bStart)
return bStart - aEnd;
else
return aStart - bEnd;
}
char *ocpHashName(char *a, char *b)
/* Return name of ocp associated with a and b */
{
static char buf[256];
if (strcmp(a,b) < 0)
sprintf(buf, "%s^%s", a, b);
else
sprintf(buf, "%s^%s", b, a);
return buf;
}
struct overlappingClonePair *findHashedOcp(char *aName, char *bName, struct hash *ocpHash)
/* Find overlappingClonePair if any associated with a/b overlap. */
{
char *ocpName = ocpHashName(aName, bName);
return hashFindVal(ocpHash, ocpName);
}
double draftMinCoverage = 0.90;
double initialEncloseMin = 0.95;
double finalEncloseMin = 0.98;
boolean uglyerfOn = FALSE;
void uglyerfStart()
/* Set verbose debugging flag. */
{
uglyerfOn = TRUE;
uglyf("Getting uglyer\n");
}
void uglyerfEnd()
/* Clear verbose debugging flag. */
{
uglyerfOn = FALSE;
}
void uglyerf(char *format, ...)
/* Print status message to stdout and to log file if debugging flag set. */
{
if (uglyerfOn)
{
va_list args;
va_start(args, format);
vfprintf(logFile, format, args);
vfprintf(stdout, format, args);
va_end(args);
}
}
void usage()
/* Explain usage and exit. */
{
errAbort(
"mm - Map mender\n"
"usage:\n"
" mm contigDir\n");
}
int mmCloneCmpSize(const void *va, const void *vb)
/* Compare to sort biggest sized first. */
{
const struct mmClone *a = *((struct mmClone **)va);
const struct mmClone *b = *((struct mmClone **)vb);
return b->size - a->size;
}
int mmCloneCmpMapPos(const void *va, const void *vb)
/* Compare to sort biggest sized first. */
{
const struct mmClone *a = *((struct mmClone **)va);
const struct mmClone *b = *((struct mmClone **)vb);
return a->mapPos - b->mapPos;
}
struct cloneEnd *newCloneEnd(struct mmClone *clone, char *frag)
/* Allocate and initialize a clone end. */
{
struct cloneEnd *ce;
AllocVar(ce);
ce->clone = clone;
ce->frag = frag;
return ce;
}
void mmLoadClones(char *fileName, struct mmClone **retCloneList, struct hash **retCloneHash)
/* Load clones from file into list/hash. */
{
struct lineFile *lf = lineFileOpen(fileName, TRUE);
char *words[7];
struct mmClone *cloneList = NULL, *clone;
struct hash *cloneHash = newHash(11);
-struct cloneEnd *ce;
-int endCount;
while (lineFileRow(lf, words))
{
if (!sameString(words[3], "0"))
{
AllocVar(clone);
slAddHead(&cloneList, clone);
hashAddSaveName(cloneHash, words[0], clone, &clone->name);
clone->size = lineFileNeedNum(lf, words, 2);
clone->phase = atoi(words[3]);
}
}
lineFileClose(&lf);
slSort(&cloneList, mmCloneCmpMapPos);
*retCloneList = cloneList;
*retCloneHash = cloneHash;
}
void mmAddInfo(char *fileName, struct mmClone *cloneList, struct hash *cloneHash)
/* Read in .info file and add map position to it. */
{
struct lineFile *lf = lineFileOpen(fileName, TRUE);
char *header[2], *words[4];
struct mmClone *clone;
double flipMod;
/* Parse first line and save it in global variables.. */
if (!lineFileRow(lf, header))
errAbort("Couldn't read header from %s", fileName);
contigName = cloneString(header[0]);
contigType = cloneString(header[1]);
/* Set positions to special flag value. */
for (clone = cloneList; clone != NULL; clone = clone->next)
clone->mapPos = -BIGNUM;
/* Read in positions from file. */
while (lineFileRow(lf, words))
{
int phase = lineFileNeedNum(lf, words, 2);
if (phase != 0)
{
if ((clone = hashFindVal(cloneHash, words[0])) == NULL)
errAbort("Clone %s is in info but not cloneInfo file", words[0]);
clone->mapPos = lineFileNeedNum(lf, words, 1) * 1000;
flipMod = sqrt(clone->size/300000.0);
if (flipMod < 1.0) flipMod = 1.0;
clone->flipTendency = lineFileNeedNum(lf, words, 3) * 1000 * flipMod;
}
}
lineFileClose(&lf);
/* Check positions all there. */
for (clone = cloneList; clone != NULL; clone = clone->next)
if (clone->mapPos == -BIGNUM)
errAbort("%s is in cloneInfo but not %s", clone->name, fileName);
}
void mmAddEndInfo(char *fileName, struct mmClone *cloneList, struct hash *cloneHash)
/* Add info on clone ends from file. Fake the rest. */
{
struct lineFile *lf = lineFileOpen(fileName, TRUE);
char *words[10];
int wordCount;
struct mmClone *clone;
static char dummyFrag[] = "dummyFrag";
/* Read ends from file. */
while ((wordCount = lineFileChop(lf, words)) != 0)
{
if (wordCount != 9 && wordCount != 7)
errAbort("Expecting 7 or 9 words line %d of %s, got %d",
lf->lineIx, lf->fileName, wordCount);
if ((clone = hashFindVal(cloneHash, words[0])) == NULL)
errAbort("Clone %s is in %s but not cloneInfo file", words[0], fileName);
clone->sEnd = newCloneEnd(clone, dummyFrag);
clone->tEnd = newCloneEnd(clone, (wordCount == 9 ? dummyFrag : NULL));
}
lineFileClose(&lf);
/* If no ends in file then make them up using phase info. */
for (clone = cloneList; clone != NULL; clone = clone->next)
{
if (clone->sEnd == NULL)
{
if (clone->phase >= 2) /* Ordered or finished? */
{
clone->sEnd = newCloneEnd(clone, dummyFrag);
clone->tEnd = newCloneEnd(clone, dummyFrag);
}
else
{
clone->sEnd = newCloneEnd(clone, NULL);
clone->tEnd = newCloneEnd(clone, NULL);
}
}
}
}
void mmAddMap(char *fileName, struct mmClone *cloneList, struct hash *cloneHash)
/* Add info from map file about sequencing center and placement. */
{
struct lineFile *lf = lineFileMayOpen(fileName, TRUE);
struct mmClone *clone;
char *row[6];
int wordCount;
if (lf == NULL)
return;
while ((wordCount = lineFileChop(lf, row)) != 0)
{
if (wordCount < 6)
continue;
if (sameString(row[0], "COMMITTED"))
continue;
if ((clone = hashFindVal(cloneHash, row[0])) != NULL)
{
if (startsWith("ctg", row[3]))
{
clone->bacName = cloneString(row[1]);
clone->seqCenter = cloneString(row[2]);
clone->placeInfo = cloneString(row[5]);
}
}
}
lineFileClose(&lf);
}
int ocpCmpScore(const void *va, const void *vb)
/* Compare to sort based on score. */
{
const struct overlappingClonePair *a = *((struct overlappingClonePair **)va);
const struct overlappingClonePair *b = *((struct overlappingClonePair **)vb);
return b->score - a->score;
}
void ocpDump(struct overlappingClonePair *ocp, FILE *f)
/* Print out ocp to file. */
{
fprintf(f, "%-12s %c %c %-12s %c %c %6d %6d %6d %d %d\n",
ocp->a->name, ocp->aHitS, ocp->aHitT,
ocp->b->name, ocp->bHitS, ocp->bHitT,
ocp->overlap, ocp->sloppyOverlap, ocp->obliviousOverlap,
ocp->externalPriority, ocp->score);
}
boolean ocpGreatEnds(char a, char b)
/* Returns TRUE if one of A is 'Y' and the other 'N' */
{
return (a == 'Y' && b == 'N') || (a == 'N' && b == 'Y');
}
boolean ocpHalfGood(char a, char b)
/* Returns TRUE if is of form ?N ?Y Y? N? */
{
return (a == '?' && (b == 'Y' || b == 'N')) || (b == '?' && (a == 'Y' || a == 'N'));
}
boolean ocpUnknownEnds(char a, char b)
/* Returns TRUE if is of form ?? */
{
return a == '?' && b == '?';
}
boolean ocpDoubleMiss(char a, char b)
/* Returns TRUE if is of form NN */
{
return a == 'N' && b == 'N';
}
boolean ocpDoubleHit(char a, char b)
/* RETURNS true if is of form YY */
{
return a == 'Y' && b == 'Y';
}
int ocpScore(struct overlappingClonePair *ocp, boolean ignoreEnds)
/* Figure out score for ocp so that best looking overlaps have highest score. */
{
int score = 0;
int as = ocp->aHitS, at = ocp->aHitT, bs = ocp->bHitS, bt = ocp->bHitT;
struct mmClone *a = ocp->a, *b = ocp->b;
double size = max(a->size, b->size);
double dist;
struct mmClone *outerClone = NULL, *innerClone = NULL;
int overlap = (ignoreEnds ? ocp->sloppyOverlap : ocp->overlap);
boolean bothFin = (a->phase == htgFinished && b->phase == htgFinished);
boolean bothGreat = (ocpGreatEnds(as, at) && ocpGreatEnds(bs, bt));
score += round(100000.0 * overlap / size);
score += ocp->externalPriority * 10000;
dist = maxMapDif(a, b);
if (dist > maxMapDeviation)
{
if (isPlaced && bothGreat && bothFin && dist < maxMapDeviation*4)
score -= 600000;
else
score -= 1200000;
}
if (ignoreEnds)
{
score -= dist/50;
return score;
}
else
score -= dist/10;
if (ocpDoubleMiss(as, at))
{
outerClone = a;
innerClone = b;
}
if (ocpDoubleMiss(bs, bt))
{
innerClone = b;
outerClone = a;
}
if (bothGreat)
{
score += 1000000;
bothGreat = TRUE;
if (bothFin)
score += 40000;
}
else if (ocpGreatEnds(as, at) && ocpHalfGood(bs, bt))
score += 500000;
else if (ocpGreatEnds(bs, bt) && ocpHalfGood(as, at))
score += 500000;
else if (ocpHalfGood(as, at) && ocpHalfGood(bs, bt))
score += 200000;
else if (ocpGreatEnds(as, at) && ocpUnknownEnds(bs, bt))
score += 100000;
else if (ocpGreatEnds(bs, bt) && ocpUnknownEnds(as, at))
score += 100000;
else if (ocpHalfGood(as, at) && ocpUnknownEnds(bs, bt))
score += 50000;
else if (ocpHalfGood(bs, bt) && ocpUnknownEnds(as, at))
score += 50000;
else if (outerClone != NULL)
{
int innerSize = innerClone->size;
if (innerClone->phase != htgFinished)
innerSize /= draftMinCoverage;
if (ocp->overlap < innerSize)
score -= 1000000;
}
return score;
}
void updateBestFriend(struct mmClone *clone, struct mmClone *aFriend, int overlap)
/* See if aFriend is better than current best friend, and if so update best friend. */
{
if (overlap > clone->bestOverlap)
{
clone->bestOverlap = overlap;
clone->bestFriend = aFriend;
}
}
void mmLoadOcp(char *fileName, struct hash *cloneHash,
struct overlappingClonePair **retOcpList, struct hash **retOcpHash)
/* Load up overlaps from file into list/hash. */
{
struct lineFile *lf = lineFileOpen(fileName, TRUE);
char *words[12];
struct overlappingClonePair *ocpList = NULL, *ocp;
struct hash *ocpHash = newHash(12);
-int score;
while (lineFileRow(lf, words))
{
AllocVar(ocp);
slAddHead(&ocpList, ocp);
hashAdd(ocpHash, ocpHashName(words[0], words[4]), ocp);
ocp->a = hashMustFindVal(cloneHash, words[0]);
ocp->b = hashMustFindVal(cloneHash, words[4]);
ocp->overlap = lineFileNeedNum(lf, words, 8);
ocp->sloppyOverlap = lineFileNeedNum(lf, words, 9);
ocp->obliviousOverlap = lineFileNeedNum(lf, words, 10);
ocp->aHitS = words[2][0];
ocp->aHitT = words[3][0];
ocp->bHitS = words[6][0];
ocp->bHitT = words[7][0];
ocp->externalPriority = lineFileNeedNum(lf, words, 11);
updateBestFriend(ocp->a, ocp->b, ocp->sloppyOverlap);
updateBestFriend(ocp->b, ocp->a, ocp->sloppyOverlap);
}
lineFileClose(&lf);
*retOcpList = ocpList;
*retOcpHash = ocpHash;
}
void ocpScoreAndSort(struct overlappingClonePair **pOcpList, boolean ignoreEnds)
/* Fill in scores on ocp list and sort with best scoring first.
* Optionally ignore end info in scoreing. */
{
struct overlappingClonePair *ocp;
for (ocp = *pOcpList; ocp != NULL; ocp = ocp->next)
ocp->score = ocpScore(ocp, ignoreEnds);
slSort(pOcpList, ocpCmpScore);
}
void cloneRefAddUnique(struct cloneRef **pRefList, struct mmClone *clone)
/* Add clone reference to list if not already on list. */
{
refAddUnique((struct slRef**)pRefList, clone);
}
void cloneRefAdd(struct cloneRef **pRefList, struct mmClone *clone)
/* Add clone reference to list. */
{
refAdd((struct slRef**)pRefList, clone);
}
void bepRefAddUnique(struct bepRef **pRefList, struct bep *bep)
/* Add clone reference to list if not already on list. */
{
refAddUnique((struct slRef**)pRefList, bep);
}
void mmLoadBacEndPairs(char *fileName, struct bep **retBepList, struct hash **retBepHash)
/* Make up list of bac end pairs from file. Create hash keyed into either read of
* pair. */
{
struct hash *bepHash = newHash(0);
struct bep *bepList = NULL, *bep;
struct lineFile *lf = lineFileOpen(fileName, TRUE);
char *words[4], *line;
int wordCount, lineSize;
while (lineFileNext(lf, &line, &lineSize))
{
if (line[0] == '#')
continue;
wordCount = chopTabs(line, words);
if (wordCount == 0)
continue;
lineFileExpectWords(lf, 3, wordCount);
AllocVar(bep);
slAddHead(&bepList, bep);
hashAddSaveName(bepHash, words[0], bep, &bep->a);
hashAddSaveName(bepHash, words[1], bep, &bep->b);
bep->cloneName = cloneString(words[2]);
}
lineFileClose(&lf);
slReverse(&bepList);
*retBepList = bepList;
*retBepHash = bepHash;
}
boolean filterBacEndPsl(struct psl *psl)
/* Figure out if a BAC end psl is a keeper. */
{
int startTail, endTail;
if (psl->match < 30)
return FALSE;
if (psl->match + psl->repMatch < 60)
return FALSE;
pslTailSizes(psl, &startTail, &endTail);
if (startTail > 50 || endTail > 50)
return FALSE;
if (pslCalcMilliBad(psl, FALSE) > 50)
return FALSE;
return TRUE;
}
void mmLoadBacEndPsl(char *fileName, struct hash *bepHash, struct hash *cloneHash)
/* Load in decent looking alignments from bacEnd.psl, and fill in info
* linking bacEnds and clones based on this. */
{
struct lineFile *lf = pslFileOpen(fileName);
struct bep *bep;
struct mmClone *clone;
char cloneName[128];
int count = 0, goodCount = 0;
struct psl *psl;
while ((psl = pslNext(lf)) != NULL)
{
++count;
bep = hashFindVal(bepHash, psl->qName);
if (bep == NULL || !filterBacEndPsl(psl))
{
pslFree(&psl);
continue;
}
++goodCount;
fragToCloneName(psl->tName, cloneName);
clone = hashFindVal(cloneHash, cloneName);
if (clone == NULL)
{
warn("Clone %s is in %s but not cloneInfo", cloneName, fileName);
continue;
}
cloneRefAddUnique(&bep->cloneList, clone);
bepRefAddUnique(&clone->bepList, bep);
if (sameString(psl->qName, bep->a))
{
slAddHead(&bep->aList, psl);
}
else
{
assert(sameString(psl->qName, bep->b));
slAddHead(&bep->bList, psl);
}
}
lineFileClose(&lf);
}
boolean cloneHasKnownEnd(struct mmClone *clone)
/* Return TRUE if clone has a known end. */
{
return clone->sEnd->frag != NULL || clone->tEnd->frag != NULL;
}
struct barge *bargeOfOne(struct mmClone *clone, struct dlList *bargeList)
/* Construct a barge around a single clone. */
{
struct barge *barge;
struct cloneEnd *start, *end;
AllocVar(barge);
barge->cloneEndList = newDlList();
barge->cloneList = newDlList();
dlAddValTail(barge->cloneList, clone);
barge->dlNode = dlAddValTail(bargeList, barge);
start = CloneVar(clone->sEnd);
start->isStart = TRUE;
dlAddValTail(barge->cloneEndList, start);
end = CloneVar(clone->tEnd);
end->isStart = FALSE;
if (clone->sEnd->frag || clone->tEnd->frag)
start->orientation = end->orientation = 1;
dlAddValTail(barge->cloneEndList, end);
clone->barge = barge;
return barge;
}
void findCloneEndsOnList(struct dlList *list, struct mmClone *clone,
struct dlNode **retStart, struct dlNode **retEnd)
/* Find start and end nodes associated with clone on list. */
{
struct dlNode *node;
struct cloneEnd *ce;
boolean gotStart = FALSE, gotEnd = FALSE;
for (node = list->head; !dlEnd(node); node = node->next)
{
ce = node->val;
if (ce->clone == clone)
{
if (!gotStart)
{
assert(ce->isStart);
gotStart = TRUE;
*retStart = node;
}
else
{
assert(!ce->isStart);
gotEnd = TRUE;
*retEnd = node;
break;
}
}
}
if (!gotEnd)
errAbort("Couldn't find ends of %s in list", clone->name);
}
char ocpHitsHow(struct overlappingClonePair *ocp, struct mmClone *clone,
int relPos, boolean ignoreEnds)
/* Return how other clone in pair hits clone assuming other end is oriented.
* RelPos is whether clone is to left (+1) or right (-1) of other. */
{
char sHit, tHit;
if (ignoreEnds)
return '?';
if (clone == ocp->a)
{
sHit = ocp->aHitS;
tHit = ocp->aHitT;
}
else
{
assert(clone == ocp->b);
sHit = ocp->bHitS;
tHit = ocp->bHitT;
}
if (relPos > 0)
return tHit;
else if (relPos < 0)
return sHit;
else
return '?';
}
struct fitAt
/* A possible position clone fits. */
{
struct fitAt *next;
struct dlNode *start, *end; /* cloneEnd list nodes where clone goes. */
int orientation; /* +1, -1 or 0. */
boolean addAfter; /* True if add clone after start/end. */
int score; /* Some sort of score. */
};
void dumpFit(struct fitAt *fit, struct mmClone *newbie)
/* Print out info on fit. */
{
struct cloneEnd *se = fit->start->val, *ee = fit->end->val;
logIt(" %s fits %s %s (%s) %s (%s), score %d\n",
newbie->name,
fit->addAfter ? "after" : "before",
se->clone->name,
se->isStart ? "start" : "end",
ee->clone->name,
ee->isStart ? "start" : "end",
fit->score);
}
int fitAtCmpScore(const void *va, const void *vb)
/* Compare to sort based on score. */
{
const struct fitAt *a = *((struct fitAt **)va);
const struct fitAt *b = *((struct fitAt **)vb);
return b->score - a->score;
}
int overlapSize(struct mmClone *a, struct mmClone *b, struct hash *ocpHash, boolean sloppy)
/* Return size of overlap between a and b. */
{
struct overlappingClonePair *ocp;
if (a == b)
return a->size;
ocp = findHashedOcp(a->name, b->name, ocpHash);
if (ocp == NULL)
return 0;
if (sloppy)
return ocp->sloppyOverlap;
else
return ocp->overlap;
}
int addMissingOverlap(struct dlNode *startN, struct dlNode *endN,
struct mmClone *newbie, struct hash *ocpHash, boolean sloppy)
/* Add together all overlaps that should be present between
* newbie and clones, assuming that newbie covers all of the
* ends in between startN and endN (including startN and endN)
* The result will be 0 if all overlaps are present, or a negative
* number if some are missed.
* This routine assumes that there are no clones fully enclosed
* between startN and endN. */
{
struct cloneEnd *startE = startN->val, *endE = endN->val, *ce;
struct mmClone *startC = startE->clone, *endC = endE->clone, *clone;
int missedOverlap = 0, missedOne, minOverlap, overlap;
struct dlNode *node;
for (node = startN; node != endN->next; node = node->next)
{
ce = node->val;
clone = ce->clone;
if (ce->isStart)
{
if (endE->isStart)
minOverlap = clone->size - overlapSize(clone, endC, ocpHash, sloppy);
else
minOverlap = overlapSize(clone, endC, ocpHash, sloppy);
}
else
{
if (startE->isStart)
minOverlap = overlapSize(clone, startC, ocpHash, sloppy);
else
minOverlap = clone->size - overlapSize(clone, startC, ocpHash, sloppy);
}
overlap = overlapSize(clone, newbie, ocpHash, sloppy);
missedOne = overlap - round(minOverlap*0.90);
if (missedOne < 0)
missedOverlap += missedOne;
}
return missedOverlap;
}
int addPresentOverlap(struct dlNode *innerStart, struct dlNode *innerEnd,
struct mmClone *newbie, struct hash *ocpHash, boolean sloppy)
/* Return amount that newbie overlaps with it's two neighbors,
* (don't count same bits twice). */
{
struct cloneEnd *ce;
struct mmClone *before = NULL, *after = NULL;
struct dlNode *node;
/* Figure out first clone before if any. */
for (node = innerStart->prev; !dlStart(node); node = node->prev)
{
ce = node->val;
if (ce->isStart)
{
before = ce->clone;
break;
}
}
/* Figure out first node after if any. */
for (node = innerEnd->next; !dlEnd(node); node = node->next)
{
ce = node->val;
if (!ce->isStart)
{
after = ce->clone;
break;
}
}
if (before == NULL)
return overlapSize(newbie, after, ocpHash, sloppy);
else if (after == NULL)
return overlapSize(newbie, before, ocpHash, sloppy);
else
{
return overlapSize(newbie, after, ocpHash, sloppy) + overlapSize(newbie, before, ocpHash, sloppy)
- overlapSize(before, after, ocpHash, sloppy);
}
}
boolean insideEndsFit(struct mmClone *newbie,
struct dlNode *innerStart, struct dlNode *outerEnd,
struct hash *ocpHash, boolean ignoreEnds)
/* Return TRUE if all known ends from innerStart to outerEnd (half open interval)
* hit newbie, and that newbie overlaps with all clones in interval. */
{
struct dlNode *n1;
struct mmClone *clone;
struct cloneEnd *ce;
struct overlappingClonePair *ocp;
int lorient;
char how;
logIt(" insideEndsFit: newbie %s\n", newbie->name);
if (uglyerfOn)
{
struct dlNode *innerEnd = outerEnd->prev;
struct cloneEnd *se = innerStart->val, *ee = innerEnd->val;
uglyerf(" %s (%s) to %s (%s)\n",
se->clone->name, (se->isStart ? "start" : "end"),
ee->clone->name, (ee->isStart ? "start" : "end"));
}
/* See if this implies overlaps that don't exist. */
for (n1 = innerStart; n1 != outerEnd; n1 = n1->next)
{
ce = n1->val;
clone = ce->clone;
ocp = findHashedOcp(newbie->name, clone->name, ocpHash);
if (ocp == NULL)
{
logIt(" no overlap between %s and %s\n", newbie->name, clone->name);
return FALSE;
}
if (ocp->overlap == 0 && !ignoreEnds)
{
logIt(" ignoring sloppy overlap %d between %s and %s\n", ocp->sloppyOverlap,
newbie->name, clone->name);
return FALSE;
}
/* Check end specific matches. */
lorient = ce->orientation;
if (ce->isStart)
lorient = -lorient;
how = ocpHitsHow(ocp, clone, lorient, ignoreEnds);
if (how == 'N')
{
logIt(" %s (%s) doesn't hit %s\n", clone->name,
(ce->isStart ? "start" : "end"), newbie->name);
return FALSE;
}
}
return TRUE;
}
struct fitAt *cloneFitsAt(struct mmClone *newbie, struct dlNode *startN, struct dlNode *endN,
boolean addAfter, int newbieOrientation,
struct mmClone *member, struct dlNode *memStart,
struct dlNode *memEnd, struct hash *ocpHash, boolean ignoreEnds)
/* Return a fitAt structure if clone can fit at position . */
{
struct dlNode *n1, *n2;
struct dlNode *innerStart, *outerEnd;
struct cloneEnd *ce;
struct mmClone *clone;
-struct overlappingClonePair *ocp;
-char how;
struct fitAt *fit;
int score, minScore;
int missingOverlap, presentOverlap;
int overlapCloneCount = 0;
/* Figure out the first end inside of newbie, and the first end after
* newbie. */
if (addAfter)
{
innerStart = startN->next;
outerEnd = endN->next;
}
else
{
innerStart = startN;
outerEnd = endN;
}
if (uglyerfOn)
{
struct dlNode *innerEnd = outerEnd->prev;
struct cloneEnd *se = innerStart->val, *ee = innerEnd->val;
uglyerf(" cloneFitsAt (inside) %s (%s) to %s (%s)\n",
se->clone->name, (se->isStart ? "start" : "end"),
ee->clone->name, (ee->isStart ? "start" : "end"));
}
/* See if this would lead to new clone enclosing another. */
for (n1 = innerStart; n1 != outerEnd; n1 = n1->next)
{
ce = n1->val;
clone = ce->clone;
overlapCloneCount += 1;
if (ce->isStart)
{
for (n2 = n1->next; n2 != outerEnd; n2 = n2->next)
{
ce = n2->val;
if (ce->clone == clone)
{
return NULL;
}
}
}
}
/* See if this would lead to new clone being enclosed. */
if (addAfter)
{
for (n1 = memStart; n1 != memEnd; n1 = n1->next)
{
boolean newbieStartsInside = FALSE, newbieEndsInside = FALSE;
ce = n1->val;
clone = ce->clone;
if (ce->isStart)
{
for (n2 = n1; ;n2 = n2->next)
{
ce = n2->val;
if (n2 == startN)
newbieStartsInside = TRUE;
if (ce->clone == clone && !ce->isStart)
break;
if (n2 == endN)
newbieEndsInside = TRUE;
}
}
else
{
for (n2 = n1->prev; ;n2 = n2->prev)
{
if (n2 == endN)
newbieEndsInside = TRUE;
if (n2 == startN)
newbieStartsInside = TRUE;
ce = n2->val;
if (ce->clone == clone && ce->isStart)
break;
}
}
if (newbieStartsInside && newbieEndsInside)
return NULL;
}
}
else
{
for (n1 = memEnd; n1 != memStart; n1 = n1->prev)
{
boolean newbieStartsInside = FALSE, newbieEndsInside = FALSE;
ce = n1->val;
clone = ce->clone;
if (!ce->isStart)
{
for (n2 = n1; ;n2 = n2->prev)
{
ce = n2->val;
if (n2 == endN)
newbieEndsInside = TRUE;
if (ce->clone == clone && ce->isStart)
break;
if (n2 == startN)
newbieStartsInside = TRUE;
}
}
else
{
for (n2 = n1->next; ;n2 = n2->next)
{
if (n2 == startN)
newbieStartsInside = TRUE;
if (n2 == endN)
newbieEndsInside = TRUE;
ce = n2->val;
if (ce->clone == clone && !ce->isStart)
break;
}
}
if (newbieStartsInside && newbieEndsInside)
return NULL;
}
}
if (!insideEndsFit(newbie, innerStart, outerEnd, ocpHash, ignoreEnds))
return NULL;
/* Score fit according to how well overlap distances add up. */
missingOverlap = addMissingOverlap(innerStart, outerEnd->prev, newbie, ocpHash, ignoreEnds);
presentOverlap = addPresentOverlap(innerStart, outerEnd->prev, newbie, ocpHash, ignoreEnds);
score = (presentOverlap<<4) + (missingOverlap<<6) + overlapCloneCount;
minScore = overlapCloneCount + 1;
if (score < minScore)
return NULL;
/* Allocate and fill in fit structure. */
AllocVar(fit);
fit->start = startN;
fit->end = endN;
fit->orientation = newbieOrientation;
fit->addAfter = addAfter;
fit->score = score;
dumpFit(fit, newbie);
return fit;
}
void addFittedClone(struct barge *barge, struct mmClone *clone, struct fitAt *fit)
/* Add clone at specific position in barge. */
{
struct dlNode *(*listAdder)(struct dlNode *anchor, void *val);
struct dlNode *firstNode;
struct cloneEnd *start,*end;
listAdder = (fit->addAfter ? dlAddValAfter : dlAddValBefore);
dlAddValTail(barge->cloneList, clone);
clone->barge = barge;
if (fit->orientation < 0)
{
start = clone->tEnd;
end = clone->sEnd;
}
else
{
start = clone->sEnd;
end = clone->tEnd;
}
start = CloneVar(start);
end = CloneVar(end);
start->isStart = TRUE;
end->isStart = FALSE;
start->orientation = end->orientation = fit->orientation;
firstNode = (*listAdder)(fit->start, start);
if (fit->end != fit->start)
(*listAdder)(fit->end, end);
else
dlAddValAfter(firstNode, end);
}
enum orientTypes
/* Possible clone orientations. */
{
orientImpossible = -2,
orientForward = 1,
orientReverse = -1,
orientUnknown = 0,
};
enum orientTypes ocpCloneOrientation(struct overlappingClonePair *ocp,
struct mmClone *clone, boolean ignoreEnds)
/* Return orientation of clone relative to other clone in pair based on end matches.
* Returns 1 (orientForward) if it appears clone comes after other clone. */
{
-int orientation = orientUnknown;
char how, howFlip;
how = ocpHitsHow(ocp, clone, -1, ignoreEnds);
howFlip = ocpHitsHow(ocp, clone, 1, ignoreEnds);
if (how == 'N' && howFlip == 'N')
return orientImpossible;
if (how == 'Y')
return orientForward;
else if (howFlip == 'Y')
return orientReverse;
else if (how == 'N')
return orientReverse;
else if (howFlip == 'N')
return orientForward;
else if (howFlip == '?' && how == '?')
return orientUnknown;
else
{
warn("Case that should never happen in ocpCloneOrientation.");
assert(FALSE);
return orientImpossible;
}
}
struct fitAt *bargeFitListLeft(struct barge *barge,
struct mmClone *newbie, struct mmClone *member,
struct dlNode *memStart, struct dlNode *memEnd,
struct overlappingClonePair *joinOcp,
struct hash *ocpHash, boolean ignoreEnds)
/* Return scored list of all places newbie could fit overlapping member
* to left in barge. */
{
-char how, howFlip, newHow;
+char how, howFlip;
int newbieOrientation = 0, memberOrientation;
struct dlNode *node, *firstStart, *n1, *n2;
struct cloneEnd *ce;
struct mmClone *clone;
struct overlappingClonePair *ocp;
struct fitAt *fitList = NULL, *fit;
-int score;
uglyerf("bargeFitListLeft(newbie %s, member %s)\n", newbie->name, member->name);
ce = memStart->val;
memberOrientation = ce->orientation;
how = ocpHitsHow(joinOcp, member, -memberOrientation, ignoreEnds);
uglyerf(" how = %c\n", how);
if (how == 'N')
return FALSE;
howFlip = ocpHitsHow(joinOcp, member, memberOrientation, ignoreEnds);
uglyerf(" howFlip = %c\n", howFlip);
if (howFlip == 'Y')
return FALSE; /* Might eventually want to just decrease score greatly. */
if (cloneHasKnownEnd(newbie))
{
newbieOrientation = ocpCloneOrientation(joinOcp, newbie, ignoreEnds);
if (newbieOrientation == orientImpossible)
return FALSE;
newbieOrientation = -newbieOrientation;
}
uglyerf(" newbieOrientation = %d\n", newbieOrientation);
/* Figure out where can stop scanning for newbie start sites
* because have a clone ending that newbie doesn't overlap. */
uglyerf(" scanning for firstStart\n");
for (node = memStart->prev; !dlStart(node); node = node->prev)
{
ce = node->val;
uglyerf(" %s %s\n", ce->clone, (ce->isStart ? "start" : "end"));
if (!ce->isStart)
{
clone = ce->clone;
ocp = findHashedOcp(clone->name, newbie->name, ocpHash);
if (ocp == NULL)
break;
}
}
firstStart = node;
{
if (dlStart(node))
{
uglyerf("First node past beginning\n", ce->clone->name);
}
else
{
ce = node->val;
uglyerf("First node %s\n", ce->clone->name);
}
}
for (n1 = memEnd; n1 != memStart; n1 = n1->prev)
{
for (n2 = memStart; n2 != firstStart; n2 = n2->prev)
{
if ((fit = cloneFitsAt(newbie, n2, n1, FALSE,
newbieOrientation, member, memStart, memEnd, ocpHash, ignoreEnds)) != NULL)
{
slAddHead(&fitList, fit);
}
}
}
return fitList;
}
boolean bargeAddLeft(struct barge *barge,
struct mmClone *newbie, struct mmClone *member,
struct dlNode *memStart, struct dlNode *memEnd,
struct overlappingClonePair *joinOcp,
struct hash *ocpHash, boolean doAdd,
boolean ignoreEnds, int *retOrientation)
/* See if can add barge to left of member (which is defined by it's ends on
* barge->cloneEndList) */
{
struct fitAt *fitList, *fit;
fitList = bargeFitListLeft(barge, newbie, member, memStart, memEnd, joinOcp, ocpHash, ignoreEnds);
slSort(&fitList, fitAtCmpScore);
if ((fit = fitList) != NULL)
{
if (doAdd)
addFittedClone(barge, newbie, fit);
*retOrientation = fit->orientation;
}
slFreeList(&fitList);
return fit != NULL;
}
struct fitAt *bargeFitListRight(struct barge *barge,
struct mmClone *newbie, struct mmClone *member,
struct dlNode *memStart, struct dlNode *memEnd,
struct overlappingClonePair *joinOcp,
struct hash *ocpHash, boolean ignoreEnds)
/* Return scored list of all places newbie could fit overlapping member
* to right in barge. */
{
-char how, howFlip, newHow;
+char how, howFlip;
int newbieOrientation = 0, memberOrientation ;
struct dlNode *node, *lastEnd, *n1, *n2;
struct cloneEnd *ce;
struct mmClone *clone;
struct overlappingClonePair *ocp;
struct fitAt *fitList = NULL, *fit;
boolean badYes = FALSE;
int score = 0;
ce = memStart->val;
memberOrientation = ce->orientation;
how = ocpHitsHow(joinOcp, member, memberOrientation, ignoreEnds);
if (how == 'N')
return FALSE;
howFlip = ocpHitsHow(joinOcp, member, -memberOrientation, ignoreEnds);
if (howFlip == 'Y')
badYes = TRUE;
if (cloneHasKnownEnd(newbie))
{
newbieOrientation = ocpCloneOrientation(joinOcp, newbie, ignoreEnds);
if (newbieOrientation == orientImpossible)
return FALSE;
}
/* Figure out where can stop scanning for newbie end sites
* because have a clone starting that newbie doesn't overlap. */
for (node = memEnd->next; !dlEnd(node); node = node->next)
{
ce = node->val;
if (ce->isStart)
{
clone = ce->clone;
ocp = findHashedOcp(clone->name, newbie->name, ocpHash);
if (ocp == NULL)
break;
}
}
lastEnd = node;
for (n1 = memStart; n1 != memEnd; n1 = n1->next)
{
for (n2 = memEnd; n2 != lastEnd; n2 = n2->next)
{
if ((fit = cloneFitsAt(newbie, n1, n2, TRUE,
newbieOrientation, member, memStart, memEnd, ocpHash, ignoreEnds)) != NULL)
{
if (badYes)
{
if (score > 0)
score /= 4;
score -= 100;
}
slAddHead(&fitList, fit);
}
}
}
return fitList;
}
boolean bargeAddRight(struct barge *barge,
struct mmClone *newbie, struct mmClone *member,
struct dlNode *memStart, struct dlNode *memEnd,
struct overlappingClonePair *joinOcp,
struct hash *ocpHash, boolean doAdd,
boolean ignoreEnds, int *retOrientation)
/* See if can add barge to right of member (which is defined by it's ends on
* barge->cloneEndList) */
{
struct fitAt *fitList, *fit;
fitList = bargeFitListRight(barge, newbie, member, memStart, memEnd, joinOcp, ocpHash, ignoreEnds);
slSort(&fitList, fitAtCmpScore);
if ((fit = fitList) != NULL)
{
if (doAdd)
addFittedClone(barge, newbie, fit);
*retOrientation = fit->orientation;
}
slFreeList(&fitList);
return fit != NULL;
}
void dumpCloneEnds(struct dlList *list, FILE *f)
/* Dump out info on clone ends in list to f. */
{
struct dlNode *node;
int orientation;
for (node = list->head; !dlEnd(node); node = node->next)
{
struct cloneEnd *ce = node->val;
orientation = ce->orientation;
fprintf(f, " ");
if (ce->isStart)
{
if (orientation > 0)
fprintf(f, "+");
else if (orientation < 0)
fprintf(f, "-");
else
fprintf(f, "?");
fprintf(f, "(");
}
fprintf(f, "%s", ce->clone->name);
if (!ce->isStart)
fprintf(f, ")");
}
fprintf(f, "\n");
}
boolean bargeAdd(struct barge *barge, struct mmClone *newbie, struct mmClone *member,
struct hash *ocpHash, boolean ignoreEnds, int *retOrientation, boolean *retAddAfter)
/* Attempt to add a clone 'newbie' to barge via connection with 'member'. */
{
struct dlNode *memStart, *memEnd; /* Start/end nodes of member. */
struct overlappingClonePair *ocp;
-int ori;
struct fitAt *leftFits, *rightFits, *fitList, *fit;
logIt("bargeAdd: newbie %s, member %s\n", newbie->name, member->name);
dumpCloneEnds(barge->cloneEndList, logFile);
ocp = findHashedOcp(newbie->name, member->name, ocpHash);
findCloneEndsOnList(barge->cloneEndList, member, &memStart, &memEnd);
/* Find places where could fit and sort by score. */
rightFits = bargeFitListRight(barge, newbie, member, memStart, memEnd, ocp, ocpHash, ignoreEnds);
leftFits = bargeFitListLeft(barge, newbie, member, memStart, memEnd, ocp, ocpHash, ignoreEnds);
fitList = slCat(leftFits, rightFits);
leftFits = rightFits = NULL;
slSort(&fitList, fitAtCmpScore);
/* Add new clone at best fit if any. */
if ((fit = fitList) == NULL)
{
logIt(" %s doesn't fit in barge with %s\n", newbie->name, member->name);
return FALSE;
}
addFittedClone(barge, newbie, fit);
dumpCloneEnds(barge->cloneEndList, logFile);
*retOrientation = fit->orientation;
*retAddAfter = fit->addAfter;
slFreeList(&fitList);
return TRUE;
}
struct barge *bargeOfTwo(struct overlappingClonePair *ocp, struct dlList *bargeList,
struct hash *ocpHash, struct overlappingClonePair *ocpList, boolean ignoreEnds)
/* Make up a barge with just two elements. */
{
struct mmClone *cloneA = ocp->a, *cloneB = ocp->b;
struct barge *barge;
int orientation;
boolean addAfter;
logIt("Barge of two: %s %s %d %d\n", cloneA->name, cloneB->name, ocp->overlap, ocp->score);
barge = bargeOfOne(cloneA, bargeList);
if (!bargeAdd(barge, cloneB, cloneA, ocpHash, ignoreEnds, &orientation, &addAfter))
{
if (ocp->overlap > 0)
errAbort("Can't add second barge in bargeOfTwo(%s %s)", cloneA->name, cloneB->name);
}
return barge;
}
void addToBarge(struct barge *barge, struct overlappingClonePair *ocp,
struct mmClone *member, struct mmClone *newbie,
struct hash *ocpHash, struct overlappingClonePair *ocpList, boolean ignoreEnds)
/* Add clone to existing barge. */
{
int orientation;
boolean addAfter;
logIt("Adding clone %s to barge with %s\n", newbie->name, member->name);
bargeAdd(barge, newbie, member, ocpHash, ignoreEnds, &orientation, &addAfter);
}
struct dlList *dlClone(struct dlList *orig)
/* Make a copy of orig list and return it. */
{
struct dlList *cc = newDlList();
struct dlNode *node;
for (node = orig->head; !dlEnd(node); node = node->next)
dlAddValTail(cc, node->val);
return cc;
}
/* When merging barges we do it tentatively, first saving
* enough state to restore data structures if merger doesn't
* work out, and then merging. */
boolean tentativelyMergeBarges(struct barge *big, struct barge *little,
struct overlappingClonePair *joiningOcp, struct hash *ocpHash, boolean ignoreEnds)
/* Merge little barge into big barge. If there's a problem
* return false. The big barge may have the little barge
* partly merged in at this stage. The function "mergeBarge"
* below will clean things up if this happens. */
{
struct mmClone *member, *newbie, *initialNewbie;
int littleOrientation, bigOrientation;
int relBargeOrientation = 0;
struct dlNode *iNewStart, *iNewEnd;
struct dlNode *memStart, *memEnd;
struct dlNode *n1;
struct cloneEnd *ce;
boolean leftOk, rightOk, addAfter;
struct overlappingClonePair *ocp;
-char *newbieName;
/* Figure out which clone is already part of big barge. */
if (joiningOcp->a->barge == big)
{
member = joiningOcp->a;
initialNewbie = joiningOcp->b;
}
else
{
assert(joiningOcp->b->barge == big);
member = joiningOcp->b;
initialNewbie = joiningOcp->a;
}
logIt("Tentatively merging barges\n");
logIt("big: ");
dumpCloneEnds(big->cloneEndList, logFile);
logIt("little: ");
dumpCloneEnds(little->cloneEndList, logFile);
findCloneEndsOnList(little->cloneEndList, initialNewbie, &iNewStart, &iNewEnd);
for (n1 = iNewStart; !dlEnd(n1); n1 = n1->next)
{
ce = n1->val;
if (ce->isStart)
{
newbie = ce->clone;
leftOk = rightOk = FALSE;
littleOrientation = ce->orientation;
findCloneEndsOnList(big->cloneEndList, member, &memStart, &memEnd);
ocp = findHashedOcp(member->name, newbie->name, ocpHash);
if (ocp == NULL)
errAbort("Buh, neighbors don't overlap in merge barge1 ?");
if (relBargeOrientation == 0)
{
if (!bargeAdd(big, newbie, member, ocpHash, ignoreEnds, &bigOrientation, &addAfter))
{
if (newbie == initialNewbie)
logIt("Couldn't tentatively merge initial\n");
else
logIt("Couldn't tentatively second unoriented\n");
return FALSE;
}
if (relBargeOrientation == 0)
{
relBargeOrientation = littleOrientation * bigOrientation;
}
if (newbie != initialNewbie && relBargeOrientation == 0)
{
relBargeOrientation = (addAfter ? 1 : -1);
}
}
else if (relBargeOrientation > 0)
{
if (!bargeAddRight(big, newbie, member, memStart, memEnd, ocp,
ocpHash, TRUE, ignoreEnds, &bigOrientation))
{
logIt("Couldn't tentatively merge right\n");
return FALSE;
}
}
else if (relBargeOrientation < 0)
{
if (!bargeAddLeft(big, newbie, member, memStart,
memEnd, ocp, ocpHash, TRUE, ignoreEnds, &bigOrientation))
{
logIt("Couldn't tentatively merge left\n");
return FALSE;
}
}
if (bigOrientation != 0 && littleOrientation != 0 && relBargeOrientation != 0)
{
if (relBargeOrientation != bigOrientation*littleOrientation)
{
if (!ignoreEnds)
warn("Inconsistent orientations in tentatively merge at %s %s",
newbie->name, member->name);
else
{
logIt("Couldn't tentatively merge left from inconsistent orientations in 2nd pass\n");
return FALSE;
}
}
}
dumpCloneEnds(big->cloneEndList, logFile);
member = newbie;
}
}
member = initialNewbie;
for (n1 = iNewStart->prev; !dlStart(n1); n1 = n1->prev)
{
ce = n1->val;
if (ce->isStart)
{
newbie = ce->clone;
littleOrientation = ce->orientation;
findCloneEndsOnList(big->cloneEndList, member, &memStart, &memEnd);
ocp = findHashedOcp(member->name, newbie->name, ocpHash);
if (ocp == NULL)
errAbort("Buh, neighbors don't overlap in merge barge2 ?");
if (relBargeOrientation == 0)
{
if (!bargeAdd(big, newbie, member, ocpHash, ignoreEnds, &bigOrientation, &addAfter))
{
logIt("Couldn't tentatively second unoriented left\n");
return FALSE;
}
if (relBargeOrientation == 0)
{
relBargeOrientation = littleOrientation * bigOrientation;
}
if (relBargeOrientation == 0)
{
relBargeOrientation = (addAfter ? -1 : 1);
}
}
else if (relBargeOrientation < 0)
{
if (!bargeAddRight(big, newbie, member, memStart, memEnd, ocp,
ocpHash, TRUE, ignoreEnds, &bigOrientation))
{
logIt("Couldn't tentatively merge right 2\n");
return FALSE;
}
}
else if (relBargeOrientation > 0)
{
if (!bargeAddLeft(big, newbie, member, memStart, memEnd, ocp,
ocpHash, TRUE, ignoreEnds, &bigOrientation))
{
logIt("Couldn't tentatively merge left 2\n");
return FALSE;
}
}
if (bigOrientation != 0 && littleOrientation != 0 && relBargeOrientation != 0)
{
if (relBargeOrientation != bigOrientation*littleOrientation)
{
if (!ignoreEnds)
warn("Inconsistent orientations in tentatively merge 2 at %s %s",
newbie->name, member->name);
else
{
logIt("Couldn't tentatively merge right from inconsistent orientations in 2nd pass\n");
return FALSE;
}
}
}
dumpCloneEnds(big->cloneEndList, logFile);
member = newbie;
}
}
return TRUE;
}
struct barge *backupBarge(struct barge *orig)
/* Make a lightweight copy of barge - just enough to restore
* barge if need be. */
{
struct barge *cc;
struct cloneEnd *ce;
struct dlNode *node;
AllocVar(cc);
cc->cloneList = dlClone(orig->cloneList);
cc->cloneEndList = dlClone(orig->cloneEndList);
for (node = cc->cloneEndList->head; !dlEnd(node); node = node->next)
{
ce = node->val;
node->val = CloneVar(ce);
}
return cc;
}
void reassertBargeCloneOwnership(struct barge *barge)
/* Make sure all clones part of barge point to barge. */
{
struct dlNode *node;
struct mmClone *clone;
for (node = barge->cloneList->head; !dlEnd(node); node = node->next)
{
clone = node->val;
clone->barge = barge;
}
}
void restoreBarge(struct barge *barge, struct barge *backup)
/* Restore barge from backup. */
{
freeDlListAndVals(&barge->cloneEndList);
freeDlList(&barge->cloneList);
barge->cloneEndList = backup->cloneEndList;
backup->cloneEndList = NULL;
barge->cloneList = backup->cloneList;
backup->cloneList = NULL;
}
boolean mergeBarges(struct barge *bargeA, struct barge *bargeB,
struct overlappingClonePair *ocp, struct hash *ocpHash,
struct dlList *bargeList, boolean ignoreEnds)
/* Attempt to merge together two barges. If can't restore things
* so that original barges unchanged. */
{
struct barge *big, *little, *backup;
int aCount = dlCount(bargeA->cloneList), bCount = dlCount(bargeB->cloneList);
boolean ok;
logIt("Merging barges based on overlap between %s and %s\n", ocp->a->name, ocp->b->name);
logIt("BargeA has %d clones, bargeB has %d clones\n", aCount, bCount);
if (aCount >= bCount)
{
big = bargeA;
little = bargeB;
}
else
{
big = bargeB;
little = bargeA;
}
backup = backupBarge(big);
ok = tentativelyMergeBarges(big, little, ocp, ocpHash, ignoreEnds);
if (!ok)
{
restoreBarge(big, backup);
reassertBargeCloneOwnership(little);
}
else
{
dlRemove(little->dlNode);
freez(&little->dlNode);
freeDlList(&little->cloneEndList);
freeDlList(&little->cloneList);
freez(&little);
}
freeDlListAndVals(&backup->cloneEndList);
freeDlList(&backup->cloneList);
freez(&backup);
return ok;
}
int scoreEnclosingOverlap(struct overlappingClonePair *ocp, struct mmClone *inner,
boolean ignoreEnds)
/* Score whether it looks like outer clone encloses inner clone.
* Return number between -2 (no way!) to 1 (yep) */
{
int innerSize = inner->size;
if (ocp == NULL)
return -2;
if (ocp->overlap < innerSize * 0.75)
return -1;
if (!ignoreEnds)
{
if (ocp->a == inner)
{
if (ocp->aHitS == 'Y' && ocp->aHitT == 'Y' && (ocp->bHitS != 'Y' || ocp->bHitT != 'Y'))
return 1;
if (ocp->aHitS == 'N' || ocp->aHitT == 'N')
return -1;
}
else
{
if (ocp->bHitS == 'Y' && ocp->bHitT == 'Y' && (ocp->aHitS != 'Y' || ocp->aHitT != 'Y'))
return 1;
if (ocp->bHitS == 'N' || ocp->bHitT == 'N')
return -1;
}
}
if (ocp->overlap < innerSize * 0.85)
return 0;
if (!ignoreEnds)
{
if (ocp->a == inner)
{
if (ocp->aHitS == 'Y' && ocp->aHitT == 'Y')
return 1;
if (ocp->bHitS == 'N' && ocp->bHitT == 'N')
return 1;
}
else
{
if (ocp->bHitS == 'Y' && ocp->bHitT == 'Y')
return 1;
if (ocp->aHitS == 'N' && ocp->aHitT == 'N')
return 1;
}
}
if (ocp->sloppyOverlap > innerSize * 0.98)
return 1;
if (ocp->overlap < innerSize * 0.95)
return 0;
return 1;
}
int scoreNonenclosingOverlap(struct overlappingClonePair *ocp, struct mmClone *a, struct mmClone *b)
/* Score whether it looks like two clones overlap, but neither is enclosed.
* Returns 1 (yes), -1 (no) or 0 (maybe). */
{
if (ocp == NULL)
return -1;
logIt(" scoreNonenclosing: ");
ocpDump(ocp, logFile);
if (ocpGreatEnds(ocp->aHitS, ocp->aHitT))
return 1;
if (ocpGreatEnds(ocp->bHitS, ocp->bHitT))
return 1;
if (ocp->overlap < a->size * 0.75 && ocp->overlap < b->size * 0.75)
return 1;
if ((ocp->aHitS == 'Y' && ocp->aHitT == 'Y') || (ocp->bHitS == 'Y' && ocp->bHitT == 'Y'))
return -1;
if (ocp->overlap < a->size * 0.85 && ocp->overlap < b->size * 0.85)
return 1;
if ((ocp->aHitS == 'N' && ocp->aHitT == 'N') || (ocp->bHitS == 'N' && ocp->bHitT == 'N'))
return -1;
if (ocp->overlap < a->size * 0.90 && ocp->overlap < b->size * 0.90)
return 1;
if (ocp->overlap < a->size * 0.97 && ocp->overlap < b->size * 0.97)
return 0;
return -1;
}
void markBuriedClones(struct overlappingClonePair *ocpList, boolean ignoreEnds)
/* Set buried field in clones. */
{
struct overlappingClonePair *ocp;
int dist;
for (ocp = ocpList; ocp != NULL; ocp = ocp->next)
{
dist = maxMapDif(ocp->a, ocp->b);
if (dist <= 2*maxMapDeviation)
{
if (ocp->a->size > ocp->b->size)
{
if (scoreEnclosingOverlap(ocp, ocp->b, ignoreEnds) > 0)
{
if (ocp->b->barge == NULL)
{
ocp->b->enclosed = ocp->a;
logIt("%s covers %s\n", ocp->a->name, ocp->b->name);
}
}
}
else
{
if (scoreEnclosingOverlap(ocp, ocp->a, ignoreEnds) > 0)
{
if (ocp->a->barge == NULL)
{
ocp->a->enclosed = ocp->b;
logIt("%s covers %s\n", ocp->b->name, ocp->a->name);
}
}
}
}
}
logIt("\n");
}
boolean orientPair(struct overlappingClonePair *ocp, struct mmClone *clone,
int relOrientation, boolean ignoreEnds, int *pCloneOrientation)
/* See if orientation of clone either is, or if unknown can be made
* consistent with position implied by one clone coming after the other
* and ocp. Just checks clone ends, not other clone ends
* if any in ocp. */
{
int oldOrientation = *pCloneOrientation;
int ocpOrientation = ocpCloneOrientation(ocp, clone, ignoreEnds);
if (ocpOrientation == orientImpossible)
return FALSE;
if (ocpDoubleMiss(ocp->aHitS, ocp->aHitT))
return FALSE;
if (ocpDoubleMiss(ocp->bHitS, ocp->bHitT))
return FALSE;
if (ocpOrientation != orientUnknown)
{
ocpOrientation *= relOrientation;
if (oldOrientation != orientUnknown && oldOrientation != ocpOrientation)
return FALSE; /* Contradicts earlier orientation. */
*pCloneOrientation = ocpOrientation;
}
return TRUE;
}
struct fitAt *fitEnclosed(struct dlNode *listStart, struct dlNode *listEnd,
struct mmClone *clone, struct dlNode *outerStart, struct dlNode *outerEnd,
struct hash *ocpHash, boolean ignoreEnds)
/* Figure out if can place clone between outerStart and outerEnd. */
{
struct dlNode *innerStart = outerStart->next, *innerEnd = outerEnd->prev;
struct dlNode *testStart, *testEnd;
struct cloneEnd *ce;
struct mmClone *testClone;
boolean pastCloneStart = FALSE, pastCloneEnd = FALSE;
int fitScore = 0, oneScore;
struct overlappingClonePair *ocp;
struct fitAt *fit;
int newbieOrientation = 0;
{
struct cloneEnd *ce1 = outerStart->val, *ce2 = outerEnd->val;
char name1[64], name2[64];
if (ce1 == NULL)
sprintf(name1, "BEGIN");
else
sprintf(name1, "%s(%s)", ce1->clone->name,
(ce1->isStart ? "start" : "end"));
if (ce2 == NULL)
sprintf(name2, "END");
else
sprintf(name2, "%s(%s)", ce2->clone->name,
(ce2->isStart ? "start" : "end"));
logIt(" fitEnclosed( %s between %s and %s)\n", clone->name, name1, name2);
}
if (!insideEndsFit(clone, innerStart, outerEnd, ocpHash, ignoreEnds))
{
logIt(" insideEnds don't fit\n");
return NULL;
}
for (testStart = listStart; testStart != listEnd; testStart = testStart->next)
{
if (testStart == innerStart) pastCloneStart = TRUE;
if (testStart == outerEnd) pastCloneEnd = TRUE;
ce = testStart->val;
if (ce->isStart)
{
struct dlNode *gotInStart, *gotInEnd, *gotOutStart,*gotOutEnd;
gotInStart = gotInEnd = gotOutStart = gotOutEnd = NULL;
testClone = ce->clone;
for (testEnd = testStart; ;testEnd = testEnd->next)
{
if (testEnd == innerStart) gotInStart = testEnd;
if (testEnd == outerStart) gotOutStart = testEnd;
if (testEnd == innerEnd) gotInEnd = testEnd;
if (testEnd == outerEnd) gotOutEnd = testEnd;
ce = testEnd->val;
if (ce->clone == testClone && !ce->isStart)
break;
}
ocp = findHashedOcp(testClone->name, clone->name, ocpHash);
if (gotOutStart != NULL && gotOutEnd!= NULL )
{
/* Case where test encloses clone. */
oneScore = scoreEnclosingOverlap(ocp, clone, ignoreEnds);
fitScore += oneScore;
logIt(" %s encloses %s score %d\n", testClone->name, clone->name, oneScore);
}
else if (gotOutStart == NULL && gotOutEnd == NULL)
{
if (pastCloneStart && !pastCloneEnd && innerStart != outerEnd)
{
/* Case where clone encloses test. */
oneScore = scoreEnclosingOverlap(ocp, testClone, ignoreEnds);
fitScore += oneScore;
logIt(" %s enclosed by %s score %d\n", testClone->name, clone->name, oneScore);
}
else
{
/* Case where test is before clone, no overlap. */
}
}
else if (gotOutStart != NULL && gotOutStart != testEnd)
{
/* Case where order is testClone, clone and the two overlap. */
oneScore = scoreNonenclosingOverlap(ocp, clone, testClone);
logIt(" ***oneScore %d\n", oneScore);
if (!orientPair(ocp, clone, 1, ignoreEnds, &newbieOrientation))
return NULL;
fitScore += oneScore;
logIt(" %s before %s score %d\n", testClone->name, clone->name, oneScore);
}
else if (gotOutEnd != NULL && gotOutEnd != testStart)
{
/* Case where order is clone, testClone and the two overlap. */
oneScore = scoreNonenclosingOverlap(ocp, testClone, clone);
logIt(" ***oneScore %d\n", oneScore);
fitScore += oneScore;
if (!orientPair(ocp, clone, -1, ignoreEnds, &newbieOrientation))
return NULL;
logIt(" %s after %s score %d\n", testClone->name, clone->name, oneScore);
}
}
}
AllocVar(fit);
fit->start = outerStart;
fit->end = innerEnd;
fit->orientation = newbieOrientation;
fit->addAfter = TRUE;
fit->score = fitScore;
return fit;
}
void placeBuriedClones(struct mmClone *cloneList, struct dlList *bargeList, struct hash *ocpHash,
boolean ignoreEnds)
/* Place buried clones in proper position in barges. */
{
struct mmClone *clone, *outerClone;
struct barge *barge;
struct dlNode *outCloneStart, *outCloneEnd;
struct dlNode *node, *n1, *n2;
struct dlList *enclosedList = newDlList();
/* Make list of enclosed clones and sort them by size. */
for (clone = cloneList; clone != NULL; clone = clone->next)
{
if (clone->enclosed != NULL)
dlAddValTail(enclosedList, clone);
}
dlSort(enclosedList, mmCloneCmpSize);
/* Place enclosed clones, biggest first. */
for (node = enclosedList->head; !dlEnd(node); node = node->next)
{
struct fitAt *fitList = NULL, *fit;
clone = node->val;
if (clone->barge != NULL)
continue; /* Must have been placed previously. */
outerClone = clone->enclosed;
while (outerClone->enclosed != NULL)
outerClone = outerClone->enclosed;
if ((barge = outerClone->barge) == NULL)
barge = bargeOfOne(outerClone, bargeList);
logIt("Trying to place enclosed %s\n", clone->name);
dumpCloneEnds(barge->cloneEndList, logFile);
findCloneEndsOnList(barge->cloneEndList, outerClone, &outCloneStart, &outCloneEnd);
/* Search one past enclosing clone, in case not really quite enclosed. */
if (!dlStart(outCloneStart->prev))
outCloneStart = outCloneStart->prev;
if (!dlEnd(outCloneEnd->next))
outCloneEnd = outCloneEnd->next;
for (n1 = outCloneStart; n1 != outCloneEnd; n1 = n1->next)
{
for (n2 = n1->next; n2 != outCloneEnd->next; n2 = n2->next)
{
if ((fit = fitEnclosed(barge->cloneEndList->head,
outCloneEnd, clone, n1, n2, ocpHash, ignoreEnds)) != NULL)
{
dumpFit(fit, clone);
slAddHead(&fitList, fit);
}
}
}
slSort(&fitList, fitAtCmpScore);
if ((fit = fitList) != NULL)
{
addFittedClone(barge, clone, fit);
dumpCloneEnds(barge->cloneEndList, logFile);
}
else
warn("Couldn't place enclosed clone %s", clone->name);
slFreeList(&fitList);
logIt("\n");
}
freeDlList(&enclosedList);
}
void makeSingletonBarges(struct mmClone *cloneList, struct dlList *bargeList)
/* Make singleton barges around clones not barged up yet. */
{
struct mmClone *clone;
for (clone = cloneList; clone != NULL; clone = clone->next)
{
if (clone->barge == NULL)
{
bargeOfOne(clone, bargeList);
}
}
}
/* Place buried clones in proper position in barges. */
struct dlList *mmBargeList(struct mmClone *cloneList, struct hash *cloneHash,
struct hash *ocpHash, struct overlappingClonePair **pOcpList)
/* Make contigs of overlapping clones (aka barges) using overlap info.
* ocpList should be sorted by score. */
{
struct overlappingClonePair *ocp, *ocpList;
struct dlList *bargeList = newDlList();
struct barge *barge, *bargeA, *bargeB;
int ignoreEnds;
for (ignoreEnds = 0; ignoreEnds <= 1; ++ignoreEnds)
{
markBuriedClones(*pOcpList, ignoreEnds);
status("Ordering non-enclosed clones %s end info\n",
(ignoreEnds ? "ignoring" : "using") );
ocpScoreAndSort(pOcpList, ignoreEnds);
ocpList = *pOcpList;
for (ocp = ocpList; ocp != NULL; ocp = ocp->next)
ocpDump(ocp, logFile);
logIt("\n");
logIt("Scanning overlaps in order of score\n");
for (ocp = ocpList; ocp != NULL; ocp = ocp->next)
{
if (ocp->score < 0)
break;
ocpDump(ocp, logFile);
if (ocp->a->enclosed)
{
logIt("skipping enclosed: %s inside %s\n\n",
ocp->a->name, ocp->a->enclosed->name);
continue;
}
if (ocp->b->enclosed)
{
logIt("skipping enclosed: %s inside %s\n\n",
ocp->b->name, ocp->b->enclosed->name);
continue;
}
#ifdef SOMETIMES
if (startsWith("AL365225", ocp->a->name) && startsWith("AL391058", ocp->b->name))
{
uglyerfStart();
uglyerf("Got a live one!\n");
}
else
{
uglyerfEnd();
}
#endif
bargeA = ocp->a->barge;
bargeB = ocp->b->barge;
if (bargeA == NULL && bargeB == NULL)
{
barge = bargeOfTwo(ocp, bargeList, ocpHash, ocpList, ignoreEnds);
}
else if (bargeA == NULL)
{
addToBarge(bargeB, ocp, ocp->b, ocp->a, ocpHash, ocpList, ignoreEnds);
}
else if (bargeB == NULL)
{
addToBarge(bargeA, ocp, ocp->a, ocp->b, ocpHash, ocpList, ignoreEnds);
}
else if (bargeA == bargeB)
{
/* This is handled elsewhere. */
logIt("Clones already in same barge.\n");
}
else if (bargeA != bargeB)
{
mergeBarges(bargeA, bargeB, ocp, ocpHash, bargeList, ignoreEnds);
}
logIt("\n");
}
}
for (ignoreEnds = 0; ignoreEnds <= 1; ++ignoreEnds)
placeBuriedClones(cloneList, bargeList, ocpHash, ignoreEnds);
makeSingletonBarges(cloneList, bargeList);
return bargeList;
}
void flipBarge(struct barge *barge)
/* Flip orientation of barge. */
{
struct dlList *flipList = newDlList();
struct dlNode *node, *next;
struct cloneEnd *ce;
/* Reverse end elements. */
for (node = barge->cloneEndList->head; !dlEnd(node); node = next)
{
next = node->next;
dlRemove(node);
dlAddHead(flipList, node);
ce = node->val;
ce->isStart = !ce->isStart;
ce->orientation = -ce->orientation;
}
freeDlList(&barge->cloneEndList);
barge->cloneEndList = flipList;
}
boolean needFlipBarge(struct barge *barge)
/* See if barge looks to need flipping according to map. */
{
int diff = 0, oneDiff;
struct dlNode *node;
struct cloneEnd *ce;
struct mmClone *clone;
boolean isFirst = TRUE;
boolean startPos, lastStartPos = 0;
for (node = barge->cloneEndList->head; !dlEnd(node); node = node->next)
{
ce = node->val;
if (ce->isStart && !ce->clone->enclosed)
{
clone = ce->clone;
startPos = clone->mapPos;
diff += clone->flipTendency * ce->orientation;
if (isFirst)
isFirst = FALSE;
else
{
oneDiff = startPos - lastStartPos;
if (oneDiff > clone->size)
oneDiff = clone->size;
if (oneDiff < -clone->size)
oneDiff = -clone->size;
diff += oneDiff;
}
lastStartPos = startPos;
}
}
return diff < 0;
}
int averageMapPos(struct barge *barge)
/* Return average map position of clones in barge. */
{
double total = 0;
int count = 0;
struct mmClone *clone;
struct dlNode *node;
for (node = barge->cloneList->head; !dlEnd(node); node = node->next)
{
clone = node->val;
count += 1;
total += clone->mapPos;
}
return round(total/count);
}
struct mmClone *findNextUnenclosed(struct dlNode *node)
/* Find next clone in double-linked clone end list that is
* not enclosed. */
{
-struct mmClone *clone = NULL;
struct cloneEnd *ce;
for (; !dlEnd(node); node = node->next)
{
ce = node->val;
if (ce->isStart && ce->clone->enclosed == NULL)
return ce->clone;
}
return NULL;
}
int printOverlapInfo(FILE *f, struct mmClone *a, struct mmClone *b, struct hash *ocpHash,
int orientation)
/* Print size of overlap between a and b, and a end info on ab overlap. */
{
struct overlappingClonePair *ocp;
char as, at, temp;
if (a == NULL || b == NULL)
{
fprintf(f, "%6s - - ", "n/a");
return 0;
}
ocp = findHashedOcp(a->name, b->name, ocpHash);
if (ocp->a == a)
{
as = ocp->aHitS;
at = ocp->aHitT;
}
else
{
as = ocp->bHitS;
at = ocp->bHitT;
}
if (orientation < 0)
{
temp = as;
as = at;
at = temp;
}
if (orientation == 0 && (as != '?' || at != '?'))
fprintf(f, "%6d (%c %c) ", ocp->overlap, as, at);
else
fprintf(f, "%6d %c %c ", ocp->overlap, as, at);
return ocp->overlap;
}
struct cloneRef *getLastClones(struct barge *barge, int minSize, struct hash *ocpHash)
/* Get all clones within minSize of end of barge. */
{
struct dlNode *node;
struct mmClone *clone, *lastClone = NULL;
struct cloneEnd *ce;
struct cloneRef *list = NULL;
int curSize = 0;
for (node = barge->cloneEndList->tail; !dlStart(node); node = node->prev)
{
ce = node->val;
if (!ce->isStart)
{
clone = ce->clone;
cloneRefAdd(&list, clone);
curSize += clone->size;
if (lastClone != NULL)
curSize -= overlapSize(clone, lastClone, ocpHash, FALSE);
if (curSize >= minSize)
break;
lastClone = clone;
}
}
return list;
}
struct cloneRef *getFirstClones(struct barge *barge, int minSize, struct hash *ocpHash)
/* Get all clones within minSize of start of barge. */
{
struct dlNode *node;
struct mmClone *clone, *lastClone = NULL;
struct cloneEnd *ce;
struct cloneRef *list = NULL;
int curSize = 0;
for (node = barge->cloneEndList->head; !dlEnd(node); node = node->next)
{
ce = node->val;
if (ce->isStart)
{
clone = ce->clone;
cloneRefAdd(&list, clone);
curSize += clone->size;
if (lastClone != NULL)
curSize -= overlapSize(clone, lastClone, ocpHash, FALSE);
if (curSize >= minSize)
break;
lastClone = clone;
}
}
return list;
}
struct slRef *refIntersection(struct slRef *aList, struct slRef *bList)
/* Make a refList that is intersection of aList and bList. */
{
struct hash *aHash;
char name[32];
struct slRef *ref, *newList = NULL;
/* Handle case where either list empty quickly. */
if (aList == NULL || bList == NULL)
return NULL;
/* Put all of aList into hash. */
aHash = newHash(0);
for (ref = aList; ref != NULL; ref = ref->next)
{
sprintf(name, "%p", ref->val);
hashAdd(aHash, name, NULL);
}
/* Scan through bList, and add appropriate elements to list. */
for (ref = bList; ref != NULL; ref = ref->next)
{
sprintf(name, "%p", ref->val);
if (hashLookup(aHash, name))
{
refAdd(&newList, ref->val);
}
}
freeHash(&aHash);
return newList;
}
struct bepRef *bepsFromClones(struct cloneRef *cloneRefList)
/* Make a bepRef list consolidating all beps from all clones in input list. */
{
struct bepRef *bepRefList = NULL, *bepRef;
struct cloneRef *cloneRef;
struct mmClone *clone;
for (cloneRef = cloneRefList; cloneRef != NULL; cloneRef = cloneRef->next)
{
clone = cloneRef->clone;
for (bepRef = clone->bepList; bepRef != NULL; bepRef = bepRef->next)
bepRefAddUnique(&bepRefList, bepRef->bep);
}
return bepRefList;
}
boolean evalGapInfo(FILE *f, struct barge *aBarge, struct barge *bBarge,
struct hash *ocpHash, struct hash *bepHash)
/* Evaluate and optionally print out info about gap between aBarge and bBarge.
* Returns TRUE if gap is bridged. If f is NULL it will not print,
* just evaluate gap. */
{
struct cloneRef *aEnd = NULL, *bStart = NULL;
struct bepRef *aBep = NULL, *bBep = NULL, *joiningBep = NULL, *bepRef;
struct bep *bep;
-struct dlNode *node;
int bigEnough = 200000;
-int aEndSize = 0, bStartSize = 0;
-struct mmClone *clone, *lastClone = NULL;
-struct cloneEnd *ce;
boolean bridged = FALSE;
aEnd = getLastClones(aBarge, bigEnough, ocpHash);
bStart = getFirstClones(bBarge, bigEnough, ocpHash);
aBep = bepsFromClones(aEnd);
bBep = bepsFromClones(bStart);
joiningBep = (struct bepRef*)refIntersection((struct slRef*)aBep, (struct slRef*)bBep);
if (joiningBep == NULL)
{
if (f != NULL)
fprintf(f, "----- open gap -----\n");
}
else
{
if (f != NULL)
{
fprintf(f, "----- bridged gap:");
for (bepRef = joiningBep; bepRef != NULL; bepRef = bepRef->next)
{
bep = bepRef->bep;
fprintf(f, " %s", bep->cloneName);
}
fprintf(f, "\n");
}
bridged = TRUE;
}
/* Clean up time. */
slFreeList(&aBep);
slFreeList(&bBep);
slFreeList(&joiningBep);
slFreeList(&aEnd);
slFreeList(&bStart);
return bridged;
}
-char *naForNull(char *s)
-/* Return s, or if s is null "n/a" */
-{
-if (s != NULL)
- return s;
-return "n/a";
-}
-
char cloneDir(struct cloneEnd *ce)
/* Return ? + or - for direction clone goes. */
{
int o = ce->orientation;
if (o == 0)
return '?';
else if (o < 0)
return '-';
else
return '+';
}
void saveBargeFile(char *fileName, char *contigName, struct dlList *bargeList, struct hash *ocpHash,
struct hash *cloneHash, struct hash *bepHash)
/* Write out barges to file. */
{
FILE *f = mustOpen(fileName, "w");
struct dlNode *bNode, *eNode;
struct barge *barge;
struct cloneEnd *ce;
int cloneCount;
boolean lastBridged = FALSE;
fprintf(f, "Barge (Connected Clone) File mm Version %d on %s\n\n", version, contigName);
fprintf(f, "start name phase clone dir size before after center plc mapPos\n");
fprintf(f, "------------------------------------------------------------------------------------------\n");
for (bNode = bargeList->head; !dlEnd(bNode); bNode = bNode->next)
{
struct mmClone *clone, *outer, *lastUnenclosed = NULL, *nextUnenclosed;
struct overlappingClonePair *ocp;
barge = bNode->val;
cloneCount = dlCount(barge->cloneList);
for (eNode = barge->cloneEndList->head; !dlEnd(eNode); eNode = eNode->next)
{
char os, ot, is, it;
- int prevOverlap, nextOverlap;
+ int nextOverlap;
ce = eNode->val;
if (ce->isStart)
{
clone = ce->clone;
if ((outer = clone->enclosed) != NULL)
{
ocp = findHashedOcp(clone->name, outer->name, ocpHash);
if (ocp->a == outer)
{
os = ocp->aHitS;
ot = ocp->aHitT;
is = ocp->bHitS;
it = ocp->bHitT;
}
else
{
os = ocp->bHitS;
ot = ocp->bHitT;
is = ocp->aHitS;
it = ocp->aHitT;
}
fprintf(f, " (%d %-9s %d %-11s %c %6d %c %c %9s %6d %c %c %6s %3s %8d)\n",
clone->mmPos, clone->name, clone->phase, naForNull(clone->bacName),
cloneDir(ce), clone->size, os, ot,
outer->name, ocp->sloppyOverlap,
is, it, naForNull(clone->seqCenter),
naForNull(clone->placeInfo), clone->mapPos);
}
else
{
fprintf(f, "%-8d %-9s %d %-11s %c %6d ",
clone->mmPos, clone->name, clone->phase,
naForNull(clone->bacName), cloneDir(ce), clone->size);
nextUnenclosed = findNextUnenclosed(eNode->next);
printOverlapInfo(f, clone, lastUnenclosed, ocpHash, ce->orientation);
nextOverlap = printOverlapInfo(f, clone, nextUnenclosed, ocpHash, ce->orientation);
fprintf(f, "%6s %3s %8d", naForNull(clone->seqCenter), naForNull(clone->placeInfo),
clone->mapPos);
if (cloneCount == 1)
{
boolean nextBridged = FALSE;
if (!dlEnd(bNode->next))
nextBridged = evalGapInfo(NULL, bNode->val, bNode->next->val,
ocpHash, bepHash);
if (lastBridged || nextBridged)
fprintf(f, " bridged");
else
fprintf(f, " unbridged");
fprintf(f, " singleton");
if (clone->bestFriend != NULL)
fprintf(f, " %s %d", clone->bestFriend->name, clone->bestOverlap);
else
fprintf(f, " none");
}
lastUnenclosed = clone;
fprintf(f, "\n");
if (nextOverlap == 0 && nextUnenclosed != NULL)
{
fprintf(f, "----- weak gap\n");
}
}
}
}
if (!dlEnd(bNode->next))
{
lastBridged = evalGapInfo(f, bNode->val, bNode->next->val, ocpHash, bepHash);
}
}
fclose(f);
}
int bargeCmpMapPos(const void *va, const void *vb)
/* Compare to sort biggest sized first. */
{
const struct barge *a = *((struct barge **)va);
const struct barge *b = *((struct barge **)vb);
return a->mapPos - b->mapPos;
}
void setUnenclosedPos(struct dlList *bargeList, struct hash *ocpHash, struct hash *bepHash)
/* Set mmPos field of all clones that are not enclosed. */
{
struct dlNode *bNode, *eNode;
int pos = 0;
struct barge *barge;
struct cloneEnd *ce;
-struct mmClone *clone;
-boolean lastBridged = FALSE;
-
for (bNode = bargeList->head; !dlEnd(bNode); bNode = bNode->next)
{
- struct mmClone *clone, *lastUnenclosed = NULL, *nextUnenclosed;
- struct overlappingClonePair *ocp;
+ struct mmClone *clone, *lastUnenclosed = NULL;
barge = bNode->val;
for (eNode = barge->cloneEndList->head; !dlEnd(eNode); eNode = eNode->next)
{
ce = eNode->val;
clone = ce->clone;
if (ce->isStart)
{
if (clone->enclosed == NULL)
{
if (lastUnenclosed != NULL)
pos -= overlapSize(clone, lastUnenclosed, ocpHash, FALSE);
clone->mmPos = pos;
ce->pos = pos;
pos += clone->size;
lastUnenclosed = clone;
}
}
else
{
ce->pos = clone->mmPos + clone->size;
}
}
if (!dlEnd(bNode->next))
{
boolean bridged;
bridged = evalGapInfo(NULL, bNode->val, bNode->next->val, ocpHash, bepHash);
if (bridged)
pos += 50000;
else
pos += 100000;
}
}
}
struct dlNode *findNextUnenclosedEnd(struct dlNode *eNode, int *retCount)
/* Find next end that is part of a non-enclosed clone. */
{
struct cloneEnd *ce;
int count = 0;
for (; !dlEnd(eNode); eNode = eNode->next)
{
ce = eNode->val;
if (ce->clone->enclosed == NULL)
break;
++count;
}
*retCount = count;
return eNode;
}
struct cloneEnd *findEndOfClone(struct dlNode *eNode, struct mmClone *clone)
/* Find end of given clone in doubly linked end list starting search at eNode. */
{
struct cloneEnd *ce;
for (; !dlEnd(eNode); eNode = eNode->next)
{
ce = eNode->val;
if (ce->clone == clone && !ce->isStart)
return ce;
}
errAbort("Internal error. Couldn't find end of %s", clone->name);
+return NULL;
}
void setEnclosedPos(struct dlList *bargeList, struct hash *ocpHash, struct hash *bepHash)
/* Set mmPos field of all enclosed clones. */
{
struct dlNode *bNode, *eNode, *nextEnode = NULL;
int lastPos = 0;
struct barge *barge;
struct cloneEnd *ce, *nextKnownCe, *endCe;
-struct mmClone *clone;
int lastKnownPos, nextKnownPos, knownSpan;
int encCount;
int i;
for (bNode = bargeList->head; !dlEnd(bNode); bNode = bNode->next)
{
struct mmClone *clone;
barge = bNode->val;
/* First pass - fill in value for ends of enclosed ends by interpolating
* between other ends. */
for (eNode = barge->cloneEndList->head; !dlEnd(eNode); eNode = nextEnode)
{
ce = eNode->val;
clone = ce->clone;
if (clone->enclosed != NULL)
{
warn("Starting with enclosed clone %s, faking it...", clone->name);
clone->mmPos = lastPos;
}
else
{
lastPos = clone->mmPos;
nextEnode = findNextUnenclosedEnd(eNode->next, &encCount);
if (nextEnode != eNode->next)
{
lastKnownPos = ce->pos;
if (dlEnd(nextEnode))
{
warn("Ending with enclosed clone buh!...");
nextKnownPos = lastKnownPos + clone->size;
}
else
{
nextKnownCe = nextEnode->val;
nextKnownPos = nextKnownCe->pos;
}
knownSpan = nextKnownPos - lastKnownPos;
for (i=1; i<=encCount; ++i)
{
eNode = eNode->next;
ce = eNode->val;
ce->pos = lastKnownPos + roundingScale(knownSpan, i, encCount+1);
}
}
}
}
/* Second pass - set clone position by making midpoint of clone average of two ends. */
for (eNode = barge->cloneEndList->head; !dlEnd(eNode); eNode = eNode->next)
{
ce = eNode->val;
clone = ce->clone;
if (clone->enclosed != NULL && ce->isStart)
{
endCe = findEndOfClone(eNode->next, clone);
if (ce->pos == 0 || endCe->pos == 0)
warn("Zero position of enclosed clone end %s", clone->name);
clone->mmPos = (ce->pos + endCe->pos)/2 - clone->size/2;
}
}
}
}
void orderAndOrientBarges(struct dlList *bargeList, struct hash *ocpHash, struct hash *bepHash)
/* Orient barges according to map and set barge->mapPos field
* according to first clone. */
{
-struct dlNode *bNode, eNode;
+struct dlNode *bNode;
struct barge *barge;
for (bNode = bargeList->head; !dlEnd(bNode); bNode = bNode->next)
{
barge = bNode->val;
if (needFlipBarge(barge))
flipBarge(barge);
barge->mapPos = averageMapPos(barge);
}
dlSort(bargeList, bargeCmpMapPos);
setUnenclosedPos(bargeList, ocpHash, bepHash);
setEnclosedPos(bargeList, ocpHash, bepHash);
}
void saveInfo(char *fileName, char *contigName, struct dlList *bargeList)
/* Save info file. */
{
FILE *f = mustOpen(fileName, "w");
struct dlNode *bNode, *eNode;
struct cloneEnd *ce;
struct mmClone *clone;
struct barge *barge;
fprintf(f, "%s PLACED\n", contigName);
for (bNode = bargeList->head; !dlEnd(bNode); bNode = bNode->next)
{
barge = bNode->val;
for (eNode = barge->cloneEndList->head; !dlEnd(eNode); eNode = eNode->next)
{
ce = eNode->val;
if (ce->isStart)
{
clone = ce->clone;
fprintf(f, "%s %d %d %d\n", clone->name, clone->mmPos/1000, clone->phase, clone->flipTendency);
}
}
}
fclose(f);
}
void writeEndsFile(char *fileName, struct dlList *bargeList)
/* Write out end order info. */
{
struct dlNode *node;
FILE *f = mustOpen(fileName, "w");
struct barge *barge;
for (node = bargeList->head; !dlEnd(node); node = node->next)
{
barge = node->val;
dumpCloneEnds(barge->cloneEndList, f);
}
carefulClose(&f);
}
int cloneEndCmpPos(const void *va, const void *vb)
/* Compare to sort biggest sized first. */
{
const struct cloneEnd *a = *((struct cloneEnd **)va);
const struct cloneEnd *b = *((struct cloneEnd **)vb);
return a->pos - b->pos;
}
void saveLiteralEnds(char *fileName, struct mmClone *cloneList)
/* Save ends pretending they just follow map order.
* Clone list needs to be sorted by mapPos*/
{
struct cloneEnd *ce;
struct barge *barge = NULL;
struct dlList *bargeList = newDlList();
-int end, lastEnd = -BIGNUM;
+int lastEnd = -BIGNUM;
struct mmClone *clone;
struct dlNode *node;
/* Build up barges literally from map positions. */
for (clone = cloneList; clone != NULL; clone = clone->next)
{
if (clone->mapPos > lastEnd)
{
AllocVar(barge);
barge->cloneEndList = newDlList();
barge->cloneList = newDlList();
barge->dlNode = dlAddValTail(bargeList, barge);
}
dlAddValTail(barge->cloneList, clone);
ce = newCloneEnd(clone, NULL);
ce->isStart = TRUE;
ce->pos = clone->mapPos;
dlAddValTail(barge->cloneEndList, ce);
ce = newCloneEnd(clone, NULL);
ce->pos = clone->mapPos + clone->size;
dlAddValTail(barge->cloneEndList, ce);
if (ce->pos > lastEnd)
lastEnd = ce->pos;
}
/* Some ends may be out of order, so sort them. */
for (node = bargeList->head; !dlEnd(node); node = node->next)
{
barge = node->val;
dlSort(barge->cloneEndList, cloneEndCmpPos);
}
/* Write it out. */
writeEndsFile(fileName, bargeList);
/* Clean up time. */
for (node = bargeList->head; !dlEnd(node); node = node->next)
{
barge = node->val;
freeDlListAndVals(&barge->cloneEndList);
freeDlList(&barge->cloneList);
}
freeDlList(&bargeList);
}
boolean enclosedByFinished(struct mmClone *inner, struct overlappingClonePair *ocpList)
/* Return TRUE if inner clone is completely enclosed by a finished clone. */
{
struct overlappingClonePair *ocp;
for (ocp = ocpList; ocp != NULL; ocp = ocp->next)
{
if (ocp->a == inner && ocp->b->phase == htgFinished
&& (scoreEnclosingOverlap(ocp, inner, TRUE) >= 1
|| scoreEnclosingOverlap(ocp, inner, FALSE) >= 1)
&& ocp->bHitS == 'N' && ocp->bHitT == 'N')
return TRUE;
if (ocp->b == inner && ocp->a->phase == htgFinished
&& (scoreEnclosingOverlap(ocp, inner, TRUE) >= 1
|| scoreEnclosingOverlap(ocp, inner, FALSE) >= 1)
&& ocp->aHitS == 'N' && ocp->aHitT == 'N')
return TRUE;
}
return FALSE;
}
void removeCloneFromOcp(struct mmClone *clone, struct overlappingClonePair **pOcpList,
struct hash *ocpHash)
/* Remove all overlaps involving clone. */
{
struct overlappingClonePair *ocpList = NULL, *ocp, *nextOcp;
for (ocp = *pOcpList; ocp != NULL; ocp = nextOcp)
{
nextOcp = ocp->next;
if (ocp->a == clone || ocp->b == clone)
{
char *handle = ocpHashName(ocp->a->name, ocp->b->name);
hashRemove(ocpHash, handle);
}
else
{
slAddHead(&ocpList, ocp);
}
}
slReverse(&ocpList);
*pOcpList = ocpList;
}
void removeDraftInFinished(struct mmClone **pCloneList, struct hash *cloneHash,
struct overlappingClonePair **pOcpList, struct hash *ocpHash)
/* Remove draft clones that are completely enclosed inside finished
* clones/finished contigs. */
{
struct mmClone *cloneList = NULL, *clone, *nextClone;
int rmCount = 0;
for (clone = *pCloneList; clone != NULL; clone = nextClone)
{
nextClone = clone->next;
if (clone->phase != htgFinished && enclosedByFinished(clone, *pOcpList))
{
removeCloneFromOcp(clone, pOcpList, ocpHash);
hashRemove(cloneHash, clone->name);
++rmCount;
}
else
{
slAddHead(&cloneList, clone);
}
}
slReverse(&cloneList);
*pCloneList = cloneList;
}
void mm(char *contigDir)
/* mm - Map mender. */
{
char fileName[512];
struct mmClone *cloneList = NULL, *clone;
struct hash *cloneHash = NULL;
-struct overlappingClonePair *ocpList = NULL, *ocp;
+struct overlappingClonePair *ocpList = NULL;
struct hash *ocpHash = NULL;
struct dlList *bargeList = NULL;
-struct barge *barge;
-struct dlNode *node;
struct hash *bepHash = NULL;
struct bep *bepList = NULL;
-FILE *f;
/* Set up logging of error and status messages. */
sprintf(fileName, "%s/mmLog.%d", contigDir, version);
logFile = mustOpen(fileName, "w");
pushWarnHandler(warnHandler);
sprintf(fileName, "%s/cloneInfo", contigDir);
mmLoadClones(fileName, &cloneList, &cloneHash);
status("Loaded %d clones from %s\n", slCount(cloneList), fileName);
sprintf(fileName, "%s/info", contigDir);
mmAddInfo(fileName, cloneList, cloneHash);
if (sameWord(contigType, "PLACED"))
{
maxMapDeviation = 50000;
isPlaced = TRUE;
}
status("Added map info from %s\n", fileName);
sprintf(fileName, "%s/cloneEnds", contigDir);
mmAddEndInfo(fileName, cloneList, cloneHash);
status("Added map end info from %s\n", fileName);
if (sameString(contigName, "ctg_na"))
{
status("Special processing %s", contigName);
for (clone = cloneList; clone != NULL; clone = clone->next)
{
clone->mapPos = 0;
}
}
else
{
sprintf(fileName, "%s/map", contigDir);
mmAddMap(fileName, cloneList, cloneHash);
status("Added map info from %s\n", fileName);
}
sprintf(fileName, "%s/cloneOverlap", contigDir);
mmLoadOcp(fileName, cloneHash, &ocpList, &ocpHash);
status("Loaded %d overlaps from %s\n", slCount(ocpList), fileName);
sprintf(fileName, "%s/bacEndPairs", contigDir);
mmLoadBacEndPairs(fileName, &bepList, &bepHash);
status("Loaded %d bac end pairs from %s\n", slCount(bepList), fileName);
sprintf(fileName, "%s/bacEnd.psl", contigDir);
mmLoadBacEndPsl(fileName, bepHash, cloneHash);
removeDraftInFinished(&cloneList, cloneHash, &ocpList, ocpHash);
bargeList = mmBargeList(cloneList, cloneHash, ocpHash, &ocpList);
status("Merged overlaps\n");
orderAndOrientBarges(bargeList, ocpHash, bepHash);
status("ordered barges\n");
sprintf(fileName, "%s/mmEnds.%d", contigDir, version);
status("Writing barges to %s\n", fileName);
writeEndsFile(fileName, bargeList);
sprintf(fileName, "%s/mmEnds", contigDir);
writeEndsFile(fileName, bargeList);
sprintf(fileName, "%s/mmBarge.%d", contigDir, version);
saveBargeFile(fileName, contigName, bargeList, ocpHash, cloneHash, bepHash);
sprintf(fileName, "%s/mmBarge", contigDir);
saveBargeFile(fileName, contigName, bargeList, ocpHash, cloneHash, bepHash);
sprintf(fileName, "%s/info.mm", contigDir);
saveInfo(fileName, contigName, bargeList);
}
int main(int argc, char *argv[])
/* Process command line. */
{
if (argc != 2)
usage();
mm(argv[1]);
#ifdef SOMETIMES
mm("/projects/cc/hg/oo.24/6/ctg15907");
#endif
return 0;
}