4963dda407dcf12f61fa3973ffa6fbbece18901a angie Wed Dec 15 10:25:28 2010 -0800 bedOverSmall had a limit of 20 words per line, while other functions had a limit of 64. The new snp132Ext has >20 columns and lines were truncated due to the 20-word limit. Changed limit to 64 everywhere. diff --git src/hg/lib/liftOver.c src/hg/lib/liftOver.c index 3cef345..d1111d2 100644 --- src/hg/lib/liftOver.c +++ src/hg/lib/liftOver.c @@ -23,30 +23,33 @@ char *name; /* Chromosome name. */ struct binKeeper *bk; /* Keyed by old position, values are chains. */ }; static char otherStrand(char c) /* Swap +/- */ { if (c == '-') return '+'; else if (c == '+') return '-'; else return c; } +// The maximum number of words per line that can be lifted: +#define LIFTOVER_MAX_WORDS 64 + void readLiftOverMap(char *fileName, struct hash *chainHash) /* Read map file into hashes. */ { struct lineFile *lf = lineFileOpen(fileName, TRUE); struct chain *chain; struct chromMap *map; int chainCount = 0; while ((chain = chainRead(lf)) != NULL) { if ((map = hashFindVal(chainHash, chain->tName)) == NULL) { AllocVar(map); map->bk = binKeeperNew(0, chain->tSize); hashAddSaveName(chainHash, chain->tName, map, &map->name); @@ -366,31 +369,31 @@ static int bedOverSmall(struct lineFile *lf, int fieldCount, struct hash *chainHash, double minMatch, int minSizeT, int minSizeQ, int minChainT, int minChainQ, FILE *mapped, FILE *unmapped, bool multiple, char *chainTable, int bedPlus, bool hasBin, bool tabSep, int *errCt) /* Do a bed without a block-list. * NOTE: it would be preferable to have all of the lift * functions work at the line level, rather than the file level. * Multiple option can be used with bed3 -- it will write a list of * regions as a bed4, where score is the "part #". 
This is used for * ENCODE region mapping */ { int i, wordCount, s, e; -char *words[20], *chrom; +char *words[LIFTOVER_MAX_WORDS], *chrom; char strand = '.', strandString[2]; char *error; int ct = 0; int errs = 0; struct bed *bedList = NULL, *unmappedBedList = NULL; int totalUnmapped = 0; double unmappedRatio; int totalUnmappedAll = 0; int totalBases = 0; double mappedRatio; char *region = NULL; /* region name from BED file-- used with -multiple */ char *db = NULL, *chainTableName = NULL; if (chainTable) { @@ -991,31 +994,31 @@ bed->thickEnd = thickEnd; } slFreeList(&rangeList); slFreeList(&badRanges); slFreeList(&binList); return error; } static int bedOverBig(struct lineFile *lf, int refCount, struct hash *chainHash, double minMatch, double minBlocks, bool fudgeThick, FILE *mapped, FILE *unmapped, int bedPlus, bool hasBin, bool tabSep, int *errCt) /* Do a bed with block-list. */ { int wordCount, bedCount; -char *line, *words[64]; +char *line, *words[LIFTOVER_MAX_WORDS]; char *whyNot = NULL; int ct = 0; int errs = 0; int i; while (lineFileNextReal(lf, &line)) { struct bed *bed; wordCount = chopLineBin(line, words, ArraySize(words), hasBin, tabSep); if (refCount != wordCount) lineFileExpectWords(lf, refCount, wordCount); if (wordCount == bedPlus) bedPlus = 0; /* no extra fields */ bedCount = (bedPlus ? bedPlus : wordCount); bed = bedLoadN(words, bedCount); @@ -1050,31 +1053,31 @@ return ct; } int liftOverBedPlus(char *fileName, struct hash *chainHash, double minMatch, double minBlocks, int minSizeT, int minSizeQ, int minChainT, int minChainQ, bool fudgeThick, FILE *f, FILE *unmapped, bool multiple, char *chainTable, int bedPlus, bool hasBin, bool tabSep, int *errCt) /* Lift bed N+ file. 
* Return the number of records successfully converted */ { struct lineFile *lf = lineFileOpen(fileName, TRUE); int wordCount; int bedFieldCount = bedPlus; char *line; -char *words[64]; +char *words[LIFTOVER_MAX_WORDS]; int ct = 0; if (lineFileNextReal(lf, &line)) { line = cloneString(line); if (tabSep) wordCount = chopByChar(line, '\t', words, ArraySize(words)); else wordCount = chopLine(line, words); if (hasBin) wordCount--; lineFileReuse(lf); freez(&line); if (wordCount < 3) errAbort("Data format error: expecting at least 3 fields in BED file (%s)", fileName);