ef8001c7de45f170e6b17352f0e4ee89876edb83 jcasper Thu Dec 22 16:24:52 2016 -0800 Adding bed3 option to liftUp, so that if the 6th field just happens coincidentally to be a -, it's not converted to a + by an overzealous reverse strand lift refs #18589 diff --git src/hg/liftUp/liftUp.c src/hg/liftUp/liftUp.c index bae6663..d55024f 100644 --- src/hg/liftUp/liftUp.c +++ src/hg/liftUp/liftUp.c @@ -24,31 +24,31 @@ boolean isPtoG = TRUE; /* is protein to genome lift */ boolean nohead = FALSE; /* No header for psl files? */ boolean nosort = FALSE; /* Don't sort files */ boolean ignoreVersions = FALSE; /* drop NCBI versions */ boolean extGenePred = FALSE; /* load extended genePred */ int dots=0; /* Put out I'm alive dot now and then? */ int gapsize = 0; /* optional non-default gap size */ void usage() /* Explain usage and exit. */ { errAbort( "liftUp - change coordinates of .psl, .agp, .gap, .gl, .out, .align, .gff, .gtf\n" - ".bscore .tab .gdup .axt .chain .net, .gp, .genepred, .wab, .bed, or .bed8 files to\n" + ".bscore .tab .gdup .axt .chain .net, .gp, .genepred, .wab, .bed, .bed3, or .bed8 files to\n" "parent coordinate system.\n" "\n" "usage:\n" " liftUp [-type=.xxx] destFile liftSpec how sourceFile(s)\n" "The optional -type parameter tells what type of files to lift\n" "If omitted the type is inferred from the suffix of destFile\n" "Type is one of the suffixes described above.\n" "DestFile will contain the merged and lifted source files,\n" "with the coordinates translated as per liftSpec. LiftSpec\n" "is tab-delimited with each line of the form:\n" " offset oldName oldSize newName newSize\n" "LiftSpec may optionally have a sixth column specifying + or - strand,\n" "but strand is not supported for all input types.\n" "The 'how' parameter controls what the program will do with\n" "items which are not in the liftSpec. It must be one of:\n" @@ -1584,30 +1584,36 @@ if (ferror(f)) errAbort("error writing %s", destFile); fclose(f); if (!anyHits) errAbort("No lines lifted!"); } void liftBed(char *destFile, struct hash *liftHash, int sourceCount, char *sources[]) /* Lift Browser Extensible Data file. This is a tab * separated file where first three fields are * seq, start, end. This also sorts the result. */ { liftTabbed(destFile, liftHash, sourceCount, sources, 0, 1, 2, FALSE, 0, 0, 0, 0, 5); } +void liftBed3(char *destFile, struct hash *liftHash, int sourceCount, char *sources[]) +/* Lift BED3, avoiding consideration of field 5 as a strand value (as with regular BED). */ +{ +liftTabbed(destFile, liftHash, sourceCount, sources, 0, 1, 2, FALSE, 0, 0, 0, 0, -1); +} + void liftBed8(char *destFile, struct hash *liftHash, int sourceCount, char *sources[]) /* Lift BED8, getting the thickStart, and thickEnd fields. */ { /* uses same contig for both pairs of coords */ liftTabbed(destFile, liftHash, sourceCount, sources, 0, 1, 2, TRUE, 0, 6, 7, 0, 5); } void liftGff(char *destFile, struct hash *liftHash, int sourceCount, char *sources[]) /* Lift up coordinates of a .gff or a .gtf file. */ { liftTabbed(destFile, liftHash, sourceCount, sources, 0, 3, 4, FALSE, 0, 0, 0, 1, 6); } void liftGdup(char *destFile, struct hash *liftHash, int sourceCount, char *sources[]) /* Lift up coordinates of a .gdup. */ @@ -1779,30 +1785,36 @@ liftHash = hashLift(lifts, TRUE); liftGff(destFile, liftHash, sourceCount, sources); } else if (endsWith(destType, ".gdup")) { rmChromPart(lifts); liftHash = hashLift(lifts, FALSE); liftGdup(destFile, liftHash, sourceCount, sources); } else if (endsWith(destType, ".bed")) { rmChromPart(lifts); liftHash = hashLift(lifts, TRUE); liftBed(destFile, liftHash, sourceCount, sources); } +else if (endsWith(destType, ".bed3")) + { + rmChromPart(lifts); + liftHash = hashLift(lifts, TRUE); + liftBed3(destFile, liftHash, sourceCount, sources); + } else if (endsWith(destType, ".bed8")) { rmChromPart(lifts); liftHash = hashLift(lifts, TRUE); liftBed8(destFile, liftHash, sourceCount, sources); } else if (endsWith(destType, ".gp") || endsWith(destType, ".genepred")) { rmChromPart(lifts); liftHash = hashLift(lifts, TRUE); if (extGenePred) liftGenePredExt(destFile, liftHash, sourceCount, sources); else liftGenePred(destFile, liftHash, sourceCount, sources); }