8f29deda5ff897fbf1d208907ac7f734afc07ed2 jcasper Tue Oct 1 20:31:17 2024 -0700 Many liftOver functions now have a preserveInput argument, which prepends the input position to the name field. This makes it easier to see what got mapped where. The option is made available through a command-line argument to the liftOver utility and a checkbox in the hgLiftOver CGI, refs #28023 diff --git src/hg/hgLiftOver/hgLiftOver.c src/hg/hgLiftOver/hgLiftOver.c index aae46cd..1017169 100644 --- src/hg/hgLiftOver/hgLiftOver.c +++ src/hg/hgLiftOver/hgLiftOver.c @@ -20,30 +20,31 @@ #include "liftOver.h" #include "liftOverChain.h" #include "errCatch.h" /* CGI Variables */ #define HGLFT_USERDATA_VAR "hglft_userData" /* typed/pasted in data */ #define HGLFT_DATAFILE_VAR "hglft_dataFile" /* file of data to convert */ #define HGLFT_FROMORG_VAR "hglft_fromOrg" /* FROM organism */ #define HGLFT_FROMDB_VAR "hglft_fromDb" /* FROM assembly */ #define HGLFT_TOORG_VAR "hglft_toOrg" /* TO organism */ #define HGLFT_TODB_VAR "hglft_toDb" /* TO assembly */ #define HGLFT_ERRORHELP_VAR "hglft_errorHelp" /* Print explanatory text */ #define HGLFT_REFRESHONLY_VAR "hglft_doRefreshOnly" /* Just refresh drop-down lists */ #define HGLFT_LAST_CHAIN "hglft_lastChain" +#define HGLFT_EXTRA_NAME_INFO "hglft_extranameinfo" /* Include input position in output item names */ /* liftOver options: */ #define HGLFT_MINMATCH "hglft_minMatch" #define HGLFT_MINSIZEQ "hglft_minSizeQ" #define HGLFT_MINCHAINT "hglft_minChainT" #define HGLFT_MULTIPLE "hglft_multiple" #define HGLFT_MINBLOCKS "hglft_minBlocks" #define HGLFT_FUDGETHICK "hglft_fudgeThick" /* Global Variables */ struct cart *cart; /* CGI and other variables */ struct hash *oldVars = NULL; /* Filename prefix */ #define HGLFT "hglft" @@ -54,31 +55,31 @@ char *onChange = "document.mainForm." 
HGLFT_REFRESHONLY_VAR ".value = 1;" "document.mainForm.submit();"; char *chainStringVal(struct liftOverChain *chain) /* keep the last chain in memory in this format */ { char chainS[64]; safef(chainS, sizeof(chainS), "%s.%s", chain->fromDb, chain->toDb); return cloneString(chainS); } void webMain(struct liftOverChain *chain, boolean multiple, boolean keepSettings, int minSizeQ, - int minChainT, float minBlocks, float minMatch, boolean fudgeThick) + int minChainT, float minBlocks, float minMatch, boolean fudgeThick, boolean extraNameInfo) /* set up page for entering data */ { struct dbDb *dbList; char *fromOrg = hOrganism(chain->fromDb), *toOrg = hOrganism(chain->toDb); char *chainString = chainStringVal(chain); cgiParagraph( "This tool converts genome coordinates and annotation files " "from the original to the new assembly using an alignment.  " "The input regions can be entered into the text box or uploaded as a file.  " "For files over 500Mb, use the command-line tool described in our " "LiftOver documentation." "  If a pair of assemblies cannot be selected from the pull-down menus," " a sequential lift may still be possible (e.g., mm9 to mm10 to mm39).  " "If your desired conversion is still not available, please " "contact us." @@ -138,30 +139,39 @@ cgiMakeDoubleVar(HGLFT_MINMATCH, (keepSettings) ? minMatch : chain->minMatch,6); puts(" "); printInfoIcon("The minimum ratio of basepairs of the input region covered by an alignment. 
Regions scoring lower than this will not be lifted at all."); cgiTableFieldEnd(); cgiTableRowEnd(); cgiSimpleTableRowStart(); cgiTableRowEnd(); cgiSimpleTableRowStart(); cgiTableField("Regions defined by chrom:start-end (BED 4 to BED 6)"); cgiTableRowEnd(); cgiSimpleTableRowStart(); +cgiTableField("Prepend input positions to item names:"); +cgiSimpleTableFieldStart(); +cgiMakeCheckBox(HGLFT_EXTRA_NAME_INFO,extraNameInfo); +puts(" "); +printInfoIcon("Lifted items will include their original positions as part of their output names to assist in determining what got mapped where (in case multiple items have the same name in the input). Coordinates are 1-based fully closed, so the BED entry "chr1 100 150 item1" will be labeled "chr1:101-150:item1"."); +cgiTableFieldEnd(); +cgiTableRowEnd(); + +cgiSimpleTableRowStart(); cgiTableField("Allow multiple output regions:"); cgiSimpleTableFieldStart(); cgiMakeCheckBox(HGLFT_MULTIPLE,multiple); puts(" "); printInfoIcon("By default, input regions that map to multiple regions will not be lifted at all. When this option is checked, all targets are output."); cgiTableFieldEnd(); cgiTableRowEnd(); cgiSimpleTableRowStart(); cgiTableField("  Minimum hit size in query:"); cgiSimpleTableFieldStart(); cgiMakeIntVar(HGLFT_MINSIZEQ,(keepSettings) ? minSizeQ : chain->minSizeQ,4); puts(" "); printInfoIcon("In multiple output mode, repeated regions within longer input regions can lead to artifacts. 
The 'hit size' filter allows to keep only targets with a certain length."); cgiTableFieldEnd(); @@ -425,41 +435,43 @@ cartDb = NULL; for (this = chainList; this != NULL; this = this->next) { double score = scoreLiftOverChain(this, fromOrg, fromDb, toOrg, toDb, cartOrg, cartDb, dbRank, dbDbHash); if (score > bestScore) { choice = this; bestScore = score; } } return choice; } + void doMiddle(struct cart *theCart) /* Set up globals and make web page */ { char *userData; char *organism; char *db; float minBlocks, minMatch; boolean multiple, fudgeThick; int minSizeQ, minChainT; boolean refreshOnly = FALSE; boolean keepSettings = FALSE; +boolean extraNameInfo = FALSE; char *thisChain = NULL; char *lastChain = NULL; struct liftOverChain *chainList = NULL, *choice; cart = theCart; if (cgiOptionalString(HGLFT_ERRORHELP_VAR)) { puts("
");
     puts(liftOverErrHelp());
     puts("
"); return; } @@ -479,79 +491,81 @@ choice = defaultChoices(chainList, db); thisChain = chainStringVal(choice); if (choice == NULL) errAbort("Sorry, no conversions available from this assembly\n"); minSizeQ = cartCgiUsualInt(cart, HGLFT_MINSIZEQ, choice->minSizeQ); minChainT = cartCgiUsualInt(cart, HGLFT_MINCHAINT, choice->minChainT); minBlocks = cartCgiUsualDouble(cart, HGLFT_MINBLOCKS, choice->minBlocks); minMatch = cartCgiUsualDouble(cart, HGLFT_MINMATCH, choice->minMatch); fudgeThick = cartCgiUsualBoolean(cart, HGLFT_FUDGETHICK, (choice->fudgeThick[0]=='Y') ? TRUE : FALSE); multiple = cartCgiUsualBoolean(cart, HGLFT_MULTIPLE, (choice->multiple[0]=='Y') ? TRUE : FALSE); refreshOnly = cartCgiUsualInt(cart, HGLFT_REFRESHONLY_VAR, 0); lastChain = cartCgiUsualString(cart, HGLFT_LAST_CHAIN, NULL); if (lastChain && thisChain && sameString(lastChain, thisChain)) keepSettings = TRUE; +extraNameInfo = cartCgiUsualBoolean(cart, HGLFT_EXTRA_NAME_INFO, FALSE); -webMain(choice, multiple, keepSettings, minSizeQ, minChainT, minBlocks, minMatch, fudgeThick); +webMain(choice, multiple, keepSettings, minSizeQ, minChainT, minBlocks, minMatch, fudgeThick, extraNameInfo); liftOverChainFreeList(&chainList); struct errCatch *errCatch = errCatchNew(); if (errCatchStart(errCatch)) { if (!refreshOnly && userData != NULL && userData[0] != '\0') { struct hash *chainHash = newHash(0); char *chainFile; struct tempName oldTn, mappedTn, unmappedTn; FILE *old, *mapped, *unmapped; char *line; int lineSize; char *fromDb, *toDb; int ct = -1, errCt = 0; enum liftOverFileType lft; /* read in user data and save to file */ makeTempName(&oldTn, HGLFT, ".user"); old = mustOpen(oldTn.forCgi, "w"); + fputs(userData, old); fputs("\n", old); /* in case user doesn't end last line */ carefulClose(&old); chmod(oldTn.forCgi, 0666); /* setup output files -- one for converted lines, the other * for lines that could not be mapped */ makeTempName(&mappedTn, HGLFT, ".bed"); makeTempName(&unmappedTn, HGLFT, ".err"); 
mapped = mustOpen(mappedTn.forCgi, "w"); chmod(mappedTn.forCgi, 0666); unmapped = mustOpen(unmappedTn.forCgi, "w"); chmod(unmappedTn.forCgi, 0666); fromDb = cgiString(HGLFT_FROMDB_VAR); toDb = cgiString(HGLFT_TODB_VAR); chainFile = liftOverChainFile(fromDb, toDb); if (chainFile == NULL) errAbort("ERROR: Can't convert from %s to %s: no chain file loaded", fromDb, toDb); readLiftOverMap(chainFile, chainHash); lft = liftOverSniff(oldTn.forCgi); if (lft == bed) ct = liftOverBed(oldTn.forCgi, chainHash, minMatch, minBlocks, 0, minSizeQ, minChainT, 0, - fudgeThick, mapped, unmapped, multiple, FALSE, NULL, &errCt); + fudgeThick, mapped, unmapped, multiple, FALSE, NULL, &errCt, extraNameInfo); else if (lft == positions) ct = liftOverPositions(oldTn.forCgi, chainHash, minMatch, minBlocks, 0, minSizeQ, minChainT, 0, fudgeThick, mapped, unmapped, multiple, NULL, &errCt); if (ct == -1) /* programming error */ errAbort("ERROR: Unsupported data format.\n"); webNewSection("Results"); if (ct > 0) { /* some records succesfully converted */ cgiParagraph(""); printf("Successfully converted %d record", ct);