fc825ca18eeb99e3807a73d23fd26b614807fa0f markd Tue Oct 11 19:26:06 2022 -0700 add row number of output psl to pslMap mapInfo file to make it easy to sync up the two files diff --git src/utils/pslMap/pslMap.c src/utils/pslMap/pslMap.c index 4c24925..2477cda 100644 --- src/utils/pslMap/pslMap.c +++ src/utils/pslMap/pslMap.c @@ -38,31 +38,31 @@ static boolean swapIn = FALSE; static boolean simplifyMappingIds = FALSE; static char* mapInfoFile = NULL; static char* mappingPslFile = NULL; static char *mapInfoHdr = "#srcQName\t" "srcQStart\t" "srcQEnd\t" "srcQSize\t" "srcTName\t" "srcTStart\t" "srcTEnd\t" "srcStrand\t" "srcAligned\t" "mappingQName\t" "mappingQStart\t" "mappingQEnd\t" "mappingTName\t" "mappingTStart\t" "mappingTEnd\t" "mappingStrand\t" "mappingId\t" "mappedQName\t" "mappedQStart\t" "mappedQEnd\t" "mappedTName\t" "mappedTStart\t" "mappedTEnd\t" "mappedStrand\t" - "mappedAligned\t" "qStartTrunc\t" "qEndTrunc\n"; + "mappedAligned\t" "qStartTrunc\t" "qEndTrunc\t" "mappedPslLine\n"; static void usage() /* usage msg and exit */ { /* message got huge, so it's in a generate file */ static char *usageMsg = #include "usage.msg" ; errAbort("%s", usageMsg); } static void verbosePslNl(int verbosity, char *msg, struct psl *psl) /* Verbose logging of msg, if not null, followed by a psl if not null, followed by a new line */ { if (verboseLevel() >= verbosity) @@ -220,165 +220,173 @@ dyStringFree(&idBuf); return mapAlns; } static int pslAlignedBases(struct psl *psl) /* get number of aligned bases in a psl. Need because we can't set the stats * in the mapped psl */ { int i, cnt = 0; for (i = 0; i < psl->blockCount; i++) cnt += psl->blockSizes[i]; return cnt; } static void writeMapInfo(FILE* fh, struct psl *inPsl, struct mapAln *mapAln, - struct psl* mappedPsl) + struct psl* mappedPsl, unsigned outPslLine) /* write mapInfo row. mapAln and mappedPsl are NULL in not mapped */ { /* srcQName srcQStart srcQEnd srcQSize */ fprintf(fh, "%s\t%d\t%d\t%d\t", inPsl->qName, inPsl->qStart, inPsl->qEnd, inPsl->qSize); /* srcTName srcTStart srcTEnd srcStrand srcAligned */ fprintf(fh, "%s\t%d\t%d\t%s\t%d\t", inPsl->tName, inPsl->tStart, inPsl->tEnd, inPsl->strand, pslAlignedBases(inPsl)); if (mapAln != NULL) { /* mappingQName mappingQStart mappingQEnd */ fprintf(fh, "%s\t%d\t%d", mapAln->psl->qName, mapAln->psl->qStart, mapAln->psl->qEnd); /* mappingTName mappingTStart mappingTEnd mappingStrand mappingId */ fprintf(fh, "\t%s\t%d\t%d\t%s\t%d\t", mapAln->psl->tName, mapAln->psl->tStart, mapAln->psl->tEnd, mapAln->psl->strand, mapAln->id); /* mappedQName mappedQStart mappedQEnd mappedTName mappedTStart mappedTEnd mappedStrand */ fprintf(fh, "%s\t%d\t%d\t%s\t%d\t%d\t%s\t", mappedPsl->qName, mappedPsl->qStart, mappedPsl->qEnd, mappedPsl->tName, mappedPsl->tStart, mappedPsl->tEnd, mappedPsl->strand); /* mappedAligned qStartTrunc qEndTrunc */ - fprintf(fh, "%d\t%d\t%d\n", + fprintf(fh, "%d\t%d\t%d\t", pslAlignedBases(mappedPsl), (mappedPsl->qStart-inPsl->qStart), (inPsl->qEnd-mappedPsl->qEnd)); + /* mappedPslLine */ + fprintf(fh, "%u", outPslLine); } else { int i; - for (i = 9; i < 26; i++) + for (i = 9; i < 27; i++) fputc('\t', fh); - fputc('\n', fh); } +fputc('\n', fh); } static void addQNameSuffix(struct psl *psl) /* add the suffix to the mapped psl qName */ { char *oldQName = psl->qName; char *newQName = needMem(strlen(psl->qName)+strlen(suffix)+1); strcpy(newQName, oldQName); strcat(newQName, suffix); psl->qName = newQName; freeMem(oldQName); } static void mappedPslOutput(struct psl *inPsl, struct mapAln *mapAln, struct psl* mappedPsl, - FILE* outPslFh, FILE *mapInfoFh, FILE *mappingPslFh) + FILE* outPslFh, FILE *mapInfoFh, FILE *mappingPslFh, unsigned outPslLine) /* output one mapped psl */ { if (suffix != NULL) addQNameSuffix(mappedPsl); pslTabOut(mappedPsl, outPslFh); if (mapInfoFh != NULL) - writeMapInfo(mapInfoFh, inPsl, mapAln, mappedPsl); + writeMapInfo(mapInfoFh, inPsl, mapAln, mappedPsl, outPslLine); if (mappingPslFh != NULL) pslTabOut(mapAln->psl, mappingPslFh); } static boolean mapPslPair(struct psl *inPsl, struct mapAln *mapAln, - FILE* outPslFh, FILE *mapInfoFh, FILE *mappingPslFh) + FILE* outPslFh, FILE *mapInfoFh, FILE *mappingPslFh, + unsigned* outPslLineRef) /* map one pair of query and target PSL */ { struct psl* mappedPsl; if (inPsl->tSize != mapAln->psl->qSize) errAbort("Error: inPsl %s tSize (%d) != mapping alignment %s qSize (%d) (perhaps you need to specify -swapMap?)\n", inPsl->tName, inPsl->tSize, mapAln->psl->qName, mapAln->psl->qSize); verbosePslNl(2, "inAln", inPsl); verbosePslNl(2, "mapAln", mapAln->psl); mappedPsl = pslTransMap(mapOpts, inPsl, mapAln->psl); verbosePslNl(2, "mappedAln", mappedPsl); /* only output if blocks were actually mapped */ boolean wasMapped = mappedPsl != NULL; if (wasMapped) - mappedPslOutput(inPsl, mapAln, mappedPsl, outPslFh, mapInfoFh, mappingPslFh); + { + mappedPslOutput(inPsl, mapAln, mappedPsl, outPslFh, mapInfoFh, mappingPslFh, *outPslLineRef); + (*outPslLineRef)++; + } pslFree(&mappedPsl); return wasMapped; } static void mapQueryPsl(struct psl* inPsl, struct genomeRangeTree *mapAlns, - FILE* outPslFh, FILE *mapInfoFh, FILE *mappingPslFh) + FILE* outPslFh, FILE *mapInfoFh, FILE *mappingPslFh, + unsigned* outPslLineRef) /* map a query psl to all targets */ { static struct dyString *idBuf = NULL; struct range *overMapAlnNodes = genomeRangeTreeAllOverlapping(mapAlns, getMappingId(inPsl->tName, &idBuf), inPsl->tStart, inPsl->tEnd); struct range *overMapAlnNode; struct mapAln *overMapAln; boolean wasMapped = FALSE; for (overMapAlnNode = overMapAlnNodes; overMapAlnNode != NULL; overMapAlnNode = overMapAlnNode->next) { for (overMapAln = overMapAlnNode->val; overMapAln != NULL; overMapAln = overMapAln->next) { if ((overMapAln->qNameMatch == NULL) || sameString(inPsl->qName, overMapAln->qNameMatch)) { - if (mapPslPair(inPsl, overMapAln, outPslFh, mapInfoFh, mappingPslFh)) + if (mapPslPair(inPsl, overMapAln, outPslFh, mapInfoFh, mappingPslFh, outPslLineRef)) wasMapped = TRUE; } } } if ((mapInfoFh != NULL) && !wasMapped) - writeMapInfo(mapInfoFh, inPsl, NULL, NULL); + writeMapInfo(mapInfoFh, inPsl, NULL, NULL, 0); } static void pslMap(char* inPslFile, char *mapFile, char *outPslFile) /* project inPsl query through mapFile query to mapFile target */ { struct genomeRangeTree *mapAlns; struct psl* inPsl; struct lineFile* inPslLf = pslFileOpen(inPslFile); FILE *outPslFh, *mapInfoFh = NULL, *mappingPslFh = NULL; +unsigned outPslLine = 0; if (chainMapFile) mapAlns = loadMapChains(mapFile); else mapAlns = loadMapPsls(mapFile); outPslFh = mustOpen(outPslFile, "w"); if (mapInfoFile != NULL) { mapInfoFh = mustOpen(mapInfoFile, "w"); fputs(mapInfoHdr, mapInfoFh); } if (mappingPslFile != NULL) mappingPslFh = mustOpen(mappingPslFile, "w"); while ((inPsl = pslNext(inPslLf)) != NULL) { if (swapIn) pslSwap(inPsl, FALSE); - mapQueryPsl(inPsl, mapAlns, outPslFh, mapInfoFh, mappingPslFh); + mapQueryPsl(inPsl, mapAlns, outPslFh, mapInfoFh, mappingPslFh, &outPslLine); pslFree(&inPsl); } carefulClose(&mappingPslFh); carefulClose(&mapInfoFh); carefulClose(&outPslFh); lineFileClose(&inPslLf); } int main(int argc, char *argv[]) /* Process command line. */ { optionInit(&argc, argv, optionSpecs); if (argc != 4) usage(); suffix = optionVal("suffix", NULL);