f952b14fca3833e9d8fef0ae8a086ad15485b552 galt Wed Jan 1 02:11:38 2025 -0800 fix various errors for utils compiled on Rocky 9 hgwdev-new. diff --git src/hg/mouseStuff/phToPsl/phToPsl.c src/hg/mouseStuff/phToPsl/phToPsl.c index 7814e5d..75d2d8a 100644 --- src/hg/mouseStuff/phToPsl/phToPsl.c +++ src/hg/mouseStuff/phToPsl/phToPsl.c @@ -1,372 +1,372 @@ /* phToPsl - Convert from Pattern Hunter to PSL format. */ /* Copyright (C) 2011 The Regents of the University of California * See kent/LICENSE or http://genome.ucsc.edu/license/ for licensing information. */ #include "common.h" #include "linefile.h" #include "hash.h" #include "dystring.h" #include "options.h" #include "psl.h" #include "obscure.h" void usage() /* Explain usage and exit. */ { errAbort( "phToPsl - Convert from Pattern Hunter to PSL format\n" "usage:\n" " phToPsl in.ph qSizes tSizes out.psl\n" "options:\n" " -tName=target (defaults to 'in')\n" ); } char *tName; /* Name of target side of alignment. */ boolean psl = TRUE; /* Output PSL format. */ struct hash *readSizes(char *fileName) /* Read tab-separated file into hash with * name key size value. */ { struct lineFile *lf = lineFileOpen(fileName, TRUE); struct hash *hash = newHash(0); char *row[2]; while (lineFileRow(lf, row)) { char *name = row[0]; int size = lineFileNeedNum(lf, row, 1); if (hashLookup(hash, name) != NULL) warn("Duplicate %s, ignoring all but first\n", name); else hashAdd(hash, name, intToPt(size)); } lineFileClose(&lf); return hash; } int findSize(struct hash *hash, char *name) /* Find size of name in hash or die trying. */ { void *val = hashMustFindVal(hash, name); return ptToInt(val); } void countInserts(char *s, int size, unsigned *retNumInsert, int *retBaseInsert) /* Count up number and total size of inserts in s. */ { char c, lastC = s[0]; int i; int baseInsert = 0, numInsert = 0; if (lastC == '-') errAbort("%s starts with -", s); for (i=0; i<size; ++i) { c = s[i]; if (c == '-') { if (lastC != '-') numInsert += 1; baseInsert += 1; } lastC = c; } *retNumInsert = numInsert; *retBaseInsert = baseInsert; } void aliStringToPsl(struct lineFile *lf, char *qName, char *tName, char *qString, char *tString, int qSize, int tSize, int aliSize, int qStart, int tStart, FILE *f) /* Output alignment in a pair of strings with insert chars * to a psl line in a file. */ { unsigned match = 0; /* Number of bases that match */ unsigned misMatch = 0; /* Number of bases that don't match */ unsigned qNumInsert = 0; /* Number of inserts in query */ int qBaseInsert = 0; /* Number of bases inserted in query */ unsigned tNumInsert = 0; /* Number of inserts in target */ int tBaseInsert = 0; /* Number of bases inserted in target */ boolean eitherInsert = FALSE; /* True if either in insert state. */ int blockCount = 1, blockIx=0; boolean qIsRc = FALSE; char strand; int i; char q,t; int qs,qe,ts,te; int *blocks = NULL, *qStarts = NULL, *tStarts = NULL; /* Deal with minus strand issues. */ if (tStart < 0) errAbort("Can't handle minus target strand line %d of %s", lf->lineIx, lf->fileName); if (qStart < 0) { qStart = qSize + qStart; qIsRc = TRUE; } /* First count up number of blocks and inserts. */ countInserts(qString, aliSize, &qNumInsert, &qBaseInsert); countInserts(tString, aliSize, &tNumInsert, &tBaseInsert); blockCount = 1 + qNumInsert + tNumInsert; /* Count up match/mismatch. */ for (i=0; i<aliSize; ++i) { q = qString[i]; t = tString[i]; if (q == t) ++match; else ++misMatch; } /* Deal with minus strand. */ qs = qStart; qe = qs + qBaseInsert + match + misMatch; strand = '+'; if (qIsRc) { strand = '-'; reverseIntRange(&qs, &qe, qSize); } /* Output header */ fprintf(f, "%d\t", match); fprintf(f, "%d\t", misMatch); fprintf(f, "0\t"); fprintf(f, "0\t"); fprintf(f, "%d\t", qNumInsert); fprintf(f, "%d\t", qBaseInsert); fprintf(f, "%d\t", tNumInsert); fprintf(f, "%d\t", tBaseInsert); fprintf(f, "%c\t", strand); fprintf(f, "%s\t", qName); fprintf(f, "%d\t", qSize); fprintf(f, "%d\t", qs); fprintf(f, "%d\t", qe); fprintf(f, "%s\t", tName); fprintf(f, "%d\t", tSize); fprintf(f, "%d\t", tStart); fprintf(f, "%d\t", tStart + tBaseInsert + match + misMatch); fprintf(f, "%d\t", blockCount); if (ferror(f)) { perror("Error writing psl file\n"); errAbort("\n"); } /* Allocate dynamic memory for block lists. */ AllocArray(blocks, blockCount); AllocArray(qStarts, blockCount); AllocArray(tStarts, blockCount); /* Figure block sizes and starts. */ eitherInsert = FALSE; qs = qe = qStart; ts = te = tStart; for (i=0; i<aliSize; ++i) { q = qString[i]; t = tString[i]; if (q == '-' || t == '-') { if (!eitherInsert) { blocks[blockIx] = qe - qs; qStarts[blockIx] = qs; tStarts[blockIx] = ts; ++blockIx; eitherInsert = TRUE; } else if (i > 0) { /* Handle cases like * aca---gtagtg * acacag---gtg */ if ((q == '-' && tString[i-1] == '-') || (t == '-' && qString[i-1] == '-')) { blocks[blockIx] = 0; qStarts[blockIx] = qe; tStarts[blockIx] = te; ++blockIx; } } if (q != '-') qe += 1; if (t != '-') te += 1; } else { if (eitherInsert) { qs = qe; ts = te; eitherInsert = FALSE; } qe += 1; te += 1; } } assert(blockIx == blockCount-1); blocks[blockIx] = qe - qs; qStarts[blockIx] = qs; tStarts[blockIx] = ts; /* Output block sizes */ for (i=0; i<blockCount; ++i) fprintf(f, "%d,", blocks[i]); fprintf(f, "\t"); /* Output qStarts */ for (i=0; i<blockCount; ++i) fprintf(f, "%d,", qStarts[i]); fprintf(f, "\t"); /* Output tStarts */ for (i=0; i<blockCount; ++i) fprintf(f, "%d,", tStarts[i]); fprintf(f, "\n"); /* Clean Up. */ freez(&blocks); freez(&qStarts); freez(&tStarts); } boolean convertOneAli(struct lineFile *lf, char *qName, char *tName, int qSize, int tSize, FILE *f) /* Convert one pattern hunter alignment to one psl line. */ { char c, *line, *parts[16]; int partCount; int qStart, tStart; struct dyString *q = dyStringNew(4096); struct dyString *t = dyStringNew(4096); /* Grab first line and extract starting position from it. */ if (!lineFileNext(lf, &line, NULL)) return FALSE; if (!isdigit(line[0])) { lineFileReuse(lf); return FALSE; } partCount = chopString(line, ":(,)<>", parts, ArraySize(parts)); if (partCount < 6) errAbort("Can't deal with format of Pattern Hunter file %s line %d", lf->fileName, lf->lineIx); qStart = lineFileNeedNum(lf, parts, 1); tStart = lineFileNeedNum(lf, parts, 2); /* Loop through rest of file 4 lines at a time, appending * sequence to q and t strings. */ for (;;) { /* Get next line and check to see if we are at end of * data we want to parse. Append it to q. */ if (!lineFileNext(lf, &line, NULL)) break; c = line[0]; if (c != '-' && !isalpha(c)) { lineFileReuse(lf); break; } dyStringAppend(q, line); /* Skip line with match/mismatch */ lineFileNeedNext(lf, &line, NULL); /* Append next line to t. */ lineFileNeedNext(lf, &line, NULL); dyStringAppend(t, line); /* Skip blank line. */ lineFileNext(lf, &line, NULL); } /* Do some sanity checks and then output. */ if (q->stringSize != t->stringSize) { errAbort("Alignment strings not same size line %d of %s", lf->lineIx, lf->fileName); } if (q->stringSize <= 0) errAbort("Empty alignment string line %d of %s", lf->lineIx, lf->fileName); aliStringToPsl(lf, qName, tName, q->string, t->string, qSize, tSize, q->stringSize, qStart, tStart, f); /* Clean up time. */ dyStringFree(&q); dyStringFree(&t); return TRUE; } boolean convertOneQuery(struct lineFile *lf, struct hash *qSizeHash, struct hash *tSizeHash, FILE *f) /* Convert pattern hunter alignments on one query * to psl */ { char *line, *word; char *qName; int qSize, tSize; /* Grab first line and extract query name from it. */ if (!lineFileNext(lf, &line, NULL)) return FALSE; if (line[0] != '>') errAbort("Expecting line starting with '>' line %d of %s", lf->lineIx, lf->fileName); line += 1; word = nextWord(&line); if (word == NULL) errAbort("Expecting sequence name after '>' line %d of %s", lf->lineIx, lf->fileName); qName = cloneString(word); /* Figure out sizes. */ qSize = findSize(qSizeHash, qName); tSize = findSize(tSizeHash, tName); /* Loop through alignments between this query and target. */ while (convertOneAli(lf, qName, tName, qSize, tSize, f)) ; freez(&qName); return TRUE; } void phToPsl(char *inName, char *qSizes, char *tSizes, char *outName) /* phToPsl - Convert from Pattern Hunter to PSL format. */ { struct lineFile *lf = lineFileOpen(inName, TRUE); FILE *f = mustOpen(outName, "w"); -char dir[256], root[128], ext[64]; +char dir[PATH_LEN], root[FILENAME_LEN], ext[FILEEXT_LEN]; struct hash *qSizeHash = readSizes(qSizes); struct hash *tSizeHash = readSizes(tSizes); splitPath(inName, dir, root, ext); tName = optionVal("tName", root); pslWriteHead(f); while (convertOneQuery(lf, qSizeHash, tSizeHash, f)) ; } int main(int argc, char *argv[]) /* Process command line. */ { optionHash(&argc, argv); if (argc != 5) usage(); phToPsl(argv[1], argv[2], argv[3], argv[4]); return 0; }