e70152e44cc66cc599ff6b699eb8adc07f3e656a kent Sat May 24 21:09:34 2014 -0700 Adding Copyright NNNN Regents of the University of California to all files I believe with reasonable certainty were developed under UCSC employ or as part of Genome Browser copyright assignment. diff --git src/hg/lib/rmskOut.c src/hg/lib/rmskOut.c index f734a00..52fcdb8 100644 --- src/hg/lib/rmskOut.c +++ src/hg/lib/rmskOut.c @@ -1,394 +1,397 @@ /* rmskOut.c was originally generated by the autoSql program, which also * generated rmskOut.h and rmskOut.sql. This module links the database and the RAM * representation of objects. */ +/* Copyright (C) 2014 The Regents of the University of California + * See README in this or parent directory for licensing information. */ + #include "common.h" #include "jksql.h" #include "obscure.h" #include "linefile.h" #include "rmskOut.h" #include "binRange.h" void rmskOutStaticLoad(char **row, struct rmskOut *ret) /* Load a row from rmskOut table into ret. The contents of ret will * be replaced at the next call to this function. */ { ret->swScore = sqlUnsigned(row[0]); ret->milliDiv = sqlUnsigned(row[1]); ret->milliDel = sqlUnsigned(row[2]); ret->milliIns = sqlUnsigned(row[3]); ret->genoName = row[4]; ret->genoStart = sqlUnsigned(row[5]); ret->genoEnd = sqlUnsigned(row[6]); ret->genoLeft = sqlSigned(row[7]); strcpy(ret->strand, row[8]); ret->repName = row[9]; ret->repClass = row[10]; ret->repFamily = row[11]; ret->repStart = sqlSigned(row[12]); ret->repEnd = sqlSigned(row[13]); ret->repLeft = sqlSigned(row[14]); strcpy(ret->id, row[15]); } struct rmskOut *rmskOutLoad(char **row) /* Load a rmskOut from row fetched with select * from rmskOut * from database. Dispose of this with rmskOutFree(). */ { struct rmskOut *ret; AllocVar(ret); ret->swScore = sqlUnsigned(row[0]); ret->milliDiv = sqlUnsigned(row[1]); ret->milliDel = sqlUnsigned(row[2]); ret->milliIns = sqlUnsigned(row[3]); ret->genoName = cloneString(row[4]); ret->genoStart = sqlUnsigned(row[5]); ret->genoEnd = sqlUnsigned(row[6]); ret->genoLeft = sqlSigned(row[7]); strcpy(ret->strand, row[8]); ret->repName = cloneString(row[9]); ret->repClass = cloneString(row[10]); ret->repFamily = cloneString(row[11]); ret->repStart = sqlSigned(row[12]); ret->repEnd = sqlSigned(row[13]); ret->repLeft = sqlSigned(row[14]); strcpy(ret->id, row[15]); return ret; } struct rmskOut *rmskOutCommaIn(char **pS, struct rmskOut *ret) /* Create a rmskOut out of a comma separated string. * This will fill in ret if non-null, otherwise will * return a new rmskOut */ { char *s = *pS; if (ret == NULL) AllocVar(ret); ret->swScore = sqlUnsignedComma(&s); ret->milliDiv = sqlUnsignedComma(&s); ret->milliDel = sqlUnsignedComma(&s); ret->milliIns = sqlUnsignedComma(&s); ret->genoName = sqlStringComma(&s); ret->genoStart = sqlUnsignedComma(&s); ret->genoEnd = sqlUnsignedComma(&s); ret->genoLeft = sqlSignedComma(&s); sqlFixedStringComma(&s, ret->strand, sizeof(ret->strand)); ret->repName = sqlStringComma(&s); ret->repClass = sqlStringComma(&s); ret->repFamily = sqlStringComma(&s); ret->repStart = sqlSignedComma(&s); ret->repEnd = sqlSignedComma(&s); ret->repLeft = sqlSignedComma(&s); sqlFixedStringComma(&s, ret->id, sizeof(ret->id)); *pS = s; return ret; } void rmskOutFree(struct rmskOut **pEl) /* Free a single dynamically allocated rmskOut such as created * with rmskOutLoad(). */ { struct rmskOut *el; if ((el = *pEl) == NULL) return; freeMem(el->genoName); freeMem(el->repName); freeMem(el->repClass); freeMem(el->repFamily); freez(pEl); } void rmskOutFreeList(struct rmskOut **pList) /* Free a list of dynamically allocated rmskOut's */ { struct rmskOut *el, *next; for (el = *pList; el != NULL; el = next) { next = el->next; rmskOutFree(&el); } *pList = NULL; } void rmskOutOutput(struct rmskOut *el, FILE *f, char sep, char lastSep) /* Print out rmskOut. Separate fields with sep. Follow last field with lastSep. */ { fprintf(f, "%u", el->swScore); fputc(sep,f); fprintf(f, "%u", el->milliDiv); fputc(sep,f); fprintf(f, "%u", el->milliDel); fputc(sep,f); fprintf(f, "%u", el->milliIns); fputc(sep,f); if (sep == ',') fputc('"',f); fprintf(f, "%s", el->genoName); if (sep == ',') fputc('"',f); fputc(sep,f); fprintf(f, "%u", el->genoStart); fputc(sep,f); fprintf(f, "%u", el->genoEnd); fputc(sep,f); fprintf(f, "%d", el->genoLeft); fputc(sep,f); if (sep == ',') fputc('"',f); fprintf(f, "%s", el->strand); if (sep == ',') fputc('"',f); fputc(sep,f); if (sep == ',') fputc('"',f); fprintf(f, "%s", el->repName); if (sep == ',') fputc('"',f); fputc(sep,f); if (sep == ',') fputc('"',f); fprintf(f, "%s", el->repClass); if (sep == ',') fputc('"',f); fputc(sep,f); if (sep == ',') fputc('"',f); fprintf(f, "%s", el->repFamily); if (sep == ',') fputc('"',f); fputc(sep,f); fprintf(f, "%d", el->repStart); fputc(sep,f); fprintf(f, "%d", el->repEnd); fputc(sep,f); fprintf(f, "%d", el->repLeft); fputc(sep,f); if (sep == ',') fputc('"',f); fprintf(f, "%s", el->id); if (sep == ',') fputc('"',f); fputc(lastSep,f); } /* ------------ End of AutoSQL generated code. ------------------ */ void rmskOutOpenVerify(char *fileName, struct lineFile **retFile, boolean *retEmpty) /* Open repeat masker .out file and verify that it is good. * Set retEmpty if it has header characteristic of an empty file. */ { struct lineFile *lf = lineFileOpen(fileName, TRUE); char *line; int lineSize; lineFileNeedNext(lf, &line, &lineSize); if (startsWith("There were no", line)) *retEmpty = TRUE; if (startsWith(" SW", line)) { line = skipLeadingSpaces(line); if (!startsWith("SW", line)) errAbort("%s doesn't seem to be a RepeatMasker .out file", fileName); lineFileSkip(lf, 2); *retEmpty = FALSE; } *retFile = lf; } static int negParenNum(struct lineFile *lf, char *s) /* Return number where negative is shown by parenthization. */ { boolean hasParen = FALSE; int result; if (*s == '(') { hasParen = TRUE; ++s; } if (!isdigit(s[0]) && s[0] != '-') errAbort("Expecting digit line %d of %s got %s\n", lf->lineIx, lf->fileName, s); result = atoi(s); if (hasParen) result = -result; return result; } static void parseClassAndFamily(char *s, char **retClass, char **retFamily) /* Separate repeatMasker class/family .*/ { char *e = strchr(s, '/'); if (e == NULL) *retClass = *retFamily = s; else { *e++ = 0; *retClass = s; *retFamily = e; } } struct rmskOut *rmskOutReadNext(struct lineFile *lf) /* Read next record from repeat masker file. Return NULL at EOF. */ { char *words[32]; int wordCount; char id; struct rmskOut *ret; char *class, *family; if ((wordCount = lineFileChop(lf, words)) == 0) return NULL; if (wordCount < 14 ) errAbort("Expecting at least 14 words line %d of %s", lf->lineIx, lf->fileName); if (wordCount >= 15) id = words[14][0]; else id = 0; AllocVar(ret); ret->swScore = lineFileNeedNum(lf, words, 0); ret->milliDiv = round(10.0*atof(words[1])); ret->milliDel = round(10.0*atof(words[2])); ret->milliIns = round(10.0*atof(words[3])); ret->genoName = cloneString(words[4]); ret->genoStart = lineFileNeedNum(lf, words, 5)-1; ret->genoEnd = lineFileNeedNum(lf, words, 6); ret->genoLeft = -negParenNum(lf, words[7]); if (sameString(words[8], "C")) ret->strand[0] = '-'; else if (sameString(words[8], "+")) ret->strand[0] = '+'; else errAbort("Unexpected strand char line %d of %s", lf->lineIx, lf->fileName); ret->repName = cloneString(words[9]); parseClassAndFamily(words[10], &class, &family); ret->repClass = cloneString(class); ret->repFamily = cloneString(family); ret->repStart = negParenNum(lf, words[11])-1; ret->repEnd = sqlSigned(words[12]); ret->repLeft = -negParenNum(lf, words[13]); return ret; } struct rmskOut *rmskOutRead(char *fileName) /* Read all records in .out file and return as list. */ { struct lineFile *lf; boolean isEmpty; struct rmskOut *list = NULL, *el; rmskOutOpenVerify(fileName, &lf, &isEmpty); if (!isEmpty) { while ((el = rmskOutReadNext(lf)) != NULL) { slAddHead(&list, el); } slReverse(&list); } lineFileClose(&lf); return list; } void rmskOutWriteHead(FILE *f) /* Write out rmsk header lines. */ { fprintf(f, " SW perc perc perc query position in query matching repeat position in repeat\n" "score div. del. ins. sequence begin end (left) repeat class/family begin end (left) ID\n" "\n"); } static void parenNeg(int num, char *s, size_t sSize) /* Write number to s, parenthesizing if negative. */ { if (num <= 0) safef(s, sSize, "(%d)", -num); else safef(s, sSize, "%d", num); } void rmskOutWriteOneOut(struct rmskOut *rmsk, FILE *f) /* Write one rmsk in .out format to file. */ { char genoLeft[24], repStart[24], repLeft[24]; char classFam[128]; parenNeg(-rmsk->genoLeft, genoLeft, sizeof(genoLeft)); parenNeg(rmsk->repStart+1, repStart, sizeof(repStart)); parenNeg(-rmsk->repLeft, repLeft, sizeof(repLeft)); if (sameString(rmsk->repClass, rmsk->repFamily)) safef(classFam, sizeof(classFam), "%s", rmsk->repClass); else safef(classFam, sizeof(classFam), "%s/%s", rmsk->repClass, rmsk->repFamily); fprintf(f, "%5d %5.1f %4.1f %4.1f %-9s %7d %7d %9s %1s %-14s %-19s %6s %4d %6s %6s\n", rmsk->swScore, 0.1*rmsk->milliDiv, 0.1*rmsk->milliDel, 0.1*rmsk->milliIns, rmsk->genoName, rmsk->genoStart+1, rmsk->genoEnd, genoLeft, (rmsk->strand[0] == '+' ? "+" : "C"), rmsk->repName, classFam, repStart, rmsk->repEnd, repLeft, rmsk->id); } void rmskOutWriteAllOut(char *fileName, struct rmskOut *rmskList) /* Write .out format file containing all in rmskList. */ { FILE *f = mustOpen(fileName, "w"); struct rmskOut *rmsk; rmskOutWriteHead(f); for (rmsk = rmskList; rmsk != NULL; rmsk = rmsk->next) rmskOutWriteOneOut(rmsk, f); fclose(f); } struct binKeeper *readRepeats(char *chrom, char *rmskFileName, struct hash *tSizeHash) /* read all repeats for a chromosome of size size, returns results in binKeeper structure for fast query*/ { boolean rmskRet; struct lineFile *rmskF = NULL; struct rmskOut *rmsk; struct binKeeper *bk; int size; size = hashIntVal(tSizeHash, chrom); bk = binKeeperNew(0, size); assert(size > 1); rmskOutOpenVerify(rmskFileName ,&rmskF , &rmskRet); while ((rmsk = rmskOutReadNext(rmskF)) != NULL) { binKeeperAdd(bk, rmsk->genoStart, rmsk->genoEnd, rmsk); } lineFileClose(&rmskF); return bk; } struct hash *readRepeatsAll(char *sizeFileName, char *rmskDir) /* read all repeats for a all chromosomes getting sizes from sizeFileNmae , returns results in hash of binKeeper structure for fast query*/ { boolean rmskRet; struct binKeeper *bk; struct lineFile *rmskF = NULL; struct rmskOut *rmsk; struct lineFile *lf = lineFileOpen(sizeFileName, TRUE); struct hash *hash = newHash(0); char *row[2]; char rmskFileName[256]; while (lineFileRow(lf, row)) { char *name = row[0]; int size = lineFileNeedNum(lf, row, 1); if (hashLookup(hash, name) != NULL) warn("Duplicate %s, ignoring all but first\n", name); else { bk = binKeeperNew(0, size); assert(size > 1); safef(rmskFileName, sizeof(rmskFileName), "%s/%s.fa.out",rmskDir,name); rmskOutOpenVerify(rmskFileName ,&rmskF , &rmskRet); while ((rmsk = rmskOutReadNext(rmskF)) != NULL) { binKeeperAdd(bk, rmsk->genoStart, rmsk->genoEnd, rmsk); } lineFileClose(&rmskF); hashAdd(hash, name, bk); } } lineFileClose(&lf); return hash; }