aa6447741ae61479b43cbd2b02182aa405487dab hiram Fri Sep 30 15:19:00 2022 -0700 can now read qSizes from file or URL refs #29819 diff --git src/hg/mouseStuff/netClass/netClass.c src/hg/mouseStuff/netClass/netClass.c index 07d216b..9d53d85 100644 --- src/hg/mouseStuff/netClass/netClass.c +++ src/hg/mouseStuff/netClass/netClass.c @@ -1,78 +1,83 @@ /* netClass - Add classification info to net. */ /* Copyright (C) 2013 The Regents of the University of California * See kent/LICENSE or http://genome.ucsc.edu/license/ for licensing information. */ #include "common.h" #include "linefile.h" +#include "chromInfo.h" #include "hash.h" #include "options.h" #include "rbTree.h" #include "jksql.h" #include "hdb.h" #include "localmem.h" #include "agpGap.h" #include "simpleRepeat.h" #include "liftUp.h" #include "chainNet.h" /* Command line switches. */ char *tNewR = NULL; char *qNewR = NULL; boolean noAr = FALSE; char *qRepeatTable = NULL; char *tRepeatTable = NULL; +char *qSizes = NULL; struct hash *liftHashT = NULL; struct hash *liftHashQ = NULL; /* Localmem obj shared by cached query rbTrees. */ struct lm *qLm = NULL; /* command line option specifications */ static struct optionSpec optionSpecs[] = { {"tNewR", OPTION_STRING}, {"qNewR", OPTION_STRING}, {"noAr", OPTION_BOOLEAN}, {"qRepeats", OPTION_STRING}, {"tRepeats", OPTION_STRING}, {"liftT", OPTION_STRING}, {"liftQ", OPTION_STRING}, + {"qSizes", OPTION_STRING}, {NULL, 0} }; static void usage() /* Explain usage and exit. */ { errAbort( "netClass - Add classification info to net\n" "usage:\n" " netClass [options] in.net tDb qDb out.net\n" " tDb - database to fetch target repeat masker table information\n" " qDb - database to fetch query repeat masker table information\n" "options:\n" " -tNewR=dir - Dir of chrN.out.spec files, with RepeatMasker .out format\n" " lines describing lineage specific repeats in target\n" " -qNewR=dir - Dir of chrN.out.spec files for query\n" " -noAr - Don't look for ancient repeats\n" " -qRepeats=table - table name for query repeats in place of rmsk\n" " -tRepeats=table - table name for target repeats in place of rmsk\n" " - for example: -tRepeats=windowmaskerSdust\n" " -liftQ=file.lft - Lift in.net's query coords to chrom-level using\n" " file.lft (for accessing chrom-level coords in qDb)\n" " -liftT=file.lft - Lift in.net's target coords to chrom-level using\n" " file.lft (for accessing chrom-level coords in tDb)\n" + " -qSizes=chrom.sizes - file with query chrom.sizes instead of reading\n" + " - the chromInfo table from the database\n" ); } struct chrom /* Basic information on a chromosome. */ { struct chrom *next; /* Next in list */ char *name; /* Chromosome name, allocated in hash. */ int size; /* Chromosome size. */ struct rbTree *nGaps; /* Gaps in sequence (Ns) */ struct rbTree *repeats; /* Repeats in sequence */ struct rbTree *newRepeats; /* New (lineage specific) repeats. */ struct rbTree *oldRepeats; /* Old (pre-split) repeats. */ struct rbTree *trf; /* Simple repeats. */ }; @@ -665,50 +670,66 @@ else errAbort("Can't find ancientRepeat table in %s or %s", sqlGetDatabase(tConn), sqlGetDatabase(qConn)); char query[1024]; sqlSafef(query, sizeof query, "select name,family,class from ancientRepeat"); sr = sqlGetResult(conn, query); while ((row = sqlNextRow(sr)) != NULL) { sprintf(key, "%s.%s.%s", row[0], row[1], row[2]); hashAdd(hash, key, NULL); } sqlFreeResult(&sr); return hash; } -void getChroms(struct sqlConnection *conn, struct hash **retHash, - struct chrom **retList) -/* Get hash of chromosomes from database. */ +static void getChroms(struct sqlConnection *conn, struct hash **retHash, + struct chrom **retList, char *sizes) +/* Get hash of chromosomes from database or from sizes file if given. */ { -struct sqlResult *sr; -char **row; struct chrom *chromList = NULL, *chrom; struct hash *hash = hashNew(8); +if (sizes) + { + struct chromInfo *list = chromInfoListFromFile(sizes); + struct chromInfo *ci = NULL; + for (ci = list; ci; ci=ci->next) + { + AllocVar(chrom); + hashAddSaveName(hash, ci->chrom, chrom, &chrom->name); + chrom->size = ci->size; + slAddHead(&chromList, chrom); + } + } +else + { + struct sqlResult *sr; + char **row; char query[1024]; sqlSafef(query, sizeof query, "select chrom,size from chromInfo"); sr = sqlGetResult(conn, query); while ((row = sqlNextRow(sr)) != NULL) { AllocVar(chrom); hashAddSaveName(hash, row[0], chrom, &chrom->name); chrom->size = atoi(row[1]); slAddHead(&chromList, chrom); } sqlFreeResult(&sr); + } + slReverse(&chromList); *retHash = hash; *retList = chromList; } int liftStart(char *name, int start, struct hash *liftHash) /* Lift start if necessary. */ { int s = start; if (liftHash != NULL) { struct liftSpec *lft = hashMustFindVal(liftHash, name); s += lft->offset; } return s; @@ -871,31 +892,31 @@ { struct chainNet *net; struct lineFile *lf = lineFileOpen(inName, TRUE); FILE *f = mustOpen(outName, "w"); struct chrom *qChromList, *chrom; struct hash *qChromHash; struct hash *arHash = NULL; struct sqlConnection *tConn = sqlConnect(tDb); struct sqlConnection *qConn = sqlConnect(qDb); qLm = lmInit(0); if (!noAr) arHash = getAncientRepeats(tConn, qConn); -getChroms(qConn, &qChromHash, &qChromList); +getChroms(qConn, &qChromHash, &qChromList, qSizes); verbose(1, "Reading gaps in %s\n", qDb); if (sqlTableExists(qConn, "gap")) { getSeqGapsUnsplit(qConn, qChromHash); } else { for (chrom = qChromList; chrom != NULL; chrom = chrom->next) chrom->nGaps = getSeqGaps(qConn, chrom->name); } if (qNewR) { verbose(1, "Reading new repeats from %s\n", qNewR); @@ -976,28 +997,29 @@ int main(int argc, char *argv[]) /* Process command line. */ { char *liftFileQ = NULL, *liftFileT = NULL; optionInit(&argc, argv, optionSpecs); if (argc != 5) usage(); tNewR = optionVal("tNewR", tNewR); qNewR = optionVal("qNewR", qNewR); noAr = optionExists("noAr"); qRepeatTable = optionVal("qRepeats", qRepeatTable); tRepeatTable = optionVal("tRepeats", tRepeatTable); liftFileQ = optionVal("liftQ", liftFileQ); liftFileT = optionVal("liftT", liftFileT); +qSizes = optionVal("qSizes", qSizes); if (liftFileQ != NULL) { struct liftSpec *lifts = readLifts(liftFileQ); liftHashQ = hashLift(lifts, TRUE); } if (liftFileT != NULL) { struct liftSpec *lifts = readLifts(liftFileT); liftHashT = hashLift(lifts, TRUE); } netClass(argv[1], argv[2], argv[3], argv[4]); return 0; }