aa6447741ae61479b43cbd2b02182aa405487dab
hiram
  Fri Sep 30 15:19:00 2022 -0700
can now read qSizes from file or URL refs #29819

diff --git src/hg/mouseStuff/netClass/netClass.c src/hg/mouseStuff/netClass/netClass.c
index 07d216b..9d53d85 100644
--- src/hg/mouseStuff/netClass/netClass.c
+++ src/hg/mouseStuff/netClass/netClass.c
@@ -1,1003 +1,1025 @@
 /* netClass - Add classification info to net. */
 
 /* Copyright (C) 2013 The Regents of the University of California 
  * See kent/LICENSE or http://genome.ucsc.edu/license/ for licensing information. */
 #include "common.h"
 #include "linefile.h"
+#include "chromInfo.h"
 #include "hash.h"
 #include "options.h"
 #include "rbTree.h"
 #include "jksql.h"
 #include "hdb.h"
 #include "localmem.h"
 #include "agpGap.h"
 #include "simpleRepeat.h"
 #include "liftUp.h"
 #include "chainNet.h"
 
 
 /* Command line switches. */
 char *tNewR = NULL;
 char *qNewR = NULL;
 boolean noAr = FALSE;
 char *qRepeatTable = NULL;
 char *tRepeatTable = NULL;
+char *qSizes = NULL;
 struct hash *liftHashT = NULL;
 struct hash *liftHashQ = NULL;
 
 /* Localmem obj shared by cached query rbTrees. */
 struct lm *qLm = NULL;
 
 /* command line option specifications */
 static struct optionSpec optionSpecs[] = {
     {"tNewR", OPTION_STRING},
     {"qNewR", OPTION_STRING},
     {"noAr", OPTION_BOOLEAN},
     {"qRepeats", OPTION_STRING},
     {"tRepeats", OPTION_STRING},
     {"liftT", OPTION_STRING},
     {"liftQ", OPTION_STRING},
+    {"qSizes", OPTION_STRING},
     {NULL, 0}
 };
 
 static void usage()
 /* Explain usage and exit. */
 {
 errAbort(
   "netClass - Add classification info to net\n"
   "usage:\n"
   "   netClass [options] in.net tDb qDb out.net\n"
   "       tDb - database to fetch target repeat masker table information\n"
   "       qDb - database to fetch query repeat masker table information\n"
   "options:\n"
   "   -tNewR=dir - Dir of chrN.out.spec files, with RepeatMasker .out format\n"
   "                lines describing lineage specific repeats in target\n"
   "   -qNewR=dir - Dir of chrN.out.spec files for query\n"
   "   -noAr - Don't look for ancient repeats\n"
   "   -qRepeats=table - table name for query repeats in place of rmsk\n"
   "   -tRepeats=table - table name for target repeats in place of rmsk\n"
   "                   - for example: -tRepeats=windowmaskerSdust\n"
   "   -liftQ=file.lft - Lift in.net's query coords to chrom-level using\n"
   "                     file.lft (for accessing chrom-level coords in qDb)\n"
   "   -liftT=file.lft - Lift in.net's target coords to chrom-level using\n"
   "                     file.lft (for accessing chrom-level coords in tDb)\n"
+  "   -qSizes=chrom.sizes - file with query chrom.sizes instead of reading\n"
+  "                   - the chromInfo table from the database\n"
   );
 }
 
 struct chrom
 /* Basic information on a chromosome. */
     {
     struct chrom *next;	  /* Next in list */
     char *name;		  /* Chromosome name, allocated in hash. */
     int size;		  /* Chromosome size. */
     struct rbTree *nGaps; /* Gaps in sequence (Ns) */
     struct rbTree *repeats; /* Repeats in sequence */
     struct rbTree *newRepeats; /* New (lineage specific) repeats. */
     struct rbTree *oldRepeats; /* Old (pre-split) repeats. */
     struct rbTree *trf;	       /* Simple repeats. */
     };
 
 struct simpleRange
 /* A part of a chromosome. */
     {
     int start, end;	/* Half open zero based coordinates. */
     };
 
 #define overlap(r1, r2) ((r1)->start <= (r2)->end && (r2)->start <= (r1)->end)
 
 int simpleRangeCmp(void *va, void *vb)
 /* Return -1 if a before b,  0 if a and b overlap,
  * and 1 if a after b. */
 {
 struct simpleRange *a = va;
 struct simpleRange *b = vb;
 if (a->end <= b->start)
     return -1;
 else if (b->end <= a->start)
     return 1;
 else
     return 0;
 }
 
 static int interSize;	/* Size of intersection. */
 static struct simpleRange interRange; /* Range to intersect with. */
 
 void addInterSize(void *item)
 /* Add range to interSize. */
 {
 struct simpleRange *r = item;
 int size;
 size = rangeIntersection(r->start, r->end, interRange.start, interRange.end);
 interSize += size;
 }
 
 int intersectionSize(struct rbTree *tree, int start, int end)
 /* Return total size of things intersecting range start-end. */
 {
 if (tree == NULL)
     return 0;
 interRange.start = start;
 interRange.end = end;
 interSize = 0;
 rbTreeTraverseRange(tree, &interRange, &interRange, addInterSize);
 return interSize;
 }
 
 void setNGap(char *chr, struct hash *chromHash, struct rbTree *tree)
 /* Set the appropriate chrom->nGaps to tree -- make sure it's NULL first. */
 {
 struct chrom *chrom = hashMustFindVal(chromHash, chr);
 if (chrom->nGaps != NULL)
     errAbort("getSeqGapsUnsplit: query results must be orderd by chrom, but "
 	     "looks like they weren't for %s", chr);
 chrom->nGaps = tree;
 }
 
 void getSeqGapsUnsplit(struct sqlConnection *conn, struct hash *chromHash)
 /* Return a tree of ranges for sequence gaps in all chromosomes, 
  * assuming an unsplit gap table -- when the table is unsplit, it's 
  * probably for a scaffold assembly where we *really* don't want 
  * to do one query per scaffold! */
 {
 struct rbTreeNode **stack = lmAlloc(qLm, 256 * sizeof(stack[0]));
 struct rbTree *tree = rbTreeNewDetailed(simpleRangeCmp, qLm, stack);
 int rowOffset = hOffsetPastBin(sqlGetDatabase(conn), NULL, "gap");
 struct sqlResult *sr;
 char **row;
 char *prevChrom = NULL;
 
 char query[1024];
 sqlSafef(query, sizeof query, "select * from gap order by chrom");
 sr = sqlGetResult(conn, query);
 while ((row = sqlNextRow(sr)) != NULL)
     {
     struct agpGap gap;
     struct simpleRange *range;
     agpGapStaticLoad(row+rowOffset, &gap);
     if (prevChrom == NULL)
 	prevChrom = cloneString(gap.chrom);
     else if (! sameString(prevChrom, gap.chrom))
 	{
 	setNGap(prevChrom, chromHash, tree);
 	freeMem(prevChrom);
 	stack = lmAlloc(qLm, 256 * sizeof(stack[0]));
 	tree = rbTreeNewDetailed(simpleRangeCmp, qLm, stack);
 	prevChrom = cloneString(gap.chrom);
 	}
     lmAllocVar(tree->lm, range);
     range->start = gap.chromStart;
     range->end = gap.chromEnd;
     rbTreeAdd(tree, range);
     }
 if (prevChrom != NULL)
     {
     setNGap(prevChrom, chromHash, tree);
     freeMem(prevChrom);
     }
 sqlFreeResult(&sr);
 }
 
 struct rbTree *getSeqGaps(struct sqlConnection *conn, char *chrom)
 /* Return a tree of ranges for sequence gaps in chromosome */
 {
 struct rbTree *tree = rbTreeNew(simpleRangeCmp);
 int rowOffset;
 struct sqlResult *sr = hChromQuery(conn, "gap", chrom, NULL, &rowOffset);
 char **row;
 
 while ((row = sqlNextRow(sr)) != NULL)
     {
     struct agpGap gap;
     struct simpleRange *range;
     agpGapStaticLoad(row+rowOffset, &gap);
     lmAllocVar(tree->lm, range);
     range->start = gap.chromStart;
     range->end = gap.chromEnd;
     rbTreeAdd(tree, range);
     }
 sqlFreeResult(&sr);
 return tree;
 }
 
 void setTrf(char *chr, struct hash *chromHash, struct rbTree *tree)
 /* Set the appropriate chrom->trf to tree -- make sure it's NULL first. */
 {
 struct chrom *chrom = hashMustFindVal(chromHash, chr);
 if (chrom->trf != NULL)
     errAbort("getTrfUnsplit: query results must be orderd by chrom, but "
 	     "looks like they weren't for %s", chr);
 chrom->trf = tree;
 }
 
 void getTrfUnsplit(struct sqlConnection *conn, struct hash *chromHash)
 /* Return a tree of ranges for simple repeats in all chromosomes, 
  * from a single query on the whole (unsplit) simpleRepeat table. */
 {
 struct rbTreeNode **stack = lmAlloc(qLm, 256 * sizeof(stack[0]));
 struct rbTree *tree = rbTreeNewDetailed(simpleRangeCmp, qLm, stack);
 struct simpleRange *range, *prevRange = NULL;
 struct sqlResult *sr;
 char **row;
 char *prevChrom = NULL;
 
 char query[1024];
 sqlSafef(query, sizeof query, 
 "select chrom,chromStart,chromEnd from simpleRepeat"
 		  " order by chrom,chromStart");
 sr = sqlGetResult(conn, query);
 while ((row = sqlNextRow(sr)) != NULL)
     {
     if (prevChrom == NULL)
 	prevChrom = cloneString(row[0]);
     else if (! sameString(prevChrom, row[0]))
 	{
 	rbTreeAdd(tree, prevRange);
 	setTrf(prevChrom, chromHash, tree);
 	prevRange = NULL;
 	freeMem(prevChrom);
 	stack = lmAlloc(qLm, 256 * sizeof(stack[0]));
 	tree = rbTreeNewDetailed(simpleRangeCmp, qLm, stack);
 	prevChrom = cloneString(row[0]);
 	}
     lmAllocVar(tree->lm, range);
     range->start = sqlUnsigned(row[1]);
     range->end = sqlUnsigned(row[2]);
     if (prevRange == NULL)
 	prevRange = range;
     else if (overlap(range, prevRange))
 	{
 	/* merge r into prevR & discard; prevR gets passed forward. */
 	if (range->end > prevRange->end)
 	    prevRange->end = range->end;
 	if (range->start < prevRange->start)
 	    prevRange->start = range->start;
 	}
     else
 	{
 	rbTreeAdd(tree, prevRange);
 	prevRange = range;
 	}
     }
 if (prevChrom != NULL)
     {
     rbTreeAdd(tree, prevRange);
     setTrf(prevChrom, chromHash, tree);
     freeMem(prevChrom);
     }
 sqlFreeResult(&sr);
 }
 
 struct rbTree *getTrf(struct sqlConnection *conn, char *chrom)
 /* Return a tree of ranges for simple repeats in chromosome. */
 {
 struct rbTree *tree = rbTreeNew(simpleRangeCmp);
 struct simpleRange *range, *prevRange = NULL;
 char query[256];
 struct sqlResult *sr;
 char **row;
 
 sqlSafef(query, sizeof query, "select chromStart,chromEnd from simpleRepeat "
                "where chrom = '%s'",
 	       chrom);
 sr = sqlGetResult(conn, query);
 while ((row = sqlNextRow(sr)) != NULL)
     {
     lmAllocVar(tree->lm, range);
     range->start = sqlUnsigned(row[0]);
     range->end = sqlUnsigned(row[1]);
     if (prevRange == NULL)
 	prevRange = range;
     else if (overlap(range, prevRange))
 	{
 	/* merge r into prevR & discard; prevR gets passed forward. */
 	if (range->end > prevRange->end)
 	    prevRange->end = range->end;
 	if (range->start < prevRange->start)
 	    prevRange->start = range->start;
 	}
     else
 	{
 	rbTreeAdd(tree, prevRange);
 	prevRange = range;
 	}
     }
 if (prevRange != NULL)
     rbTreeAdd(tree, prevRange);
 sqlFreeResult(&sr);
 return tree;
 }
 
 static void setRepeats(char *chr, struct hash *chromHash,
 	struct rbTree *allTree, struct rbTree *newTree)
 /* Set the appropriate chrom->repeats to tree -- make sure it's NULL first. */
 {
 struct chrom *chrom = hashMustFindVal(chromHash, chr);
 if (chrom->repeats != NULL)
     errAbort("getRepeatsUnsplit: query results must be orderd by genoName "
 	     "(chrom), but looks like they weren't for %s", chr);
 chrom->repeats = allTree;
 chrom->newRepeats = newTree;
 }
 
 static void getRepeatsUnsplit(struct sqlConnection *conn,
 	struct hash *chromHash, struct hash *arHash)
 /* Return a tree of ranges for sequence gaps all chromosomes, 
  * assuming an unsplit table -- when the table is unsplit, it's 
  * probably for a scaffold assembly where we *really* don't want 
  * to do one query per scaffold! */
 {
 struct sqlResult *sr;
 char **row;
 struct rbTreeNode **stack = lmAlloc(qLm, 256 * sizeof(stack[0]));
 struct rbTree *allTree = rbTreeNewDetailed(simpleRangeCmp, qLm, stack);
 struct rbTreeNode **newstack = lmAlloc(qLm, 256 * sizeof(newstack[0]));
 struct rbTree *newTree = rbTreeNewDetailed(simpleRangeCmp, qLm, newstack);
 char *prevChrom = NULL;
 struct simpleRange *prevRange = NULL, *prevNewRange = NULL;
 
 char query[1024];
 sqlSafef(query, sizeof query, 
     "select genoName,genoStart,genoEnd,repName,repClass,repFamily from rmsk "
     "order by genoName,genoStart");
 sr = sqlGetResult(conn, query);
 while ((row = sqlNextRow(sr)) != NULL)
     {
     struct simpleRange *range;
     char arKey[512];
     if (prevChrom == NULL)
 	prevChrom = cloneString(row[0]);
     else if (! sameString(prevChrom, row[0]))
 	{
 	rbTreeAdd(allTree, prevRange);
 	if (prevNewRange != NULL)
 	    rbTreeAdd(newTree, prevNewRange);
 	setRepeats(prevChrom, chromHash, allTree, newTree);
 	freeMem(prevChrom);
 	prevRange = prevNewRange = NULL;
 	stack = lmAlloc(qLm, 256 * sizeof(stack[0]));
 	allTree = rbTreeNewDetailed(simpleRangeCmp, qLm, stack);
 	if (arHash != NULL)
 	    {
 	    stack = lmAlloc(qLm, 256 * sizeof(stack[0]));
 	    newTree = rbTreeNewDetailed(simpleRangeCmp, qLm, stack);
 	    }
 	prevChrom = cloneString(row[0]);
 	}
     lmAllocVar(allTree->lm, range);
     range->start = sqlUnsigned(row[1]);
     range->end = sqlUnsigned(row[2]);
     if (prevRange == NULL)
 	prevRange = range;
     else if (overlap(range, prevRange))
 	{
 	/* merge r into prevR & discard; prevR gets passed forward. */
 	if (range->end > prevRange->end)
 	    prevRange->end = range->end;
 	if (range->start < prevRange->start)
 	    prevRange->start = range->start;
 	}
     else
 	{
 	rbTreeAdd(allTree, prevRange);
 	prevRange = range;
 	}
     sprintf(arKey, "%s.%s.%s", row[3], row[4], row[5]);
     if (arHash != NULL && hashLookup(arHash, arKey))
         {
 	lmAllocVar(newTree->lm, range);
 	range->start = sqlUnsigned(row[1]);
 	range->end = sqlUnsigned(row[2]);
 	if (prevNewRange == NULL)
 	    prevNewRange = range;
 	else if (overlap(range, prevNewRange))
 	    {
 	    /* merge r into prevR & discard; prevR gets passed forward. */
 	    if (range->end > prevNewRange->end)
 		prevNewRange->end = range->end;
 	    if (range->start < prevNewRange->start)
 		prevNewRange->start = range->start;
 	    }
 	else
 	    {
 	    rbTreeAdd(newTree, prevNewRange);
 	    prevNewRange = range;
 	    }
 	}
     }
 if (prevChrom != NULL)
     {
     rbTreeAdd(allTree, prevRange);
     if (prevNewRange != NULL)
 	rbTreeAdd(newTree, prevNewRange);
     setRepeats(prevChrom, chromHash, allTree, newTree);
     freeMem(prevChrom);
     }
 sqlFreeResult(&sr);
 }
 
 static void getRepeatsUnsplitTable(struct sqlConnection *conn,
 	struct hash *chromHash, char *table)
 /* Return a tree of ranges for sequence gaps all chromosomes, 
  *	from specified table
  */
 {
 struct sqlResult *sr;
 char **row;
 struct rbTreeNode **stack = lmAlloc(qLm, 256 * sizeof(stack[0]));
 struct rbTree *allTree = rbTreeNewDetailed(simpleRangeCmp, qLm, stack);
 struct rbTreeNode **newstack = lmAlloc(qLm, 256 * sizeof(newstack[0]));
 struct rbTree *newTree = rbTreeNewDetailed(simpleRangeCmp, qLm, newstack);
 char *prevChrom = NULL;
 struct simpleRange *prevRange = NULL, *prevNewRange = NULL;
 char query[256];
 
 
 sqlSafef(query, ArraySize(query), "select chrom,chromStart,chromEnd from %s "
     "order by chrom,chromStart", table);
 sr = sqlGetResult(conn, query);
 while ((row = sqlNextRow(sr)) != NULL)
     {
     struct simpleRange *range;
     if (prevChrom == NULL)
 	prevChrom = cloneString(row[0]);
     else if (! sameString(prevChrom, row[0]))
 	{
 	rbTreeAdd(allTree, prevRange);
 	if (prevNewRange != NULL)
 	    rbTreeAdd(newTree, prevNewRange);
 	setRepeats(prevChrom, chromHash, allTree, newTree);
 	freeMem(prevChrom);
 	prevRange = prevNewRange = NULL;
 	stack = lmAlloc(qLm, 256 * sizeof(stack[0]));
 	allTree = rbTreeNewDetailed(simpleRangeCmp, qLm, stack);
 	prevChrom = cloneString(row[0]);
 	}
     lmAllocVar(allTree->lm, range);
     range->start = sqlUnsigned(row[1]);
     range->end = sqlUnsigned(row[2]);
     if (prevRange == NULL)
 	prevRange = range;
     else if (overlap(range, prevRange))
 	{
 	/* merge r into prevR & discard; prevR gets passed forward. */
 	if (range->end > prevRange->end)
 	    prevRange->end = range->end;
 	if (range->start < prevRange->start)
 	    prevRange->start = range->start;
 	}
     else
 	{
 	rbTreeAdd(allTree, prevRange);
 	prevRange = range;
 	}
     }
 if (prevChrom != NULL)
     {
     rbTreeAdd(allTree, prevRange);
     if (prevNewRange != NULL)
 	rbTreeAdd(newTree, prevNewRange);
     setRepeats(prevChrom, chromHash, allTree, newTree);
     freeMem(prevChrom);
     }
 sqlFreeResult(&sr);
 }	/*	void getRepeatsUnsplitTable()	*/
 
 static void getRepeats(struct sqlConnection *conn, struct hash *arHash,
     char *chrom, struct rbTree **retAllRepeats,
 	struct rbTree **retNewRepeats)
 /* Return a tree of ranges for sequence gaps in chromosome */
 {
 char *db = sqlGetDatabase(conn);
 struct sqlResult *sr;
 char **row;
 struct rbTree *allTree = rbTreeNew(simpleRangeCmp);
 struct rbTree *newTree = rbTreeNew(simpleRangeCmp);
 char tableName[64];
 char query[256];
 boolean splitRmsk = TRUE;
 struct simpleRange *prevRange = NULL, *prevNewRange = NULL;
 
 safef(tableName, sizeof(tableName), "%s_rmsk", chrom);
 if (! sqlTableExists(conn, tableName))
     {
     safef(tableName, sizeof(tableName), "rmsk");
     if (! sqlTableExists(conn, tableName))
 	errAbort("Can't find rmsk table for %s (%s.%s_rmsk or %s.rmsk)\n",
 		 chrom, db, chrom, db);
     splitRmsk = FALSE;
     }
 if (splitRmsk)
     sqlSafef(query, sizeof query,
 	    "select genoStart,genoEnd,repName,repClass,repFamily from %s",
 	    tableName);
 else
     sqlSafef(query, sizeof query,
 	    "select genoStart,genoEnd,repName,repClass,repFamily from %s "
 	    "where genoName = \"%s\"",
 	    tableName, chrom);
 sr = sqlGetResult(conn, query);
 while ((row = sqlNextRow(sr)) != NULL)
     {
     struct simpleRange *range;
     char arKey[512];
     lmAllocVar(allTree->lm, range);
     range->start = sqlUnsigned(row[0]);
     range->end = sqlUnsigned(row[1]);
     if (prevRange == NULL)
 	prevRange = range;
     else if (overlap(range, prevRange))
 	{
 	/* merge r into prevR & discard; prevR gets passed forward. */
 	if (range->end > prevRange->end)
 	    prevRange->end = range->end;
 	if (range->start < prevRange->start)
 	    prevRange->start = range->start;
 	}
     else
 	{
 	rbTreeAdd(allTree, prevRange);
 	prevRange = range;
 	}
     sprintf(arKey, "%s.%s.%s", row[2], row[3], row[4]);
     if (arHash != NULL && hashLookup(arHash, arKey))
         {
 	lmAllocVar(newTree->lm, range);
 	range->start = sqlUnsigned(row[0]);
 	range->end = sqlUnsigned(row[1]);
 	if (prevNewRange == NULL)
 	    prevNewRange = range;
 	else if (overlap(range, prevNewRange))
 	    {
 	    /* merge r into prevR & discard; prevR gets passed forward. */
 	    if (range->end > prevNewRange->end)
 		prevNewRange->end = range->end;
 	    if (range->start < prevNewRange->start)
 		prevNewRange->start = range->start;
 	    }
 	else
 	    {
 	    rbTreeAdd(allTree, prevNewRange);
 	    prevNewRange = range;
 	    }
 	}
     }
 if (prevRange != NULL)
     rbTreeAdd(allTree, prevRange);
 if (prevNewRange != NULL)
     rbTreeAdd(newTree, prevNewRange);
 sqlFreeResult(&sr);
 *retAllRepeats = allTree;
 *retNewRepeats = newTree;
 }
 
 static void getRepeatsTable(struct sqlConnection *conn, char *table,
     char *chrom, struct rbTree **retAllRepeats,
 	struct rbTree **retNewRepeats)
 /* Return a tree of ranges for sequence gaps in chromosome from
  *	specified table */
 {
 struct sqlResult *sr;
 char **row;
 struct rbTree *allTree = rbTreeNew(simpleRangeCmp);
 struct rbTree *newTree = rbTreeNew(simpleRangeCmp);
 char query[256];
 struct simpleRange *prevRange = NULL, *prevNewRange = NULL;
 
 sqlSafef(query, ArraySize(query), "select chromStart,chromEnd from %s "
 	    "where chrom = \"%s\"", table, chrom);
 sr = sqlGetResult(conn, query);
 while ((row = sqlNextRow(sr)) != NULL)
     {
     struct simpleRange *range;
     lmAllocVar(allTree->lm, range);
     range->start = sqlUnsigned(row[0]);
     range->end = sqlUnsigned(row[1]);
     if (prevRange == NULL)
 	prevRange = range;
     else if (overlap(range, prevRange))
 	{
 	/* merge r into prevR & discard; prevR gets passed forward. */
 	if (range->end > prevRange->end)
 	    prevRange->end = range->end;
 	if (range->start < prevRange->start)
 	    prevRange->start = range->start;
 	}
     else
 	{
 	rbTreeAdd(allTree, prevRange);
 	prevRange = range;
 	}
     }
 if (prevRange != NULL)
     rbTreeAdd(allTree, prevRange);
 if (prevNewRange != NULL)
     rbTreeAdd(newTree, prevNewRange);
 sqlFreeResult(&sr);
 *retAllRepeats = allTree;
 *retNewRepeats = newTree;
 }	/*	static void getRepeatsTable()	*/
 
 static struct rbTree *getNewRepeats(char *dirName, char *chrom)
 /* Read in repeatMasker .out line format file into a tree of ranges. */
 /* Handles lineage-specific files that preserve header */
 {
 struct rbTree *tree = rbTreeNew(simpleRangeCmp);
 struct simpleRange *range;
 char fileName[512];
 struct lineFile *lf;
 char *row[7];
 boolean headerDone = FALSE;
 
 sprintf(fileName, "%s/%s.out.spec", dirName, chrom);
 lf = lineFileOpen(fileName, TRUE);
 while (lineFileRow(lf, row))
     {
     /* skip header lines (don't contain numeric first field) */
     if (!headerDone && atoi(row[0]) == 0)
         continue;
     if (!sameString(chrom, row[4]))
         errAbort("Expecting %s word 5, line %d of %s\n", 
 		chrom, lf->lineIx, lf->fileName);
     headerDone = TRUE;
     lmAllocVar(tree->lm, range);
     range->start = lineFileNeedNum(lf, row, 5) - 1;
     range->end = lineFileNeedNum(lf, row, 6);
     rbTreeAdd(tree, range);
     }
 lineFileClose(&lf);
 return tree;
 }
 
 struct hash *getAncientRepeats(struct sqlConnection *tConn,
 			       struct sqlConnection *qConn)
 /* Get hash of ancient repeats.  This keyed by name.family.class. */
 {
 struct sqlConnection *conn = NULL;
 struct sqlResult *sr;
 char **row;
 char key[512];
 struct hash *hash = newHash(10);
 
 if (sqlTableExists(tConn, "ancientRepeat"))
     conn = tConn;
 else if (sqlTableExists(qConn, "ancientRepeat"))
     conn = qConn;
 else
     errAbort("Can't find ancientRepeat table in %s or %s",
 	     sqlGetDatabase(tConn), sqlGetDatabase(qConn));
 char query[1024];
 sqlSafef(query, sizeof query, "select name,family,class from ancientRepeat");
 sr = sqlGetResult(conn, query);
 while ((row = sqlNextRow(sr)) != NULL)
     {
     sprintf(key, "%s.%s.%s", row[0], row[1], row[2]);
     hashAdd(hash, key, NULL);
     }
 sqlFreeResult(&sr);
 return hash;
 }
 
-void getChroms(struct sqlConnection *conn, struct hash **retHash,
-	       struct chrom **retList)
-/* Get hash of chromosomes from database. */
+static void getChroms(struct sqlConnection *conn, struct hash **retHash,
+	       struct chrom **retList, char *sizes)
+/* Get hash of chromosomes from database or from sizes file if given. */
 {
-struct sqlResult *sr;
-char **row;
 struct chrom *chromList = NULL, *chrom;
 struct hash *hash = hashNew(8);
 
+if (sizes)
+    {
+    struct chromInfo *list = chromInfoListFromFile(sizes);
+    struct chromInfo *ci = NULL;
+    for (ci = list; ci; ci=ci->next)
+	{
+	AllocVar(chrom);
+	hashAddSaveName(hash, ci->chrom, chrom, &chrom->name);
+	chrom->size = ci->size;
+	slAddHead(&chromList, chrom);
+	}
+    }
+else
+    {
+    struct sqlResult *sr;
+    char **row;
     char query[1024];
     sqlSafef(query, sizeof query, "select chrom,size from chromInfo");
     sr = sqlGetResult(conn, query);
     while ((row = sqlNextRow(sr)) != NULL)
 	{
 	AllocVar(chrom);
 	hashAddSaveName(hash, row[0], chrom, &chrom->name);
 	chrom->size = atoi(row[1]);
 	slAddHead(&chromList, chrom);
 	}
     sqlFreeResult(&sr);
+    }
+
 slReverse(&chromList);
 *retHash = hash;
 *retList = chromList;
 }
 
 int liftStart(char *name, int start, struct hash *liftHash)
 /* Lift start if necessary. */
 {
 int s = start;
 if (liftHash != NULL)
     {
     struct liftSpec *lft = hashMustFindVal(liftHash, name);
     s += lft->offset;
     }
 return s;
 }
 
 void tAddN(struct chainNet *net, struct cnFill *fillList, struct rbTree *tree)
 /* Add tN's to all gaps underneath fillList. */
 {
 struct cnFill *fill;
 for (fill = fillList; fill != NULL; fill = fill->next)
     {
     int s = liftStart(net->name, fill->tStart, liftHashT);
     fill->tN = intersectionSize(tree, s, s + fill->tSize);
     if (fill->children)
 	tAddN(net, fill->children, tree);
     }
 }
 
 struct chrom *getQChrom(char *qName, struct hash *qChromHash)
 /* Lift qName to chrom if necessary and dig up from qChromHash. */
 {
 struct chrom *qChrom = NULL;
 if (liftHashQ != NULL)
     {
     struct liftSpec *lft = hashMustFindVal(liftHashQ, qName);
     qChrom = hashMustFindVal(qChromHash, lft->newName);
     }
 else
     qChrom = hashMustFindVal(qChromHash, qName);
 return(qChrom);
 }
 
 void qAddN(struct chainNet *net, struct cnFill *fillList, struct hash *qChromHash)
 /* Add qN's to all gaps underneath fillList. */
 {
 struct cnFill *fill;
 for (fill = fillList; fill != NULL; fill = fill->next)
     {
     struct chrom *qChrom = getQChrom(fill->qName, qChromHash);
     struct rbTree *tree = qChrom->nGaps;
     int s = liftStart(fill->qName, fill->qStart, liftHashQ);
     fill->qN = intersectionSize(tree, s, s + fill->qSize);
     if (fill->children)
 	qAddN(net, fill->children, qChromHash);
     }
 }
 
 void tAddR(struct chainNet *net, struct cnFill *fillList, struct rbTree *tree)
 /* Add t repeats's to all things underneath fillList. */
 {
 struct cnFill *fill;
 for (fill = fillList; fill != NULL; fill = fill->next)
     {
     int s = liftStart(net->name, fill->tStart, liftHashT);
     fill->tR = intersectionSize(tree, s, s + fill->tSize);
     if (fill->children)
 	tAddR(net, fill->children, tree);
     }
 }
 
 void qAddR(struct chainNet *net, struct cnFill *fillList, struct hash *qChromHash)
 /* Add q repeats to all things underneath fillList. */
 {
 struct cnFill *fill;
 for (fill = fillList; fill != NULL; fill = fill->next)
     {
     struct chrom *qChrom = getQChrom(fill->qName, qChromHash);
     int s = liftStart(fill->qName, fill->qStart, liftHashQ);
     fill->qR = intersectionSize(qChrom->repeats, s, s + fill->qSize);
     if (fill->children)
 	qAddR(net, fill->children, qChromHash);
     }
 }
 
 void tAddNewR(struct chainNet *net, struct cnFill *fillList, struct rbTree *tree)
 /* Add t new repeats's to all things underneath fillList. */
 {
 struct cnFill *fill;
 for (fill = fillList; fill != NULL; fill = fill->next)
     {
     int s = liftStart(net->name, fill->tStart, liftHashT);
     fill->tNewR = intersectionSize(tree, s, s + fill->tSize);
     if (fill->children)
 	tAddNewR(net, fill->children, tree);
     }
 }
 
 void qAddNewR(struct chainNet *net, struct cnFill *fillList, struct hash *qChromHash)
 /* Add q new repeats to all things underneath fillList. */
 {
 struct cnFill *fill;
 for (fill = fillList; fill != NULL; fill = fill->next)
     {
     struct chrom *qChrom = getQChrom(fill->qName, qChromHash);
     int s = liftStart(fill->qName, fill->qStart, liftHashQ);
     fill->qNewR = intersectionSize(qChrom->newRepeats, s, s + fill->qSize);
     if (fill->children)
 	qAddNewR(net, fill->children, qChromHash);
     }
 }
 
 void tAddOldR(struct chainNet *net, struct cnFill *fillList, struct rbTree *tree)
 /* Add t new repeats's to all things underneath fillList. */
 {
 struct cnFill *fill;
 for (fill = fillList; fill != NULL; fill = fill->next)
     {
     int s = liftStart(net->name, fill->tStart, liftHashT);
     fill->tOldR = intersectionSize(tree, s, s + fill->tSize);
     if (fill->children)
 	tAddOldR(net, fill->children, tree);
     }
 }
 
 void qAddOldR(struct chainNet *net, struct cnFill *fillList, struct hash *qChromHash)
 /* Add q new repeats to all things underneath fillList. */
 {
 struct cnFill *fill;
 for (fill = fillList; fill != NULL; fill = fill->next)
     {
     struct chrom *qChrom = getQChrom(fill->qName, qChromHash);
     int s = liftStart(fill->qName, fill->qStart, liftHashQ);
     fill->qOldR = intersectionSize(qChrom->oldRepeats, s, s + fill->qSize);
     if (fill->children)
 	qAddOldR(net, fill->children, qChromHash);
     }
 }
 
 void tAddTrf(struct chainNet *net, struct cnFill *fillList, struct rbTree *tree)
 /* Add t simple repeats's to all things underneath fillList. */
 {
 struct cnFill *fill;
 for (fill = fillList; fill != NULL; fill = fill->next)
     {
     int s = liftStart(net->name, fill->tStart, liftHashT);
     fill->tTrf = intersectionSize(tree, s, s + fill->tSize);
     if (fill->children)
 	tAddTrf(net, fill->children, tree);
     }
 }
 
 void qAddTrf(struct chainNet *net, struct cnFill *fillList, struct hash *qChromHash)
 /* Add q new repeats to all things underneath fillList. */
 {
 struct cnFill *fill;
 for (fill = fillList; fill != NULL; fill = fill->next)
     {
     struct chrom *qChrom = getQChrom(fill->qName, qChromHash);
     int s = liftStart(fill->qName, fill->qStart, liftHashQ);
     fill->qTrf = intersectionSize(qChrom->trf, s, s + fill->qSize);
     if (fill->children)
 	qAddTrf(net, fill->children, qChromHash);
     }
 }
 
 
 
 void netClass(char *inName, char *tDb, char *qDb, char *outName)
 /* netClass - Add classification info to net. */
 {
 struct chainNet *net;
 struct lineFile *lf = lineFileOpen(inName, TRUE);
 FILE *f = mustOpen(outName, "w");
 struct chrom *qChromList, *chrom;
 struct hash *qChromHash;
 struct hash *arHash = NULL;
 struct sqlConnection *tConn = sqlConnect(tDb);
 struct sqlConnection *qConn = sqlConnect(qDb);
 
 qLm = lmInit(0);
 
 if (!noAr)
     arHash = getAncientRepeats(tConn, qConn);
 
-getChroms(qConn, &qChromHash, &qChromList);
+getChroms(qConn, &qChromHash, &qChromList, qSizes);
 
 verbose(1, "Reading gaps in %s\n", qDb);
 if (sqlTableExists(qConn, "gap"))
     {
     getSeqGapsUnsplit(qConn, qChromHash);
     }
 else
     {
     for (chrom = qChromList; chrom != NULL; chrom = chrom->next)
 	chrom->nGaps = getSeqGaps(qConn, chrom->name);
     }
 
 if (qNewR)
     {
     verbose(1, "Reading new repeats from %s\n", qNewR);
     for (chrom = qChromList; chrom != NULL; chrom = chrom->next)
         chrom->newRepeats = getNewRepeats(qNewR, chrom->name);
     }
 
 verbose(1, "Reading simpleRepeats in %s\n", qDb);
 getTrfUnsplit(qConn, qChromHash);
 
 if (qRepeatTable)
     {
     verbose(1, "Reading repeats in %s from table %s\n", qDb, qRepeatTable);
     getRepeatsUnsplitTable(qConn, qChromHash, qRepeatTable);
     }
 else
     {
     verbose(1, "Reading repeats in %s\n", qDb);
     if (sqlTableExists(qConn, "rmsk"))
 	getRepeatsUnsplit(qConn, qChromHash, arHash);
     else
 	{
 	for (chrom = qChromList; chrom != NULL; chrom = chrom->next)
 	    getRepeats(qConn, arHash, chrom->name, &chrom->repeats,
 		       &chrom->oldRepeats);
 	}
     }
 
 while ((net = chainNetRead(lf)) != NULL)
     {
     struct rbTree *tN, *tRepeats, *tOldRepeats, *tTrf;
     char *tName = net->name;
     if (liftHashT != NULL)
 	{
 	struct liftSpec *lft = hashMustFindVal(liftHashT, net->name);
 	tName = lft->newName;
 	}
 
     verbose(1, "Processing %s.%s\n", tDb, net->name);
     tN = getSeqGaps(tConn, tName);
     tAddN(net, net->fillList, tN);
     rbTreeFree(&tN);
     qAddN(net, net->fillList, qChromHash);
 
     if (tRepeatTable)
 	getRepeatsTable(tConn, tRepeatTable, tName, &tRepeats, &tOldRepeats);
     else
 	getRepeats(tConn, arHash, tName, &tRepeats, &tOldRepeats);
     tAddR(net, net->fillList, tRepeats);
     if (!noAr)
 	tAddOldR(net, net->fillList, tOldRepeats);
     rbTreeFree(&tRepeats);
     rbTreeFree(&tOldRepeats);
     qAddR(net, net->fillList, qChromHash);
     if (!noAr)
 	qAddOldR(net, net->fillList, qChromHash);
 
     tTrf = getTrf(tConn, tName);
     tAddTrf(net, net->fillList, tTrf);
     rbTreeFree(&tTrf);
     qAddTrf(net, net->fillList, qChromHash);
 
     if (tNewR)
         {
 	struct rbTree *tree = getNewRepeats(tNewR, tName);
 	tAddNewR(net, net->fillList, tree);
 	rbTreeFree(&tree);
 	}
     if (qNewR)
         qAddNewR(net, net->fillList, qChromHash);
     chainNetWrite(net, f);
     chainNetFree(&net);
     }
 sqlDisconnect(&tConn);
 sqlDisconnect(&qConn);
 }
 
 
 int main(int argc, char *argv[])
 /* Process command line. */
 {
 char *liftFileQ = NULL, *liftFileT = NULL;
 optionInit(&argc, argv, optionSpecs);
 if (argc != 5)
     usage();
 tNewR = optionVal("tNewR", tNewR);
 qNewR = optionVal("qNewR", qNewR);
 noAr = optionExists("noAr");
 qRepeatTable = optionVal("qRepeats", qRepeatTable);
 tRepeatTable = optionVal("tRepeats", tRepeatTable);
 liftFileQ = optionVal("liftQ", liftFileQ);
 liftFileT = optionVal("liftT", liftFileT);
+qSizes = optionVal("qSizes", qSizes);
 if (liftFileQ != NULL)
     {
     struct liftSpec *lifts = readLifts(liftFileQ);
     liftHashQ = hashLift(lifts, TRUE);
     }
 if (liftFileT != NULL)
     {
     struct liftSpec *lifts = readLifts(liftFileT);
     liftHashT = hashLift(lifts, TRUE);
     }
 netClass(argv[1], argv[2], argv[3], argv[4]);
 return 0;
 }