9cb00c4f07c40b647094f0d03b2bcce00e535d9c kent Thu Dec 31 14:56:48 2020 -0800
Added hashTsvBy since I needed it in two places, and it seems pretty general, but not common.

diff --git src/lib/obscure.c src/lib/obscure.c
index f6bea35..609e321 100644
--- src/lib/obscure.c
+++ src/lib/obscure.c
@@ -156,30 +156,70 @@
 struct lineFile *lf = lineFileOpen(fileName, TRUE);
 struct hash *hash = hashNew(16);
 char *row[3];
 int fields = 0;
 while ((fields = lineFileChop(lf, row)) != 0)
     {
     lineFileExpectWords(lf, 2, fields);
     char *name = row[0];
     char *value = lmCloneString(hash->lm, row[1]);
     hashAdd(hash, name, value);
     }
 lineFileClose(&lf);
 return hash;
 }
 
+struct hash *hashTsvBy(char *in, int keyColIx, int *retColCount)
+/* Return a hash of rows keyed by the given column.  Reads the tab-separated file
+ * 'in', taking the column count from the first real line.  Each hash value is that
+ * line's row as cloned by lmCloneRow into the hash's local memory.  The column count
+ * is stored in *retColCount (0 if no real line was read); retColCount may be NULL.
+ * errAborts if keyColIx is not a valid column or a line's column count differs. */
+{
+struct lineFile *lf = lineFileOpen(in, TRUE);
+struct hash *hash = hashNew(0);
+char *line = NULL, **row = NULL;
+int colCount = 0, colAlloc=0;	/* Columns as counted and as allocated */
+if (retColCount != NULL)
+    *retColCount = 0;   /* Defined result even when no real line is read */
+while (lineFileNextReal(lf, &line))
+    {
+    if (colCount == 0)
+        {
+        colCount = chopByChar(line, '\t', NULL, 0);
+        verbose(2, "Got %d columns in first real line\n", colCount);
+        if (keyColIx < 0 || keyColIx >= colCount)
+            errAbort("Key column index %d out of range for %d columns line %d of %s",
+                keyColIx, colCount, lf->lineIx, lf->fileName);
+        if (retColCount != NULL)
+            *retColCount = colCount;
+        colAlloc = colCount + 1;  // +1 so we can detect unexpected input and complain
+        lmAllocArray(hash->lm, row, colAlloc);
+        }
+    int count = chopByChar(line, '\t', row, colAlloc);
+    if (count > colCount)
+        errAbort("Expecting %d words, got more than that line %d of %s",
+            colCount, lf->lineIx, lf->fileName);
+    else if (count < colCount)
+        errAbort("Expecting %d words, got %d line %d of %s",
+            colCount, count, lf->lineIx, lf->fileName);
+    hashAdd(hash, row[keyColIx], lmCloneRow(hash->lm, row, colCount) );
+    }
+lineFileClose(&lf);
+return hash;
+}
+
 struct slPair *slPairTwoColumnFile(char *fileName)
 /* Read in a two column file into an slPair list */
 {
 char *row[2];
 struct slPair *list = NULL;
 struct lineFile *lf = lineFileOpen(fileName, TRUE);
 while (lineFileRow(lf, row))
     slPairAdd(&list, row[0], cloneString(row[1]));
 lineFileClose(&lf);
 slReverse(&list);
 return list;
 }
 
 struct slName *readAllLines(char *fileName)
 /* Read all lines of file into a list.
(Removes trailing carriage return.) */