be4311c07e14feb728abc6425ee606ffaa611a58 markd Fri Jan 22 06:46:58 2021 -0800 merge with master diff --git src/utils/subColumn/subColumn.c src/utils/subColumn/subColumn.c index 00c2eaa..341d0b5 100644 --- src/utils/subColumn/subColumn.c +++ src/utils/subColumn/subColumn.c @@ -1,20 +1,21 @@ /* subColumn - Substitute one column in a tab-separated file.. */ /* Copyright (C) 2011 The Regents of the University of California * See README in this or parent directory for licensing information. */ #include "common.h" +#include "localmem.h" #include "linefile.h" #include "hash.h" #include "options.h" #include "dystring.h" #include "obscure.h" boolean isList = FALSE; FILE *fMiss = NULL; void usage() /* Explain usage and exit. */ { errAbort( "subColumn - Substitute one column in a tab-separated file.\n" @@ -48,34 +49,53 @@ dyStringClear(dy); while (in != NULL && in[0] != 0) { char *e = strchr(in, ','); if (e != NULL) *e++ = 0; char *s = hashMustFindVal(subHash, in); dyStringPrintf(dy, "%s,", s); in = e; } return dy->string; } int missCount = 0; +struct hash *hashTwoColumnTsv(char *fileName) +/* Open fileName, read it row by row with lineFileChopTab, and return a new hash that maps the first field (key) of each row to a copy of the second field (value). Each row's field count is passed to lineFileExpectWords with an expected count of 2 (presumably aborting on a mismatch - confirm against linefile.h). Values are copied with lmCloneString into the hash's own local memory (hash->lm), and the lineFile is closed before returning. */ +{ +struct lineFile *lf = lineFileOpen(fileName, TRUE); +struct hash *hash = hashNew(16); +char *row[3]; +int fields = 0; +while ((fields = lineFileChopTab(lf, row)) != 0) + { + lineFileExpectWords(lf, 2, fields); + char *name = row[0]; + char *value = lmCloneString(hash->lm, row[1]); + hashAdd(hash, name, value); + } +lineFileClose(&lf); +return hash; +} + + void subColumn(char *asciiColumn, char *inFile, char *subFile, char *outFile) /* subColumn - Substitute one column in a tab-separated file.. 
*/ { -struct hash *subHash = hashTwoColumnFile(subFile); +struct hash *subHash = hashTwoColumnTsv(subFile); int column = atoi(asciiColumn); if (column == 0) usage(); else column -= 1; char *row[1024*4]; struct lineFile *lf = lineFileOpen(inFile, TRUE); FILE *f = mustOpen(outFile, "w"); int rowCount; while ((rowCount = lineFileChopNextTab(lf, row, ArraySize(row))) > 0) { if (rowCount == ArraySize(row)) errAbort("Too many columns (%d) line %d of %s.", rowCount, lf->lineIx, lf->fileName); if (column >= rowCount) errAbort("Not enough columns (%d) line %d of %s.", rowCount, lf->lineIx, lf->fileName);