4b4e6832927f7648993eb55e5062aac73a55f3d2 kent Wed Apr 28 16:01:50 2021 -0700 Writing something to massage data for an adipose single cell data set to get a good meta.tsv file. diff --git src/oneShot/adiposeRelabelImmune/adiposeRelabelImmune.c src/oneShot/adiposeRelabelImmune/adiposeRelabelImmune.c new file mode 100644 index 0000000..1f79c67 --- /dev/null +++ src/oneShot/adiposeRelabelImmune/adiposeRelabelImmune.c @@ -0,0 +1,65 @@ +/* adiposeRelabelImmune - Merge together two files from the adipose tissue data set in GSE129363 to add immune subclusterings.. */ +#include "common.h" +#include "linefile.h" +#include "hash.h" +#include "options.h" +#include "obscure.h" + +void usage() +/* Explain usage and exit. */ +{ +errAbort( + "adiposeRelabelImmune - Merge together two files from the adipose tissue data set in GSE129363 to add immune subclusterings.\n" + "usage:\n" + " adiposeRelabelImmune meta.tsv\n" + "options:\n" + " -xxx=XXX\n" + ); +} + +/* Command line validation table. */ +static struct optionSpec options[] = { + {NULL, 0}, +}; + +void adiposeRelabelImmune(char *metaOut) +/* adiposeRelabelImmune - Merge together two files from the adipose tissue data set in GSE129363 to add immune subclusterings.. */ +{ +char *allIn = "CellClusterAnnotation.txt"; +char *immuneIn = "ImmuneCell_ClusterAnnotation.txt"; +char *metaIn = "GSE129363_Discovery_Cohort_CellAnnotation.txt"; +struct hash *immuneHash = hashTwoColumnFile(immuneIn); +struct hash *allHash = hashTwoColumnFile(allIn); +struct lineFile *lf = lineFileOpen(metaIn, TRUE); +FILE *f = mustOpen(metaOut, "w"); +char *row[4]; + +char *cluster; +while (lineFileNextRow(lf, row, ArraySize(row))) + { + if (lf->lineIx == 1) // header + cluster = "cellType"; + else + { + char *cell = row[0]; + char *oldCluster = hashFindVal(allHash, cell); + cluster = hashFindVal(immuneHash, cell); + if (cluster == NULL) + cluster = oldCluster; + if (cluster == NULL) + errAbort("Can't find cluster for cell %s", cell); + } + fprintf(f, "%s\t%s\t%s\t%s\t%s\n", row[0], row[1], row[2], row[3], cluster); + } +carefulClose(&f); +} + +int main(int argc, char *argv[]) +/* Process command line. */ +{ +optionInit(&argc, argv, options); +if (argc != 2) + usage(); +adiposeRelabelImmune(argv[1]); +return 0; +}