b8180d9f6d41dc708a2f249ba892cbca311e7a06 jcasper Mon Feb 27 11:38:55 2023 -0800 Adding transparency support for colors refs #30569 diff --git src/hg/utils/genePredToBigGenePred/genePredToBigGenePred.c src/hg/utils/genePredToBigGenePred/genePredToBigGenePred.c index e1676d2..d7e79cc 100644 --- src/hg/utils/genePredToBigGenePred/genePredToBigGenePred.c +++ src/hg/utils/genePredToBigGenePred/genePredToBigGenePred.c @@ -1,267 +1,268 @@ /* genePredToBigGenePred - converts genePred or genePredExt to bigGenePred. */ #include "common.h" #include "obscure.h" #include "linefile.h" #include "hash.h" #include "options.h" #include "genePred.h" #include "bigGenePred.h" #include "jksql.h" #include "memgfx.h" void usage() /* Explain usage and exit. */ { errAbort( "genePredToBigGenePred - converts genePred or genePredExt to bigGenePred input (bed format with extra fields)\n" "usage:\n" " genePredToBigGenePred [-known] [-score=scores] [-geneNames=geneNames] [-colors=colors] file.gp stdout | sort -k1,1 -k2,2n > file.bgpInput\n" "NOTE: to build bigBed:\n" " wget https://genome.ucsc.edu/goldenpath/help/examples/bigGenePred.as\n" " bedToBigBed -type=bed12+8 -tab -as=bigGenePred.as file.bgpInput chrom.sizes output.bb\n" "options:\n" " -known input file is a genePred in knownGene format\n" " -score=scores scores is two column file with id's mapping to scores\n" " -geneNames=geneNames geneNames is a three column file with id's mapping to two gene names\n" " -colors=colors colors is a four column file with id's mapping to r,g,b\n" " -cds=cds cds is a five column file with id's mapping to cds status codes and exonFrames (see knownCds.as)\n" " -geneType=geneType geneType is a two column file with id's mapping to geneType\n" ); } struct cds { char *name; enum cdsStatus cdsStartStat; /* enum('none','unk','incmpl','cmpl') */ enum cdsStatus cdsEndStat; /* enum('none','unk','incmpl','cmpl') */ int exonCount; int *exonFrames; /* Exon frame {0,1,2}, or -1 if no frame for exon */ }; struct geneNames { char *name; char *name2; }; struct hash *colorsHash = NULL; struct hash *scoreHash = NULL; struct hash *geneHash = NULL; struct hash *cdsHash = NULL; struct hash *geneTypeHash = NULL; boolean isKnown; /* Command line validation table. */ static struct optionSpec options[] = { {"known", OPTION_BOOLEAN}, {"score", OPTION_STRING}, {"geneNames", OPTION_STRING}, {"colors", OPTION_STRING}, {"cds", OPTION_STRING}, {"geneType", OPTION_STRING}, {NULL, 0}, }; #define MAX_BLOCKS 10000 unsigned blockSizes[MAX_BLOCKS]; unsigned blockStarts[MAX_BLOCKS]; void outBigGenePred(FILE *fp, struct genePred *gp) { struct bigGenePred bgp; if (gp->exonCount > MAX_BLOCKS) errAbort("genePred has more than %d exons, make MAX_BLOCKS bigger in source", MAX_BLOCKS); if (gp->exonFrames == NULL) { struct cds *cds; if ((cdsHash != NULL) && (cds = (struct cds *)hashFindVal(cdsHash, gp->name)) != NULL) { gp->cdsStartStat = cds->cdsStartStat; gp->cdsEndStat = cds->cdsEndStat; gp->exonFrames = cds->exonFrames; } else genePredAddExonFrames(gp); } bgp.chrom = gp->chrom; bgp.chromStart = gp->txStart; bgp.chromEnd = gp->txEnd; bgp.name = gp->name; bgp.score = 0; if (scoreHash) { char *val = hashFindVal(scoreHash, gp->name); if (val != NULL) bgp.score = sqlUnsigned(val); } bgp.strand[0] = gp->strand[0]; bgp.strand[1] = gp->strand[1]; bgp.thickStart = gp->cdsStart; bgp.thickEnd = gp->cdsEnd; bgp.itemRgb = 0; // BLACK if (colorsHash) { struct rgbColor *color = hashFindVal(colorsHash, gp->name); if (color == NULL) warn("Warning: no color found for %s", gp->name); else { bgp.itemRgb = ( ((color->r) & 0xff) << 16) | (((color->g) & 0xff) << 8) | ((color->b) & 0xff); } } bgp.blockCount = gp->exonCount; bgp.blockSizes = (unsigned *)blockSizes; bgp.chromStarts = (unsigned *)blockStarts; int ii; for(ii=0; ii < bgp.blockCount; ii++) { blockStarts[ii] = gp->exonStarts[ii] - bgp.chromStart; blockSizes[ii] = gp->exonEnds[ii] - gp->exonStarts[ii]; } bgp.name2 = gp->name2; bgp.cdsStartStat = gp->cdsStartStat; bgp.cdsEndStat = gp->cdsEndStat; bgp.exonFrames = gp->exonFrames; bgp.type = NULL; bgp.geneName = gp->name; bgp.geneName2 = gp->name2; if (geneHash) { struct geneNames *gn = hashFindVal(geneHash, gp->name); if (gn == NULL) warn("Warning: no gene name found for %s", gp->name); else { bgp.geneName = gn->name; bgp.geneName2 = gn->name2; } } bgp.geneType = NULL; if (geneTypeHash) bgp.geneType = hashFindVal(geneTypeHash, gp->name); bigGenePredOutput(&bgp, fp, '\t', '\n'); } void genePredToBigGenePred(char *genePredFile, char *bigGeneOutput) /* genePredToBigGenePred - converts genePred or genePredExt to bigGenePred. */ { struct genePred *gp; if (isKnown) gp = genePredKnownLoadAll(genePredFile) ; else gp = genePredExtLoadAll(genePredFile) ; FILE *fp = mustOpen(bigGeneOutput, "w"); for(; gp ; gp = gp->next) { outBigGenePred(fp, gp); } } struct hash *hashCds(char *fileName) { struct lineFile *lf = lineFileOpen(fileName, TRUE); char *row[5]; struct hash *hash = hashNew(16); while (lineFileChopTab(lf, row)) { char *name = row[0]; struct cds *cds; lmAllocVar(hash->lm, cds ); cds->cdsStartStat = atoi(row[1]); cds->cdsEndStat = atoi(row[2]); cds->exonCount = atoi(row[3]); // this memory will be leaked if the hash is free'd int numBlocks; sqlSignedDynamicArray(row[4], &cds->exonFrames, &numBlocks); assert(numBlocks == cds->exonCount); hashAdd(hash, name, cds); } lineFileClose(&lf); return hash; } struct hash *hashGeneNames(char *fileName) /* Given a three column file (key, geneName, geneName2) return a hash. */ { struct lineFile *lf = lineFileOpen(fileName, TRUE); char *row[3]; struct hash *hash = hashNew(16); while (lineFileChopTab(lf, row)) { char *name = row[0]; struct geneNames *gn; lmAllocVar(hash->lm, gn); gn->name = lmCloneString(hash->lm, row[1]); gn->name2 = lmCloneString(hash->lm, row[2]); hashAdd(hash, name, gn); } lineFileClose(&lf); return hash; } struct hash *hashColors(char *fileName) /* Given a four column file (key, r, g, b) return a hash. */ { struct lineFile *lf = lineFileOpen(fileName, TRUE); char *row[4]; struct hash *hash = hashNew(16); while (lineFileChopTab(lf, row)) { char *name = row[0]; struct rgbColor *color; lmAllocVar(hash->lm, color); color->r = atoi(row[1]); color->g = atoi(row[2]); color->b = atoi(row[3]); + color->a = 255; hashAdd(hash, name, color); } lineFileClose(&lf); return hash; } int main(int argc, char *argv[]) /* Process command line. */ { optionInit(&argc, argv, options); if (argc != 3) usage(); isKnown = optionExists("known"); char *scoreFile = optionVal("score", NULL); if (scoreFile != NULL) scoreHash = hashTwoColumnFile(scoreFile); char *geneTypeFile = optionVal("geneType", NULL); if (geneTypeFile != NULL) geneTypeHash = hashTwoColumnFile(geneTypeFile); char *geneNames = optionVal("geneNames", NULL); if (geneNames != NULL) geneHash = hashGeneNames(geneNames); char *cdsValues = optionVal("cds", NULL); if (cdsValues != NULL) cdsHash = hashCds(cdsValues); char *colors = optionVal("colors", NULL); if (colors != NULL) colorsHash = hashColors(colors); genePredToBigGenePred(argv[1], argv[2]); return 0; }