5765efc706b41b9528d3a2a17a865f898473ce05 braney Wed Feb 1 13:14:08 2023 -0800 Fix a bug in calculating knownCanonical, reported by b0b, that was the result of a bone-headed parsing of the hash structure. While I was in there, I prioritized MANE membership over the appris tags. diff --git src/hg/makeDb/makeGencodeKnownGene/makeGencodeKnownGene.c src/hg/makeDb/makeGencodeKnownGene/makeGencodeKnownGene.c index 97aad17..c28a066 100644 --- src/hg/makeDb/makeGencodeKnownGene/makeGencodeKnownGene.c +++ src/hg/makeDb/makeGencodeKnownGene/makeGencodeKnownGene.c @@ -537,69 +537,72 @@ if (hel == NULL) { AllocVar(ti); slAddHead(&tiList, ti); ti->transcriptId = gp->name; ti->geneId = wga->geneId; ti->length = gp->txEnd - gp->txStart; ti->tag = "none"; ti->tagVal = 0; ti->gp = gp; ti->ucscId = ucscId; } for(; hel; hel = hel->next) { + if (differentString(gp->name, hel->name)) + continue; char *tag = (char *)hel->val; AllocVar(ti); slAddHead(&tiList, ti); ti->transcriptId = gp->name; ti->geneId = wga->geneId; ti->length = gp->txEnd - gp->txStart; ti->tag = cloneString(tag); ti->gp = gp; ti->ucscId = ucscId; - if (startsWith("appris_principal", tag)) + if (startsWith("MANE", tag)) + ti->tagVal = 110; + else if (startsWith("appris_principal", tag)) ti->tagVal = 100; else if (startsWith("appris_alternative", tag)) ti->tagVal = 90; else if (startsWith("basic", tag)) ti->tagVal = 80; else ti->tagVal = 0; } } slSort(&tiList, tiCmp); struct hash *geneHash = newHash(10); for(ti=tiList; ti; ti = ti->next) { if (hashLookup(geneHash, ti->geneId) == NULL) { ti->clusterId = clusterId; hashAdd(geneHash, ti->geneId, ti); struct genePred *gp = ti->gp; fprintf(canonF, "%s\t%d\t%d\t%d\t%s\t%s\n",gp->chrom, gp->txStart, gp->txEnd, clusterId, ti->transcriptId, ti->geneId); clusterId++; -// printf("%s %s %s %d\n", ti->geneId, ti->transcriptId, ti->tag, ti->length); } } for (gp = compGenePreds; gp; gp = gp->next) { struct wgEncodeGencodeAttrs *wga = (struct wgEncodeGencodeAttrs 
*)hashMustFindVal(hashes->genToAttrs, gp->name); struct hashEl *hel; if ((hel = hashLookup(geneHash, wga->geneId)) == NULL) errAbort("gene not in geneHash"); struct transInfo *ti = (struct transInfo *)hel->val; fprintf(isoF, "%d\t%s\n", ti->clusterId, gp->name); } fclose(canonF); fclose(isoF); }