0564395ec363631f2ff8d295da6f6b50f873fda4
braney
  Mon Jan 24 17:01:53 2022 -0800
more chromAlias work:  some name changes and support for the new genark
chromAlias format

diff --git src/hg/hgc/transMapClick.c src/hg/hgc/transMapClick.c
index c55dd83..6c8bae3 100644
--- src/hg/hgc/transMapClick.c
+++ src/hg/hgc/transMapClick.c
@@ -1,442 +1,442 @@
 /* transMapClick - transMap click handling */
 
 /* Copyright (C) 2010 The Regents of the University of California 
  * See kent/LICENSE or http://genome.ucsc.edu/license/ for licensing information. */
 
 /* FIXME: the two-version support should be deleted once support
  * for the table version is fully dropped.
  */
 
 /*
  * For the older table-based version of transMap, the following
  * tables are used.  These are mapped into the bigTransMap objects.
  *
  * Per-genome tables associated with transMap:
  *   transMapAliXxx - mapped PSLs alignments
  *   transMapInfoXxx - information for mapped alignments
  * hgFixed tables associated with transMap:
  *   transMapSrcXxx - information about source alignments
  *   transMapGeneXxx - gene information
  *   transMapSeqXxx - seq table for accessing sequence
  *   transMapExtFileXxx - extFile table
  *
  * Xxx is:
  *    - UcscGene - UCSC genes
  *    - RefSeq - RefSeq mRNAs
  *    - MRna - GenBank mRNAs
  *    - SplicedEst - GenBank spliced ESTs
  */
 
 #include "common.h"
 #include "hgc.h"
 #include "hui.h"
 #include "hCommon.h"
 #include "transMapClick.h"
 #include "bigTransMap.h"
 #include "bigBed.h"
 #include "bigPsl.h"
 #include "transMapStuff.h"
 #include "transMapInfo.h"
 #include "transMapSrc.h"
 #include "transMapGene.h"
 #include "genbank.h"
 #include "chromAlias.h"
 
 enum geneSrcSetType
 /* Constants identifying the set of source transcripts a transMap track was
  * built from.  The display label for each value is produced by
  * formatGeneSrcSet(). */
 {
     ENSEMBL_SRC_SET,      // displayed as "Ensembl"
     REFSEQ_SRC_SET,       // displayed as "RefSeq RNA"
     RNA_SRC_SET,          // displayed as "GenBank RNA"
     EST_SRC_SET,          // displayed as "GenBank EST"
     UCSC_GENES_SRC_SET,   // displayed as "UCSC Genes"; only guessed from table names
     UNKNOWN_SRC_SET       // no recognized source set; displayed as "Unknown"
 };
 
 struct transMapBag
 /* Collected information about one specific transMap mapping.  It holds both
  * the alignment as a PSL and the metadata as a bigTransMap record, no matter
  * whether the data came from database tables or from a bigTransMap file.
  * The psl and meta members are released by transMapBagFree(). */
 {
     struct psl *psl;              // transMap alignment
     struct bigTransMap *meta;     // bigTransMap record for metadata
     enum geneSrcSetType srcSet;   // source data set the mapping came from
     boolean srcDbIsActive;        // source database is active (result of hDbIsActive)
 };
 
 static char *guessTranscriptType(struct transMapGene *gene)
 /* Guess a transcript type string when none is recorded.  Returns a string
  * constant (not to be freed): "unknown" when there is no gene record,
  * "non_coding" when the CDS field is "n/a" or empty, and "protein_coding"
  * otherwise. */
 {
 if (gene == NULL)
     return "unknown";  // no gene record, as happens for ESTs
 if (sameString(gene->cds, "n/a"))
     return "non_coding";
 if (sameString(gene->cds, ""))
     return "non_coding";
 return "protein_coding";
 }
 
 static enum geneSrcSetType guessGeneSrcSet(struct trackDb *tdb)
 /* guess the the source set from the table name for table
  * based-transmap */
 {
 if (stringIn("UcscGenes", tdb->table))
     return UCSC_GENES_SRC_SET;
 else if (stringIn("RefSeq", tdb->table))
     return REFSEQ_SRC_SET;
 else if (stringIn("MRna", tdb->table))
     return RNA_SRC_SET;
 else if (stringIn("SplicedEst", tdb->table))
     return EST_SRC_SET;
 else
     return UNKNOWN_SRC_SET;
 }
 
 static char *chainSubsetToBigStr(enum transMapInfoChainSubset cs)
 /* Convert a chain subset type to the string used in bigTransMap records.
  * Returns a string constant.  transMapInfoUnknown and any value outside the
  * enum trip an assert; when asserts are compiled out, the former yields
  * "unknown" and the latter NULL. */
 {
 if (cs == transMapInfoAll)
     return "all";
 if (cs == transMapInfoSyn)
     return "syn";
 if (cs == transMapInfoRbest)
     return "rbest";
 // nothing valid matched: either the explicit unknown value or garbage
 assert(FALSE);
 return (cs == transMapInfoUnknown) ? "unknown" : NULL;
 }
 
 static struct bigTransMap *buildFakeBigTransMapRec(struct transMapInfo *info,
                                                    struct transMapSrc *src,
                                                    struct transMapGene *gene)
 /* Build a partial bigTransMap record from the table-based transMap rows.
  * Only metadata fields are filled in; strings are copies, so the inputs may
  * be freed afterwards.  gene may be NULL, in which case the gene name, gene
  * id and CDS fields are left unset. */
 {
 struct bigTransMap *rec;
 AllocVar(rec);
 rec->seqType = 1;   // NOTE(review): meaning of 1 is defined by bigTransMap, not visible here
 
 // source alignment description
 rec->name = cloneString(src->id);
 rec->srcDb = cloneString(src->db);
 rec->srcChrom = cloneString(src->chrom);
 rec->srcChromStart = src->chromStart;
 rec->srcChromEnd = src->chromEnd;
 // fractions are stored as integers scaled by 1000
 rec->srcIdent = (int)(1000.0*src->ident);
 rec->srcAligned = (int)(1000.0*src->aligned);
 
 // gene annotation, when a gene record is available
 if (gene != NULL)
     {
     rec->geneName = cloneString(gene->geneName);
     rec->geneId = cloneString(gene->geneId);
     rec->oCDS = cloneString(gene->cds);
     }
 
 // the guessed type is used for both gene and transcript type
 char *guessedType = guessTranscriptType(gene);
 rec->geneType = cloneString(guessedType);
 rec->transcriptType = cloneString(guessedType);
 rec->chainType = cloneString(chainSubsetToBigStr(info->chainSubset));
 
 // organism names for the source database; never left NULL
 rec->commonName = hOrganism(info->srcDb);
 if (rec->commonName == NULL)
     rec->commonName = cloneString("");
 rec->scientificName = hScientificName(info->srcDb);
 if (rec->scientificName == NULL)
     rec->scientificName = cloneString("");
 rec->orgAbbrev = cloneString(emptyForNull(hOrgShortForDb(info->srcDb)));
 return rec;
 }
 
 static struct transMapBag *transMapBagLoadDb(struct trackDb *tdb, char *mappedId)
 /* load information from various tables for database version of transmap */
 {
 struct sqlConnection *conn = hAllocConn(database);
 struct transMapBag *bag;
 AllocVar(bag);
 bag->psl = getAlignments(conn, tdb->table, mappedId);
 
 char *transMapInfoTbl = trackDbRequiredSetting(tdb, transMapInfoTblSetting);
 struct transMapInfo *info = transMapInfoQuery(conn, transMapInfoTbl, mappedId);
 
 char *transMapSrcTbl = trackDbRequiredSetting(tdb, transMapSrcTblSetting);
 struct transMapSrc *src = transMapSrcQuery(conn, transMapSrcTbl, info->srcDb, info->srcId);
 
 struct transMapGene *gene = NULL;
 char *transMapGeneTbl = trackDbSetting(tdb, transMapGeneTblSetting);
 if (transMapGeneTbl != NULL)
     gene = transMapGeneQuery(conn, transMapGeneTbl,
                              info->srcDb, transMapIdToSeqId(info->srcId));
 bag->srcSet = guessGeneSrcSet(tdb);
 bag->srcDbIsActive = hDbIsActive(info->srcDb);
 bag->meta = buildFakeBigTransMapRec(info, src, gene);
 transMapInfoFree(&info);
 transMapSrcFree(&src);
 transMapGeneFree(&gene);
 hFreeConn(&conn);
 return bag;
 }
 
 static enum geneSrcSetType getGeneSrcSet(struct trackDb *tdb)
 /* get the geneSrcSetType from trackDb */
 {
 char *srcSet = trackDbRequiredSetting(tdb, "transMapSrcSet");
 if (sameString(srcSet, "ensembl"))
     return ENSEMBL_SRC_SET;
 else if (sameString(srcSet, "refseq"))
     return REFSEQ_SRC_SET;
 else if (sameString(srcSet, "rna"))
     return RNA_SRC_SET;
 else if (sameString(srcSet, "est"))
     return EST_SRC_SET;
 else
     return UNKNOWN_SRC_SET;
 }
 
 static char *formatGeneSrcSet(enum geneSrcSetType srcSet)
 /* Return the display label for a source set.  The result is a string
  * constant; "Unknown" is returned for UNKNOWN_SRC_SET and for any value
  * not listed in the enum. */
 {
 char *label = "Unknown";
 switch (srcSet)
     {
     case ENSEMBL_SRC_SET:
         label = "Ensembl";
         break;
     case REFSEQ_SRC_SET:
         label = "RefSeq RNA";
         break;
     case RNA_SRC_SET:
         label = "GenBank RNA";
         break;
     case EST_SRC_SET:
         label = "GenBank EST";
         break;
     case UCSC_GENES_SRC_SET:
         label = "UCSC Genes";
         break;
     case UNKNOWN_SRC_SET:
         break;  // keep the default label
     }
 return label;
 }
 
 static struct transMapBag *transMapBagLoadBig(struct trackDb *tdb, char *mappedId)
 /* Load the information for mappedId from a bigTransMap file.  The record is
  * found through the file's extra index on the "name" field.  Aborts unless
  * exactly one record has that name and it has BIGTRANSMAP_NUM_COLS fields.
  * Returns a new bag, to be released with transMapBagFree(). */
 {
 struct sqlConnection *conn = hAllocConn(database);
 struct transMapBag *bag;
 AllocVar(bag);
 
 char *fileName = bbiNameFromSettingOrTable(tdb, conn, tdb->table);
 // chromosome name comes from the cart variable "c"
 char *chrom = cartString(cart, "c");
-struct bbiFile *bbi =  bigBedFileOpenAlias(fileName, chromAliasGetHash(database));
+struct bbiFile *bbi =  bigBedFileOpenAlias(fileName, chromAliasChromToAliasHash(database));
 // local memory pool handed to the name query below
 struct lm *lm = lmInit(0);
 int fieldIx;
 struct bptFile *bpt = bigBedOpenExtraIndex(bbi, "name", &fieldIx);
 struct bigBedInterval *bb = bigBedNameQuery(bbi, bpt, fieldIx, mappedId, lm);
 // the name must identify exactly one record
 if (slCount(bb) != 1)
     errAbort("expected 1 item named \"%s\", got %d from %s", mappedId, slCount(bb), fileName);
 // split the interval into a row of column strings, one slot per field in the file
 char *fields[bbi->fieldCount];
 char startBuf[32], endBuf[32];
 int bbFieldCount = bigBedIntervalToRow(bb, chrom, startBuf, endBuf, fields,
                                        bbi->fieldCount);
 if (bbFieldCount != BIGTRANSMAP_NUM_COLS)
     errAbort("expected %d fields in bigTransMap record, got %d in %s",
              BIGTRANSMAP_NUM_COLS, bbFieldCount, fileName);
 // the same interval supplies both the alignment and the metadata record
 bag->psl = pslFromBigPsl(chrom, bb, 0, NULL, NULL); 
 bag->meta = bigTransMapLoad(fields);
 bag->srcSet = getGeneSrcSet(tdb);
 bag->srcDbIsActive = hDbIsActive(bag->meta->srcDb);
 
 bigBedFileClose(&bbi);
 lmCleanup(&lm);
 hFreeConn(&conn);
 return bag;
 }
 
 static void transMapBagFree(struct transMapBag **bagPtr)
 /* Free a bag together with its PSL and metadata record, and clear the
  * caller's pointer.  Does nothing when *bagPtr is already NULL. */
 {
 struct transMapBag *doomed = *bagPtr;
 if (doomed == NULL)
     return;
 pslFree(&doomed->psl);
 bigTransMapFree(&doomed->meta);
 freez(bagPtr);
 }
 
 static void prOrgScientificDb(char *db)
 /* Print "organism (scientific name)" for a database, or "n/a" when either
  * name cannot be obtained. */
 {
 char *commonName = hOrganism(db);
 char *scientificName = hScientificName(db);
 if ((commonName == NULL) || (scientificName == NULL))
     printf("n/a");
 else
     printf("%s (%s)", commonName, scientificName);
 freeMem(commonName);
 freeMem(scientificName);
 }
 
 static void displayMapped(struct transMapBag *bag)
 /* display information about the mapping alignment */
 {
 printf("<TABLE class=\"transMap\">\n");
 printf("<CAPTION>TransMap Alignment</CAPTION>\n");
 printf("<TBODY>\n");
 
 // organism/assembly
 printf("<TR CLASS=\"transMapLeft\"><TD>Organism<TD>");
 prOrgScientificDb(database);
 printf("</TR>\n");
 printf("<TR CLASS=\"transMapLeft\"><TD>Genome<TD>%s</TR>\n", database);
 
 // position
 printf("<TR CLASS=\"transMapLeft\">");
 printf("<TD>Position<TD CLASS=\"transMapNoWrap\">");
 printf("<A HREF=\"%s&position=%s:%d-%d\">",
       hgTracksPathAndSettings(),
       bag->psl->tName, bag->psl->tStart, bag->psl->tEnd);
 printf("%s:%d-%d</A>", bag->psl->tName, bag->psl->tStart, bag->psl->tEnd);
 printf("</TR>\n");
 
 // % identity and % aligned
 printf("<TR CLASS=\"transMapLeft\"><TD>Identity<TD>%0.1f%%</TR>\n",
        100.0*pslIdent(bag->psl));
 printf("<TR CLASS=\"transMapLeft\"><TD>Aligned<TD>%0.1f%%</TR>\n",
        100.0*pslQueryAligned(bag->psl));
 
 // chain type used in mapping
 printf("<TR CLASS=\"transMapLeft\"><TD>Chain subset<TD>%s</TR>\n",
        bag->meta->chainType);
 printf("</TBODY></TABLE>\n");
 }
 
 static void displaySource(struct transMapBag *bag)
 /* display information about the source gene that was mapped */
 {
 printf("<TABLE class=\"transMap\">\n");
 printf("<CAPTION>Source Alignment</CAPTION>\n");
 
 printf("<TBODY>\n");
 // organism/assembly
 printf("<TR CLASS=\"transMapLeft\"><TD>Organism<TD>%s (%s)</TR>\n",
        bag->meta->commonName, bag->meta->scientificName);
 printf("<TR CLASS=\"transMapLeft\"><TD>Genome<TD>%s</TR>\n", bag->meta->srcDb);
 printf("<TR CLASS=\"transMapLeft\"><TD>Source<TD>%s</TR>\n", formatGeneSrcSet(bag->srcSet));
 
 // position
 printf("<TR CLASS=\"transMapLeft\"><TD>Position\n");
 printf("<TD CLASS=\"transMapNoWrap\">");
 if (bag->srcDbIsActive)
     printf("<A HREF=\"%s?db=%s&position=%s:%d-%d\" target=_blank>"
            "%s:%d-%d</A>",
            hgTracksName(), bag->meta->srcDb,
            bag->meta->srcChrom, bag->meta->srcChromStart, bag->meta->srcChromEnd,
            bag->meta->srcChrom, bag->meta->srcChromStart, bag->meta->srcChromEnd);
 else
     printf("%s:%d-%d", bag->meta->srcChrom, bag->meta->srcChromStart, bag->meta->srcChromEnd);
 printf("</TR>\n");
 
 // % identity and % aligned, values stored as 1000*fraction ident
 printf("<TR CLASS=\"transMapLeft\"><TD>Identity<TD>%0.1f%%</TR>\n",
        bag->meta->srcIdent/10.0);
 printf("<TR CLASS=\"transMapLeft\"><TD>Aligned<TD>%0.1f%%</TR>\n",
        bag->meta->srcAligned/10.0);
 
 // gene and CDS
 printf("<TR CLASS=\"transMapLeft\"><TD>Gene<TD>%s</TR>\n",
        strOrNbsp(bag->meta->geneName));
 printf("<TR CLASS=\"transMapLeft\"><TD>Gene Id<TD>%s</TR>\n",
        strOrNbsp(bag->meta->geneId));
 printf("<TR CLASS=\"transMapLeft\"><TD>Gene Type<TD>%s</TR>\n",
        strOrNbsp(bag->meta->geneType));
 printf("<TR CLASS=\"transMapLeft\"><TD>Transcript Id<TD>%s</TR>\n",
        transMapIdToAcc(bag->meta->name));
 printf("<TR CLASS=\"transMapLeft\"><TD>Transcript Type<TD>%s</TR>\n",
        strOrNbsp(bag->meta->transcriptType));
 printf("<TR CLASS=\"transMapLeft\"><TD>CDS<TD>%s</TR>\n",
        strOrNbsp(bag->meta->oCDS));
 printf("</TBODY></TABLE>\n");
 }
 
 static void displayAligns(struct trackDb *tdb, struct transMapBag *bag)
 /* display cDNA alignments */
 {
 int start = cartInt(cart, "o");
 printf("<H3>mRNA/Genomic Alignments</H3>");
 printAlignmentsSimple(bag->psl, start, "hgcTransMapCdnaAli", tdb->table, bag->psl->qName);
 }
 
 void transMapClickHandler(struct trackDb *tdb, char *mappedId)
 /* Handle a click on a transMap track item.  Loads the mapping from a
  * bigTransMap file when the track has a "bigDataUrl" setting and from the
  * database tables otherwise, then prints the details page.  Aborts when
  * either argument is NULL. */
 {
 if (tdb == NULL)
     errAbort("transMapClickHandler called without trackDb");
 if (mappedId == NULL)
     errAbort("transMapClickHandler called without mappedId");
 
 struct transMapBag *bag = NULL;
 if (trackDbSetting(tdb, "bigDataUrl") == NULL)
     bag = transMapBagLoadDb(tdb, mappedId);
 else
     bag = transMapBagLoadBig(tdb, mappedId);
 
 genericHeader(tdb, mappedId);
 
 // layout table: first row holds mapped and source tables side by side
 printf("<TABLE class=\"transMapLayout\">\n");
 printf("<TR><TD>\n");
 displayMapped(bag);
 printf("<TD>\n");
 displaySource(bag);
 printf("<TD>&nbsp;\n");
 printf("</TR>\n");
 
 // second row spans all columns and holds the alignment listing
 printf("<TR><TD COLSPAN=3>\n");
 displayAligns(tdb, bag);
 printf("</TR>\n");
 printf("</TABLE>\n");
 
 printTrackHtml(tdb);
 transMapBagFree(&bag);
 }
 
 static struct dnaSeq *getCdnaSeqDb(struct trackDb *tdb, char *name)
 /* Fetch the sequence called name using the seq and extFile tables named in
  * the track's base-coloring sequence setting.  The setting must consist of
  * exactly three words, the first being "extFile"; anything else aborts. */
 {
 /* FIXME: this is really a rip off of some of the code in
  * hgTracks/cds.c; really need to centralize it somewhere */
 char *setting = trackDbRequiredSetting(tdb, BASE_COLOR_USE_SEQUENCE);
 
 // chopByWhite is given a scratch copy so the setting itself stays intact
 char *scratch = cloneString(setting);
 
 // expected layout: extFile seqTbl extFileTbl
 char *words[3];
 int wordCount = chopByWhite(scratch, words, ArraySize(words));
 int specOk = (wordCount == ArraySize(words)) && sameString(words[0], "extFile");
 if (!specOk)
     errAbort("invalid %s track setting: %s", BASE_COLOR_USE_SEQUENCE, setting);
 
 struct dnaSeq *result = hDnaSeqMustGet(NULL, name, words[1], words[2]);
 freeMem(scratch);
 return result;
 }
 
 void transMapShowCdnaAli(struct trackDb *tdb, char *mappedId)
 /* Show the alignment for mappedId against the genome, mostly ripped off
  * from htcCdnaAli.  The sequence comes from the bigTransMap record when the
  * track has a "bigDataUrl" setting and from the seq/extFile tables
  * otherwise.  Aborts when either argument is NULL. */
 {
 if (tdb == NULL)
     errAbort("transMapShowCdnaAli called without trackDb");
 if (mappedId == NULL)
     errAbort("transMapShowCdnaAli called without mappedId");
 
 struct transMapBag *bag = NULL;
 struct dnaSeq *cdna = NULL;
 int isBig = (trackDbSetting(tdb, "bigDataUrl") != NULL);
 if (isBig)
     {
     bag = transMapBagLoadBig(tdb, mappedId);
     char *bases = bag->meta->oSequence;
     cdna = newDnaSeq(cloneString(bases), strlen(bases),
                      mappedId);
     }
 else
     {
     bag = transMapBagLoadDb(tdb, mappedId);
     cdna = getCdnaSeqDb(tdb, transMapIdToSeqId(mappedId));
     }
 
 // a missing or unparsable CDS is shown as an all-zero CDS
 struct genbankCds cds;
 if (isEmpty(bag->meta->oCDS) || !genbankCdsParse(bag->meta->oCDS, &cds))
     ZeroVar(&cds);
 
 char title[1024];
 safef(title, sizeof title, "%s vs Genomic", mappedId);
 htmlFramesetStart(title);
 showSomeAlignment(bag->psl, cdna, gftDna, 0, cdna->size, NULL, cds.start, cds.end);
 
 dnaSeqFree(&cdna);
 transMapBagFree(&bag);
 }