0564395ec363631f2ff8d295da6f6b50f873fda4
braney
  Mon Jan 24 17:01:53 2022 -0800
more chromAlias work:  some name changes and support for the new genark
chromAlias format

diff --git src/hg/hgc/bigDbSnpClick.c src/hg/hgc/bigDbSnpClick.c
index cebb059..d1cb44a 100644
--- src/hg/hgc/bigDbSnpClick.c
+++ src/hg/hgc/bigDbSnpClick.c
@@ -1,423 +1,423 @@
 /* Show details for bigDbSnp track items. */
 
 /* Copyright (C) 2019 The Regents of the University of California 
  * See kent/LICENSE or http://genome.ucsc.edu/license/ for licensing information. */
 
 #include "common.h"
 #include "hgc.h"
 #include "bigDbSnp.h"
 #include "dbSnpDetails.h"
 #include "bPlusTree.h"
 #include "htslib/bgzf.h"
 #include "soTerm.h"
 #include "chromAlias.h"
 
 static struct dbSnpDetails *getDetails(struct bigDbSnp *bds, char *detailsFileOrUrl)
 /* Seek to the offset for this variant in detailsFileOrUrl, read the line and load as
  * struct dbSnpDetails.  */
 {
 bits64 offset = bds->_dataOffset;
 bits64 len = bds->_dataLen;
 char *line = readOneLineMaybeBgzip(detailsFileOrUrl, offset, len);
 // Newline must be trimmed or else it messes up parsing of final column if empty!
 if (line[len-1] == '\n')
     line[len-1] = '\0';
 char *row[DBSNPDETAILS_NUM_COLS+1];
 int wordCount = chopTabs(line, row);
 if (wordCount != DBSNPDETAILS_NUM_COLS)
     errAbort("dbSnpDetails: expected %d tab-separated words at offset %Ld in %s, got %d",
              DBSNPDETAILS_NUM_COLS, offset, detailsFileOrUrl, wordCount);
 return dbSnpDetailsLoad(row);
 }
 
 struct slName *getFreqSourceOrder(struct trackDb *tdb, char *rsId, int expectedCount)
 /* If tdb has freqSourceOrder*/
 {
 struct slName *sourceList = NULL;
 char *sourceNames = trackDbSetting(tdb, "freqSourceOrder");
 if (sourceNames)
     {
     sourceList = slNameListFromComma(sourceNames);
     int settingCount = slCount(sourceList);
     if (settingCount != expectedCount)
         errAbort("bigDbSnp freqSourceCount for %s is %d, "
                  "but trackDb setting freqSourceOrder lists %d sources",
                  rsId, expectedCount, settingCount);
     }
 return sourceList;
 }
 
 INLINE boolean freqSourceHasData(struct dbSnpDetails *details, int sourceIx)
 /* Return TRUE if freqSource sourceIx has any data for this variant. */
 {
 return details->alleleTotals[sourceIx] > 0;
 }
 
 struct alCounts
     {
     int obsCount;   // Number of chromosomes on which allele was observed by a project
     int totalCount; // Number of chromosomes on which project observed some allele of this variant
     };
 
 static struct hash *makePerAlleleCounts(struct dbSnpDetails *details, boolean isRc)
 /* Return a hash of allele to array of alCounts (each source's frequency count data). */
 {
 struct hash *perAlleleCounts = hashNew(0);
 int sIx;
 for (sIx = 0;  sIx < details->freqSourceCount;  sIx++)
     {
     if (freqSourceHasData(details, sIx))
         {
         int totalCount = details->alleleTotals[sIx];
         char *blob = cloneString(details->alleleCounts[sIx]);
         struct slName *pab, *perAlleleBlobs = slNameListFromString(blob, '|');
         for (pab = perAlleleBlobs;  pab != NULL;  pab = pab->next)
             {
             char *words[3];
             int wordCount = chopByChar(pab->name, ':', words, ArraySize(words));
             if (wordCount != 2)
                 errAbort("Malformed allele:count in |-separated '%s': "
                          "expecting two :-separated words but got %d", pab->name, wordCount);
             char *allele = words[0];
             if (isRc)
                 reverseComplement(allele, strlen(allele));
             int obsCount = atoi(words[1]);
             struct alCounts *alArray = hashFindVal(perAlleleCounts, allele);
             if (alArray == NULL)
                 {
                 AllocArray(alArray, details->freqSourceCount);
                 hashAdd(perAlleleCounts, allele, alArray);
                 }
             alArray[sIx].obsCount = obsCount;
             alArray[sIx].totalCount = totalCount;
             }
         }
     }
 return perAlleleCounts;
 }
 
 static void printAlleleRow(struct hash *perAlleleCounts, char *allele,
                            struct dbSnpDetails *details, char **perSourceMajorAl)
 /* Print the allele and its counts/freqs from each freqSource for allele, if any. */
 {
 puts("<tr>");
 char buf[1024];
 char *abbrevAl = isEmpty(allele) ? "-" : bigDbSnpAbbrevAllele(allele, buf, sizeof buf);
 printf("<td><span class='inlineCode'>%s</span></td>", abbrevAl);
 struct alCounts *alleleCounts = hashFindVal(perAlleleCounts, allele);
 int sIx;
 for (sIx = 0;  sIx < details->freqSourceCount;  sIx++)
     {
     if (freqSourceHasData(details, sIx))
         {
         struct alCounts *counts = NULL;
         if (alleleCounts)
             counts = &alleleCounts[sIx];
         if (counts != NULL && counts->totalCount != 0)
             {
             boolean isMajor = sameString(perSourceMajorAl[sIx], allele);
             printf("<td>");
             if (isMajor)
                 printf("<b>");
             printf("%d/%d (%f)", counts->obsCount, counts->totalCount,
                    ((double)counts->obsCount / (double)counts->totalCount));
             if (isMajor)
                 printf("</b>");
             }
         else
             printf("<td>-</td>");
         }
     }
 puts("</tr>");
 }
 
 static void printAlleleCountsAndFreqs(struct bigDbSnp *bds, struct dbSnpDetails *details,
                                       struct trackDb *tdb)
 /* Print a table row containing a table of alleles, counts & frequencies if applicable. */
 {
 // Allele counts & frequencies
 if (bds->freqSourceCount > 0)
     {
     puts("<tr><td colspan=2><b>Allele frequency counts:</b></td></tr>");
     puts("<tr><td colspan=2><table class='stdTbl'>");
     struct slName *sourceList = getFreqSourceOrder(tdb, bds->name, bds->freqSourceCount);
     int sIx;
     if (sourceList)
         {
         puts("<tr><th>Allele</th>");
         struct slName *source = sourceList;
         for (sIx = 0;  sIx < bds->freqSourceCount;  sIx++, source = source->next)
             if (freqSourceHasData(details, sIx))
                 printf("<th>%s</th> ", source->name);
         puts("</tr>");
         }
     boolean isRc = (stringIn(bdsRevStrand, bds->ucscNotes) != NULL);
     struct hash *perAlleleCounts = makePerAlleleCounts(details, isRc);
     // Row for reference allele
     printAlleleRow(perAlleleCounts, bds->ref, details, bds->majorAllele);
     hashRemove(perAlleleCounts, bds->ref);
     // Rows for alternate alleles
     int aIx;
     for (aIx = 0;  aIx < bds->altCount;  aIx++)
         {
         char *alt = bds->alts[aIx];
         printAlleleRow(perAlleleCounts, alt, details, bds->majorAllele);
         hashRemove(perAlleleCounts, alt);
         }
     // Rows for frequency alleles not included in ref/alt, if any
     struct hashEl *hel, *helList = hashElListHash(perAlleleCounts);
     for (hel = helList;  hel != NULL;  hel = hel->next)
         {
         char *alt = hel->name;
         printAlleleRow(perAlleleCounts, alt, details, bds->majorAllele);
         }
     puts("</table>");
     puts("</td></tr>");
     }
 }
 
 static void printOneSoTerm(struct dbSnpDetails *details, int ix)
 /* Print a SO term with link to MISO browser for details->soTerms[ix]. */
 {
 printf("%s", soTermToMisoLink((enum soTerm)details->soTerms[ix]));
 }
 
 static void printOneClinVar(struct dbSnpDetails *details, int ix)
 /* Print a link to ClinVar for details->clinVar{Accs,Sigs}[ix]. */
 {
 printf("<a href='https://www.ncbi.nlm.nih.gov/clinvar/%s' target=_blank>%s</a> (%s)",
        details->clinVarAccs[ix], details->clinVarAccs[ix], details->clinVarSigs[ix]);
 }
 
 static void printOneSubmitter(struct dbSnpDetails *details, int ix)
 /* Print a link to dbSNP submitter page for details->submitters[ix]. */
 {
 printf("<a href=\"https://www.ncbi.nlm.nih.gov/SNP/snp_viewTable.cgi?h=%s\" target=_blank>"
        "%s</a>", details->submitters[ix], details->submitters[ix]);
 }
 
 static void printOnePub(struct dbSnpDetails *details, int ix)
 /* Print a link to Pubmed for details->pubMedIds[ix]. */
 {
 printf("<a href=\"https://www.ncbi.nlm.nih.gov/pubmed/%d\" target=_blank>PMID%d</a>\n",
        details->pubMedIds[ix], details->pubMedIds[ix]);
 }
 
 static void printDetailsRow(struct dbSnpDetails *details, char *label, int itemCount,
                             void (*printOneValue)(struct dbSnpDetails *details, int ix))
 /* Print out one row of a details table using callback to print comma-sep list of values. */
 {
 if (itemCount > 0)
     {
     printf("<tr><td><b>%s:</b></td><td>", label);
     int ix;
     for (ix = 0;  ix < itemCount;  ix++)
         {
         if (ix != 0)
             puts(", ");
         printOneValue(details, ix);
         }
     puts("</td></tr>");
     }
 }
 
 static void printDbSnpDetails(struct bigDbSnp *bds, struct dbSnpDetails *details,
                               struct trackDb *tdb)
 /* Print out extras from dbSnpDetails file. */
 {
 printAlleleCountsAndFreqs(bds, details, tdb);
 printDetailsRow(details, "Functional effects", details->soTermCount, printOneSoTerm);
 printDetailsRow(details, "ClinVar", details->clinVarCount, printOneClinVar);
 printDetailsRow(details, "Submitted by", details->submitterCount, printOneSubmitter);
 printDetailsRow(details, "Publications in PubMed", details->pubMedIdCount, printOnePub);
 }
 
 static void printUcscNotes(char *ucscNotes)
 /* Print explanations of ucscNotes items. */
 {
 if (isNotEmpty(ucscNotes))
     {
     puts("<p>Interesting or anomalous conditions noted by UCSC:</b><br>");
     puts("<ul>");
     boolean isCommonAll = (stringIn(bdsCommonAll, ucscNotes) != NULL);
     boolean isRareAll = (stringIn(bdsRareAll, ucscNotes) != NULL);
     struct slName *note, *noteList = slNameListFromComma(ucscNotes);
     for (note = noteList;  note != NULL;  note = note->next)
         {
         // When commonAll is true, commonSome is also true but not informative,
         // so skip commonSome if commonAll is true.  Likewise for rareAll & rareSome.
         if (! ((isCommonAll && sameString(note->name, bdsCommonSome)) ||
                (isRareAll && sameString(note->name, bdsRareSome))))
             {
             char *desc = bigDbSnpDescribeUcscNote(note->name);
             printf("<li>%s\n", desc ? desc : note->name);
             }
         }
     puts("</ul>");
     }
 }
 
 static char *getMinRep(char *ref, char *alt, boolean leftJustify)
 /* If ref and alt can be trimmed down to a shorter representation then return that, othw NULL. */
 {
 char *minRep = NULL;
 int refLen = strlen(ref);
 int altLen = strlen(alt);
 if ((refLen > 1 && altLen > 0) || (altLen > 1 && refLen > 0))
     {
     char *refCpy = cloneStringZ(ref, refLen);
     char *altCpy = cloneStringZ(alt, altLen);
     uint start=0, end=refLen;
     int refCpyLen = refLen, altCpyLen = altLen;
     if (leftJustify)
         trimRefAltLeft(refCpy, altCpy, &start, &end, &refCpyLen, &altCpyLen);
     else
         trimRefAlt(refCpy, altCpy, &start, &end, &refCpyLen, &altCpyLen);
     if (refCpyLen < refLen)
         {
         struct dyString *dy = dyStringNew(0);
         char bufRef[1024], bufAlt[1024];
         if (refCpyLen == 0)
             dyStringPrintf(dy, "ins%s",
                            bigDbSnpAbbrevAllele(altCpy, bufAlt, sizeof bufAlt));
         else if (altCpyLen == 0)
             dyStringPrintf(dy, "del%s",
                            bigDbSnpAbbrevAllele(refCpy, bufRef, sizeof bufRef));
         else
             dyStringPrintf(dy, "del%sins%s",
                            bigDbSnpAbbrevAllele(refCpy, bufRef, sizeof bufRef),
                            bigDbSnpAbbrevAllele(altCpy, bufAlt, sizeof bufAlt));
         minRep = dyStringCannibalize(&dy);
         }
     }
 return minRep;
 }
 
 static void printOtherMappings(struct bbiFile *bbi, struct bigDbSnp *bds, struct trackDb *tdb)
 /* If the variant maps to other genomic sequences (alts/fixes/PAR), link to those positions. */
 {
 int fieldIx;
 struct bptFile *bpt = bigBedOpenExtraIndex(bbi, "name", &fieldIx);
 struct lm *lm = lmInit(0);
 struct bigBedInterval *bbList = bigBedNameQuery(bbi, bpt, fieldIx, bds->name, lm);
 if (slCount(bbList) > 1)
     {
     puts("<p><b>This variant maps to additional locations:</b><br><br>");
     char chromName[bbi->chromBpt->keySize+1];
     int lastChromId = -1;
     struct bigBedInterval *bb;
     for (bb = bbList; bb != NULL; bb = bb->next)
         {
         if (!startsWithWord(bds->name, bb->rest))
             errAbort("Error: bigBedNameQuery search for name '%s' yielded '%s'",
                      bds->name, bb->rest);
         bbiCachedChromLookup(bbi, bb->chromId, lastChromId, chromName, sizeof(chromName));
         char startBuf[16], endBuf[16];
         char *row[BIGDBSNP_NUM_COLS];
         int bbFieldCount = bigBedIntervalToRow(bb, chromName, startBuf, endBuf, row, ArraySize(row));
         if (bbFieldCount != BIGDBSNP_NUM_COLS)
             errAbort("bigDbSnpClick: expected %d columns but got %d", BIGDBSNP_NUM_COLS,
                      bbFieldCount);
         struct bigDbSnp *otherBds = bigDbSnpLoad(row);
         if (differentString(bds->chrom, otherBds->chrom) ||
             bds->chromStart != otherBds->chromStart ||
             bds->chromEnd != otherBds->chromEnd)
             {
             bedPrintPos((struct bed *)otherBds, 3, tdb);
             if (bb->next != NULL)
                 puts("<br>");
             }
         }
     puts("</p>");
     }
 bptFileDetach(&bpt);
 lmCleanup(&lm);
 }
 
 void doBigDbSnp(struct trackDb *tdb, char *rsId)
 /* Show details for bigDbSnp item. */
 {
 int start = cartInt(cart, "o");
 int end = cartInt(cart, "t");
 char *fileOrUrl = hReplaceGbdb(trackDbSetting(tdb, "bigDataUrl"));
 if (isEmpty(fileOrUrl))
     errAbort("bigDbSnpClick: trackDb is missing bigDataUrl setting");
-struct bbiFile *bbi =  bigBedFileOpenAlias(fileOrUrl, chromAliasGetHash(database));
+struct bbiFile *bbi =  bigBedFileOpenAlias(fileOrUrl, chromAliasChromToAliasHash(database));
 boolean found = FALSE;
 char *chrom = cartString(cart, "c");
 int ivStart = start, ivEnd = end;
 if (start == end)
     {
     // item is an insertion; expand the search range from 0 bases to 2 so we catch it:
     ivStart = max(0, start-1);
     ivEnd++;
     }
 struct lm *lm = lmInit(0);
 struct bigBedInterval *bbList = bigBedIntervalQuery(bbi, chrom, ivStart, ivEnd, 0, lm);
 struct bigBedInterval *bb;
 for (bb = bbList; bb != NULL; bb = bb->next)
     {
     if (!startsWithWord(rsId, bb->rest))
 	continue;
     found = TRUE;
     char startBuf[16], endBuf[16];
     char *row[BIGDBSNP_NUM_COLS];
     int bbFieldCount = bigBedIntervalToRow(bb, chrom, startBuf, endBuf, row, ArraySize(row));
     if (bbFieldCount != BIGDBSNP_NUM_COLS)
         errAbort("bigDbSnpClick: expected %d columns but got %d", BIGDBSNP_NUM_COLS, bbFieldCount);
     struct bigDbSnp *bds = bigDbSnpLoad(row);
     struct dbSnpDetails *details = NULL;
     struct slPair *detailsUrls = parseDetailsTablUrls(tdb);
     if (detailsUrls)
         details = getDetails(bds, detailsUrls->val);
     bedPrintPos((struct bed *)bds, 3, tdb);
     puts("<br>");
     puts("<table>");
     char *ref = bds->ref;
     char abbrev[1024];
     char *abbrevRef = isEmpty(ref) ? "-" : bigDbSnpAbbrevAllele(ref, abbrev, sizeof abbrev);
     printf("<tr><td><b>Reference allele:</b></td><td><span class='inlineCode'>%s</span><td></tr>\n",
            abbrevRef);
     printf("<tr><td");
     if (bds->altCount > 1)
         printf(" style='vertical-align:top'");
     printf("><b>Alternate allele%s:</b></td><td>", (bds->altCount > 1 ? "s" : ""));
     if (bds->altCount == 0)
         printf("<em>none</em>");
     else
         {
         int i;
         for (i = 0;  i < bds->altCount;  i++)
             {
             char *alt = bds->alts[i];
             char *abbrevAlt = isEmpty(alt) ? "-" : bigDbSnpAbbrevAllele(alt, abbrev, sizeof abbrev);
             printf("%s<span class='inlineCode'>%s", (i > 0 ? ",<br>\n" : ""), abbrevAlt);
             char *minRepLeft = getMinRep(ref, alt, TRUE);
             if (minRepLeft)
                 {
                 char *minRepRight = getMinRep(ref, alt, FALSE);
                 if (sameString(minRepLeft, minRepRight))
                     printf(" [%s]", minRepLeft);
                 else
                     printf(" [%s (left-shifted), %s (right-shifted)]", minRepLeft, minRepRight);
                 }
             printf("</span>");
             }
         puts("</td></tr>");
         }
     if (bds->shiftBases > 0)
         printf("<td><b>Uncertainty in indel placement:</b></td><td>%d base%s</td></tr>\n",
                bds->shiftBases, (bds->shiftBases > 1 ? "s" : ""));
     if (details)
         printDbSnpDetails(bds, details, tdb);
     printf("<tr><td><b>Variation class/type:</b></td><td>%s</td></tr>\n",
            bigDbSnpClassToString(bds->class));
     puts("</table>");
     printUcscNotes(bds->ucscNotes);
     printOtherMappings(bbi, bds, tdb);
     }
 if (!found)
     printf("No item %s starting at %s:%d\n", rsId, chrom, start+1);
 lmCleanup(&lm);
 bbiFileClose(&bbi);
 }