4898794edd81be5285ea6e544acbedeaeb31bf78
max
  Tue Nov 23 08:10:57 2021 -0800
Fixing pointers to README file for license in all source code files. refs #27614

diff --git src/hg/protein/lib/pbUtil.c src/hg/protein/lib/pbUtil.c
index 4adcedb..f80976a 100644
--- src/hg/protein/lib/pbUtil.c
+++ src/hg/protein/lib/pbUtil.c
@@ -1,1055 +1,1055 @@
 /* pbUtil.c various utility functions for Proteome Browser */
 
 /* Copyright (C) 2013 The Regents of the University of California 
- * See README in this or parent directory for licensing information. */
+ * See kent/LICENSE or http://genome.ucsc.edu/license/ for licensing information. */
 #include "common.h"
 #include "hCommon.h"
 #include "string.h"
 #include "portable.h"
 #include "memalloc.h"
 #include "jksql.h"
 #include "memgfx.h"
 #include "vGfx.h"
 #include "htmshell.h"
 #include "cart.h"
 #include "hdb.h"
 #include "web.h"
 #include "hui.h"
 #include "cheapcgi.h"
 #include "hgColors.h"
 #include "pbStamp.h"
 #include "pbTracks.h"
 
 
 void hWrites(char *string)
 /* Write string with no '\n' if not suppressed. */
 {
 if (!suppressHtml)
     fputs(string, stdout);
 }
 
 void hButton(char *name, char *label)
 /* Write out button if not suppressed. */
 {
 if (!suppressHtml)
     cgiMakeButton(name, label);
 }
 
 void aaPropertyInit(int *hasResFreq)
 /* initialize AA properties */
 {
 int i, j, ia, iaCnt;
 
 struct sqlConnection *conn;
 char query[56];
 struct sqlResult *sr;
 char **row;
 
 for (i=0; i<256; i++)
     {
     aa_attrib[i] = 0;
     aa_hydro[i] = 0;
     }
 
 aa_attrib['R'] = CHARGE_POS;
 aa_attrib['H'] = CHARGE_POS;
 aa_attrib['K'] = CHARGE_POS;
 aa_attrib['D'] = CHARGE_NEG;
 aa_attrib['E'] = CHARGE_NEG;
 aa_attrib['C'] = POLAR;
 aa_attrib['Q'] = POLAR;
 aa_attrib['S'] = POLAR;
 aa_attrib['Y'] = POLAR;
 aa_attrib['N'] = POLAR;
 aa_attrib['T'] = POLAR;
 aa_attrib['M'] = POLAR;
 aa_attrib['A'] = NEUTRAL;
 aa_attrib['W'] = NEUTRAL;
 aa_attrib['V'] = NEUTRAL;
 aa_attrib['F'] = NEUTRAL;
 aa_attrib['P'] = NEUTRAL;
 aa_attrib['I'] = NEUTRAL;
 aa_attrib['L'] = NEUTRAL;
 aa_attrib['G'] = NEUTRAL;
 
 /* Ala:  1.800  Arg: -4.500  Asn: -3.500  Asp: -3.500  Cys:  2.500  Gln: -3.500 */
 aa_hydro['A'] =  1.800;
 aa_hydro['R'] = -4.500;
 aa_hydro['N'] = -3.500;
 aa_hydro['D'] = -3.500;
 aa_hydro['C'] =  2.500;
 aa_hydro['Q'] = -3.500;
 /* Glu: -3.500  Gly: -0.400  His: -3.200  Ile:  4.500  Leu:  3.800  Lys: -3.900 */
 aa_hydro['E'] = -3.500;
 aa_hydro['G'] = -0.400;
 aa_hydro['H'] = -3.200;
 aa_hydro['I'] =  4.500;
 aa_hydro['L'] =  3.800;
 aa_hydro['K'] = -3.900;
 /* Met:  1.900  Phe:  2.800  Pro: -1.600  Ser: -0.800  Thr: -0.700  Trp: -0.900 */
 aa_hydro['M'] =  1.900;
 aa_hydro['F'] =  2.800;
 aa_hydro['P'] = -1.600;
 aa_hydro['S'] = -0.800;
 aa_hydro['T'] = -0.700;
 aa_hydro['W'] = -0.900;
 /* Tyr: -1.300  Val:  4.200  Asx: -3.500  Glx: -3.500  Xaa: -0.490 */
 aa_hydro['Y'] = -1.300;
 aa_hydro['V'] =  4.200;
 /* ?? Asx: -3.500 Glx: -3.500  Xaa: -0.490 ?? */
 
 /* get average frequency distribution for each AA residue */
 conn= hAllocConn(database);
 if (!hTableExists(database, "pbResAvgStd"))
     {
     *hasResFreq = 0;
     return;
     }
 else
     {
     *hasResFreq = 1;
     }
 sqlSafef(query, sizeof(query), "select * from %s.pbResAvgStd", database);
 iaCnt = 0;
 sr = sqlMustGetResult(conn, query);
 row = sqlNextRow(sr);
 
 while (row != NULL)
     {
     for (j=0; j<20; j++)
         {
         if (row[0][0] == aaAlphabet[j])
             {
             iaCnt++;
             ia = j;
             aaChar[ia] = row[0][0];
             avg[ia] = (double)(atof(row[1]));
             stddev[ia] = (double)(atof(row[2]));
             break;
             }
         }
     row = sqlNextRow(sr);
     }
 sqlFreeResult(&sr);
 if (iaCnt != 20)
     {
     errAbort("in doAnomalies(), not all 20 amino acide residues are accounted for.");
     }
 }
 
 char *getAA(char *pepAccession)
 {
 struct sqlConnection *conn;
 char query[256];
 struct sqlResult *sr;
 char **row;
 
 char *chp;
 int i,len;
 char *seq;
 char *protDbDate;
 
 conn= hAllocConn(database);
 
 /* Figure out which is the appropriate DB to use,
    either spXXXXXX (for PB supported GB) so that we can handle TrEMBL-NEW entries
    or swissProt (to support global proteome
 
    The following convention needs to be followed when building protein DBs:
 
        spXXXXXX ---> proteinsXXXXXX
 
        swissProt points to the latest spXXXXXX
        proteins  points to the latest proteinsXXXXXX
 
 */
 
 if (strstr(protDbName, "proteins") == NULL)
     {
     sqlSafef(query, sizeof(query), "select val from %s.protein where acc='%s';",
     	  UNIPROT_DB_NAME, pepAccession);
     }
 else
     {
     protDbDate = strstr(protDbName, "proteins") + strlen("proteins");
     sqlSafef(query, sizeof(query),
     "select val from sp%s.protein where acc='%s';", protDbDate, pepAccession);
     }
 
 sr  = sqlMustGetResult(conn, query);
 row = sqlNextRow(sr);
 if (row != NULL)
     {
     seq	= cloneString(row[0]);
     len = strlen(seq);
     chp = seq;
     for (i=0; i<len; i++)
 	{
 	*chp = toupper(*chp);
 	chp++;
 	}
     }
 else
     {
     seq = NULL;
     }
 sqlFreeResult(&sr);
 hFreeConn(&conn);
 
 return(seq);
 }
 
 int chkAnomaly(double currentAvg, double pctLow, double pctHi)
 /* chkAnomaly() checks if the frequency of an AA residue in a protein
    is abnormally high (returns 1) or low (returns -1) */
 {
 int result;
 if (currentAvg >= pctHi)
     {
     result = 1;
     }
 else
     {
     if (currentAvg <= pctLow)
         {
         result = -1;
         }
     else
         {
         result = 0;
         }
     }
 return(result);
 }
 
 void getExonInfo(char *proteinID, int *exonCount, char **chrom, char *strandChar)
 {
 char query[256];
 struct sqlResult *sr;
 char **row;
 struct sqlConnection  *conn;
 
 char *qNameStr;
 char *qSizeStr;
 char *qStartStr;
 char *qEndStr;
 char *tNameStr=NULL;
 char *tSizeStr;
 char *tStartStr;
 char *tEndStr;
 char *blockCountStr;
 char *blockSizesStr;
 char *qStartsStr;
 char *tStartsStr;
 
 char *chp;
 int exonStartPos;
 int exonEndPos;
 int exonGenomeStartPos, exonGenomeEndPos;
 char *exonStartStr = NULL;
 char *exonSizeStr  = NULL;
 char *exonGenomeStartStr = NULL;
 char *strand       = NULL;
 int blockCount=0;
 int exonIndex;
 int i, isize;
 int done = 0;
 int alignDiff, alignDiffShortest;
 
 char *answer;
 int hggStart   = 0;
 int hggEnd     = 0;
 char *hggGene  = NULL;
 char *hggChrom = NULL;
 
 conn= hAllocConn(database);
 
 /* NOTE: the query below may not always return single answer, */
 /* and kgProtMap and knownGene alignments may not be identical, so pick the closest one. */
 
 sqlSafef(query,sizeof(query), "select qName, qSize, qStart, qEnd, tName, tSize, tStart, tEnd, blockCount, blockSizes, qStarts, tStarts, strand from %s.%s where qName='%s';",
         database, kgProtMapTableName, proteinID);
 sr  = sqlMustGetResult(conn, query);
 row = sqlNextRow(sr);
 
 if (row == NULL)
     {
     errAbort("<BLOCKQUOTE>Sorry, cannot display Proteome Browser for %s."
              "<BR>No entry is found in kgProtMap table for this protein.</BLOCKQUOTE>",
 	     proteinID);
     }
 
 answer = cloneString(cartOptionalString(cart, "hgg_gene"));
 if (answer != NULL) hggGene = cloneString(answer);
 answer = cloneString(cartOptionalString(cart, "hgg_start"));
 if (answer != NULL) hggStart = atoi(answer);
 answer = cloneString(cartOptionalString(cart, "hgg_end"));
 if (answer != NULL) hggEnd = atoi(answer);
 answer = cloneString(cartOptionalString(cart, "hgg_chrom"));
 if (answer != NULL) hggChrom = cloneString(answer);
 
 alignDiffShortest = 2000000000;  /* initialize it with a very large number */
 while (row != NULL)
     {
     qNameStr        = cloneString(row[0]);
     qSizeStr        = cloneString(row[1]);
     qStartStr       = cloneString(row[2]);
     qEndStr         = cloneString(row[3]);
     tNameStr        = cloneString(row[4]);
     tSizeStr        = cloneString(row[5]);
     tStartStr       = cloneString(row[6]);
     tEndStr         = cloneString(row[7]);
     blockCountStr   = cloneString(row[8]);
     blockSizesStr   = cloneString(row[9]);
     qStartsStr      = cloneString(row[10]);
     tStartsStr      = cloneString(row[11]);
     strand          = cloneString(row[12]);
 
     if (!((strand[0] == '+') || (strand[0] == '-')) || (strand[1] != '\0') )
    	errAbort("wrong strand '%s' data encountered in getExonInfo(), aborting ...", strand);
 
     alignDiff = abs(atoi(tStartStr) - hggStart) + abs(atoi(tEndStr) - prevGBEndPos);
 
     if (alignDiff < alignDiffShortest)
         {
         alignDiffShortest  = alignDiff;
 	*strandChar 	   = strand[0];
 	blockCount 	   = atoi(blockCountStr);
 	exonStartStr 	   = qStartsStr;
 	exonGenomeStartStr = tStartsStr;
 	exonSizeStr 	   = blockSizesStr;
 	}
     row = sqlNextRow(sr);
     }
 
 sqlFreeResult(&sr);
 hFreeConn(&conn);
 
 exonIndex 	   = 0;
 
 while (!done)
     {
     /* get protein side exon position */
 
     chp  = strstr(exonStartStr, ",");
     *chp = '\0';
     exonStartPos 	  = atoi(exonStartStr);
     blockStart[exonIndex] = exonStartPos;
     aaStart[exonIndex]    = exonStartPos/3;
     chp++;
     exonStartStr = chp;
 
     /* get Genome side exon position */
     chp  = strstr(exonGenomeStartStr, ",");
     *chp = '\0';
     exonGenomeStartPos 		= atoi(exonGenomeStartStr);
     blockGenomeStart[exonIndex] = exonGenomeStartPos;
     chp++;
     exonGenomeStartStr = chp;
 
     chp   = strstr(exonSizeStr, ",");
     *chp  = '\0';
     isize = atoi(exonSizeStr);
     blockSize[exonIndex] = isize;
     exonEndPos       	 = exonStartPos + isize - 1;
     blockEnd[exonIndex]  = exonEndPos;
     aaEnd[exonIndex]     = exonEndPos/3;
     exonGenomeEndPos     = exonGenomeStartPos + isize - 1;
     blockGenomeEnd[exonIndex] = exonGenomeEndPos;
     chp++;
     exonSizeStr = chp;
 
     exonIndex++;
     if (exonIndex == blockCount) done = 1;
     }
 
 /* reverse the negative strand block size sequence to positive direction */
 for (i=0; i<blockCount; i++)
     {
     if (*strandChar == '-')
 	{
 	blockSizePositive[i]          = blockSize[blockCount - i - 1];
 	blockStartPositive[i]         = protSeqLen*3 - blockEnd[blockCount - i - 1] - 1;
 	blockEndPositive[i]   	      = protSeqLen*3 - blockStart[blockCount - i - 1] - 1;
     	blockGenomeStartPositive[i]   = blockGenomeStart[blockCount - i - 1];
     	blockGenomeEndPositive[i]     = blockGenomeEnd[blockCount - i - 1];
 	}
     else
 	{
 	blockSizePositive[i]          = blockSize[i];
 	blockStartPositive[i]         = blockStart[i];
 	blockEndPositive[i]           = blockEnd[i];
     	blockGenomeStartPositive[i]   = blockGenomeStart[i];
     	blockGenomeEndPositive[i]     = blockGenomeEnd[i];
 	}
     }
 *exonCount = blockCount;
 assert(*exonCount > 0);
 *chrom     = tNameStr;
 }
 
 void printFASTA(char *proteinID, char *aa)
 /* print the FASTA format protein sequence */
 {
 int i, l;
 char *chp;
 
 l =strlen(aa);
 
 hPrintf("<B>Total amino acids:</B> %d\n", strlen(aa));
 hPrintf("\n");
 hPrintf("<P><B>FASTA record:</B>\n");
 hPrintf("<pre>\n");
 if (hIsGsidServer())
     hPrintf(">%s", proteinID);
 else
     hPrintf(">%s|%s|%s", proteinID, protDisplayID, description);
 
 chp = aa;
 for (i=0; i<l; i++)
     {
     if ((i%50) == 0) hPrintf("\n");
 
     hPrintf("%c", *chp);
     chp++;
     }
 
 hPrintf("</pre>");
 fflush(stdout);
 }
 
 /* more sophisticated processing can be done using genome coordinates */
 void printExonAA(char *proteinID, char *aa, int exonNum)
 {
 int i, j, k, jj;
 int l;
 int il;
 int istart, iend;
 int ilast;
 char *chp;
 
 l =strlen(aa);
 
 ilast = 0;
 
 hPrintf("<pre>");
 
 if (exonNum == -1)
     {
     hPrintf(">%s", proteinID);
 
     chp = aa;
     for (i=0; i<l; i++)
 	{
 	if ((i%50) == 0) hPrintf("\n");
 
 	hPrintf("%c", *chp);
 	chp++;
 	}
     hPrintf("\n\n");
     }
 
 j=0;
 il = 0;
 if (exonNum == -1)
     {
     hPrintf("Total amino acids: %d\n", strlen(aa));
 
     istart = 0;
     iend   = l-1;
     j = 0;
     }
 else
     {
     hPrintf("AA Start position:%4d\n",	   aaStart[exonNum-1]+1);
     hPrintf("AA End position:  %4d\n",  	   aaEnd[exonNum-1]+1);
     hPrintf("AA Length:        %4d<br>\n",  aaEnd[exonNum-1]-aaStart[exonNum-1]+1);
 
     istart = aaStart[exonNum-1];
     iend   = aaEnd[exonNum-1];
     j      = exonNum-1;
     }
 for (i=istart; i<=iend; i++)
     {
     if (((i%50) == 0) && (exonNum == -1))
 	{
 	hPrintf("\n");
 	hPrintf("<span style='color:black;'>");
 	for (jj=0; jj<5; jj++)
 	    {
 	    if ((i+(jj+1)*10) <= (iend+1))
 		{
 		hPrintf("%11d", ilast + (jj+1)*10);
 		}
 	    }
 	hPrintf("<br>");
 	hPrintf("</span>");
 	ilast = ilast + 50;
 	}
 
     if (i == aaStart[j])
 	{
 	j++;
 	k=j%2;
 	if (k)
 	    {
 	    hPrintf("<font color = blue>");
 	    }
 	else
 	    {
 	    hPrintf("<font color = green>");
 	    }
 	}
     if ((i%10) == 0) hPrintf(" ");
     hPrintf("%c", aa[i]);
     if (i == aaEnd[j-1]) hPrintf("</font>");
     il++;
     if (il == 50)
 	{
 	il = 0;
 	}
     }
 hPrintf("</pre>");
 
 /* Force black color at the end */
 hPrintf("<font color = black>");
 }
 
 void doGenomeBrowserLink(char *spAcc, char *mrnaID, char *hgsidStr)
 {
 hPrintf("\n<LI>Genome Browser - ");
 if (mrnaID != NULL)
     {
     hPrintf("<A HREF=\"../cgi-bin/hgTracks?position=%s&db=%s%s\"", mrnaID, database, hgsidStr);
     hPrintf(" TARGET=_BLANK>%s</A></LI>\n", mrnaID);
     }
 else
     {
     hPrintf("<A HREF=\"../cgi-bin/hgTracks?position=%s&db=%s%s\"", spAcc, database, hgsidStr);
     hPrintf(" TARGET=_BLANK>%s</A></LI>\n", spAcc);
     }
 }
 
 void doGeneSorterLink(char *spAcc, char *mrnaID, char *hgsidStr)
 {
 hPrintf("\n<LI>Gene Sorter - ");
 if (mrnaID != NULL)
     {
     /* hPrintf("<A HREF=\"../cgi-bin/hgNear?near_search=%s&hgsid=%s\"", mrnaID, hgsid); */
     hPrintf("<A HREF=\"../cgi-bin/hgNear?near_search=%s&db=%s&org=%s%s\"",
     	    mrnaID, database, organism, hgsidStr);
     hPrintf(" TARGET=_BLANK>%s</A>&nbsp</LI>\n", mrnaID);
     }
 else
     {
     hPrintf("<A HREF=\"../cgi-bin/hgNear?near_search=%s&db=%s&org=%s%s\"",
     	    spAcc, database, organism, hgsidStr);
     hPrintf(" TARGET=_BLANK>%s</A>&nbsp</LI>\n", spAcc);
     }
 hPrintf("\n");
 }
 
 void doGeneDetailsLink(char *spAcc, char *mrnaID, char *hgsidStr)
 {
 char cond_str[128];
 char *hggChrom, *hggStart, *hggEnd;
 char *displayId;
 
 sqlSafefFrag(cond_str, sizeof(cond_str), "kgId='%s' and spID='%s'", mrnaID, spAcc);
 displayId  = sqlGetField(database, "kgXref", "spDisplayID", cond_str);
 /* Feed hgGene with chrom, txStart, and txEnd data, otherwise it would use whatever are in the cart */
 sqlSafefFrag(cond_str, sizeof(cond_str), "name='%s'", mrnaID);
 hggChrom = sqlGetField(database, "knownGene", "chrom", cond_str);
 hggStart = sqlGetField(database, "knownGene", "txStart", cond_str);
 hggEnd   = sqlGetField(database, "knownGene", "txEnd", cond_str);
 if (mrnaID != NULL)
     {
     hPrintf("\n<LI>Gene Details Page - ");
     hPrintf("<A HREF=\"../cgi-bin/hgGene?db=%s&hgg_gene=%s&hgg_prot=%s&hgg_chrom=%s"
             "&hgg_start=%s&hgg_end=%s\"",
     	    database, mrnaID, displayId, hggChrom, hggStart, hggEnd);
     hPrintf(" TARGET=_BLANK>%s</A></LI>\n", mrnaID);
     }
 }
 
 void doBlatLink(char *db, char *sciName, char *commonName, char *aaSeq)
 {
 hPrintf("\n<LI>BLAT - ");
 hPrintf("<A HREF=\"../cgi-bin/hgBlat?db=%s&type=protein&userSeq=%s\"", db, aaSeq);
 hPrintf(" TARGET=_BLANK>%s", sciName);
 if (commonName != NULL) hPrintf(" (%s)", commonName);
 hPrintf("</A></LI>\n");
 }
 
 void doPathwayLinks(char *spAcc, char *mrnaName)
 /* Show pathway links */
 /* spAcc is a place holder for future extension */
 {
 struct sqlConnection *conn  = hAllocConn(database);
 struct sqlConnection *conn2 = hAllocConn(database);
 struct sqlResult *sr;
 char **row;
 char query[256];
 char cond_str[128];
 char *mapID, *locusID, *mapDescription;
 char *geneID;
 char *geneSymbol;
 char *cgapID, *biocMapID;
 boolean hasPathway;
 
 if (hTableExists(database, "kgXref"))
     {
     sqlSafefFrag(cond_str, sizeof(cond_str), "kgID='%s'", mrnaName);
     geneSymbol = sqlGetField(database, "kgXref", "geneSymbol", cond_str);
     if (geneSymbol == NULL)
         {
         geneSymbol = mrnaName;
         }
     }
 else
     {
     geneSymbol = mrnaName;
     }
 
 /* Show Pathway links if any exist */
 hasPathway = FALSE;
 cgapID     = NULL;
 
 /*Process BioCarta Pathway link data */
 if (sqlTableExists(conn, "cgapBiocPathway"))
     {
     sqlSafefFrag(cond_str, sizeof(cond_str), "alias='%s'", geneSymbol);
     cgapID = sqlGetField(database, "cgapAlias", "cgapID", cond_str);
 
     if (cgapID != NULL)
 	{
     	sqlSafef(query, sizeof(query), "select mapID from %s.cgapBiocPathway where cgapID = '%s'", database, cgapID);
     	sr = sqlGetResult(conn, query);
     	row = sqlNextRow(sr);
     	if (row != NULL)
 	    {
 	    if (!hasPathway)
 	        {
 	        hPrintf("<B>Pathways:</B>\n<UL>");
 	        hasPathway = TRUE;
 	    	}
 	    }
     	while (row != NULL)
 	    {
 	    biocMapID = row[0];
 	    hPrintf("<LI>BioCarta - &nbsp");
 	    sqlSafefFrag(cond_str, sizeof(cond_str), "mapID=%c%s%c", '\'', biocMapID, '\'');
 	    mapDescription = sqlGetField(database, "cgapBiocDesc", "description",cond_str);
 	    hPrintf("<A HREF = \"");
 	    hPrintf("http://cgap.nci.nih.gov/Pathways/BioCarta/%s", biocMapID);
 	    hPrintf("\" TARGET=_blank>%s</A> - %s <BR>\n", biocMapID, mapDescription);
 	    row = sqlNextRow(sr);
 	    }
         sqlFreeResult(&sr);
 	}
     }
 
 /* Process KEGG Pathway link data */
 if (sqlTableExists(conn, "keggPathway"))
     {
     sqlSafef(query, sizeof(query),
 	  "select * from %s.keggPathway where kgID = '%s'", database, mrnaName);
     sr = sqlGetResult(conn, query);
     row = sqlNextRow(sr);
     if (row != NULL)
 	{
 	if (!hasPathway)
 	    {
 	    hPrintf("<B>Pathways:</B>\n<UL>");
 	    hasPathway = TRUE;
 	    }
         while (row != NULL)
             {
             locusID = row[1];
 	    mapID   = row[2];
 	    hPrintf("<LI>KEGG - &nbsp");
 	    sqlSafefFrag(cond_str, sizeof(cond_str), "mapID=%c%s%c", '\'', mapID, '\'');
 	    mapDescription = sqlGetField(database, "keggMapDesc", "description", cond_str);
 	    hPrintf("<A HREF = \"");
 	    hPrintf("http://www.genome.ad.jp/dbget-bin/show_pathway?%s+%s", mapID, locusID);
 	    hPrintf("\" TARGET=_blank>%s</A> - %s <BR>\n",mapID, mapDescription);
             row = sqlNextRow(sr);
 	    }
 	}
     sqlFreeResult(&sr);
     }
 
 /* Process SRI BioCyc link data */
 if (sqlTableExists(conn, "bioCycPathway"))
     {
     sqlSafef(query, sizeof(query), "select * from %s.bioCycPathway where kgID = '%s'", database, mrnaName);
     sr = sqlGetResult(conn, query);
     row = sqlNextRow(sr);
     if (row != NULL)
 	{
 	if (!hasPathway)
 	    {
 	    hPrintf("<BR><B>Pathways:</B>\n<UL>");
 	    hasPathway = TRUE;
 	    }
         while (row != NULL)
             {
             geneID  = row[1];
 	    mapID   = row[2];
 	    hPrintf("<LI>BioCyc - &nbsp");
 	    sqlSafefFrag(cond_str, sizeof(cond_str), "mapID=%c%s%c", '\'', mapID, '\'');
 	    mapDescription = sqlGetField(database, "bioCycMapDesc", "description", cond_str);
 	    hPrintf("<A HREF = \"");
 
 	    hPrintf("http://biocyc.org/HUMAN/new-image?type=PATHWAY&object=%s&detail-level=2",
 		   mapID);
 	    hPrintf("\" TARGET=_blank>%s</A> %s <BR>\n",mapID, mapDescription);
             row = sqlNextRow(sr);
 	    }
 	}
     sqlFreeResult(&sr);
     }
 
 if (hasPathway)
     {
     hPrintf("</UL>\n");
     }
 
 hFreeConn(&conn);
 hFreeConn(&conn2);
 }
 
 char *hDbOrganism(char *database)
 /* Function to get organism from the genome db */
 {
 struct sqlConnection *connCentral = hConnectCentral();
 char buf[128];
 char query[256];
 char *res;
 sqlSafef(query, sizeof(query), "select organism from dbDb where name = '%s'",
         database);
 res = strdup(sqlQuickQuery(connCentral, query, buf, sizeof(buf)));
 hDisconnectCentral(&connCentral);
 return res;
 }
 
 int searchProteinsInSupportedGenomes(char *queryID, char **database)
 /* search existing genome databases to see if they contain the protein
    Input: queryID
    return: number of proteins found in existing genome databases
    output: the last genome database is stored at *database
 */
 {
 int  pbProteinCnt = {0};
 char *gDatabase;
 char *org = NULL;
 
 
 char cond_str[255];
 struct sqlConnection *conn;
 
 struct sqlConnection *connCentral;
 char queryCentral[256];
 struct sqlResult *srCentral;
 char **row3;
 char *answer;
 
 /* get all genome DBs that support PB */
 connCentral = hConnectCentral();
 sqlSafef(queryCentral, sizeof(queryCentral),
       "select defaultDb.name, dbDb.organism from dbDb,defaultDb where hgPbOk=1 and defaultDb.name=dbDb.name");
 srCentral = sqlMustGetResult(connCentral, queryCentral);
 row3 = sqlNextRow(srCentral);
 
 /* go through each valid genome database that has PB */
 while (row3 != NULL)
     {
     gDatabase = row3[0];
     org       = row3[1];
     conn = sqlConnect(gDatabase);
     sqlSafefFrag(cond_str, sizeof(cond_str), "alias='%s'", queryID);
     answer = sqlGetField(gDatabase, "kgSpAlias", "count(distinct spID)", cond_str);
     sqlDisconnect(&conn);
 
     if ((answer != NULL) && (!sameWord(answer, "0")))
     	{
 	/* increase the count only by one, because new addition of splice variants to kgSpAlias
 	   would give a count of 2 for both the parent and the variant, which caused the
 	   problem when rescale button is pressed */
 	if (atoi(answer) > 0) pbProteinCnt++;
 	*database = strdup(gDatabase);
 	}
     row3 = sqlNextRow(srCentral);
     }
 sqlFreeResult(&srCentral);
 hDisconnectCentral(&connCentral);
 return(pbProteinCnt);
 }
 
 void presentProteinSelections(char *queryID, int protCntInSwissByGene, int protCntInSupportedGenomeDb)
 /* Fuction to present a web page with proteins of different organisms */
 {
 char *gDatabase;
 char *org = NULL;
 char *spID, *displayID, *desc;
 
 char cond_str[255];
 struct sqlConnection *conn, *conn3;
 char query[256], query3[512];
 struct sqlResult *sr, *sr3;
 char **row, **row3;
 
 struct sqlConnection *connCentral, *proteinsConn;
 char queryCentral[256];
 struct sqlResult *srCentral;
 char *answer;
 char *taxonId, *protAcc, *protDisp, *protOrg, *protDesc;
 char *oldOrg, *orgSciName;
 char *pbOrgSciName[MAX_PB_ORG];
 boolean pbOrgPresented[MAX_PB_ORG];
 boolean skipIt;
 int  i, maxPbOrg;
 int  otherCnt;
 connCentral = hConnectCentral();
 
 hPrintf("<TABLE WIDTH=\"100%%\" BGCOLOR=\"#"HG_COL_HOTLINKS"\" BORDER=\"0\" CELLSPACING=\"0\"");
 hPrintf("CELLPADDING=\"2\"><TR>\n");
 hPrintf("<TD ALIGN=LEFT><A HREF=\"../index.html\">%s</A></TD>", wrapWhiteFont("Home"));
 hPrintf("<TD style='text-align:center; color:#FFFFFF; font-size:medium;'>%s</TD>",
         "UCSC Proteome Browser");
 if (proteinInSupportedGenome)
     {
     hPrintf("<TD ALIGN=Right><A HREF=\"../goldenPath/help/pbTracksHelpFiles/pbTracksHelp.shtml\"");
     }
 else
     {
     hPrintf("<TD ALIGN=Right><A HREF=\"../goldenPath/help/pbTracksHelpFiles/pbTracksHelp.shtml\"");
     }
 
 hPrintf("TARGET=_blank>%s</A></TD>", wrapWhiteFont("Help"));
 hPrintf("</TR></TABLE>");
 
 hPrintf("<FONT SIZE=4><BR><B>Please select one of the following proteins:<BR><BR></B></FONT>\n");
 
 
 /* remmember a list of scientific names for the genomes that supports PB */
 sqlSafef(queryCentral, sizeof(queryCentral),
       "select distinct dbDb.scientificName from dbDb where hgPbOk=1");
 srCentral = sqlMustGetResult(connCentral, queryCentral);
 row3 = sqlNextRow(srCentral);
 i=0;
 while (row3 != NULL)
     {
     pbOrgSciName[i] = strdup(row3[0]);
     pbOrgPresented[i] = FALSE;
     i++;
     row3 = sqlNextRow(srCentral);
     }
 maxPbOrg = i;
 /* go through each genome DB that supports PB */
 sqlSafef(queryCentral, sizeof(queryCentral),
       "select defaultDb.name, dbDb.organism, dbDb.scientificName from dbDb,defaultDb where hgPbOk=1 and defaultDb.name=dbDb.name");
 srCentral = sqlMustGetResult(connCentral, queryCentral);
 row3 = sqlNextRow(srCentral);
 while (row3 != NULL)
     {
     gDatabase = row3[0];
     org       = row3[1];
     orgSciName= row3[2];
 
     protDbName = hPdbFromGdb(gDatabase);
     proteinsConn = sqlConnect(protDbName);
 
     conn = sqlConnect(gDatabase);
     sqlSafefFrag(cond_str, sizeof(cond_str), "alias='%s' and spID != ''", queryID);
     answer = sqlGetField(gDatabase, "kgSpAlias", "count(distinct spID)", cond_str);
     if ((answer != NULL) && (!sameWord(answer, "0")))
 	{
 	/* display organism name */
 	hPrintf("<FONT SIZE=4><B>");
 	hPrintf("<A href=\"https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?mode=Undef&name=%s&lvl=0&srchmode=1\" TARGET=_blank>%s</A>",
            cgiEncode(orgSciName), orgSciName);
 	hPrintf(" (%s):</B></FONT>\n", org);
 	hPrintf("<UL>");
 
        	sqlSafef(query, sizeof(query),
               "select distinct spID from %s.kgSpAlias where alias='%s' "
 	      "and spID != ''",
 	      gDatabase, queryID);
 
     	sr = sqlMustGetResult(conn, query);
     	row = sqlNextRow(sr);
 
     	while (row != NULL)
 	    {
    	    spID = row[0];
     	    sqlSafefFrag(cond_str, sizeof(cond_str), "accession='%s'", spID);
     	    displayID = sqlGetField(protDbName, "spXref3", "displayID", cond_str);
     	    sqlSafefFrag(cond_str, sizeof(cond_str), "accession='%s'", spID);
     	    desc = sqlGetField(protDbName, "spXref3", "description", cond_str);
 
 	    /* display a protein */
 	    hPrintf(
 		"<LI><A HREF=\"../cgi-bin/pbGlobal?proteinID=%s&db=%s\">",
 		displayID, gDatabase);
 	    if (sameWord(spID, displayID) || (strstr(displayID, spID) != NULL))
 		{
 		hPrintf("%s</A> %s\n", spID, desc);
 		}
 	    else
 		{
 	    	hPrintf("%s</A> (aka %s) %s\n", spID, displayID, desc);
 		}
 
 	    /* remember the fact that a protein is shown under this PB supported genome */
 	    for (i=0; i<maxPbOrg; i++)
 	        {
 	    	if (sameWord(orgSciName, pbOrgSciName[i]))
 		    {
 		    pbOrgPresented[i] = TRUE;
 		    }
 	        }
 	    row = sqlNextRow(sr);
 	    }
 	hPrintf("</UL>");fflush(stdout);
    	sqlFreeResult(&sr);
 	}
     sqlDisconnect(&proteinsConn);
     row3 = sqlNextRow(srCentral);
     }
 sqlFreeResult(&srCentral);
 hDisconnectCentral(&connCentral);
 sqlDisconnect(&conn);
 
 if (protCntInSwissByGene > protCntInSupportedGenomeDb)
     {
     otherCnt = -1;
     if (protCntInSupportedGenomeDb >0)
     	{
 	otherCnt = 0;
     	hPrintf("<FONT SIZE=4><B>Other Organisms:</B></FONT>\n");
     	hPrintf("<UL>");
 	}
     else
         {
     	hPrintf("<UL>");
         }
 
     oldOrg = strdup("");
     conn3 = sqlConnect(UNIPROT_DB_NAME);
     sqlSafef(query3, sizeof(query3),
             "select taxon.id, gene.acc, displayId.val, binomial, description.val "
             "from gene, displayId, accToTaxon,taxon, description "
             "where gene.val='%s' and gene.acc=displayId.acc and accToTaxon.taxon=taxon.id "
             "and accToTaxon.acc=gene.acc and description.acc=gene.acc order by binomial",
             queryID);
     sr3  = sqlMustGetResult(conn3, query3);
     row3 = sqlNextRow(sr3);
 
    /* go through each protein */
     while (row3 != NULL)
     	{
         taxonId  = row3[0];
         protAcc  = row3[1];
         protDisp = row3[2];
         protOrg  = row3[3];
         protDesc = row3[4];
 
 	/* decide if this entry should be skipped */
 	skipIt = FALSE;
 	for (i=0; i<maxPbOrg; i++)
 	    {
 	    if (sameWord(pbOrgSciName[i], protOrg) && pbOrgPresented[i])
 	    	{
 	    	skipIt = TRUE;
 		}
 	    }
 
 	/* print organism name if organism changed */
 	if (!sameWord(protOrg, oldOrg))
 	    {
 	    if (!sameWord(oldOrg, ""))
 	        {
 	        hPrintf("</UL>\n");
 		}
 	    if (!skipIt)
 	    	{
     		sqlSafefFrag(cond_str, sizeof(cond_str), "id=%s and nameType='genbank common name'", taxonId);
     		answer = sqlGetField(PROTEOME_DB_NAME, "taxonNames", "name", cond_str);
 		hPrintf("<FONT SIZE=3><B>");
 		hPrintf("<A href=\"https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?mode=Undef&name=%s&lvl=0&srchmode=1\" TARGET=_blank>%s</A>",
            		cgiEncode(protOrg), protOrg);
 		if (answer != NULL)
 		    {
 		    hPrintf(" (%s)", answer);
 		    }
 		hPrintf(":</B></FONT>\n");
 		}
             hPrintf("<UL>\n");
 	    }
 
 	/* print protein entry, if it is not already displayed in the PB supported genome list */
 	if (!skipIt)
 	    {
 	    otherCnt++;
 	    if (sameWord(protAcc, protDisp))
 		{
 		hPrintf("<LI><A HREF=\"../cgi-bin/pbGlobal?proteinID=%s\">", protAcc);
 		hPrintf("%s</A> %s\n", protAcc, protDesc);
 		}
 	    else
 		{
 		hPrintf("<LI><A HREF=\"../cgi-bin/pbGlobal?proteinID=%s\">", protAcc);
 		if (strstr(protDisp, protAcc) != NULL)
 		    {
 		    hPrintf("%s</A> %s\n", protAcc, protDesc);
 		    }
 	        else
 		    {
 	    	    hPrintf("%s</A> (aka %s) %s\n", protAcc, protDisp, protDesc);
 		    }
 		}
 	    }
 	oldOrg = strdup(protOrg);
 	row3 = sqlNextRow(sr3);
 	}
     if (otherCnt == 0) hPrintf("</UL>None");fflush(stdout);
     sqlFreeResult(&sr3);
     sqlDisconnect(&conn3);
     }
 }
 
 int searchProteinsInSwissProtByGene(char *queryGeneID)
 /* search Swiss-Prot database to see if it contains the protein
    Input: queryGeneID
    return: number of proteins found in Swiss-Prot
 */
 {
 int  proteinCnt;
 struct sqlConnection *conn;
 char query[256];
 struct sqlResult *sr;
 char **row;
 
 conn = sqlConnect(UNIPROT_DB_NAME);
 sqlSafef(query, sizeof(query),
             "select count(*) from gene, displayId, accToTaxon,taxon "
             "where gene.val='%s' and gene.acc=displayId.acc and accToTaxon.taxon=taxon.id "
             "and accToTaxon.acc=gene.acc order by taxon.id",
             queryGeneID);
 
 sr  = sqlMustGetResult(conn, query);
 row = sqlNextRow(sr);
 
 if (row == NULL)
     {
     errAbort("Error occured during mySQL query: %s\n", query);
     }
 
 proteinCnt = atoi(row[0]);
 
 sqlFreeResult(&sr);
 sqlDisconnect(&conn);
 return(proteinCnt);
 }