4898794edd81be5285ea6e544acbedeaeb31bf78
max
  Tue Nov 23 08:10:57 2021 -0800
Fixing pointers to README file for license in all source code files. refs #27614

diff --git src/hg/hgGene/otherOrgs.c src/hg/hgGene/otherOrgs.c
index 5ed612e..98238fa 100644
--- src/hg/hgGene/otherOrgs.c
+++ src/hg/hgGene/otherOrgs.c
@@ -1,465 +1,465 @@
 /* otherOrgs.c - Handle other species homolog section. */
 
 /* Copyright (C) 2013 The Regents of the University of California 
- * See README in this or parent directory for licensing information. */
+ * See kent/LICENSE or http://genome.ucsc.edu/license/ for licensing information. */
 
 #include "common.h"
 #include "hash.h"
 #include "linefile.h"
 #include "dystring.h"
 #include "hui.h"
 #include "web.h"
 #include "hdb.h"
 #include "axt.h"
 #include "hgGene.h"
 
 
 struct otherOrg
 /* Links involving another organism. */
     {
     struct otherOrg *next;	/* Next in list. */
     double priority;	/* Order to print in. */
     char *name;		/* Symbolic name. */
     char *shortLabel;	/* Short human-readable label. */
     char *idSql;	/* SQL to create ID. */
     char *idToProtIdSql;/* Convert from id to protein ID. */
     char *otherIdSql;	/* Convert from our id to other database ID. */
     char *otherIdSql2;  /* Try this if first otherIdSql doesn't work. */
     char *genomeUrl;	/* URL of genome browser link. */
     char *sorterUrl;	/* URL of gene sorter link. */
     char *geneUrl;	/* URL of hgGene link */
     char *otherUrl;	/* URL of other link. */
     char *otherName;	/* Name of other link. */
     char *db;		/* Other organism specific database. */
     char *pepTable;	/* Table in db peptides live in. */
     char *geneTable;	/* Table in db genes live in. */
     };
 
 static int otherOrgCmpPriority(const void *va, const void *vb)
 /* Compare to sort otherOrgs based on priority. */
 {
 const struct otherOrg *a = *((struct otherOrg **)va);
 const struct otherOrg *b = *((struct otherOrg **)vb);
 float dif = a->priority - b->priority;
 if (dif < 0)
     return -1;
 else if (dif > 0)
     return 1;
 else
     return 0;
 }
 
 static char *otherOrgRequiredField(struct hash *hash, char *name)
 /* Return required field in otherOrg hash.  Squawk and die if it doesn't exist. */
 {
 char *s = hashFindVal(hash,name);
 if (s == NULL)
     errAbort("Couldn't find %s field in %s in otherOrgs.ra", name, 
 	     (char *)hashFindVal(hash, "name"));
 return s;
 }
 
 static char *otherOrgOptionalField(struct hash *hash, char *name)
 /* Return field in hash if it exists or NULL otherwise. */
 {
 return hashFindVal(hash, name);
 }
 
 static struct otherOrg *getOtherOrgList(struct sqlConnection *conn,
 	char *raFile)
 /* Get list of otherOrgs - starting with everything in .ra file,
  * and making sure any associated tables and databases exist. */
 {
 struct hash *ra, *raList = readRa(raFile, NULL);
 struct otherOrg *otherOrgList = NULL, *otherOrg;
 for (ra = raList; ra != NULL; ra = ra->next)
     {
     if (otherOrgOptionalField(ra, "hide") == NULL)
 	{
 	if (checkDatabases(otherOrgOptionalField(ra, "databases")) 
 	    && sqlTablesExist(conn, otherOrgOptionalField(ra, "tables")))
 	    {
 	    AllocVar(otherOrg);
 	    otherOrg->priority = atof(otherOrgRequiredField(ra, "priority"));
 	    otherOrg->name = otherOrgRequiredField(ra, "name");
 	    otherOrg->shortLabel = otherOrgRequiredField(ra, "shortLabel");
 	    otherOrg->idSql = otherOrgRequiredField(ra, "idSql");
 	    otherOrg->idToProtIdSql = otherOrgOptionalField(ra, "idToProtIdSql");
             char *otherIdSql = otherOrgOptionalField(ra, "otherIdSql");
             if (otherIdSql)
                 otherOrg->otherIdSql = replaceChars(otherIdSql, "KNOWNDB",sqlGetDatabase(conn));
 	    otherOrg->otherIdSql2 = otherOrgOptionalField(ra, "otherIdSql2");
 	    otherOrg->genomeUrl = otherOrgOptionalField(ra, "genomeUrl");
 	    otherOrg->sorterUrl = otherOrgOptionalField(ra, "sorterUrl");
 	    otherOrg->geneUrl = otherOrgOptionalField(ra, "geneUrl");
 	    otherOrg->otherUrl = otherOrgOptionalField(ra, "otherUrl");
 	    otherOrg->otherName = otherOrgOptionalField(ra, "otherName");
 	    otherOrg->db = otherOrgRequiredField(ra, "db");
 	    otherOrg->pepTable = otherOrgOptionalField(ra, "pepTable");
 	    otherOrg->geneTable = otherOrgOptionalField(ra, "geneTable");
 	    slAddHead(&otherOrgList, otherOrg);
 	    }
 	}
     }
 slSort(&otherOrgList, otherOrgCmpPriority);
 return otherOrgList;
 }
 
 static boolean otherOrgsExists(struct section *section, 
 	struct sqlConnection *conn, char *geneId)
 /* Return TRUE if necessary database exists and has something
  * on this one. */
 {
 struct otherOrg *otherOrgList;
 otherOrgList = section->items = getOtherOrgList(conn, section->raFile);
 return otherOrgList != NULL;
 return FALSE;
 }
 
 static char *otherOrgId(struct otherOrg *otherOrg, struct sqlConnection *conn, 
 	char *geneId)
 /* Return gene ID in other organism or NULL if it doesn't exist. */
 {
 if (geneId != NULL)
     {
     char query[256];
     sqlSafef(query, sizeof(query), otherOrg->idSql, geneId);
     return sqlQuickString(conn, query);
     }
 else
     return NULL;
 }
 
 static char *otherOrgPositionFromDb(struct otherOrg *otherOrg, char *id)
 /* Get position of id from other organism database, if possible. */
 {
 struct hTableInfo *hti = hFindTableInfo(otherOrg->db, NULL,
                                         otherOrg->geneTable);
 if (hti == NULL)
     return NULL;  // table  not found
 
 struct sqlConnection *conn = hAllocConn(otherOrg->db);
 char query[512];
 sqlSafef(query, sizeof(query),
       "select concat(%s, ':', %s+1, '-', %s) from %s "
       "where %s = '%s'",
       hti->chromField, hti->startField, hti->endField,
       otherOrg->geneTable, hti->nameField, id);
 char *pos = sqlQuickString(conn, query);
 if (pos != NULL)
     {
     char posPlus[2048];
     safef(posPlus, sizeof(posPlus), "%s&%s=%s&hgFind.matches=%s",
           pos,
           otherOrg->geneTable, hTrackOpenVis(sqlGetDatabase(conn), otherOrg->geneTable),
           id);
     hFreeConn(&conn);
     freez(&pos);
     return cloneString(posPlus);
     }
 else
     {
     hFreeConn(&conn);
     return NULL;
     }
 }
 
 static char *otherOrgPosition(struct otherOrg *otherOrg,
 			      struct sqlConnection *conn, char *geneId)
 /* Return position of gene ID in other organism or NULL if it doesn't exist;
  * also make sure that the gene track is visible, and id highlighted, in 
  * hgTracks (the destination when this is used). */
 {
 char *id = otherOrgId(otherOrg, conn, geneId);
 if (id != NULL)
     {
     if (otherOrg->db == NULL || otherOrg->geneTable == NULL
         || !sqlDatabaseExists(otherOrg->db))
 	return id;
     else
         return otherOrgPositionFromDb(otherOrg, id);
     }
 return NULL;
 }
 
 static char *otherOrgProteinId(struct otherOrg *otherOrg, struct sqlConnection *conn,
     char *geneId)
 /* Return protein ID in other organism or NULL if it doesn't exist. */
 {
 char *otherId = otherOrgId(otherOrg, conn, geneId);
 char *protId = NULL;
 if (otherOrg->db != NULL && otherId != NULL && otherOrg->idToProtIdSql != NULL
     && sqlDatabaseExists(otherOrg->db))
     {
     struct sqlConnection *conn = hAllocConn(otherOrg->db);
     char query[512];
     sqlSafef(query, sizeof(query), otherOrg->idToProtIdSql, otherId);
     protId = sqlQuickString(conn, query);
     hFreeConn(&conn);
     }
 if (protId == NULL)
     {
     protId = otherId;
     otherId = NULL;
     }
 freez(&otherId);
 return protId;
 }
 
 static char *otherOrgExternalId(struct otherOrg *otherOrg, char *localId)
 /* Convert other organism UCSC id to external database ID. */
 {
 char *otherId = NULL;
 if (localId != NULL)
     {
     if (otherOrg->otherIdSql && sqlDatabaseExists(otherOrg->db))
 	{
 	struct sqlConnection *conn = hAllocConn(otherOrg->db);
 	char query[512];
 	sqlSafef(query, sizeof(query), otherOrg->otherIdSql, localId);
 	otherId = sqlQuickString(conn, query);
 	if (otherId == NULL && otherOrg->otherIdSql2 != NULL)
 	    {
 	    sqlSafef(query, sizeof(query), otherOrg->otherIdSql2, localId);
 	    otherId = sqlQuickString(conn, query);
 	    }
 	hFreeConn(&conn);
 	}
     else
         otherId = cloneString(localId);
     }
 return otherId;
 }
 
 static void otherOrgPrintLink(struct otherOrg *otherOrg,  
 	char *label,  char *missingLabel, boolean internalLink,
 	char *otherId, char *urlFormat)
 /* If label and urlFormat exist then print up a link.  Otherwise print n/a. */
 {
 if (internalLink)
     webPrintLinkCellStart();
 else
     webPrintLinkOutCellStart();
 if (urlFormat != NULL && label != NULL)
     {
     if (otherId != NULL)
         {
 	hPrintf("<A HREF=\"");
 	hPrintf(urlFormat, otherId);
 	if (internalLink)
 	    hPrintf("&db=%s", otherOrg->db);
 	hPrintf("\"");
 	hPrintf(" TARGET=_blank");
 	hPrintf(" class=\"toc\">");
 	hPrintf("%s", label);
 	hPrintf("</A>");
 	}
     else
         {
 	hPrintf("%s", missingLabel);
 	}
     }
 else
     hPrintf("&nbsp;");
 webPrintLinkCellEnd();
 }
 
 void otherOrgPepLink(struct otherOrg *otherOrg, char *command, char *label,
 	char *id, struct sqlConnection *conn)
 /* Print link that will invoke self to work on other organism peptide. */
 {
 boolean gotIt = FALSE;
 webPrintLinkCellStart();
 if (id != NULL)
     {
     if (otherOrg->db != NULL && otherOrg->pepTable != NULL)
 	{
 	char dbTable[128];
 	safef(dbTable, sizeof(dbTable), "%s.%s", otherOrg->db, otherOrg->pepTable);
 	if (sqlTableExists(conn, dbTable))
 	    {
 	    struct sqlResult *sr;
 	    char **row;
 	    char query[256];
 	    sqlSafef(query, sizeof(query), "select seq from %s where name = '%s'",
 	    	dbTable, id);
 	    sr = sqlGetResult(conn, query);
 	    if ((row = sqlNextRow(sr)) != NULL)
 	        {
 		gotIt = TRUE;
 		hPrintf("<A HREF=\"%s?%s&%s=%s&%s=%s&%s=%s\" class=\"toc\">",
 		    geneCgi, cartSidUrlString(cart), 
 		    command, "on",
 		    hggOtherPepTable, dbTable,
 		    hggOtherId, id);
 		hPrintf("%s", label);
 		hPrintf("</A>");
 		}
 	    sqlFreeResult(&sr);
 	    }
 	}
     }
 if (!gotIt)
     hPrintf("&nbsp;");
 webPrintLinkCellEnd();
 }
 
 static void otherOrgsPrint(struct section *section, struct sqlConnection *conn,
 	char *geneId)
 /* Print the otherOrgs section. */
 {
 struct otherOrg *otherOrg, *otherOrgList = section->items;
 
 hPrintf(
 	"Orthologies between human, mouse, and rat are computed by taking the "
 	"best BLASTP hit, and filtering out non-syntenic hits. For "
 	"more distant species reciprocal-best BLASTP hits are used. "
 	"Note that the absence of an ortholog in the table below may "
 	"reflect incomplete annotations in the other species rather than "
 	"a true absence of the orthologous gene.");
 webPrintLinkTableStart();
 for (otherOrg = otherOrgList; otherOrg != NULL; otherOrg = otherOrg->next)
     webPrintLabelCell(otherOrg->shortLabel);
 hPrintf("</TR>\n<TR>");
 for (otherOrg = otherOrgList; otherOrg != NULL; otherOrg = otherOrg->next)
     {
     char *pos = otherOrgPosition(otherOrg, conn, geneId);
     otherOrgPrintLink(otherOrg, "Genome Browser", "No ortholog", TRUE, 
     	pos, otherOrg->genomeUrl);
     freeMem(pos);
     }
 hPrintf("</TR>\n<TR>");
 for (otherOrg = otherOrgList; otherOrg != NULL; otherOrg = otherOrg->next)
     {
     char *id = otherOrgId(otherOrg, conn, geneId);
     otherOrgPrintLink(otherOrg, "Gene Details", "&nbsp;", TRUE,
     	id, otherOrg->geneUrl);
     freeMem(id);
     }
 hPrintf("</TR>\n<TR>");
 for (otherOrg = otherOrgList; otherOrg != NULL; otherOrg = otherOrg->next)
     {
     char *id = otherOrgId(otherOrg, conn, geneId);
     otherOrgPrintLink(otherOrg, "Gene Sorter", "&nbsp;", TRUE, 
     	id, otherOrg->sorterUrl);
     freeMem(id);
     }
 hPrintf("</TR>\n<TR>");
 for (otherOrg = otherOrgList; otherOrg != NULL; otherOrg = otherOrg->next)
     {
     char *id = otherOrgId(otherOrg, conn, geneId);
     id = otherOrgExternalId(otherOrg, id);
     otherOrgPrintLink(otherOrg, otherOrg->otherName, "&nbsp;", FALSE, 
     	id, otherOrg->otherUrl);
     freeMem(id);
     }
 hPrintf("</TR>\n<TR>");
 for (otherOrg = otherOrgList; otherOrg != NULL; otherOrg = otherOrg->next)
     {
     char *id = otherOrgProteinId(otherOrg, conn, geneId);
     otherOrgPepLink(otherOrg, hggDoOtherProteinSeq, "Protein Sequence", id, conn);
     freeMem(id);
     }
 hPrintf("</TR>\n<TR>");
 for (otherOrg = otherOrgList; otherOrg != NULL; otherOrg = otherOrg->next)
     {
     char *id = otherOrgProteinId(otherOrg, conn, geneId);
     otherOrgPepLink(otherOrg, hggDoOtherProteinAli, "Alignment", id, conn);
     freeMem(id);
     }
 webPrintLinkTableEnd();
 }
 
 struct section *otherOrgsSection(struct sqlConnection *conn,
 	struct hash *sectionRa)
 /* Create otherOrgs section. */
 {
 struct section *section = sectionNew(sectionRa, "otherOrgs");
 section->exists = otherOrgsExists;
 section->print = otherOrgsPrint;
 section->raFile = "otherOrgs.ra";
 return section;
 }
 
 void doOtherProteinSeq(struct sqlConnection *conn, char *homologName)
 /* Put up page that displays protein sequence from other organism. */
 {
 char *table = cartString(cart, hggOtherPepTable);
 char *id = cartString(cart, hggOtherId);
 char name[256];
 safef(name, sizeof(name), "%s homolog", homologName);
 showSeqFromTable(conn, id, name, table);
 }
 
 static bioSeq *getSeq(struct sqlConnection *conn, char *table, char *id)
 /* Get sequence from table. */
 {
 char query[512];
 struct sqlResult *sr;
 char **row;
 bioSeq *seq = NULL;
 sqlSafef(query, sizeof(query), 
     "select seq from %s where name = '%s'", table, id);
 sr = sqlGetResult(conn, query);
 if ((row = sqlNextRow(sr)) != NULL)
     {
     AllocVar(seq);
     seq->name = cloneString(id);
     seq->dna = cloneString(row[0]);
     seq->size = strlen(seq->dna);
     }
 sqlFreeResult(&sr);
 return seq;
 }
 
 void doOtherProteinAli(struct sqlConnection *conn, 
 	char *localId, char *localName)
 /* Put up page that shows alignment between this protein sequence
  * and other species. */
 {
 char *otherTable = cartString(cart, hggOtherPepTable);
 char *otherId = cartString(cart, hggOtherId);
 char *localTable = genomeSetting("knownGenePep");
 bioSeq *localSeq = getSeq(conn, localTable, localId);
 bioSeq *otherSeq = getSeq(conn, otherTable, otherId);
 
 if (localSeq != NULL && otherSeq != NULL)
     {
     struct axtScoreScheme *ss = axtScoreSchemeProteinDefault();
     if (axtAffineSmallEnough(localSeq->size, otherSeq->size))
         {
 	struct axt *axt = axtAffine(localSeq, otherSeq, ss);
 	hPrintf("<TT><PRE>");
 	if (axt != NULL)
 	    {
 	    printf("ID (including gaps) %3.1f%%, coverage (of both) %3.1f%%, score %d\n",
 		     axtIdWithGaps(axt)*100, 
 		     axtCoverage(axt, localSeq->size, otherSeq->size)*100, 
 		     axt->score);
 	    printf("Alignment between %s (top %s %daa) and\n",
 		    localName, localSeq->name, localSeq->size);
 	    printf("%s homolog (bottom %s %daa)\n",
 		     otherTable, otherSeq->name, otherSeq->size);
 	    printf("\n");
 	    axtPrintTraditional(axt, 60, ss, stdout);
 	    axtFree(&axt);
 	    }
 	else
 	    {
 	    printf("%s and %s don't align\n", localSeq->name, otherSeq->name);
 	    }
 	hPrintf("</PRE></TT>");
 	}
     else
         {
 	printf("Sorry, %s (%d amino acids) and %s (%d amino acids) are too big to align",
 	    localSeq->name, localSeq->size, otherSeq->name, otherSeq->size);
 	}
     }
 else
     {
     warn("Couldn't get sequences, database out of sync?");
     }
 }