4898794edd81be5285ea6e544acbedeaeb31bf78
max
  Tue Nov 23 08:10:57 2021 -0800
Fixing pointers to README file for license in all source code files. refs #27614

diff --git src/hg/visiGene/knownToVisiGene/knownToVisiGene.c src/hg/visiGene/knownToVisiGene/knownToVisiGene.c
index d74a762..be292ae 100644
--- src/hg/visiGene/knownToVisiGene/knownToVisiGene.c
+++ src/hg/visiGene/knownToVisiGene/knownToVisiGene.c
@@ -1,344 +1,344 @@
 /* knownToVisiGene - Create knownToVisiGene table by riffling through various other knownTo tables. */
 
 /* Copyright (C) 2013 The Regents of the University of California 
- * See README in this or parent directory for licensing information. */
+ * See kent/LICENSE or http://genome.ucsc.edu/license/ for licensing information. */
 #include "common.h"
 #include "linefile.h"
 #include "hash.h"
 #include "dystring.h"
 #include "options.h"
 #include "jksql.h"
 #include "hgRelate.h"
 #include "genePred.h"
 #include "psl.h"
 #include "rangeTree.h"
 #include "binRange.h"
 #include "hdb.h"
 #include "visiGene.h"
 
 void usage()
 /* Explain usage and exit. */
 {
 errAbort(
   "knownToVisiGene - Create knownToVisiGene table by riffling through various other knownTo tables\n"
   "usage:\n"
   "   knownToVisiGene database\n"
   "options:\n"
   "   -table=XXX - give another name to table other than knownToVisiGene\n"
   "   -visiDb=XXX - use a VisiGene database other than 'visiGene'\n"
   "   -probesDb=database - use given database for probes (default is to use database)\n"
   );
 }
 
 char *outTable = "knownToVisiGene";
 char *visiDb = "visiGene";
 char *probesDb;
 boolean vgProbes = FALSE;
 boolean vgAllProbes = FALSE;
 
 static struct optionSpec options[] = {
    {"table", OPTION_STRING},
    {"visiDb", OPTION_STRING},
    {"probesDb", OPTION_STRING},
    {NULL, 0},
 };
 
 struct prioritizedImage 
     {
     int imageId;	/* ID of image */
     float priority;	/* Image priority - smaller is more urgent */
     int geneId;         /* visiGene gene.id */
     };
 
 void createTable(struct sqlConnection *conn, char *tableName)
 /* Create our name/value table, dropping if it already exists. */
 {
 struct dyString *dy = dyStringNew(512);
 sqlDyStringPrintf(dy, 
 "CREATE TABLE  %s (\n"
 "    name varchar(255) not null,\n"
 "    value varchar(255) not null,\n"
 "    geneId int not null,\n"
 "              #Indices\n"
 "    PRIMARY KEY(name),\n"
 "    INDEX(value)\n"
 ")\n",  tableName);
 sqlRemakeTable(conn, tableName, dy->string);
 dyStringFree(&dy);
 }
 
 void addPrioritizedImage(struct hash *hash, int id, float priority, int geneId, char *key)
 /* Add image to hash, replacing what's already there if we have a better priority */
 {
 struct prioritizedImage *pi = hashFindVal(hash, key);
 if (pi == NULL)
     {
     AllocVar(pi);
     pi->imageId = id;
     pi->priority = priority;
     pi->geneId = geneId;
     hashAdd(hash, key, pi);
     }
 else if (pi->priority > priority)
     {
     pi->imageId = id;
     pi->priority = priority;
     pi->geneId = geneId;
     }
 }
 
 void foldIntoHash(struct sqlConnection *conn, char *table, char *keyField, char *valField,
 	struct hash *hash, struct hash *uniqHash, boolean secondary)
 /* Add key/value pairs from table into hash */
 {
 struct sqlResult *sr;
 char query[512];
 char **row;
 sqlSafef(query, sizeof(query), "select %s,%s from %s", keyField, valField, table);
 sr = sqlGetResult(conn, query);
 while ((row = sqlNextRow(sr)) != NULL)
     {
     if (uniqHash != NULL)
         {
 	if (hashLookup(uniqHash, row[1]))
 	    {
 	    if (secondary)
 		continue;
 	    }
 	else
 	    hashAdd(uniqHash, row[1], NULL);
 	}
     hashAdd(hash, row[0], cloneString(row[1]));
     }
 sqlFreeResult(&sr);
 }
 
 struct hash *keepersForChroms(struct sqlConnection *conn)
 /* Create hash of binKeepers keyed by chromosome */
 {
 struct hash *keeperHash = hashNew(0);
 struct sqlResult *sr = sqlGetResult(conn, NOSQLINJ "select chrom,size from chromInfo");
 char **row;
 while ((row = sqlNextRow(sr)) != NULL)
     {
     char *chrom = row[0];
     int size = sqlUnsigned(row[1]);
     struct binKeeper *bk = binKeeperNew(0, size);
     hashAdd(keeperHash, chrom, bk);
     }
 sqlFreeResult(&sr);
 return keeperHash;
 }
 
 void bestProbeOverlap(struct sqlConnection *conn, char *probeTable, 
 	struct genePred *gpList, struct hash *gpToProbeHash)
 /* Create hash of most overlapping probe if any for each gene. Require
  * at least 100 base overlap. */
 {
 /* Create a hash of binKeepers filled with probes. */
 struct hash *keeperHash = keepersForChroms(conn);
 struct hashCookie it = hashFirst(keeperHash);
 struct hashEl *hel;
 int pslCount = 0;
 while ((hel = hashNext(&it)) != NULL)
     {
     char *chrom = hel->name;
     struct binKeeper *bk = hel->val;
     int rowOffset;
     struct sqlResult *sr = hChromQuery(conn, probeTable, chrom, NULL, &rowOffset);
     char **row;
     while ((row = sqlNextRow(sr)) != NULL)
         {
 	struct psl *psl = pslLoad(row+rowOffset);
 	binKeeperAdd(bk, psl->tStart, psl->tEnd, psl);
 	++pslCount;
 	}
     sqlFreeResult(&sr);
     }
 verbose(2, "Loaded %d psls from %s\n", pslCount, probeTable);
 
 /* Loop through gene list, finding best probe if any for each gene. */
 struct genePred *gp;
 for (gp = gpList; gp != NULL; gp = gp->next)
     {
     struct rbTree *rangeTree = genePredToRangeTree(gp, FALSE);
     struct psl *bestPsl = NULL;
     int bestOverlap = 99;	/* MinOverlap - 1 */
     struct binKeeper *bk = hashMustFindVal(keeperHash, gp->chrom);
     struct binElement *bin, *binList = binKeeperFind(bk, gp->txStart, gp->txEnd);
     for (bin = binList; bin != NULL; bin = bin->next)
         {
 	struct psl *psl = bin->val;
 	if (psl->strand[0] == gp->strand[0])
 	    {
 	    int overlap = pslRangeTreeOverlap(psl, rangeTree);
 	    if (overlap > bestOverlap)
 		{
 		bestOverlap = overlap;
 		bestPsl = psl;
 		}
 	    }
 	}
     if (bestPsl != NULL)
         hashAdd(gpToProbeHash, gp->name, bestPsl->qName);
     }
 }
 
 struct prioritizedImage *bestImage(char *kgId, struct hash *kgToHash, struct hash *imageHash)
 /* Return best image id if possible, otherwise 0 */
 {
 struct hashEl *extId = hashLookup(kgToHash, kgId);
 struct prioritizedImage *best = NULL;
 
 while (extId)
     {
     struct prioritizedImage *pi = hashFindVal(imageHash, extId->val);
     if (pi)
 	{
 	if (!best || pi->priority < best->priority)
 	    {
 	    best = pi;
 	    }
 	}
     extId = hashLookupNext(extId);
     }
 return best;
 }
 
 void knownToVisiGene(char *database)
 /* knownToVisiGene - Create knownToVisiGene table by riffling through various other knownTo tables. */
 {
 char *tempDir = ".";
 FILE *f = hgCreateTabFile(tempDir, outTable);
 struct sqlConnection *hConn = sqlConnect(database);
 struct sqlConnection *iConn = sqlConnect(visiDb);
 struct sqlResult *sr;
 char **row;
 struct hash *geneImageHash = newHash(18);
 struct hash *locusLinkImageHash = newHash(18);
 struct hash *refSeqImageHash = newHash(18);
 struct hash *genbankImageHash = newHash(18);
 struct hash *probeImageHash = newHash(18);
 struct hash *knownToLocusLinkHash = newHash(18);
 struct hash *knownToRefSeqHash = newHash(18);
 struct hash *knownToGeneHash = newHash(18);
 struct hash *favorHugoHash = newHash(18);
 struct hash *knownToProbeHash = newHash(18);
 struct hash *knownToAllProbeHash = newHash(18);
 struct genePred *knownList = NULL, *known;
 struct hash *dupeHash = newHash(17);
 
 
 probesDb  = optionVal("probesDb", database);
 struct sqlConnection *probesConn = sqlConnect(probesDb);
 vgProbes = sqlTableExists(probesConn,"vgProbes");
 vgAllProbes = sqlTableExists(probesConn,"vgAllProbes");
 
 /* Go through and make up hashes of images keyed by various fields. */
 sr = sqlGetResult(iConn,
         NOSQLINJ "select image.id,imageFile.priority,gene.name,gene.locusLink,gene.refSeq,gene.genbank"
 	",probe.id,submissionSet.privateUser,vgPrbMap.vgPrb,gene.id"
 	" from image,imageFile,imageProbe,probe,gene,submissionSet,vgPrbMap"
 	" where image.imageFile = imageFile.id"
 	" and image.id = imageProbe.image"
 	" and imageProbe.probe = probe.id"
 	" and probe.gene = gene.id"
 	" and image.submissionSet=submissionSet.id"
 	" and vgPrbMap.probe = probe.id");
 
 while ((row = sqlNextRow(sr)) != NULL)
     {
     int id = sqlUnsigned(row[0]);
     float priority = atof(row[1]);
     int privateUser = sqlSigned(row[7]);
     char vgPrb_Id[256];
     safef(vgPrb_Id, sizeof(vgPrb_Id), "vgPrb_%s",row[8]);
     int geneId = sqlUnsigned(row[9]);
     if (privateUser == 0)
 	{
 	addPrioritizedImage(probeImageHash, id, priority, geneId, vgPrb_Id);
 	addPrioritizedImage(geneImageHash, id, priority, geneId, row[2]);
 	addPrioritizedImage(locusLinkImageHash, id, priority, geneId, row[3]);
 	addPrioritizedImage(refSeqImageHash, id, priority, geneId, row[4]);
 	addPrioritizedImage(genbankImageHash, id, priority, geneId, row[5]);
 	}
     }
 verbose(2, "Made hashes of image: geneImageHash %d, locusLinkImageHash %d, refSeqImageHash %d"
            ", genbankImageHash %d probeImageHash %d\n", 
             geneImageHash->elCount, locusLinkImageHash->elCount, refSeqImageHash->elCount, 
 	    genbankImageHash->elCount, probeImageHash->elCount);
 sqlFreeResult(&sr);
 
 /* Build up list of known genes. */
 sr = sqlGetResult(hConn, NOSQLINJ "select * from knownGene");
 while ((row = sqlNextRow(sr)) != NULL)
     {
     struct genePred *known = genePredLoad(row);
     if (!hashLookup(dupeHash, known->name))
         {
 	hashAdd(dupeHash, known->name, NULL);
 	slAddHead(&knownList, known);
 	}
     }
 slReverse(&knownList);
 sqlFreeResult(&sr);
 verbose(2, "Got %d known genes\n", slCount(knownList));
 
 /* Build up hashes from knownGene to other things. */
 if (vgProbes)
     bestProbeOverlap(probesConn, "vgProbes", knownList, knownToProbeHash);
 if (vgAllProbes)
     bestProbeOverlap(probesConn, "vgAllProbes", knownList, knownToAllProbeHash);
 
 foldIntoHash(hConn, "knownToLocusLink", "name", "value", knownToLocusLinkHash, NULL, FALSE);
 foldIntoHash(hConn, "knownToRefSeq", "name", "value", knownToRefSeqHash, NULL, FALSE);
 foldIntoHash(hConn, "kgXref", "kgID", "geneSymbol", knownToGeneHash, favorHugoHash, FALSE);
 foldIntoHash(hConn, "kgAlias", "kgID", "alias", knownToGeneHash, favorHugoHash, TRUE);
 foldIntoHash(hConn, "kgProtAlias", "kgID", "alias", knownToGeneHash, favorHugoHash, TRUE);
 
 verbose(2, "knownToLocusLink %d, knownToRefSeq %d, knownToGene %d knownToProbe %d knownToAllProbe %d\n", 
    knownToLocusLinkHash->elCount, knownToRefSeqHash->elCount, knownToGeneHash->elCount,
    knownToProbeHash->elCount, knownToAllProbeHash->elCount);
 
 /* Try and find an image for each gene. */
 for (known = knownList; known != NULL; known = known->next)
     {
     char *name = known->name;
     struct prioritizedImage *best = NULL;
     {
     best = bestImage(name, knownToLocusLinkHash, locusLinkImageHash);
     if (!best)
 	best = bestImage(name, knownToRefSeqHash, refSeqImageHash);
     if (!best)
 	{
 	best = hashFindVal(genbankImageHash, name);
 	}
     if (!best)
 	best = bestImage(name, knownToGeneHash, geneImageHash);
     if (vgProbes && !best)
 	best = bestImage(name, knownToProbeHash, probeImageHash);
     if (vgAllProbes && !best)
 	best = bestImage(name, knownToAllProbeHash, probeImageHash);
     }	    
     if (best)
         {
 	fprintf(f, "%s\t%d\t%d\n", name, best->imageId, best->geneId);
 	}
     }
 
 createTable(hConn, outTable);
 hgLoadTabFile(hConn, tempDir, outTable, &f);
 hgRemoveTabFile(tempDir, outTable);
 }
 
 int main(int argc, char *argv[])
 /* Process command line. */
 {
 optionInit(&argc, argv, options);
 outTable = optionVal("table", outTable);
 visiDb = optionVal("visiDb", visiDb);
 if (argc != 2)
     usage();
 knownToVisiGene(argv[1]);
 return 0;
 }