4898794edd81be5285ea6e544acbedeaeb31bf78
max
  Tue Nov 23 08:10:57 2021 -0800
Fixing pointers to README file for license in all source code files. refs #27614

diff --git src/hg/protein/gadPos/gadPos.c src/hg/protein/gadPos/gadPos.c
index c6cf4df..8c3a62f 100644
--- src/hg/protein/gadPos/gadPos.c
+++ src/hg/protein/gadPos/gadPos.c
@@ -1,166 +1,166 @@
 /* gadPos - generate genomic positions for GAD entries */
 
 /* Copyright (C) 2013 The Regents of the University of California 
- * See README in this or parent directory for licensing information. */
+ * See kent/LICENSE or http://genome.ucsc.edu/license/ for licensing information. */
 #include "common.h"
 #include "hCommon.h"
 #include "hdb.h"
 #include "options.h"
 
 // Using options just to get -verbose:
 static struct optionSpec optionSpecs[] = {
     {NULL, 0}
 };
 
 void usage()
 /* Explain usage and exit. */
 {
 errAbort(
   "gadPos -  find genomic positions for GAD entries based on gene symbol\n"
   "usage:\n"
   "   gadPos db outFile\n"
   "      db is the genome database that must contain updated gadAll table,\n"
   "      known{Gene,Canonical,Isoforms}, kg{Xref,Alias}, and ref{Gene,Link}.\n"
   "      Each geneSymbol with association=Y in gadAll is looked up first in \n"
   "      knownCanonical; if not found then refGene; if not found, then kgAlias.\n"
   "      If db contains wgEncodeGencode{Pseudogene,Comp}, they will be searched\n"
   "      if geneSymbol still has not been found.\n"
   "      outFile is the output file name for GAD positions (bed 4 format)\n"
   "example: gadPos hg17 stdout | uniq > gadPos.tab\n");
 }
 
 boolean printBed4FromQueryAndName(struct sqlConnection *conn, char *query, char *name,
 				  FILE *outF)
 /* If there are results from query, assume that they are {chrom, chromStart, chromEnd},
  * print BED4 with those coords and name, and return TRUE; */
 {
 boolean found = FALSE;
 struct sqlResult *sr = sqlGetResult(conn, query);
 char **row;
 while ((row = sqlNextRow(sr)) != NULL)
     {
     found = TRUE;
     fprintf(outF, "%s\t%s\t%s\t%s\n", row[0], row[1], row[2], name);
     }
 sqlFreeResult(&sr);
 return found;
 }
 
 boolean findInKnownCanonical(struct sqlConnection *conn, char *geneSymbol, FILE *outF)
 /* For each knownGene cluster for this geneSymbol, print output using the
  * cluster bounds as {chrom,chromStart,chromEnd} and return TRUE. */
 {
 boolean found = FALSE;
 char query[1024];
 sqlSafef(query, sizeof(query), "select distinct clusterId from kgXref x, knownIsoforms i "
 		    "where x.geneSymbol='%s' and i.transcript=x.kgId", geneSymbol);
 struct slName *id, *clusterIds = sqlQuickList(conn, query);
 for (id = clusterIds;  id != NULL;  id = id->next)
     {
     sqlSafef(query, sizeof(query), "select k.chrom, min(txStart), max(txEnd) "
 	  "from knownGene k, knownIsoforms i, knownCanonical c "
 	  "where i.clusterId=%s and i.transcript=k.name "
 	  "and c.clusterId=i.clusterId and k.chrom=c.chrom",
 	  id->name);
     found |= printBed4FromQueryAndName(conn, query, geneSymbol, outF);
     }
 slFreeList(&clusterIds);
 return found;
 }
 
 boolean findInRefGene(struct sqlConnection *conn, char *geneSymbol, FILE *outF)
 /* If there is a refGene for geneSymbol, print output using the
  * refGene coords as {chrom,chromStart,chromEnd} and return TRUE. */
 {
 char query[1024];
 sqlSafef(query, sizeof(query), "select chrom, txStart, txEnd from refGene rg, refLink rl "
       "where rl.name = '%s' and rl.mrnaAcc = rg.name", geneSymbol);
 return printBed4FromQueryAndName(conn, query, geneSymbol, outF);
 }
 
 boolean findInKgAlias(struct sqlConnection *conn, char *geneSymbol, FILE *outF)
 /* If geneSymbol can be found in kgAlias, print output using the
  * knownGene coords as {chrom,chromStart,chromEnd} and return TRUE. */
 {
 char query[1024];
 sqlSafef(query, sizeof(query), "select distinct(chrom), min(txStart), max(txEnd) "
       "from knownGene, kgAlias where alias='%s' and name=kgId "
       "group by chrom", geneSymbol);
 return printBed4FromQueryAndName(conn, query, geneSymbol, outF);
 }
 
 boolean findInGencode(struct sqlConnection *conn, char *geneSymbol, FILE *outF)
 /* If Gencode tables are present and geneSymbol can be found in them,
  * use those coords and return TRUE. */
 {
 #define PSEUDOGENE_TABLE "wgEncodeGencodePseudoGeneV14"
 #define COMP_TABLE "wgEncodeGencodeCompV14"
 boolean found = FALSE;
 char query[1024];
 if (sqlTableExists(conn, PSEUDOGENE_TABLE))
     {
     sqlSafef(query, sizeof(query), "select chrom, txStart, txEnd from %s where name2 = '%s'",
 	  PSEUDOGENE_TABLE, geneSymbol);
     found = printBed4FromQueryAndName(conn, query, geneSymbol, outF);
     }
 if (!found && sqlTableExists(conn, COMP_TABLE))
     {
     sqlSafef(query, sizeof(query), "select chrom, txStart, txEnd from %s where name2 = '%s'",
 	  COMP_TABLE, geneSymbol);
     found = printBed4FromQueryAndName(conn, query, geneSymbol, outF);
     }
 return found;
 }
 
 void gadPos(char *db, char *outFileName)
 /* Try to get genomic positions for GAD gene symbols from knownGene, refGene,
  * kgAlias and Gencode V14 in that order. */
 {
 FILE *outF = mustOpen(outFileName, "w");
 struct sqlConnection *conn = hAllocConn(db);
 
 /* loop over all gene symbols in GAD */	
 struct slName *geneSymbols = sqlQuickList(conn,
 			       NOSQLINJ "select distinct geneSymbol from gadAll where association='Y'");
 struct slName *symbol;
 int kcCount = 0, rgCount = 0, kaCount = 0, gcCount = 0, missingCount = 0;
 for (symbol = geneSymbols;  symbol != NULL;  symbol = symbol->next)
     {
     if (findInKnownCanonical(conn, symbol->name, outF))
 	kcCount++;
     else if (findInRefGene(conn, symbol->name, outF))
 	rgCount++;
     else if (findInKgAlias(conn, symbol->name, outF))
 	kaCount++;
     else if (findInGencode(conn, symbol->name, outF))
 	gcCount++;
     else
 	{
 	verbose(2, "No result for gene symbol '%s'\n", symbol->name);
 	missingCount++;
 	}
     }
 verbose(1, "Found in knownCanonical: %d\n", kcCount);
 verbose(1, "Found in refGene: %d\n", rgCount);
 verbose(1, "Found in kgAlias: %d\n", kaCount);
 verbose(1, "Found in Gencode: %d\n", gcCount);
 verbose(1, "Not found: %d\n", missingCount);
 
 hFreeConn(&conn);
 carefulClose(&outF);
 }
 
 
 int main(int argc, char *argv[])
 {
 optionInit(&argc, argv, optionSpecs);
 
 if (argc != 3) usage();
 char *db   = argv[1];
 char *outFileName = argv[2];
 
 gadPos(db, outFileName);
 
 return(0);
 }