4898794edd81be5285ea6e544acbedeaeb31bf78
max
  Tue Nov 23 08:10:57 2021 -0800
Fixing pointers to README file for license in all source code files. refs #27614

diff --git src/hg/hgKnownGeneList/hgKnownGeneList.c src/hg/hgKnownGeneList/hgKnownGeneList.c
index b128657..e5b18d2 100644
--- src/hg/hgKnownGeneList/hgKnownGeneList.c
+++ src/hg/hgKnownGeneList/hgKnownGeneList.c
@@ -1,371 +1,371 @@
 /* hgKnownGeneList - Generate Known Genes List HTML pages to be indexed by Google. */
 
 /* Copyright (C) 2013 The Regents of the University of California 
- * See README in this or parent directory for licensing information. */
+ * See kent/LICENSE or http://genome.ucsc.edu/license/ for licensing information. */
 #include "common.h"
 #include "hdb.h"
 #include "dbDb.h"
 #include "hCommon.h"
 #include "web.h"
 #include "htmshell.h"
 
 /* Paging constants.  LINKSPERPAGE is both the number of genes on one list
  * page and the number of list pages collected under one group index. */
 #define LINKSPERPAGE 30
 #define MAXPAGES 3000   /* slots in startSymbol/endSymbol (one per list page) */
 #define MAXSUBDIR 50    /* slots in pageStartSymbol/pageEndSymbol (pages of the current group) */
 #define MAXTOP 200      /* slots in topStartSymbol/topEndSymbol (one per group) */
 #define TESTSIZE 2600   /* row limit referenced only by the debugging notes in main() */
 
 /* global variables */
 char *genome, *genomeDesc;          /* genome name and description looked up in dbDb */
 char command[255];                  /* scratch buffer for shell commands (mkdir -p) */
 char *database;                     /* database name given on the command line */
 char startSymbol[MAXPAGES][20];     /* first gene symbol of each list page, indexed by page number */
 char endSymbol[MAXPAGES][20];       /* last gene symbol of each list page, indexed by page number */
 char pageStartSymbol[MAXSUBDIR][20];/* first symbol of each page within the current group (1-based) */
 char pageEndSymbol[MAXSUBDIR][20];  /* last symbol of each page within the current group (1-based) */
 char topStartSymbol[MAXTOP][20];    /* first symbol of each group, indexed by group number */
 char topEndSymbol[MAXTOP][20];      /* last symbol of each group, indexed by group number */
 int  currentPage;                   /* number of list pages written so far in the current group */
 /* Placeholder printed in table cells that have no value.  The entity is
  * terminated with ';' so it is a well-formed character reference. */
 char emptyString[10] = {"&nbsp;"};
 void usage()
 /* Print the command line help through errAbort, which does not return. */
 {
 static char helpText[] =
     "hgKnownGeneList - Generate Known Genes List HTML pages to be indexed by Google\n"
     "usage:\n"
     "   hgKnownGeneList db\n"
     "   db is the genome database\n"
     "example:\n"
     "   hgKnownGeneList hg17\n";
 
 /* The text contains no conversion specifiers, so routing it through "%s"
  * produces exactly the same message. */
 errAbort("%s", helpText);
 }
 
 void printHtmlHead(FILE *outf)
 /* Write the opening part of a generated page to outf: the DOCTYPE, the HEAD
  * section (CSP header text from getCspMetaHeader(), content-type metas, the
  * title and the HGStyle.css resource) and the opening BODY tag. */
 {
 fputs("<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 3.2//EN\">", outf);
 fprintf(outf, "<HTML><HEAD>\n%s", getCspMetaHeader());
 fputs("<META HTTP-EQUIV=\"Content-Type\" CONTENT=\"text/html;CHARSET=iso-8859-1\">\n"
       "<META http-equiv=\"Content-Script-Type\" content=\"text/javascript\">\n"
       "<TITLE>UCSC Known Genes Description and Page Index</TITLE>\n",
       outf);
 webIncludeResourcePrintToFile(outf, "HGStyle.css");
 fputs("</HEAD><BODY BGCOLOR='#FFF9D2'>\n", outf);
 }
 
 void printHtmlEnd(FILE *outf)
 /* Write the closing BODY and HTML tags to outf and flush the stream.
  * The stream is left open; closing it is up to the caller. */
 {
 fputs("</BODY></HTML>\n", outf);
 fflush(outf);
 }
 
 int main(int argc, char *argv[])
 {
 struct sqlConnection *conn, *conn2, *conn3;
 struct sqlConnection *connCentral = hConnectCentral();
 char query[256], query2[256], query3[256];
 struct sqlResult *sr, *sr2;
 char **row, **row2;
 char buf[128];
 char *answer;
 char *kgID, *chrom, *txStart, *txEnd;
 char *mRNA;
 int i;
 int geneCnt  = 0;
 int pageNum  = 0;
 int topLevel = 1;
 
 char *geneSymbol, *spID, *desc;
 // char *proteinID;  unused
 FILE *outf, *outf2;
 char fileName[255];
 database = strdup("hg17");
 boolean newPage;
 int totalKgId;
 // int totalKgCnt;  unused
 int totalKgPage;
 int kgIdCnt = 0;
 
 if (argc != 2) usage();
 database = argv[1];
 
 sqlSafef(query, sizeof query, "select genome from dbDb where name = '%s'", database);
 answer = sqlQuickQuery(connCentral, query, buf, sizeof(buf));
 if (answer == NULL)
     {
     fprintf(stderr,"'%s' is not a valid genome database name.", database);
     exit(1);
     }
 else
     {
     genome = strdup(answer);
     }
 
 if (!hTableExists(database, "knownGene"))
     {
     fprintf(stderr,"Database %s currently does not have UCSC Known Genes.", database);
     exit(1);
     }
 
 sqlSafef(query, sizeof query, "select description from dbDb where name = '%s'", database);
 
 genomeDesc = strdup(sqlQuickQuery(connCentral, query, buf, sizeof(buf)));
 hDisconnectCentral(&connCentral);
 
 /* create first top level subdirectory */
 safef(command, sizeof(command), "mkdir -p knownGeneList/%s/%d", database, topLevel);
 mustSystem(command);
 
 conn = hAllocConn(database);
 conn2= hAllocConn(database);
 conn3= hAllocConn(database);
 
 newPage  = TRUE;
 
 currentPage = 0;
 
 /* put this in to avoid compiler complaining */
 outf = NULL;
 geneSymbol = NULL;
 char *protAcc = NULL;
 
 /* figure out how many pages in total */
 sqlSafef(query2, sizeof(query2), "select count(k.name) from %s.knownGene k, %s.kgXref x where k.name=x.kgId and geneSymbol != ''", database, database);
 sr2  = sqlMustGetResult(conn2, query2);
 row2 = sqlNextRow(sr2);
 // totalKgCnt = atoi(row2[0]);
 sqlFreeResult(&sr2);
 
 /* figure out how many KG IDs in total */
 sqlSafef(query2, sizeof(query2), "select count(*) from %s.kgXref where geneSymbol !=''", database);
 sr2  = sqlMustGetResult(conn2, query2);
 row2 = sqlNextRow(sr2);
 totalKgId = atoi(row2[0]);
 sqlFreeResult(&sr2);
 totalKgPage = totalKgId/LINKSPERPAGE + 1;
 
 sqlSafef(query2, sizeof(query2),
       "select kgID, geneSymbol, description from %s.kgXref where geneSymbol!= '' order by geneSymbol",
       database);
 
       /* for debugging */
       /* "select kgID, geneSymbol, description from %s.kgXref order by geneSymbol limit %d",
       database, TESTSIZE);*/
 sr2  = sqlMustGetResult(conn2, query2);
 row2 = sqlNextRow(sr2);
 
 /* for debugging */
 /* while (kgIdCnt < TESTSIZE) */
 
 while (kgIdCnt < totalKgId)
     {
     kgIdCnt++;
 
     kgID 	= row2[0];
     geneSymbol  = strdup(row2[1]);
     desc 	= row2[2];
     sqlSafef(query, sizeof(query),
     "select chrom,txSTart,txEnd,proteinID from %s.knownGene where name='%s'", database, kgID);
     sr = sqlMustGetResult(conn, query);
     row = sqlNextRow(sr);
     if (row != NULL)
     	{
 	geneCnt++;
     	chrom     = row[0];
     	txStart   = row[1];
     	txEnd     = row[2];
 //    	proteinID = row[3];  unused
 
 	if (newPage)
 	    {
 	    /* create a KG links page */
 	    pageNum++;
 	    currentPage++;
 
 	    /* use mkdir -p to make sure the subdirectory exists */
 	    safef(command, sizeof(command), "mkdir -p knownGeneList/%s/%d", database, topLevel);
 	    mustSystem(command);
 	    safef(fileName, sizeof(fileName),
 	    	  "knownGeneList/%s/%d/kgList%d.html", database, topLevel, pageNum);
   	    outf = fopen(fileName, "w");
 	    printHtmlHead(outf);
 
 	    fprintf(outf,"<H2>UCSC %s Known Genes List (page %d of %d)</H2>\n",
 	    	    genome, pageNum, totalKgPage);
 	    fprintf(outf, "<TABLE BORDER=1=CELLSPACING=1 CELLPADDING=3 BGCOLOR=\"#D9F8E4\"><TR>\n");
 	    fprintf(outf,
 	    "<TR><TH>Gene Symbol</TH><TH>Known Gene ID</TH><TH>mRNA</TH><TH>UniProt</TH><TH>RefSeq Protein</TH><TH>Description</TH>\n");
 	    strcpy(startSymbol[pageNum], geneSymbol);
 	    strcpy(pageStartSymbol[currentPage], geneSymbol);
 	    newPage = FALSE;
 	    }
 
 	fprintf(outf,"<TR>");
     	fprintf(outf,"<TD>%s</TD>", geneSymbol);
     	/*fprintf(outf,"<TD>%d:%s</TD>", geneCnt, geneSymbol);*/
     	fprintf(outf,"<TD>");
     	fprintf(outf,"<A href=\"/cgi-bin/hgGene?db=%s&hgg_gene=%s", database, kgID);
     	fprintf(outf,"&hgg_chrom=%s&hgg_start=%s&hgg_end=%s\">", chrom, txStart, txEnd);
     	fprintf(outf,"%s", kgID);
     	fprintf(outf,"</A>");
     	fprintf(outf,"</TD>\n");
 
 	sqlSafef(query3,sizeof(query3),"select spID from %s.kgXref where kgID = '%s'", database, kgID);
 	spID = cloneString(sqlQuickQuery(conn3, query3, buf, sizeof(buf)));
 	if (spID == NULL)
 	    {
 	    spID = emptyString;
 	    }
 	else
 	    {
 	    if (sameWord(spID,"")) spID = emptyString;
 	    }
 
 	sqlSafef(query3,sizeof(query3),"select mRNA from %s.kgXref where kgID = '%s'", database, kgID);
 	mRNA = cloneString(sqlQuickQuery(conn3, query3, buf, sizeof(buf)));
 	if (mRNA == NULL)
 	    {
 	    mRNA = emptyString;
 	    }
 	else
 	    {
 	    if (sameWord(mRNA,"")) mRNA = emptyString;
 	    }
 
 	sqlSafef(query3,sizeof(query3),"select protAcc from %s.kgXref where kgID = '%s'", database, kgID);
 	protAcc = sqlQuickQuery(conn3, query3, buf, sizeof(buf));
 	if (protAcc == NULL)
 	    {
 	    protAcc = emptyString;
 	    }
 	else
 	    {
 	    if (sameWord(protAcc,"")) protAcc = emptyString;
 	    }
 
 	fprintf(outf,"<TD>%s</TD>", mRNA);
 	fprintf(outf,"<TD>%s</TD>", spID);
 	fprintf(outf,"<TD>%s</TD>", protAcc);
     	fprintf(outf,"<TD>%s</TD>", desc );
     	fprintf(outf,"</TR>\n");
 
 	if ((geneCnt % LINKSPERPAGE) == 0)
     	    {
 	    /* flush out and close the page if a page is filled, and start a new page */
 	    fprintf(outf,"</TABLE>");
 	    strcpy(endSymbol[pageNum], geneSymbol);
 	    strcpy(pageEndSymbol[currentPage], endSymbol[pageNum]);
 	    fprintf(outf, "<BR>");
 	    fprintf(outf, "<A href=\"/knownGeneList/%s/%d/kgIndex%d.html\">",
 	    	    database, topLevel,topLevel);
 	    fprintf(outf, "Up");
 	    fprintf(outf,"</A><BR>\n");
 	    printHtmlEnd(outf);
 	    newPage = TRUE;
 	    fclose(outf);
 	    outf = NULL;
 
 	    if ((pageNum % LINKSPERPAGE) == 0 )
 	    	{
 	    	printf("Processing topLevel %d ...\n", topLevel);fflush(stdout);
 	    	safef(fileName, sizeof(fileName),
 	    	      "knownGeneList/%s/%d/kgIndex%d.html", database, topLevel, topLevel);
 	    	outf2 = fopen(fileName, "w");
 	    	printHtmlHead(outf2);
 		//fprintf(outf2,"<H2>UCSC %s Known Genes List</H2>\n", genome);
 		fprintf(outf2,"<H2>UCSC %s Known Genes List (Group %d)</H2>\n", genome, topLevel);
 	    	for (i=1; i<= currentPage; i++)
 	      	    {
 	      	    fprintf(outf2, "Page %d: ", (topLevel-1)*LINKSPERPAGE+i);
 	            fprintf(outf2,
 	      	    	    "<A href=\"/knownGeneList/%s/%d/kgList%d.html\">",
 	             	    database, topLevel, (topLevel-1)*LINKSPERPAGE+i);
 	      	    fprintf(outf2, "%s to %s", pageStartSymbol[i], pageEndSymbol[i]);
     	      	    fprintf(outf2,"</A><BR>\n");
 	      	    }
 		fprintf(outf2, "<BR>");
 		fprintf(outf2, "<A href=\"/knownGeneList/%s/top.html\">",database);
 		fprintf(outf2, "Up");
 		fprintf(outf2,"</A><BR>\n");
 	    	printHtmlEnd(outf2);
 	    	fclose(outf2);
 
 	    strcpy(topStartSymbol[topLevel], pageStartSymbol[1]);
 	    strcpy(  topEndSymbol[topLevel], pageEndSymbol[currentPage]);
 	    currentPage = 0;
 	    topLevel++;
 	    }
     	}
 	row = sqlNextRow(sr);
     	}
     sqlFreeResult(&sr);
     row2 = sqlNextRow(sr2);
     }
 sqlFreeResult(&sr2);
 
 /* flush out and close the last list page */
 if (outf != NULL)
     {
     fprintf(outf,"</TABLE>");
     strcpy(endSymbol[pageNum], geneSymbol);
     strcpy(pageEndSymbol[currentPage], endSymbol[pageNum]);
     fprintf(outf, "<BR>");
     fprintf(outf, "<A href=\"/knownGeneList/%s/%d/kgIndex%d.html\">",
     database, topLevel,topLevel);
     fprintf(outf, "Up");
     fprintf(outf,"</A><BR>\n");
     printHtmlEnd(outf);
     fclose(outf);
     }
 
 /* generate the last index page */
 safef(command, sizeof(command), "mkdir -p knownGeneList/%s/%d", database, topLevel);
 mustSystem(command);
 safef(fileName, sizeof(fileName),
       "knownGeneList/%s/%d/kgIndex%d.html", database, topLevel, topLevel);
 outf2 = fopen(fileName, "w");
 printHtmlHead(outf2);
 fprintf(outf2,"<H2>UCSC %s Known Genes List (Group %d)</H2>\n", genome, topLevel);
 for (i=1; i<= currentPage; i++)
     {
     fprintf(outf2, "Page %d: ", (topLevel-1)*LINKSPERPAGE+i);
     fprintf(outf2, "<A href=\"/knownGeneList/%s/%d/kgList%d.html\">",
 	    database, topLevel, (topLevel-1)*LINKSPERPAGE+i);
     fprintf(outf2, "%s to %s", pageStartSymbol[i], pageEndSymbol[i]);
     fprintf(outf2,"</A><BR>\n");
     fflush(outf2);
     }
 
 fprintf(outf2, "<BR>");
 fprintf(outf2, "<A href=\"/knownGeneList/%s/top.html\">",database);
 fprintf(outf2, "Up");
 fprintf(outf2,"</A><BR>\n");
 strcpy(topStartSymbol[topLevel], pageStartSymbol[1]);
 strcpy(  topEndSymbol[topLevel], pageEndSymbol[currentPage]);
 
 fclose(outf2);
 
 currentPage = 0;
 
 /* generate the top HTML page */
 safef(fileName, sizeof(fileName), "knownGeneList/%s/top.html", database);
 outf2 = fopen(fileName, "w");
 printHtmlHead(outf2);
 fprintf(outf2,"<H2>UCSC %s Known Genes List</H2>\n", genome);
 for (i=1; i<= topLevel; i++)
     {
     fprintf(outf2, "Group %d: ", i);
     fprintf(outf2, "<A href=\"/knownGeneList/%s/%d/kgIndex%d.html\">", database, i, i);
     fprintf(outf2, " %s to %s", topStartSymbol[i], topEndSymbol[i]);
     fprintf(outf2,"</A><BR>\n");
     fflush(outf2);
     }
 
 fprintf(outf2, "<BR>");
 fprintf(outf2, "<A href=\"/knownGeneLists.html\">");
 fprintf(outf2, "Up");
 fprintf(outf2,"</A><BR>\n");
 
 printHtmlEnd(outf2);
 printHtmlEnd(outf2);
 fclose(outf2);
 
 return(0);
 }