4898794edd81be5285ea6e544acbedeaeb31bf78 max Tue Nov 23 08:10:57 2021 -0800 Fixing pointers to README file for license in all source code files. refs #27614 diff --git src/hg/hgKnownGeneList/hgKnownGeneList.c src/hg/hgKnownGeneList/hgKnownGeneList.c index b128657..e5b18d2 100644 --- src/hg/hgKnownGeneList/hgKnownGeneList.c +++ src/hg/hgKnownGeneList/hgKnownGeneList.c @@ -1,371 +1,371 @@ /* hgKnownGeneList - Generate Known Genes List HTML pages to be indexed by Google. */ /* Copyright (C) 2013 The Regents of the University of California - * See README in this or parent directory for licensing information. */ + * See kent/LICENSE or http://genome.ucsc.edu/license/ for licensing information. */ #include "common.h" #include "hdb.h" #include "dbDb.h" #include "hCommon.h" #include "web.h" #include "htmshell.h" #define LINKSPERPAGE 30 #define MAXPAGES 3000 #define MAXSUBDIR 50 #define MAXTOP 200 #define TESTSIZE 2600 /* global variables */ char *genome, *genomeDesc; char command[255]; char *database; char startSymbol[MAXPAGES][20]; char endSymbol[MAXPAGES][20]; char pageStartSymbol[MAXSUBDIR][20]; char pageEndSymbol[MAXSUBDIR][20]; char topStartSymbol[MAXTOP][20]; char topEndSymbol[MAXTOP][20]; int currentPage; char emptyString[10] = {" "}; void usage() /* Explain usage and exit. */ { errAbort( "hgKnownGeneList - Generate Known Genes List HTML pages to be indexed by Google\n" "usage:\n" " hgKnownGeneList db\n" " db is the genome database\n" "example:\n" " hgKnownGeneList hg17\n"); } void printHtmlHead(FILE *outf) { fprintf(outf, "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 3.2//EN\">"); fprintf(outf, "<HTML><HEAD>\n%s",getCspMetaHeader()); fprintf(outf, "<META HTTP-EQUIV=\"Content-Type\" CONTENT=\"text/html;CHARSET=iso-8859-1\">\n"); fprintf(outf, "<META http-equiv=\"Content-Script-Type\" content=\"text/javascript\">\n"); fprintf(outf, "<TITLE>UCSC Known Genes Description and Page Index</TITLE>\n"); webIncludeResourcePrintToFile(outf,"HGStyle.css"); fprintf(outf, "</HEAD><BODY BGCOLOR='#FFF9D2'>\n"); } void printHtmlEnd(FILE *outf) { fprintf(outf, "</BODY></HTML>\n"); fflush(outf); } int main(int argc, char *argv[]) { struct sqlConnection *conn, *conn2, *conn3; struct sqlConnection *connCentral = hConnectCentral(); char query[256], query2[256], query3[256]; struct sqlResult *sr, *sr2; char **row, **row2; char buf[128]; char *answer; char *kgID, *chrom, *txStart, *txEnd; char *mRNA; int i; int geneCnt = 0; int pageNum = 0; int topLevel = 1; char *geneSymbol, *spID, *desc; // char *proteinID; unused FILE *outf, *outf2; char fileName[255]; database = strdup("hg17"); boolean newPage; int totalKgId; // int totalKgCnt; unused int totalKgPage; int kgIdCnt = 0; if (argc != 2) usage(); database = argv[1]; sqlSafef(query, sizeof query, "select genome from dbDb where name = '%s'", database); answer = sqlQuickQuery(connCentral, query, buf, sizeof(buf)); if (answer == NULL) { fprintf(stderr,"'%s' is not a valid genome database name.", database); exit(1); } else { genome = strdup(answer); } if (!hTableExists(database, "knownGene")) { fprintf(stderr,"Database %s currently does not have UCSC Known Genes.", database); exit(1); } sqlSafef(query, sizeof query, "select description from dbDb where name = '%s'", database); genomeDesc = strdup(sqlQuickQuery(connCentral, query, buf, sizeof(buf))); hDisconnectCentral(&connCentral); /* create first top level subdirectory */ safef(command, sizeof(command), "mkdir -p knownGeneList/%s/%d", database, topLevel); mustSystem(command); conn = hAllocConn(database); conn2= hAllocConn(database); conn3= hAllocConn(database); newPage = TRUE; currentPage = 0; /* put this in to avoid compiler complaining */ outf = NULL; geneSymbol = NULL; char *protAcc = NULL; /* figure out how many pages in total */ sqlSafef(query2, sizeof(query2), "select count(k.name) from %s.knownGene k, %s.kgXref x where k.name=x.kgId and geneSymbol != ''", database, database); sr2 = sqlMustGetResult(conn2, query2); row2 = sqlNextRow(sr2); // totalKgCnt = atoi(row2[0]); sqlFreeResult(&sr2); /* figure out how many KG IDs in total */ sqlSafef(query2, sizeof(query2), "select count(*) from %s.kgXref where geneSymbol !=''", database); sr2 = sqlMustGetResult(conn2, query2); row2 = sqlNextRow(sr2); totalKgId = atoi(row2[0]); sqlFreeResult(&sr2); totalKgPage = totalKgId/LINKSPERPAGE + 1; sqlSafef(query2, sizeof(query2), "select kgID, geneSymbol, description from %s.kgXref where geneSymbol!= '' order by geneSymbol", database); /* for debugging */ /* "select kgID, geneSymbol, description from %s.kgXref order by geneSymbol limit %d", database, TESTSIZE);*/ sr2 = sqlMustGetResult(conn2, query2); row2 = sqlNextRow(sr2); /* for debugging */ /* while (kgIdCnt < TESTSIZE) */ while (kgIdCnt < totalKgId) { kgIdCnt++; kgID = row2[0]; geneSymbol = strdup(row2[1]); desc = row2[2]; sqlSafef(query, sizeof(query), "select chrom,txSTart,txEnd,proteinID from %s.knownGene where name='%s'", database, kgID); sr = sqlMustGetResult(conn, query); row = sqlNextRow(sr); if (row != NULL) { geneCnt++; chrom = row[0]; txStart = row[1]; txEnd = row[2]; // proteinID = row[3]; unused if (newPage) { /* create a KG links page */ pageNum++; currentPage++; /* use mkdir -p to make sure the subdirectory exists */ safef(command, sizeof(command), "mkdir -p knownGeneList/%s/%d", database, topLevel); mustSystem(command); safef(fileName, sizeof(fileName), "knownGeneList/%s/%d/kgList%d.html", database, topLevel, pageNum); outf = fopen(fileName, "w"); printHtmlHead(outf); fprintf(outf,"<H2>UCSC %s Known Genes List (page %d of %d)</H2>\n", genome, pageNum, totalKgPage); fprintf(outf, "<TABLE BORDER=1=CELLSPACING=1 CELLPADDING=3 BGCOLOR=\"#D9F8E4\"><TR>\n"); fprintf(outf, "<TR><TH>Gene Symbol</TH><TH>Known Gene ID</TH><TH>mRNA</TH><TH>UniProt</TH><TH>RefSeq Protein</TH><TH>Description</TH>\n"); strcpy(startSymbol[pageNum], geneSymbol); strcpy(pageStartSymbol[currentPage], geneSymbol); newPage = FALSE; } fprintf(outf,"<TR>"); fprintf(outf,"<TD>%s</TD>", geneSymbol); /*fprintf(outf,"<TD>%d:%s</TD>", geneCnt, geneSymbol);*/ fprintf(outf,"<TD>"); fprintf(outf,"<A href=\"/cgi-bin/hgGene?db=%s&hgg_gene=%s", database, kgID); fprintf(outf,"&hgg_chrom=%s&hgg_start=%s&hgg_end=%s\">", chrom, txStart, txEnd); fprintf(outf,"%s", kgID); fprintf(outf,"</A>"); fprintf(outf,"</TD>\n"); sqlSafef(query3,sizeof(query3),"select spID from %s.kgXref where kgID = '%s'", database, kgID); spID = cloneString(sqlQuickQuery(conn3, query3, buf, sizeof(buf))); if (spID == NULL) { spID = emptyString; } else { if (sameWord(spID,"")) spID = emptyString; } sqlSafef(query3,sizeof(query3),"select mRNA from %s.kgXref where kgID = '%s'", database, kgID); mRNA = cloneString(sqlQuickQuery(conn3, query3, buf, sizeof(buf))); if (mRNA == NULL) { mRNA = emptyString; } else { if (sameWord(mRNA,"")) mRNA = emptyString; } sqlSafef(query3,sizeof(query3),"select protAcc from %s.kgXref where kgID = '%s'", database, kgID); protAcc = sqlQuickQuery(conn3, query3, buf, sizeof(buf)); if (protAcc == NULL) { protAcc = emptyString; } else { if (sameWord(protAcc,"")) protAcc = emptyString; } fprintf(outf,"<TD>%s</TD>", mRNA); fprintf(outf,"<TD>%s</TD>", spID); fprintf(outf,"<TD>%s</TD>", protAcc); fprintf(outf,"<TD>%s</TD>", desc ); fprintf(outf,"</TR>\n"); if ((geneCnt % LINKSPERPAGE) == 0) { /* flush out and close the page if a page is filled, and start a new page */ fprintf(outf,"</TABLE>"); strcpy(endSymbol[pageNum], geneSymbol); strcpy(pageEndSymbol[currentPage], endSymbol[pageNum]); fprintf(outf, "<BR>"); fprintf(outf, "<A href=\"/knownGeneList/%s/%d/kgIndex%d.html\">", database, topLevel,topLevel); fprintf(outf, "Up"); fprintf(outf,"</A><BR>\n"); printHtmlEnd(outf); newPage = TRUE; fclose(outf); outf = NULL; if ((pageNum % LINKSPERPAGE) == 0 ) { printf("Processing topLevel %d ...\n", topLevel);fflush(stdout); safef(fileName, sizeof(fileName), "knownGeneList/%s/%d/kgIndex%d.html", database, topLevel, topLevel); outf2 = fopen(fileName, "w"); printHtmlHead(outf2); //fprintf(outf2,"<H2>UCSC %s Known Genes List</H2>\n", genome); fprintf(outf2,"<H2>UCSC %s Known Genes List (Group %d)</H2>\n", genome, topLevel); for (i=1; i<= currentPage; i++) { fprintf(outf2, "Page %d: ", (topLevel-1)*LINKSPERPAGE+i); fprintf(outf2, "<A href=\"/knownGeneList/%s/%d/kgList%d.html\">", database, topLevel, (topLevel-1)*LINKSPERPAGE+i); fprintf(outf2, "%s to %s", pageStartSymbol[i], pageEndSymbol[i]); fprintf(outf2,"</A><BR>\n"); } fprintf(outf2, "<BR>"); fprintf(outf2, "<A href=\"/knownGeneList/%s/top.html\">",database); fprintf(outf2, "Up"); fprintf(outf2,"</A><BR>\n"); printHtmlEnd(outf2); fclose(outf2); strcpy(topStartSymbol[topLevel], pageStartSymbol[1]); strcpy( topEndSymbol[topLevel], pageEndSymbol[currentPage]); currentPage = 0; topLevel++; } } row = sqlNextRow(sr); } sqlFreeResult(&sr); row2 = sqlNextRow(sr2); } sqlFreeResult(&sr2); /* flush out and close the last list page */ if (outf != NULL) { fprintf(outf,"</TABLE>"); strcpy(endSymbol[pageNum], geneSymbol); strcpy(pageEndSymbol[currentPage], endSymbol[pageNum]); fprintf(outf, "<BR>"); fprintf(outf, "<A href=\"/knownGeneList/%s/%d/kgIndex%d.html\">", database, topLevel,topLevel); fprintf(outf, "Up"); fprintf(outf,"</A><BR>\n"); printHtmlEnd(outf); fclose(outf); } /* generate the last index page */ safef(command, sizeof(command), "mkdir -p knownGeneList/%s/%d", database, topLevel); mustSystem(command); safef(fileName, sizeof(fileName), "knownGeneList/%s/%d/kgIndex%d.html", database, topLevel, topLevel); outf2 = fopen(fileName, "w"); printHtmlHead(outf2); fprintf(outf2,"<H2>UCSC %s Known Genes List (Group %d)</H2>\n", genome, topLevel); for (i=1; i<= currentPage; i++) { fprintf(outf2, "Page %d: ", (topLevel-1)*LINKSPERPAGE+i); fprintf(outf2, "<A href=\"/knownGeneList/%s/%d/kgList%d.html\">", database, topLevel, (topLevel-1)*LINKSPERPAGE+i); fprintf(outf2, "%s to %s", pageStartSymbol[i], pageEndSymbol[i]); fprintf(outf2,"</A><BR>\n"); fflush(outf2); } fprintf(outf2, "<BR>"); fprintf(outf2, "<A href=\"/knownGeneList/%s/top.html\">",database); fprintf(outf2, "Up"); fprintf(outf2,"</A><BR>\n"); strcpy(topStartSymbol[topLevel], pageStartSymbol[1]); strcpy( topEndSymbol[topLevel], pageEndSymbol[currentPage]); fclose(outf2); currentPage = 0; /* generate the top HTML page */ safef(fileName, sizeof(fileName), "knownGeneList/%s/top.html", database); outf2 = fopen(fileName, "w"); printHtmlHead(outf2); fprintf(outf2,"<H2>UCSC %s Known Genes List</H2>\n", genome); for (i=1; i<= topLevel; i++) { fprintf(outf2, "Group %d: ", i); fprintf(outf2, "<A href=\"/knownGeneList/%s/%d/kgIndex%d.html\">", database, i, i); fprintf(outf2, " %s to %s", topStartSymbol[i], topEndSymbol[i]); fprintf(outf2,"</A><BR>\n"); fflush(outf2); } fprintf(outf2, "<BR>"); fprintf(outf2, "<A href=\"/knownGeneLists.html\">"); fprintf(outf2, "Up"); fprintf(outf2,"</A><BR>\n"); printHtmlEnd(outf2); printHtmlEnd(outf2); fclose(outf2); return(0); }