4898794edd81be5285ea6e544acbedeaeb31bf78
max
  Tue Nov 23 08:10:57 2021 -0800
Fixing pointers to README file for license in all source code files. refs #27614

diff --git src/hg/protein/kgXref/kgXref.c src/hg/protein/kgXref/kgXref.c
index e9164c0..b5ade82 100644
--- src/hg/protein/kgXref/kgXref.c
+++ src/hg/protein/kgXref/kgXref.c
@@ -1,175 +1,175 @@
 /* kgXref - create Known Gene cross reference table kgXref.tab file */
 
 /* Copyright (C) 2013 The Regents of the University of California 
- * See README in this or parent directory for licensing information. */
+ * See kent/LICENSE or http://genome.ucsc.edu/license/ for licensing information. */
 #include "common.h"
 #include "hCommon.h"
 #include "hdb.h"
 
 void usage()
 /* Explain usage and exit. */
 {
 errAbort(
   "kgXref - create Known Gene cross reference table kgXref.tab file.\n"
   "usage:\n"
   "   kgXref <db> <proteinsYYMMDD> <ro_db>\n"
   "          <db> is known Genes database under construction\n"
   "          <proteinsYYMMDD> is protein database name \n"
   "          <ro_db> is target organism database\n"
   "example: kgXref kgDB proteins040115 hg16\n");
 }
 
 static int xrefIsEmpty(const char *s)
 /* Return non-zero when s is NULL or the empty string. */
 {
 return s == NULL || s[0] == '\0';
 }
 
 static const char *xrefOrEmpty(const char *s)
 /* Return s, or "" when s is NULL, so the result is always safe for %s. */
 {
 return (s != NULL) ? s : "";
 }
 
 static void xrefReplace(char **pDest, char *newVal)
 /* Free the string *pDest currently owns and make it own newVal instead.
  * newVal may be NULL, which leaves *pDest empty. */
 {
 freeMem(*pDest);
 *pDest = newVal;
 }
 
 int main(int argc, char *argv[])
 /* kgXref - for every row of <db>.knownGene write one tab-separated line
  * (kgID, kgID, spID, spDisplayID, geneSymbol, refseqID, protAcc, description)
  * to j.dat, then sort and de-duplicate j.dat into kgXref.tab.
  *
  * Throughout the loop a NULL string means "no value found"; it is treated
  * exactly like an empty string when the fallbacks are applied and when the
  * line is written. 
  *
  * NOTE(review): the per-row freeMem calls assume sqlGetField returns a freshly
  * allocated copy owned by the caller - confirm against its definition. */
 {
 struct sqlConnection *conn;
 struct sqlResult *sr;
 char **row;
 char query[256];
 char cond[256];
 FILE *out;
 char *database, *proteinDB, *roDb;
 
 if (argc != 4)
     usage();
 database  = argv[1];
 proteinDB = argv[2];
 roDb      = argv[3];
 
 conn = hAllocConn(database);
 out = mustOpen("j.dat", "w");
 
 sqlSafef(query, sizeof query, "select name, proteinID from %s.knownGene;", database);
 sr = sqlMustGetResult(conn, query);
 while ((row = sqlNextRow(sr)) != NULL)
     {
     /* kgID and spDisplayID point into the result row; they are not freed here. */
     char *kgID = row[0];
     char *spDisplayID = row[1];
     char *spID, *desc, *seqType;
     char *geneSymbol = NULL;
     char *refseqID = NULL;
     char *protAcc = NULL;	/* protein Accession number from NCBI */
 
     /* Protein accession, plus the protein description which serves as the
      * default description unless one of the branches below replaces it. */
     sqlSafefFrag(cond, sizeof cond, "displayID='%s'", spDisplayID);
     spID = sqlGetField(proteinDB, "spXref3", "accession", cond);
     desc = sqlGetField(proteinDB, "spXref3", "description", cond);
 
     /* A hit here marks the gene as genomic-sequence based (seqType 'g'). */
     sqlSafefFrag(cond, sizeof cond, "name='%s' and seqType='g'", kgID);
     seqType = sqlGetField(database, "knownGeneLink", "seqType", cond);
 
     if (seqType != NULL)
         {
         /* special processing for RefSeq DNA based genes: the kgID itself is
          * looked up as an mRNA accession in refLink. */
         sqlSafefFrag(cond, sizeof cond, "mrnaAcc = '%s'", kgID);
         geneSymbol = sqlGetField(roDb, "refLink", "name", cond);
         if (geneSymbol != NULL)
             {
             /* Own a copy rather than aliasing the result row. */
             refseqID = cloneString(kgID);
             xrefReplace(&desc, sqlGetField(roDb, "refLink", "product", cond));
             protAcc = sqlGetField(roDb, "refLink", "protAcc", cond);
             }
         }
     else
         {
         /* Prefer the HUGO symbol and description when a symbol is present. */
         sqlSafefFrag(cond, sizeof cond, "displayID = '%s'", spDisplayID);
         geneSymbol = sqlGetField(proteinDB, "spXref3", "hugoSymbol", cond);
         if (!xrefIsEmpty(geneSymbol))
             xrefReplace(&desc, sqlGetField(proteinDB, "spXref3", "hugoDesc", cond));
 
         /* Map the mRNA to a RefSeq accession, then to its protein accession. */
         sqlSafefFrag(cond, sizeof cond, "mrna = '%s'", kgID);
         refseqID = sqlGetField(database, "mrnaRefseq", "refseq", cond);
         if (refseqID != NULL)
             {
             sqlSafefFrag(cond, sizeof cond, "mrnaAcc='%s'", refseqID);
             protAcc = sqlGetField(roDb, "refLink", "protAcc", cond);
 
             /* Still no symbol: fall back to the refLink name of the RefSeq. */
             if (xrefIsEmpty(geneSymbol) && !xrefIsEmpty(refseqID))
                 {
                 char *linkName = sqlGetField(roDb, "refLink", "name", cond);
                 if (linkName != NULL)
                     xrefReplace(&geneSymbol, linkName);
                 }
             }
         }
 
     /* fix missing fields while writing: an empty gene symbol becomes the kgID,
      * an empty description becomes "N/A", other missing values become "". */
     fprintf(out, "%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n",
         kgID, kgID, xrefOrEmpty(spID), spDisplayID,
         xrefIsEmpty(geneSymbol) ? kgID : geneSymbol,
         xrefOrEmpty(refseqID), xrefOrEmpty(protAcc),
         xrefIsEmpty(desc) ? "N/A" : desc);
 
     freeMem(spID);
     freeMem(desc);
     freeMem(seqType);
     freeMem(geneSymbol);
     freeMem(refseqID);
     freeMem(protAcc);
     }
 
 sqlFreeResult(&sr);
 /* carefulClose aborts on a failed close, so a short write is not silently
  * passed on to the sort step. */
 carefulClose(&out);
 hFreeConn(&conn);
 mustSystem("cat j.dat|sort|uniq  >kgXref.tab");
 mustSystem("rm j.dat");
 return(0);
 }