4898794edd81be5285ea6e544acbedeaeb31bf78
max
  Tue Nov 23 08:10:57 2021 -0800
Fixing pointers to README file for license in all source code files. refs #27614

diff --git src/hg/protein/kgPick/kgPick.c src/hg/protein/kgPick/kgPick.c
index 3e38668..07596f1 100644
--- src/hg/protein/kgPick/kgPick.c
+++ src/hg/protein/kgPick/kgPick.c
@@ -1,223 +1,223 @@
 /* kgPick - select the best repersentative mRNA/protein pair from KG candidates 
    having identical CDS Structure */
 
 /* Copyright (C) 2013 The Regents of the University of California 
- * See README in this or parent directory for licensing information. */
+ * See kent/LICENSE or http://genome.ucsc.edu/license/ for licensing information. */
 #include "common.h"
 #include "hCommon.h"
 #include "hdb.h"
 
 char printedMrna[40], printedProt[40], printedCds[40];
 
 void usage()
 /* Explain usage and exit. */
 {
 errAbort(
   "kgPick - select the best repersentative mRNA/protein pair\n"
   "usage:\n"
   "   kgPick tempDb outfile\n"
   "      tempDb is the temp KG DB name\n"
   "      roDb   is the read only genome DB name\n"
   "      spDb   is the proteinsYYMMDDt DB name\n"
   "      outfile is the output file name\n"
   "      dupOutfile is the duplicate mrna/prot output file name\n"
   "example: kgPick kgHg17ETemp hg17 proteins050415 kg2.tmp dupSpMrna.tmp\n");
 }
 
 /* convert a possible compund mrnaID ("protID_mrnaID") to just the mrnaID. */
 char *convert(char *compound)
 {
 char *chp;
     
 chp = compound;
 if ((*chp == 'N') && (*(chp+1) == 'M') && (*(chp+2) == '_'))
     {
     return compound;
     }
 else
     {
     chp = strstr(compound, "_");
     if (chp == NULL) return compound;
 
     chp ++;
     }
 return chp;
 }	
     
 void printKg(struct sqlConnection *conn, struct sqlConnection *conn2, 
              char *kgTempDb, char *spDb, char *cdsId, char *mrnaID, char *protAcc, 
 	     char *alignID, FILE *outf)
 {
 char query[256];
 struct sqlResult *sr;
 char **row;
 int i;
 char condStr[256];
 char *displayId;
 
 sqlSafef(query, sizeof(query), "select * from %s.kgCandidate where alignID='%s'", kgTempDb, alignID);
 sr = sqlMustGetResult(conn, query);
 row = sqlNextRow(sr);
 while (row != NULL)
     {
     fprintf(outf, "%s\t", convert(row[0]));
     for (i=1; i<10; i++) 
     	{
 	fprintf(outf, "%s\t", row[i]);
 	}
     
     /* get protein display ID */
     sqlSafefFrag(condStr, sizeof condStr, "acc='%s'", protAcc);
     displayId = sqlGetField(spDb, "displayId", "val", condStr);
     if (displayId == NULL) displayId = protAcc;
     
     fprintf(outf, "%s\t%s\t%s\n", displayId, alignID, cdsId);
     
     /* remember what have been printed */
     safecpy(printedCds, sizeof(printedCds), cdsId);
     safecpy(printedMrna, sizeof(printedCds), row[0]);
     safecpy(printedProt, sizeof(printedProt), displayId);
     row = sqlNextRow(sr);
     }
 sqlFreeResult(&sr);
 }
 
 int main(int argc, char *argv[])
 {
 struct sqlConnection *conn2, *conn3, *conn4, *conn5;
 char query2[256], query3[256];
 struct sqlResult *sr2, *sr3;
 char **row2, **row3;
 char condStr[256];
 char *kgTempDb;
 char *outfileName, *dupOutfileName;
 FILE *outf, *dupOutf;
 char *score;
 char *protAcc;
 char *ranking;
 char *cdsId, *mrnaID, *alignID;
 char *protDbId;
 char *spDb;
 boolean gotRefseq, gotNonRef;
 boolean isRefseq;
 int printCnt;
 char *roDb;
 char *refseqId;
 
 if (argc != 6) usage();
 kgTempDb       = argv[1];
 roDb           = argv[2];
 spDb           = argv[3];
 outfileName    = argv[4];
 dupOutfileName = argv[5];
 
 outf    = mustOpen(outfileName, "w");
 dupOutf = mustOpen(dupOutfileName, "w");
 
 conn2= hAllocConn(roDb);
 conn3= hAllocConn(roDb);
 conn4= hAllocConn(roDb);
 conn5= hAllocConn(roDb);
 
 strcpy(printedCds, "");
 strcpy(printedMrna, "");
 strcpy(printedProt, "");
 
 /* go through each unique cds structure */
 sqlSafef(query2, sizeof(query2), "select distinct cdsId from %s.kgCandidateZ", kgTempDb);
 sr2 = sqlMustGetResult(conn2, query2);
 row2 = sqlNextRow(sr2);
 
 while (row2 != NULL)
     {
     /* get all mrna/prot pairs with this CDS structure */
     cdsId = row2[0];
    
     /* ranking reflects CDS quaility and preference for RefSeq and MGC */
     /* Swiss-Prot is prefered than TrEMBL */
     /* finally, higher prot-Mrna alignment score is prefered */
     sqlSafef(query3, sizeof(query3), 
     "select * from %s.kgCandidateZ where cdsId='%s' order by ranking, protDbId, score desc", 
     	    kgTempDb, cdsId);
     sr3  = sqlMustGetResult(conn3, query3);
     row3 = sqlNextRow(sr3);
 	      
     gotRefseq    = FALSE;
     gotNonRef    = FALSE;
     
     printCnt = 0;
     while (row3 != NULL)
         {
         cdsId    = row3[0];
 	ranking  = row3[1];
 	protDbId = row3[2];
 	score    = row3[3];
 	mrnaID   = row3[4];
 	protAcc  = row3[5];
 	alignID  = row3[6];
 
     	isRefseq    = FALSE;
 	
 	if ((mrnaID[0] == 'N') && (mrnaID[1] == 'M') && (mrnaID[2] == '_'))
 	    {
 	    isRefseq  = TRUE;
 	    }
 	
 	if (isRefseq)
 	    {
 	    /* print one qualified RefSeq */
 	    if (!gotRefseq)
 	    	{
 		/* double check again to see if the RefSeq is still valid */
     		sqlSafefFrag(condStr, sizeof condStr, "name='%s'", mrnaID);
     		refseqId = sqlGetField(roDb, "refGene", "name", condStr);
 		if (refseqId != NULL)
 		    {
 		    if (printCnt < 1)
 		    	{
 		    	printKg(conn4, conn5, kgTempDb, spDb, cdsId, mrnaID, protAcc, 
 		    		alignID, outf);
 		    	printCnt++;
 		    	gotRefseq = TRUE;
 		    	}
 		    }
 		else
 		    {
 		    fprintf(stderr, "%s no longer found in refGene.\n", mrnaID);
 		    }
 		}
 	    }
 	else
 	    {
 	    /* print out only one non-RefSeq entry for this CDS structure */
 	    if (!gotRefseq && (!gotNonRef))
 	    	{
 	        printKg(conn4, conn5, kgTempDb, spDb, cdsId, mrnaID, protAcc, alignID, outf);
 		printCnt++;
 		gotNonRef = TRUE;
 		}
 	    }
 	
 	/* save duplicates of mrna/prot that has same CDS but does not make it to final KG set */
 	if (sameWord(printedCds, cdsId))
 	    {
 	    if ( (!sameWord(printedMrna, mrnaID)) || (!sameWord(printedProt, protAcc)) )
 	    	{
 	    	fprintf(dupOutf, "%s\t%s\t%s\t%s\n", convert(printedMrna), printedProt, convert(mrnaID), protAcc);
 	   	}
 	    }
         row3 = sqlNextRow(sr3);
 	}
     
     sqlFreeResult(&sr3);
     row2 = sqlNextRow(sr2);
     }
 sqlFreeResult(&sr2);
 
 hFreeConn(&conn2);
 hFreeConn(&conn3);
 fclose(outf);
 fclose(dupOutf);
 return(0);
 }