4898794edd81be5285ea6e544acbedeaeb31bf78
max
  Tue Nov 23 08:10:57 2021 -0800
Fixing pointers to README file for license in all source code files. refs #27614

diff --git src/hg/protein/spm6/spm6.c src/hg/protein/spm6/spm6.c
index c9c7a65..1b9ba1b 100644
--- src/hg/protein/spm6/spm6.c
+++ src/hg/protein/spm6/spm6.c
@@ -1,293 +1,293 @@
 /* spm6 - generates sorted.lis and knownGene0.tab for further KG duplicates processing */
 
 /* Copyright (C) 2013 The Regents of the University of California 
- * See README in this or parent directory for licensing information. */
+ * See kent/LICENSE or http://genome.ucsc.edu/license/ for licensing information. */
 #include "common.h"
 #include "hCommon.h"
 #include "hdb.h"
 
 #define PDB_YES        90000
 #define SWISSPROT_YES   8000
 #define REFSEQREVIWED    700
 #define REFSEQPROVISIONAL 60
 
 FILE   *o3, *o4, *o5;
 
 int exStart[500], exEnd[500];
 int aaStart[500], aaEnd[500];
 
 void usage()
 /* Explain usage and exit. */
 {
 errAbort(
   "spm6 - generates sorted.lis and knownGene0.tab for further duplicates processing\n"
   "usage:\n"
   "   spm6 xxxx db ro_db\n"
   "      xxxx is the release date of SWISS-PROT data\n"
   "      db is the genome under construction, e.g.: kgDB\n"
   "      ro_db is the target genome to read data from\n"
   "example:\n"
   "   spm6 040115 kgDB hg16\n");
 }
 void getCDS(char *cdsStart, char *cdsEnd, char *exonCount, char *exonStarts_in, char *exonEnds_in)
 
 {
 int exCount;
 
 char *sp, *ep;
 char *chp;
 int  i, j;
 int  aalen;
 int  cdsS, cdsE;
 int  eS, eE;
 int  estLen;
 char *exonStarts, *exonEnds;
 
 estLen = 0;
 exonStarts = strdup(exonStarts_in);
 exonEnds   = strdup(exonEnds_in);
 
 sscanf(exonCount, "%d", &exCount);
 
 sp = exonStarts;
 ep = exonEnds;
 sscanf(cdsStart, "%d", &cdsS);
 sscanf(cdsEnd, "%d", &cdsE);
 	
 aalen = 0;
 j=0;
 for (i=0; i<exCount; i++)
     {
     chp = strstr(sp, ",");
     *chp = '\0';
     sscanf(sp, "%d", &(exStart[i]));
     chp++;
     sp = chp;
 
     chp = strstr(ep, ",");
     *chp = '\0';
     sscanf(ep, "%d", &(exEnd[i]));
 	
     eS = exStart[i];
     eE = exEnd[i];
 		
     if (cdsS > eS)
 	{
 	eS = cdsS;
 	}
     if (cdsE < eE)
 	{
 	eE = cdsE;
 	}
     if (eS > eE) 
 	{
 	eS = 0;
 	eE = 0;
 	}
     estLen = estLen + (eE-eS+1);
     if (eS != eE)
 	{
 	aaStart[j] = aalen;
 	aaEnd[j] = aaStart[j] + (eE- eS +1)/3 -1;
 	aalen = aalen + (eE- eS +1)/3;
 			
 	j++;
 	}
     else
 	{
 	}
 		
     chp++;
     ep = chp;
     }
 for (i=0; i<exCount; i++)
     {
     if (exEnd[i] < cdsS)
 	{
 	// exon is outside of CDS at right
 	// do nothing
 	}
     else
 	{
 	if ((exStart[i] <= cdsS) && (cdsS <= exEnd[i])) 
 	    {
 	    fprintf(o4, "%d,", cdsS);
 	    }
 	else
 	    {
 	    if (exStart[i] <= cdsE)
 		{
 		fprintf(o4, "%d,", exStart[i]);
 		}
 	    }
 	}
     }
 fprintf(o4, "\t");
 	
 for (i=0; i<exCount; i++)
     {
     if (exEnd[i] < cdsS)
 	{
 	// exon is outside of CDS at right
 	// do nothing
 	}
     else
 	{
 	if ((exStart[i] <= cdsE) && (cdsE <= exEnd[i]))
 	    {
 	    fprintf(o4, "%d,", cdsE);
 	    }
 	else
 	    {
 	    if (cdsE > exEnd[i])
 		{
 		fprintf(o4, "%d,", exEnd[i]);
 		}
 	    }
 	}
     }
 fprintf(o4, "\t");
 }
 
 int main(int argc, char *argv[])
 {
 struct sqlConnection *conn2;
 char proteinName[40], mrnaName[40];
 char query2[256];
 char cond_str[500];
 
 FILE *inf;
 char line[1000];
 int alignmentID;
 
 struct sqlResult *sr2;
 char **row2;
     
 char *name, *chrom, *strand, *txStart, *txEnd, *cdsStart, *cdsEnd,
      *exonCount, *exonStarts, *exonEnds;
 
 char *mrnaDate;
 char *pdbID;
 
 char *genomeDBname;
 char *genomeReadOnly;
 char *proteinDataDate;
 char proteinsDB[40];
 
 char *proteinDbSource;
     
 int itxStart, itxEnd;
 int transcriptLength;
 int priority;    
 
 if (argc != 4) usage();
 
 proteinDataDate = argv[1];
 genomeDBname    = argv[2];
 genomeReadOnly = argv[3];
 
 o3 = fopen("j.dat", "w");
 o4 = fopen("jj.dat", "w");
 o5 = fopen("align.lis", "w");
     
 conn2= hAllocConn(genomeReadOnly);
 
 inf   = mustOpen("best.lis", "r");
 alignmentID = 0;
 
 while (fgets(line, 1000, inf) != NULL)
     {
     sscanf(line, "%s\t%s", proteinName, mrnaName);
 
     priority = 0;
     sqlSafefFrag(cond_str, sizeof cond_str, "sp='%s'", proteinName);
     sprintf(proteinsDB, "proteins%s", proteinDataDate);
     pdbID= sqlGetField(proteinsDB, "pdbSP", "pdb", cond_str);
     if (pdbID != NULL)
 	{
 	priority = priority + PDB_YES;
 	}
 	
     sqlSafefFrag(cond_str, sizeof cond_str, "displayID='%s'", proteinName);
     proteinDbSource  = sqlGetField(proteinsDB, "spXref2", "biodatabaseID", cond_str);
     if (proteinDbSource == NULL)
 	{
 	printf("No proteinDbSource found for %s, skipping it ... \n", proteinName);
 	break;
 	}
 
     if (strcmp(proteinDbSource, "1") == 0)
 	{
 	priority = priority + SWISSPROT_YES;
 	}
 
     // get mRNA date
     if ((mrnaDate = hGenbankModDate(mrnaName, conn2)) == NULL)
         {
         fprintf(stderr, "%s could not be found!!!\n", mrnaName);
         exit(1);
         }
 	
     sqlSafef(query2, sizeof query2,  "select * from %sTemp.mrnaGene where name='%s';", genomeDBname, mrnaName);
     	
     sr2 = sqlMustGetResult(conn2, query2);
     row2 = sqlNextRow(sr2);
     while (row2 != NULL)
 	{
  	name 	= row2[0];
 	chrom 	= row2[1];
 	strand	= row2[2];
  	txStart = row2[3];
 	txEnd   = row2[4];
 	cdsStart= row2[5]; 
 	cdsEnd	= row2[6];
 	exonCount = row2[7]; 
 	exonStarts= row2[8]; 
 	exonEnds  = row2[9];	
 
 	fprintf(o4, "%s\t", chrom);
 		
 	getCDS(cdsStart, cdsEnd, exonCount, exonStarts, exonEnds);
 
 	sscanf(txStart, "%d", &itxStart);
 	sscanf(txEnd, "%d", &itxEnd);
 	transcriptLength = itxEnd - itxStart;
 
 	fprintf(o4, "%d\t%010d\t%s\t%s\t%s\t%d\n", 
 		priority, transcriptLength, mrnaDate, name, proteinName, alignmentID);
 	fprintf(o5, "%d\t%d\t%s\t%s\t%s\n", 
 		alignmentID, priority,  name, proteinName, proteinDbSource);
 
 	fprintf(o3,"%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%d\n",
  	    	name,
 	    	chrom,
 	    	strand,
      		txStart,
     		txEnd,
  	   	cdsStart,
     		cdsEnd,
 	    	exonCount,
 	    	exonStarts,
     		exonEnds,
 		proteinName,
 		alignmentID);
 		
 	alignmentID++;
 	row2 = sqlNextRow(sr2);
 	}
     sqlFreeResult(&sr2);
     }
 
 hFreeConn(&conn2);
 	
 fclose(o3);
 fclose(o4);
 fclose(o5);
     
 mustSystem("cat j.dat|sort|uniq >knownGene0.tab");
 mustSystem("rm j.dat");
 mustSystem("cat jj.dat|sort -r |uniq >sorted.lis");
 mustSystem("rm jj.dat");
 return(0);
 }