4898794edd81be5285ea6e544acbedeaeb31bf78
max
  Tue Nov 23 08:10:57 2021 -0800
Fixing pointers to README file for license in all source code files. refs #27614

diff --git src/hg/zoo/analyze4d/analyze.c src/hg/zoo/analyze4d/analyze.c
index 40232de..154e578 100644
--- src/hg/zoo/analyze4d/analyze.c
+++ src/hg/zoo/analyze4d/analyze.c
@@ -1,259 +1,259 @@
 /* Analyze 4d - read zoo raw alignment, read 4d sites and do analysis of changes
    at 4d sites. The zoo alignments are read in using Webb's routine from read_malign.h 
    Trevor Bruen 2002 */
 
 /* Copyright (C) 2011 The Regents of the University of California 
- * See README in this or parent directory for licensing information. */
+ * See kent/LICENSE or http://genome.ucsc.edu/license/ for licensing information. */
 
 #include "util.h"
 #include "ctype.h"
 #include "string.h"
 #include "read_malign.h"
 
 
 #define SAME_STRING(x,y) (!strcasecmp(x,y))
 #define ALPHA_SIZE 4
 
 /* Print the totals for each type of base */
 void print_totals(int, int, int***, char *);
 
 /* Translate a base to an integer */
 int translate(char );
 
 /* Print the program usage */
 void print_usage();
 
 int main( int argc, char **argv)
 {
 /* Index default to -1 as flage for special human consideration */
 int i,j,k,nrow, ncol, *sites, **site_members,nsites, nspecies,*table,index=-1,total_columns=0,column_differs=0,column_4d_differs=0,*spec_breakdown,same;
 int **base_count,**bases_in_4d;
 char **seq_name, **site_species, **species,*token,*spec_name,ref;
 
 /* Needed for Webb's library functions */
 argv0 = "analyze";
 if ((argc != 3) && (argc != 4)  )
     print_usage();
 
 
 /* Determine which species to use as reference */
 if(argc == 4)
     {
     token = strtok(argv[3], "=");
     spec_name = strtok(NULL, " \n");
     }
 else
     spec_name="human";
 
 /* Read the alignment */
 site_species = read_malign(argv[1], &nrow, &ncol, &seq_name);
 
 /* Now read the 4d sites... */
 site_members = read_sites(argv[2], &nspecies, &nsites, &species, &sites);
 
 /* Make sure nspecies and nrow correspond -subtract one because of implicit human in 4d*/
 if(nspecies != (nrow-1))
     fatal("Species in alignment must correspond to number of species in 4d sites");
 
 /* Build conversion table */
 table=ckalloc(nspecies * sizeof(int));
 
 for(i=0;i<nspecies;i++)
     for(j=0;j<nrow;j++)
 	if(SAME_STRING(species[i],seq_name[j]))
 	    table[i]=j;
 
 
 /* Build species breakdown table */
 spec_breakdown=ckalloc(nspecies * sizeof(int));
 	  
 for(i=0;i<nspecies;i++)
     spec_breakdown[i]=0;
 
 /* Find species */	   
 if(!SAME_STRING(spec_name,"human")){
     for(i=0;i<nspecies;i++)
 	if(SAME_STRING(species[i],spec_name))
 	    index=i;
 
     if(index==-1)
 	fatal("Species not found");
 
 }
 
 
 /* Build count tables */
 base_count=ckalloc(ALPHA_SIZE * sizeof(int *));
 for(i=0;i<ALPHA_SIZE;i++)
     base_count[i]=ckalloc((ALPHA_SIZE+1) * sizeof(int));
 
 bases_in_4d=ckalloc(ALPHA_SIZE * sizeof(int *));
 for(i=0;i<ALPHA_SIZE;i++)
     bases_in_4d[i]=ckalloc((ALPHA_SIZE+1) * sizeof(int)); 
 
 /* Initialize base count arrays */
 for(i=0;i<ALPHA_SIZE;i++)
     for(j=0;j<ALPHA_SIZE;j++)
 	base_count[i][j]=(bases_in_4d[i][j]=0);
 
 /* For each 4d site */
 for(i=0;i<nsites;i++)
     {
     /* If the species is present... Special exception for human*/
     if(index==-1 || site_members[index][i])
 	{
 	/* Generate total bases */	
 	
 	total_columns++;
 	same=1;
 	
 	if(index!=-1)
 	    ref=toupper(site_species[table[index]][sites[i]]);
 	else
 	    ref=toupper(site_species[0][sites[i]]);
 	
 	/* Check for homogeneity */
 	    for(j=0;j<nspecies;j++)
 		{
 		if(j==index)
 		    continue;
 		if(toupper(site_species[table[j]][sites[i]])!=ref)
 		    same=0;
 		}
 	    if(!same)
 		{
 		column_differs++;
 		/* Find mix of bases */
 		for(j=0;j<nspecies;j++)
 		    {
 		    if(j==index)
 			continue;
 		    base_count[translate(ref)][translate(site_species[table[j]][sites[i]])]++;
 		    }
 		
 		}
 	    /* Check for homogeneity within 4d species */
 	    same=1;
 	    
 	    for(j=0;j<nspecies;j++)
 		{
 		if(j==index)
 		    continue;
 		/* If species is present at site */
 		if(site_members[j][i])
 		    {
 		    spec_breakdown[j]++;
 		    if(toupper(site_species[table[j]][sites[i]]) != ref)
 			{
 			same=0;
 			}
 		    }
 		}
 	    if(!same)
 		{
 		column_4d_differs++;
 		/* Find mix of bases */
 		for(j=0;j<nspecies;j++)
 		    {
 		    if(j==index)
 			continue;
 		    if(site_members[j][i])
 			bases_in_4d[translate(ref)][translate(site_species[table[j]][sites[i]])]++;
 		    }
 		
 		}
 	}
     }
 
 
 printf("\n\n");
 printf("Results of Bases Substitution at 4d sites.\n");
 printf("------------------------------------------\n\n");
 
 printf("Considering all species (%s as Reference)\n",spec_name);
 printf("--------------------------------------------\n\n");
 print_totals(total_columns, column_differs, &base_count, "Human");
 
 printf("\nConsidering species which share the site (%s as Reference)\n",spec_name);
 printf("-------------------------------------------------------------\n\n");
 print_totals(total_columns, column_4d_differs, &bases_in_4d, "Human");
 printf("\n");
 
 /* Result of species breakdown */
 for(i=0;i<nspecies;i++)
     {
     if(index==i)
 	continue;
     printf("Species %-8s occured %5d (%2.2f %)\n",species[i],spec_breakdown[i],(float)(spec_breakdown[i] * 100)/total_columns);
     }
 return 0;
 }
 
 /* Print the totals for each type of base */
 void print_totals(int total_columns, int column_differs, int ***base_count_a, char *species){
 
 int A,C,T,G,total;
 int **base_count=(*base_count_a);
 
 /* Very inefficient */
 A=base_count[0][0]+base_count[0][1]+base_count[0][2]+base_count[0][3]+base_count[0][4];
 C=base_count[1][0]+base_count[1][1]+base_count[1][2]+base_count[1][3]+base_count[1][4];
 T=base_count[2][0]+base_count[2][1]+base_count[2][2]+base_count[2][3]+base_count[2][4];
 G=base_count[3][0]+base_count[3][1]+base_count[3][2]+base_count[3][3]+base_count[3][4];
 total= A+C+G+T;
 
 printf("# Sites Considered: %5d\n\n",total_columns);
 printf("# Homogenous Sites: %5d (%2.2f)%\t# Non Homogeneous Sites: %5d (%2.2f)%\n\n",total_columns - column_differs,(((float)((total_columns - column_differs) * 100 ))/((float)total_columns)), column_differs,(((float)(column_differs * 100 ))/((float)total_columns)));  
 
 printf("A occurs %5d times (%2.2f %) and is replaced by: A %d (%2.2f %), C %d (%2.2f %), T %d (%2.2f %), G %d (%2.2f %), - %d (%2.2f %)\n\n",A,(float)(A * 100)/(float)(total),base_count[0][0],(float)(base_count[0][0] * 100)/(float)(A),base_count[0][1],(float)(base_count[0][1] * 100)/(float)(A),base_count[0][2],(float)(base_count[0][2] * 100)/(float)(A),base_count[0][3],(float)(base_count[0][3] * 100)/(float)(A),base_count[0][4],(float)(base_count[0][4] * 100)/(float)(A));            
 
 printf("C occurs %5d times (%2.2f %) and is replaced by: A %d (%2.2f %), C %d (%2.2f %), T %d (%2.2f %), G %d (%2.2f %), - %d (%2.2f %)\n\n",C,(float)(C * 100)/(float)(total),base_count[1][0],(float)(base_count[1][0] * 100)/(float)(C),base_count[1][1],(float)(base_count[1][1] * 100)/(float)(C),base_count[1][2],(float)(base_count[1][2] * 100)/(float)(C),base_count[1][3],(float)(base_count[1][3] * 100)/(float)(C),base_count[1][4],(float)(base_count[1][4] * 100)/(float)(C));
 
 printf("T occurs %5d times (%2.2f %) and is replaced by: A %d (%2.2f %), C %d (%2.2f %), T %d (%2.2f %), G %d (%2.2f %), - %d (%2.2f %)\n\n",T,(float)(T * 100)/(float)(total),base_count[2][0],(float)(base_count[2][0] * 100)/(float)(T),base_count[2][1],(float)(base_count[2][1] * 100)/(float)(T),base_count[2][2],(float)(base_count[2][2] * 100)/(float)(T),base_count[2][3],(float)(base_count[2][3] * 100)/(float)(T),base_count[2][4],(float)(base_count[2][4] * 100)/(float)(T));
 
 printf("G occurs %5d times (%2.2f %) and is replaced by: A %d (%2.2f %), C %d (%2.2f %), T %d (%2.2f %), G %d (%2.2f %), - %d (%2.2f %)\n\n",G,(float)(G * 100)/(float)(total),base_count[3][0],(float)(base_count[3][0] * 100)/(float)(G),base_count[3][1],(float)(base_count[3][1] * 100)/(float)(G),base_count[3][2],(float)(base_count[3][2] * 100)/(float)(G),base_count[3][3],(float)(base_count[3][3] * 100)/(float)(G),base_count[3][4],(float)(base_count[3][4] * 100)/(float)(G));
 }
 
 /* Prints out proper invocation and exits */
 void print_usage()
 {
 fprintf(stderr,
 	"analyze - Analyze the substituation at 4d sites.  First run amalagam
             shell script on gff files, then use this program to check 
             the substitution that occurs at the 4d sites.  It defaults
             to using human as reference, and assumes human is the first species
             alignment.  This program is not very robust.
 usage:
   analyze zooAlignmentfile 4dfile [options]
 
 options:
   -s= species - use a different species as base.\n");
 exit(1);
 }
 
 
 /* Translate a base to an integer */
 int translate(char c)
 {
 char d= toupper(c);
 if(d == 'A')
     return 0;
 if(d == 'C')
     return 1;
 if(d == 'T')
     return 2;
 if(d == 'G')
     return 3;
 if(d == '-')
     return 4;
 
 return -1;
 }