4898794edd81be5285ea6e544acbedeaeb31bf78
max
  Tue Nov 23 08:10:57 2021 -0800
Fixing pointers to README file for license in all source code files. refs #27614

diff --git src/hg/near/hgNetDist/hgNetDist.c src/hg/near/hgNetDist/hgNetDist.c
index 101d52c..7a06d95 100644
--- src/hg/near/hgNetDist/hgNetDist.c
+++ src/hg/near/hgNetDist/hgNetDist.c
@@ -1,347 +1,347 @@
 /* hgNetDist - Gene/Interaction Network Distance path length calculator for GS 
    Galt Barber 2005-05-08
    This code assumes the network is bi-directional (undirected).
 */
 
 /* Copyright (C) 2011 The Regents of the University of California 
- * See README in this or parent directory for licensing information. */
+ * See kent/LICENSE or http://genome.ucsc.edu/license/ for licensing information. */
 
 #include <stdio.h>
 #include "common.h"
 #include "linefile.h"
 #include "hash.h"
 #include "math.h"
 #include "options.h"
 
 #include "hdb.h"
 
 
 boolean skipFirst=FALSE;
 boolean weighted=FALSE;
 float threshold=2.0;
 
 static struct optionSpec options[] = {
    {"skipFirst", OPTION_BOOLEAN},
    {"weighted", OPTION_BOOLEAN},
    {"threshold", OPTION_FLOAT},
    {NULL, 0},
 };
 
 void usage()
 /* Explain usage and exit. */
 {
 errAbort(
   "hgNetDist - GS loader for gene/protein interaction network distances.\n"
   "usage:\n"
   "   hgNetDist input.tab output.tab\n"
   "This will read the input.tab file and calculate the distances\n"
   "between all the genes/proteins in the (interaction) network and store\n"
   "the results in the output.tab specified, for use with hgLoadNetDist.\n"
   "Input .tab file format is 3 columns: gene1 gene2 value.\n"
   "The value is the edge-weight/distance but is ignored by default, treated as 1.0.\n"
   "Output .tab file format is 3 columns: gene1 gene2 pathLength.\n"
   "options:\n"
   "   -skipFirst  - skip the first header row in input.\n"
   "   -weighted  - count the 3rd column value as network edge weight.\n"
   "   -threshold=9.9  - maximum network distance allowed, default=%f.\n"
   , threshold
   );
 }
 
 
 int numEdges = 0;
 
 int numGenes = 0;
 char **geneIds;  /* dynamically allocated array of strings */
 
 struct hash *geneHash = NULL;
 struct hash *aliasHash = NULL;
 struct hash *missingHash = NULL;
 
 /* Floyd-Warshall dyn prg arrays */
 float *d;   /* previous d at step k    */
 float *dd;  /* current d at step k+1   */
 #define INFINITE -1.0
 
 /* --- the fields needed from input tab file  ---- */
 
 struct edge
 /* Information about gene/protein network interaction 
    This code assumes the edges are bi-directional.
 */
     {
     struct edge *next;  	/* Next in singly linked list. */
     char *geneA;		/* geneId */
     char *geneB;		/* geneId */
     float weight;		/* distance or weight (default value is 1) */
     };
 
 int slEdgeCmp(const void *va, const void *vb)
 /* Compare two slNames. */
 {
 const struct edge *a = *((struct edge **)va);
 const struct edge *b = *((struct edge **)vb);
 return strcmp(a->geneA, b->geneA);
 }
 
 int slEdgeCmpWeight(const void *va, const void *vb)
 /* Compare two slNames. */
 {
 const struct edge *a = *((struct edge **)va);
 const struct edge *b = *((struct edge **)vb);
 if (a->weight > b->weight) return 1;
 if (a->weight < b->weight) return -1;
 return 0;
 }
 
 struct edge *edgeLoad(char **row)
 /* Load an edge from row fetched from linefile
  * Dispose of this with edgeFree(). */
 {
 struct edge *ret;
 
 AllocVar(ret);
 ret->geneA  = cloneString(row[0]);
 ret->geneB  = cloneString(row[1]);
 ret->weight = weighted ? atof(row[2]) : 1.0;
 return ret;
 }
 
 void edgeFree(struct edge **pEl)
 /* Free an edge */
 {
 struct edge *el;
 
 if ((el = *pEl) == NULL) return;
 freeMem(el->geneA);
 freeMem(el->geneB);
 freez(pEl);
 }
 
 void edgeFreeList(struct edge **pList)
 /* Free a list of dynamically allocated edges */
 {
 struct edge *el, *next;
 
 for (el = *pList; el != NULL; el = next)
     {
     next = el->next;
     edgeFree(&el);
     }
 *pList = NULL;
 }
 
 
 struct edge *readEdges(char *fileName)
 /* read edges from file */
 {
 struct lineFile *lf=NULL;
 struct edge *list=NULL, *el;
 char *row[3];
 int rowCount=3;
 
 
 lf=lineFileOpen(fileName, TRUE);
 if (lf == NULL)
     {
     return NULL;
     }
 
 if (skipFirst)
     {
     /* skip first header row */
     lineFileNextRowTab(lf, row, rowCount);
     }
 
 while (lineFileNextRowTab(lf, row, rowCount))
     {
     el = edgeLoad(row);
     slAddHead(&list,el);
     }
 
 lineFileClose(&lf);
 return list;
 }
 
 
 void hgNetDist(char *inTab, char *outTab)
 {
 
 struct edge *edges = NULL;
 struct edge *edge = NULL;
 
 int k=0, i=0, j=0;
 float *dij=NULL, *dik=NULL, *dkj=NULL, *ddij=NULL, *tempswap=NULL;
 
 FILE *f=NULL;
 
 verbose(2,"inTab=%s outTab=%s \n", inTab, outTab);
 
 verbose(2,"reading edges %s \n",inTab);
 
 edges = readEdges(inTab);
 if (edges == NULL)
 {
 errAbort("readEdges returned NULL for %s.\n",inTab);
 }
 
 numEdges = slCount(edges);
 verbose(2,"slCount(edges)=%d for %s \n",numEdges,inTab);
 
 if (weighted)
     {
     slSort(&edges, slEdgeCmpWeight);
     verbose(2,"slSort done for %s \n",inTab);
     }
 
 
 /* for now, default to using numEdges for size estimate since we don't know numGenes at this point */
 /* estimate numGenes worst case as numEdges*2 as all disjoint pairs edges */
 geneHash = newHash(logBase2((numEdges*2)));  
 geneIds = needMem((numEdges*2)*sizeof(char*));  
 
 verbose(2,"beginning processing data %s ... \n",inTab);
 for(edge = edges; edge; edge = edge->next)
     {
     int n = 0;
     n = hashIntValDefault(geneHash,edge->geneA,-1);
     if (n == -1) 
 	{
 	geneIds[numGenes] = edge->geneA;
 	hashAddInt(geneHash, edge->geneA, numGenes++);
 	}
     n = hashIntValDefault(geneHash,edge->geneB,-1);
     if (n == -1) 
 	{
 	geneIds[numGenes] = edge->geneB;
 	hashAddInt(geneHash, edge->geneB, numGenes++);
 	}
     }
 
 verbose(2,"number of nodes=%d \n",numGenes);
 
 /* allocate arrays for Floyd-Warshall */
 d  = (float *) needHugeMem(numGenes*numGenes*sizeof(float));  
 dd = (float *) needHugeMem(numGenes*numGenes*sizeof(float));  
 
 /* initialize first array d */
 /* clear values to "INFINITE" */
 for(i=0;i<numGenes;++i)
     {
     for(j=0;j<numGenes;++j)
 	{
     	dij = d+(i*numGenes)+j;
 	if (i==j)
 	    {
 	    *dij = 0.0;
 	    }
 	else
 	    {
 	    *dij = INFINITE;
 	    }
 	}
     }
 
     
 
 /* initialize d values from input edges */
 for(edge = edges; edge; edge = edge->next)
     {
     int ia = 0, ib = 0;
     ia = hashIntVal(geneHash,edge->geneA);
     ib = hashIntVal(geneHash,edge->geneB);
     dij = d+(ia*numGenes)+ib;
     /* filter -  initialize each unique pair only once, 
      * priority least weighted distance due to sort above */
     if ((*dij == INFINITE) || (*dij==0.0 && ia==ib))
 	{
 	*dij = edge->weight;
 	/* repeat b/c non-directional undirected graph assumption */
 	dij = d+(ib*numGenes)+ia;
 	*dij = edge->weight;
 	}
     }
 
 /* repeat k times through the Floyd-Warshall dyn prg algorithm */
 for(k=0;k<numGenes;++k)
     {
    
     if ((k % 100)==0)
     	verbose(2,"k=%d \n",k);
 	
     for(i=0;i<numGenes;++i)
 	{
 	for(j=0;j<numGenes;++j)
 	    {
 	    dij  = d +(i*numGenes)+j;
 	    ddij = dd+(i*numGenes)+j;
 	    dik  = d +(i*numGenes)+k;
 	    dkj  = d +(k*numGenes)+j;
 	    if ((*dik == INFINITE) || (*dkj == INFINITE))
 		{
     		*ddij = *dij;
 		}
 	    else
 		{
 		float sumkj = *dik + *dkj;
 		if (*dij == INFINITE)
 		    {
 		    *ddij = sumkj;
 		    }
 		else
 		    {
 		    *ddij = (*dij < sumkj) ? *dij : sumkj;
 		    }
 		}
 	    }
 	}
 
     tempswap = dd;
     dd = d;
     d = tempswap;
 	
     }
 
 /* print final dyn prg array values */
 f = mustOpen(outTab,"w");
 for(i=0;i<numGenes;++i)
     {
     for(j=0;j<numGenes;++j)
 	{
     	dij = d+(i*numGenes)+j;
 	if ((*dij >= 0.0) && (*dij <= threshold))
 	    {
 	    char *gi=NULL, *gj=NULL;
 	    gi=geneIds[i];
 	    gj=geneIds[j];
 	    fprintf(f,"%s\t%s\t%f\n",gi,gj,*dij);
 	    }
 	}
     }
 carefulClose(&f);    
 
 edgeFreeList(&edges);
 freeMem(d);
 freeMem(dd);
 
 }
 
 
 int main(int argc, char *argv[])
 /* Process command line. */
 {
 optionInit(&argc, argv, options);
 if (argc != 3)
     usage();
 
 skipFirst = optionExists("skipFirst");
 weighted = optionExists("weighted");
 threshold = optionFloat("threshold", threshold);
 
 hgNetDist(argv[1],argv[2]);
 
 verbose(2,"\ndone.\n");
 return 0;
 }