4898794edd81be5285ea6e544acbedeaeb31bf78
max
  Tue Nov 23 08:10:57 2021 -0800
Fixing pointers to README file for license in all source code files. refs #27614

diff --git src/hg/makeDb/hgLoadMaf/hgLoadMaf.c src/hg/makeDb/hgLoadMaf/hgLoadMaf.c
index 5c0d5cd..c16cf7a 100644
--- src/hg/makeDb/hgLoadMaf/hgLoadMaf.c
+++ src/hg/makeDb/hgLoadMaf/hgLoadMaf.c
@@ -1,331 +1,331 @@
 /* hgLoadMaf - Load a maf file index into the database. */
 
 /* Copyright (C) 2011 The Regents of the University of California 
- * See README in this or parent directory for licensing information. */
+ * See kent/LICENSE or http://genome.ucsc.edu/license/ for licensing information. */
 #include "common.h"
 #include "cheapcgi.h"
 #include "linefile.h"
 #include "hash.h"
 #include "options.h"
 #include "jksql.h"
 #include "hdb.h"
 #include "hgRelate.h"
 #include "portable.h"
 #include "maf.h"
 #include "scoredRef.h"
 #include "dystring.h"
 #include "errCatch.h"
 
 
 /* Command line options */
 
 /* multiz currently will generate some alignments that have a missing score
  * and/or contain only 1 or 2 sequences.  Webb Miller recommends these
  * be ignored, and he intends to remove them from multiz output.
  * When/if this happens, the -warn option should not be used.
  */
 
 /* command line option specifications */
 static struct optionSpec optionSpecs[] = {
     {"warn", OPTION_BOOLEAN},
     {"WARN", OPTION_BOOLEAN},
     {"test", OPTION_STRING},
     {"pathPrefix", OPTION_STRING},
     {"loadFile", OPTION_STRING},
     {"tmpDir", OPTION_STRING},
     {"refDb", OPTION_STRING},
     {"maxNameLen", OPTION_INT},
     {"defPos", OPTION_STRING},
     {"custom", OPTION_BOOLEAN},
     {NULL, 0}
 };
 
 boolean warnOption = FALSE;
 boolean warnVerboseOption = FALSE;  /* print warning detail */
 char *testFile = NULL;              /* maf filename if testing */
 char *pathPrefix = NULL;            /* extFile directory */
 static char *tmpDir = NULL;	    /* location to create a temporary file */
 char *loadFile;	                    /* file to read maf file from */
 char *refDb;	                    /* reference db (for custom tracks) */
 int maxNameLen = 0;	            /* maximum chrom name length */
 char *defPosFile;                   /* file to put default pos in */
 boolean isCustom;                   /* we're loading a custom track */
 
 void usage()
 /* Explain usage and exit. */
 {
 errAbort(
   "hgLoadMaf - Load a maf file index into the database\n"
   "usage:\n"
   "   hgLoadMaf database table\n"
   "options:\n"
   "   -warn            warn instead of error upon empty/incomplete alignments\n"
   "   -WARN            warn instead of error, with detail for the warning\n" 
   "   -test=infile     use infile as input, and suppress loading\n"
   "                    the database. Just create .tab file in current dir.\n" 
   "   -pathPrefix=dir  load files from specified directory \n"
   "                    (default /gbdb/database/table.\n"
   "   -tmpDir=<path>   path to directory for creation of temporary .tab file\n"
   "                    which will be removed after loading\n"
   "   -loadFile=file   use file as input\n"
   "   -maxNameLen=N    specify max chromosome name length to avoid\n"
   "                    reference to chromInfo table\n"
   "   -defPos=file     file to put default position in\n"
   "                    default position is first block\n"
   "   -custom          loading a custom track, don't use history\n"
   "                    or extFile tables\n"
   "\n"
   "NOTE: The maf files need to be in chromosome coordinates,\n"
   "      the reference species must be the first component, and \n"
   "      the blocks must be correctly ordered and be on the\n"
   "      '+' strand\n"
   );
 }
 
 struct mafComp *findComponent(struct mafAli *maf, char *prefix)
 /* Find maf component with the given prefix. */
 {
 int preLen = strlen(prefix);
 struct mafComp *mc;
 for (mc = maf->components; mc != NULL; mc = mc->next)
     {
     if (memcmp(mc->src, prefix, preLen) == 0 && mc->src[preLen] == '.')
         return mc;
     }
 return NULL;
 }
 
 void hgLoadMaf(char *database, char *table)
 /* hgLoadMaf - Load a maf file index into the database. */
 {
 struct fileInfo *fileList = NULL, *fileEl;
 struct sqlConnection *conn;
 long mafCount = 0;
 FILE *f = NULL;
 char extFileDir[512];
 char ext[10];
 char file[100];
 int indexLen;
 
 if (refDb == NULL)
     refDb = database;
 
 if (tmpDir == NULL)
     f = hgCreateTabFile(".", table);
 else
     f = hgCreateTabFile(tmpDir, table);
 
 
 if (testFile != NULL)
     {
     if (!fileExists(testFile))
         errAbort("Test file %s doesn't exist\n", testFile);
     splitPath(testFile, extFileDir, file, ext);
     strcat(file, ext);
     fileList = listDirX(extFileDir, file, TRUE);
     pathPrefix = extFileDir;
     }
 else
     {
     if (maxNameLen)
 	indexLen = maxNameLen;
     else
 	indexLen = hGetMinIndexLength(database);
 
     if (loadFile != NULL)
 	{
 	if (!fileExists(loadFile))
 	    errAbort("Load file %s doesn't exist\n", loadFile);
 	splitPath(loadFile, extFileDir, file, ext);
 	strcat(file, ext);
 	pathPrefix = extFileDir;
 	fileList = listDirX(pathPrefix, file, TRUE);
 	if (fileList == NULL)
 	    fileList = listDirX(".", file, TRUE);
 	}
     else
 	{
 	if (pathPrefix == NULL)
 	    {
 	    safef(extFileDir, sizeof(extFileDir), 
 		"/gbdb/%s/%s", database, table);
 	    pathPrefix = extFileDir;
 	    }
 	fileList = listDirX(pathPrefix, "*.maf", TRUE);
 	}
 
     if (isCustom)
 	conn = sqlConnect(database);
     else
 	conn = hgStartUpdate(database);
 
     scoredRefTableCreate(conn, table, indexLen);
     }
 if (fileList == NULL)
     errAbort("%s doesn't exist or doesn't have any maf files", pathPrefix);
 for (fileEl = fileList; fileEl != NULL; fileEl = fileEl->next)
     {
     char *fileName = fileEl->name;
     struct mafFile *mf;
     struct mafComp *mc;
     struct scoredRef mr;
     struct mafAli *maf;
     off_t offset;
     int warnCount = 0;
     int dbNameLen = strlen(refDb);
     HGID extId;
     if ((testFile != NULL) || isCustom)
         extId = 0;
     else
         extId = hgAddToExtFile(fileName, conn);
 
     struct errCatch *errCatch = errCatchNew();
     char *errMsg = NULL;
     if (errCatchStart(errCatch))
 	{
 	mf = mafOpen(fileName);
 	verbose(1,"Indexing and tabulating %s\n", fileName);
 	while ((maf = mafNextWithPos(mf, &offset)) != NULL)
 	    {
 	    double maxScore, minScore;
 	    mc = findComponent(maf, refDb);
 	    if (mc == NULL) 
 		{
 		char msg[2048];
 		safef(msg, sizeof(msg),
 				"Couldn't find %s. sequence line %d of %s\n", 
 				    refDb, mf->lf->lineIx, fileName);
 		if (warnOption || warnVerboseOption) 
 		    {
 		    warnCount++;
 		    if (warnVerboseOption)
 			verbose(1, "%s", msg);
 		    mafAliFree(&maf);
 		    continue;
 		    }
 		else 
 		    errAbort("%s", msg);
 		}
 
 	    ZeroVar(&mr);
 	    mr.chrom = mc->src + dbNameLen + 1;
 	    mr.chromStart = mc->start;
 	    mr.chromEnd = mc->start + mc->size;
 	    if (mc->strand == '-')
 		reverseUnsignedRange(&mr.chromStart, &mr.chromEnd, mc->srcSize);
 	    mr.extFile = extId;
 	    mr.offset = offset;
 
 	    if (defPosFile != NULL)
 		{
 		FILE *f = mustOpen(defPosFile, "w");
 
 		fprintf(f, "%s:%d-%d\n",mr.chrom, mr.chromStart+1, mr.chromEnd);
 		fclose(f);
 
 		defPosFile = NULL;
 		}
 
 	    /* The maf scores are the sum of all pairwise
 	     * alignment scores.  If you have A,B,C,D
 	     * species in the alignments it's the sum of
 	     * the AB, AC, AD, BC, BD, CD pairwise scores.
 	     * If you just had two species it would just
 	     * be a single pairwise score.  Since we don't
 	     * want to penalize things for missing species,
 	     * we basically divide by the number of possible
 	     * pairings.  To get it from the -100 to +100 (more or
 	     * less) per base blastz scoring scheme to something
 	     * that goes from 0 to 1, we also also divide by 200.
 	     * JK */
 
 	    /* scale using alignment size to get per-base score */
 	    mr.score = maf->score/mc->size;
 
 	    /* get scoring range -- depends on #species in alignment */
 	    mafColMinMaxScore(maf, &minScore, &maxScore);
 	    if (maxScore == minScore) 
 		/* protect against degenerate case -- 1 species, produces
 		 * max & min score of 0 */
 		mr.score = 0.0;
 	    else
 		/* translate to get zero-based score, 
 		 * then scale by blastz scoring range */
 		mr.score = (mr.score-minScore)/(maxScore-minScore);
 	    if (mr.score <= 0.0) 
 		{
 		if (warnOption || warnVerboseOption) 
 		    {
 		    warnCount++;
 		    if (warnVerboseOption)
 			verbose(1, "Score too small (raw %.1f scaled %.1f #species %d),"
 				" line %d of %s\n", 
 				maf->score, mr.score, slCount(maf->components),
 				mf->lf->lineIx, fileName);
 		    }
 		mr.score = 0.001;
 		}
 	    if (mr.score > 1.0) mr.score = 1.0;
 	    mafCount++;
 	    fprintf(f, "%u\t", hFindBin(mr.chromStart, mr.chromEnd));
 	    scoredRefTabOut(&mr, f);
 	    mafAliFree(&maf);
 	    }
 	mafFileFree(&mf);
 	}
     errCatchEnd(errCatch);
     if (errCatch->gotError)
 	{
 	errMsg = cloneString(errCatch->message->string);
 	}
     errCatchFree(&errCatch);
 
     if (errMsg)
 	{
 	if (!isCustom)
 	    hgPurgeExtFile(extId,  conn);
 	errAbort("%s", errMsg);
 	}
 
     if (warnCount)
         verbose(1, "%d warnings\n", warnCount);
     }
 if (testFile != NULL)
     return;
 verbose(1, "Loading %s into database\n", table);
 if (tmpDir == NULL)
     hgLoadTabFile(conn, ".", table, &f);
 else
     hgLoadTabFile(conn, tmpDir, table, &f);
 verbose(1, "Loaded %ld mafs in %d files from %s\n", mafCount, slCount(fileList), pathPrefix);
 if (isCustom)
     sqlDisconnect(&conn);
 else
     hgEndUpdate(&conn, 0, 0, "Add %ld mafs in %d files from %s\n", mafCount, slCount(fileList), pathPrefix);
 
 /*	if temp dir specified, unlink file to make it disappear */
 if ((char *)NULL != tmpDir)
     hgUnlinkTabFile(tmpDir, table);
 }
 
 int main(int argc, char *argv[])
 /* Process command line. */
 {
 optionInit(&argc, argv, optionSpecs);
 warnOption = optionExists("warn");
 isCustom = optionExists("custom");
 warnVerboseOption = optionExists("WARN");
 testFile = optionVal("test", NULL);
 pathPrefix = optionVal("pathPrefix", NULL);
 tmpDir = optionVal("tmpDir", NULL);
 loadFile = optionVal("loadFile", loadFile);
 refDb = optionVal("refDb", refDb);
 maxNameLen = optionInt("maxNameLen", maxNameLen);
 defPosFile = optionVal("defPos", defPosFile);
 
 if (argc != 3)
     usage();
 
 hgLoadMaf(argv[1], argv[2]);
 return 0;
 }