4898794edd81be5285ea6e544acbedeaeb31bf78
max
  Tue Nov 23 08:10:57 2021 -0800
Fixing pointers to README file for license in all source code files. refs #27614

diff --git src/hg/cirm/cdw/cdwMakeContaminationQa/cdwMakeContaminationQa.c src/hg/cirm/cdw/cdwMakeContaminationQa/cdwMakeContaminationQa.c
index a68d69b..3d5e458 100644
--- src/hg/cirm/cdw/cdwMakeContaminationQa/cdwMakeContaminationQa.c
+++ src/hg/cirm/cdw/cdwMakeContaminationQa/cdwMakeContaminationQa.c
@@ -1,194 +1,194 @@
 /* cdwMakeContaminationQa - Screen for contaminants by aligning against contaminant genomes.. */
 
 /* Copyright (C) 2013 The Regents of the University of California 
- * See README in this or parent directory for licensing information. */
+ * See kent/LICENSE or http://genome.ucsc.edu/license/ for licensing information. */
 #include "common.h"
 #include "linefile.h"
 #include "hash.h"
 #include "options.h"
 #include "options.h"
 #include "sqlNum.h"
 #include "jksql.h"
 #include "basicBed.h"
 #include "genomeRangeTree.h"
 #include "correlate.h"
 #include "hmmstats.h"
 #include "portable.h"
 #include "cdw.h"
 #include "cdwLib.h"
 
 boolean keepTemp = FALSE;
 
 void usage()
 /* Explain usage and exit. */
 {
 errAbort(
   "cdwMakeContaminationQa - Screen for contaminants by aligning against contaminant genomes.\n"
   "usage:\n"
   "   cdwMakeContaminationQa startId endId\n"
   "where startId and endId are id's in the cdwFile table\n"
   "options:\n"
   "   -keepTemp\n"
   );
 }
 
 /* Command line validation table. */
 static struct optionSpec options[] = {
    {"keepTemp", OPTION_BOOLEAN},
    {NULL, 0},
 };
 
 struct cdwFile *cdwFileLoadIdRange(struct sqlConnection *conn, long long startId, long long endId)
 /* Return list of all files in given id range */
 {
 char query[256];
 sqlSafef(query, sizeof(query), 
     "select * from cdwFile where id>=%lld and id<=%lld and endUploadTime != 0 "
     "and updateTime != 0 and deprecated = ''", 
     startId, endId);
 return cdwFileLoadByQuery(conn, query);
 }
 
 #define FASTQ_SAMPLE_SIZE 100000
 
 int cdwQaContamMade(struct sqlConnection *conn, long long fileId, int targetId)
 /* Return number of times have fileId paired with targetId in cdwQaContam table. */
 {
 char query[256];
 sqlSafef(query, sizeof(query), 
     "select count(*) from cdwQaContam where fileId=%lld and qaContamTargetId=%d",
     fileId, targetId);
 return sqlQuickNum(conn, query);
 }
 
 struct cdwQaContamTarget *getContamTargets(struct sqlConnection *conn, 
     struct cdwFile *ef, struct cdwValidFile *vf)
 /* Get list of contamination targets for file - basically all targets that aren't in same
  * taxon as self. */
 {
 assert(vf->ucscDb != NULL);
 struct cdwAssembly *origAsm = cdwAssemblyForUcscDb(conn, vf->ucscDb);
 assert(origAsm != NULL);
 char query[256];
 sqlSafef(query, sizeof(query), 
     "select cdwQaContamTarget.* from cdwQaContamTarget,cdwAssembly "
     "where cdwQaContamTarget.assemblyId = cdwAssembly.id "
          " and cdwAssembly.taxon != %d", origAsm->taxon);
 struct cdwQaContamTarget *targetList  = cdwQaContamTargetLoadByQuery(conn, query);
 cdwAssemblyFree(&origAsm);
 return targetList;
 }
 
 void screenFastqForContaminants(struct sqlConnection *conn, 
     struct cdwFile *ef, struct cdwValidFile *vf)
 /* The ef/vf point to same file, which is fastq format.  Set alignments up for a sample against all
  * contamination targets. */
 {
 /* Get target list and see if we have any work to do. */
 struct cdwQaContamTarget *target, *targetList;
 targetList = getContamTargets(conn, ef, vf);
 boolean needScreen = FALSE;
 for (target = targetList; target != NULL; target = target->next)
     {
     if (cdwQaContamMade(conn, ef->id, target->id) <= 0)
         {
 	needScreen = TRUE;
 	break;
 	}
     }
 
 if (needScreen)
     {
     verbose(1, "screenFastqForContaminants(%u(%s))\n", ef->id, ef->submitFileName);
 
     // Get the assay tag. 
     struct cgiParsedVars *tags = cdwMetaVarsList(conn, ef);
     char *assay = cdwLookupTag(tags, "assay");
 
     /* Get fastq record. */
     struct cdwFastqFile *fqf = cdwFastqFileFromFileId(conn, ef->id);
     if (fqf == NULL)
         errAbort("No cdwFastqFile record for file id %lld", (long long)ef->id);
 
     char sampleFastqName[PATH_LEN];
     
     /* Create downsampled fastq in temp directory - downsampled more than default even. */
     cdwMakeTempFastqSample(fqf->sampleFileName, FASTQ_SAMPLE_SIZE, sampleFastqName);
     verbose(1, "downsampled %s into %s\n", vf->licensePlate, sampleFastqName);
     
     for (target = targetList; target != NULL; target = target->next)
 	{
 	/* Get assembly associated with target */
 	int assemblyId = target->assemblyId;
 	char query[512];
 	sqlSafef(query, sizeof(query), "select * from cdwAssembly where id=%d", assemblyId);
 	struct cdwAssembly *newAsm = cdwAssemblyLoadByQuery(conn, query);
 	if (newAsm == NULL)
 	    errAbort("warehouse cdwQaContamTarget %d not found", assemblyId);
 
 	/* If we don't already have a match, do work to create contam record. */
 	int matchCount = cdwQaContamMade(conn, ef->id, target->id);
 	if (matchCount <= 0)
 	    {
 	    /* We run the bed-file maker, just for side effect calcs. */
 	    double mapRatio = 0, depth = 0, sampleCoverage = 0, uniqueMapRatio;
 	    cdwAlignFastqMakeBed(ef, newAsm, sampleFastqName, vf, NULL,
 		&mapRatio, &depth, &sampleCoverage, &uniqueMapRatio, assay);
 
 	    verbose(1, "%s mapRatio %g, depth %g, sampleCoverage %g\n", 
 		newAsm->name, mapRatio, depth, sampleCoverage);
 	    struct cdwQaContam contam = 
 		    {.fileId=ef->id, .qaContamTargetId=target->id, .mapRatio = mapRatio};
 	    cdwQaContamSaveToDb(conn, &contam, "cdwQaContam", 256);
 	    }
 	cdwAssemblyFree(&newAsm);
 	}
     cdwQaContamTargetFreeList(&targetList);
     if (keepTemp)
         verbose(1, "%s\n", sampleFastqName);
     else
 	remove(sampleFastqName);
     cdwFastqFileFree(&fqf);
     }
 }
 
 void doContaminationQa(struct sqlConnection *conn, struct cdwFile *ef)
 /* Try and do contamination level QA - mostly mapping fastq files to other
  * genomes. */
 {
 /* Get validated file info.  If not validated we don't bother. */
 struct cdwValidFile *vf = cdwValidFileFromFileId(conn, ef->id);
 if (vf == NULL)
     return;
 
 /* We only work on fastq. */
 if (!sameString(vf->format, "fastq"))
     return;
 
 screenFastqForContaminants(conn, ef, vf);
 }
 
 
 void cdwMakeContaminationQa(int startId, int endId)
 /* cdwMakeContaminationQa - Screen for contaminants by aligning against contaminant genomes.. */
 {
 /* Make list with all files in ID range */
 struct sqlConnection *conn = cdwConnectReadWrite();
 struct cdwFile *ef, *efList = cdwFileLoadIdRange(conn, startId, endId);
 
 for (ef = efList; ef != NULL; ef = ef->next)
     {
     doContaminationQa(conn, ef);
     }
 }
 
 int main(int argc, char *argv[])
 /* Process command line. */
 {
 optionInit(&argc, argv, options);
 if (argc != 3)
     usage();
 keepTemp = optionExists("keepTemp");
 cdwMakeContaminationQa(sqlUnsigned(argv[1]), sqlUnsigned(argv[2]));
 return 0;
 }