4898794edd81be5285ea6e544acbedeaeb31bf78
max
  Tue Nov 23 08:10:57 2021 -0800
Fixing pointers to README file for license in all source code files. refs #27614

diff --git src/hg/encode3/eap/eapAddStep/eapAddStep.c src/hg/encode3/eap/eapAddStep/eapAddStep.c
index 4905d56..e719d36 100644
--- src/hg/encode3/eap/eapAddStep/eapAddStep.c
+++ src/hg/encode3/eap/eapAddStep/eapAddStep.c
@@ -1,321 +1,321 @@
 /* eapAddStep - Add a step to eapStep and related tables.  This is just a small shortcut for doing it in SQL.  You can only add steps defined in C code. */
 
 /* Copyright (C) 2014 The Regents of the University of California 
- * See README in this or parent directory for licensing information. */
+ * See kent/LICENSE or http://genome.ucsc.edu/license/ for licensing information. */
 #include "common.h"
 #include "linefile.h"
 #include "hash.h"
 #include "options.h"
 #include "dystring.h"
 #include "jksql.h"
 #include "eapDb.h"
 #include "eapLib.h"
 #include "intValTree.h"
 
 boolean clList = FALSE;  /* Set in main() from the -list option; when TRUE eapAddStep() only prints matching step names. */
 
 void usage()
 /* Print the command line help text through errAbort. */
 {
 errAbort(
   /* Program summary, kept as a single output line. */
   "eapAddStep - Add a step to eapStep and related tables.  "
   "This is just a small shortcut for doing it in SQL.  "
   "You can only add steps defined in C code.\n"
   /* Invocation. */
   "usage:\n"
   "   eapAddStep stepName\n"
   "The step name can be a quoted pattern as well as in\n"
   "   eapAddStep '*'\n"
   "to get them all.\n"
   /* Supported options. */
   "options:\n"
   "   -list - Just list steps matching stepName pattern\n"
   );
 }
 
 /* Command line validation table: the options handed to optionInit() in main().
  * Only the boolean -list option is accepted.
  * NOTE(review): the {NULL, 0} entry presumably terminates the table - confirm in options.h. */
 static struct optionSpec options[] = {
    {"list", OPTION_BOOLEAN},
    {NULL, 0},
 };
 
 struct stepInit
 /* Static description of one step, used by initStep() to fill in eapStep and
  * eapStepSoftware.  The list-valued fields are comma separated strings.  initStep()
  * requires the three input lists to have the same number of items, and likewise the
  * four output lists. */
     {
     char *name;     /* Name of step; stored as eapStep.name */
     int cpusRequested;  /* # of cpus requested from job control system */
     char *description;  /* Sentence long description of step */
     char *software;     /* Comma separated list of software names; each must be in eapSoftware */
     char *inputTypes;   /* Comma separated list of input type names */
     char *inputFormats; /* Comma separated list of input formats */
     char *inputDescriptions; /* Comma separated list of input descriptions. */
     char *outputTypes;  /* Comma separated list of output type names */
     char *outputFormats;/* Comma separated list of output formats */
     char *outputNamesInTempDir;  /* Comma separated list of file names in temp dir */
     char *outputDescriptions; /* Comma separated list of output descriptions. */
     };
 
 /* Table of all steps this program knows how to add.  Initializers are positional, so each
  * entry must list its strings in struct stepInit field order:
  *    name, cpusRequested, description, software,
  *    inputTypes, inputFormats, inputDescriptions,
  *    outputTypes, outputFormats, outputNamesInTempDir, outputDescriptions */
 struct stepInit steps[] =
 {
     {
     "bwa_single_end", 2,		// name and CPUs
     "Align single ended fastq with bwa to produce BAM sorted by genome position",  // Description
     "eap_run_bwa_se,bwa,samtools",	// software
     "reads", "fastq",			// input names and formats
     "Reads in Sanger fastq format",     // input description
     "alignments", "bam", "out.bam",	// output names, formats, and file names
     "Alignments in BAM format sorted by genomic position including random pick of multi-aligners"
     },
 
     {
     "bwa_paired_end", 2, 
     "Align paired end fastqs with bwa to produce BAM sorted by genome position",  // Description
     "eap_run_bwa_pe,bwa,samtools",
     "reads1,reads2", "fastq,fastq",
     "Forward direction reads in Sanger fastq format,"     // input description
     "Reverse direction reads in Sanger fastq format",
     "alignments", "bam", "out.bam",
     "Alignments in BAM format sorted by genomic position including random pick of multi-aligners"
     },
 
     {
     "macs2_dnase_se", 1,
     "Call peaks and generate signal plot from a single ended DNAse bam file using Macs2",
     "eap_run_macs2_dnase_se,macs2,bedToBigBed,bedGraphToBigWig",
     "alignments", "bam",
     "Alignments of single end reads in bam format",
     "macs2_dnase_peaks,macs2_dnase_signal", "narrowPeak,bigWig", "out.narrowPeak.bigBed,out.bigWig",
     "Narrow peak calls from Macs2,Base-by-base signal graph from macs2"
     },
 
     {
     "macs2_dnase_pe", 1,
     "Call peaks and generate signal plot from a paired end DNAse bam file using Macs2",
     "eap_run_macs2_dnase_pe,macs2,bedToBigBed,bedGraphToBigWig",
     /* Input type and format must come before the input description, as in every other
      * entry; otherwise the description lands in inputTypes. */
     "alignments", "bam",
     "Alignments of paired end reads in bam format",
     "macs2_dnase_peaks,macs2_dnase_signal", "narrowPeak,bigWig", "out.narrowPeak.bigBed,out.bigWig",
     "Narrow peak calls from Macs2,Base-by-base signal graph from macs2"
     },
 
     {
     "hotspot", 1,
     "Call hotspots, peaks, and generate a signal plot from DNAse bam file using hotspot",
     "eap_run_hotspot,hotspot.py,edwBamFilter,starch,unstarch,hotspot,bedtools,eap_broadPeak_to_bigBed,eap_narrowPeak_to_bigBed,bedToBigBed,bedGraphToBigWig,bedmap,bedGraphPack",
     "alignments", "bam",
     "Alignments of reads with cuts on 5-prime ends in bam format",
     "hotspot_broad_peaks,hotspot_narrow_peaks,hotspot_signal",
     "broadPeak,narrowPeak,bigWig",
     "out.broadPeak.bigBed,out.narrowPeak.bigBed,out.bigWig",
     "Hotspot calls,Peak calls from hotspot,Base-by-base signal graph from hotspot"
     },
 
     {
     "macs2_chip_se", 1,
     "Generate peaks and signal for single ended ChIP-seq BAM files from IP and control using Macs2",
     "eap_run_macs2_chip_se,macs2,bedToBigBed,bedGraphToBigWig",
     "chipBam,controlBam", "bam,bam",
     "Alignments of single end reads from IP,Alignments of single end reads from control",
     "macs2_chip_peaks,macs2_chip_signal", "narrowPeak,bigWig", "out.narrowPeak.bigBed,out.bigWig",
     "Narrow peak calls from Macs2,Base-by-base signal graph from macs2"
     },
 
     {
     "macs2_chip_pe", 1,
     "Generate peaks and signal for paired end ChIP-seq BAM files from IP and control using Macs2",
     "eap_run_macs2_chip_pe,macs2,bedToBigBed,bedGraphToBigWig",
     "chipBam,controlBam", "bam,bam",
     "Alignments of paired end reads from IP,Alignments of paired end reads from control",
     "macs2_chip_peaks,macs2_chip_signal", "narrowPeak,bigWig", "out.narrowPeak.bigBed,out.bigWig",
     "Narrow peak calls from Macs2,Base-by-base signal graph from macs2"
     },
 
     {
     "spp_chip_se", 1,
     "Generate peaks single end ChIP-seq BAM files from IP and control using SPP",
     "eap_run_spp_chip_se,Rscript,bedToBigBed",
     "chipBam,controlBam", "bam,bam",
     "Alignments of single end reads from IP,Alignments of single end reads from control",
     "spp_chip_peaks", "narrowPeak", "out.narrowPeak.bigBed",
     "Narrow peak calls from SPP"
     },
 
     {
     "sum_bigWig", 1,
     "Add together signals from multiple bigWigs producing a bigWig for the sum",
     "eap_pool_big_wig,bigWigMerge,bedGraphPack,bedGraphToBigWig",
     "signal", "bigWig",
     "List of bigWig files",
     "pooled_signal", "bigWig", "out.bigWig",
     "Sum of inputs signals"
     },
 
     {
     "replicated_hotspot", 1,
     "Pool together two replicates and run hotspot on them",
     "eap_pool_hotspot,eap_run_hotspot,edwBamFilter,hotspot.py,starch,unstarch,hotspot,bedtools,eap_broadPeak_to_bigBed,eap_narrowPeak_to_bigBed,bedToBigBed,bedGraphToBigWig,bedmap,bedGraphPack",
     "alignments", "bam",
     "Alignments in bam format",
     "hotspot_broad_peaks,hotspot_narrow_peaks,hotspot_signal",
     "broadPeak,narrowPeak,bigWig",
     "out.broadPeak.bigBed,out.narrowPeak.bigBed,out.bigWig",
     "Hotspot calls,Peak calls from hotspot,Base-by-base signal graph from hotspot"
     },
 
     {
     "phantom_peak_stats", 1,
     "Run the phantom peaks stats tools to calculate RSC and NSC among other things.",
     "eap_run_phantom_peak_spp,Rscript",
     "alignments", "bam",
     "Alignments from some sort of peaky data set in BAM format",
     "", "", "",      // no output types, formats, or file names
     "",
     },
 
     {
     "dnase_stats", 1,
     "Subsample bam file to 5M mapped reads, run hotspot, and collect a bunch of statistics.",
     "eap_dnase_stats,edwBamStats,bigBedToBed,edwBamFilter,bigWigAverageOverBed,eap_run_phantom_peak_spp,Rscript,eap_run_hotspot,hotspot.py,starch,unstarch,hotspot,bedtools,eap_broadPeak_to_bigBed,eap_narrowPeak_to_bigBed,bedToBigBed,bedGraphToBigWig,bedmap,bedGraphPack",
     "alignments", "bam",
     "Alignments from a DNAse hypersensitivity assay in BAM format",
     "", "", "",      // no output types, formats, or file names
     "",
     },
 
 };
 
 int commaSepCount(char *s)
 /* Return the number of items in comma separated list s.  A comma that is the very last
  * character is treated as a terminator rather than a separator.  Returns 0 when
  * isEmpty(s) is true. */
 {
 if (isEmpty(s))
     return 0;
 /* One more item than there are commas ... */
 int itemCount = countChars(s, ',') + 1;
 /* ... unless the final comma merely ends the list. */
 if (lastChar(s) == ',')
     itemCount -= 1;
 return itemCount;
 }
 
 
 void initStep(struct sqlConnection *conn, struct stepInit *init)
 /* Create step based on initializer */
 {
 /* Do a little validation on while counting up inputs and outputs */
 int inCount = commaSepCount(init->inputTypes);
 int matchCount = commaSepCount(init->inputFormats);
 if (inCount != matchCount)
     errAbort("inputTypes has %d elements but inputFormats has %d in step %s", 
 	    inCount, matchCount, init->name);
 matchCount = commaSepCount(init->inputDescriptions);
 if (inCount != matchCount)
     errAbort("inputTypes has %d elements but inputDescriptions has %d in step %s",
 	    inCount, matchCount, init->name);
 int outCount = commaSepCount(init->outputTypes);
 matchCount = commaSepCount(init->outputFormats);
 if (outCount != matchCount)
     errAbort("outputTypes has %d elements but outputFormats has %d in step %s", 
 	    outCount, matchCount, init->name);
 matchCount = commaSepCount(init->outputNamesInTempDir);
 if (outCount != matchCount)
     errAbort("outputTypes has %d elements but outputNamesInTempDir has %d in step %s", 
 	    outCount, matchCount, init->name);
 matchCount = commaSepCount(init->outputDescriptions);
 if (outCount != matchCount)
     errAbort("outputTypes has %d elements but outputDescriptions has %d in step %s", 
 	    outCount, matchCount, init->name);
 
 struct dyString *query = dyStringNew(0);
 dyStringPrintf(query, "select count(*) from eapStep where name='%s'", init->name);
 int existingCount = sqlQuickNum(conn, query->string);
 if (existingCount > 0)
     {
     warn("%s already exists in eapStep", init->name);
     dyStringFree(&query);
     return;
     }
 
 /* Parse out software part and make sure that all pieces are there. */
 char **softwareArray;
 int softwareCount;
 sqlStringDynamicArray(init->software, &softwareArray, &softwareCount);
 unsigned softwareIds[softwareCount];
 int i;
 for (i=0; i<softwareCount; ++i)
     {
     char *name = softwareArray[i];
     dyStringClear(query);
     dyStringPrintf(query, "select id from eapSoftware where name='%s'", name);
     unsigned softwareId = sqlQuickNum(conn, query->string);
     if (softwareId == 0)
         errAbort("Software %s doesn't exist by that name in eapSoftware", name);
     softwareIds[i] = softwareId;
     }
 
 /* Make step record. */
 dyStringClear(query);
 dyStringAppend(query,
 	"insert eapStep (name,cpusRequested,description,"
         " inCount,inputTypes,inputFormats,inputDescriptions,"
 	" outCount,outputNamesInTempDir,outputTypes,outputFormats,outputDescriptions)"
 	" values (");
 dyStringPrintf(query, "'%s',", init->name);
 dyStringPrintf(query, "%d,", init->cpusRequested);
 dyStringPrintf(query, "\"%s\",", init->description);
 dyStringPrintf(query, "%d,", inCount);
 dyStringPrintf(query, "'%s',", init->inputTypes);
 dyStringPrintf(query, "'%s',", init->inputFormats);
 dyStringPrintf(query, "\"%s\",", init->inputDescriptions);
 dyStringPrintf(query, "%d,", outCount);
 dyStringPrintf(query, "'%s',", init->outputNamesInTempDir);
 dyStringPrintf(query, "'%s',", init->outputTypes);
 dyStringPrintf(query, "'%s',", init->outputFormats);
 dyStringPrintf(query, "\"%s\"", init->outputDescriptions);
 dyStringPrintf(query, ")");
 sqlUpdate(conn, query->string);
 
 /* Make software/step associations. */
 for (i=0; i<softwareCount; ++i)
     {
     dyStringClear(query);
     dyStringPrintf(query, "insert eapStepSoftware (step,software) values ('%s','%s')",
 	    init->name, softwareArray[i]);
     sqlUpdate(conn, query->string);
     }
 
 
 /* Force step version stuff to be made right away */
 eapCurrentStepVersion(conn, init->name);
 
 /* Clean up. */
 dyStringFree(&query);
 freez(&softwareArray[0]);
 freez(&softwareArray);
 }
 
 void eapAddStep(char *pattern)
 /* eapAddStep - Add a step to eapStep and related tables.  This is just a small shortcut for doing 
  * it in SQL.  You can only add steps defined in C code.. */
 {
 struct sqlConnection *conn = eapConnectReadWrite();
 int i;
 for (i=0; i<ArraySize(steps); ++i)
     {
     struct stepInit *init = &steps[i];
     if (wildMatch(pattern, init->name))
         {
 	if (clList)
 	    puts(init->name);
 	else
 	    initStep(conn, init);
 	}
     }
 }
 
 int main(int argc, char *argv[])
 /* Process command line: after optionInit() exactly one argument, the step name
  * pattern, must remain; otherwise show usage. */
 {
 optionInit(&argc, argv, options);
 if (argc != 2)
     usage();
 char *stepPattern = argv[1];
 clList = optionExists("list");
 eapAddStep(stepPattern);
 return 0;
 }