4898794edd81be5285ea6e544acbedeaeb31bf78
max
  Tue Nov 23 08:10:57 2021 -0800
Fixing pointers to README file for license in all source code files. refs #27614

diff --git src/hg/snp/snpLoad/illuminaLookup.c src/hg/snp/snpLoad/illuminaLookup.c
index a50c544..bf64572 100644
--- src/hg/snp/snpLoad/illuminaLookup.c
+++ src/hg/snp/snpLoad/illuminaLookup.c
@@ -1,333 +1,333 @@
 /* illuminaLookup - validate illumina SNP arrays. */
 /* report if SNP missing */
 /* report unexpected position */
 /* check strand */
 /* check observed */
 /* report if class != single */
 /* report if locType != exact */
 
 /* Copyright (C) 2013 The Regents of the University of California 
- * See README in this or parent directory for licensing information. */
+ * See kent/LICENSE or http://genome.ucsc.edu/license/ for licensing information. */
 
 #include "common.h"
 
 #include "hash.h"
 #include "hdb.h"
 
 
 struct snpSubset 
 /* This is just a list of some of the snps.  We're not wanting to drag in snp127, snp128, etc. 
  * This is stored in a hash keyed by snpId, so we don't bother storing the snpId here. */
     {
     char *chrom;
     int start;
     int end;
     char *strand;
     char *observed;
     char *class;
     char *locType;
     };
 
 static void usage()
 /* Explain usage and exit. */
 {
 errAbort(
     "illuminaLookup - validation\n"
     "usage:\n"
     "    illuminaLookup db illuminaTable snpTable exceptionTable errorFile\n");
 }
 
 
 static struct hash *readMultipleAlignmentExceptions(char *tableName)
 /* Read in MultipleAlignment exceptions from table and store in a hash keyed by snp ID. */
 {
 struct hash *ret = NULL;
 char query[512];
 struct sqlConnection *conn = hAllocConn();
 struct sqlResult *sr;
 char **row;
 
 verbose(1, "reading exceptions...\n");
 ret = newHash(0);
 sqlSafef(query, sizeof(query), "select name, exception from %s", tableName);
 sr = sqlGetResult(conn, query);
 while ((row = sqlNextRow(sr)) != NULL)
     {
     if (!sameString(row[1], "MultipleAlignments")) continue;
     hashAdd(ret, cloneString(row[0]), NULL);
     }
 sqlFreeResult(&sr);
 hFreeConn(&conn);
 return ret;
 }
 
 static struct hash *readSnps(char *tableName, struct hash *multiMapHash)
 /* Store subset data for SNPs in a hash.  Exclude SNPs that align multiple places. */
 {
 struct hash *ret = NULL;
 char query[512];
 struct sqlConnection *conn = hAllocConn();
 struct sqlResult *sr;
 char **row;
 struct snpSubset *subsetElement = NULL;
 // FILE *errors = mustOpen(errorFileName, "w");
 
 verbose(1, "creating SNP hash...\n");
 ret = newHash(20);
 sqlSafef(query, sizeof(query), 
       "select name, chrom, chromStart, chromEnd, strand, observed, class, locType from %s", 
       tableName);
 sr = sqlGetResult(conn, query);
 while ((row = sqlNextRow(sr)) != NULL)
     {
     if (!hashLookup(multiMapHash, row[0]))
 	{
 	AllocVar(subsetElement);
 	subsetElement->chrom = cloneString(row[1]);
 	subsetElement->start = sqlUnsigned(row[2]);
 	subsetElement->end = sqlUnsigned(row[3]);
 	subsetElement->strand = cloneString(row[4]);
 	subsetElement->observed = cloneString(row[5]);
 	subsetElement->class = cloneString(row[6]);
 	subsetElement->locType = cloneString(row[7]);
 	hashAdd(ret, cloneString(row[0]), subsetElement);
 	}
     }
 sqlFreeResult(&sr);
 hFreeConn(&conn);
 // carefulClose(&errors);
 return ret;
 }
 
 static boolean isTriallelic(char *obs)
 {
 if (sameString(obs, "A/C/G")) return TRUE;
 if (sameString(obs, "A/C/T")) return TRUE;
 if (sameString(obs, "A/G/T")) return TRUE;
 if (sameString(obs, "C/G/T")) return TRUE;
 return FALSE;
 }
 
 static boolean isReverseComplement(char *obs1, char *obs2)
 {
 if (sameString(obs1, "A/C"))
     {
     if (sameString(obs2, "G/T")) return TRUE;
     return FALSE;
     }
 if (sameString(obs1, "A/G"))
     {
     if (sameString(obs2, "C/T")) return TRUE;
     return FALSE;
     }
 if (sameString(obs1, "C/T"))
     {
     if (sameString(obs2, "A/G")) return TRUE;
     return FALSE;
     }
 if (sameString(obs1, "G/T"))
     {
     if (sameString(obs2, "A/C")) return TRUE;
     return FALSE;
     }
 /* shouldn't get here */
 return FALSE;
 }
 
 static char *reverseObserved(char *obs)
 {
 if (sameString(obs, "A/C"))
     return "G/T";
 if (sameString(obs, "A/G"))
     return "C/T";
 if (sameString(obs, "C/T"))
     return "A/G";
 if (sameString(obs, "G/T"))
     return "A/C";
 return obs;
 }
 
 
 
 static void logSnpStrandMismatch(struct hash *snpHash, char *illuminaTable)
 /* Read illuminaTable.  Output +/+, +/-, -/+, -/- in various log files. */
 {
 char query[512];
 struct sqlConnection *conn = hAllocConn();
 struct sqlResult *sr;
 char **row;
 struct hashEl *hel = NULL;
 struct snpSubset *subsetElement = NULL;
 int pos = 0;
 char *strand = NULL;
 char *observed = NULL;
 FILE *plusplus = mustOpen("plusplus", "w");
 FILE *minusplus = mustOpen("minusplus", "w");
 FILE *plusminus = mustOpen("plusminus", "w");
 FILE *minusminus = mustOpen("minusminus", "w");
 
 verbose(1, "logging...\n");
 sqlSafef(query, sizeof(query), "select name, chrom, chromStart, strand, observed from %s", illuminaTable);
 sr = sqlGetResult(conn, query);
 while ((row = sqlNextRow(sr)) != NULL)
     {
     hel = hashLookup(snpHash, row[0]);
     if (hel == NULL) continue;
     subsetElement = (struct snpSubset *)hel->val;
     if (!sameString(subsetElement->chrom, row[1])) continue;
     pos = sqlUnsigned(row[2]);
     if (pos != subsetElement->start) continue;
     strand = cloneString(row[3]);
     observed = cloneString(row[4]);
     if (sameString(strand, "+") && sameString(subsetElement->strand, "+"))
         fprintf(plusplus, "snp %s: illumina %s (%s), dbSNP %s (%s)\n",
 	        row[0], observed, strand, subsetElement->observed, subsetElement->strand);
     else if (sameString(strand, "+") && sameString(subsetElement->strand, "-"))
         fprintf(plusminus, "snp %s: illumina %s (%s), dbSNP %s (%s)\n",
 	        row[0], observed, strand, subsetElement->observed, subsetElement->strand);
     else if (sameString(strand, "-") && sameString(subsetElement->strand, "+"))
         fprintf(minusplus, "snp %s: illumina %s (%s), dbSNP %s (%s)\n",
 	        row[0], observed, strand, subsetElement->observed, subsetElement->strand);
     else if (sameString(strand, "-") && sameString(subsetElement->strand, "-"))
         fprintf(minusminus, "snp %s: illumina %s (%s), dbSNP %s (%s)\n",
 	        row[0], observed, strand, subsetElement->observed, subsetElement->strand);
     }
 
 sqlFreeResult(&sr);
 hFreeConn(&conn);
 carefulClose(&plusplus);
 carefulClose(&plusminus);
 carefulClose(&minusplus);
 carefulClose(&minusminus);
 }
 
 static void processSnps(struct hash *snpHash, char *illuminaTable, char *errorFileName)
 /* read illuminaTable */
 /* lookup details in snpHash */
 /* report if SNP missing */
 /* report if class != single */
 /* report if locType != exact */
 {
 char query[512];
 struct sqlConnection *conn = hAllocConn();
 struct sqlResult *sr;
 char **row;
 struct hashEl *hel = NULL;
 struct snpSubset *subsetElement = NULL;
 FILE *errors = mustOpen(errorFileName, "w");
 int pos = 0;
 char *strand = NULL;
 char *observed = NULL;
 
 verbose(1, "process SNPs...\n");
 sqlSafef(query, sizeof(query), "select name, chrom, chromStart, strand, observed from %s", illuminaTable);
 sr = sqlGetResult(conn, query);
 while ((row = sqlNextRow(sr)) != NULL)
     {
     hel = hashLookup(snpHash, row[0]);
     if (hel == NULL) 
         {
 	fprintf(errors, "%s not found\n", row[0]);
 	continue;
 	}
     subsetElement = (struct snpSubset *)hel->val;
 
     if (!sameString(subsetElement->chrom, row[1]))
         {
 	fprintf(errors, "unexpected chrom %s for snp %s\n", row[1], row[0]);
 	continue;
 	}
 
     pos = sqlUnsigned(row[2]);
     if (pos != subsetElement->start)
         {
         fprintf(errors, "unexpected position %d for snp %s\n", pos, row[0]);
 	continue;
 	}
 
     if (!sameString(subsetElement->class, "single"))
         {
 	fprintf(errors, "unexpected class %s for snp %s\n", subsetElement->class, row[0]);
 	continue;
 	}
 
     strand = cloneString(row[3]);
     observed = cloneString(row[4]);
 
     if (sameString(subsetElement->observed, "A/C/G/T"))
         {
 	fprintf(errors, "snp %s is quad-allelic in dbSNP\n", row[0]);
 	continue;
 	}
 
     if (isTriallelic(subsetElement->observed))
         {
 	fprintf(errors, "snp %s is tri-allelic in dbSNP\n", row[0]);
 	continue;
 	}
         
 
     /* illumina positive strand - just compare observed */
     /* this seems odd... */
     if (sameString(strand, "+") && differentString(observed, subsetElement->observed))
 	fprintf(errors, "observed mismatch for %s: illumina %s (%s), dbSNP %s (%s)\n", 
 	        row[0], observed, strand, subsetElement->observed, subsetElement->strand);
 
     /* illumina negative strand - reverse complement and compare */
     if (sameString(strand, "-"))
         {
         if (differentString(reverseObserved(observed), subsetElement->observed))
 	    fprintf(errors, "observed mismatch for %s: illumina %s (%s), dbSNP %s (%s)\n", 
 	            row[0], observed, strand, subsetElement->observed, subsetElement->strand);
 	}
 
     if (!sameString(subsetElement->locType, "exact"))
 	fprintf(errors, "unexpected locType %s for snp %s\n", subsetElement->locType, row[0]);
 
     }
 
 sqlFreeResult(&sr);
 hFreeConn(&conn);
 carefulClose(&errors);
 }
 
 
 int main(int argc, char *argv[])
 /* load SNPs into hash */
 {
 char *snpDb = NULL;
 char *illuminaTableName = NULL;
 char *snpTableName = NULL;
 char *exceptionTableName = NULL;
 char errorFileName[64];
 
 struct hash *snpHash = NULL;
 struct hash *multiMapHash = NULL;
 
 if (argc != 6)
     usage();
 
 snpDb = argv[1];
 illuminaTableName = argv[2];
 snpTableName = argv[3];
 exceptionTableName = argv[4];
 safef(errorFileName, ArraySize(errorFileName), "%s", argv[5]);
 
 /* process args */
 hSetDb(snpDb);
 if (!hTableExists(illuminaTableName))
     errAbort("no %s table in %s\n", illuminaTableName, snpDb);
 if (!hTableExists(snpTableName))
     errAbort("no %s table in %s\n", snpTableName, snpDb);
 if (!hTableExists(exceptionTableName))
     errAbort("no %s table in %s\n", exceptionTableName, snpDb);
 
 multiMapHash = readMultipleAlignmentExceptions(exceptionTableName);
 verbose(1, "Read %d multiply mapped SNPS\n", multiMapHash->elCount);
 snpHash = readSnps(snpTableName, multiMapHash);
 verbose(1, "Read %d singly SNPS\n", snpHash->elCount);
 logSnpStrandMismatch(snpHash, illuminaTableName);
 processSnps(snpHash, illuminaTableName, errorFileName);
 
 return 0;
 }