4898794edd81be5285ea6e544acbedeaeb31bf78
max
  Tue Nov 23 08:10:57 2021 -0800
Fixing pointers to README file for license in all source code files. refs #27614

diff --git src/hg/snp/snpLoad/hapmapLookup.c src/hg/snp/snpLoad/hapmapLookup.c
index fced61b..8bb0c17 100644
--- src/hg/snp/snpLoad/hapmapLookup.c
+++ src/hg/snp/snpLoad/hapmapLookup.c
@@ -1,356 +1,356 @@
 /* hapmapLookup - standalone utility program to check hapmap data against dbSNP. */
 
 /* Copyright (C) 2013 The Regents of the University of California 
- * See README in this or parent directory for licensing information. */
+ * See kent/LICENSE or http://genome.ucsc.edu/license/ for licensing information. */
 
 #include "common.h"
 
 #include "hash.h"
 #include "hdb.h"
 
 
 FILE *errorFileHandle = NULL;
 FILE *logFileHandle = NULL;
 
 struct snpSubset 
     {
     char *chrom;
     int start;
     int end;
     char strand;
     char *observed;
     char *class;
     char *locType;
     };
 
 void usage()
 /* Explain usage and exit. */
 {
 errAbort(
     "hapmapLookup - get coords using rsId\n"
     "usage:\n"
     "    hapmapLookup db hapmapTable snpTable exceptionTable \n");
 }
 
 #define PAR1X 2642881
 #define PAR1Y 2642881
 #define PAR2X 154494748
 #define PAR2Y 57372174
 
 
 boolean isPar(char *chrom, int start, int end)
 /* these are hg17 coordinates */
 {
 if (sameString(chrom, "chrY"))
     {
     if (end < PAR1Y) return TRUE;
     if (start > PAR2Y) return TRUE;
     }
 if (sameString(chrom, "chrX"))
     {
     if (end < PAR1X) return TRUE;
     if (start > PAR2X) return TRUE;
     }
 return FALSE;
 }
 
 boolean isComplexObserved(char *observed)
 {
 if (sameString(observed, "A/C")) return FALSE;
 if (sameString(observed, "A/G")) return FALSE;
 if (sameString(observed, "A/T")) return FALSE;
 if (sameString(observed, "C/G")) return FALSE;
 if (sameString(observed, "C/T")) return FALSE;
 if (sameString(observed, "G/T")) return FALSE;
 return TRUE;
 }
 
 struct hash *storeExceptions(char *tableName)
 /* store multiply-aligning SNPs */
 {
 struct hash *ret = NULL;
 char query[512];
 struct sqlConnection *conn = hAllocConn();
 struct sqlResult *sr;
 char **row;
 
 verbose(1, "reading exceptions...\n");
 ret = newHash(0);
 sqlSafef(query, sizeof(query), "select chrom, chromStart, chromEnd, name, exception from %s", tableName);
 sr = sqlGetResult(conn, query);
 while ((row = sqlNextRow(sr)) != NULL)
     {
     if (!sameString(row[4], "MultipleAlignments")) continue;
     if (isPar(row[0], sqlUnsigned(row[1]), sqlUnsigned(row[2]))) continue;
     hashAdd(ret, cloneString(row[3]), NULL);
     }
 sqlFreeResult(&sr);
 hFreeConn(&conn);
 return ret;
 }
 
 struct hash *storeSnps(char *tableName, struct hash *exceptionHash)
 /* store subset data for SNPs in a hash */
 /* exclude SNPs that align multiple places */
 {
 struct hash *ret = NULL;
 char query[512];
 struct sqlConnection *conn = hAllocConn();
 struct sqlResult *sr;
 char **row;
 struct snpSubset *subsetElement = NULL;
 struct hashEl *hel = NULL;
 
 verbose(1, "creating SNP hash...\n");
 ret = newHash(16);
 sqlSafef(query, sizeof(query), 
       "select name, chrom, chromStart, chromEnd, strand, observed, class, locType from %s", tableName);
 sr = sqlGetResult(conn, query);
 while ((row = sqlNextRow(sr)) != NULL)
     {
     hel = hashLookup(exceptionHash, row[0]);
     if (hel != NULL) continue;
     AllocVar(subsetElement);
     subsetElement->chrom = cloneString(row[1]);
     subsetElement->start = sqlUnsigned(row[2]);
     subsetElement->end = sqlUnsigned(row[3]);
     subsetElement->strand = row[4][0];
     subsetElement->observed = cloneString(row[5]);
     subsetElement->class = cloneString(row[6]);
     subsetElement->locType = cloneString(row[7]);
     hashAdd(ret, cloneString(row[0]), subsetElement);
     }
 sqlFreeResult(&sr);
 hFreeConn(&conn);
 return ret;
 }
 
 boolean samePosition(char **row, struct snpSubset *ss)
 /* simple comparison function */
 {
 /* must be simple */
 assert(sameString(ss->class, "single"));
 assert(sameString(ss->locType, "exact"));
 assert(ss->end == ss->start + 1);
 
 /* allow for PAR */
 if (sameString(row[0], "chrX") && sameString(ss->chrom, "chrX"))
     {
     if (sqlUnsigned(row[1]) < PAR1X && ss->start < PAR1X) return TRUE;
     if (sqlUnsigned(row[1]) > PAR2X && ss->start > PAR2X) return TRUE;
     }
 if (sameString(row[0], "chrX") && sameString(ss->chrom, "chrY"))
     {
     if (sqlUnsigned(row[1]) < PAR1X && ss->start < PAR1Y) return TRUE;
     if (sqlUnsigned(row[1]) > PAR2X && ss->start > PAR2Y) return TRUE;
     }
 if (sameString(row[0], "chrY") && sameString(ss->chrom, "chrX"))
     {
     if (sqlUnsigned(row[1]) < PAR1Y && ss->start < PAR1X) return TRUE;
     if (sqlUnsigned(row[1]) > PAR2Y && ss->start > PAR2X) return TRUE;
     }
 if (sameString(row[0], "chrY") && sameString(ss->chrom, "chrY"))
     {
     if (sqlUnsigned(row[1]) < PAR1Y && ss->start < PAR1Y) return TRUE;
     if (sqlUnsigned(row[1]) > PAR2Y && ss->start > PAR2Y) return TRUE;
     }
 
 if (differentString(row[0], ss->chrom)) return FALSE;
 if (sqlUnsigned(row[1]) != ss->start) return FALSE;
 if (sqlUnsigned(row[2]) != ss->end) return FALSE;
 return TRUE;
 }
 
 boolean reverseComplemented(char **row, struct snpSubset *ss)
 {
 char *rc;
 int i, len;
 
 len = strlen(ss->observed);
 rc = needMem(len + 1);
 rc = cloneString(ss->observed);
 for (i = 0; i < len; i = i+2)
     {
     char c = ss->observed[i];
     rc[len-i-1] = ntCompTable[(int)c];
     }
 
 if (sameString(rc, row[5])) return TRUE;
 
 return FALSE;
 }
 
 boolean strandMatch(char **row, struct snpSubset *ss)
 {
 if (row[4][0] != ss->strand) return FALSE;
 return TRUE;
 }
 
 void checkObservedAndStrand(char **row, struct snpSubset *ss)
 {
 boolean sameStrand = strandMatch(row, ss);
 char *rsId = cloneString(row[3]);
 char *hapmapObserved = cloneString(row[5]);
 
 /* we know that ss has valid observed */
 assert(differentString(ss->observed, "unknown"));
 assert(differentString(ss->observed, "n/a"));
 
 if (sameStrand && sameString(ss->observed, hapmapObserved)) return;
 
 if (!sameStrand)
     {
     /* perhaps this is a reverse complement */
     if (reverseComplemented(row, ss))
         {
         fprintf(errorFileHandle, "snp %s reverse complemented\n", rsId);
 	return;
 	}
 
     if (sameString(hapmapObserved, ss->observed))
         {
         fprintf(errorFileHandle, "snp %s strand mismatch\n", rsId);
         return;
         }
 
     fprintf(errorFileHandle, "snp %s: hapmap observed and strand issue\n", rsId);
     return;
     }
         
 fprintf(errorFileHandle, "snp %s observed mismatch\n", rsId);
         
 }
 
 
 boolean isComplexDbSnp(struct snpSubset *ss)
 /* not including observed in this */
 {
 if (ss->end != ss->start + 1) return TRUE;
 if (differentString(ss->class, "single")) return TRUE;
 if (differentString(ss->locType, "exact")) return TRUE;
 return FALSE;
 }
 
 void processSnps(struct hash *snpHash, struct hash *exceptionHash, char *hapmapTable)
 /* read hapmapTable, lookup in snpHash */
 /* report if SNP missing (could be due to multiple alignment, handle this if necessary) */
 /* report if location is different */
 /* report if observed is different */
 /* report if strand is different */
 {
 char query[512];
 struct sqlConnection *conn = hAllocConn();
 struct sqlResult *sr;
 char **row;
 struct hashEl *hel = NULL;
 struct hashEl *hel2 = NULL;
 struct snpSubset *subsetElement = NULL;
 char *rsId = NULL;
 
 verbose(1, "process SNPs...\n");
 sqlSafef(query, sizeof(query), "select chrom, chromStart, chromEnd, name, strand, observed from %s", hapmapTable);
 sr = sqlGetResult(conn, query);
 while ((row = sqlNextRow(sr)) != NULL)
     {
     rsId = cloneString(row[3]);
     hel = hashLookup(snpHash, rsId);
     if (hel == NULL) 
         {
 	hel2 = hashLookup(exceptionHash, rsId);
 	if (hel2 == NULL)
 	    fprintf(errorFileHandle, "%s not found\n", rsId);
 	else
 	    fprintf(errorFileHandle, "%s aligns multiple places\n", rsId);
 	continue;
 	}
     subsetElement = (struct snpSubset *)hel->val;
 
     if (sameString(subsetElement->observed, "unknown") || sameString(subsetElement->observed, "n/a"))
         {
 	if (!strandMatch(row, subsetElement))
             fprintf(errorFileHandle, "snp %s strand mismatch (dbSNP observed unknown)\n", rsId);
 	continue;
 	}
 
     char *hapmapObserved = cloneString(row[5]);
 
     if (!isComplexObserved(subsetElement->observed) && !isComplexObserved(hapmapObserved))
         {
         checkObservedAndStrand(row, subsetElement);
         }
 
     if (isComplexDbSnp(subsetElement))
         {
 	fprintf(logFileHandle, "%s %s %d %d %c %s %s %s\n", rsId, subsetElement->chrom, 
 	                       subsetElement->start, subsetElement->end, subsetElement->strand,
 			       subsetElement->observed, subsetElement->class, subsetElement->locType);
 	/* could print strand here, too */
 	if (differentString(subsetElement->observed, hapmapObserved))
 	    {
 	    if (isComplexObserved(subsetElement->observed) && !isComplexObserved(hapmapObserved))
 	        fprintf(errorFileHandle, "%s: snp complex observed = %s, hapmap simple observed = %s\n", 
 		        rsId, subsetElement->observed, hapmapObserved);
 	    if (!isComplexObserved(subsetElement->observed) && isComplexObserved(hapmapObserved))
 	        fprintf(errorFileHandle, "%s: snp simple observed = %s, hapmap complex observed = %s\n", 
 		        rsId, subsetElement->observed, hapmapObserved);
 	    if (isComplexObserved(subsetElement->observed) && isComplexObserved(hapmapObserved))
 	        fprintf(errorFileHandle, "%s: snp complex observed = %s, hapmap complex observed = %s\n", 
 		        rsId, subsetElement->observed, hapmapObserved);
 	    }
 	continue;
 	}
 
     if (!samePosition(row, subsetElement))
         {
 	fprintf(errorFileHandle, "position mismatch for snp %s\n", rsId);
 	continue;
 	}
 
     }
 }
 
 
 int main(int argc, char *argv[])
 /* load SNPs into hash */
 {
 char *snpDb = NULL;
 char *hapmapTableName = NULL;
 char *snpTableName = NULL;
 char *exceptionTableName = NULL;
 
 struct hash *snpHash = NULL;
 struct hash *exceptionHash = NULL;
 
 if (argc != 5)
     usage();
 
 snpDb = argv[1];
 hapmapTableName = argv[2];
 snpTableName = argv[3];
 exceptionTableName = argv[4];
 
 /* initialize ntCompTable */
 dnaUtilOpen();
 
 /* process args */
 hSetDb(snpDb);
 if (!hTableExists(hapmapTableName))
     errAbort("no %s table in %s\n", hapmapTableName, snpDb);
 if (!hTableExists(snpTableName))
     errAbort("no %s table in %s\n", snpTableName, snpDb);
 if (!hTableExists(exceptionTableName))
     errAbort("no %s table in %s\n", exceptionTableName, snpDb);
 
 errorFileHandle = mustOpen("hapmapLookup.error", "w");
 logFileHandle = mustOpen("hapmapLookup.log", "w");
 
 exceptionHash = storeExceptions(exceptionTableName);
 snpHash = storeSnps(snpTableName, exceptionHash);
 verbose(1, "finished creating dbSNP hash\n");
 processSnps(snpHash, exceptionHash, hapmapTableName);
 
 carefulClose(&errorFileHandle);
 carefulClose(&logFileHandle);
 
 return 0;
 }