4898794edd81be5285ea6e544acbedeaeb31bf78
max
  Tue Nov 23 08:10:57 2021 -0800
Fixing pointers to README file for license in all source code files. refs #27614

diff --git src/hg/snp/snpLoad/snpClassAndObserved.c src/hg/snp/snpLoad/snpClassAndObserved.c
index 56effbf..1bd3b4e 100644
--- src/hg/snp/snpLoad/snpClassAndObserved.c
+++ src/hg/snp/snpLoad/snpClassAndObserved.c
@@ -1,325 +1,325 @@
 /* Copyright (C) 2013 The Regents of the University of California 
- * See README in this or parent directory for licensing information. */
+ * See kent/LICENSE or http://genome.ucsc.edu/license/ for licensing information. */
 
 /* snpClassAndObserved -- new seventh step
  * Hash the UniVariation table, storing subsnp_class and var_str.
  * subsnp_class is a number from 0-8.
  * Log and skip rows where subsnp_class = 0 or subsnp_class > 8.
  * Should only be a few of these.
  * Hash the SNP table, storing univar_id as string.
  * Again, log and skip rows where univar_id = 0.
  * There are likely order(10K) of these.
  * Serially read the chrN_snpTmp tables.
  * Do lookups into snpHash and univarHash.
  * Use UCSC chromInfo. */
 
 /* Could drop refUCSCReverseComp here. */
 /* Dropping ctg_id at this point. */
 
 #include "common.h"
 
 #include "hash.h"
 #include "hdb.h"
 
 
 struct snpData
     {
     int classInt;
     char *observed;
     };
 
 static struct hash *univarHash = NULL;
 static struct hash *snpHash = NULL;
 
 static char *snpDb = NULL;
 FILE *errorFileHandle = NULL;
 
 /* we will promote in-del to insertion if loc_type == 3 */
 /* we will promote in-del to deletion if loc_type == 1 or loc_type == 2 */
 /* we are hoping for no no-var and hardly any het */
 char *classStrings[] = {
     "single",
     "in-del",
     "het",
     "microsatellite",
     "named",
     "no-var",
     "mixed",
     "mnp"
 };
 
 int classCount = sizeof(classStrings);
 
 void usage()
 /* Explain usage and exit. */
 {
 errAbort(
     "snpClassAndObserved - lookup class and observed in SNP and UniVariation tables\n"
     "check for errors in observed\n"
     "recreate chrN_snpTmp\n"
     "usage:\n"
     "    snpClassAndObserved snpDb \n");
 }
 
 
 struct hash *getUnivarHash()
 /* read var_str and subsnp_class of UniVar into hash indexed by univar_id. */
 {
 struct snpData *el;
 char query[512];
 struct sqlConnection *conn = hAllocConn();
 struct sqlResult *sr;
 char **row;
 struct hash *ret = NULL;
 int classInt = 0;
 int rowCount = 0;
 int skipCount = 0;
 
 verbose(1, "create univarHash...\n");
 ret = newHash(0);
 sqlSafef(query, sizeof(query), "select univar_id, var_str, subsnp_class from UniVariation");
 sr = sqlGetResult(conn, query);
 while ((row = sqlNextRow(sr)) != NULL)
     {
     rowCount++;
     classInt = sqlUnsigned(row[2]);
     if (classInt == 0)
         {
 	fprintf(errorFileHandle, "subsnp_class = 0 (zero) for univar_id %s (skipping)\n", row[0]);
 	skipCount++;
 	continue;
 	}
     if (classInt > classCount)
         {
 	fprintf(errorFileHandle, "unexpected subsnp_class = %d for univar_id %s (skipping)\n", classInt, row[0]);
 	skipCount++;
 	continue;
 	}
     AllocVar(el);
     el->observed = cloneString(row[1]);
     el->classInt = classInt;
     hashAdd(ret, cloneString(row[0]), el);
     }
 sqlFreeResult(&sr);
 hFreeConn(&conn);
 verbose(1, "read %d rows in UniVariation\n", rowCount);
 verbose(1, "skipped %d rows \n", skipCount);
 return ret;
 }
 
 struct hash *getSNPHash()
 /* Store univar_id from SNP into hash indexed by snp_id. */
 {
 char query[512];
 struct sqlConnection *conn = hAllocConn();
 struct sqlResult *sr;
 char **row;
 struct hash *ret = NULL;
 int id = 0;
 int rowCount = 0;
 int skipCount = 0;
 struct hashEl *univarElement = NULL;
 struct snpData *snpElementNew = NULL;
 struct snpData *snpElementOld = NULL;
 
 verbose(1, "creating snpHash...\n");
 ret = newHash(16);
 sqlSafef(query, sizeof(query), "select snp_id, univar_id from SNP");
 sr = sqlGetResult(conn, query);
 while ((row = sqlNextRow(sr)) != NULL)
     {
     rowCount++;
     id = sqlUnsigned(row[1]);
     if (id == 0)
         {
 	fprintf(errorFileHandle, "univar_id = 0 (zero) for %s (skipping)\n", row[0]);
 	skipCount++;
 	continue;
 	}
     /* check that this univar_id is in univarHash */
     univarElement = hashLookup(univarHash, row[1]);
     if (univarElement == NULL)
         {
 	fprintf(errorFileHandle, "univar_id = %s for %s missing from UniVariation table (leaving out of snpHash)\n", 
 	        row[1], row[0]);
 	skipCount++;
 	continue;
 	}
     /* clone */
     snpElementOld = (struct snpData *)univarElement->val;
     AllocVar(snpElementNew);
     snpElementNew->observed = cloneString(snpElementOld->observed);
     snpElementNew->classInt = snpElementOld->classInt;
     hashAdd(ret, cloneString(row[0]), snpElementNew);
     }
 sqlFreeResult(&sr);
 hFreeConn(&conn);
 verbose(1, "read %d rows in SNP\n", rowCount);
 verbose(1, "skipped %d rows \n", skipCount);
 return ret;
 }
 
 void processSnps(char *chromName)
 /* read through all rows in snpTmp */
 /* look up class and observed */
 /* write to output file */
 {
 char query[512];
 struct sqlConnection *conn = hAllocConn();
 struct sqlResult *sr;
 char **row;
 char tableName[64];
 char fileName[64];
 FILE *f;
 struct hashEl *univarElement = NULL;
 struct snpData *dataElement = NULL;
 int classInt = 0;
 char *classString = NULL;
 int loc_type = 0;
 int skipCount = 0;
 
 safef(tableName, ArraySize(tableName), "%s_snpTmp", chromName);
 safef(fileName, ArraySize(fileName), "%s_snpTmp.tab", chromName);
 f = mustOpen(fileName, "w");
 
 sqlSafef(query, sizeof(query), 
      "select snp_id, chromStart, chromEnd, loc_type, orientation, allele, refUCSC, refUCSCReverseComp, weight from %s ", tableName);
 sr = sqlGetResult(conn, query);
 while ((row = sqlNextRow(sr)) != NULL)
     {
     loc_type = sqlUnsigned(row[3]);
     /* get univarElement from snpHash */
     univarElement = hashLookup(snpHash, row[0]);
     if (univarElement == NULL)
        {
         {
 	fprintf(errorFileHandle, "no data for %s (dropping)\n", row[0]);
 	skipCount++;
 	continue;
 	}
        }
     dataElement = (struct snpData *)univarElement->val;
     classInt = dataElement->classInt;
     // verbose(1, "classInt = %d\n", classInt);
     assert(classInt >= 1 && classInt <= classCount);
     /* lookup classString */
     classString = classStrings[classInt-1];
     /* special handling for class = in-del; split into classes of our own construction */
     if (sameString(classString, "in-del"))
         {
         if (loc_type == 3) 
 	    classString = cloneString("insertion");
 	if (loc_type == 1 || loc_type == 2) 
 	    classString = cloneString("deletion");
 	}
     fprintf(f, "%s\t%s\t%s\t%d\t%s\t", row[0], row[1], row[2], loc_type, classString);
     fprintf(f, "%s\t%s\t%s\t%s\t%s\t%s\n", row[4], row[5], row[6], row[7], dataElement->observed, row[8]);
     }
 
 sqlFreeResult(&sr);
 hFreeConn(&conn);
 carefulClose(&f);
 if (skipCount > 0)
     verbose(1, "%d rows dropped\n", skipCount);
 }
 
 
 void recreateDatabaseTable(char *chromName)
 /* create a new chrN_snpTmp table with new definition */
 {
 struct sqlConnection *conn = hAllocConn();
 char tableName[64];
 char *createString =
 "CREATE TABLE %s (\n"
 "    snp_id int(11) not null,\n"
 "    chromStart int(11) not null,\n"
 "    chromEnd int(11) not null,\n"
 "    loc_type tinyint(4) not null,\n"
 "    class varchar(255) not null,\n"
 "    orientation tinyint(4) not null,\n"
 "    allele blob,\n"
 "    refUCSC blob,\n"
 "    refUCSCReverseComp blob,\n"
 "    observed blob,\n"
 "    weight int\n"
 ");\n";
 
 struct dyString *dy = newDyString(1024);
 
 safef(tableName, ArraySize(tableName), "%s_snpTmp", chromName);
 sqlDyStringPrintf(dy, createString, tableName);
 sqlRemakeTable(conn, tableName, dy->string);
 dyStringFree(&dy);
 hFreeConn(&conn);
 }
 
 
 void loadDatabase(char *chromName)
 /* load one table into database */
 {
 FILE *f;
 struct sqlConnection *conn = hAllocConn();
 char tableName[64], fileName[64];
 
 safef(tableName, ArraySize(tableName), "%s_snpTmp", chromName);
 safef(fileName, ArraySize(fileName), "%s_snpTmp.tab", chromName);
 
 f = mustOpen(fileName, "r");
 /* hgLoadTabFile closes the file handle */
 hgLoadTabFile(conn, ".", tableName, &f);
 
 hFreeConn(&conn);
 }
 
 
 
 int main(int argc, char *argv[])
 /* hash subsnp_class and var_str from UniVariation */
 /* read SNP for univar_id, lookup into univarHash */
 /* store univarHash elements in snpHash */
 /* read through chrN_snpTmp, rewrite with extensions to individual chrom tables */
 {
 struct slName *chromList, *chromPtr;
 char tableName[64];
 
 if (argc != 2)
     usage();
 
 
 snpDb = argv[1];
 hSetDb(snpDb);
 
 /* check for necessary tables */
 if(!hTableExistsDb(snpDb, "SNP"))
     errAbort("missing SNP table");
 if(!hTableExistsDb(snpDb, "UniVariation"))
     errAbort("missing UniVariation table");
 
 chromList = hAllChromNamesDb(snpDb);
 
 errorFileHandle = mustOpen("snpClassAndObserved.errors", "w");
 
 univarHash = getUnivarHash();
 snpHash = getSNPHash();
 
 for (chromPtr = chromList; chromPtr != NULL; chromPtr = chromPtr->next)
     {
     safef(tableName, ArraySize(tableName), "%s_snpTmp", chromPtr->name);
     if (!hTableExists(tableName)) continue;
     verbose(1, "chrom = %s\n", chromPtr->name);
     processSnps(chromPtr->name);
     }
 
 carefulClose(&errorFileHandle);
 
 for (chromPtr = chromList; chromPtr != NULL; chromPtr = chromPtr->next)
     {
     safef(tableName, ArraySize(tableName), "%s_snpTmp", chromPtr->name);
     if (!hTableExists(tableName)) continue;
     recreateDatabaseTable(chromPtr->name);
     verbose(1, "loading chrom = %s\n", chromPtr->name);
     loadDatabase(chromPtr->name);
     }
 
 return 0;
 }