4898794edd81be5285ea6e544acbedeaeb31bf78
max
  Tue Nov 23 08:10:57 2021 -0800
Fixing pointers to README file for license in all source code files. refs #27614

diff --git src/hg/encode3/encodeDataWarehouse/lib/edwLib.c src/hg/encode3/encodeDataWarehouse/lib/edwLib.c
index 87d64fc..2a224c9 100644
--- src/hg/encode3/encodeDataWarehouse/lib/edwLib.c
+++ src/hg/encode3/encodeDataWarehouse/lib/edwLib.c
@@ -1,1688 +1,1688 @@
 /* edwLib - routines shared by various encodeDataWarehouse programs.    See also encodeDataWarehouse
  * module for tables and routines to access structs built on tables. */
 
 /* Copyright (C) 2014 The Regents of the University of California 
- * See README in this or parent directory for licensing information. */
+ * See kent/LICENSE or http://genome.ucsc.edu/license/ for licensing information. */
 
 #include "common.h"
 #include "hex.h"
 #include "dystring.h"
 #include "jksql.h"
 #include "errAbort.h"
 #include "openssl/sha.h"
 #include "base64.h"
 #include "basicBed.h"
 #include "bigBed.h"
 #include "portable.h"
 #include "cheapcgi.h"
 #include "genomeRangeTree.h"
 #include "md5.h"
 #include "htmshell.h"
 #include "obscure.h"
 #include "bamFile.h"
 #include "raToStruct.h"
 #include "web.h"
 #include "encode3/encode3Valid.h"
 #include "encodeDataWarehouse.h"
 #include "edwLib.h"
 #include "edwFastqFileFromRa.h"
 #include "edwBamFileFromRa.h"
 #include "edwQaWigSpotFromRa.h"
 
 
 /* System globals - just a few ... for now.  Please seriously not too many more. */
 char *edwDatabase = "encodeDataWarehouse";   // Database name handed to the sqlConnect calls below
 int edwSingleFileTimeout = 4*60*60;   // How many seconds we give ourselves to fetch a single file
 
 /* Fixed locations and addresses.  The directory strings end in '/' so that relative
  * names can be appended to them directly. */
 char *edwRootDir = "/data/encode3/encodeDataWarehouse/";
 char *eapRootDir = "/data/encode3/encodeAnalysisPipeline/";
 char *edwValDataDir = "/data/encode3/encValData/";
 char *edwDaemonEmail = "edw@encodedcc.sdsc.edu";
 
 struct sqlConnection *edwConnect()
 /* Open a connection to the database named by edwDatabase.  Meant for read only use. */
 {
 struct sqlConnection *conn = sqlConnect(edwDatabase);
 return conn;
 }
 
 struct sqlConnection *edwConnectReadWrite()
 /* Open a connection to edwDatabase using the "encodeDataWarehouse" profile, 
  * which is the one used for read/write access. */
 {
 struct sqlConnection *conn = sqlConnectProfile("encodeDataWarehouse", edwDatabase);
 return conn;
 }
 
 char *edwPathForFileId(struct sqlConnection *conn, long long fileId)
 /* Look up edwFileName for fileId and return it with edwRootDir in front.  The result
  * is a cloned string which the caller should eventually freeMem. */
 {
 char sql[256];
 sqlSafef(sql, sizeof(sql), "select edwFileName from edwFile where id=%lld", fileId);
 
 char relName[PATH_LEN];
 sqlNeedQuickQuery(conn, sql, relName, sizeof(relName));
 
 char fullPath[512];
 safef(fullPath, sizeof(fullPath), "%s%s", edwRootDir, relName);
 return cloneString(fullPath);
 }
 
 char *edwTempDir()
 /* Returns pointer to edwTempDir, creating the directory on first call.  The returned 
  * string ends in '/' and lives in a static buffer that is shared, so please don't 
  * modify or free it. */
 {
 static char path[PATH_LEN];
 if (path[0] == 0)
     {
     /* Note code elsewhere depends on tmp dir being inside of edwRootDir - also good
      * to have it there so move to a permanent file is quick and unlikely to fail. */
     char dir[PATH_LEN];
     safef(dir, sizeof(dir), "%s%s", edwRootDir, "tmp");
     makeDirsOnPath(dir);
 
     /* Add the trailing slash through safef rather than strcat so the write into
      * the static buffer stays size checked. */
     safef(path, sizeof(path), "%s/", dir);
     }
 return path;
 }
 
 char *edwTempDirForToday(char dir[PATH_LEN])
 /* Write <edwRootDir><year/month/day/>tmp/ for the current time into dir and return dir.
  * The directory is created the first time a given day is seen. */
 {
 char today[PATH_LEN];
 edwDirForTime(edwNow(), today);
 safef(dir, PATH_LEN, "%s%stmp/", edwRootDir, today);
 
 /* makeDirsOnPath is time consuming, so remember which day we last made it for
  * and skip the call when nothing has changed. */
 static char prevDay[PATH_LEN] = "";
 if (sameString(today, prevDay))
     return dir;
 strcpy(prevDay, today);
 
 /* Chop the trailing slash while making the directory, then put it back. */
 char *lastSlash = dir + strlen(dir) - 1;
 *lastSlash = 0;
 makeDirsOnPath(dir);
 *lastSlash = '/';
 return dir;
 }
 
 
 long long edwGettingFile(struct sqlConnection *conn, char *submitDir, char *submitFileName)
 /* Check whether a fetch of submitFileName from submitDir is on record.  Returns the id
  * of the newest matching edwFile row, even if that upload is not complete, or -1 if
  * there is no such row.  Rows with an error message or a deprecation note are ignored,
  * as are unfinished rows started within the last edwSingleFileTimeout seconds. */
 {
 char sql[PATH_LEN+512];
 
 /* No directory record means no file record either. */
 sqlSafef(sql, sizeof(sql), "select id from edwSubmitDir where url='%s'", submitDir);
 int dirId = sqlQuickNum(conn, sql);
 if (dirId <= 0)
     return -1;
 
 /* Look for the most recent file that matches submitDir and submitFileName. */
 long long cutoff = (long long)edwNow() - edwSingleFileTimeout;
 sqlSafef(sql, sizeof(sql), 
     "select id from edwFile "
     "where submitFileName='%s' and submitDirId = %d and errorMessage = '' and deprecated=''"
     " and (endUploadTime > startUploadTime or startUploadTime < %lld) "
     "order by submitId desc limit 1"
     , submitFileName, dirId, cutoff);
 long long fileId = sqlQuickLongLong(conn, sql);
 return (fileId == 0) ? -1 : fileId;
 }
 
 long long edwGotFile(struct sqlConnection *conn, char *submitDir, char *submitFileName, 
     char *md5, long long size)
 /* See if we already got file.  Return fileId if we do,  otherwise -1.  This returns
  * TRUE based mostly on the MD5sum.  For short files (less than 100k) then we also require
  * the submitDir and submitFileName to match.  This is to cover the case where you might
  * have legitimate empty files duplicated even though they were computed based on different
  * things. For instance coming up with no peaks is a legitimate result for many chip-seq
  * experiments. */
 {
 /* For large files just rely on MD5. */
 char query[PATH_LEN+512];
 if (size > 100000)
     {
     sqlSafef(query, sizeof(query),
         "select id from edwFile where md5='%s' order by submitId desc limit 1" , md5);
     long long result = sqlQuickLongLong(conn, query);
     if (result == 0)
         result = -1;
     return result;
     }
 
 /* Rest of the routine deals with smaller files,  which we are less worried about
  * duplicating,  and indeed expect a little duplication of the empty file if none
  * other. */
 
 /* First see if we have even got the directory. */
 sqlSafef(query, sizeof(query), "select id from edwSubmitDir where url='%s'", submitDir);
 int submitDirId = sqlQuickNum(conn, query);
 if (submitDirId <= 0)
     return -1;
 
 /* The complex truth is that we may have gotten this file multiple times. 
  * We return the most recent version where it got uploaded and passed the post-upload
  * MD5 sum, and thus where the MD5 field is filled in the database. */
 sqlSafef(query, sizeof(query), 
     "select md5,id from edwFile "
     "where submitFileName='%s' and submitDirId = %d and md5 != '' "
     "order by submitId desc limit 1"
     , submitFileName, submitDirId);
 struct sqlResult *sr = sqlGetResult(conn, query);
 char **row;
 /* Same width as the return type, so ids are not truncated where long is 32 bits. */
 long long fileId = -1;
 if ((row = sqlNextRow(sr)) != NULL)
     {
     char *dbMd5 = row[0];
     if (sameWord(md5, dbMd5))
         fileId = sqlLongLong(row[1]);
     }
 sqlFreeResult(&sr);
 
 return fileId;
 }
 
 long long edwNow()
 /* Current time, as reported by time(), in seconds since the Epoch. */
 {
 time_t now = time(NULL);
 return now;
 }
 
 /* This is size of base64 encoded hash plus 1 for the terminating zero. */
 #define EDW_SID_SIZE 65   
 
 static void makeShaBase64(unsigned char *inputBuf, int inputSize, char out[EDW_SID_SIZE])
 /* Hash inputSize bytes of inputBuf with SHA384, base64 encode the 48 byte digest,
  * and copy the zero terminated result into out. */
 {
 unsigned char digest[48];
 SHA384(inputBuf, inputSize, digest);
 
 char *encoded = base64Encode((char*)digest, sizeof(digest));
 memcpy(out, encoded, EDW_SID_SIZE);
 out[EDW_SID_SIZE-1] = 0; 
 freeMem(encoded);
 }
 
 void edwMakeSid(char *user, char sid[EDW_SID_SIZE])
 /* Convert users to sid.  The sid is the base64 encoded SHA384 hash (see makeShaBase64)
  * of a fixed 512 byte buffer built from a constant salt and the user string, so the 
  * same user always gets the same sid. */
 {
 /* Salt it well with stuff that is reproducible but hard to guess */
 unsigned char inputBuf[512];
 memset(inputBuf, 0, sizeof(inputBuf));
 int i;
 /* Fill the whole buffer, two bytes at a time, with a pattern derived from the index. */
 for (i=0; i<ArraySize(inputBuf); i += 2)
     {
     inputBuf[i] = i ^ 0x29;
     inputBuf[i+1] = ~i;
     }
 /* Write the salted user string over the front of the buffer.  Whatever lies past its 
  * terminating zero keeps the pattern from above, and all 512 bytes get hashed. */
 safef((char*)inputBuf, sizeof(inputBuf), 
 	"186ED79BAEXzeusdioIsdklnw88e86cd73%s<*#$*(#)!DSDFOUIHLjksdf", user);
 makeShaBase64(inputBuf, sizeof(inputBuf), sid);
 }
 
 static void edwVerifySid(char *user, char *sidToCheck)
 /* Make sure sid/user combo is good.  Recomputes the sid for user and aborts with an
  * authentication error if sidToCheck is missing (NULL) or is not the same string. */
 {
 char sid[EDW_SID_SIZE];
 edwMakeSid(user, sid);
 /* Compare as strings.  sidToCheck comes from outside and may be shorter than
  * EDW_SID_SIZE, so a fixed length memcmp could read past the end of it. */
 if (sidToCheck == NULL || strcmp(sidToCheck, sid) != 0)
     errAbort("Authentication failed, sid %s", (sidToCheck ? "fail" : "miss"));
 }
 
 char *edwGetEmailAndVerify()
 /* Get email from persona-managed cookies and validate them.
  * Return email address if all is good and user is logged in.
  * If user not logged in return NULL.  If user logged in but
  * otherwise things are wrong abort. */
 {
 char *email = findCookieData("email");
 if (email)
     {
     char *sid = findCookieData("sid");
     edwVerifySid(email, sid);
     }
 return email;
 }
 
 
 struct edwUser *edwUserFromEmail(struct sqlConnection *conn, char *email)
 /* Load the edwUser row(s) whose email matches.  NULL if there are none. */
 {
 char sql[256];
 sqlSafef(sql, sizeof(sql), "select * from edwUser where email='%s'", email);
 return edwUserLoadByQuery(conn, sql);
 }
 
 struct edwUser *edwUserFromId(struct sqlConnection *conn, int id)
 /* Load the edwUser row with the given id.  NULL if there is none. */
 {
 char sql[256];
 sqlSafef(sql, sizeof(sql), "select * from edwUser where id='%d'", id);
 return edwUserLoadByQuery(conn, sql);
 }
 
 int edwUserIdFromFileId(struct sqlConnection *conn, int fId)
 /* Return id of user who submitted the file originally, or 0 if the file, its submission 
  * or the user can't be found. */
 {
 char query[256];
 
 /* The submission that the file belongs to records who submitted it. */
 sqlSafef(query, sizeof(query), 
     "select s.userId from edwSubmit s, edwFile f where f.submitId=s.id and f.id='%d'", fId);
 int userId = sqlQuickNum(conn, query);
 
 /* Only hand the id back if that user still exists.  This looks the id up in edwUser
  * directly, since a user id is not a submission id and can't be joined to edwSubmit.id. */
 sqlSafef(query, sizeof(query), "select id from edwUser where id='%d'", userId);
 return sqlQuickNum(conn, query);
 }
 
 struct edwUser *edwFindUserFromFileId(struct sqlConnection *conn, int fId)
 /* Find the id of the user who submitted file fId and load that user's record. */
 {
 int userId = edwUserIdFromFileId(conn, fId);
 return edwUserFromId(conn, userId);
 }
 
 char *edwFindOwnerNameFromFileId(struct sqlConnection *conn, int fId)
 /* Return email of submitter of file, or "an unknown user" if no submitter record is 
  * found.  Either way the result is a cloned string, so callers can treat both cases 
  * alike and freeMem it when done. */
 {
 struct edwUser *owner = edwFindUserFromFileId(conn, fId);
 if (owner == NULL)
     return cloneString("an unknown user");
 return cloneString(owner->email);
 }
 
 int edwFindUserIdFromEmail(struct sqlConnection *conn, char *userEmail)
 /* Return the id column of the edwUser row with this email. */
 {
 char sql[256];
 sqlSafef(sql, sizeof(sql), "select id from edwUser where email = '%s'", userEmail);
 int userId = sqlQuickNum(conn, sql);
 return userId;
 }
 
 boolean edwUserIsAdmin(struct sqlConnection *conn, char *userEmail)
 /* TRUE when the isAdmin column for the user with this email is 1, FALSE otherwise. */
 {
 char sql[256];
 sqlSafef(sql, sizeof(sql), "select isAdmin from edwUser where email = '%s'", userEmail);
 return sqlQuickNum(conn, sql) == 1;
 }
 
 void edwWarnUnregisteredUser(char *email)
 /* Warn that no account exists for email and say who to ask for one. */
 {
 warn("No user exists with email %s. If you need an account please contact your "
      "ENCODE DCC data wrangler and have them create an account for you.", email);
 }
 
 
 struct edwUser *edwMustGetUserFromEmail(struct sqlConnection *conn, char *email)
 /* Load the user for email.  If there is none, put up the unregistered user 
  * warning and call noWarnAbort. */
 {
 struct edwUser *user = edwUserFromEmail(conn, email);
 if (user != NULL)
     return user;
 
 edwWarnUnregisteredUser(email);
 noWarnAbort();
 return NULL;
 }
 
 int edwGetHost(struct sqlConnection *conn, char *hostName)
 /* Return id of the edwHost row named hostName.  When there is no such row yet, insert
  * one (stamped with the current time, paraFetchStreams 10) and return its new id. */
 {
 char sql[512];
 sqlSafef(sql, sizeof(sql), "select id from edwHost where name='%s'", hostName);
 int existingId = sqlQuickNum(conn, sql);
 if (existingId > 0)
     return existingId;
 
 /* Not in table yet, so add it. */
 sqlSafef(sql, sizeof(sql), 
     "insert edwHost (name, firstAdded, paraFetchStreams) values('%s', %lld, 10)", 
     hostName, edwNow());
 sqlUpdate(conn, sql);
 return sqlLastAutoId(conn);
 }
 
 int edwGetSubmitDir(struct sqlConnection *conn, int hostId, char *submitDir)
 /* Return id of the edwSubmitDir row whose url is submitDir.  When there is no such row 
  * yet, insert one for hostId stamped with the current time and return its new id. */
 {
 char sql[512];
 sqlSafef(sql, sizeof(sql), "select id from edwSubmitDir where url='%s'", submitDir);
 int existingId = sqlQuickNum(conn, sql);
 if (existingId > 0)
     return existingId;
 
 /* Not in table yet, so add it. */
 long long now = edwNow();
 sqlSafef(sql, sizeof(sql), 
    "insert edwSubmitDir (url, firstAdded, hostId) values('%s', %lld, %d)", 
    submitDir, now, hostId);
 sqlUpdate(conn, sql);
 return sqlLastAutoId(conn);
 }
 
 void edwMakeLicensePlate(char *prefix, int ix, char *out, int outSize)
 /* Make a license-plate type string composed of prefix + funky coding of ix
  * and put result in out.  The coding is three digits followed by three capital
  * letters, so ix of 1 comes out as prefix + "000AAA".  Aborts if ix is not in the 
  * range 1 to 10*10*10*26*26*26 or if outSize can't hold prefix, six characters 
  * and the terminating zero. */
 {
 int maxIx = 10*10*10*26*26*26;
 /* Zero has to be rejected as well, since ix-1 below would go negative and the 
  * remainders would fall outside of 'A'-'Z'. */
 if (ix <= 0)
     errAbort("ix must be positive in edwMakeLicensePlate");
 if (ix > maxIx)
     errAbort("ix exceeds max in edwMakeLicensePlate.  ix %d, max %d\n", ix, maxIx);
 int prefixSize = strlen(prefix);
 int minSize = prefixSize + 6 + 1;
 if (outSize < minSize)
     errAbort("outSize (%d) not big enough in edwMakeLicensePlate", outSize);
 
 /* Copy in prefix. */
 strcpy(out, prefix);
 
 /* Generate the 123ABC part of license plate backwards. */
 char *s = out+minSize;
 int x = ix - 1;	// -1 so start with AAA not AAB
 *(--s) = 0;	// zero tag at end;
 int i;
 for (i=0; i<3; ++i)
     {
     int remainder = x%26;
     *(--s) = 'A' + remainder;
     x /= 26;
     }
 for (i=0; i<3; ++i)
     {
     int remainder = x%10;
     *(--s) = '0' + remainder;
     x /= 10;
     }
 }
 
 void edwDirForTime(time_t sinceEpoch, char dir[PATH_LEN])
 /* Fill dir with "year/month/day/" for the given time, taken as UTC. */
 {
 struct tm utc;
 gmtime_r(&sinceEpoch, &utc);
 
 /* struct tm counts years from 1900 and months from zero. */
 int year = utc.tm_year + 1900;
 int month = utc.tm_mon + 1;
 int day = utc.tm_mday;
 safef(dir, PATH_LEN, "%d/%d/%d/", year, month, day);
 }
 
 char *lastMatchCharExcept(char *start, char *end, char match, char except)
 /* Return last char between start up to but not including end that is match.
  * However if except occurs between end and this match, return NULL instead.
  * Also return NULL if there is no match, which includes the case of an empty
  * range (end <= start). */
 {
 char *e = end;
 /* Test before stepping back so that e never gets moved to before start. */
 while (e > start)
     {
     char c = *--e;
     if (c == except)
        return NULL;
     if (c == match)
        return e;
     }
 return NULL;
 }
 
 void edwMakeBabyName(unsigned long id, char *baseName, int baseNameSize)
 /* Turn a numerical id (1 or more) into a pronounceable name made of consonant/vowel 
  * pairs, and put it zero terminated into baseName.  Aborts if baseNameSize is too
  * small to hold it. */
 {
 char *consonants = "bdfghjklmnprstvwxyz";   // Avoid c and q because make sound ambiguous
 char *vowels = "aeiou";
 int consonantCount = strlen(consonants);
 int vowelCount = strlen(vowels);
 assert(id >= 1);
 
 unsigned long rest = id - 1;   /* Ids start at one, the coding at zero */
 int used = 0;
 for (;;)
     {
     /* Each pair consumes one vowel digit and then one consonant digit. */
     char vowel = vowels[rest%vowelCount];
     rest /= vowelCount;
     char consonant = consonants[rest%consonantCount];
     rest /= consonantCount;
 
     if (used + 2 >= baseNameSize)
         errAbort("Not enough room for %lu in %d letters in edwMakeBabyName", id, baseNameSize);
     baseName[used++] = consonant;
     baseName[used++] = vowel;
     if (rest == 0)
         break;
     }
 baseName[used] = 0;
 }
 
 char *edwFindDoubleFileSuffix(char *path)
 /* Return pointer to the suffix of the file name part of path.  Normally this is the
  * last '.' after the last '/'.  When that suffix is ".gz" or ".bigBed" and there is
  * another '.' before it in the file name, point to that earlier dot instead.  
  * If the file name has no dot at all, return pointer to the end of path. */
 {
 char *pathEnd = path + strlen(path);
 char *suffix = lastMatchCharExcept(path, pathEnd, '.', '/');
 if (suffix == NULL)
     return pathEnd;
 
 if (sameString(suffix, ".gz") || sameString(suffix, ".bigBed"))
     {
     char *earlier = lastMatchCharExcept(path, suffix, '.', '/');
     if (earlier != NULL)
         return earlier;
     }
 return suffix;
 }
 
 void edwMakeFileNameAndPath(int edwFileId, char *submitFileName, char edwFile[PATH_LEN], char serverPath[PATH_LEN])
 /* Convert file id to local file name, and full file path. Make any directories needed
  * along serverPath. */
 {
 /* Preserve suffix.  Give ourselves up to two suffixes. */
 char *suffix = edwFindDoubleFileSuffix(submitFileName);
 
 /* Figure out edw file name, starting with baseName. */
 char baseName[32];
 edwMakeBabyName(edwFileId, baseName, sizeof(baseName));
 
 /* Figure out directory and make any components not already there. */
 char edwDir[PATH_LEN];
 edwDirForTime(edwNow(), edwDir);
 char uploadDir[PATH_LEN];
 safef(uploadDir, sizeof(uploadDir), "%s%s", edwRootDir, edwDir);
 makeDirsOnPath(uploadDir);
 
 /* Figure out full file names */
 safef(edwFile, PATH_LEN, "%s%s%s", edwDir, baseName, suffix);
 safef(serverPath, PATH_LEN, "%s%s", edwRootDir, edwFile);
 }
 
 char *edwSetting(struct sqlConnection *conn, char *name)
 /* Look up val for name in edwSettings.  NULL if there is no such setting.  
  * FreeMem result when done. */
 {
 char sql[256];
 sqlSafef(sql, sizeof(sql), "select val from edwSettings where name='%s'", name);
 char *val = sqlQuickString(conn, sql);
 return val;
 }
 
 char *edwRequiredSetting(struct sqlConnection *conn, char *name)
 /* Like edwSetting, but aborts instead of returning NULL.  FreeMem result when done. */
 {
 char *val = edwSetting(conn, name);
 if (val != NULL)
     return val;
 errAbort("Required %s setting is not defined in edwSettings table", name);
 return NULL;
 }
 
 char *edwLicensePlateHead(struct sqlConnection *conn)
 /* Return license plate prefix for current database - something like TSTFF or DEVFF or ENCFF.
  * The prefix is read from the required "prefix" setting on first call and kept in a 
  * static buffer after that, so don't modify or free the result. */
 {
 static char head[32];
 if (head[0] == 0)
      {
      char *prefix = edwRequiredSetting(conn, "prefix");
      safef(head, sizeof(head), "%s", prefix);
      /* The setting is ours to free once it's been copied. */
      freeMem(prefix);
      }
 return head;
 }
 
 
 /* Host name and submit dir url under which locally added files get recorded 
  * in edwHost and edwSubmitDir. */
 static char *localHostName = "localhost";
 static char *localHostDir = "";  
 
 static int getLocalHost(struct sqlConnection *conn)
 /* Return id of the edwHost row for localHostName, inserting the row first if
  * it is not there yet. */
 {
 char sql[256];
 sqlSafef(sql, sizeof(sql), "select id from edwHost where name = '%s'", localHostName);
 int id = sqlQuickNum(conn, sql);
 if (id != 0)
     return id;
 
 sqlSafef(sql, sizeof(sql), "insert edwHost(name, firstAdded) values('%s', %lld)",
     localHostName,  edwNow());
 sqlUpdate(conn, sql);
 return sqlLastAutoId(conn);
 }
 
 static int getLocalSubmitDir(struct sqlConnection *conn)
 /* Return id of the edwSubmitDir row used for local submissions, inserting the
  * row (and the local host row it refers to) first if need be. */
 {
 int hostId = getLocalHost(conn);
 
 char sql[256];
 sqlSafef(sql, sizeof(sql), "select id from edwSubmitDir where url='%s' and hostId=%d", 
     localHostDir, hostId);
 int id = sqlQuickNum(conn, sql);
 if (id != 0)
     return id;
 
 sqlSafef(sql, sizeof(sql), "insert edwSubmitDir(url,hostId,firstAdded) values('%s',%d,%lld)",
     localHostDir, hostId, edwNow());
 sqlUpdate(conn, sql);
 return sqlLastAutoId(conn);
 }
 
 static int getLocalSubmit(struct sqlConnection *conn)
 /* Return id of the single edwSubmit row that all local additions hang off of,
  * inserting it (stamped with the current time) if it does not exist yet. */
 {
 int dirId = getLocalSubmitDir(conn);
 
 char sql[256];
 sqlSafef(sql, sizeof(sql), "select id from edwSubmit where submitDirId='%d'", dirId);
 int id = sqlQuickNum(conn, sql);
 if (id != 0)
     return id;
 
 sqlSafef(sql, sizeof(sql), "insert edwSubmit (submitDirId,startUploadTime) values(%d,%lld)",
     dirId, edwNow());
 sqlUpdate(conn, sql);
 return sqlLastAutoId(conn);
 }
 
 char **sqlNeedNextRow(struct sqlResult *sr)
 /* Fetch the next row from sr, aborting if there is none.  The abort message says
  * nothing about which query was involved, so save this for cases where running 
  * out of rows would be unusual. */
 {
 char **row = sqlNextRow(sr);
 if (row != NULL) 
     return row;
 errAbort("Unexpected empty result from database.");
 return NULL;
 }
 
 void edwUpdateFileTags(struct sqlConnection *conn, long long fileId, struct dyString *tags)
 /* Set the tags field of edwFile row fileId to the contents of tags. */
 {
 /* Tags can be of any length, so build the update in a dyString. */
 struct dyString *update = dyStringNew(0);
 sqlDyStringPrintf(update, "update edwFile set tags='%s'", tags->string);
 sqlDyStringPrintf(update, " where id=%lld", fileId);
 
 sqlUpdate(conn, update->string);
 dyStringFree(&update);
 }
 
 struct edwFile *edwGetLocalFile(struct sqlConnection *conn, char *localAbsolutePath, 
     char *symLinkMd5Sum)
 /* Get record of local file from database, adding it if it doesn't already exist.
  * Can make it a symLink rather than a copy in which case pass in valid MD5 sum
  * for symLinkM5dSum.  Pass NULL for symLinkMd5Sum to have the file copied into the 
  * warehouse and its MD5 sum computed here.  Aborts if localAbsolutePath is not 
  * absolute or the file does not exist. */
 {
 /* First do a reality check on the local absolute path.  Is there a file there? */
 /* NOTE(review): the message below names edwAddLocalFile rather than this function. */
 if (localAbsolutePath[0] != '/')
     errAbort("Using relative path in edwAddLocalFile.");
 long long size = fileSize(localAbsolutePath);
 if (size == -1)
     errAbort("%s does not exist", localAbsolutePath);
 long long updateTime = fileModTime(localAbsolutePath);
 
 /* Get file if it's in database already. */
 int submitDirId = getLocalSubmitDir(conn);
 int submitId = getLocalSubmit(conn);
 char query[256+PATH_LEN];
 sqlSafef(query, sizeof(query), "select * from edwFile where submitId=%d and submitFileName='%s'",
     submitId, localAbsolutePath);
 struct edwFile *ef = edwFileLoadByQuery(conn, query);
 
 /* If we got something in database, check update time and size, and if it's no change just 
  * return existing database id. */
 if (ef != NULL && ef->updateTime == updateTime && ef->size == size)
     return ef;
 
 /* If we got here, then we need to make a new file record. Start with pretty empty record
  * that just has file ID, submitted file name and a few things*/
 /* NOTE(review): a stale ef loaded above is not freed before we go on - confirm
  * whether that matters to callers. */
 sqlSafef(query, sizeof(query), 
     "insert edwFile (submitId,submitDirId,submitFileName,startUploadTime) "
             " values(%d, %d, '%s', %lld)"
 	    , submitId, submitDirId, localAbsolutePath, edwNow());
 sqlUpdate(conn, query);
 long long fileId = sqlLastAutoId(conn);
 
 /* Create big data warehouse file/path name. */
 char edwFile[PATH_LEN], edwPath[PATH_LEN];
 edwMakeFileNameAndPath(fileId, localAbsolutePath, edwFile, edwPath);
 
 /* We're a little paranoid so md5 it */
 char *md5;
 
 /* Do copy or symbolic linking of file into warehouse managed dir. */
 if (symLinkMd5Sum)
     {
     /* Caller vouches for the sum, so the file is not read here. */
     md5 = symLinkMd5Sum;
     makeSymLink(localAbsolutePath, edwPath);  
     }
 else
     {
     /* The sum is taken over the original file, not over the copy just made. */
     copyFile(localAbsolutePath, edwPath);
     md5 = md5HexForFile(localAbsolutePath);
     }
 
 /* Update file record. */
 sqlSafef(query, sizeof(query), 
     "update edwFile set edwFileName='%s', endUploadTime=%lld,"
                        "updateTime=%lld, size=%lld, md5='%s' where id=%lld"
 			, edwFile, edwNow(), updateTime, size, md5, fileId);
 sqlUpdate(conn, query);
 
 /* Now, it's a bit of a time waste, but cheap in code, to just load it back from DB. */
 sqlSafef(query, sizeof(query), "select * from edwFile where id=%lld", fileId);
 return edwFileLoadByQuery(conn, query);
 }
 
 struct edwFile *edwFileAllIntactBetween(struct sqlConnection *conn, int startId, int endId)
 /* Load every edwFile row with id from startId through endId, both included, that 
  * has nonzero endUploadTime and updateTime and neither an error message nor a 
  * deprecation note. */
 {
 char sql[256];
 sqlSafef(sql, sizeof(sql), 
     "select * from edwFile where id>=%d and id<=%d and endUploadTime != 0 "
     "and updateTime != 0 and errorMessage = '' and deprecated = ''", 
     startId, endId);
 struct edwFile *fileList = edwFileLoadByQuery(conn, sql);
 return fileList;
 }
 
 struct edwFile *edwFileFromId(struct sqlConnection *conn, long long fileId)
 /* Load the edwFile row with id fileId.  NULL if there is none. */
 {
 char sql[128];
 sqlSafef(sql, sizeof(sql), "select * from edwFile where id=%lld", fileId);
 struct edwFile *ef = edwFileLoadByQuery(conn, sql);
 return ef;
 }
 
 struct edwFile *edwFileFromIdOrDie(struct sqlConnection *conn, long long fileId)
 /* Load the edwFile row with id fileId, aborting if there is none. */
 {
 struct edwFile *ef = edwFileFromId(conn, fileId);
 if (ef != NULL)
     return ef;
 errAbort("Couldn't find file for id %lld\n", fileId);
 return NULL;
 }
 
 struct edwValidFile *edwValidFileFromFileId(struct sqlConnection *conn, long long fileId)
 /* Load the edwValidFile row for fileId.  NULL if the file has no such row, 
  * which is the case when it has not been validated. */
 {
 char sql[128];
 sqlSafef(sql, sizeof(sql), "select * from edwValidFile where fileId=%lld", fileId);
 struct edwValidFile *vf = edwValidFileLoadByQuery(conn, sql);
 return vf;
 }
 
 struct edwExperiment *edwExperimentFromAccession(struct sqlConnection *conn, char *acc)
 /* Load the edwExperiment row with accession acc, something like 'ENCSR123ABC'. */
 {
 char sql[128];
 sqlSafef(sql, sizeof(sql), "select * from edwExperiment where accession='%s'", acc);
 struct edwExperiment *experiment = edwExperimentLoadByQuery(conn, sql);
 return experiment;
 }
 
 struct genomeRangeTree *edwMakeGrtFromBed3List(struct bed3 *bedList)
 /* Return a new genomeRangeTree holding the range of every item on bedList. */
 {
 struct genomeRangeTree *grt = genomeRangeTreeNew();
 struct bed3 *item = bedList;
 while (item != NULL)
     {
     genomeRangeTreeAdd(grt, item->chrom, item->chromStart, item->chromEnd);
     item = item->next;
     }
 return grt;
 }
 
 struct edwAssembly *edwAssemblyForUcscDb(struct sqlConnection *conn, char *ucscDb)
 /* Load the edwAssembly row whose ucscDb field matches, aborting if there is none. */
 {
 char sql[256];
 sqlSafef(sql, sizeof(sql), "select * from edwAssembly where ucscDb='%s'", ucscDb);
 struct edwAssembly *assembly = edwAssemblyLoadByQuery(conn, sql);
 if (assembly != NULL)
     return assembly;
 errAbort("Can't find assembly for %s", ucscDb);
 return NULL;
 }
 
 struct edwAssembly *edwAssemblyForId(struct sqlConnection *conn, long long id)
 /* Load the edwAssembly row with the given id, aborting if there is none. */
 {
 char sql[128];
 sqlSafef(sql, sizeof(sql), "select * from edwAssembly where id=%lld", id);
 struct edwAssembly *assembly = edwAssemblyLoadByQuery(conn, sql);
 if (assembly != NULL)
     return assembly;
 errAbort("Can't find assembly for %lld", id);
 return NULL;
 }
 
 char *edwSimpleAssemblyName(char *assembly)
 /* Given compound name like male.hg19 return just hg19.  Names that don't end in a dot 
  * followed by one of the assemblies listed below are returned as they are.  The 
  * simple name is where we want to do enrichment calcs, rather than in some
  * subspace such as male, female, etc. */
 {
 static char *specialAsm[] = {".hg19",".hg38",".mm9",".mm10",".dm3",".ce10",".dm6"};
 char **special = specialAsm;
 char **specialEnd = specialAsm + ArraySize(specialAsm);
 for ( ; special < specialEnd; ++special)
     {
     if (endsWith(assembly, *special))
         return *special + 1;    /* +1 to skip over the dot */
     }
 return assembly;
 }
 
 
 struct genomeRangeTree *edwGrtFromBigBed(char *fileName)
 /* Open bigBed file and return a genome range tree holding start and end of every
  * interval in it, chromosome by chromosome.  Blocks are not looked at. */
 {
 struct bbiFile *bbi = bigBedFileOpen(fileName);
 struct bbiChromInfo *chromList = bbiChromList(bbi);
 struct genomeRangeTree *grt = genomeRangeTreeNew();
 
 struct bbiChromInfo *chrom = chromList;
 while (chrom != NULL)
     {
     struct rbTree *tree = genomeRangeTreeFindOrAddRangeTree(grt, chrom->name);
 
     /* Intervals live in local memory that's thrown away once they are in the tree. */
     struct lm *lm = lmInit(0);
     struct bigBedInterval *iv = bigBedIntervalQuery(bbi, chrom->name, 0, chrom->size, 0, lm);
     for ( ; iv != NULL; iv = iv->next)
         rangeTreeAdd(tree, iv->start, iv->end);
     lmCleanup(&lm);
 
     chrom = chrom->next;
     }
 
 bigBedFileClose(&bbi);
 bbiChromInfoFreeList(&chromList);
 return grt;
 }
 
 boolean edwIsSupportedBigBedFormat(char *format)
 /* TRUE if format is the name of one of the entries in encode3BedTypeTable. */
 {
 int i = 0;
 while (i < encode3BedTypeCount)
     {
     if (sameString(format, encode3BedTypeTable[i].name))
         return TRUE;
     ++i;
     }
 return FALSE;
 }
 
 void edwWriteErrToTable(struct sqlConnection *conn, char *table, int id, char *err)
 /* Store err, after trimSpaces, in the errorMessage field of row id of table. */
 {
 char *message = trimSpaces(err);
 
 /* Message can be of any length, so build the update in a dyString. */
 struct dyString *update = dyStringNew(0);
 sqlDyStringPrintf(update, "update %s set errorMessage='%s' where id=%d", 
     table, message, id);
 sqlUpdate(conn, update->string);
 dyStringFree(&update);
 }
 
 void edwWriteErrToStderrAndTable(struct sqlConnection *conn, char *table, int id, char *err)
 /* Report err, after trimSpaces, through warn and also store it in the errorMessage 
  * field of row id of table. */
 {
 char *message = trimSpaces(err);
 warn("%s", message);
 edwWriteErrToTable(conn, table, id, err);
 }
 
 
 void edwAddJob(struct sqlConnection *conn, char *command)
 /* Add job to queue to run by inserting command as the commandLine of a new
  * edwJob row. */
 {
 /* Allow for escaping to lengthen the command, up to twice its size, same as is 
  * done for the url in edwMostRecentSubmission. */
 char query[256 + 2*strlen(command)];
 sqlSafef(query, sizeof(query), "insert into edwJob (commandLine) values('%s')", command);
 sqlUpdate(conn, query);
 }
 
 void edwAddQaJob(struct sqlConnection *conn, long long fileId)
 /* Queue up an "edwQaAgent <fileId>" job, which does QA on the file. */
 {
 char qaCommand[64];
 safef(qaCommand, sizeof(qaCommand), "edwQaAgent %lld", fileId);
 edwAddJob(conn, qaCommand);
 }
 
 int edwSubmitPositionInQueue(struct sqlConnection *conn, char *url, unsigned *retJobId)
 /* Return position of our URL in submission queue, that is the number of jobs not yet
  * started that come before it, or -1 if it is not in the queue.  Optionally return 
  * id in edwSubmitJob table of job through retJobId, which may be NULL.  The id 
  * returned is 0 if the URL is not in the queue. */
 {
 char query[256];
 sqlSafef(query, sizeof(query), "select id,commandLine from edwSubmitJob where startTime = 0");
 struct sqlResult *sr = sqlGetResult(conn, query);
 char **row;
 int aheadOfUs = -1;
 int pos = 0;
 unsigned jobId = 0;
 while ((row = sqlNextRow(sr)) != NULL)
     {
     /* Command line is expected to start with edwSubmit followed by the url. */
     char *line = row[1];
     char *edwSubmit = nextQuotedWord(&line);
     char *lineUrl = nextQuotedWord(&line);
     if (sameOk(edwSubmit, "edwSubmit") && sameOk(url, lineUrl))
         {
         /* Only take the id from the matching row, so that a miss doesn't 
          * report the id of some other job. */
         jobId = sqlUnsigned(row[0]);
         aheadOfUs = pos;
         break;
         }
     ++pos;
     }
 sqlFreeResult(&sr);
 if (retJobId != NULL)
     *retJobId = jobId;
 return aheadOfUs;
 }
 
 struct edwSubmit *edwSubmitFromId(struct sqlConnection *conn, long long id)
 /* Load the edwSubmit row with the given id.  NULL if there is none. */
 {
 char sql[256];
 sqlSafef(sql, sizeof(sql), "select * from edwSubmit where id=%lld", id);
 struct edwSubmit *submit = edwSubmitLoadByQuery(conn, sql);
 return submit;
 }
 
 
 struct edwSubmit *edwMostRecentSubmission(struct sqlConnection *conn, char *url)
 /* Load the edwSubmit row with the highest id among those for url.  It may be
  * for a submission that is still in progress. */
 {
 /* Sized so that the url still fits if escaping doubles its length. */
 char sql[128 + 2*strlen(url) + 1];
 sqlSafef(sql, sizeof(sql), 
     "select * from edwSubmit where url='%s' order by id desc limit 1", url);
 struct edwSubmit *submit = edwSubmitLoadByQuery(conn, sql);
 return submit;
 }
 
 long long edwSubmitMaxStartTime(struct edwSubmit *submit, struct sqlConnection *conn)
 /* Return the latest startUploadTime among the files of the submission.  If that
  * comes back as zero fall back on the startUploadTime of the submission itself. */
 {
 char sql[256];
 sqlSafef(sql, sizeof(sql), 
     "select max(startUploadTime) from edwFile where submitId=%u", submit->id);
 long long latest = sqlQuickLongLong(conn, sql);
 if (latest != 0)
     return latest;
 return submit->startUploadTime;
 }
 
 int edwSubmitCountNewValid(struct edwSubmit *submit, struct sqlConnection *conn)
 /* Count the files of the submission that have a row in edwValidFile. */
 {
 char sql[256];
 sqlSafef(sql, sizeof(sql), 
     "select count(*) from edwFile e,edwValidFile v where e.id = v.fileId and e.submitId=%u",
     submit->id);
 int validCount = sqlQuickNum(conn, sql);
 return validCount;
 }
 
 int edwSubmitCountErrors(struct edwSubmit *submit, struct sqlConnection *conn)
 /* Count the files of the submission that have a non-empty errorMessage. */
 {
 char sql[256];
 sqlSafef(sql, sizeof(sql), 
     "select count(*) from edwFile where submitId=%u and errorMessage != '' and errorMessage is not null",
     submit->id);
 int errorCount = sqlQuickNum(conn, sql);
 return errorCount;
 }
 
 boolean edwSubmitIsValidated(struct edwSubmit *submit, struct sqlConnection *conn)
 /* Return TRUE if the validator has made a decision on each file in the submission, 
  * that is if files with errors plus validated files add up to the number of new 
  * files.  This does not mean that they all passed validation. */
 {
 /* Is this off by one because of the validated.txt being in the submission but never validated? */
 int decided = edwSubmitCountErrors(submit,conn) + edwSubmitCountNewValid(submit, conn);
 return decided == submit->newFiles;
 }
 
 void edwAddSubmitJob(struct sqlConnection *conn, char *userEmail, char *url, boolean update)
 /* Add submission job to table and wake up daemon.  The job's command line is
  * edwSubmit, then -update if update is set, then the quoted url and the email. */
 {
 /* Create command and add it to edwSubmitJob table. */
 char command[strlen(url) + strlen(userEmail) + 256];
 safef(command, sizeof(command), "edwSubmit %s'%s' %s", (update ? "-update " : ""), url, userEmail);
 /* Allow for escaping to lengthen the command, up to twice its size.  It always 
  * has at least the quotes around the url in it. */
 char query[2*strlen(command) + 128];
 sqlSafef(query, sizeof(query), "insert edwSubmitJob (commandLine) values('%s')", command);
 sqlUpdate(conn, query);
 
 /* Write sync signal (any string ending with newline) to fifo to wake up daemon. */
 FILE *fifo = mustOpen("../userdata/edwSubmit.fifo", "w");
 fputc('\n', fifo);
 carefulClose(&fifo);
 }
 
 
 struct edwValidFile *edwFindElderReplicates(struct sqlConnection *conn, struct edwValidFile *vf)
 /* Load the edwValidFile rows that share vf's experiment, format and outputType and
  * whose id is smaller than vf's id.  These are the elder replicates; the younger
  * file is the one responsible for correlating against them.  A vf whose format is
  * "unknown" has no elders and NULL is returned without touching the database. */
 {
 struct edwValidFile *elderList = NULL;
 if (!sameString(vf->format, "unknown"))
     {
     char sql[256];
     sqlSafef(sql, sizeof(sql),
         "select * from edwValidFile where id<%d and experiment='%s' and format='%s'"
         " and outputType='%s'"
         , vf->id, vf->experiment, vf->format, vf->outputType);
     elderList = edwValidFileLoadByQuery(conn, sql);
     }
 return elderList;
 }
 
 void edwWebHeaderWithPersona(char *title)
 /* Write the HTTP Content-Type header and the HTML page head, open the <BODY> tag and
  * start the navigation bar layout.  The head pulls in the Persona login scripts.
  * NOTE(review): title is not used; the <TITLE> text is fixed - confirm that is intended. */
 {
 /* Everything that goes in <HEAD> after the title, written with one puts() per entry. */
 static char *headLines[] =
     {
     "<meta http-equiv='X-UA-Compatible' content='IE=Edge'>",
     /* Use Stanford ENCODE3 CSS for common look */
     "<link rel='stylesheet' href='/style/encode3.css' type='text/css'>",
     "<link rel='stylesheet' href='/style/encode3Ucsc.css' type='text/css'>",
     /* external link icon (box with arrow) is from FontAwesome (fa-external-link) */
     "<link href='//netdna.bootstrapcdn.com/font-awesome/4.0.3/css/font-awesome.css' rel='stylesheet'>",
     "<script type='text/javascript' SRC='/js/jquery.js'></script>",
     "<script type='text/javascript' SRC='/js/jquery.cookie.js'></script>",
     "<script type='text/javascript' src='https://login.persona.org/include.js'></script>",
     "<script type='text/javascript' src='/js/edwPersona.js'></script>",
     "</HEAD>",
     NULL,
     };
 char **pLine;
 printf("Content-Type:text/html\r\n");
 printf("\r\n\r\n");
 puts("<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\" "
     "\"http://www.w3.org/TR/html4/loose.dtd\">");
 printf("<HTML><HEAD><TITLE>%s</TITLE>\n", "ENCODE Data Warehouse");
 for (pLine = headLines; *pLine != NULL; ++pLine)
     puts(*pLine);
 /* layout with navigation bar */
 puts("<BODY>\n");
 edwWebNavBarStart();
 }
 
 
 void edwWebFooterWithPersona()
 /* Finish a page begun with edwWebHeaderWithPersona: close the navigation bar layout
  * first, then let htmlEnd() write the page's end tags. */
 {
 edwWebNavBarEnd();   /* closes what edwWebNavBarStart opened */
 htmlEnd();
 }
 
 
 void edwCreateNewUser(char *email)
 /* Create new user, checking that user does not already exist.  Opens its own
  * read/write connection, aborts with an error message when a row with this email is
  * already in edwUser, and otherwise inserts a new row holding only the email. */
 {
 struct sqlConnection *conn = edwConnectReadWrite();
 struct dyString *query = dyStringNew(0);
 /* Now make sure user is not already in user table. */
 sqlDyStringPrintf(query, "select count(*) from edwUser where email = '%s'", email);
 if (sqlQuickNum(conn, query->string) > 0)
     errAbort("User %s already exists", email);
 /* Do database insert, reusing the same dyString for the second statement. */
 dyStringClear(query);
 sqlDyStringPrintf(query, "insert into edwUser (email) values('%s')", email);
 sqlUpdate(conn, query->string);
 /* Release the query buffer as well as the connection. */
 dyStringFree(&query);
 sqlDisconnect(&conn);
 }
 
 void edwPrintLogOutButton()
 /* Write the "sign out" button to stdout.  It is an <INPUT> of type button with
  * id "signout"; no trailing newline is written. */
 {
 fputs("<INPUT TYPE=button NAME=\"signOut\" VALUE=\"sign out\" id=\"signout\">", stdout);
 }
 
 struct dyString *edwFormatDuration(long long seconds)
 /* Convert seconds to days/hours/minutes. Return result in a dyString you can free.
  * The amount of detail shrinks as the duration grows:
  *    a day or more     - "D days, H hours"
  *    an hour or more   - "H hours" followed by ", M minutes" when M is not zero
  *    less than an hour - "M minutes, S seconds", or just "S seconds" when M is zero */
 {
 struct dyString *dy = dyStringNew(0);
 /* Days are kept in a long long so that neither the division result nor the
  * multiplication below can overflow an int for very large inputs. */
 long long days = seconds/(3600*24);
 if (days > 0)
     dyStringPrintf(dy, "%lld days, ", days);
 seconds -= days*3600*24;
 /* What is left is under a day, so the remaining parts fit an int comfortably. */
 int hours = seconds/3600;
 if (hours > 0 || days > 0)
     dyStringPrintf(dy, "%d hours", hours);
 seconds -= hours*3600;
 if (days == 0)
     {
     int minutes = seconds/60;
     if (minutes > 0)
         {
         if (hours > 0)
             dyStringPrintf(dy, ", ");
         dyStringPrintf(dy, "%d minutes", minutes);
         }
     /* Seconds are only shown for durations shorter than an hour. */
     if (hours == 0)
         {
         if (minutes > 0)
             dyStringPrintf(dy, ", ");
         seconds -= minutes*60;
         dyStringPrintf(dy, "%d seconds", (int)seconds);
         }
     }
 return dy;
 }
 
 struct edwFile *edwFileInProgress(struct sqlConnection *conn, int submitId)
 /* Return the edwFile that submission submitId lists as fileIdInTransit, or NULL
  * when that column comes back as 0, meaning nothing is being uploaded. */
 {
 char sql[256];
 sqlSafef(sql, sizeof(sql), "select fileIdInTransit from edwSubmit where id=%u", submitId);
 long long transitId = sqlQuickLongLong(conn, sql);
 struct edwFile *inTransit = NULL;
 if (transitId != 0)
     {
     /* The same buffer is reused for the second query. */
     sqlSafef(sql, sizeof(sql), "select * from edwFile where id=%lld", transitId);
     inTransit = edwFileLoadByQuery(conn, sql);
     }
 return inTransit;
 }
 
 
 static void accessDenied()
 /* Refuse access: pause for a few seconds first, then abort with "Access denied!". */
 {
 enum {denialDelaySeconds = 5};
 sleep(denialDelaySeconds);
 errAbort("Access denied!");
 }
 
 struct edwScriptRegistry *edwScriptRegistryFromCgi()
 /* Get script registery from cgi variables.  Does authentication too. */
 {
 struct sqlConnection *conn = edwConnect();
 char *user = sqlEscapeString(cgiString("user"));
 char *password = sqlEscapeString(cgiString("password"));
 char query[256];
 sqlSafef(query, sizeof(query), "select * from edwScriptRegistry where name='%s'", user);
 struct edwScriptRegistry *reg = edwScriptRegistryLoadByQuery(conn, query);
 if (reg == NULL)
     accessDenied();
 char key[EDW_SID_SIZE];
 edwMakeSid(password, key);
 if (!sameString(reg->secretHash, key))
     accessDenied();
 sqlDisconnect(&conn);
 return reg;
 }
 
 void edwValidFileUpdateDb(struct sqlConnection *conn, struct edwValidFile *el, long long id)
 /* Overwrite the edwValidFile row whose id column equals id with the values held in el.
  * The id, licensePlate and fileId columns are not written: one autoupdates, one
  * depends on the row, and fileId really can't change.  singleQaStatus and
  * replicateQaStatus are always written as 0, whatever el holds. */
 {
 struct dyString *sql = newDyString(512);
 /* Statement head and the descriptive text columns. */
 sqlDyStringPrintf(sql,
     "update edwValidFile set "
     " format='%s',"
     " outputType='%s',"
     " experiment='%s',"
     " replicate='%s',"
     " validKey='%s',"
     " enrichedIn='%s',"
     " ucscDb='%s',",
     el->format, el->outputType, el->experiment, el->replicate, el->validKey,
     el->enrichedIn, el->ucscDb);
 /* Item and sample sizes. */
 sqlDyStringPrintf(sql,
     " itemCount=%lld,"
     " basesInItems=%lld,"
     " sampleCount=%lld,"
     " basesInSample=%lld,",
     (long long)el->itemCount, (long long)el->basesInItems,
     (long long)el->sampleCount, (long long)el->basesInSample);
 /* Sample bed, mapping statistics, and the two QA status columns set to 0. */
 sqlDyStringPrintf(sql,
     " sampleBed='%s',"
     " mapRatio=%g,"
     " sampleCoverage=%g,"
     " depth=%g,"
     " singleQaStatus=0,"
     " replicateQaStatus=0,",
     el->sampleBed, el->mapRatio, el->sampleCoverage, el->depth);
 /* Remaining columns; the last one has no trailing comma. */
 sqlDyStringPrintf(sql,
     " technicalReplicate='%s',"
     " pairedEnd='%s',"
     " qaVersion='%d',"
     " uniqueMapRatio=%g",
     el->technicalReplicate, el->pairedEnd, el->qaVersion, el->uniqueMapRatio);
 #if (EDWVALIDFILE_NUM_COLS != 24)
    #error "Please update this routine with new column"
 #endif
 sqlDyStringPrintf(sql, " where id=%lld\n", id);
 sqlUpdate(conn, sql->string);
 freeDyString(&sql);
 }
 
 static char *findTagOrEmpty(struct cgiParsedVars *tags, char *key)
 /* Find key in tags.  If it is not there, or empty, or 'n/a' valued return empty string
  * otherwise return val */
 {
 char *val = hashFindVal(tags->hash, key);
 if (val == NULL || sameString(val, "n/a"))
    return "";
 else
    return val;
 }
 
 void edwValidFileFieldsFromTags(struct edwValidFile *vf, struct cgiParsedVars *tags)
 /* Set the tag-derived string fields of vf to fresh clones of the values in tags.
  * Whatever the fields pointed to before is overwritten without being freed. */
 {
 /* These two are cloned straight from the hash lookup, with no "" fallback. */
 vf->format = cloneString(hashFindVal(tags->hash, "format"));
 vf->validKey = cloneString(hashFindVal(tags->hash, "valid_key"));
 /* The rest become "" when the tag is missing or set to n/a. */
 vf->outputType = cloneString(findTagOrEmpty(tags, "output_type"));
 vf->experiment = cloneString(findTagOrEmpty(tags, "experiment"));
 vf->replicate = cloneString(findTagOrEmpty(tags, "replicate"));
 vf->technicalReplicate = cloneString(findTagOrEmpty(tags, "technical_replicate"));
 vf->pairedEnd = cloneString(findTagOrEmpty(tags, "paired_end"));
 vf->enrichedIn = cloneString(findTagOrEmpty(tags, "enriched_in"));
 vf->ucscDb = cloneString(findTagOrEmpty(tags, "ucsc_db"));
 #if (EDWVALIDFILE_NUM_COLS != 24)
    #error "Please update this routine with new column"
 #endif
 }
 
 void edwFileResetTags(struct sqlConnection *conn, struct edwFile *ef, char *newTags,
     boolean revalidate)
 /* Store newTags as the tags of file ef.  With revalidate set, the file is marked as
  * being revalidated, its rows in the fastq and QA tables are deleted, and a QA job
  * is scheduled for it.  Without revalidate, the file's edwValidFile row is refreshed
  * in place from newTags. */
 {
 long long id = ef->id;
 char sql[4*1024];
 /* The new tags go into edwFile in either case. */
 sqlSafef(sql, sizeof(sql), "update edwFile set tags='%s' where id=%lld", newTags, id);
 sqlUpdate(conn, sql);
 if (!revalidate)
     {
     /* The revalidation case relies on edwMakeValidFile to update the edwValidFile
      * table.  Here we must do it ourselves. */
     struct edwValidFile *validFile = edwValidFileFromFileId(conn, ef->id);
     struct cgiParsedVars *parsedTags = cgiParsedVarsNew(newTags);
     edwValidFileFieldsFromTags(validFile, parsedTags);
     edwValidFileUpdateDb(conn, validFile, validFile->id);
     cgiParsedVarsFree(&parsedTags);
     edwValidFileFree(&validFile);
     return;
     }
 /* Let people know that revalidation is in progress. */
 sqlSafef(sql, sizeof(sql), "update edwFile set errorMessage = '%s' where id=%lld",
     "Revalidation in progress.", id);
 sqlUpdate(conn, sql);
 /* Drop the rows that refer to this file in the other validation and QA tables. */
 sqlSafef(sql, sizeof(sql), "delete from edwFastqFile where fileId=%lld", id);
 sqlUpdate(conn, sql);
 sqlSafef(sql, sizeof(sql),
     "delete from edwQaPairSampleOverlap where elderFileId=%lld or youngerFileId=%lld",
     id, id);
 sqlUpdate(conn, sql);
 sqlSafef(sql, sizeof(sql),
     "delete from edwQaPairCorrelation where elderFileId=%lld or youngerFileId=%lld",
     id, id);
 sqlUpdate(conn, sql);
 sqlSafef(sql, sizeof(sql), "delete from edwQaEnrich where fileId=%lld", id);
 sqlUpdate(conn, sql);
 sqlSafef(sql, sizeof(sql), "delete from edwQaContam where fileId=%lld", id);
 sqlUpdate(conn, sql);
 sqlSafef(sql, sizeof(sql), "delete from edwQaRepeat where fileId=%lld", id);
 sqlUpdate(conn, sql);
 sqlSafef(sql, sizeof(sql),
     "delete from edwQaPairedEndFastq where fileId1=%lld or fileId2=%lld",
     id, id);
 sqlUpdate(conn, sql);
 /* schedule validator */
 edwAddQaJob(conn, ef->id);
 }
 
 static void scanSam(char *samIn, FILE *f, struct genomeRangeTree *grt, long long *retHit, 
     long long *retMiss,  long long *retTotalBasesInHits, long long *retUniqueHitCount)
 /* Scan through sam file doing several things:counting how many reads hit and how many 
  * miss target during mapping phase, copying those that hit to a little bed file, and 
  * also defining regions covered in a genomeRangeTree. */
 {
 samfile_t *sf = samopen(samIn, "r", NULL);
 bam_hdr_t *bamHeader = sam_hdr_read(sf);
 bam1_t one;
 ZeroVar(&one);
 int err;
 long long hit = 0, miss = 0, unique = 0, totalBasesInHits = 0;
 while ((err = sam_read1(sf, bamHeader, &one)) >= 0)
     {
     int32_t tid = one.core.tid;
     if (tid < 0)
 	{
 	++miss;
         continue;
 	}
     ++hit;
     if (one.core.qual > edwMinMapQual)
         ++unique;
     char *chrom = bamHeader->target_name[tid];
     // Approximate here... can do better if parse cigar.
     int start = one.core.pos;
     int size = one.core.l_qseq;
     int end = start + size;	
     totalBasesInHits += size;
     boolean isRc = (one.core.flag & BAM_FREVERSE);
     char strand = (isRc ? '-' : '+');
     if (start < 0) start=0;
     if (f != NULL)
 	fprintf(f, "%s\t%d\t%d\t.\t0\t%c\n", chrom, start, end, strand);
     genomeRangeTreeAdd(grt, chrom, start, end);
     }
 if (err < 0 && err != -1)
     errnoAbort("samread err %d", err);
 samclose(sf);
 *retHit = hit;
 *retMiss = miss;
 *retTotalBasesInHits = totalBasesInHits;
 *retUniqueHitCount = unique;
 }
 
 void edwReserveTempFile(char *path)
 /* Reserve a temp file name.  path must end in XXXXXX; mkstemp replaces that with a
  * unique suffix and creates an empty file of that name, which is closed again right
  * away so the caller can fill it in.  Aborts if the file can't be created. */
 {
 int reservedFd = mkstemp(path);
 if (reservedFd == -1)
     errnoAbort("Couldn't create temp file %s", path);
 mustCloseFd(&reservedFd);
 }
 
 void edwBwaIndexPath(struct edwAssembly *assembly, char indexPath[PATH_LEN])
 /* Write the location of the assembly's BWA index into indexPath.  It has the form
  * <edwValDataDir><ucscDb>/bwaData/<ucscDb>.fa */
 {
 char *db = assembly->ucscDb;
 safef(indexPath, PATH_LEN, "%s%s/bwaData/%s.fa", edwValDataDir, db, db);
 }
 
 void edwAsPath(char *format, char path[PATH_LEN])
 /* Convert something like "narrowPeak" in format to full path involving
  * encValDir/as/narrowPeak.as */
 {
 safef(path, PATH_LEN, "%sas/%s.as", edwValDataDir, format);
 }
 
 void edwAlignFastqMakeBed(struct edwFile *ef, struct edwAssembly *assembly,
     char *fastqPath, struct edwValidFile *vf, FILE *bedF,
     double *retMapRatio,  double *retDepth,  double *retSampleCoverage,
     double *retUniqueMapRatio)
 /* Take a sample fastq and run bwa on it, and then convert that file to a bed.
  * bedF and all the ret parameters can be NULL.  The values returned are:
  *    retMapRatio - reads that hit divided by all reads
  *    retDepth - bases in hits over assembly->baseCount, scaled by
  *               vf->itemCount/vf->sampleCount
  *    retSampleCoverage - bases covered by the hits over assembly->baseCount
  *    retUniqueMapRatio - unique hits divided by all reads
  * The ef parameter is not used.
  * NOTE(review): the two ratios divide by hits plus misses, which is zero when the
  * sam file holds no reads - confirm callers never pass an empty sample. */
 {
 /* Hmm, tried doing this with Mark's pipeline code, but somehow it would be flaky the
  * second time it was run in same app.  Resorting therefore to temp files. */
 char genoFile[PATH_LEN];
 edwBwaIndexPath(assembly, genoFile);
 /* First bwa pass: align into a temporary .sai file. */
 char cmd[3*PATH_LEN];
 char *saiName = cloneString(rTempName(edwTempDir(), "edwSample1", ".sai"));
 safef(cmd, sizeof(cmd), "bwa aln -t 3 %s %s > %s", genoFile, fastqPath, saiName);
 mustSystem(cmd);
 /* Second bwa pass: turn the .sai into a temporary .sam, after which the .sai can go. */
 char *samName = cloneString(rTempName(edwTempDir(), "ewdSample1", ".sam"));
 safef(cmd, sizeof(cmd), "bwa samse %s %s %s > %s", genoFile, saiName, fastqPath, samName);
 mustSystem(cmd);
 remove(saiName);
 /* Scan sam file to calculate vf->mapRatio, vf->sampleCoverage and vf->depth.
  * and also to produce little bed file for enrichment step. */
 struct genomeRangeTree *grt = genomeRangeTreeNew();
 long long hitCount=0, missCount=0, uniqueHitCount=0, totalBasesInHits=0;
 scanSam(samName, bedF, grt, &hitCount, &missCount, &totalBasesInHits, &uniqueHitCount);
 verbose(1, "hitCount=%lld, missCount=%lld, totalBasesInHits=%lld, grt=%p\n",
     hitCount, missCount, totalBasesInHits, grt);
 if (retMapRatio)
     *retMapRatio = (double)hitCount/(hitCount+missCount);
 if (retDepth)
     *retDepth = (double)totalBasesInHits/assembly->baseCount
             * (double)vf->itemCount/vf->sampleCount;
 long long basesHitBySample = genomeRangeTreeSumRanges(grt);
 if (retSampleCoverage)
     *retSampleCoverage = (double)basesHitBySample/assembly->baseCount;
 if (retUniqueMapRatio)
     *retUniqueMapRatio = (double)uniqueHitCount/(hitCount+missCount);
 /* Clean up: the range tree, the temporary sam file, and the two cloned file names. */
 genomeRangeTreeFree(&grt);
 remove(samName);
 freez(&saiName);
 freez(&samName);
 }
 
 struct edwFastqFile *edwFastqFileFromFileId(struct sqlConnection *conn, long long fileId)
 /* Load the edwFastqFile row(s) whose fileId column matches; NULL if there is none. */
 {
 char sql[256];
 sqlSafef(sql, sizeof(sql), "select * from edwFastqFile where fileId=%lld", fileId);
 struct edwFastqFile *fastqFile = edwFastqFileLoadByQuery(conn, sql);
 return fastqFile;
 }
 
 static int mustMkstemp(char *template)
 /* mkstemp that aborts on failure.  template is rewritten by mkstemp to hold the name
  * of the file that was made.  Returns the open unix file descriptor. */
 {
 int tempFd = mkstemp(template);
 if (tempFd == -1)
     errnoAbort("Couldn't make temp file based on %s", template);
 return tempFd;
 }
 
 void edwMakeTempFastqSample(char *source, int size, char dest[PATH_LEN])
 /* Subsample size records of fastq file source into a new temporary file.  The name
  * of that file is made here and written to dest. */
 {
 /* Let mkstemp create the file so the name is ours, then close it straight away. */
 safef(dest, PATH_LEN, "%sedwSampleFastqXXXXXX", edwTempDir());
 int reservedFd = mustMkstemp(dest);
 close(reservedFd);
 /* fastqStatsAndSubsample does the sampling; its stats output goes to /dev/null. */
 char cmd[3*PATH_LEN];
 safef(cmd, sizeof(cmd),
     "fastqStatsAndSubsample %s /dev/null %s -smallOk -sampleSize=%d", source, dest, size);
 verbose(2, "command: %s\n", cmd);
 mustSystem(cmd);
 }
 
 void edwMakeFastqStatsAndSample(struct sqlConnection *conn, long long fileId)
 /* Make sure file fileId has a row in edwFastqFile.  Nothing is done when it already
  * has one.  Otherwise fastqStatsAndSubsample is run on the file, the sample it writes
  * is gzipped, and the stats are saved as a new edwFastqFile row that records the
  * sample's name.  The stats file is removed afterwards; the sample file is left. */
 {
 struct edwFastqFile *fastqRow = edwFastqFileFromFileId(conn, fileId);
 if (fastqRow != NULL)
     {
     edwFastqFileFree(&fastqRow);
     return;
     }
 /* Reserve the stats file in the temp dir and the sample file in today's temp dir. */
 char *fastqPath = edwPathForFileId(conn, fileId);
 char statsFile[PATH_LEN], sampleFile[PATH_LEN];
 safef(statsFile, PATH_LEN, "%sedwFastqStatsXXXXXX", edwTempDir());
 edwReserveTempFile(statsFile);
 char dayTempDir[PATH_LEN];
 safef(sampleFile, PATH_LEN, "%sedwFastqSampleXXXXXX", edwTempDirForToday(dayTempDir));
 edwReserveTempFile(sampleFile);
 /* Compute stats and sample, then compress the sample. */
 char cmd[3*PATH_LEN];
 safef(cmd, sizeof(cmd), "fastqStatsAndSubsample -sampleSize=%d -smallOk %s %s %s",
     edwSampleTargetSize, fastqPath, statsFile, sampleFile);
 mustSystem(cmd);
 safef(cmd, sizeof(cmd), "gzip %s", sampleFile);
 mustSystem(cmd);
 strcat(sampleFile, ".gz");   /* follow the rename that gzip did */
 /* Load the stats, complete the record, and save it. */
 fastqRow = edwFastqFileOneFromRa(statsFile);
 fastqRow->fileId = fileId;
 fastqRow->sampleFileName = cloneString(sampleFile);
 edwFastqFileSaveToDb(conn, fastqRow, "edwFastqFile", 1024);
 remove(statsFile);
 freez(&fastqPath);
 edwFastqFileFree(&fastqRow);
 }
 
 struct edwQaWigSpot *edwMakeWigSpot(struct sqlConnection *conn, long long wigId, long long spotId)
 /* Create a new edwQaWigSpot record in database based on comparing wig file to spot file
  * (specified by id's in edwFile table).  Any record already there for the pair is
  * replaced.  Aborts unless wigId is a bigWig and spotId is a narrowPeak, broadPeak or
  * bigBed.  Returns the new record, with its database id filled in; the caller frees
  * it with edwQaWigSpotFree. */
 {
 /* Get valid files from fileIds and check format */
 struct edwValidFile *wigVf = edwValidFileFromFileId(conn, wigId);
 if (!sameString(wigVf->format, "bigWig"))
     errAbort("%lld is not a bigWig file, is %s instead", wigId, wigVf->format);
 struct edwValidFile *spotVf = edwValidFileFromFileId(conn, spotId);
 if (!sameString(spotVf->format, "narrowPeak") && !sameString(spotVf->format, "broadPeak") &&
     !sameString(spotVf->format, "bigBed"))
     errAbort("%lld is not a recognized peak type format, is %s", spotId, spotVf->format);
 /* Remove any old record for files. */
 char query[256];
 sqlSafef(query, sizeof(query),
     "delete from edwQaWigSpot where wigId=%lld and spotId=%lld", wigId, spotId);
 sqlUpdate(conn, query);
 /* Figure out file names */
 char *wigPath = edwPathForFileId(conn, wigId);
 char *spotPath = edwPathForFileId(conn, spotId);
 char statsFile[PATH_LEN];
 safef(statsFile, PATH_LEN, "%sedwQaWigSpotXXXXXX", edwTempDir());
 edwReserveTempFile(statsFile);
 char peakFile[PATH_LEN];
 safef(peakFile, PATH_LEN, "%sedwQaWigSpotXXXXXX", edwTempDir());
 edwReserveTempFile(peakFile);
 /* Convert the peak input into a temporary bed4 file */
 char command[3*PATH_LEN];
 safef(command, sizeof(command), "bigBedToBed %s stdout | cut -f 1-4 > %s", spotPath, peakFile);
 mustSystem(command);
 /* Call on bigWigAverageOverBed on peaks */
 safef(command, sizeof(command),
     "bigWigAverageOverBed %s %s /dev/null -stats=%s", wigPath, peakFile, statsFile);
 mustSystem(command);
 remove(peakFile);
 /* Parse out ra file,  save it to database, and remove ra file. */
 struct edwQaWigSpot *spot = edwQaWigSpotOneFromRa(statsFile);
 spot->wigId = wigId;
 spot->spotId = spotId;
 edwQaWigSpotSaveToDb(conn, spot, "edwQaWigSpot", 1024);
 spot->id = sqlLastAutoId(conn);
 remove(statsFile);
 /* Clean up and go home.  The spot itself is not freed, since it is the result. */
 edwValidFileFree(&wigVf);
 edwValidFileFree(&spotVf);
 freez(&wigPath);
 freez(&spotPath);
 return spot;
 }
 
 struct edwQaWigSpot *edwQaWigSpotFor(struct sqlConnection *conn,
     long long wigFileId, long long spotFileId)
 /* Load the edwQaWigSpot row(s) recorded for this wig file and spot file pair.
  * Returns NULL when the database holds none. */
 {
 char sql[256];
 sqlSafef(sql, sizeof(sql),
     "select * from edwQaWigSpot where wigId=%lld and spotId=%lld", wigFileId, spotFileId);
 struct edwQaWigSpot *wigSpot = edwQaWigSpotLoadByQuery(conn, sql);
 return wigSpot;
 }
 
 
 
 
 struct edwBamFile *edwBamFileFromFileId(struct sqlConnection *conn, long long fileId)
 /* Load the edwBamFile row(s) whose fileId column matches; NULL if there is none. */
 {
 char sql[256];
 sqlSafef(sql, sizeof(sql), "select * from edwBamFile where fileId=%lld", fileId);
 struct edwBamFile *bamFile = edwBamFileLoadByQuery(conn, sql);
 return bamFile;
 }
 
 struct edwBamFile * edwMakeBamStatsAndSample(struct sqlConnection *conn, long long fileId,
     char sampleBed[PATH_LEN])
 /* Run edwBamStats on file fileId, replace the file's edwBamFile row with the result,
  * and return that result.  A gzipped sample bed is made along the way; its name,
  * ending in .gz, is written to sampleBed. */
 {
 /* Start from a clean slate: drop any edwBamFile row the file already has. */
 char sql[256];
 sqlSafef(sql, sizeof(sql), "delete from edwBamFile where fileId=%lld", fileId);
 sqlUpdate(conn, sql);
 /* Reserve the stats file in the temp dir and the sample bed in today's temp dir. */
 char *bamPath = edwPathForFileId(conn, fileId);
 char statsFile[PATH_LEN];
 safef(statsFile, PATH_LEN, "%sedwBamStatsXXXXXX", edwTempDir());
 edwReserveTempFile(statsFile);
 char dayTempDir[PATH_LEN];
 safef(sampleBed, PATH_LEN, "%sedwBamSampleXXXXXX", edwTempDirForToday(dayTempDir));
 edwReserveTempFile(sampleBed);
 /* One system call writes the ra and the bed, a second one zips the bed. */
 char cmd[3*PATH_LEN];
 safef(cmd, sizeof(cmd), "edwBamStats -sampleBed=%s -sampleBedSize=%d %s %s",
     sampleBed, edwSampleTargetSize, bamPath, statsFile);
 mustSystem(cmd);
 safef(cmd, sizeof(cmd), "gzip %s", sampleBed);
 mustSystem(cmd);
 strcat(sampleBed, ".gz");   /* follow the rename that gzip did */
 /* Load the ra file, save it as the file's row, and remove the ra file. */
 struct edwBamFile *bamStats = edwBamFileOneFromRa(statsFile);
 bamStats->fileId = fileId;
 edwBamFileSaveToDb(conn, bamStats, "edwBamFile", 1024);
 remove(statsFile);
 freez(&bamPath);
 return bamStats;
 }
 
 
 char *edwOppositePairedEndString(char *end)
 /* Map paired end "1" to "2" and "2" to "1".  Anything else aborts with an error. */
 {
 char *opposite = NULL;
 if (sameString(end, "1"))
     opposite = "2";
 else if (sameString(end, "2"))
     opposite = "1";
 else
     errAbort("Expecting 1 or 2, got %s in oppositeEnd", end);
 return opposite;
 }
 
 struct edwValidFile *edwOppositePairedEnd(struct sqlConnection *conn, struct edwValidFile *vf)
 /* Find the mate of a paired end fastq: the non-deprecated valid file with the same
  * experiment, outputType, replicate, technicalReplicate and itemCount as vf, but the
  * opposite pairedEnd.  Returns NULL if there is no such file and aborts if the query
  * matches more than one. */
 {
 char *mateEnd = edwOppositePairedEndString(vf->pairedEnd);
 char sql[1024];
 sqlSafef(sql, sizeof(sql),
     "select edwValidFile.* from edwValidFile join edwFile on edwValidFile.fileId=edwFile.id"
     " where experiment='%s' and outputType='%s' and replicate='%s' "
     " and technicalReplicate='%s' and pairedEnd='%s' and itemCount=%lld and deprecated=''"
     , vf->experiment, vf->outputType, vf->replicate, vf->technicalReplicate, mateEnd
     , vf->itemCount);
 struct edwValidFile *mate = edwValidFileLoadByQuery(conn, sql);
 if (mate != NULL && mate->next != NULL)
     errAbort("Multiple results from pairedEnd query %s", sql);
 return mate;
 }
 
 struct edwQaPairedEndFastq *edwQaPairedEndFastqFromVfs(struct sqlConnection *conn,
     struct edwValidFile *vfA, struct edwValidFile *vfB,
     struct edwValidFile **retVf1,  struct edwValidFile **retVf2)
 /* Return pair record if any for the two fastq files. */
 {
 /* Sort the two ends. */
 struct edwValidFile *vf1 = NULL, *vf2 = NULL;
 if (sameString(vfA->pairedEnd, "1"))
     {
     vf1 = vfA;
     vf2 = vfB;
     }
 else
     {
     vf1 = vfB;
     vf2 = vfA;
     }
 if (retVf1 != NULL)
    *retVf1 = vf1;
 if (retVf2 != NULL)
    *retVf2 = vf2;
 
 /* See if we already have a record for these two. */
 /* Return record for these two. */
 char query[1024];
 sqlSafef(query, sizeof(query), 
     "select * from edwQaPairedEndFastq where fileId1=%u and fileId2=%u",
     vf1->fileId, vf2->fileId);
 return edwQaPairedEndFastqLoadByQuery(conn, query);
 }
 
 FILE *edwPopen(char *command, char *mode)
 /* popen that aborts, reporting errno, when the pipe can't be opened. */
 {
 /* Because of bugs with popen(...,"r") and programs that use stdin otherwise
  * it's probably better to use Mark's pipeline library,  but it is ever so
  * much harder to use... */
 FILE *stream = popen(command,  mode);
 if (stream == NULL)
     errnoAbort("Can't popen(%s, %s)", command, mode);
 return stream;
 }
 
 boolean edwOneLineSystemAttempt(char *command, char *line, int maxLineSize)
 /* Run command through popen and read the first line of its output into line, which
  * has room for maxLineSize bytes.  Returns TRUE if a line was read.  Returns FALSE if
  * there was no output, or if popen failed, in which case a warning is issued too. */
 {
 FILE *cmdOutput = popen(command, "r");
 if (cmdOutput == NULL)
     {
     errnoWarn("failed popen %s", command);
     return FALSE;
     }
 boolean gotLine = FALSE;
 if (fgets(line, maxLineSize, cmdOutput) != NULL)
     gotLine = TRUE;
 pclose(cmdOutput);
 return gotLine;
 }
 
 void edwOneLineSystemResult(char *command, char *line, int maxLineSize)
 /* Execute system command and return one line result from it in line */
 {
 if (!edwOneLineSystemAttempt(command, line, maxLineSize) )
     errAbort("Can't get line from %s", command);
 }
 
 void edwMd5File(char *fileName, char md5Hex[33])
 /* Compute the md5 of a file by running the md5sum utility on it.  The first 32
  * characters of the utility's output line are copied to md5Hex and zero terminated.
  * This ends up being about 30% faster than library routine md5HexForFile,
  * however since there's popen() weird interactions with  stdin involved
  * it's not suitable for a general purpose library.  Environment inside edw
  * is controlled enough it should be ok. */
 {
 enum {md5HexSize = 32};
 char cmd[PATH_LEN + 16];
 safef(cmd, sizeof(cmd), "md5sum %s", fileName);
 char output[2*PATH_LEN];
 edwOneLineSystemResult(cmd, output, sizeof(output));
 memcpy(md5Hex, output, md5HexSize);
 md5Hex[md5HexSize] = 0;
 }
 
 
 void edwPokeFifo(char *fifoName)
 /* Wake the daemon listening on a fifo by writing '\n' to it.  The fifo is looked for
  * in a short list of directories and only the first one found is written to.
  * Nothing happens if it is in none of them. */
 {
 /* Sadly we loop through places it might be since it varies. It has to live somewhere
  * that web CGIs can poke is the problem. */
 char *candidateDirs[] = {"/data/www/userdata/", "/usr/local/apache/userdata/"};
 int i;
 for (i=0; i<ArraySize(candidateDirs); ++i)
     {
     char fifoPath[PATH_LEN];
     safef(fifoPath, sizeof(fifoPath), "%s%s", candidateDirs[i], fifoName);
     if (!fileExists(fifoPath))
         continue;
     char *wakeUp = "\n";
     writeGulp(fifoPath, wakeUp, strlen(wakeUp));
     break;
     }
 }
 
 /***/
 /* Shared functions for EDW web CGI's.
    Mostly wrappers for javascript tweaks */
 
 void edwWebAutoRefresh(int msec)
 /* Refresh page after msec.  Use 0 to cancel autorefresh.  Negative msec is ignored.
  * What is written are <script> elements that rely on jQuery and its cookie plugin:
  * the refresh itself submits the page's form, and the scroll position is carried
  * over the reload in the edwWeb.scrollTop cookie. */
 {
 if (msec > 0)
     {
     // set timeout to refresh page (saving/restoring scroll position via cookie)
     printf("<script type='text/javascript'>var edwRefresh = setTimeout(function() { $.cookie('edwWeb.scrollTop', $(window).scrollTop()); $('form').submit(); }, %d);</script>", msec);
     puts("<script type='text/javascript'>$(document).ready(function() {$(document).scrollTop($.cookie('edwWeb.scrollTop'))});</script>");
     // disable autorefresh when user is changing page settings
     puts("<script type='text/javascript'>$('form').click(function() {clearTimeout(edwRefresh); $.cookie('edwWeb.scrollTop', null);});</script>");
     }
 else if (msec == 0)
     {
     // The cancel needs its own opening tag, or the statement shows up as page text.
     puts("<script type='text/javascript'>clearTimeout(edwRefresh);</script>");
     }
 }
 
 /***/
 /* Navigation bar */
 
 void edwWebNavBarStart()
 /* Open the page layout: the layout div, the navigation bar with the contents of
  * /inc/edwNavBar.html inside it, and then the content container, which is left open
  * for the caller's page body. */
 {
 puts("<div id='layout'>");
 /* The navigation bar is complete in itself. */
 puts("<div id='navbar' class='navbar navbar-fixed-top navbar-inverse'>");
 webIncludeFile("/inc/edwNavBar.html");
 puts("</div>");
 /* Two divs stay open here, on top of the layout div. */
 puts("<div id='content' class='container'><div>");
 }
 
 void edwWebNavBarEnd()
 /* Close the three divs that edwWebNavBarStart leaves open. */
 {
 char *closingTags = "</div></div></div>";
 puts(closingTags);
 }
 
 void edwWebBrowseMenuItem(boolean on)
 /* Show (on) or hide (off) the 'Browse submissions' link of the navigation menu, by
  * writing a script that calls jQuery show() or hide() on #edw-browse. */
 {
 char *jqueryCall = (on ? "show" : "hide");
 printf("<script type='text/javascript'>$('#edw-browse').%s();</script>", jqueryCall);
 }
 
 void edwWebSubmitMenuItem(boolean on)
 /* Show (on) or hide (off) the 'Submit data' link of the navigation menu, by writing
  * a script that calls jQuery show() or hide() on #edw-submit. */
 {
 char *jqueryCall = (on ? "show" : "hide");
 printf("<script type='text/javascript'>$('#edw-submit').%s();</script>", jqueryCall);
 }