5c2dea36479f0669a316e693c21a375d192b9e38
kent
  Wed Mar 13 12:13:20 2019 -0700
Making cdwPairedEnd basically ignore 10x files by allowing new paired_end values cell_barcode and sample_barcode instead of 1 2.

diff --git src/hg/cirm/cdw/lib/cdwLib.c src/hg/cirm/cdw/lib/cdwLib.c
index 392ca6f..cedcb59 100644
--- src/hg/cirm/cdw/lib/cdwLib.c
+++ src/hg/cirm/cdw/lib/cdwLib.c
@@ -1,2750 +1,2754 @@
 /* cdwLib - routines shared by various cdw programs.    See also cdw
  * module for tables and routines to access structs built on tables. */
 
 /* Copyright (C) 2014 The Regents of the University of California 
  * See README in this or parent directory for licensing information. */
 
 #include "common.h"
 #include "hash.h"
 #include "dystring.h"
 #include "jksql.h"
 #include "errAbort.h"
 #include "cheapcgi.h"
 #include "hex.h"
 #include "openssl/sha.h"
 #include "base64.h"
 #include "basicBed.h"
 #include "bigBed.h"
 #include "portable.h"
 #include "filePath.h"
 #include "genomeRangeTree.h"
 #include "md5.h"
 #include "htmshell.h"
 #include "obscure.h"
 #include "bamFile.h"
 #include "raToStruct.h"
 #include "web.h"
 #include "hdb.h"
 #include "cdwValid.h"
 #include "cdw.h"
 #include "cdwFastqFileFromRa.h"
 #include "cdwBamFileFromRa.h"
 #include "cdwQaWigSpotFromRa.h"
 #include "cdwVcfFileFromRa.h"
 #include "rql.h"
 #include "intValTree.h"
 #include "tagStorm.h"
 #include "cdwLib.h"
 #include "trashDir.h"
 #include "wikiLink.h"
 #include "hgConfig.h"
 
 
 /* System globals - just a few ... for now.  Please seriously not too many more. */
 
 /* Name of the database handed to sqlConnect()/sqlConnectProfile() by the connect helpers. */
 char *cdwDatabase = "cdw";
 /* 4 hours, expressed in seconds. */
 int cdwSingleFileTimeout = 4*60*60;   // How many seconds we give ourselves to fetch a single file
 
 /* Root of the warehouse file tree.  Paths are built by plain "%s%s" concatenation with
  * this value, so the trailing slash is required. */
 char *cdwRootDir = "/data/cirm/cdw/";
 /* The next three are not referenced in this part of the file; judging by the names
  * they are the analysis pipeline root, validation data dir and daemon mail address
  * - TODO confirm against their users. */
 char *eapRootDir = "/data/cirm/encodeAnalysisPipeline/";
 char *cdwValDataDir = "/data/cirm/valData/";
 char *cdwDaemonEmail = "cdw@cirm-01.sdsc.edu";
 
 struct sqlConnection *cdwConnect()
 /* Open a connection to cdwDatabase with plain sqlConnect() and return it.
  * Intended for read only use; see cdwConnectReadWrite() for updates. */
 {
 struct sqlConnection *readConn = sqlConnect(cdwDatabase);
 return readConn;
 }
 
 struct sqlConnection *cdwConnectReadWrite()
 /* Open a connection to cdwDatabase through the "cdw" connection profile and return it.
  * This is the connection to use when the database will be modified. */
 {
 struct sqlConnection *writeConn = sqlConnectProfile("cdw", cdwDatabase);
 return writeConn;
 }
 
 char *cdwPathForFileId(struct sqlConnection *conn, long long fileId)
 /* Look up cdwFileName of fileId in cdwFile and return it with cdwRootDir in front.
  * The result is a fresh copy which eventually should be freeMem'd. */
 {
 /* Fetch the warehouse-relative name of the file. */
 char sql[256];
 sqlSafef(sql, sizeof(sql), "select cdwFileName from cdwFile where id=%lld", fileId);
 char relName[PATH_LEN];
 sqlNeedQuickQuery(conn, sql, relName, sizeof(relName));
 
 /* Glue root and relative name together,  cdwRootDir already ends in a slash. */
 char fullPath[512];
 safef(fullPath, sizeof(fullPath), "%s%s", cdwRootDir, relName);
 return cloneString(fullPath);
 }
 
 char *cdwTempDir()
 /* Returns pointer to the warehouse temp dir, including trailing slash.  The directory
  * is created on first call.  The string is a shared static, so please don't modify. */
 {
 static char tmpPath[PATH_LEN];
 
 /* Already worked out on an earlier call? */
 if (tmpPath[0] != 0)
     return tmpPath;
 
 /* Note code elsewhere depends on tmp dir being inside of cdwRootDir - also good
  * to have it there so move to a permanent file is quick and unlikely to fail. */
 safef(tmpPath, sizeof(tmpPath), "%s%s", cdwRootDir, "tmp");
 makeDirsOnPath(tmpPath);
 strcat(tmpPath, "/");
 return tmpPath;
 }
 
 char *cdwTempDirForToday(char dir[PATH_LEN])
 /* Fills in dir with cdwRootDir + today's date directory + "tmp/" and returns dir.
  * The directory is created the first time a given day is seen by this process. */
 {
 /* Remembers the day we last created a directory for, so the time consuming
  * makeDirsOnPath call is skipped when nothing changed. */
 static char prevDay[PATH_LEN] = "";
 
 char today[PATH_LEN];
 cdwDirForTime(cdwNow(), today);
 safef(dir, PATH_LEN, "%s%stmp/", cdwRootDir, today);
 
 if (sameString(today, prevDay))
     return dir;
 
 strcpy(prevDay, today);
 
 /* Temporarily chop the trailing slash while making the directories. */
 char *lastChar = dir + strlen(dir) - 1;
 *lastChar = 0;
 makeDirsOnPath(dir);
 *lastChar = '/';
 return dir;
 }
 
 
 long long cdwGettingFile(struct sqlConnection *conn, char *submitDir, char *submitFileName)
 /* See if we are in process of getting file.  Returns id of the most recent matching
  * cdwFile record (no error message, not deprecated, and either finished uploading or
  * started before the single file timeout window),  even if it's not complete.
  * Returns -1 if there is no such record. */
 {
 char query[PATH_LEN+512];
 
 /* Without a record of the directory there can be no record of the file. */
 sqlSafef(query, sizeof(query), "select id from cdwSubmitDir where url='%s'", submitDir);
 int dirId = sqlQuickNum(conn, query);
 if (dirId <= 0)
     return -1;
 
 /* Look for the newest file in that directory with the given name. */
 long long cutoffTime = (long long)cdwNow() - cdwSingleFileTimeout;
 sqlSafef(query, sizeof(query), 
     "select id from cdwFile "
     "where submitFileName='%s' and submitDirId = %d and errorMessage = '' and deprecated=''"
     " and (endUploadTime >= startUploadTime or startUploadTime < %lld) "
     "order by submitId desc limit 1"
     , submitFileName, dirId, cutoffTime);
 long long fileId = sqlQuickLongLong(conn, query);
 return (fileId == 0) ? -1 : fileId;
 }
 
 long long cdwGotFile(struct sqlConnection *conn, char *submitDir, char *submitFileName, 
     char *md5, long long size)
 /* See if we already got file.  Return fileId if we do,  otherwise -1.  The match is
  * based mostly on the MD5 sum.  For short files (100,000 bytes or less) we also require
  * the submitDir and submitFileName to match.  This is to cover the case where you might
  * have legitimate empty files duplicated even though they were computed based on different
  * things. For instance coming up with no peaks is a legitimate result for many chip-seq
  * experiments. 
  *    conn - connection to cdw database
  *    submitDir - url of submission directory as stored in cdwSubmitDir
  *    submitFileName - name of file within submitDir
  *    md5 - hex MD5 sum to look for, compared ignoring case for small files
  *    size - size of file in bytes,  selects which of the two strategies is used */
 {
 /* For large files just rely on MD5. */
 char query[PATH_LEN+512];
 if (size > 100000)
     {
     sqlSafef(query, sizeof(query),
         "select id from cdwFile where md5='%s' order by submitId desc limit 1" , md5);
     long long result = sqlQuickLongLong(conn, query);
     if (result == 0)
         result = -1;
     return result;
     }
 
 /* Rest of the routine deals with smaller files,  which we are less worried about
  * duplicating,  and indeed expect a little duplication of the empty file if none
  * other. */
 
 /* First see if we have even got the directory. */
 sqlSafef(query, sizeof(query), "select id from cdwSubmitDir where url='%s'", submitDir);
 int submitDirId = sqlQuickNum(conn, query);
 if (submitDirId <= 0)
     return -1;
 
 /* The complex truth is that we may have gotten this file multiple times. 
  * We return the most recent version where it got uploaded and passed the post-upload
  * MD5 sum, and thus where the MD5 field is filled in the database. */
 sqlSafef(query, sizeof(query), 
     "select md5,id from cdwFile "
     "where submitFileName='%s' and submitDirId = %d and md5 != '' "
     "order by submitId desc limit 1"
     , submitFileName, submitDirId);
 struct sqlResult *sr = sqlGetResult(conn, query);
 char **row;
 /* Declared long long to match both sqlLongLong() and the return type,  so ids are
  * not truncated on platforms where long is only 32 bits. */
 long long fileId = -1;
 if ((row = sqlNextRow(sr)) != NULL)
     {
     char *dbMd5 = row[0];
     if (sameWord(md5, dbMd5))
         fileId = sqlLongLong(row[1]);
     }
 sqlFreeResult(&sr);
 
 return fileId;
 }
 
 long long cdwNow()
 /* Return what time(NULL) reports:  the current time in seconds since Epoch. */
 {
 time_t secondsSinceEpoch = time(NULL);
 return secondsSinceEpoch;
 }
 
 /* Size of buffer holding a sid:  the base64 encoding of a 48 byte SHA384 digest is
  * 64 characters,  plus 1 for the terminating zero. */
 #define CDW_SID_SIZE 65   
 
 static void makeShaBase64(unsigned char *inputBuf, int inputSize, char out[CDW_SID_SIZE])
 /* Hash inputSize bytes of inputBuf with SHA384,  base64 encode the digest,  and store
  * the zero terminated printable result in out. */
 {
 unsigned char digest[48];
 SHA384(inputBuf, inputSize, digest);
 
 /* base64Encode hands back allocated memory,  copy what we need and release it. */
 char *encoded = base64Encode((char*)digest, sizeof(digest));
 memcpy(out, encoded, CDW_SID_SIZE);
 out[CDW_SID_SIZE-1] = 0; 
 freeMem(encoded);
 }
 
 void cdwMakeSid(char *user, char sid[CDW_SID_SIZE])
 /* Compute sid for user:  a printable hash over a fixed size buffer that holds a
  * reproducible byte pattern with a salted copy of the user name written at the start. */
 {
 unsigned char saltBuf[512];
 memset(saltBuf, 0, sizeof(saltBuf));
 
 /* Lay down a background pattern that is reproducible but hard to guess. */
 int pos = 0;
 while (pos < ArraySize(saltBuf))
     {
     saltBuf[pos] = pos ^ 0x29;
     saltBuf[pos+1] = ~pos;
     pos += 2;
     }
 
 /* Write salt plus user name (and terminating zero) over the front of the pattern. */
 safef((char*)saltBuf, sizeof(saltBuf), 
     "186ED79BAEXzeusdioIsdklnw88e86cd73%s<*#$*(#)!DSDFOUIHLjksdf", user);
 
 /* The whole buffer is hashed,  not just the string part. */
 makeShaBase64(saltBuf, sizeof(saltBuf), sid);
 }
 
 static void cdwVerifySid(char *user, char *sidToCheck)
 /* Make sure sid/user combo is good.  Recomputes the sid for user and compares it with
  * sidToCheck.  Aborts with an "Authentication failed" message if sidToCheck is NULL
  * (reported as "miss") or differs from the computed sid (reported as "fail").
  * Returns quietly when they match. */
 {
 char sid[CDW_SID_SIZE];
 cdwMakeSid(user, sid);
 /* sidToCheck is an ordinary zero terminated string of unknown length.  Both strings
  * are zero terminated,  so compare as strings:  a fixed size memcmp over CDW_SID_SIZE
  * bytes could read past the end of a sidToCheck that is shorter than a real sid. */
 if (sidToCheck == NULL || strcmp(sidToCheck, sid) != 0)
     errAbort("Authentication failed, sid %s", (sidToCheck ? "fail" : "miss"));
 }
 
 char *cdwGetEmailAndVerify()
 /* Get email from persona-managed cookies and validate them.
  * Return email address if all is good and user is logged in.
  * If user not logged in return NULL.  If user logged in but
  * otherwise things are wrong abort. */
 {
 char *email = findCookieData("email");
 if (email)
     {
     char *sid = findCookieData("sid");
     cdwVerifySid(email, sid);
     }
 return email;
 }
 
 struct cdwUser *cdwCurrentUser(struct sqlConnection *conn)
 /* Figure out who is logged in and return their cdwUser record,  or NULL if nobody is
  * logged in or the name can't be resolved to a user. */
 {
 char *userName = wikiLinkUserName();
 
 /* When debugging outside of the web server there are no cookies,  so take the name
  * from the cgiSpoof style userName= command line variable instead. */
 if (!cgiIsOnWeb() && userName == NULL)
     userName = cgiOptionalString("userName");
 
 if (userName == NULL)
     return NULL;
 return cdwUserFromUserName(conn, userName);
 }
 
 
 struct cdwUser *cdwUserFromUserName(struct sqlConnection *conn, char* userName)
 /* Return user associated with that username or NULL if not found.
  *    conn - connection to cdw database,  used for the cdwUser lookup
  *    userName - either an email address (anything containing '@') or a name to be
  *               translated to an email address via the gbMembers table in the
  *               central database
  * Returns NULL both when userName is unknown to gbMembers and when the resulting
  * email has no cdwUser record. */
 {
 char *email = NULL;
 // if the username is already an email address, then there is no need to go through the 
 // gbMembers table
 if (strstr(userName, "@")!=NULL)
     email = userName;
 else 
     {
     struct sqlConnection *cc = hConnectCentral();
     char query[512];
     sqlSafef(query, sizeof(query), "select email from gbMembers where userName='%s'", userName);
     email = sqlQuickString(cc, query);
     hDisconnectCentral(&cc);
     }
 
 /* sqlQuickString gives NULL when gbMembers has no such userName.  That is the plain
  * "not found" case,  so report it as such rather than passing a NULL string on into
  * the next query. */
 if (email == NULL)
     return NULL;
 
 struct cdwUser *user = cdwUserFromEmail(conn, email);
 return user;
 }
 
 struct cdwUser *cdwUserFromEmail(struct sqlConnection *conn, char *email)
 /* Load the cdwUser record whose email field equals email.  Returns NULL if there is
  * no such record. */
 {
 char sql[256];
 sqlSafef(sql, sizeof(sql), "select * from cdwUser where email='%s'", email);
 return cdwUserLoadByQuery(conn, sql);
 }
 
 struct cdwUser *cdwUserFromId(struct sqlConnection *conn, int id)
 /* Load the cdwUser record with the given id.  Returns NULL if there is no such
  * record. */
 {
 char sql[256];
 sqlSafef(sql, sizeof(sql), "select * from cdwUser where id='%d'", id);
 return cdwUserLoadByQuery(conn, sql);
 }
 
 int cdwUserIdFromFileId(struct sqlConnection *conn, int fId)
 /* Return id of the user who originally submitted the file.
  *    conn - connection to cdw database
  *    fId - id of file in cdwFile
  * Returns 0 if the file,  its submission,  or the user can't be found. */
 {
 char query[256];
 
 /* The submission the file came in with records who made it. */
 sqlSafef(query, sizeof(query), "select s.userId from cdwSubmit s, cdwFile f where f.submitId=s.id and f.id='%d'", fId);
 int userId = sqlQuickNum(conn, query);
 
 /* Make sure that user really exists in cdwUser.  The value found above is a user id,
  * so it is matched directly against cdwUser.id.  Matching it against cdwSubmit.id as
  * well would make the answer depend on whether some unrelated submission happens to
  * have the same number as the user. */
 sqlSafef(query, sizeof(query), "select id from cdwUser where id='%d'", userId);
 return sqlQuickNum(conn, query);
 }
 
 struct cdwUser *cdwFindUserFromFileId(struct sqlConnection *conn, int fId)
 /* Return cdwUser record of whoever originally submitted file fId,  as resolved by
  * cdwUserIdFromFileId followed by cdwUserFromId. */
 {
 int submitterId = cdwUserIdFromFileId(conn, fId);
 return cdwUserFromId(conn, submitterId);
 }
 
 char *cdwFindOwnerNameFromFileId(struct sqlConnection *conn, int fId)
 /* Return name (the email address) of submitter of file fId.  Returns "an unknown user"
  * if the submitter can't be found.  In both cases the result is a freshly cloned
  * string,  so callers can treat the two cases the same way and freeMem it when done. */
 {
 struct cdwUser *owner = cdwFindUserFromFileId(conn, fId);
 /* Clone the placeholder as well:  handing out a string constant on one path and
  * allocated memory on the other would make it unsafe for a caller to free the result. */
 if (owner == NULL)
     return cloneString("an unknown user");
 return cloneString(owner->email);
 }
 
 int cdwFindUserIdFromEmail(struct sqlConnection *conn, char *userEmail)
 /* Return id field of the cdwUser record with the given email address. */
 {
 char sql[256];
 sqlSafef(sql, sizeof(sql), "select id from cdwUser where email = '%s'", userEmail);
 int userId = sqlQuickNum(conn, sql);
 return userId;
 }
 
 boolean cdwUserIsAdmin(struct sqlConnection *conn, char *userEmail)
 /* Return TRUE if the isAdmin field of the cdwUser record with this email is 1,
  * FALSE for any other value. */
 {
 char sql[256];
 sqlSafef(sql, sizeof(sql), "select isAdmin from cdwUser where email = '%s'", userEmail);
 return (sqlQuickNum(conn, sql) == 1) ? TRUE : FALSE;
 }
 
 void cdwWarnUnregisteredUser(char *email)
 /* Issue a warning that nobody is registered under email,  with a hint about who to
  * ask for an account. */
 {
 warn("No user exists with email %s. "
      "If you need an account please contact your CIRM DCC data wrangler "
      "and have them create an account for you.",
      email);
 }
 
 
 struct cdwUser *cdwMustGetUserFromEmail(struct sqlConnection *conn, char *email)
 /* Return user associated with email.  If there is none,  put up the unregistered user
  * warning and call noWarnAbort(). */
 {
 struct cdwUser *user = cdwUserFromEmail(conn, email);
 if (user != NULL)
     return user;
 
 cdwWarnUnregisteredUser(email);
 noWarnAbort();
 return user;
 }
 
 struct cdwGroup *cdwGroupFromName(struct sqlConnection *conn, char *name)
 /* Load the cdwGroup record with the given name.  Returns NULL if not found. */
 {
 char sql[256];
 sqlSafef(sql, sizeof(sql), "select * from cdwGroup where name='%s'", name);
 struct cdwGroup *group = cdwGroupLoadByQuery(conn, sql);
 return group;
 }
 
 struct cdwGroup *cdwNeedGroupFromName(struct sqlConnection *conn, char *groupName)
 /* Like cdwGroupFromName,  but errAborts with a message naming the group when it
  * doesn't exist. */
 {
 struct cdwGroup *found = cdwGroupFromName(conn, groupName);
 if (found != NULL)
     return found;
 errAbort("Group %s doesn't exist", groupName);
 return found;
 }
 
 boolean cdwFileInGroup(struct sqlConnection *conn, unsigned int fileId, unsigned int groupId)
 /* Return TRUE if cdwGroupFile has at least one row linking fileId to groupId. */
 {
 char sql[256];
 sqlSafef(sql, sizeof(sql), "select count(*) from cdwGroupFile where fileId=%u and groupId=%u",
     fileId, groupId);
 int linkCount = sqlQuickNum(conn, sql);
 return linkCount > 0;
 }
 
 int cdwUserFileGroupsIntersect(struct sqlConnection *conn, long long fileId, int userId)
 /* Count the groups that contain both the file and the user.  Returns zero when they
  * share no group.  The query is echoed at verbosity level 2. */
 {
 char sql[512];
 sqlSafef(sql, sizeof(sql),
     "select count(*) from cdwGroupUser,cdwGroupFile "
     " where cdwGroupUser.groupId = cdwGroupFile.groupId "
     " and cdwGroupUser.userId = %d and cdwGroupFile.fileId = %lld"
     , userId, fileId);
 verbose(2, "%s\n", sql);
 int sharedGroups = sqlQuickNum(conn, sql);
 return sharedGroups;
 }
 
 struct rbTree *cdwFilesWithSharedGroup(struct sqlConnection *conn, int userId)
 /* Build an intVal tree keyed by fileId (values all NULL) that holds every file
  * belonging to at least one group the user is a member of.  Feed the result to
  * cdwQuickCheckAccess for a quicker version of cdwCheckAccess. */
 {
 char sql[256];
 sqlSafef(sql, sizeof(sql), 
     "select distinct(fileId) from cdwGroupFile,cdwGroupUser "
     " where cdwGroupUser.groupId = cdwGroupFile.groupId "
     " and cdwGroupUser.userId = %d", userId);
 
 struct rbTree *fileTree = intValTreeNew();
 struct sqlResult *sr = sqlGetResult(conn, sql);
 char **row = sqlNextRow(sr);
 while (row != NULL)
     {
     intValTreeAdd(fileTree, sqlLongLong(row[0]), NULL);
     row = sqlNextRow(sr);
     }
 sqlFreeResult(&sr);
 return fileTree;
 }
 
 static boolean checkAccess(struct rbTree *groupedFiles, struct sqlConnection *conn, 
     struct cdwFile *ef, struct cdwUser *user, int accessType)
 /* Decide whether user gets accessType (cdwAccessRead or cdwAccessWrite) access to
  * file ef.  Access levels are compared with >=,  so write access implies read access.
  * A NULL user is only granted what is shared with everybody.
  * The group check is the costly part.  When groupedFiles (from
  * cdwFilesWithSharedGroup) is given it is consulted instead of the database,
  * otherwise conn is used for a query. */
 {
 /* Shared with the whole world at this level? */
 if (ef->allAccess >= accessType)
     return TRUE;
 
 /* Anything beyond that needs somebody to be logged in. */
 if (user == NULL)
     return FALSE;
 
 /* The owner,  if the file grants its owner this level. */
 if (ef->userId == user->id && ef->userAccess >= accessType)
     return TRUE;
 
 /* Admins get everything. */
 if (user->isAdmin)
     return TRUE;
 
 /* What is left is group access.  No point in looking at groups if the file does
  * not grant this level to its groups. */
 if (ef->groupAccess < accessType)
     return FALSE;
 
 if (groupedFiles == NULL)
     return cdwUserFileGroupsIntersect(conn, ef->id, user->id);   /* SQL query */
 return intValTreeLookup(groupedFiles, ef->id) != NULL;
 }
 
 boolean cdwCheckAccess(struct sqlConnection *conn, struct cdwFile *ef,
     struct cdwUser *user, int accessType)
 /* Return TRUE if user may access file ef at level accessType,  which is one of
  * cdwAccessRead or cdwAccessWrite.  Write access implies read access too.
  * The user may be NULL,  in which case only shared-with-all files are accessible.
  * The group part of the check is done with a database query, which takes almost a
  * millisecond.  For many files consider cdwQuickCheckAccess instead. */
 {
 boolean allowed = checkAccess(NULL, conn, ef, user, accessType);
 return allowed;
 }
 
 boolean cdwQuickCheckAccess(struct rbTree *groupedFiles, struct cdwFile *ef,
     struct cdwUser *user, int accessType)
 /* Same decision as cdwCheckAccess,  but the group part is answered from groupedFiles,
  * which is the result of a call to cdwFilesWithSharedGroup for this user,  rather
  * than from the database.  When querying thousands of files this is hundreds of
  * times faster. */
 {
 boolean allowed = checkAccess(groupedFiles, NULL, ef, user, accessType);
 return allowed;
 }
 
 long long cdwCountAccessible(struct sqlConnection *conn, struct cdwUser *user)
 /* Return total number of validated,  error free files that user can read.
  *    conn - connection to cdw database
  *    user - who to count for.  NULL means count only files shared with everybody,
  *           which is done with a single count(*) query. */
 {
 long long count = 0;
 if (user == NULL)
     {
     char query[256];
     sqlSafef(query, sizeof(query), 
         "select count(*) from cdwFile,cdwValidFile "
         " where cdwFile.id = cdwValidFile.fileId and allAccess > 0"
         " and (errorMessage='' or errorMessage is null)"
         );
     count = sqlQuickLongLong(conn, query);
     }
 else
     {
     /* Load every valid file and check them one at a time against the user. */
     struct rbTree *groupedFiles = cdwFilesWithSharedGroup(conn, user->id);
     char query[256];
     sqlSafef(query, sizeof(query), 
         "select cdwFile.* from cdwFile,cdwValidFile "
         " where cdwFile.id = cdwValidFile.fileId "
         " and (errorMessage='' or errorMessage is null)"
         );
     struct cdwFile *ef, *next, *efList = cdwFileLoadByQuery(conn, query);
     for (ef = efList; ef != NULL; ef = next)
         {
         next = ef->next;
         if (cdwQuickCheckAccess(groupedFiles, ef, user, cdwAccessRead))
             ++count;
         /* cdwFileFree releases a single record,  so each one is freed as we finish
          * with it.  Freeing only the head of the list afterwards would leak every
          * record behind it. */
         cdwFileFree(&ef);
         }
     rbTreeFree(&groupedFiles);
     }
 return count;
 }
 
 struct cdwFile *cdwAccessibleFileList(struct sqlConnection *conn, struct cdwUser *user)
 /* Return list of all validated,  error free files that user can read.  A NULL user
  * gets the files that are shared with everybody. */
 {
 /* Nobody logged in:  a single query does the whole job. */
 if (user == NULL)
     {
     char sql[256];
     sqlSafef(sql, sizeof(sql), 
         "select cdwFile.* from cdwFile,cdwValidFile "
         " where cdwFile.id = cdwValidFile.fileId and allAccess > 0"
         " and (errorMessage='' or errorMessage is null)");
     return cdwFileLoadByQuery(conn, sql);
     }
 
 /* Otherwise load all valid files and move the readable ones to a new list,
  * freeing the rest as we go. */
 struct rbTree *sharedTree = cdwFilesWithSharedGroup(conn, user->id);
 struct cdwFile *keepers = NULL;
 struct cdwFile *file = cdwFileLoadAllValid(conn);
 while (file != NULL)
     {
     struct cdwFile *following = file->next;
     if (cdwQuickCheckAccess(sharedTree, file, user, cdwAccessRead))
         slAddHead(&keepers, file);
     else
         cdwFileFree(&file);
     file = following;
     }
 rbTreeFree(&sharedTree);
 
 /* Adding at the head left the list backwards. */
 slReverse(&keepers);
 return keepers;
 }
 
 struct rbTree *cdwAccessTreeForUser(struct sqlConnection *conn, struct cdwUser *user, 
     struct cdwFile *efList, struct rbTree *groupedFiles)
 /* Build an intVal tree of those files in efList that user has read access to.
  * Keys are file ids,  values point to the cdwFile objects in efList (not copies). */
 {
 struct rbTree *readable = intValTreeNew(0);
 struct cdwFile *file = efList;
 while (file != NULL)
     {
     if (cdwQuickCheckAccess(groupedFiles, file, user, cdwAccessRead))
         intValTreeAdd(readable, file->id, file);
     file = file->next;
     }
 return readable;
 }
 
 
 int cdwGetHost(struct sqlConnection *conn, char *hostName)
 /* Return id of hostName in the cdwHost table.  A host that is not in the table yet
  * is inserted (with paraFetchStreams set to 10) and the new id returned. */
 {
 char sql[512];
 
 /* Known host? */
 sqlSafef(sql, sizeof(sql), "select id from cdwHost where name='%s'", hostName);
 int knownId = sqlQuickNum(conn, sql);
 if (knownId > 0)
     return knownId;
 
 /* New host,  make up a record for it. */
 sqlSafef(sql, sizeof(sql), "insert cdwHost (name, firstAdded, paraFetchStreams) values('%s', %lld, 10)", 
     hostName, cdwNow());
 sqlUpdate(conn, sql);
 return sqlLastAutoId(conn);
 }
 
 int cdwGetSubmitDir(struct sqlConnection *conn, int hostId, char *submitDir)
 /* Return id of submitDir (matched on url) in the cdwSubmitDir table.  If it is not
  * there yet a record is inserted for it under hostId and the new id returned. */
 {
 char sql[512];
 
 /* Known directory? */
 sqlSafef(sql, sizeof(sql), "select id from cdwSubmitDir where url='%s'", submitDir);
 int knownId = sqlQuickNum(conn, sql);
 if (knownId > 0)
     return knownId;
 
 /* New directory,  make up a record for it. */
 sqlSafef(sql, sizeof(sql), 
     "insert cdwSubmitDir (url, firstAdded, hostId) values('%s', %lld, %d)", 
     submitDir, cdwNow(), hostId);
 sqlUpdate(conn, sql);
 return sqlLastAutoId(conn);
 }
 
 void cdwMakeLicensePlate(char *prefix, int ix, char *out, int outSize)
 /* Make a license-plate type string composed of prefix + funky coding of ix
  * and put result in out.  The coded part is three digits followed by three capital
  * letters,  with ix 1 coming out as 000AAA.
  *    prefix - string to start the license plate with
  *    ix - number to encode,  from 1 up to and including 10*10*10*26*26*26
  *    out - where to put result
  *    outSize - size of out,  needs to be at least strlen(prefix) + 7
  * Aborts if ix is out of range or out is too small. */
 {
 int maxIx = 10*10*10*26*26*26;
 /* Zero has to be rejected along with the negatives:  after the -1 below it would
  * turn into negative remainders and thus characters outside of A-Z. */
 if (ix <= 0)
     errAbort("ix must be positive in cdwMakeLicensePlate");
 if (ix > maxIx)
     errAbort("ix exceeds max in cdwMakeLicensePlate.  ix %d, max %d\n", ix, maxIx);
 int prefixSize = strlen(prefix);
 int minSize = prefixSize + 6 + 1;
 if (outSize < minSize)
     errAbort("outSize (%d) not big enough in cdwMakeLicensePlate", outSize);
 
 /* Copy in prefix. */
 strcpy(out, prefix);
 
 /* Generate the 123ABC part of license plate backwards. */
 char *s = out+minSize;
 int x = ix - 1;	// -1 so start with AAA not AAB
 *(--s) = 0;	// zero tag at end;
 int i;
 for (i=0; i<3; ++i)
     {
     int remainder = x%26;
     *(--s) = 'A' + remainder;
     x /= 26;
     }
 for (i=0; i<3; ++i)
     {
     int remainder = x%10;
     *(--s) = '0' + remainder;
     x /= 10;
     }
 }
 
 void cdwDirForTime(time_t sinceEpoch, char dir[PATH_LEN])
 /* Fill in dir with the year/month/day/ directory (numbers not zero padded, trailing
  * slash included) that corresponds to the given time in UTC. */
 {
 /* Break the time down into calendar fields. */
 struct tm utc;
 gmtime_r(&sinceEpoch, &utc);
 
 /* struct tm counts years from 1900 and months from zero. */
 int year = utc.tm_year+1900;
 int month = utc.tm_mon+1;
 safef(dir, PATH_LEN, "%d/%d/%d/", year, month, utc.tm_mday);
 }
 
 char *lastMatchCharExcept(char *start, char *end, char match, char except)
 /* Scan backwards from just before end down to start,  looking for the char match.
  * Returns pointer to the first match met this way,  which is the last one in the
  * string.  Returns NULL if the char except is met before any match,  or if the
  * scan reaches start without finding match. */
 {
 char *pos = end;
 while (pos > start)
     {
     --pos;
     if (*pos == except)
         return NULL;
     if (*pos == match)
         return pos;
     }
 return NULL;
 }
 
 void cdwMakeBabyName(unsigned long id, char *baseName, int baseNameSize)
 /* Turn numerical id (1 or more) into an easy to pronounce name made of
  * consonant-vowel pairs and store it, zero terminated, in baseName.  Aborts if the
  * name does not fit in baseNameSize. */
 {
 char *consonants = "bdfghjklmnprstvwxyz";   // Avoid c and q because make sound ambiguous
 char *vowels = "aeiou";
 int consonantCount = strlen(consonants);
 int vowelCount = strlen(vowels);
 assert(id >= 1);
 
 unsigned long rest = id - 1;   /* We start at zero not 1 */
 int used = 0;
 for (;;)
     {
     /* Peel one vowel and one consonant off the low end of what is left. */
     char vowel = vowels[rest%vowelCount];
     rest /= vowelCount;
     char consonant = consonants[rest%consonantCount];
     rest /= consonantCount;
 
     if (used + 2 >= baseNameSize)
         errAbort("Not enough room for %lu in %d letters in cdwMakeBabyName", id, baseNameSize);
     baseName[used++] = consonant;
     baseName[used++] = vowel;
 
     /* Always emit at least one pair,  then stop once the number is used up. */
     if (rest == 0)
         break;
     }
 baseName[used] = 0;
 }
 
 char *cdwFindDoubleFileSuffix(char *path)
 /* Return pointer to the suffix of the file name part of path.  Normally this is the
  * last '.' after the last '/'.  When that suffix is .gz or .bigBed and there is
  * another '.' before it in the file name,  the pointer is to that earlier dot, so
  * that something like .fastq.gz comes back whole.  If the file name has no dot at
  * all the result points to the zero at the end of path. */
 {
 char *pathEnd = path + strlen(path);
 char *suffix = lastMatchCharExcept(path, pathEnd, '.', '/');
 
 /* No suffix at all. */
 if (suffix == NULL)
     return pathEnd;
 
 /* For the wrapper type suffixes try to include one more level. */
 if (sameString(suffix, ".gz") || sameString(suffix, ".bigBed"))
     {
     char *earlierDot = lastMatchCharExcept(path, suffix, '.', '/');
     if (earlierDot != NULL)
         return earlierDot;
     }
 return suffix;
 }
 
 void cdwMakeFileNameAndPath(int cdwFileId, char *submitFileName, char cdwFile[PATH_LEN], char serverPath[PATH_LEN])
 /* Convert file id to local file name, and full file path. Make any directories needed
  * along serverPath. */
 {
 /* Preserve suffix.  Give ourselves up to two suffixes. */
 char *suffix = cdwFindDoubleFileSuffix(submitFileName);
 
 /* Figure out cdw file name, starting with baseName. */
 char baseName[32];
 cdwMakeBabyName(cdwFileId, baseName, sizeof(baseName));
 
 /* Figure out directory and make any components not already there. */
 char cdwDir[PATH_LEN];
 cdwDirForTime(cdwNow(), cdwDir);
 char uploadDir[PATH_LEN];
 safef(uploadDir, sizeof(uploadDir), "%s%s", cdwRootDir, cdwDir);
 makeDirsOnPath(uploadDir);
 
 /* Figure out full file names */
 safef(cdwFile, PATH_LEN, "%s%s%s", cdwDir, baseName, suffix);
 safef(serverPath, PATH_LEN, "%s%s", cdwRootDir, cdwFile);
 }
 
 char *cdwSetting(struct sqlConnection *conn, char *name)
 /* Look up val of the named row in the cdwSettings table.  Returns NULL if there is
  * no such setting.  FreeMem result when done. */
 {
 char sql[256];
 sqlSafef(sql, sizeof(sql), "select val from cdwSettings where name='%s'", name);
 char *val = sqlQuickString(conn, sql);
 return val;
 }
 
 char *cdwRequiredSetting(struct sqlConnection *conn, char *name)
 /* Like cdwSetting,  but errAborts with a message naming the setting when it is not
  * defined.  FreeMem result when done. */
 {
 char *val = cdwSetting(conn, name);
 if (val != NULL)
     return val;
 errAbort("Required %s setting is not defined in cdwSettings table", name);
 return val;
 }
 
 char *cdwLicensePlateHead(struct sqlConnection *conn)
 /* Return license plate prefix for current database - something like TSTFF or DEVFF
  * or ENCFF.  It is read from the required "prefix" setting on first use and kept in
  * a static buffer from then on,  so don't modify or free the result. */
 {
 static char cachedHead[32];
 if (cachedHead[0] != 0)
     return cachedHead;
 
 char *setting = cdwRequiredSetting(conn, "prefix");
 safef(cachedHead, sizeof(cachedHead), "%s", setting);
 return cachedHead;
 }
 
 
 /* Name under which the local machine is recorded in cdwHost,  see getLocalHost. */
 static char *localHostName = "localhost";
 /* The url recorded in cdwSubmitDir for local submissions,  see getLocalSubmitDir.
  * It is the empty string. */
 static char *localHostDir = "";  
 
 static int getLocalHost(struct sqlConnection *conn)
 /* Return id of the cdwHost record for localHostName,  inserting the record first if
  * it is not there already. */
 {
 char sql[256];
 sqlSafef(sql, sizeof(sql), "select id from cdwHost where name = '%s'", localHostName);
 int existingId = sqlQuickNum(conn, sql);
 if (existingId != 0)
     return existingId;
 
 sqlSafef(sql, sizeof(sql), "insert cdwHost(name, firstAdded) values('%s', %lld)",
     localHostName,  cdwNow());
 sqlUpdate(conn, sql);
 return sqlLastAutoId(conn);
 }
 
 static int getLocalSubmitDir(struct sqlConnection *conn)
 /* Get submit dir for local submissions, making it up if it does not exist. */
 {
 int hostId = getLocalHost(conn);
 char query[256];
 sqlSafef(query, sizeof(query), "select id from cdwSubmitDir where url='%s' and hostId=%d", 
     localHostDir, hostId);
 int dirId = sqlQuickNum(conn, query);
 if (dirId == 0)
     {
     sqlSafef(query, sizeof(query), "insert cdwSubmitDir(url,hostId,firstAdded) values('%s',%d,%lld)",
 	localHostDir, hostId, cdwNow());
     sqlUpdate(conn, query);
     dirId = sqlLastAutoId(conn);
     }
 return dirId;
 }
 
 static int getLocalSubmit(struct sqlConnection *conn)
 /* Return id of the one cdwSubmit record that covers all of our local additions,
  * which is the submission attached to the local submit dir.  It is inserted,  with
  * the current time as start of upload,  if it does not exist. */
 {
 int localDirId = getLocalSubmitDir(conn);
 
 char sql[256];
 sqlSafef(sql, sizeof(sql), "select id from cdwSubmit where submitDirId='%d'", localDirId);
 int existingId = sqlQuickNum(conn, sql);
 if (existingId != 0)
     return existingId;
 
 sqlSafef(sql, sizeof(sql), "insert cdwSubmit (submitDirId,startUploadTime) values(%d,%lld)",
     localDirId, cdwNow());
 sqlUpdate(conn, sql);
 return sqlLastAutoId(conn);
 }
 
 char **sqlNeedNextRow(struct sqlResult *sr)
 /* Return next row of sr,  errAborting if there is none.  The message is generic and
  * says nothing about which query came up empty,  so please only use this where an
  * empty result would be unusual. */
 {
 char **nextRow = sqlNextRow(sr);
 if (nextRow != NULL)
     return nextRow;
 errAbort("Unexpected empty result from database.");
 return nextRow;
 }
 
 void cdwUpdateFileTags(struct sqlConnection *conn, long long fileId, struct dyString *tags)
 /* Set the tags field of the cdwFile record with id fileId to the string held in
  * tags. */
 {
 struct dyString *update = dyStringNew(0);
 
 /* Build statement in two steps,  first what to set,  then which record. */
 sqlDyStringPrintf(update, "update cdwFile set tags='%s' ", tags->string);
 sqlDyStringPrintf(update, " where id=%lld", fileId);
 
 sqlUpdate(conn, update->string);
 dyStringFree(&update);
 }
 
 struct cdwFile *cdwGetLocalFile(struct sqlConnection *conn, char *localAbsolutePath, 
     char *givenMd5Sum)
 /* Get record of local file from database, adding it if it doesn't already exist.
  *    conn - connection to cdw database,  gets updated when the file is new or changed
  *    localAbsolutePath - where the file is now,  must start with '/'
  *    givenMd5Sum - MD5 sum of the file if the caller already knows it.  Pass NULL to
  *                  have it calculated here.
  * When a record for this path exists under the local submission and its update time
  * and size agree with the file on disk,  that record is returned and nothing is changed.
  * Otherwise a new cdwFile record is made,  the file is copied to a new place under
  * cdwRootDir,  the copy is made read only,  and replaceOriginalWithSymlink is called on
  * the original path.  The new record is then loaded from the database and returned.
  * Aborts on relative path or missing file. */
 {
 /* First do a reality check on the local absolute path.  Is there a file there? */
 if (localAbsolutePath[0] != '/')
     errAbort("Using relative path in cdwAddLocalFile.");
 long long size = fileSize(localAbsolutePath);
 if (size == -1)
     errAbort("%s does not exist", localAbsolutePath);
 long long updateTime = fileModTime(localAbsolutePath);
 
 /* Get file if it's in database already.  Local files all hang off of the single
  * local submission and local submit dir,  which are made here if need be. */
 int submitDirId = getLocalSubmitDir(conn);
 int submitId = getLocalSubmit(conn);
 char query[256+PATH_LEN];
 sqlSafef(query, sizeof(query), "select * from cdwFile where submitId=%d and submitFileName='%s'",
     submitId, localAbsolutePath);
 struct cdwFile *ef = cdwFileLoadByQuery(conn, query);
 
 /* If we got something in database, check update time and size, and if it's no change just 
  * return existing database id. */
 if (ef != NULL && ef->updateTime == updateTime && ef->size == size)
     return ef;
 /* NOTE(review): when ef is not NULL but out of date it appears not to be freed
  * anywhere below - confirm whether that is intended. */
 
 /* If we got here, then we need to make a new file record. Start with pretty empty record
  * that just has file ID, submitted file name and a few things*/
 sqlSafef(query, sizeof(query), 
     "insert cdwFile (submitId,submitDirId,submitFileName,startUploadTime) "
             " values(%d, %d, '%s', %lld)"
 	    , submitId, submitDirId, localAbsolutePath, cdwNow());
 sqlUpdate(conn, query);
 long long fileId = sqlLastAutoId(conn);
 
 /* Create big data warehouse file/path name.  This needs the id of the new record,
  * which is why the record is inserted before the file is copied. */
 char cdwFile[PATH_LEN], cdwPath[PATH_LEN];
 cdwMakeFileNameAndPath(fileId, localAbsolutePath, cdwFile, cdwPath);
 
 /* We're a little paranoid so md5 it */
 char *md5;
 
 /* Take the caller's word for the MD5 sum if they gave one,  otherwise calculate it
  * from the file. */
 if (givenMd5Sum)
     {
     md5 = givenMd5Sum;
     }
 else
     {
     md5 = md5HexForFile(localAbsolutePath);
     }
 /* Copy file into warehouse managed dir,  carry the time stamp over from the original
  * (going by the name of touchFileFromFile),  make the copy read only,  and then have
  * the original replaced with a symlink. */
 copyFile(localAbsolutePath, cdwPath);
 touchFileFromFile(localAbsolutePath, cdwPath);
 chmod(cdwPath, 0444);
 replaceOriginalWithSymlink(localAbsolutePath, "", cdwPath);
 
 /* Update file record with the things that were not known at insert time. */
 sqlSafef(query, sizeof(query), 
     "update cdwFile set cdwFileName='%s', endUploadTime=%lld,"
                        "updateTime=%lld, size=%lld, md5='%s' where id=%lld"
 			, cdwFile, cdwNow(), updateTime, size, md5, fileId);
 sqlUpdate(conn, query);
 
 /* Now, it's a bit of a time waste, but cheap in code, to just load it back from DB. */
 sqlSafef(query, sizeof(query), "select * from cdwFile where id=%lld", fileId);
 return cdwFileLoadByQuery(conn, query);
 }
 
 struct cdwFile *cdwFileLoadAllValid(struct sqlConnection *conn)
 /* Return list of all cdwFile records that have a matching cdwValidFile record and an
  * empty or null errorMessage,  that is files validated with no error. */
 {
 char sql[256];
 sqlSafef(sql, sizeof(sql), 
     "select cdwFile.* from cdwFile,cdwValidFile "
     " where cdwFile.id=cdwValidFile.fileId "
     " and (cdwFile.errorMessage='' or cdwFile.errorMessage is null)");
 struct cdwFile *validList = cdwFileLoadByQuery(conn, sql);
 return validList;
 }
 
 struct cdwFile *cdwFileAllIntactBetween(struct sqlConnection *conn, int startId, int endId)
 /* Return list of all intact files with ids from startId up to and including endId.
  * Intact here means finished uploading and MD5 checked:  endUploadTime and updateTime
  * are both set,  there is no error message,  and the file is not deprecated. */
 {
 char sql[256];
 sqlSafef(sql, sizeof(sql), 
     "select * from cdwFile where id>=%d and id<=%d and endUploadTime != 0 "
     "and updateTime != 0 and (errorMessage = '' or errorMessage is NULL) and deprecated = ''", 
     startId, endId);
 struct cdwFile *intactList = cdwFileLoadByQuery(conn, sql);
 return intactList;
 }
 
 long long cdwFindInSameSubmitDir(struct sqlConnection *conn, 
     struct cdwFile *ef, char *submitFileName)
 /* Look in the submit directory that ef belongs to (ef->submitDirId) for files whose
  * submitFileName matches.  The query orders by id descending and the first value
  * is returned, i.e. the highest matching file id. */
 {
 char sql[3*PATH_LEN];
 sqlSafef(sql, sizeof(sql),
     "select cdwFile.id from cdwFile,cdwSubmitDir where cdwFile.submitDirId = cdwSubmitDir.id and "
     "cdwSubmitDir.id = %d and cdwFile.submitFileName = '%s' order by cdwFile.id desc",
     ef->submitDirId, submitFileName);
 long long newestId = sqlQuickLongLong(conn, sql);
 return newestId;
 }
 
 struct cdwFile *cdwFileFromId(struct sqlConnection *conn, long long fileId)
 /* Return the cdwFile record whose id is fileId - NULL if not found. */
 {
 char sql[128];
 sqlSafef(sql, sizeof(sql), "select * from cdwFile where id=%lld", fileId);
 struct cdwFile *file = cdwFileLoadByQuery(conn, sql);
 return file;
 }
 
 struct cdwFile *cdwFileFromIdOrDie(struct sqlConnection *conn, long long fileId)
 /* Return the cdwFile record whose id is fileId - calls errAbort if there is none. */
 {
 struct cdwFile *found = cdwFileFromId(conn, fileId);
 if (found != NULL)
     return found;
 errAbort("Couldn't find file for id %lld\n", fileId);
 return NULL;
 }
 
 int cdwFileIdFromPathSuffix(struct sqlConnection *conn, char *suf)
 /* Return the highest file id whose "submitDir.url/submitFileName" string ends with suf.
  * The result comes from sqlQuickNum; the original author documents 0 when nothing matches. */
 {
 int suffixLength = strlen(suf);
 char sql[4096];
 // The original author measured this query at roughly 1 second.  -jk
 sqlSafef(sql, sizeof(sql),
     "SELECT cdwFile.id FROM cdwSubmitDir, cdwFile "
     "WHERE cdwFile.submitDirId=cdwSubmitDir.id AND "
     "RIGHT(CONCAT_WS('/', cdwSubmitDir.url, submitFileName), %d)='%s' "
     "ORDER BY cdwFile.id DESC LIMIT 1;",
     suffixLength, suf);
 return sqlQuickNum(conn, sql);
 }
 
 struct cdwValidFile *cdwValidFileFromFileId(struct sqlConnection *conn, long long fileId)
 /* Return the cdwValidFile record whose fileId column equals fileId - NULL when the
  * file has not been validated. */
 {
 char sql[128];
 sqlSafef(sql, sizeof(sql), "select * from cdwValidFile where fileId=%lld", fileId);
 struct cdwValidFile *validFile = cdwValidFileLoadByQuery(conn, sql);
 return validFile;
 }
 
 struct cdwValidFile *cdwValidFileFromLicensePlate(struct sqlConnection *conn, char *licensePlate)
 /* Return the cdwValidFile record carrying the given license plate - NULL if not found. */
 {
 char sql[128];
 sqlSafef(sql, sizeof(sql), "select * from cdwValidFile where licensePlate='%s'", licensePlate);
 struct cdwValidFile *validFile = cdwValidFileLoadByQuery(conn, sql);
 return validFile;
 }
 
 struct cdwExperiment *cdwExperimentFromAccession(struct sqlConnection *conn, char *acc)
 /* Return the cdwExperiment record whose accession (something like 'ENCSR123ABC')
  * equals acc. */
 {
 char sql[128];
 sqlSafef(sql, sizeof(sql), "select * from cdwExperiment where accession='%s'", acc);
 struct cdwExperiment *experiment = cdwExperimentLoadByQuery(conn, sql);
 return experiment;
 }
 
 struct genomeRangeTree *cdwMakeGrtFromBed3List(struct bed3 *bedList)
 /* Build a new genomeRangeTree holding the chrom/chromStart/chromEnd range of every
  * item on bedList. */
 {
 struct genomeRangeTree *tree = genomeRangeTreeNew();
 struct bed3 *item = bedList;
 while (item != NULL)
     {
     genomeRangeTreeAdd(tree, item->chrom, item->chromStart, item->chromEnd);
     item = item->next;
     }
 return tree;
 }
 
 struct cdwAssembly *cdwAssemblyForUcscDb(struct sqlConnection *conn, char *ucscDb)
 /* Return the cdwAssembly record whose ucscDb column equals ucscDb; errAbort if
  * there is none. */
 {
 char sql[256];
 sqlSafef(sql, sizeof(sql), "select * from cdwAssembly where ucscDb='%s'", ucscDb);
 struct cdwAssembly *found = cdwAssemblyLoadByQuery(conn, sql);
 if (found != NULL)
     return found;
 errAbort("Can't find assembly for %s", ucscDb);
 return NULL;
 }
 
 struct cdwAssembly *cdwAssemblyForId(struct sqlConnection *conn, long long id)
 /* Return the cdwAssembly record with the given id; errAbort if there is none. */
 {
 char sql[128];
 sqlSafef(sql, sizeof(sql), "select * from cdwAssembly where id=%lld", id);
 struct cdwAssembly *found = cdwAssemblyLoadByQuery(conn, sql);
 if (found != NULL)
     return found;
 errAbort("Can't find assembly for %lld", id);
 return NULL;
 }
 
 char *cdwSimpleAssemblyName(char *assembly)
 /* Given compound name like male.hg19 return just hg19.  This is the assembly name
  * under which enrichment calculations are done. */
 {
 /* When the name ends in one of these common assembly suffixes, work in that
  * assembly's space rather than in a subspace such as male, female, etc. */
 static char *specialAsm[] = {".hg19",".hg38",".mm9",".mm10",".dm3",".ce10",".dm6"};
 char **suffix = specialAsm;
 char **suffixEnd = specialAsm + ArraySize(specialAsm);
 while (suffix < suffixEnd)
     {
     if (endsWith(assembly, *suffix))
         return *suffix + 1;   /* Step past the leading '.' */
     ++suffix;
     }
 /* No known suffix: hand back the caller's own string. */
 return assembly;
 }
 
 
 struct genomeRangeTree *cdwGrtFromBigBed(char *fileName)
 /* Open the bigBed file and return a genome range tree holding the start/end of
  * every interval on every chromosome (block structure is not consulted). */
 {
 struct bbiFile *bigBed = bigBedFileOpen(fileName);
 struct bbiChromInfo *allChroms = bbiChromList(bigBed);
 struct genomeRangeTree *result = genomeRangeTreeNew();
 struct bbiChromInfo *chromInfo = allChroms;
 while (chromInfo != NULL)
     {
     struct rbTree *chromTree = genomeRangeTreeFindOrAddRangeTree(result, chromInfo->name);
     /* Intervals for this chromosome live in a local memory pool that is
      * discarded once they have been copied into the tree. */
     struct lm *localMem = lmInit(0);
     struct bigBedInterval *interval = 
         bigBedIntervalQuery(bigBed, chromInfo->name, 0, chromInfo->size, 0, localMem);
     for ( ; interval != NULL; interval = interval->next)
         rangeTreeAdd(chromTree, interval->start, interval->end);
     lmCleanup(&localMem);
     chromInfo = chromInfo->next;
     }
 bigBedFileClose(&bigBed);
 bbiChromInfoFreeList(&allChroms);
 return result;
 }
 
 boolean cdwIsSupportedBigBedFormat(char *format)
 /* Return TRUE if format is the generic "bigBed" or matches the name of one of the
  * entries in cdwBedTypeTable, FALSE otherwise. */
 {
 if (sameString(format, "bigBed"))   // Generic bigBed is always accepted
     return TRUE;
 int ix = 0;
 while (ix < cdwBedTypeCount)
     {
     if (sameString(format, cdwBedTypeTable[ix].name))
         return TRUE;
     ++ix;
     }
 return FALSE;
 }
 
 void cdwWriteErrToTable(struct sqlConnection *conn, char *table, int id, char *err)
 /* Store err, run through trimSpaces, in the errorMessage field of the row with the
  * given id in table. */
 {
 struct dyString *update = dyStringNew(0);
 char *message = trimSpaces(err);
 sqlDyStringPrintf(update, "update %s set errorMessage='%s' where id=%d", 
     table, message, id);
 sqlUpdate(conn, update->string);
 dyStringFree(&update);
 }
 
 void cdwWriteErrToStderrAndTable(struct sqlConnection *conn, char *table, int id, char *err)
 /* Report the error message via warn() and also record it in the errorMessage field
  * of table. */
 {
 char *trimmed = trimSpaces(err);
 warn("%s", trimmed);
 cdwWriteErrToTable(conn, table, id, err);
 }
 
 
 void cdwAddJob(struct sqlConnection *conn, char *command, int submitId)
 /* Add job to queue to run. */
 {
 char query[256+strlen(command)];
 sqlSafef(query, sizeof(query), "insert into cdwJob (commandLine,submitId) values('%s',%d)", 
     command, submitId);
 sqlUpdate(conn, query);
 }
 
 void cdwAddQaJob(struct sqlConnection *conn, long long fileId, int submitId)
 /* Queue a "cdwQaAgent <fileId>" job for the given file via cdwAddJob. */
 {
 char qaCommand[64];
 safef(qaCommand, sizeof(qaCommand), "cdwQaAgent %lld", fileId);
 cdwAddJob(conn, qaCommand, submitId);
 }
 
 int cdwSubmitPositionInQueue(struct sqlConnection *conn, char *url, unsigned *retJobId)
 /* Return position of our URL in submission queue, counting the not-yet-started rows
  * (startTime = 0) of cdwSubmitJob that come before it.  Returns -1 if no queued
  * "cdwSubmit <url> ..." command line matches.
  * If retJobId is non-NULL it receives the cdwSubmitJob id of the matching job, or 0
  * when there is no match. */
 {
 char query[256];
 sqlSafef(query, sizeof(query), "select id,commandLine from cdwSubmitJob where startTime = 0");
 struct sqlResult *sr = sqlGetResult(conn, query);
 char **row;
 int aheadOfUs = -1;
 int pos = 0;
 unsigned jobId = 0;
 while ((row = sqlNextRow(sr)) != NULL)
     {
     char *line = row[1];
     char *cdwSubmit = nextQuotedWord(&line);
     char *lineUrl = nextQuotedWord(&line);
     if (sameOk(cdwSubmit, "cdwSubmit") && sameOk(url, lineUrl))
         {
         /* Record the id only for the job that is really ours, so that a miss
          * does not report the id of some unrelated queued job. */
         jobId = sqlUnsigned(row[0]);
         aheadOfUs = pos;
         break;
         }
     ++pos;
     }
 sqlFreeResult(&sr);
 if (retJobId != NULL)
     *retJobId = jobId;
 return aheadOfUs;
 }
 
 struct cdwSubmitDir *cdwSubmitDirFromId(struct sqlConnection *conn, long long id)
 /* Return the cdwSubmitDir record with the given id, or NULL if there is none. */
 {
 char sql[256];
 sqlSafef(sql, sizeof(sql), "select * from cdwSubmitDir where id=%lld", id);
 struct cdwSubmitDir *submitDir = cdwSubmitDirLoadByQuery(conn, sql);
 return submitDir;
 }
 
 
 struct cdwSubmit *cdwSubmitFromId(struct sqlConnection *conn, long long id)
 /* Return the cdwSubmit record with the given id, or NULL if there is none. */
 {
 char sql[256];
 sqlSafef(sql, sizeof(sql), "select * from cdwSubmit where id=%lld", id);
 struct cdwSubmit *submission = cdwSubmitLoadByQuery(conn, sql);
 return submission;
 }
 
 
 struct cdwSubmit *cdwMostRecentSubmission(struct sqlConnection *conn, char *url)
 /* Return the cdwSubmit record with the highest id for this url.  The submission
  * may still be in progress. */
 {
 int urlLen = strlen(url);
 /* Room for the fixed text plus the url at up to twice its raw length. */
 char sql[2*urlLen + 128 + 1];
 sqlSafef(sql, sizeof(sql), 
     "select * from cdwSubmit where url='%s' order by id desc limit 1", url);
 struct cdwSubmit *latest = cdwSubmitLoadByQuery(conn, sql);
 return latest;
 }
 
 long long cdwSubmitMaxStartTime(struct cdwSubmit *submit, struct sqlConnection *conn)
 /* Return the largest startUploadTime among the cdwFile rows of this submission.
  * If that comes back as 0 (no file started yet), return the submission's own
  * startUploadTime instead. */
 {
 char sql[256];
 sqlSafef(sql, sizeof(sql), 
     "select max(startUploadTime) from cdwFile where submitId=%u", submit->id);
 long long latest = sqlQuickLongLong(conn, sql);
 if (latest != 0)
     return latest;
 return submit->startUploadTime;
 }
 
 int cdwSubmitCountNewValid(struct cdwSubmit *submit, struct sqlConnection *conn)
 /* Count the files of this submission that have a row in cdwValidFile. */
 {
 char sql[256];
 sqlSafef(sql, sizeof(sql), 
     "select count(*) from cdwFile e,cdwValidFile v where e.id = v.fileId and e.submitId=%u",
     submit->id);
 int validCount = sqlQuickNum(conn, sql);
 return validCount;
 }
 
 int cdwSubmitCountErrors(struct cdwSubmit *submit, struct sqlConnection *conn)
 /* Count the files of this submission whose errorMessage is neither empty nor NULL. */
 {
 char sql[256];
 sqlSafef(sql, sizeof(sql), 
     "select count(*) from cdwFile where submitId=%u and errorMessage != '' and errorMessage is not null",
     submit->id);
 int errorCount = sqlQuickNum(conn, sql);
 return errorCount;
 }
 
 boolean cdwSubmitIsValidated(struct cdwSubmit *submit, struct sqlConnection *conn)
 /* Return TRUE if validation has run, that is if the count of files with errors plus
  * the count of validated files equals submit->newFiles.  A TRUE result does not
  * mean every file passed, only that each file has a verdict. */
 {
 /* Open question from the original author: is this off by one because
  * validated.txt is in the submission but never validated? */
 int decided = cdwSubmitCountErrors(submit,conn) + cdwSubmitCountNewValid(submit, conn);
 return decided == submit->newFiles;
 }
 
 void cdwAddSubmitJob(struct sqlConnection *conn, char *userEmail, char *url, boolean update)
 /* Add submission job to table and wake up daemon.  The command line stored is
  * "cdwSubmit [-update ]'<url>' <userEmail>". */
 {
 /* Create command and add it to cdwSubmitJob table. */
 char command[strlen(url) + strlen(userEmail) + 256];
 safef(command, sizeof(command), "cdwSubmit %s'%s' %s", (update ? "-update " : ""), url, userEmail);

 /* The command always contains quote characters that SQL escaping lengthens, so
  * build the statement in a growable string instead of a strlen-sized buffer. */
 struct dyString *query = dyStringNew(0);
 sqlDyStringPrintf(query, "insert cdwSubmitJob (commandLine) values('%s')", command);
 sqlUpdate(conn, query->string);
 dyStringFree(&query);
 
 /* Write sync signal (any string ending with newline) to fifo to wake up daemon. */
 FILE *fifo = mustOpen("../userdata/cdwSubmit.fifo", "w");
 fputc('\n', fifo);
 carefulClose(&fifo);
 }
 
 
 struct cdwValidFile *cdwFindElderReplicates(struct sqlConnection *conn, struct cdwValidFile *vf)
 /* Return the cdwValidFile rows that share vf's experiment, format and outputType and
  * have a smaller id than vf.  The younger file is the one responsible for running
  * comparisons against its elders.  Files of format "unknown" get NULL. */
 {
 struct cdwValidFile *elders = NULL;
 if (!sameString(vf->format, "unknown"))
     {
     char sql[256];
     sqlSafef(sql, sizeof(sql), 
         "select * from cdwValidFile where id<%d and experiment='%s' and format='%s'"
         " and outputType='%s'",
         vf->id, vf->experiment, vf->format, vf->outputType);
     elders = cdwValidFileLoadByQuery(conn, sql);
     }
 return elders;
 }
 
 #ifdef OLD
 /* Legacy code: only compiled when OLD is defined. */
 void cdwWebHeaderWithPersona(char *title)
 /* Print out HTTP and HTML header through <BODY> tag with persona info.
  * Emits the Content-Type header, the HTML <HEAD> (CSP meta header, stylesheets and
  * scripts), the opening <BODY> tag, and then starts the navigation bar.
  * Note the title parameter is not used below; the <TITLE> is always
  * "CIRM Data Warehouse". */
 {
 printf("Content-Type:text/html\r\n");
 printf("\r\n\r\n");
 puts("<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\" "
 	      "\"http://www.w3.org/TR/html4/loose.dtd\">");
 printf("<HTML><HEAD>\n%s<TITLE>%s</TITLE>\n", getCspMetaHeader(), "CIRM Data Warehouse");
 puts("<meta http-equiv='X-UA-Compatible' content='IE=Edge'>");
 
 // Use CIRM3 CSS for common look
 puts("<link rel='stylesheet' href='/style/cirm.css' type='text/css'>");
 puts("<link rel='stylesheet' href='/style/cirmUcsc.css' type='text/css'>");
 // external link icon (box with arrow) is from FontAwesome (fa-external-link)
 puts("<link href='//netdna.bootstrapcdn.com/font-awesome/4.0.3/css/font-awesome.css' rel='stylesheet'>");
 
 // Scripts: jQuery, the jQuery cookie plugin, Persona login, local Persona glue, and bowser
 puts("<script type='text/javascript' SRC='/js/jquery.js'></script>");
 puts("<script type='text/javascript' SRC='/js/jquery.cookie.js'></script>");
 puts("<script type='text/javascript' src='https://login.persona.org/include.js'></script>");
 puts("<script type='text/javascript' src='/js/cdwPersona.js'></script>");
 puts("<script type='text/javascript' src='https://cdnjs.cloudflare.com/ajax/libs/bowser/1.6.1/bowser.min.js'></script>");
 puts("</HEAD>");
 
 /* layout with navigation bar */
 puts("<BODY>\n");
 
 cdwWebNavBarStart();
 }
 #endif /* OLD */
 
 
 #ifdef OLD
 /* Legacy code: only compiled when OLD is defined. */
 void cdwWebFooterWithPersona()
 /* Print out end tags and persona script stuff.
  * Closes the navigation bar, then calls htmlEnd(). */
 {
 cdwWebNavBarEnd();
 htmlEnd();
 }
 #endif /* OLD */
 
 
 void cdwCreateNewUser(char *email)
 /* Create new user, checking that user does not already exist.
  * Opens its own read/write connection; errAborts if a cdwUser row with this email
  * is already present. */
 {
 /* Now make sure user is not already in user table. */
 struct sqlConnection *conn = cdwConnectReadWrite();
 struct dyString *query = dyStringNew(0);
 sqlDyStringPrintf(query, "select count(*) from cdwUser where email = '%s'", email);
 if (sqlQuickNum(conn, query->string) > 0)
     errAbort("User %s already exists", email);
 
 /* Do database insert. */
 dyStringClear(query);
 sqlDyStringPrintf(query, "insert into cdwUser (email) values('%s')", email);
 sqlUpdate(conn, query->string);
 
 /* Release the query string as well as the connection. */
 dyStringFree(&query);
 sqlDisconnect(&conn);
 }
 
 void cdwPrintLogOutButton()
 /* Write the HTML for the "sign out" button to stdout (no trailing newline). */
 {
 fputs("<INPUT TYPE=button NAME=\"signOut\" VALUE=\"sign out\" id=\"signout\">", stdout);
 }
 
 struct dyString *cdwFormatDuration(long long seconds)
 /* Convert seconds to days/hours/minutes. Return result in a dyString you can free */
 {
 struct dyString *dy = dyStringNew(0);
 int days = seconds/(3600*24);
 if (days > 0)
     dyStringPrintf(dy, "%d days, ", days);
 seconds -= days*3600*24;
 
 int hours = seconds/3600;
 if (hours > 0 || days > 0)
     dyStringPrintf(dy, "%d hours", hours);
 seconds -= hours*3600;
 
 if (days == 0)
     {
     int minutes = seconds/60;
     if (minutes > 0)
 	{
 	if (hours > 0)
 	   dyStringPrintf(dy, ", ");
 	dyStringPrintf(dy, "%d minutes", minutes);
 	}
 
     if (hours == 0)
 	{
 	if (minutes > 0)
 	   dyStringPrintf(dy, ", ");
 	seconds -= minutes*60;
 	dyStringPrintf(dy, "%d seconds", (int)seconds);
 	}
     }
 return dy;
 }
 
 struct cdwFile *cdwFileInProgress(struct sqlConnection *conn, int submitId)
 /* Return the cdwFile named by the submission's fileIdInTransit column, or NULL when
  * that column is 0. */
 {
 char sql[256];
 sqlSafef(sql, sizeof(sql), "select fileIdInTransit from cdwSubmit where id=%u", submitId);
 long long transitId = sqlQuickLongLong(conn, sql);
 if (transitId != 0)
     {
     sqlSafef(sql, sizeof(sql), "select * from cdwFile where id=%lld", transitId);
     return cdwFileLoadByQuery(conn, sql);
     }
 return NULL;
 }
 
 
 static void accessDenied()
 /* Stall for five seconds, then errAbort with an access-denied message. */
 {
 enum { denyDelaySeconds = 5 };
 sleep(denyDelaySeconds);
 errAbort("Access denied!");
 }
 
 struct cdwScriptRegistry *cdwScriptRegistryFromCgi()
 /* Get script registry from cgi variables.  Does authentication too.
  * Reads the "user" and "password" CGI variables, loads the cdwScriptRegistry row
  * named by user, and compares its secretHash with the sid made from password.
  * Calls accessDenied() when the user is unknown or the hash does not match. */
 {
 struct sqlConnection *conn = cdwConnect();
 /* NOTE(review): user is escaped here and then passed to sqlSafef as a quoted %s;
  * that looks like double escaping - confirm.  Left unchanged because the password
  * hash is also computed from the escaped form. */
 char *user = sqlEscapeString(cgiString("user"));
 char *password = sqlEscapeString(cgiString("password"));
 char query[256];
 sqlSafef(query, sizeof(query), "select * from cdwScriptRegistry where name='%s'", user);
 struct cdwScriptRegistry *reg = cdwScriptRegistryLoadByQuery(conn, query);
 if (reg == NULL)
     accessDenied();
 char key[CDW_SID_SIZE];
 cdwMakeSid(password, key);
 if (!sameString(reg->secretHash, key))
     accessDenied();
 /* The escaped copies are no longer needed once authentication has passed. */
 freeMem(user);
 freeMem(password);
 sqlDisconnect(&conn);
 return reg;
 }
 
 void cdwValidFileUpdateDb(struct sqlConnection *conn, struct cdwValidFile *el, long long id)
 /* Save the fields of el into the cdwValidFile table, replacing the values in the
  * existing record whose id column equals the id parameter.
  * The id, licensePlate and fileId columns are not written.  singleQaStatus and
  * replicateQaStatus are always set to 0, whatever el holds. */
 {
 struct dyString *dy = newDyString(512);
 sqlDyStringPrintf(dy, "update cdwValidFile set ");
 // omit id and licensePlate fields - one autoupdates and the other depends on this
 // also omit fileId which also really can't change.
 sqlDyStringPrintf(dy, " format='%s',", el->format);
 sqlDyStringPrintf(dy, " outputType='%s',", el->outputType);
 sqlDyStringPrintf(dy, " experiment='%s',", el->experiment);
 sqlDyStringPrintf(dy, " replicate='%s',", el->replicate);
 sqlDyStringPrintf(dy, " enrichedIn='%s',", el->enrichedIn);
 sqlDyStringPrintf(dy, " ucscDb='%s',", el->ucscDb);
 sqlDyStringPrintf(dy, " itemCount=%lld,", (long long)el->itemCount);
 sqlDyStringPrintf(dy, " basesInItems=%lld,", (long long)el->basesInItems);
 sqlDyStringPrintf(dy, " sampleCount=%lld,", (long long)el->sampleCount);
 sqlDyStringPrintf(dy, " basesInSample=%lld,", (long long)el->basesInSample);
 sqlDyStringPrintf(dy, " sampleBed='%s',", el->sampleBed);
 sqlDyStringPrintf(dy, " mapRatio=%g,", el->mapRatio);
 sqlDyStringPrintf(dy, " sampleCoverage=%g,", el->sampleCoverage);
 sqlDyStringPrintf(dy, " depth=%g,", el->depth);
 // QA status columns are reset rather than copied from el
 sqlDyStringPrintf(dy, " singleQaStatus=0,");
 sqlDyStringPrintf(dy, " replicateQaStatus=0,");
 sqlDyStringPrintf(dy, " part='%s',", el->part);
 sqlDyStringPrintf(dy, " pairedEnd='%s',", el->pairedEnd);
 sqlDyStringPrintf(dy, " qaVersion='%d',", el->qaVersion);
 sqlDyStringPrintf(dy, " uniqueMapRatio=%g,", el->uniqueMapRatio);
 // last column of the set list, so no trailing comma
 sqlDyStringPrintf(dy, " lane='%s'", el->lane);
 // Compile-time reminder: the build fails if the table's column count changes from 24
 #if (CDWVALIDFILE_NUM_COLS != 24)
    #error "Please update this routine with new column"
 #endif
 sqlDyStringPrintf(dy, " where id=%lld\n", (long long)id);
 sqlUpdate(conn, dy->string);
 freeDyString(&dy);
 }
 
 char *cdwLookupTag(struct cgiParsedVars *list, char *tag)
 /* Walk the list and return the first value found for tag that is not "n/a".
  * Returns the empty string when no element supplies one. */
 {
 struct cgiParsedVars *stanza;
 for (stanza = list; stanza != NULL; stanza = stanza->next)
     {
     char *val = hashFindVal(stanza->hash, tag);
     if (val == NULL || sameString(val, "n/a"))
         continue;
     return val;
     }
 return "";
 }
 
 void cdwValidFileFieldsFromTags(struct cdwValidFile *vf, struct cgiParsedVars *tags)
 /* Set the tag-derived string fields of vf to fresh copies of the tag values.
  * NOTE(review): the previous field values are overwritten without being freed -
  * confirm whether callers pass a vf whose fields own memory. */
 {
 // The experiment field drives replicate comparisons.  It comes from the experiment
 // tag when that is present and from the meta tag otherwise.
 char *experiment = cdwLookupTag(tags, "experiment");
 if (isEmpty(experiment))
     experiment = cdwLookupTag(tags, "meta");
 vf->experiment = cloneString(experiment);
 
 // The remaining fields map one-to-one onto tags; only the naming convention
 // differs (under_bar in the tags, camelCase in the struct).
 vf->format     = cloneString(cdwLookupTag(tags, "format"));
 vf->outputType = cloneString(cdwLookupTag(tags, "output_type"));
 vf->replicate  = cloneString(cdwLookupTag(tags, "replicate"));
 vf->enrichedIn = cloneString(cdwLookupTag(tags, "enriched_in"));
 vf->ucscDb     = cloneString(cdwLookupTag(tags, "ucsc_db"));
 vf->part       = cloneString(cdwLookupTag(tags, "file_part"));
 vf->pairedEnd  = cloneString(cdwLookupTag(tags, "paired_end"));
 vf->lane       = cloneString(cdwLookupTag(tags, "lane"));
 
 // Compile-time reminder to revisit this routine when cdwValidFile gains a column
 #if (CDWVALIDFILE_NUM_COLS != 24)
    #error "Please update this routine with new column"
 #endif
 }
 
 void cdwRemoveQaRecords(struct sqlConnection *conn, long long fileId)
 /* Delete every row that refers to fileId from the per-format tables
  * (cdwFastqFile, cdwBamFile, cdwVcfFile) and from the cdwQaXxx tables. */
 {
 char sql[1024];
 
 /* Per-format file tables. */
 sqlSafef(sql, sizeof(sql), "delete from cdwFastqFile where fileId=%lld", fileId);
 sqlUpdate(conn, sql);
 sqlSafef(sql, sizeof(sql), "delete from cdwBamFile where fileId=%lld", fileId);
 sqlUpdate(conn, sql);
 sqlSafef(sql, sizeof(sql), "delete from cdwVcfFile where fileId=%lld", fileId);
 sqlUpdate(conn, sql);
 
 /* Pairwise QA tables: the file may be on either side of the pair. */
 sqlSafef(sql, sizeof(sql),
     "delete from cdwQaPairSampleOverlap where elderFileId=%lld or youngerFileId=%lld",
     fileId, fileId);
 sqlUpdate(conn, sql);
 sqlSafef(sql, sizeof(sql),
     "delete from cdwQaPairCorrelation where elderFileId=%lld or youngerFileId=%lld",
     fileId, fileId);
 sqlUpdate(conn, sql);
 
 /* Single-file QA tables. */
 sqlSafef(sql, sizeof(sql), "delete from cdwQaEnrich where fileId=%lld", fileId);
 sqlUpdate(conn, sql);
 sqlSafef(sql, sizeof(sql), "delete from cdwQaContam where fileId=%lld", fileId);
 sqlUpdate(conn, sql);
 sqlSafef(sql, sizeof(sql), "delete from cdwQaRepeat where fileId=%lld", fileId);
 sqlUpdate(conn, sql);
 
 /* Paired-end fastq QA: again the file may be either member. */
 sqlSafef(sql, sizeof(sql), 
     "delete from cdwQaPairedEndFastq where fileId1=%lld or fileId2=%lld",
     fileId, fileId);
 sqlUpdate(conn, sql);
 }
 
 int findSubmitSymlinkExt(char *submitFileName, char *submitDir, char **pPath, char **pLastPath, int *pSymlinkLevels)
 /* Find the last symlink and real file in the chain from submitDir/submitFileName.
  * This is useful for when target of symlink in cdw/ gets renamed 
  * (e.g. license plate after passes validation), or removed (e.g. cdwReallyRemove* commands). 
  * Returns 0 for success.
  * Returns -1 if path does not exist.
  * Outputs (always set, whatever the return value):
  *   *pPath          - the last path examined; on success the regular file at the end
  *                     of the chain, on -1 the path that did not exist.
  *   *pLastPath      - the path examined just before *pPath, i.e. the last symlink
  *                     followed, or NULL if no symlink was followed.
  *   *pSymlinkLevels - number of symlinks followed.
  * The callers in this file release *pPath and *pLastPath with freeMem.
  * Aborts if lstat fails, if more than 10 symlinks are followed, or if the chain ends
  * at something that is not a regular file. */
 {
 int result = 0;
 struct stat sb;
 char *lastPath = NULL;
 char *path = mustExpandRelativePath(submitDir, submitFileName);
 
 int symlinkLevels = 0;
 while (TRUE)
     {
     if (!fileExists(path))
 	{
 	//path=does not exist
 	result = -1;
 	break;
 	}
     if (lstat(path, &sb) == -1)
 	errnoAbort("lstat failure on %s", path);
     // stop at the first path that is not itself a symlink
     if ((sb.st_mode & S_IFMT) != S_IFLNK)
 	break;
 
     // follow the symlink
     ++symlinkLevels;
     if (symlinkLevels > 10)
 	errAbort("Too many symlinks followed: %d symlinks. Probably a symlink loop.", symlinkLevels);
 
     // read the symlink
     char *symPath = mustReadSymlinkExt(path, &sb);
 
     // apply symPath to path
     char *newPath = mustPathRelativeToFile(path, symPath);
     // the current path becomes lastPath; the previous lastPath is released
     freeMem(lastPath);
     lastPath = path;
     freeMem(symPath);
     path = newPath;
     }
 // sb is only consulted when result is 0, i.e. when the loop ended after a successful lstat
 if (result == 0 && ((sb.st_mode & S_IFMT) != S_IFREG))
     errAbort("Expecting regular file. Followed symlinks to %s but it is not a regular file.", path);
 
 *pPath = path;
 *pLastPath = lastPath;
 *pSymlinkLevels = symlinkLevels;
 return result;
 }
 
 char *testOriginalSymlink(char *submitFileName, char *submitDir)
 /* Resolve submitDir/submitFileName through any symlinks and return the path of the
  * real file.  Aborts if the path does not exist, or if the real file path starts
  * with cdwRootDir, since a submitted file must not already live under cdwRoot. */
 {
 char *realPath = NULL;
 char *lastLink = NULL;
 int linkCount = 0;
 int status = findSubmitSymlinkExt(submitFileName, submitDir, &realPath, &lastLink, &linkCount);
 
 if (status == -1)  // nothing exists at the end of the chain
     errAbort("path=[%s] does not exist following submitDir/submitFileName through symlinks.", realPath);
 
 if (startsWith(cdwRootDir, realPath))
     {
     errAbort("Unexpected operation. The symlink %s points to %s. It should not point to a file already under cdwRoot %s", 
         submitFileName, realPath, cdwRootDir);
     }
 
 /* Only the final path is handed back; the last-symlink path is not needed. */
 freeMem(lastLink);
 return realPath;
 }
 
 
 void replaceOriginalWithSymlink(char *submitFileName, char *submitDir, char *cdwPath)
 /* After a file has been copied to cdwPath, delete the original and put a symlink to
  * cdwPath in its place to save space.  If the submitted name is itself a symlink,
  * it is the real file at the end of the chain that gets replaced. */
 {
 char *realPath = testOriginalSymlink(submitFileName, submitDir);
 int unlinkStatus = unlink(realPath);  // save space
 if (unlinkStatus == -1)
     errnoAbort("unlink failure %s", realPath);
 makeSymLink(cdwPath, realPath);
 verbose(1, "%s converted to symlink to %s\n", realPath, cdwPath);
 freeMem(realPath);
 }
 
 
 
 char *findSubmitSymlink(char *submitFileName, char *submitDir, char *oldPath)
 /* Find the last symlink in the chain from submitDir/submitFileName.
  * This is useful for when target of symlink in cdw/ gets renamed 
  * (e.g. license plate after passes validation), or removed (e.g. cdwReallyRemove* commands). */
 {
 char *lastPath = NULL;
 char *path = NULL;
 int symlinkLevels = 0;
 
 int result = findSubmitSymlinkExt(submitFileName, submitDir, &path, &lastPath, &symlinkLevels);
 if (result == -1)  // path does not exist
     {
     warn("path=[%s] does not exist following submitDir/submitFileName through symlinks.", path);
     return NULL;
     }
 if (symlinkLevels < 1)
     {
     warn("Too few symlinks followed: %d symlinks. Where is the symlink created by cdwSubmit?", symlinkLevels);
     return NULL;
     }
 if (!sameString(path, oldPath))
     {
     warn("Found symlinks point to %s, expecting to find symlink pointing to old path %s", path, oldPath);
     return NULL;
     }
 
 freeMem(path);
 return lastPath;
 }
 
 
 void cdwReallyRemoveFile(struct sqlConnection *conn, char *submitDir, long long fileId, boolean unSymlinkOnly, boolean really)
 /* If unSymlinkOnly is NOT specified, removes all records of file from database and from Unix file system if 
  * the really flag is set.  Otherwise just print some info on the file.
  * Tries to find original submitdir and replace symlink with real file to restore it.
  * With unSymlinkOnly set, only the symlink in the submit dir is replaced by a copy;
  * the database rows and the warehouse file are left alone.
  * Aborts, before changing anything, if the file record or (when really is set)
  * its submission record cannot be found. */
 {
 /* Abort cleanly rather than dereferencing NULL when the file id is unknown. */
 struct cdwFile *ef = cdwFileFromIdOrDie(conn, fileId);
 char *path = cdwPathForFileId(conn, fileId);
 verbose(1, "%s id=%u, submitFileName=%s, path=%s\n", 
     unSymlinkOnly ? "unlocking" : "removing", ef->id, ef->submitFileName, path);
 if (really)
     {
     char query[1024];
     struct cdwSubmit *es = cdwSubmitFromId(conn, ef->submitId);
     /* Checked before any delete so a missing submission leaves everything intact. */
     if (es == NULL)
         errAbort("Couldn't find submission %lld for file id %lld", 
             (long long)ef->submitId, fileId);
 
     if (!unSymlinkOnly)
         {
         cdwRemoveQaRecords(conn, fileId);
         sqlSafef(query, sizeof(query),
             "delete from cdwGroupFile where fileId=%lld", fileId);
         sqlUpdate(conn, query);
         sqlSafef(query, sizeof(query), "delete from cdwValidFile where fileId=%lld", fileId);
         sqlUpdate(conn, query);
         sqlSafef(query, sizeof(query), "delete from cdwFile where id=%lld", fileId);
         sqlUpdate(conn, query);
         }
 
     char *lastPath = NULL;
     // skip symlink check if meta or manifest which do not get validated or license plate or symlink
     if (!((fileId == es->manifestFileId) || (fileId == es->metaFileId)))
         lastPath = findSubmitSymlink(ef->submitFileName, submitDir, path);
     if (lastPath)
         {
         verbose(3, "lastPath=%s path=%s\n", lastPath, path);
         if (unlink(lastPath) == -1)  // drop about to be invalid symlink
             errnoAbort("unlink failure %s", lastPath);
         /* Restore a real, writable copy where the symlink used to be. */
         copyFile(path, lastPath);
         touchFileFromFile(path, lastPath);
         chmod(lastPath, 0664);
         freeMem(lastPath);
         }
 
     if (!unSymlinkOnly)
         mustRemove(path);
     cdwSubmitFree(&es);
     }
 freez(&path);
 cdwFileFree(&ef);
 }
 
 void cdwFileResetTags(struct sqlConnection *conn, struct cdwFile *ef, char *newTags, 
     boolean revalidate, int submitId)
 /* Reset tags on file.  The tags column of the file's cdwFile row is set to newTags.
  * If revalidate is set, the old validation and QA records are stripped out and a new
  * QA job is scheduled.  Otherwise the existing cdwValidFile record, if any, is
  * refreshed in place from the file's tags. */
 {
 long long fileId = ef->id;
 
 /* Update tags for file in cdwFile table.  The tags string has no fixed upper bound,
  * so the statement is built in a growable string rather than a fixed buffer. */
 struct dyString *query = dyStringNew(0);
 sqlDyStringPrintf(query, "update cdwFile set tags='%s' where id=%lld", newTags, fileId);
 sqlUpdate(conn, query->string);
     
 if (revalidate)
     {
     /* Update database to let people know format revalidation is in progress. */
     dyStringClear(query);
     sqlDyStringPrintf(query, "update cdwFile set errorMessage = '%s' where id=%lld",
          "Revalidation in progress.", fileId); 
     sqlUpdate(conn, query->string);
 
     /* Get rid of records referring to file in other validation and qa tables. */
     cdwRemoveQaRecords(conn, fileId);
 
     /* schedule validator */
     cdwAddQaJob(conn, ef->id, submitId);
     }
 else
     {
     /* The revalidation case relies on cdwMakeValidFile to update the cdwValidFile table.
      * Here we must do it ourselves. */
     struct cdwValidFile *vf = cdwValidFileFromFileId(conn, ef->id);
     if (vf != NULL)
         {
         struct cgiParsedVars *tags = cdwMetaVarsList(conn, ef);
         cdwValidFileFieldsFromTags(vf, tags);
         cdwValidFileUpdateDb(conn, vf, vf->id);
         cgiParsedVarsFreeList(&tags);
         cdwValidFileFree(&vf);
         }
     }
 dyStringFree(&query);
 }
 
 static void scanSam(char *samIn, FILE *f, struct genomeRangeTree *grt, long long *retHit, 
     long long *retMiss,  long long *retTotalBasesInHits, long long *retUniqueHitCount)
 /* Scan through sam file doing several things:counting how many reads hit and how many 
  * miss target during mapping phase, copying those that hit to a little bed file, and 
  * also defining regions covered in a genomeRangeTree.
  * Parameters:
  *   samIn  - name of the SAM file to read.
  *   f      - bed output; may be NULL, in which case no bed lines are written.
  *   grt    - receives the range of every read that hit.
  *   retHit, retMiss, retTotalBasesInHits, retUniqueHitCount - counts written on return;
  *            a read counts as unique when its mapping quality exceeds cdwMinMapQual.
  * NOTE(review): bamHeader and whatever sam_read1 attaches to 'one' are not released
  * in this function - confirm whether samclose covers them. */
 {
 samfile_t *sf = samopen(samIn, "r", NULL);
 bam_hdr_t *bamHeader = sam_hdr_read(sf);
 bam1_t one;
 ZeroVar(&one);
 int err;
 long long hit = 0, miss = 0, unique = 0, totalBasesInHits = 0;
 while ((err = sam_read1(sf, bamHeader, &one)) >= 0)
     {
     int32_t tid = one.core.tid;
     // a negative target id is counted as a miss and otherwise skipped
     if (tid < 0)
 	{
 	++miss;
         continue;
 	}
     ++hit;
     if (one.core.qual > cdwMinMapQual)
         ++unique;
     char *chrom = bamHeader->target_name[tid];
     // Approximate here... can do better if parse cigar.
     int start = one.core.pos;
     int size = one.core.l_qseq;
     int end = start + size;	
     totalBasesInHits += size;
     boolean isRc = (one.core.flag & BAM_FREVERSE);
     char strand = (isRc ? '-' : '+');
     // start is clamped after end was computed, so end is not adjusted
     if (start < 0) start=0;
     if (f != NULL)
 	fprintf(f, "%s\t%d\t%d\t.\t0\t%c\n", chrom, start, end, strand);
     genomeRangeTreeAdd(grt, chrom, start, end);
     }
 // -1 is accepted as the normal way for the read loop to end; other negative values abort
 if (err < 0 && err != -1)
     errnoAbort("samread err %d", err);
 samclose(sf);
 *retHit = hit;
 *retMiss = miss;
 *retTotalBasesInHits = totalBasesInHits;
 *retUniqueHitCount = unique;
 }
 
 void cdwReserveTempFile(char *path)
 /* Run mkstemp on path, which replaces the trailing XXXXXX in path with a unique name
  * and creates an empty file of that name.  The file's mode is then set to 0664 and
  * the descriptor closed.  Aborts if either step fails. */
 {
 int tempFd = mkstemp(path);
 if (tempFd == -1)
     errnoAbort("Couldn't create temp file %s", path);
 
 int chmodStatus = fchmod(tempFd, 0664);
 if (chmodStatus == -1)
     errnoAbort("Couldn't change permissions on temp file %s", path);
 
 mustCloseFd(&tempFd);
 }
 
 void cdwIndexPath(struct cdwAssembly *assembly, char indexPath[PATH_LEN])
 /* Write the aligner index path for the assembly, /dev/shm/btData/<ucscDb>, into
  * indexPath. */
 {
 char *db = assembly->ucscDb;
 safef(indexPath, PATH_LEN, "/dev/shm/btData/%s", db);
 }
 
 
 void cdwAsPath(char *format, char path[PATH_LEN])
 /* Convert something like "narrowPeak" in format to full path involving
  * encValDir/as/narrowPeak.as */
 {
 safef(path, PATH_LEN, "%sas/%s.as", cdwValDataDir, format);
 }
 
 boolean cdwTrimReadsForAssay(char *fastqPath, char trimmedPath[PATH_LEN], char *assay)
 /* Look at assay and see if it's one that needs trimming.  If so make a new trimmed
  * file, put its name in trimmedPath and return TRUE.  Otherwise just copy fastqPath
  * to trimmedPath and return FALSE.  Only the "long-RNA-seq" assay is trimmed. */
 {
 if (!sameString(assay, "long-RNA-seq"))
     {
     /* Bounded copy: trimmedPath only holds PATH_LEN bytes, so go through safef
      * rather than an unchecked strcpy. */
     safef(trimmedPath, PATH_LEN, "%s", fastqPath);
     return FALSE;
     }
 
 char cmd[3*PATH_LEN];
 // Make up temp file name for poly-A trimmed file
 safef(trimmedPath, PATH_LEN, "%scdwFastqPolyFilterXXXXXX", cdwTempDir());
 cdwReserveTempFile(trimmedPath);
 
 // Run cdwFastqPolyFilter to write the trimmed reads into the new file.
 safef(cmd, sizeof(cmd), "cdwFastqPolyFilter %s %s", 
     fastqPath, trimmedPath); 
 mustSystem(cmd);
 return TRUE;
 }
 
 void cdwCleanupTrimReads(char *fastqPath, char trimmedPath[PATH_LEN])
 /* Delete the trimmed sample file.  When trimmedPath is the same string as fastqPath
  * no separate file was made, so nothing is removed. */
 {
 if (sameString(fastqPath, trimmedPath))
     return;
 remove(trimmedPath);
 }
 
 void cdwAlignFastqMakeBed(struct cdwFile *ef, struct cdwAssembly *assembly,
     char *fastqPath, struct cdwValidFile *vf, FILE *bedF,
     double *retMapRatio,  double *retDepth,  double *retSampleCoverage,
     double *retUniqueMapRatio, char *assay)
 /* Take a sample fastq, run the aligner on it and then convert that file to a bed.
  * The SAM output of the aligner is scanned with scanSam, which is also handed bedF.
  * bedF and all the ret parameters can be NULL.  The ret values that are asked for are:
  *    retMapRatio       - hitCount / (hitCount + missCount)
  *    retDepth          - totalBasesInHits / assembly->baseCount, scaled by
  *                        vf->itemCount / vf->sampleCount
  *    retSampleCoverage - bases covered by hits / assembly->baseCount
  *    retUniqueMapRatio - uniqueHitCount / (hitCount + missCount)
  * The temporary SAM file and any trimmed fastq are removed before returning. */
 {
 // Figure out aligner index
 char genoFile[PATH_LEN];
 cdwIndexPath(assembly, genoFile);
 
 // Trim reads if need be
 char trimmedFile[PATH_LEN];
 cdwTrimReadsForAssay(fastqPath, trimmedFile, assay);
 
 // Copy the temp SAM name into a local buffer, so there is no heap string to leak.
 char samName[PATH_LEN];
 safef(samName, sizeof(samName), "%s", rTempName(cdwTempDir(), "ewdSample1", ".sam"));
 
 char cmd[3*PATH_LEN];
 // We used to use bwa backtrack here ("bwa aln"), but this mode does not allow mmap indices
 // BWA mem allows preloaded mmap'ed indices, but it is very slow to start, like bowtie2
 // Also BWA mem and bowtie2 are local aligners, so their stats are very different and they do not seem
 // to have a "require 99% identity option", which is very, very strange.
 // So bowtie seemed like the best of both worlds, mmap and also global alignment
 // Max did various comparisons with 80 sample fastq files and the count of aligned reads were very very similar
 // for those files that have enough reads in them. Plots were in email to Jim/Clay.
 
 // -l 40 is seed length. Makes it a lot faster. Today's reads should be longer than 40 bp
 // -n 1 is the number of mismatches in the seed. lower this to 1 makes it a lot faster.
 // --mm activates mmap for the index, which is now in ramdisk
 // -S is for SAM output (so no more useless .sai temp files)
 // these options require a decently recent bowtie version, I used 1.2.2
 safef(cmd, sizeof(cmd), "bowtie -l 40 -n 1 --mm --threads 3 %s %s -S > %s", genoFile, trimmedFile, samName);
 mustSystem(cmd);
 
 /* Scan sam file to calculate vf->mapRatio, vf->sampleCoverage and vf->depth. 
  * and also to produce little bed file for enrichment step.  All counters start at
  * zero so that none of them is read uninitialized after the scan. */
 struct genomeRangeTree *grt = genomeRangeTreeNew();
 long long hitCount=0, missCount=0, uniqueHitCount=0, totalBasesInHits=0;
 scanSam(samName, bedF, grt, &hitCount, &missCount, &totalBasesInHits, &uniqueHitCount);
 verbose(1, "hitCount=%lld, missCount=%lld, totalBasesInHits=%lld, grt=%p\n", 
     hitCount, missCount, totalBasesInHits, grt);
 if (retMapRatio)
     *retMapRatio = (double)hitCount/(hitCount+missCount);
 if (retDepth)
     *retDepth = (double)totalBasesInHits/assembly->baseCount 
             * (double)vf->itemCount/vf->sampleCount;
 long long basesHitBySample = genomeRangeTreeSumRanges(grt);
 if (retSampleCoverage)
     *retSampleCoverage = (double)basesHitBySample/assembly->baseCount;
 if (retUniqueMapRatio)
     *retUniqueMapRatio = (double)uniqueHitCount/(hitCount+missCount);
 
 // Clean up and go home
 cdwCleanupTrimReads(fastqPath, trimmedFile);
 genomeRangeTreeFree(&grt);
 remove(samName);
 }
 
 struct cdwFastqFile *cdwFastqFileFromFileId(struct sqlConnection *conn, long long fileId)
 /* Load the cdwFastqFile row for fileId.  Returns NULL if there is no such row. */
 {
 char sql[256];
 sqlSafef(sql, sizeof(sql), "select * from cdwFastqFile where fileId=%lld", fileId);
 struct cdwFastqFile *fastq = cdwFastqFileLoadByQuery(conn, sql);
 return fastq;
 }
 
 struct cdwVcfFile *cdwVcfFileFromFileId(struct sqlConnection *conn, long long fileId)
 /* Load the cdwVcfFile row for fileId.  Returns NULL if there is no such row. */
 {
 char sql[256];
 sqlSafef(sql, sizeof(sql), "select * from cdwVcfFile where fileId=%lld", fileId);
 struct cdwVcfFile *vcf = cdwVcfFileLoadByQuery(conn, sql);
 return vcf;
 }
 
 static int mustMkstemp(char *template)
 /* Wrapper around mkstemp that aborts with the errno message on failure.  The
  * template is rewritten by mkstemp into the name of the file that was made.
  * Returns the unix file descriptor of the new file. */
 {
 int fd = mkstemp(template);
 if (fd != -1)
     return fd;
 errnoAbort("Couldn't make temp file based on %s", template);
 return fd;
 }
 
 void cdwMakeTempFastqSample(char *source, int size, char dest[PATH_LEN])
 /* Subsample size records of source into a freshly made temp file, whose name
  * is written into dest. */
 {
 /* Creating the file up front reserves a unique name; only the name is needed,
  * so the descriptor is closed straight away. */
 safef(dest, PATH_LEN, "%scdwSampleFastqXXXXXX", cdwTempDir());
 close(mustMkstemp(dest));
 
 /* fastqStatsAndSubsample does the sampling; its stats output goes to /dev/null. */
 char cmd[3*PATH_LEN];
 safef(cmd, sizeof(cmd), 
     "fastqStatsAndSubsample %s /dev/null %s -smallOk -sampleSize=%d", source, dest, size);
 verbose(2, "command: %s\n", cmd);
 mustSystem(cmd);
 }
 
 void cdwMakeFastqStatsAndSample(struct sqlConnection *conn, long long fileId)
 /* Run fastqStatsAndSubsample, and put results into cdwFastqFile table.
  * Nothing is done if the table already has a row for fileId.  Otherwise the stats go
  * to a temp .ra file (removed afterwards), the sample goes to a file in today's temp
  * dir, and a gzipped copy of the sample, named with a ".fastq.gz" suffix, is recorded
  * as sampleFileName in the new row. */
 {
 struct cdwFastqFile *fqf = cdwFastqFileFromFileId(conn, fileId);
 if (fqf == NULL)
     {
     char *path = cdwPathForFileId(conn, fileId);
     char statsFile[PATH_LEN], sampleFile[PATH_LEN], gzSampleFile[PATH_LEN];
     char command[3*PATH_LEN];
     // Reserve temp files for the stats and for the sample.
     safef(statsFile, PATH_LEN, "%scdwFastqStatsXXXXXX", cdwTempDir());
     cdwReserveTempFile(statsFile);
     char dayTempDir[PATH_LEN];
     safef(sampleFile, PATH_LEN, "%scdwFastqSampleXXXXXX", cdwTempDirForToday(dayTempDir));
     cdwReserveTempFile(sampleFile);
     // Build the compressed name with bounds checking rather than appending in place,
     // so a sample path near PATH_LEN can not overrun the buffer.
     safef(gzSampleFile, sizeof(gzSampleFile), "%s.fastq.gz", sampleFile);
     // Gather stats and subsample straight from the file in the warehouse.
     safef(command, sizeof(command), "fastqStatsAndSubsample -sampleSize=%d -smallOk %s %s %s",
         cdwSampleTargetSize, path, statsFile, sampleFile);
     mustSystem(command);
     // NOTE(review): gzip -c leaves the uncompressed sampleFile in place - confirm intended.
     safef(command, sizeof(command), "gzip -c %s > %s", sampleFile, gzSampleFile);
     mustSystem(command);
     fqf = cdwFastqFileOneFromRa(statsFile);
     fqf->fileId = fileId;
     fqf->sampleFileName = cloneString(gzSampleFile);
     cdwFastqFileSaveToDb(conn, fqf, "cdwFastqFile", 1024);
     remove(statsFile);
     freez(&path);
     }
 cdwFastqFileFree(&fqf);
 }
 
 struct cdwQaWigSpot *cdwMakeWigSpot(struct sqlConnection *conn, long long wigId, long long spotId)
 /* Create a new cdwQaWigSpot record in database based on comparing wig file to spot file
  * (specified by id's in cdwFile table).  Any existing record for the pair is deleted first.
  * Aborts if wigId is not a bigWig or spotId is not narrowPeak, broadPeak or bigBed.
  * Returns the record that was saved, with its id filled in from the database.  The
  * caller owns the result and can free it with cdwQaWigSpotFree. */
 {
 /* Get valid files from fileIds and check format */
 struct cdwValidFile *wigVf = cdwValidFileFromFileId(conn, wigId);
 /* NOTE(review): guards against a NULL lookup result - confirm cdwValidFileFromFileId
  * can return NULL rather than aborting by itself. */
 if (wigVf == NULL)
     errAbort("%lld is not in cdwValidFile", wigId);
 if (!sameString(wigVf->format, "bigWig"))
     errAbort("%lld is not a bigWig file, is %s instead", wigId, wigVf->format);
 struct cdwValidFile *spotVf = cdwValidFileFromFileId(conn, spotId);
 if (spotVf == NULL)
     errAbort("%lld is not in cdwValidFile", spotId);
 if (!sameString(spotVf->format, "narrowPeak") && !sameString(spotVf->format, "broadPeak") &&
     !sameString(spotVf->format, "bigBed"))
     errAbort("%lld is not a recognized peak type format, is %s", spotId, spotVf->format);
 
 /* Remove any old record for files. */
 char query[256];
 sqlSafef(query, sizeof(query), 
     "delete from cdwQaWigSpot where wigId=%lld and spotId=%lld", wigId, spotId);
 sqlUpdate(conn, query);
 
 /* Figure out file names */
 char *wigPath = cdwPathForFileId(conn, wigId);
 char *spotPath = cdwPathForFileId(conn, spotId);
 char statsFile[PATH_LEN];
 safef(statsFile, PATH_LEN, "%scdwQaWigSpotXXXXXX", cdwTempDir());
 cdwReserveTempFile(statsFile);
 char peakFile[PATH_LEN];
 safef(peakFile, PATH_LEN, "%scdwQaWigSpotXXXXXX", cdwTempDir());
 cdwReserveTempFile(peakFile);
 
 /* Convert peak input into a temporary bed4 file */
 char command[3*PATH_LEN];
 safef(command, sizeof(command), "bigBedToBed %s stdout | cut -f 1-4 > %s", spotPath, peakFile);
 mustSystem(command);
 
 /* Call on bigWigAverageOverBed on peaks */
 safef(command, sizeof(command), 
     "bigWigAverageOverBed %s %s /dev/null -stats=%s", wigPath, peakFile, statsFile);
 mustSystem(command);
 remove(peakFile);
 
 /* Parse out ra file,  save it to database, and remove ra file. */
 struct cdwQaWigSpot *spot = cdwQaWigSpotOneFromRa(statsFile);
 spot->wigId = wigId;
 spot->spotId = spotId;
 cdwQaWigSpotSaveToDb(conn, spot, "cdwQaWigSpot", 1024);
 spot->id = sqlLastAutoId(conn);
 remove(statsFile);
 
 /* Clean up and go home.  The spot itself is deliberately not freed here, since it
  * is the return value. */
 cdwValidFileFree(&wigVf);
 cdwValidFileFree(&spotVf);
 freez(&wigPath);
 freez(&spotPath);
 return spot;
 }
 
 struct cdwQaWigSpot *cdwQaWigSpotFor(struct sqlConnection *conn, 
     long long wigFileId, long long spotFileId) 
 /* Load whatever cdwQaWigSpot record the database holds for this wig/spot file
  * pair, if any. */
 {
 char sql[256];
 sqlSafef(sql, sizeof(sql), 
     "select * from cdwQaWigSpot where wigId=%lld and spotId=%lld", wigFileId, spotFileId);
 struct cdwQaWigSpot *spot = cdwQaWigSpotLoadByQuery(conn, sql);
 return spot;
 }
 
 
 
 
 struct cdwBamFile *cdwBamFileFromFileId(struct sqlConnection *conn, long long fileId)
 /* Load the cdwBamFile row for fileId.  Returns NULL if there is no such row. */
 {
 char sql[256];
 sqlSafef(sql, sizeof(sql), "select * from cdwBamFile where fileId=%lld", fileId);
 struct cdwBamFile *bam = cdwBamFileLoadByQuery(conn, sql);
 return bam;
 }
 
 struct cdwBamFile * cdwMakeBamStatsAndSample(struct sqlConnection *conn, long long fileId, 
     char sampleBed[PATH_LEN])
 /* Run the bam stats program and put results into cdwBamFile table, and also make a
  * sample bed.  Any old cdwBamFile row for fileId is deleted first.  sampleBed is filled
  * in by this routine with the name of the gzipped sample, which lives in today's temp
  * dir.  Returns the record that was saved to the table. */
 {
 /* Remove any old record for file. */
 char query[256];
 sqlSafef(query, sizeof(query), "delete from cdwBamFile where fileId=%lld", fileId);
 sqlUpdate(conn, query);
 
 /* Figure out file names */
 char *path = cdwPathForFileId(conn, fileId);
 char statsFile[PATH_LEN];
 safef(statsFile, PATH_LEN, "%scdwBamStatsXXXXXX", cdwTempDir());
 cdwReserveTempFile(statsFile);
 char dayTempDir[PATH_LEN];
 safef(sampleBed, PATH_LEN, "%scdwBamSampleXXXXXX", cdwTempDirForToday(dayTempDir));
 cdwReserveTempFile(sampleBed);
 
 /* Make system call to make ra and bed, and then another system call to zip bed.*/
 char command[3*PATH_LEN];
 safef(command, sizeof(command), "edwBamStats -sampleBed=%s -sampleBedSize=%d %s %s",
     sampleBed, cdwSampleTargetSize, path, statsFile);
 mustSystem(command);
 safef(command, sizeof(command), "gzip %s", sampleBed);
 mustSystem(command);
 
 /* gzip renamed the file, so add the suffix to the name handed back.  The append is
  * limited to the room left in the caller's PATH_LEN buffer. */
 int bedLen = strlen(sampleBed);
 safef(sampleBed + bedLen, PATH_LEN - bedLen, ".gz");
 
 /* Parse out ra file,  save it to database, and remove ra file. */
 struct cdwBamFile *ebf = cdwBamFileOneFromRa(statsFile);
 ebf->fileId = fileId;
 cdwBamFileSaveToDb(conn, ebf, "cdwBamFile", 1024);
 remove(statsFile);
 
 /* Clean up and go home. */
 freez(&path);
 return ebf;
 }
 
 struct cdwVcfFile * cdwMakeVcfStatsAndSample(struct sqlConnection *conn, long long fileId, 
     char sampleBed[PATH_LEN])
 /* Run cdwVcfStats and put results into cdwVcfFile table, and also make a sample bed.
  * Any old cdwVcfFile row for fileId is deleted first.  sampleBed is filled in by this
  * routine with the name of the gzipped sample, which lives in today's temp dir.
  * Returns the record that was saved to the table. */
 {
 /* Remove any old record for file. */
 char query[256];
 sqlSafef(query, sizeof(query), "delete from cdwVcfFile where fileId=%lld", fileId);
 sqlUpdate(conn, query);
 
 /* Figure out file names */
 char *path = cdwPathForFileId(conn, fileId);
 char statsFile[PATH_LEN];
 safef(statsFile, PATH_LEN, "%scdwVcfStatsXXXXXX", cdwTempDir());
 cdwReserveTempFile(statsFile);
 char dayTempDir[PATH_LEN];
 safef(sampleBed, PATH_LEN, "%scdwVcfSampleXXXXXX", cdwTempDirForToday(dayTempDir));
 cdwReserveTempFile(sampleBed);
 
 /* Make system call to make ra and bed, and then another system call to zip bed.*/
 char command[3*PATH_LEN];
 safef(command, sizeof(command), "cdwVcfStats -bed=%s %s %s",
     sampleBed, path, statsFile);
 mustSystem(command);
 safef(command, sizeof(command), "gzip %s", sampleBed);
 mustSystem(command);
 
 /* gzip renamed the file, so add the suffix to the name handed back.  The append is
  * limited to the room left in the caller's PATH_LEN buffer. */
 int bedLen = strlen(sampleBed);
 safef(sampleBed + bedLen, PATH_LEN - bedLen, ".gz");
 
 /* Parse out ra file,  save it to database, and remove ra file. */
 struct cdwVcfFile *vcf = cdwVcfFileOneFromRa(statsFile);
 vcf->fileId = fileId;
 cdwVcfFileSaveToDb(conn, vcf, "cdwVcfFile", 1024);
 remove(statsFile);
 
 /* Clean up and go home. */
 freez(&path);
 return vcf;
 }
 
 char *cdwOppositePairedEndString(char *end)
 /* Return "1" for "2" and vice versa.  The values "cell_barcode" and "sample_barcode"
  * have no opposite end, and NULL is returned for them.  Any other value is an error
  * and aborts.  The strings returned are literals, not to be freed. */
 {
 if (sameString(end, "1"))
     return "2";
 else if (sameString(end, "2"))
     return "1";
+else if (sameString(end, "cell_barcode") || sameString(end, "sample_barcode"))
+    return NULL;
 else
     {
     errAbort("Expecting 1 or 2, got %s in oppositeEnd", end);
     return NULL;
     }
 }
 
 struct cdwValidFile *cdwOppositePairedEnd(struct sqlConnection *conn, struct cdwFile *ef, struct cdwValidFile *vf)
 /* Given one file of a paired end set of fastqs, find the file with opposite ends.
  * The mate is a cdwValidFile with the same experiment, outputType, replicate, part and
  * itemCount as vf, the same submitDirId as ef, an empty deprecated field, and the
  * opposite pairedEnd value.
  * Returns NULL if vf->pairedEnd has no opposite (cdwOppositePairedEndString gave NULL)
  * or if no file matches.  Aborts if more than one file matches. */
 {
 char *otherEnd = cdwOppositePairedEndString(vf->pairedEnd);
+if (otherEnd == NULL)
+    return NULL;
 char query[1024];
 sqlSafef(query, sizeof(query), 
     "select cdwValidFile.* from cdwValidFile join cdwFile on cdwValidFile.fileId=cdwFile.id"
     " where experiment='%s' and submitDirId=%d and outputType='%s' and replicate='%s' "
     " and part='%s' and pairedEnd='%s' and itemCount=%lld and deprecated=''"
     , vf->experiment, ef->submitDirId, vf->outputType, vf->replicate, vf->part, otherEnd
     , vf->itemCount);
 struct cdwValidFile *otherVf = cdwValidFileLoadByQuery(conn, query);
 if (otherVf == NULL)
     return NULL;
 /* A second list element means the match is ambiguous. */
 if (otherVf->next != NULL)
     errAbort("Multiple results from pairedEnd query %s", query);
 return otherVf;
 }
 
 struct cdwQaPairedEndFastq *cdwQaPairedEndFastqFromVfs(struct sqlConnection *conn,
     struct cdwValidFile *vfA, struct cdwValidFile *vfB,
     struct cdwValidFile **retVf1,  struct cdwValidFile **retVf2)
 /* Return pair record if any for the two fastq files. */
 {
 /* Sort the two ends. */
 struct cdwValidFile *vf1 = NULL, *vf2 = NULL;
 if (sameString(vfA->pairedEnd, "1"))
     {
     vf1 = vfA;
     vf2 = vfB;
     }
 else
     {
     vf1 = vfB;
     vf2 = vfA;
     }
 if (retVf1 != NULL)
    *retVf1 = vf1;
 if (retVf2 != NULL)
    *retVf2 = vf2;
 
 /* See if we already have a record for these two. */
 /* Return record for these two. */
 char query[1024];
 sqlSafef(query, sizeof(query), 
     "select * from cdwQaPairedEndFastq where fileId1=%u and fileId2=%u",
     vf1->fileId, vf2->fileId);
 return cdwQaPairedEndFastqLoadByQuery(conn, query);
 }
 
 FILE *cdwPopen(char *command, char *mode)
 /* popen that aborts with the errno message rather than returning NULL. */
 {
 /* Because of bugs with popen(...,"r") and programs that use stdin otherwise
  * it's probably better to use Mark's pipeline library,  but it is ever so
  * much harder to use... */
 FILE *pipe = popen(command,  mode);
 if (pipe != NULL)
     return pipe;
 errnoAbort("Can't popen(%s, %s)", command, mode);
 return pipe;
 }
 
 boolean cdwOneLineSystemAttempt(char *command, char *line, int maxLineSize)
 /* Run command and read the first line of its output into line, which has room for
  * maxLineSize chars.  Returns TRUE if a line was read.  Returns FALSE if nothing
  * could be read, or if popen failed, in which case a warning is issued as well. */
 {
 FILE *pipe = popen(command, "r");
 if (pipe == NULL)
     {
     errnoWarn("failed popen %s", command);
     return FALSE;
     }
 boolean gotLine = FALSE;
 if (fgets(line, maxLineSize, pipe) != NULL)
     gotLine = TRUE;
 pclose(pipe);
 return gotLine;
 }
 
 void cdwOneLineSystemResult(char *command, char *line, int maxLineSize)
 /* Execute system command and return one line result from it in line */
 {
 if (!cdwOneLineSystemAttempt(command, line, maxLineSize) )
     errAbort("Can't get line from %s", command);
 }
 
 void cdwMd5File(char *fileName, char md5Hex[33])
 /* Get the md5 of a file by running the md5sum utility, and store it as a zero
  * terminated 32 character hex string in md5Hex.
  * This ends up being about 30% faster than library routine md5HexForFile,
  * however since there's popen() weird interactions with  stdin involved
  * it's not suitable for a general purpose library.  Environment inside cdw
  * is controlled enough it should be ok. */
 {
 char cmd[PATH_LEN + 16];
 char output[2*PATH_LEN];
 safef(cmd, sizeof(cmd), "md5sum %s", fileName);
 cdwOneLineSystemResult(cmd, output, sizeof(output));
 /* The first 32 characters of the output line are taken as the digest. */
 int i;
 for (i = 0; i < 32; ++i)
     md5Hex[i] = output[i];
 md5Hex[32] = 0;
 }
 
 
 void cdwPokeFifo(char *fifoName)
 /* Write a single '\n' into the named fifo to wake up associated daemon.  Only the
  * first candidate directory in which the fifo exists gets the write; if it exists
  * in none of them nothing happens. */
 {
 /* Sadly we loop through places it might be since it varies. It has to live somewhere
  * that web CGIs can poke is the problem. */
 char *places[] = {"/data/www/userdata/", "/usr/local/apache/userdata/"};
 int placeCount = ArraySize(places);
 int ix;
 for (ix = 0; ix < placeCount; ++ix)
     {
     char fifoPath[PATH_LEN];
     safef(fifoPath, sizeof(fifoPath), "%s%s", places[ix], fifoName);
     if (!fileExists(fifoPath))
         continue;
     char *wakeUp = "\n";
     writeGulp(fifoPath, wakeUp, strlen(wakeUp));
     return;
     }
 }
 
 /***/
 /* Shared functions for CDW web CGI's.
    Mostly wrappers for javascript tweaks */
 
 void cdwWebAutoRefresh(int msec)
 /* Refresh page after msec.  Use 0 to cancel autorefresh.
  * A positive msec writes script elements that submit the page's form after the delay,
  * keeping the scroll position in a cookie, and that cancel the timer when the form is
  * clicked.  Zero writes a complete script element that clears the cdwRefresh timer.
  * Negative msec writes nothing. */
 {
 if (msec > 0)
     {
     // set timeout to refresh page (saving/restoring scroll position via cookie)
     printf("<script type='text/javascript'>var cdwRefresh = setTimeout(function() { $.cookie('cdwWeb.scrollTop', $(window).scrollTop()); $('form').submit(); }, %d);</script>", msec);
     puts("<script type='text/javascript'>$(document).ready(function() {$(document).scrollTop($.cookie('cdwWeb.scrollTop'))});</script>");
 
     // disable autorefresh when user is changing page settings
     puts("<script type='text/javascript'>$('form').click(function() {clearTimeout(cdwRefresh); $.cookie('cdwWeb.scrollTop', null);});</script>");
     }
 else if (msec == 0)
     {
     // The opening tag is needed here, otherwise the javascript shows up as page text
     // followed by a stray closing tag.
     puts("<script type='text/javascript'>clearTimeout(cdwRefresh);</script>");
     }
 
 // Negative msec ignored
 }
 
 /***/
 /* Navigation bar */
 
 void cdwWebNavBarStart()
 /* Write the opening of the page layout: the layout div, the navbar div with the
  * contents of /inc/cdwNavBar.html inside it, and the opening of the content divs. */
 {
 fputs("<div id='layout'>\n"
       "<div id='navbar' class='navbar navbar-fixed-top navbar-inverse'>\n", stdout);
 webIncludeFile("/inc/cdwNavBar.html");
 fputs("</div>\n"
       "<div id='content' class='container'><div>\n", stdout);
 }
 
 void cdwWebNavBarEnd()
 /* Write the three closing div tags that finish the layout after the navigation bar. */
 {
 fputs("</div></div></div>\n", stdout);
 }
 
 void cdwWebBrowseMenuItem(boolean on)
 /* Write a script element that calls show() or hide(), depending on on, for the
  * element with id cdw-browse ('Browse submissions' link on navigation menu). */
 {
 char *method = (on ? "show" : "hide");
 printf("<script type='text/javascript'>$('#cdw-browse').%s();</script>", method);
 }
 
 void cdwWebSubmitMenuItem(boolean on)
 /* Write a script element that calls show() or hide(), depending on on, for the
  * element with id cdw-submit ('Submit data' link on navigation menu). */
 {
 char *method = (on ? "show" : "hide");
 printf("<script type='text/javascript'>$('#cdw-submit').%s();</script>", method);
 }
 
 char *cdwRqlLookupField(void *record, char *key)
 /* Field lookup callback for rql evaluation: treats record as a tagStanza and
  * returns what tagFindVal finds for key. */
 {
 return tagFindVal((struct tagStanza *)record, key);
 }
 
 boolean cdwRqlStatementMatch(struct rqlStatement *rql, struct tagStanza *stanza,
 	struct lm *lm)
 /* Return TRUE if where clause and tableList in statement evaluates true for stanza. */
 {
 struct rqlParse *whereClause = rql->whereClause;
 if (whereClause == NULL)
     return TRUE;
 else
     {
     struct rqlEval res = rqlEvalOnRecord(whereClause, stanza, cdwRqlLookupField, lm);
     res = rqlEvalCoerceToBoolean(res);
     return res.val.b;
     }
 }
 
 static void rBuildStanzaRefList(struct tagStorm *tags, struct tagStanza *stanzaList,
     struct rqlStatement *rql, struct lm *lm, int *pMatchCount, struct slRef **pList)
 /* Recursively add stanzas that match query to list */
 {
 struct tagStanza *stanza;
 for (stanza = stanzaList; stanza != NULL; stanza = stanza->next)
     {
     if (rql->limit < 0 || rql->limit > *pMatchCount)
 	{
 	if (cdwRqlStatementMatch(rql, stanza, lm))
 	    {
 	    refAdd(pList, stanza);
 	    *pMatchCount += 1;
 	    }
 	if (stanza->children != NULL)
 	    rBuildStanzaRefList(tags, stanza->children, rql, lm, pMatchCount, pList);
 	}
     }
 }
 
 void cdwCheckRqlFields(struct rqlStatement *rql, struct slName *tagFieldList)
 /* Check the fields used by the rql query against the names in tagFieldList, using
  * rqlCheckFieldsExist with a temporary hash built from the list. */
 {
 struct hash *fieldHash = hashFromSlNameList(tagFieldList);
 rqlCheckFieldsExist(rql, fieldHash, "cdwFileTags table");
 hashFree(&fieldHash);
 }
 
 struct slRef *tagStanzasMatchingQuery(struct tagStorm *tags, char *query)
 /* Parse query as RQL, check its fields against those present in tags, and return
  * a list of references to the stanzas that match. */
 {
 struct rqlStatement *statement = rqlStatementParseString(query);
 
 /* The field list is only needed for the validity check. */
 struct slName *fieldNames = tagStormFieldList(tags);
 cdwCheckRqlFields(statement, fieldNames);
 slFreeList(&fieldNames);
 
 /* Collect the matches, using a local memory pool for the evaluation. */
 struct slRef *matches = NULL;
 int found = 0;
 struct lm *pool = lmInit(0);
 rBuildStanzaRefList(tags, tags->forest, statement, pool, &found, &matches);
 rqlStatementFree(&statement);
 lmCleanup(&pool);
 return matches;
 }
 
 struct cgiParsedVars *cdwMetaVarsList(struct sqlConnection *conn, struct cdwFile *ef)
 /* Return list of cgiParsedVars dictionaries for metadata for file.  The first is
  * parsed from ef->tags.  If cdwMetaTags has tags for ef->metaTagsId, a second one
  * parsed from those follows it.  Free this up with cgiParsedVarsFreeList() */
 {
 struct cgiParsedVars *fileVars = cgiParsedVarsNew(ef->tags);
 char sql[256];
 sqlSafef(sql, sizeof(sql), 
     "select tags from cdwMetaTags where id=%u", ef->metaTagsId);
 char *metaCgi = sqlQuickString(conn, sql);
 if (metaCgi == NULL)
     return fileVars;
 fileVars->next = cgiParsedVarsNew(metaCgi);
 freez(&metaCgi);
 return fileVars;
 }
 
 /* State shared between cdwPrintMatchingStanzas and the recursive rMatchesTo* output
  * routines. */
 static int gMatchCount = 0;        /* Stanzas matched so far, checked against rql->limit. */
 static boolean gDoSelect = FALSE;  /* TRUE if rql command is "select", so matches get printed. */
 static boolean gFirst = TRUE;      /* TRUE until the csv/tsv header line has been printed. */
 
 static void rMatchesToRa(struct tagStorm *tags, struct tagStanza *list, 
     struct rqlStatement *rql, struct lm *lm)
 /* Recursively traverse stanzas on list outputting matching stanzas as ra.  The query
  * is only applied to leaf stanzas.  Every match is counted in gMatchCount.  When
  * gDoSelect is set, each field of the query that has a value in the stanza is printed
  * as a name/value line, and a blank line ends the stanza. */
 {
 struct tagStanza *node;
 for (node = list; node != NULL; node = node->next)
     {
     if (rql->limit >= 0 && rql->limit <= gMatchCount)
         continue;       /* Limit reached */
     if (node->children)
         {
         rMatchesToRa(tags, node->children, rql, lm);
         continue;
         }
     /* From here on node is a leaf. */
     if (!cdwRqlStatementMatch(rql, node, lm))
         continue;
     ++gMatchCount;
     if (!gDoSelect)
         continue;
     struct slName *col;
     for (col = rql->fieldList; col != NULL; col = col->next)
         {
         char *val = tagFindVal(node, col->name);
         if (val != NULL)
             printf("%s\t%s\n", col->name, val);
         }
     printf("\n");
     }
 }
 
 static void printQuotedTsv(char *val)
 /* Write val to stdout surrounded by double quotes.  Each double quote inside val
  * is written twice, which is how it gets escaped. */
 {
 char *s;
 putchar('"');
 for (s = val; *s != 0; ++s)
     {
     if (*s == '"')
         putchar('"');
     putchar(*s);
     }
 putchar('"');
 }
 
 static void rMatchesToCsv(struct tagStorm *tags, struct tagStanza *list, 
     struct rqlStatement *rql, struct lm *lm)
 /* Recursively traverse stanzas on list outputting matching stanzas as 
  * a comma separated values file. */
 {
 struct tagStanza *stanza;
 for (stanza = list; stanza != NULL; stanza = stanza->next)
     {
     if (rql->limit < 0 || rql->limit > gMatchCount)  // We are inside the acceptable limit
 	{
 	if (stanza->children) // Recurse till we have just leaves. 
 	    rMatchesToCsv(tags, stanza->children, rql, lm);
 	else    /* Just apply query to leaves */
 	    {
 	    if (cdwRqlStatementMatch(rql, stanza, lm))
 		{
 		++gMatchCount;
 		if (gDoSelect)
 		    {
 		    struct slName *field;
 		    if (gFirst)// For the first stanza print out a header line. 
 			{
 			char *sep = "";
 			gFirst = FALSE;
 			for (field = rql->fieldList; field != NULL; field = field->next)
 			    {
 			    printf("%s%s", sep, field->name); 
 			    sep = ",";
 			    }
 			printf("\n"); 
 			}
 		    char *sep = "";
 		    for (field = rql->fieldList; field != NULL; field = field->next)
 			{
 			fputs(sep, stdout);
 			sep = ",";
 			char *val = emptyForNull(tagFindVal(stanza, field->name));
 			// Check for embedded comma or existing quotes
 			if (strchr(val, ',') == NULL && strchr(val, '"') == NULL)
 			    fputs(val, stdout);
 			else
 			    {
 			    printQuotedTsv(val);
 			    }
 			}
 		    printf("\n");
 		    }
 		}
 	    }
 	}
     }
 }
 
 static void rMatchesToTsv(struct tagStorm *tags, struct tagStanza *list, 
     struct rqlStatement *rql, struct lm *lm)
 /* Recursively traverse stanzas on list outputting matching stanzas as a tab separated
  * values file.  The query is only applied to leaf stanzas, and every match is counted
  * in gMatchCount.  When gDoSelect is set each match becomes one output row, with a
  * header row, starting with '#', written ahead of the first one.  Missing values are
  * passed through naForNull. */
 {
 struct tagStanza *node;
 for (node = list; node != NULL; node = node->next)
     {
     if (rql->limit >= 0 && rql->limit <= gMatchCount)
         continue;       /* Limit reached */
     if (node->children)
         {
         /* Recurse till we have just leaves. */
         rMatchesToTsv(tags, node->children, rql, lm);
         continue;
         }
     if (!cdwRqlStatementMatch(rql, node, lm))
         continue;
     ++gMatchCount;
     if (!gDoSelect)
         continue;
 
     struct slName *col;
     char *sep;
     if (gFirst)
         {
         /* Header line goes out once, ahead of the first row. */
         gFirst = FALSE;
         printf("#"); 
         sep = "";
         for (col = rql->fieldList; col != NULL; col = col->next)
             {
             printf("%s%s", sep, col->name); 
             sep = "\t";
             }
         printf("\n"); 
         }
     sep = "";
     for (col = rql->fieldList; col != NULL; col = col->next)
         {
         char *val = naForNull(tagFindVal(node, col->name));
         printf("%s%s", sep, val);
         sep = "\t";
         }
     printf("\n");
     }
 }
 
 void cdwPrintMatchingStanzas(char *rqlQuery, int limit, struct tagStorm *tags, char *format)
 /* Show stanzas that match query */
 {
 struct dyString *dy = dyStringCreate("%s", rqlQuery);
 int maxLimit = 10000;
 if (limit > maxLimit)
     limit = maxLimit;
 struct rqlStatement *rql = rqlStatementParseString(dy->string);
 
 /* Get list of all tag types in tree and use it to expand wildcards in the query
  * field list. */
 struct slName *allFieldList = tagStormFieldList(tags);
 slSort(&allFieldList, slNameCmpCase);
 rql->fieldList = wildExpandList(allFieldList, rql->fieldList, TRUE);
 /* Traverse tag tree outputting when rql statement matches in select case, just
  * updateing count in count case. */
 gDoSelect = sameWord(rql->command, "select");
 if (gDoSelect)
     rql->limit = limit;
 struct lm *lm = lmInit(0);
 if (sameString(format, "ra"))
     rMatchesToRa(tags, tags->forest, rql, lm);
 else if (sameString(format, "tsv"))
     rMatchesToTsv(tags, tags->forest, rql, lm); 
 else if (sameString(format, "csv"))
     rMatchesToCsv(tags, tags->forest, rql, lm);
 if (sameWord(rql->command, "count"))
     printf("%d\n", gMatchCount);
 }
 
 
 void cdwPrintSlRefList(struct slRef *results, struct slName *fieldNames, char *format, int limit)
 /* Print a linked list of results in ra, tsv, or csv format.  Each result should be a list of
  * slPair key/values. */
 {
 int maxLimit = 10000;
 if (limit > maxLimit)
     limit = maxLimit;
 int matchCount = 0;
 if (sameString(format, "csv"))
     {
     // Write csv header
     struct slName *fieldName = fieldNames;
     char *sep = "";
     while (fieldName != NULL)
         {
         printf("%s%s", sep, fieldName->name);
         sep = ",";
         fieldName = fieldName->next;
         }
     printf("\n");
     }
 if (sameString(format, "tsv"))
     {
     // Write tsv header
     struct slName *fieldName = fieldNames;
     char *sep = "";
     printf("#");
     while (fieldName != NULL)
         {
         printf("%s%s", sep, fieldName->name);
         sep = "\t";
         fieldName = fieldName->next;
         }
     printf("\n");
     }
 
 struct slRef *result = results;
 while (result != NULL)
     {
     if (++matchCount > limit)
         break;
     struct slPair *keyval = result->val;
     char *sep = "";
     while (keyval != NULL)
         {
         char *val = keyval->val;
         if (sameString(format, "ra") && !isEmpty(val))
             printf("%s\t%s\n", keyval->name, val);
         if (sameString(format, "csv"))
             {
             printf("%s", sep);
             sep = ",";
             val = emptyForNull(val);
             // Check for embedded comma or existing quotes
             if (strchr(val, ',') == NULL && strchr(val, '"') == NULL)
                 printf("%s", val);
             else
                 {
                 printQuotedTsv(val);
                 }
             }
         if (sameString(format, "tsv"))
             {
             val = naForNull(val);
             printf("%s%s", sep, val);
             sep = "\t";
             }
         keyval = keyval->next;
         }
     printf("\n");
     result = result->next;
     }
 }
 
 #ifdef NOT_CURRENTLY_USED
 
 static struct dyString *getLoginBits(struct cart *cart)
 /* Get a little HTML fragment that has login/logout bit of menu.  The fragment is a
  * link to hgLogin: a Login link when wikiLinkUserName() gives NULL, otherwise a Logout
  * link labeled with the user name.  Either way the link carries a returnto URL leading
  * back to cdwWebBrowse with the current cdwCommand.
  * Only compiled if NOT_CURRENTLY_USED is defined. */
 {
 /* Construct URL to return back to this page */
 char *command = cartUsualString(cart, "cdwCommand", "home");
 char *sidString = cartSidUrlString(cart);
 char returnUrl[PATH_LEN*2];
 safef(returnUrl, sizeof(returnUrl), "../cgi-bin/cdwWebBrowse?cdwCommand=%s&%s",
     command, sidString );
 /* The return URL is passed as a CGI variable value, so it gets encoded. */
 char *encodedReturn = cgiEncode(returnUrl);
 
 /* Write a little html into loginBits */
 struct dyString *loginBits = dyStringNew(0);
 dyStringAppend(loginBits, "<a class=\"a-unstyled\" href=\"");
 char *userName = wikiLinkUserName();
 if (userName == NULL)
     {
     dyStringPrintf(loginBits, "../cgi-bin/hgLogin?hgLogin.do.displayLoginPage=1&returnto=%s&%s",
 	    encodedReturn, sidString);
     dyStringPrintf(loginBits, "\"><span class=\"label label-login\">Login</span></a>");
     }
 else
     {
     dyStringPrintf(loginBits, "../cgi-bin/hgLogin?hgLogin.do.displayLogout=1&returnto=%s&%s",
 	    encodedReturn, sidString);
     dyStringPrintf(loginBits, "\" id=\"logoutLink\"><span class=\"label back-gray\">Logout %s</span></a>", userName);
 
     if (loginUseBasicAuth())
         wikiFixLogoutLinkWithJs();
     }
 
 /* Clean up and go home */
 freez(&encodedReturn);
 return loginBits;
 }
 #endif
 
 char *cdwPageHeader(struct cart *cart, boolean makeAbsolute)
 /* Return page header string.  This is content that actually appears at the top
  * of the page, like menu stuff.  If makeAbsolute is set, every "../" in the
  * result is replaced by "/" so links become absolute instead of relative. */
 {
 // page header html is in a stringified .h file
 struct dyString *html = dyStringNew(4*1024);
 dyStringPrintf(html, 
 #include "cdwPageHeader.h"
        );
 
 char *withVars = menuBarAddUiVars(html->string, "/cgi-bin/cdw", cartSidUrlString(cart));
 if (makeAbsolute)
     {
     char *absolute = replaceChars(withVars, "../", "/");
     freez(&withVars);
     return absolute;
     }
 return withVars;
 }
 
 char *cdwPageFooter(struct cart *cart, boolean makeAbsolute)
 /* Return page footer string.  This is content that appears in the page footer, like
  * links to other institutions etc.  If makeAbsolute is set, every "../" in the
  * result is replaced by "/" so relative URLs become absolute. */
 {
 // page footer html is in a stringified .h file
 struct dyString *html = dyStringNew(4*1024);
 dyStringPrintf(html, 
 #include "cdwPageFooter.h"
     );
 
 char *withVars = menuBarAddUiVars(html->string, "/cgi-bin/cdw", cartSidUrlString(cart));
 if (makeAbsolute)
     {
     char *absolute = replaceChars(withVars, "../", "/");
     freez(&withVars);
     return absolute;
     }
 return withVars;
 }
 
 char *cdwLocalMenuBar(struct cart *cart, boolean makeAbsolute)
 /* Return menu bar string.  If makeAbsolute is set, every "../" in the result is
  * replaced by "/" so links in the menubar become absolute, not relative. */
 {
 // menu bar html is in a stringified .h file
 struct dyString *html = dyStringNew(4*1024);
 dyStringPrintf(html, 
 #include "cdwNavBar.h"
         );
 
 char *withVars = menuBarAddUiVars(html->string, "/cgi-bin/cdw", cartSidUrlString(cart));
 if (makeAbsolute)
     {
     char *absolute = replaceChars(withVars, "../", "/");
     freez(&withVars);
     return absolute;
     }
 return withVars;
 }
 
 char *fileExtFromFormat(char *format)
 /* return file extension given the cdwFile format as defined in cdwValid.c. Result has to be freed */
 {
 if (sameWord(format, "vcf"))
     return cloneString(".vcf.gz");
 if (sameWord(format, "fasta"))
     return cloneString(".fa.gz");
 if (sameWord(format, "fastq"))
     return cloneString(".fastq.gz");
 if (sameWord(format, "unknown"))
     return cloneString("");
 
 return catTwoStrings(".", format);
 }