77c1fd0753c5b43a3c10baa351ed40962be704cb lrnassar Tue Aug 27 16:53:48 2024 -0700

The small change to generateUsageStats.py improves how it splits the track:count entries
in trackLog lines. Previously, when a count had more than one digit, e.g. track:1034, it
would slice off only the last two characters and leave a stray ":10" on the track name.
For the assemblyStatsCron script, various changes were made as documented in the RM
ticket. That script is a fairly big mess and has a lot of room for improvement: breaking
it into functions, better comments, not using bash to write output, and general
optimizations such as not parsing files multiple times. However, it is only used
internally for general exploratory overviews, so it has not warranted a proper
restructuring and cleanup. The primary motivation here was to add up counts from track
usage across all hubs instead of only reporting the highest one. (An illustrative sketch
of both changes follows the diff below.) Refs #34266

diff --git src/hg/logCrawl/dbTrackAndSearchUsage/generateUsageStats.py src/hg/logCrawl/dbTrackAndSearchUsage/generateUsageStats.py
index b1af1bb..5d39cff 100755
--- src/hg/logCrawl/dbTrackAndSearchUsage/generateUsageStats.py
+++ src/hg/logCrawl/dbTrackAndSearchUsage/generateUsageStats.py
@@ -1,674 +1,674 @@
#!/usr/bin/env python3

import subprocess, os, gzip, argparse, sys, json, operator, datetime
from collections import Counter, defaultdict

#####
# Define dictionaries/sets/etc. needed to record stats
#####

# Making these global so that they can be modified by functions without needing
# a function argument to specify them

# Dictionaries for holding information about db use
dbUsers = defaultdict(Counter) # Ex dbUsers struct: {"db":{"hgsid":count}}
dbCounts = dict() # Ex dbCounts struct: {"db":count}
# Dictionaries for recording information per month
dbUsersMonth = defaultdict(lambda: defaultdict(lambda: defaultdict(lambda: defaultdict())))
# Ex dbUsersMonth struct: {"db":{"year":{"month":{"hgsid":count}}}}
dbCountsMonth = defaultdict(lambda: defaultdict(lambda: defaultdict()))
# Ex dbCountsMonth struct: {"db":{"year":{"month":count}}}

# Dictionaries for holding information about track use per db
trackUsers = defaultdict(lambda: defaultdict(lambda: defaultdict()))
# Ex trackUsers struct: {"db":{"track":{"hgsid":count}}}
trackCounts = defaultdict(dict)
# Ex trackCounts struct: {"db":{"track":count}}
# Dictionaries for per month information
trackUsersMonth = defaultdict(lambda: defaultdict(lambda: defaultdict(lambda:\
    defaultdict(lambda: defaultdict()))))
# Ex trackUsersMonth struct: {"db":{"year":{"month":{"track":{"hgsid":count}}}}}
trackCountsMonth = defaultdict(lambda: defaultdict(lambda: defaultdict(lambda:\
    defaultdict())))
# Ex trackCountsMonth struct: {"db":{"year":{"month":{"track":count}}}}

# Dictionaries for holding information about track use per hub
trackUsersHubs = defaultdict(lambda: defaultdict(lambda: defaultdict(lambda: defaultdict())))
# Ex trackUsersHubs struct: {"hubId":{"db":{"track":{"hgsid":count}}}}
trackCountsHubs = defaultdict(lambda: defaultdict(lambda: defaultdict()))
# Ex trackCountsHubs struct: {"hubId":{"db":{"track":count}}}
# Dictionaries for per month information
trackUsersHubsMonth = defaultdict(lambda: defaultdict(lambda: defaultdict(lambda:\
    defaultdict(lambda: defaultdict(lambda: defaultdict())))))
# Ex trackUsersHubsMonth struct: {"hubId":{"db":{"year":{"month":{"track":{"hgsid":count}}}}}}
trackCountsHubsMonth = defaultdict(lambda: defaultdict(lambda: defaultdict(lambda: defaultdict(
    lambda: defaultdict()))))
# Ex trackCountsHubsMonth struct: {"hubId":{"db":{"year":{"month":{"track":count}}}}}

monthYearSet = set() # Set containing monthYear strings (e.g. "Aug 2017")
(e.g. "Aug 2017") # Create a dictionary of hubUrls, shortLabels, and hubStatus ids publicHubs = dict() # Use hgsql to grab hub ID from hubStatus table and shortLabel from hubPublic # for each hub in hubPublic table cmd = ["/cluster/bin/x86_64/hgsql", "hgcentral", "-h", "genome-centdb", "-Ne", "select s.id,p.hubUrl,p.shortLabel\ from hubPublic p join hubStatus s where s.hubUrl=p.hubUrl", ] p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE) cmdout, cmderr = p.communicate() hubs = cmdout.decode("ASCII") hubs = hubs.split("\n") for hub in hubs: if hub == "": continue else: splitHub = hub.split("\t") publicHubs[splitHub[0]] = [splitHub[1], splitHub[2]] ##### ##### def parseTrackLog(line): """Parse trackLog line and return important fields: db, year, month, hgsid, and a list of tracks""" #### Sample line being processed #### # From Chris Lee's combined/trimmed log format produced by {script name} # [Sun Mar 05 04:11:27 2017] [error] [client ###.###.###.##] trackLog 0 hg38 hgsid_### cytoBandIdeo:1,cloneEndCTD:2 #### splitLine = line.strip().split('\t') date = splitLine[3] month = date.split()[1] year = date.split()[4] ip = splitLine[1] hgsid = splitLine[2] # temporary, really should rewrite script to use a var like ip_hgsid everywhere #hgsid = ip + "_" + hgsid db = splitLine[4] if len(splitLine) > 5: tracks = splitLine[5].split(",") else: tracks = [] return db, year, month, hgsid, tracks def modDicts(db, year, month, hgsid, tracks, toProcess, perMonth=False): """Modify global dictionaries to store information on db, track, and hub track usage""" ##### Set some variables based on input options processDbUsers = toProcess[0] processTrackUsers = toProcess[1] processTrackHubUsers = toProcess[2] ##### Process info about db usage # Count up number of times each hgsid shows up if processDbUsers == True: if hgsid not in dbUsers[db]: # If no entry in dictionary of users, intialize value to 1 dbUsers[db][hgsid] = 1 else: # Otherwise, increment usage by 1 dbUsers[db][hgsid] += 1 # If perMonth is true, then we record information about db usage on a perMonth basis if perMonth == True: if hgsid not in dbUsersMonth[db][year][month]: dbUsersMonth[db][year][month][hgsid] = 1 else: dbUsersMonth[db][year][month][hgsid] += 1 ##### Process per track information if processTrackUsers == True: for track in tracks: # Skip empty entries in trackList if track == "": continue # Remove trailing characters - track = track[:-2] + track = track.split(":")[0] # Record track user if hgsid not in trackUsers[db][track]: trackUsers[db][track][hgsid] = 1 else: trackUsers[db][track][hgsid] += 1 # If perMonth is true, then we record information about track usage on a perMonth basis if perMonth == True: # Incremement count for track/hgsid by one if hgsid not in trackUsersMonth[db][year][month][track]: trackUsersMonth[db][year][month][track][hgsid] = 1 else: trackUsersMonth[db][year][month][track][hgsid] += 1 ##### Process public hub tracks if processTrackHubUsers == True: if track.startswith("hub_"): - track = track[:-2] + track = track.split(":")[0] splitTrack = track.split("_", 2) if len(splitTrack) > 2: hubId = splitTrack[1] hubTrack = splitTrack[2] # Processed in same way as normal tracks, although now the top level dictionary is # keyed by hubIds, rather than db name if hubId in publicHubs: if hgsid not in trackUsersHubs[hubId][db][hubTrack]: trackUsersHubs[hubId][db][hubTrack][hgsid] = 1 else: trackUsersHubs[hubId][db][hubTrack][hgsid] += 1 if perMonth == True: if hgsid not in 
                                    trackUsersHubsMonth[hubId][db][year][month][hubTrack][hgsid] = 1
                                else:
                                    trackUsersHubsMonth[hubId][db][year][month][hubTrack][hgsid] += 1

def processFile(fileName, toProcess, perMonth=False):
    """Process a file line by line using the function parseTrackLog and record usage
       information using the function modDicts"""
    if fileName.endswith(".gz"):
        ifh = gzip.open(fileName, "r")
    else:
        ifh = open(fileName, "r")
    for line in ifh:
        if "str" not in str(type(line)):
            line = line.decode("ASCII")
        db, year, month, hgsid, tracks = parseTrackLog(line)
        modDicts(db, year, month, hgsid, tracks, toProcess, perMonth)
        # Keep track of month/years covered
        monthYear = month + " " + year
        monthYearSet.add(monthYear)

def processDir(dirName, toProcess, perMonth=False):
    """Process files in a directory using processFile function"""
    fileNames = os.listdir(dirName)
    for log in fileNames:
        fileName = os.path.join(dirName, log)
        processFile(fileName, toProcess, perMonth)

def dumpToJson(data, outputFile, outputDir):
    """output data to named outputFile"""
    jsonOut = open(os.path.join(outputDir, outputFile), "w")
    json.dump(data, jsonOut)
    jsonOut.close()

def main():
    # Parse command-line arguments
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description="Generates usage statistics for dbs, tracks, and hub \
tracks using processed Apache error_log files. \nThe processed files can be \
found in the following directory: /hive/users/chmalee/logs/trimmedLogs/result\n\n\
For more information, see RM#26191.")
    parser.add_argument("-f","--fileName", type=str, help='input file name, \
must be space-separated Apache error_log file')
    parser.add_argument("-d","--dirName", type=str, help='input directory \
name, files must be space-separated error_log files. No other files should be \
present in this directory.')
    parser.add_argument("--dbCounts", action='store_true', help='output a \
file containing users per ucsc database')
    parser.add_argument("--dbUsers", action='store_true', help='output a \
file containing hgsids associated with each ucsc database and how many uses each one had')
    parser.add_argument("--dbUsage", action='store_true', help='output a \
file containing total usage counts for each ucsc database')
    parser.add_argument("--trackCounts", action='store_true', help='output a \
file containing counts of how many users had a track on for a particular ucsc database')
    parser.add_argument("--trackUsers", action='store_true', help='output a \
file containing hgsids, the tracks associated with them, and how many trackLog lines they showed up in')
    parser.add_argument("--trackUsage", action='store_true', help='output a \
file containing total usage counts for each track per ucsc database')
    parser.add_argument("--trackHubCounts", action='store_true', help='output a \
file containing counts of how many users had a hub track on for a particular ucsc database or assembly hub')
    parser.add_argument("--trackHubUsers", action='store_true', help='output a \
file containing hgsids, the hub tracks associated with them, and how many trackLog lines they showed up in')
    parser.add_argument("--trackHubUsage", action='store_true', help='output a \
file containing total usage counts for each hub track per ucsc database')
    parser.add_argument("--allOutput", action='store_true', help='output a \
file for each option above (dbCounts, dbUsers, dbUsage, trackCounts, trackUsers, trackUsage, \
trackHubCounts, trackHubUsers, trackHubUsage)')
    parser.add_argument("-p","--perMonth", action='store_true', help='output \
file containing info on db/track/hub track use per month')
    parser.add_argument("-m","--monthYear", action='store_true', help='output \
file containing month/year pairs (e.g. "Mar 2017")')
    parser.add_argument("-j","--jsonOut", action='store_true', help='output \
json files for summary dictionaries')
    parser.add_argument("-t","--outputDefaults", action='store_true', help='output a \
file containing info on default track usage for top 15 most used assemblies')
    parser.add_argument("-o","--outDir", type=str, help='directory in which to place output files')

    args = parser.parse_args()

    # Print help message if no arguments are supplied
    if len(sys.argv) == 1:
        parser.print_help(sys.stderr)
        sys.exit(1)

    # File and directory options can't be used together. Catch this and exit.
    if args.fileName != None and args.dirName != None:
        print("-f/--fileName and -d/--dirName cannot be used together. Choose one and re-run.")
        sys.exit(1)

    # Catch it early if input file/directory doesn't exist and exit.
    if args.fileName:
        if not os.path.exists(args.fileName):
            print(args.fileName, "doesn't exist. Please run on a valid file.")
            exit(1)
    elif args.dirName:
        if not os.path.exists(args.dirName):
            print(args.dirName, "doesn't exist. Please run on a valid directory.")
            exit(1)
    if not any([args.dbCounts, args.dbUsers, args.dbUsage, args.trackUsers, args.trackCounts,
                args.trackUsage, args.trackHubUsers, args.trackHubCounts, args.trackHubUsage,
                args.allOutput]):
        print("You must specify at least one of: --allOutput, --dbCounts, --dbUsage, --dbUsers, \
--trackCounts, --trackUsers, --trackUsage, --trackHubCounts, --trackHubUsers, --trackHubUsage.")
        exit(1)

    # Setup output directory
    if args.outDir == None:
        # If an output directory is unspecified, then a new one with the current date/time is made
        currDateTime = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
        os.makedirs(currDateTime)
        outDir = currDateTime
    else:
        # Otherwise, user supplied an output directory name
        outDir = args.outDir
        if not os.path.exists(outDir):
            # If specified output directory doesn't exist, create it
            os.makedirs(outDir)

    # If "all" option is set change everyone to True
    if args.allOutput == True:
        args.dbCounts = True
        args.dbUsers = True
        args.dbUsage = True
        args.trackUsers = True
        args.trackCounts = True
        args.trackUsage = True
        args.trackHubUsers = True
        args.trackHubCounts = True
        args.trackHubUsage = True

    # Make list of users to process and build out dictionaries for later
    usersToProcess = [False, False, False]
    # Pos 0 - process db users
    # Pos 1 - process track users
    # Pos 2 - process public track hub users
    # The reason I've done it this way is to allow people to output just the counts for
    # dbs/tracks/hub tracks without outputting the users files. I could have just set
    # args.*Users = True if args.*Counts was true, but that would mean that the users file
    # would also be output, defeating the purpose of the option to only output the *Counts
    if any([args.dbCounts, args.dbUsers, args.dbUsage]):
        # If dbCounts or dbUsers specified, we need to process users
        usersToProcess[0] = True
    if any([args.trackCounts, args.trackUsers, args.trackUsage]):
        usersToProcess[1] = True
    if any([args.trackHubCounts, args.trackHubUsers, args.trackHubUsage]):
        usersToProcess[1] = True
        usersToProcess[2] = True

    # Process input to create dictionaries containing info about users per db/track/hub track
    if args.fileName:
        processFile(args.fileName, usersToProcess, args.perMonth)
    elif args.dirName:
        processDir(args.dirName, usersToProcess, args.perMonth)

    ##### Output files of summaries of db/track/hub track usage information

    # Output files of db users and db use
    if args.dbCounts == True:
        dbCountsFile = open(os.path.join(outDir, "dbCounts.tsv"), "w")
    if args.dbUsers == True:
        dbUsersFile = open(os.path.join(outDir, "dbUsers.tsv"), "w")
    if args.dbUsage == True:
        dbUsageFile = open(os.path.join(outDir, "dbUsage.tsv"), "w")

    # Set of nested for loops to go through dictionary level by level and
    # output and summarize results into appropriate counts dictionary
    if any([args.dbUsage, args.dbUsers, args.dbCounts]):
        for db in dbUsers:
            dbCounts[db] = 0
            dbUsage = 0
            for hgsid in dbUsers[db]:
                # Ignore those dbs only used once
                if dbUsers[db][hgsid] != 1:
                    dbCounts[db] += 1
                # Output dbUsers info to a file
                count = dbUsers[db][hgsid]
                dbUsage += count
                if args.dbUsers == True:
                    dbUsersFile.write(db + "\t" + hgsid + "\t" + str(count) + "\n")
            # Output counts of total users for each db
            if dbCounts[db] != 0:
                if args.dbCounts == True:
                    dbCountsFile.write(db + "\t" + str(dbCounts[db]) + "\n")
                if args.dbUsage == True:
                    dbUsageFile.write(db + "\t" + str(dbUsage) + "\n")

    # Close our output files
    if args.dbCounts == True:
        print("Finished outputting dbCounts.tsv")
        dbCountsFile.close()
    if args.dbUsers == True:
        print("Finished outputting dbUsers.tsv")
        dbUsersFile.close()
    if args.dbUsage == True:
        print("Finished outputting dbUsage.tsv")
        dbUsageFile.close()
    # Output files of track users and track use
    if args.trackCounts == True:
        trackCountsFile = open(os.path.join(outDir, "trackCounts.tsv"), "w")
    if args.trackUsers == True:
        trackUsersFile = open(os.path.join(outDir, "trackUsers.tsv"), "w")
    if args.trackUsage == True:
        trackUsageFile = open(os.path.join(outDir, "trackUsage.tsv"), "w")

    # Set of nested for loops to go through dictionary level by level and
    # output and summarize results into appropriate counts dictionary
    if any([args.trackUsage, args.trackUsers, args.trackCounts]):
        for db in trackUsers:
            for track in trackUsers[db]:
                # Initialize count for track to 0
                trackCounts[db][track] = 0
                trackUsage = 0
                for hgsid in trackUsers[db][track]:
                    # Filter out those hgsids who only used track once - likely to be bots
                    if trackUsers[db][track][hgsid] != 1:
                        trackCounts[db][track] += 1
                    # Output information on how much each user used each track
                    count = trackUsers[db][track][hgsid]
                    trackUsage += count
                    if args.trackUsers == True:
                        trackUsersFile.write(db + "\t" + track + "\t" + hgsid + "\t" + str(count) + "\n")
                if trackCounts[db][track] != 0:
                    if args.trackCounts == True:
                        # Output counts of total users for each track
                        count = trackCounts[db][track]
                        trackCountsFile.write(db + "\t" + track + "\t" + str(count) + "\n")
                    if args.trackUsage == True:
                        trackUsageFile.write(db + "\t" + track + "\t" + str(trackUsage) + "\n")

    # Close our output files
    if args.trackCounts == True:
        print("Finished outputting trackCounts.tsv")
        trackCountsFile.close()
    if args.trackUsers == True:
        print("Finished outputting trackUsers.tsv")
        trackUsersFile.close()
    if args.trackUsage == True:
        print("Finished outputting trackUsage.tsv")
        trackUsageFile.close()

    # Output files of hub track users and hub track use
    if args.trackHubUsers == True:
        trackUsersHubsFile = open(os.path.join(outDir, "trackUsersHubs.tsv"), "w")
    if args.trackHubCounts == True:
        trackCountsHubsFile = open(os.path.join(outDir, "trackCountsHubs.tsv"), "w")
    if args.trackHubUsage == True:
        trackUsageHubsFile = open(os.path.join(outDir, "trackUsageHubs.tsv"), "w")

    # Set of nested for loops to go through dictionary level by level and
    # output and summarize results into appropriate counts dictionary
    if any([args.trackHubUsage, args.trackHubUsers, args.trackHubCounts]):
        for hubId in trackUsersHubs:
            hubLabel = publicHubs[hubId][1]
            for db in trackUsersHubs[hubId]:
                for track in trackUsersHubs[hubId][db]:
                    # Initialize count for hub track to 0
                    trackCountsHubs[hubId][db][track] = 0
                    trackHubUsage = 0
                    for hgsid in trackUsersHubs[hubId][db][track]:
                        # Filter out those hgsids who only used track once - likely to be bots
                        if trackUsersHubs[hubId][db][track][hgsid] != 1:
                            trackCountsHubs[hubId][db][track] += 1
                        # Output information on how much each user used each track
                        count = trackUsersHubs[hubId][db][track][hgsid]
                        trackHubUsage += count
                        if args.trackHubUsers == True:
                            trackUsersHubsFile.write(hubLabel + "\t" + db + "\t" + track + "\t" + hgsid +\
                                "\t" + str(count) + "\n")
                    if trackCountsHubs[hubId][db][track] != 0:
                        if args.trackHubCounts == True:
                            # Output counts of total users for each hub track
                            count = trackCountsHubs[hubId][db][track]
                            trackCountsHubsFile.write(hubLabel + "\t" + db + "\t" + track + "\t" +\
                                str(count) + "\n")
                        if args.trackHubUsage == True:
                            trackUsageHubsFile.write(hubLabel + "\t" + db + "\t" + track + "\t" +\
                                str(trackHubUsage) + "\n")

    # Close our output files
    if args.trackHubUsers == True:
        print("Finished outputting trackHubUsers.tsv")
        trackUsersHubsFile.close()
    if args.trackHubCounts == True:
        print("Finished outputting trackHubCounts.tsv")
        trackCountsHubsFile.close()
    if args.trackHubUsage == True:
        print("Finished outputting trackHubUsage.tsv")
        trackUsageHubsFile.close()

    # Output file containing info on month/years covered by stats if indicated
    if args.monthYear == True:
        monthYearFile = open(os.path.join(outDir, "monthYear.tsv"), "w")
        for pair in monthYearSet:
            monthYearFile.write(pair + "\n")
        print("Finished outputting monthYear.tsv")
        monthYearFile.close()

    ##### Output data per month when indicated
    if args.perMonth == True:
        if args.dbUsers == True:
            dbUsersMonthFile = open(os.path.join(outDir, "dbUsers.perMonth.tsv"), "w")
        if args.dbCounts == True:
            dbCountsMonthFile = open(os.path.join(outDir, "dbCounts.perMonth.tsv"), "w")
        if args.dbUsage == True:
            dbUsageMonthFile = open(os.path.join(outDir, "dbUsage.perMonth.tsv"), "w")

        # Set of nested for loops to go through dictionary level by level and
        # output and summarize results into appropriate counts dictionary
        if any([args.dbUsage, args.dbUsers, args.dbCounts]):
            for db in dbUsersMonth:
                for year in dbUsersMonth[db]:
                    for month in dbUsersMonth[db][year]:
                        dbCountsMonth[db][year][month] = 0
                        dbUsageMonth = 0
                        for hgsid in dbUsersMonth[db][year][month]:
                            if dbUsersMonth[db][year][month][hgsid] != 1:
                                dbCountsMonth[db][year][month] += 1
                            # Output dbUsersMonth info to a file
                            count = dbUsersMonth[db][year][month][hgsid]
                            dbUsageMonth += count
                            if args.dbUsers == True:
                                dbUsersMonthFile.write(db + "\t" + year + "\t" + month +\
                                    "\t" + hgsid + "\t" + str(count) + "\n")
                        # Output dbCounts to a file
                        if dbCountsMonth[db][year][month] != 0:
                            if args.dbCounts == True:
                                count = dbCountsMonth[db][year][month]
                                dbCountsMonthFile.write(db + "\t" + year + "\t" + month +\
                                    "\t" + str(count) + "\n")
                            if args.dbUsage == True:
                                dbUsageMonthFile.write(db + "\t" + year + "\t" + month +\
                                    "\t" + str(dbUsageMonth) + "\n")

        if args.dbUsers == True:
            print("Finished outputting dbUsers.perMonth.tsv")
            dbUsersMonthFile.close()
        if args.dbCounts == True:
            print("Finished outputting dbCounts.perMonth.tsv")
            dbCountsMonthFile.close()
        if args.dbUsage == True:
            print("Finished outputting dbUsage.perMonth.tsv")
            dbUsageMonthFile.close()

        # Summarize user dictionaries to create counts per db/track/hub track
        if args.trackUsers == True:
            trackUsersMonthFile = open(os.path.join(outDir, "trackUsers.perMonth.tsv"), "w")
        if args.trackCounts == True:
            trackCountsMonthFile = open(os.path.join(outDir, "trackCounts.perMonth.tsv"), "w")
        if args.trackUsage == True:
            trackUsageMonthFile = open(os.path.join(outDir, "trackUsage.perMonth.tsv"), "w")

        # Set of nested for loops to go through dictionary level by level and
        # output and summarize results into appropriate counts dictionary
        if any([args.trackUsage, args.trackUsers, args.trackCounts]):
            for db in trackUsersMonth:
                for year in trackUsersMonth[db]:
                    for month in trackUsersMonth[db][year]:
                        for track in trackUsersMonth[db][year][month]:
                            trackCountsMonth[db][year][month][track] = 0
                            trackUsageMonth = 0
                            for hgsid in trackUsersMonth[db][year][month][track]:
                                if trackUsersMonth[db][year][month][track][hgsid] != 1:
                                    trackCountsMonth[db][year][month][track] += 1
                                count = trackUsersMonth[db][year][month][track][hgsid]
                                trackUsageMonth += count
                                if args.trackUsers == True:
                                    trackUsersMonthFile.write(db + "\t" + year + "\t" + month + "\t" +\
                                        hgsid + "\t" + track + "\t" + str(count) + "\n")
                            if trackCountsMonth[db][year][month][track] != 0:
                                if args.trackCounts == True:
                                    count = trackCountsMonth[db][year][month][track]
                                    trackCountsMonthFile.write(db + "\t" + year + "\t" + month + "\t" +\
                                        track + "\t" + str(count) + "\n")
+ year + "\t" + month + "\t" +\ track + "\t" + str(count) + "\n") if args.trackUsage == True: trackUsageMonthFile.write(db + "\t" + year + "\t" + month + "\t" +\ track + "\t" + str(trackUsageMonth) + "\n") if args.trackUsers == True: print("Finished outputting trackUsers.perMonth.tsv") trackUsersMonthFile.close() if args.trackCounts == True: print("Finished outputting trackUsers.perMonth.tsv") trackCountsMonthFile.close() if args.trackUsage == True: print("Finished outputting trackUsage.perMonth.tsv") trackUsageMonthFile.close() # Summarize user dictionaries to create counts per db/track/hub track if args.trackHubUsers == True: trackUsersHubsMonthFile = open(os.path.join(outDir, "trackUsersHubs.perMonth.tsv"), "w") if args.trackHubCounts == True: trackCountsHubsMonthFile = open(os.path.join(outDir, "trackCountsHubs.perMonth.tsv"), "w") if args.trackHubUsage == True: trackUsageHubsMonthFile = open(os.path.join(outDir, "trackUsageHubs.perMonth.tsv"), "w") # Set of nested for loops to go through dictionary level by level and # output and summarize results into appropriate counts dictionary if any([args.trackHubUsage, args.trackHubUsers, args.trackHubCounts]): for hubId in trackUsersHubsMonth: hubLabel = publicHubs[hubId][1] for db in trackUsersHubsMonth[hubId]: for year in trackUsersHubsMonth[hubId][db]: for month in trackUsersHubsMonth[hubId][db][year]: for track in trackUsersHubsMonth[hubId][db][year][month]: trackCountsHubsMonth[hubId][db][year][month][track] = 0 trackHubUsageMonth = 0 for hgsid in trackUsersHubsMonth[hubId][db][year][month][track]: if trackUsersHubsMonth[hubId][db][year][month][track][hgsid] != 1: trackCountsHubsMonth[hubId][db][year][month][track] += 1 count = trackUsersHubsMonth[hubId][db][year][month][track][hgsid] trackHubUsageMonth += count if args.trackHubUsers == True: trackUsersHubsMonthFile.write(hubLabel + "\t" + db + "\t" + year +\ "\t" + month + "\t" + track + "\t" + hgsid + "\t" + str(count) + "\n") if trackCountsHubsMonth[hubId][db][year][month][track] != 0: if args.trackHubCounts == True: count = trackCountsHubsMonth[hubId][db][year][month][track] trackCountsHubsMonthFile.write(hubLabel + "\t" + db + "\t" + year +\ "\t" + month + "\t" + track + "\t" +\ str(count) + "\n") if args.trackHubUsage == True: trackUsageHubsMonthFile.write(hubLabel + "\t" + db + "\t" + year +\ "\t" + month + "\t" + track + "\t" +\ str(trackHubUsageMonth) + "\n") if args.trackHubUsers == True: print("Finished outputting trackUsersHubs.perMonth.tsv") trackUsersHubsMonthFile.close() if args.trackHubCounts == True: print("Finished outputting trackCountsHubs.perMonth.tsv") trackCountsHubsMonthFile.close() if args.trackHubUsage == True: print("Finished outputting trackUsageHubs.perMonth.tsv") trackUsageHubsMonthFile.close() ##### ##### Output json files if indicated ##### if args.jsonOut == True: dumpToJson(dbCounts, "dbCounts.json", outDir) dumpToJson(trackCounts, "trackCounts.json", outDir) dumpToJson(trackCountsHubs, "trackCountsHubs.json", outDir) if args.perMonth == True: dumpToJson(dbCountsMonth, "dbCounts.perMonth.json", outDir) dumpToJson(trackCountsMonth, "trackCounts.perMonth.json", outDir) dumpToJson(trackCountsHubsMonth, "trackCountsHubs.perMonth.json", outDir) #if args.monthYear == True: # dumpToJson(monthYearSet, "monthYearSet.json") ##### ##### Output information on default track usage if indicated ##### if args.outputDefaults == True and all([args.dbCounts, args.trackCounts]): # Sort dbs by most popular dbCountsSorted = sorted(dbCounts.items(), key=operator.itemgetter(1)) 
        dbCountsSorted.reverse()

        defaultCountsFile = open(os.path.join(outDir, "defaultCounts.tsv"), "w")
        for x in range(0, 15):
            # Will only output the default track stats for the 15 most popular assemblies
            db = dbCountsSorted[x][0]
            dbOpt = "db=" + db
            # HGDB_CONF must be set here so that we use default tracks from beta, not dev
            # Dev can contain staged tracks that don't exist on RR, leading to errors later in script
            cmd = ["cd /usr/local/apache/cgi-bin && HGDB_CONF=$HOME/.hg.conf.beta ./hgTracks hgt.trackImgOnly=1 " + dbOpt]
            p = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
            cmdout, cmderr = p.communicate()
            errText = cmderr.decode("ASCII") # Convert binary output into ASCII for processing
            # Process stderr output as that's what contains the trackLog lines
            splitErrText = errText.split("\n")
            trackLog = splitErrText[0] # First element is trackLog line, second is CGI_TIME; only want trackLog
            splitLine = trackLog.split(" ")
            # Build list of tracks
            tracks = splitLine[4]
            tracks = tracks.split(",")
            dbUse = dbCounts[db]
            # output list to file that contains column headings
            defaultCountsFile.write("#db\ttrackName\ttrackUse\t% using\t% turning off\n#" + db + "\t" + str(dbUse) + "\n")

            defaultCounts = []
            for track in tracks:
                if track == "":
                    continue
                # Remove trailing characters
-                track = track[:-2]
+                track = track.split(":")[0]
                try:
                    trackUse = trackCounts[db][track]
                    relUse = (trackUse/dbUse)*100
                    relOff = ((dbUse - trackUse)/dbUse)*100
                    # Store all this info in a list so that we can sort by most used tracks later
                    defaultCounts.append([track, trackUse, relUse, relOff])
                except KeyError:
                    continue

            # Sort defaultCounts for current assembly by most used track first
            defaultCountsSorted = sorted(defaultCounts, key=operator.itemgetter(2))
            defaultCountsSorted.reverse()
            # Output sorted defaultCounts to a file
            for line in defaultCountsSorted:
                track = line[0]
                use = line[1]
                on = line[2]
                off = line[3]
                output = "{}\t{}\t{:d}\t{:3.2f}\t{:3.2f}\n".format(db, track, use, on, off)
                defaultCountsFile.write(output)

        print("Finished outputting defaultCounts.tsv")
        defaultCountsFile.close()

    elif args.outputDefaults == True:
        if not all([args.dbCounts, args.trackCounts]):
            # Need both dbCounts and trackCounts to actually be populated with data to be able to do the default counts
            print("\nCannot output default tracks if either --trackCounts or --dbCounts is not set. Set both of these options and re-run")

    ####
    # Print output directory name, particularly useful if using default output directory names
    # based on day/time
    print("\nYour output is in", outDir)

if __name__ == "__main__":
    main()
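
Note (not part of the commit above): the short Python sketch below illustrates the two ideas in the commit message. The first part shows why slicing off the last two characters mangles a track name once its count reaches two or more digits, and how splitting on ":" fixes it. The second part shows, under the assumption that per-hub track counts are available as a dict keyed by hub ID, the kind of summing across hubs described for assemblyStatsCron; assemblyStatsCron itself is not shown in this diff, so the perHubCounts input, the trackName() helper, and the track/hub names are hypothetical.

# Illustrative sketch only; helper and data names are made up for the example.
from collections import Counter

def trackName(token):
    """Strip the trailing :count from a trackLog token, e.g. knownGene:1034 -> knownGene."""
    return token.split(":")[0]

# Old behavior: slicing off the last two characters only works for single-digit counts.
assert "knownGene:1034"[:-2] == "knownGene:10"     # track name is mangled
assert trackName("knownGene:1034") == "knownGene"  # new behavior keeps the full name

# Summing track usage across all hubs instead of keeping only the highest per-hub count.
perHubCounts = {                                   # hypothetical {hubId: {track: count}} input
    "1": {"myTrack": 10, "otherTrack": 3},
    "2": {"myTrack": 7},
}
totals = Counter()
for hubId, counts in perHubCounts.items():
    totals.update(counts)                          # adds counts per track across hubs
print(totals["myTrack"])                           # 17, rather than the per-hub maximum of 10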