src/utils/qa/trackCountsParse cfa9546a9f8fe1b79434faa7ec9cf81093b57d34

cfa9546a9f8fe1b79434faa7ec9cf81093b57d34
lrnassar
  Tue Jun 3 13:43:30 2025 -0700
Some small tweaks to the trackCountsParse script that generates a list of potential tracks to nest or hide based on usage. 1. Check to see if the makeUsageReport script has already run, and if not, run it for those dates. 2. Cleaned up some greps that needed escaped backslashes. 3. Typo fix. 4. Clarified the message that shows how to format the output with the tabFmt script. Refs #35858

diff --git src/utils/qa/trackCountsParse src/utils/qa/trackCountsParse
index f6bd6b77d23..07d5b851c45 100755
--- src/utils/qa/trackCountsParse
+++ src/utils/qa/trackCountsParse
@@ -1,410 +1,421 @@
 #!/usr/bin/env python3
 
 import subprocess
 from collections import OrderedDict
 from time import localtime, strftime
 from datetime import datetime
 from dateutil.relativedelta import relativedelta
 import calendar
 import math
-import subprocess,sys,argparse
+import subprocess,sys,argparse,os
 
 def parseArgs():
     """
     Parse the command line arguments.
 
     When the script is invoked with no arguments at all, the usage line and an
     extended description with example runs are printed and the script exits
     with status 0.
 
     Returns:
     - argparse.Namespace: dbs, workDir, cutOffThreshhold (float),
       numOfMonthsToCompare (int), singleReport (bool), startDate, endDate.
     """
     parser = argparse.ArgumentParser(description = __doc__,
                                      formatter_class=argparse.RawDescriptionHelpFormatter)
     #Take the built-in optional group off the list so the 'required arguments'
     #group created below is registered ahead of it; it is appended back before parsing
     optional = parser._action_groups.pop()
 
     required = parser.add_argument_group('required arguments')
 
     required.add_argument ("dbs",
         help = "Database to query for track counts, e.g. hg19, hg38, mm10.")
     required.add_argument ("workDir",
         help = "Work directory to use for processing and final output. Use full path with '/' at the end.")
     #type=float / type=int: without them a value given on the command line stays a
     #string, which breaks the multiplication, math.floor() and range() done with it.
     #'%%' instead of '%': argparse %-formats help strings, so a bare '%' is a format error.
     optional.add_argument ("-c", "--cutOffThreshhold", dest = "cutOffThreshhold", default = .3, type = float,
         help = "Optional: The %% value, as compared to the trackCounts median, to be used " +
             "as a threshhold to choose what tracks should be filtered. Default is .3.")
     optional.add_argument ("-n", "--numOfMonthsToCompare", dest = "numOfMonthsToCompare", default = 6, type = int,
         help = "Optional: The number of months to compare for filtering. " +
             "Default is 6.")
     optional.add_argument ("-r", "--singleReport", default = False, action = "store_true",
         help = "Optional: Run as a singleReport. This generates track counts for the specified " +
             "dates. This is useful for seeing track counts over a period of time. Requires the vars below")
     optional.add_argument ("-s", "--startDate", dest = "startDate",
         help = "Optional: The start date when running in singleReport mode. " +
             "Date should be formatted as YYYY-MM-DD.")
     optional.add_argument ("-e", "--endDate", dest = "endDate",
         help = "Optional: The end date when running in singleReport mode. " +
             "Date should be formatted as YYYY-MM-DD.")
     if (len(sys.argv) == 1):
         parser.print_usage()
         print("\nGenerates track counts based on the log parsing paring script '/hive/users/chmalee/logs/byDate/makeUsageReport'.\n" +
               "The default behavior looks at track counts over the last 6 months and generates a list of tracks that\n" +
               "were below 30% of the median track usage every month, allowing for a floor(n/4) exception where n is\n" +
               "the number of months searched. Using the default 6 months, that allows for 1 exemption.\n" +
               "This output can be used in order to identify seldomly used tracks for archiving, retiring or restructuring.\n\n" +
               "Alternatively, the script can be run in 'singleReport(-r)' mode where it will generate a sorted list\n" +
               "of track counts over a period of time. This could be useful for reporting purposes.\n\n" +
 
               "Example runs:\n" +
               "    trackCountsParse hg38 /hive/users/lrnassar/trackCounts/\n" +
               "    trackCountsParse hg38 /hive/users/lrnassar/trackCounts/ -c .5 -n 12\n" +
               "    trackCountsParse hg38 /hive/users/lrnassar/trackCounts/ -r -s 2023-01-01 -e 2023-12-31\n")
 
         sys.exit(0)
     parser._action_groups.append(optional)
     options = parser.parse_args()
     return  options
 
 def bash(cmd):
     """
     Execute cmd through the shell and hand back everything it printed.
 
     stderr is folded into stdout. A non-zero exit status is turned into a
     RuntimeError carrying the command, its return code and its output.
     """
     try:
         completed = subprocess.run(
             cmd,
             shell=True,
             check=True,
             stdout=subprocess.PIPE,
             stderr=subprocess.STDOUT,
             universal_newlines=True,
         )
     except subprocess.CalledProcessError as failure:
         message = "command '{}' return with error (code {}): {}".format(
             failure.cmd, failure.returncode, failure.output)
         raise RuntimeError(message)
     return completed.stdout
 
+def file_exists(filepath):
+    """Return True when filepath names an existing regular file (os.path.isfile)."""
+    return os.path.isfile(filepath)
+
 def generateTrackCounts(dbs,workDir,startDate,endDate):
     #Generate track usage report binned by month for a specific time frame and dbs
     #Format date format is XXXX-XX-XX, e.g. 2023-11-01
     outputFileName = workDir+dbs+"."+startDate+"to"+endDate+".trackCounts.txt"
     #Run the script and remove the ct line (ct_), hub lines (hub_), dup lines (dup_)
     #And header line (^#), as well as remove the first column of repeating database
-    bash("/hive/users/chmalee/logs/byDate/makeUsageReport -t -db "+dbs+" --bin-months -s "+startDate+" -e "+endDate+" | grep -v \"hub_\|ct_\|dup_\|^#\" | cut -f2- > "+outputFileName)
+    
+    if not file_exists(outputFileName):
+        print("Generating new file")
+        print("/hive/users/chmalee/logs/byDate/makeUsageReport -t -db " + dbs + " --bin-months -s " + startDate + " -e " + endDate + " | grep -v \"hub_\\|ct_\\|dup_\\|^#\" | cut -f2- > " + outputFileName)
+        cmd = ("/hive/users/chmalee/logs/byDate/makeUsageReport -t -db " + dbs + " --bin-months -s " + startDate + " -e " + endDate + " | grep -v \"hub_\\|ct_\\|dup_\\|^#\" | cut -f2- > " + outputFileName)
+        bash(cmd)
+    else:
+        print("File already generated: " + outputFileName)
+        
     return(outputFileName)
 
 def createDicFromTrackCountsFile(trackCountsFilePath):
     """
     This function reads a file containing track counts and creates a dictionary
     with track names as keys and their corresponding counts as values.
 
     Each line is expected to be "<trackName>[:]<TAB><count>". A trailing ':' on
     the track name is dropped, counts of repeated track names are added together,
     and blank lines are ignored.
 
     Args:
     - trackCountsFilePath (str): Path to the file containing track counts.
 
     Returns:
     - tuple: (dict mapping track name -> {"Count": <count as str>},
               number of distinct track names). The total is also printed.
     """
     trackCountsDic = {}
     #'with' closes the handle even when a line fails to parse
     with open(trackCountsFilePath, 'r') as trackList:
         for line in trackList:
             parsedLine = line.rstrip().split("\t")
             #A blank line (e.g. a stray trailing newline) has no count column; skip it
             if parsedLine == [""]:
                 continue
             currentTrackName = parsedLine[0]
             if currentTrackName.endswith(":"):
                 currentTrackName = currentTrackName[:-1]
             if currentTrackName not in trackCountsDic:
                 trackCountsDic[currentTrackName] = {"Count": parsedLine[1]}
             else:
                 #Counts are kept as strings; convert only for the addition
                 runningTotal = int(trackCountsDic[currentTrackName]["Count"]) + int(parsedLine[1])
                 trackCountsDic[currentTrackName]["Count"] = str(runningTotal)
     totalCount = len(trackCountsDic)
     print("Total tracks to parse: "+str(totalCount))
     return(trackCountsDic,totalCount)
 
 def checkIfThereIsAHigherLevelParentTrack(parentChildAssociationsDic,trackName,firstTry,dbs):
     """
     Look one level up the trackDb hierarchy for trackName and record what is found.
 
     On the first try the track itself is queried with tdbQuery; on later tries
     the parent recorded so far is queried instead, so repeated calls climb one
     level per call. A 'parent ...' line, or a 'superTrack <name>' line whose
     second word is not 'on', flags the entry as a Container and overwrites its
     parentName. A first-try query that mentions none of compositeTrack,
     superTrack or parent marks the entry as not being a Container.
     Returns the same (mutated) parentChildAssociationsDic.
     """
     if firstTry == False:
         queryTarget = parentChildAssociationsDic[trackName]['parentName']
     else:
         queryTarget = trackName
     tdbLines = bash("tdbQuery \"select * from "+dbs+" where track='"+queryTarget+"'\"").split("\n")
     rawText = str(tdbLines)
     mentionsHierarchy = "compositeTrack" in rawText or "superTrack" in rawText  or "parent" in rawText
     if not mentionsHierarchy:
         #Only the very first lookup may declare a track parentless
         if firstTry != False:
             parentChildAssociationsDic[trackName]['Container'] = False
         return(parentChildAssociationsDic)
     for tdbLine in tdbLines:
         if tdbLine.startswith("parent"):
             parentChildAssociationsDic[trackName]['Container'] = True
             parentChildAssociationsDic[trackName]['parentName'] = tdbLine.split(" ")[1]
         elif tdbLine.startswith("superTrack"):
             words = tdbLine.split(" ")
             #'superTrack on' marks the super track itself, not a child of one
             if words[1] != "on":
                 parentChildAssociationsDic[trackName]['Container'] = True
                 parentChildAssociationsDic[trackName]['parentName'] = words[1]
     return(parentChildAssociationsDic)
 
 def lookUpTracksToFindParentChildAssociations(trackCountsDic,totalCount,parentChildAssociationsDic,dbs):
     """
     Resolve the parent association of every track in trackCountsDic that is
     not yet present in parentChildAssociationsDic.
 
     Each new track gets one lookup on itself and, when a parent was found, up
     to two further lookups that each climb one more level. Tracks without any
     parent are marked Container = False. Progress is printed every 2000 new tracks.
 
     Args:
     - trackCountsDic (dict): Track names (keys) to process.
     - totalCount (int): Number of tracks, used only in the progress message.
     - parentChildAssociationsDic (dict): Associations found so far; updated in place.
     - dbs (str): Database name passed on to the tdbQuery lookups.
 
     Returns:
     - dict: The updated parent-child association dictionary.
     """
     newlySeen = 0
     for trackName in trackCountsDic:
         if trackName in parentChildAssociationsDic:
             continue
         parentChildAssociationsDic[trackName] = {}
         newlySeen += 1
         if newlySeen % 2000 == 0:
             print(str(newlySeen)+" out of "+str(totalCount))
         #First lookup: does the track have a parent at all?
         parentChildAssociationsDic = checkIfThereIsAHigherLevelParentTrack(parentChildAssociationsDic,trackName,True,dbs)
         if 'parentName' not in parentChildAssociationsDic[trackName]:
             #Top level tracks may be containers themselves but have no parent
             parentChildAssociationsDic[trackName]['Container'] = False
         else:
             #Second lookup: climb from the parent to its own parent, if any
             parentChildAssociationsDic = checkIfThereIsAHigherLevelParentTrack(parentChildAssociationsDic,trackName,False,dbs)
         #Third lookup: one last climb for the deepest nesting
         if 'parentName' in parentChildAssociationsDic[trackName]:
             parentChildAssociationsDic = checkIfThereIsAHigherLevelParentTrack(parentChildAssociationsDic,trackName,False,dbs)
 
     return(parentChildAssociationsDic)
 
 def buildFinalDicWithOnlyTopLevelTrackCounts(trackCountsDic,parentChildAssociationsDic):
     """
     Collapse per-track counts onto their top-level tracks.
 
     A track whose Container flag is False is credited under its own name; any
     other track is credited under its recorded parentName. For every top-level
     name the largest count seen is kept (not the sum).
     """
     topLevelCounts = {}
     for trackName, trackInfo in trackCountsDic.items():
         association = parentChildAssociationsDic[trackName]
         if association['Container'] is False:
             creditedTo = trackName
         else:
             creditedTo = association['parentName']
         candidate = trackInfo['Count']
         #Keep the first count seen, then only replace it with a strictly larger one
         if creditedTo not in topLevelCounts or int(candidate) > int(topLevelCounts[creditedTo]):
             topLevelCounts[creditedTo] = candidate
     return(topLevelCounts)
 
 def makeFinalFileOnTopLevelTrackCounts(finalDicOfTopLevelTracksAndCounts,pathUrl,dbs):
     """
     This function creates a final output file containing details of top-level tracks,
     including their short labels (if available) and counts. The file is saved at the
     specified path URL, and a copy sorted by count (highest first) is written
     next to it as pathUrl + ".sorted".
 
     Args:
     - finalDicOfTopLevelTracksAndCounts (dict): A dictionary containing top-level track names
                                                 and their respective counts.
     - pathUrl (str): The path where the final output file will be saved.
     - dbs (str): Database name used in the tdbQuery lookups for the short labels.
     """
     n=0
     #'with' makes sure the file is flushed and closed before sort reads it
     with open(pathUrl, 'w') as outputFile:
         for key in finalDicOfTopLevelTracksAndCounts.keys():
             if key == "":
                 continue
             count = str(finalDicOfTopLevelTracksAndCounts[key])
             tdbQuery = bash("tdbQuery \"select * from "+dbs+" where track='"+key+"'\"").split("\n")
             if "shortLabel" in str(tdbQuery):
                 for entry in tdbQuery:
                     if entry.startswith("shortLabel"):
                         n+=1
                         shortLabel = " ".join(entry.split(" ")[1:])
                         outputFile.write(key+"\t"+shortLabel+"\t"+count+"\n")
             else:
                 #No shortLabel available: fall back to the track name
                 n+=1
                 outputFile.write(key+"\t"+key+"\t"+count+"\n")
     print("Final file completed. Total number of tracks: "+str(n))
     #Order and sort final file. The separator is a literal tab in single quotes:
     #the $'\t' form is a bash extension and bash() runs the command through /bin/sh
     bash("sort -t '\t' -k3 -rn "+pathUrl+" > "+pathUrl+".sorted")
     print("Final sorted file: "+pathUrl+".sorted")
 
 def get_count(dicField):
     """Sort-key helper: pull the 'trackCounts' value out of a track record."""
     trackCounts = dicField['trackCounts']
     return trackCounts
 
 def makeOrderedDicForSpecificTime(finalDicOfTopLevelTracksAndCounts,dbs):
+    """
+    Build the list of top-level track records for one period, highest count first.
+
+    For every non-empty track name tdbQuery is run against dbs; each output line
+    starting with 'shortLabel' yields one record, and a track whose output has no
+    'shortLabel' gets its own name as the label. Each record is a dict with the keys
+    trackName, shortLabel and trackCounts (int). The number of records is printed.
+    Despite the function name a list is returned; orderedDic below is never filled.
+    """
     orderedDic = OrderedDict()
     listOfCountsToSort = []
     n=0
     #Fetch the shortLabels and make a list where each entry is a dic with trackName, shortLabel, and trackCount
     for key in finalDicOfTopLevelTracksAndCounts.keys():
         if key != "":
             tdbQuery = bash("tdbQuery \"select * from "+dbs+" where track='"+key+"'\"").split("\n")
             if "shortLabel" in str(tdbQuery):
                 for entry in tdbQuery:        
                     if entry.startswith("shortLabel"):
                         n+=1
                         shortLabel = " ".join(entry.split(" ")[1:])
                         listOfCountsToSort.append({'trackName':key,'shortLabel':shortLabel,'trackCounts':int(finalDicOfTopLevelTracksAndCounts[key])})
             else:
                 n+=1
                 listOfCountsToSort.append({'trackName':key,'shortLabel':key,'trackCounts':int(finalDicOfTopLevelTracksAndCounts[key])})
-    print("Tota number of tracks: "+str(n))
+    print("Total number of tracks: "+str(n))
     
     #Sort the trackCounts list to return in order to find data that meets cutoff threshhold
     listOfCountsToSort.sort(key=get_count, reverse=True)
     return(listOfCountsToSort)
     
 def refineTrackCountsBasedOnCutOff(listOfTracks,cutOffThreshhold,period):
     """
     Take an ordered list containing dics of track counts and filter it based
     on a set threshhold. Then return a new ordered dictionary that contains
     the tracks below the threshhold with trackNames as keys and shortLabel
     and counts as values
 
     Args:
     - listOfTracks (list): Track records (trackName, shortLabel, trackCounts),
                            sorted by trackCounts with the highest first.
     - cutOffThreshhold (float or numeric str): Fraction of the median count
                            below which a track is reported.
     - period (str): Label of the period, used only in the printed message.
 
     Returns:
     - OrderedDict: trackName -> {shortLabel, trackCounts,
                    countComparedToMaxForPeriod, countComparedToCutoff}.
                    Empty when listOfTracks is empty.
     """
     finalTrackCountsDic = OrderedDict()
     #A period without any tracks has no median; report a cutoff of 0 and no tracks
     if not listOfTracks:
         print("The trackCount cutoff for "+period+" is: 0")
         return(finalTrackCountsDic)
     #The list is sorted, so the middle element holds the median count.
     #float() lets the threshold be given as a number or as a numeric string
     medianCount = listOfTracks[int(len(listOfTracks)/2)]['trackCounts']
     trackCountCutoff = medianCount*float(cutOffThreshhold)
     maxCountForPeriod = listOfTracks[0]["trackCounts"]
     for track in listOfTracks:
         if track['trackCounts'] < trackCountCutoff:
             finalTrackCountsDic[track['trackName']]={
                 'shortLabel':track['shortLabel'],
                 'trackCounts':track['trackCounts'],
                 'countComparedToMaxForPeriod':track['trackCounts']/maxCountForPeriod,
                 'countComparedToCutoff':track['trackCounts']/trackCountCutoff}
     print("The trackCount cutoff for "+period+" is: "+str(trackCountCutoff))
     return(finalTrackCountsDic)
 
 def getDateRangesForComparison(numOfMonthsToCompare):
     """
     Work out the calendar months to query, newest first.
 
     Starting with the month before the current one (the current month is left
     out so that only complete logs are used) and going back
     numOfMonthsToCompare months, build an ordered dictionary whose keys are
     "<startDate>-<endDate>" (used as titles of the respective outputs) and
     whose values hold the first and last day of that month as
     {'startDate': 'YYYY-MM-01', 'endDate': 'YYYY-MM-DD'}.
     """
     monthFormat = '%Y-%m'
     #Parsing year-month only lands on the first day of the current month
     currentMonth = datetime.strptime(datetime.today().strftime(monthFormat), monthFormat)
     dateRanges = OrderedDict()
     for monthsBack in range(numOfMonthsToCompare):
         targetMonth = currentMonth - relativedelta(months=monthsBack+1)
         year, month = datetime.strftime(targetMonth, monthFormat).split('-')
         lastDateOfMonth = calendar.monthrange(int(year), int(month))[1]
         startDate = "-".join([year, month, "01"])
         endDate = "-".join([year, month, str(lastDateOfMonth)])
         dateRanges[startDate+"-"+endDate] = {'startDate':startDate,'endDate':endDate}
     return(dateRanges)
 
 def createFinalListOfTracksThatMeetCutoffEveryMonth(finalDicWithCutOffDics,numOfMonthsToCompare):
     """
     Keep only the tracks that fell under the cutoff in (nearly) every period.
 
     Every track that appears in at least one monthly dictionary is a candidate.
     A candidate may be absent from at most floor(numOfMonthsToCompare/4)
     periods, which tolerates monthly outliers; the rest are dropped and their
     names printed. Returns the list of surviving track names in the order in
     which they were first seen.
     """
     monthlyDics = list(finalDicWithCutOffDics.values())
     #Candidates, in first-seen order, mapped to the number of periods they miss
     missingPeriodsPerTrack = {}
     for tracksInPeriod in monthlyDics:
         for track in tracksInPeriod:
             missingPeriodsPerTrack.setdefault(track, 0)
     #Due to data being weird, allow floor(n/4) outlier months
     outlierMonthsExcemption = math.floor(numOfMonthsToCompare/4)
     for track in missingPeriodsPerTrack:
         missingPeriodsPerTrack[track] = sum(1 for tracksInPeriod in monthlyDics if track not in tracksInPeriod)
     listOfTracksInEligiblePeriods = [track for track, missed in missingPeriodsPerTrack.items()
                                      if missed <= outlierMonthsExcemption]
     listOfTracksFilteredOut = [track for track, missed in missingPeriodsPerTrack.items()
                                if missed > outlierMonthsExcemption]
     if listOfTracksFilteredOut:
         print("The following tracks were filtered out because they did not meet")
         print("the cutoff in all of the months specified:\n")
         for track in listOfTracksFilteredOut:
             print(track)
     return(listOfTracksInEligiblePeriods)
 
 def get_avCount(dicField):
     """Sort-key helper: pull the 'averageTrackCount' value out of a report record."""
     averageTrackCount = dicField['averageTrackCount']
     return averageTrackCount
 
 def constructSortedFinalTrackDicWithAllData(finalDicWithCutOffDics,listOfTracksInEligiblePeriods,numOfMonthsToCompare,dbs):
     """
     Takes in a final list of track names which have met all the conditions for potential archiving
     and constructs a final list of per-track records with all of the data sorted. This includes
     averages over the time period for the track counts as well as the comparison to cutoff/max.
     The track group is also queried (via tdbQuery) for use in the ultimate decision.
 
     Args:
     - finalDicWithCutOffDics (dict): period -> {trackName -> per-period values}.
     - listOfTracksInEligiblePeriods (list): Track names to report.
     - numOfMonthsToCompare (int): Number of periods; averages divide by this
                                   minus the periods a track is missing from.
     - dbs (str): Database name used in the tdbQuery lookup of the group.
 
     Returns:
     - list: One dict per track (trackName, shortLabel, group, averageTrackCount,
             averageCountComparedToMaxForPeriod, averageCountComparedToCutoff),
             sorted by averageTrackCount with the highest first.
     """
     #NOTE(review): firstPeriod is never read below; the call only has the side
     #effect of raising StopIteration when finalDicWithCutOffDics is empty
     firstPeriod = next(iter(finalDicWithCutOffDics))
     listOfTracksToReport = []
     for track in listOfTracksInEligiblePeriods:
         group = ""
         addedTrackCounts = 0
         addedCountComparedToMaxForPeriod = 0
         addedCountComparedToCutoff = 0      
         missingMonths = 0 #This checks for the floor(n/4) tolerance that tracks can be missing
         for period in finalDicWithCutOffDics.keys():
             if track in finalDicWithCutOffDics[period].keys():
                 #Look up shortLabel and group only once, in the first period that has the track
                 if group == "":
                     shortLabel = finalDicWithCutOffDics[period][track]["shortLabel"]
                     tdbQuery = bash("tdbQuery \"select group from "+dbs+" where track='"+track+"'\"").split("\n")
                     #presumably the first output line is "group <name>" — verify against tdbQuery
                     if 'group' in str(tdbQuery):
                         group = tdbQuery[0].split(" ")[1]
                     else: 
                         group = "No group"
                 addedTrackCounts+=finalDicWithCutOffDics[period][track]['trackCounts']
                 addedCountComparedToMaxForPeriod+=finalDicWithCutOffDics[period][track]['countComparedToMaxForPeriod']
                 addedCountComparedToCutoff+=finalDicWithCutOffDics[period][track]['countComparedToCutoff']
             else:
                 missingMonths+=1
                 
         #Average only over the periods in which the track was actually present
         averageTrackCount = round(addedTrackCounts/(numOfMonthsToCompare-missingMonths),2)
         averageCountComparedToMaxForPeriod = round(addedCountComparedToMaxForPeriod/(numOfMonthsToCompare-missingMonths),4)
         averageCountComparedToCutoff = round(addedCountComparedToCutoff/(numOfMonthsToCompare-missingMonths),2)
         listOfTracksToReport.append({'trackName':track,'shortLabel':shortLabel,'group':group,'averageTrackCount':averageTrackCount,'averageCountComparedToMaxForPeriod':averageCountComparedToMaxForPeriod,'averageCountComparedToCutoff':averageCountComparedToCutoff})
     listOfTracksToReport.sort(key=get_avCount, reverse=True)
     return(listOfTracksToReport)
 
 def writeFinalTrackListToFile(finalOutputTrackDicToReport,workDir,dbs,cutOffThreshhold,numOfMonthsToCompare):
     """
     Take the final processed dictionary and write it out to a tsv file including the vars
     used in the data generation.
     """
     date = datetime.today().strftime('%Y-%m')
     monthToParse = datetime.strftime(datetime.strptime(date, '%Y-%m') - relativedelta(months=numOfMonthsToCompare), '%Y-%m')
     fileNamePathStartToEndDate = workDir+monthToParse+"-"+date+"."+dbs+".tracksToArchive.tsv"
     finalOutputFile = open(fileNamePathStartToEndDate,'w')
     finalOutputFile.write("#Variables used in this file generation: dbs="+dbs+" numOfMonthsToCompare="+str(numOfMonthsToCompare)+" cutOffThreshhold="+str(cutOffThreshhold)+"\n")
     finalOutputFile.write("#trackName\tshortLabel\tgroup\taverageTrackCount\taverageCountComparedToMaxForPeriod\taverageCountComparedToCutoff\n")
     for track in finalOutputTrackDicToReport:
         finalOutputFile.write(track['trackName']+"\t"+track['shortLabel']+"\t"+track['group']+"\t"+str(track['averageTrackCount'])+"\t"+str(track['averageCountComparedToMaxForPeriod'])+"\t"+str(track['averageCountComparedToCutoff'])+"\n")
     finalOutputFile.close()
     print("\nCutoff tracks file complete: "+fileNamePathStartToEndDate)
-    print("\nYou can pipe this output into ~markd/bin/tabFmt removing the first line to get a nicely formatted output.")
+    print("\nYou nicely format the output as such: tail -n +2 outputFilePath.tsv | tabFmt stdin")
 
 def main():
     """Initialize options and call other functions"""
     options = parseArgs()
     dbs,workDir,cutOffThreshhold,numOfMonthsToCompare = options.dbs,options.workDir,options.cutOffThreshhold,options.numOfMonthsToCompare
     #Line below exists only for debugging purposes
-    #dbs,workDir,cutOffThreshhold,numOfMonthsToCompare = 'hg38','/hive/users/lrnassar/temp/tmp/',.3,12
-    if options.singleReport == True:
+    # dbs,workDir,cutOffThreshhold,numOfMonthsToCompare,singleReport = 'hg38','/hive/users/lrnassar/temp/tmp/',.3,6,False
+    if singleReport == True:
         startDate,endDate,parentChildAssociationsDic = options.startDate,options.endDate,{}
         logFile = generateTrackCounts(dbs,workDir,startDate,endDate)
         trackCountsDic,totalCount = createDicFromTrackCountsFile(logFile)
         parentChildAssociationsDic = lookUpTracksToFindParentChildAssociations(trackCountsDic,totalCount,parentChildAssociationsDic,dbs)
         finalDicOfTopLevelTracksAndCounts = buildFinalDicWithOnlyTopLevelTrackCounts(trackCountsDic,parentChildAssociationsDic)
         makeFinalFileOnTopLevelTrackCounts(finalDicOfTopLevelTracksAndCounts,workDir+"trackCounts.tsv",dbs)    
 
     else:
         print("Script started: "+strftime("%Y-%m-%d %H:%M:%S", localtime()))
         dateRanges = getDateRangesForComparison(numOfMonthsToCompare)
         finalDicWithCutOffDics,parentChildAssociationsDic = OrderedDict(),{}
         for period in dateRanges:
             logFile = generateTrackCounts(dbs,workDir,dateRanges[period]['startDate'],dateRanges[period]['endDate'])
             trackCountsDic,totalCount = createDicFromTrackCountsFile(logFile)
             parentChildAssociationsDic = lookUpTracksToFindParentChildAssociations(trackCountsDic,totalCount,parentChildAssociationsDic,dbs)
             finalDicOfTopLevelTracksAndCounts = buildFinalDicWithOnlyTopLevelTrackCounts(trackCountsDic,parentChildAssociationsDic)
             listOfTracks = makeOrderedDicForSpecificTime(finalDicOfTopLevelTracksAndCounts,dbs)
             finalDicWithCutOffDics[period] = refineTrackCountsBasedOnCutOff(listOfTracks,cutOffThreshhold,period)
             print("The number of tracks that met the criteria for "+period+" is: "+str(len(finalDicWithCutOffDics[period].keys())))
         
         listOfTracksInEligiblePeriods = createFinalListOfTracksThatMeetCutoffEveryMonth(finalDicWithCutOffDics,numOfMonthsToCompare)
         finalOutputTrackDicToReport = constructSortedFinalTrackDicWithAllData(finalDicWithCutOffDics,listOfTracksInEligiblePeriods,numOfMonthsToCompare,dbs)
         writeFinalTrackListToFile(finalOutputTrackDicToReport,workDir,dbs,cutOffThreshhold,numOfMonthsToCompare)
         print("Script finished: "+strftime("%Y-%m-%d %H:%M:%S", localtime()))
 
 main()