src/utils/qa/trackCountsParse d122f99d0953b764550091651e882f03af2453fb

d122f99d0953b764550091651e882f03af2453fb
lrnassar
  Wed Jan 24 14:05:59 2024 -0800
Adding new script that parses track counts to report either 1. The amount of usage over a period of time, or 2. Report a number of tracks under a certain threshold. This is to be used in determining tracks that we should hide, nest, depracate or archive. Refs #32813

diff --git src/utils/qa/trackCountsParse src/utils/qa/trackCountsParse
new file mode 100755
index 0000000..f6bd6b7
--- /dev/null
+++ src/utils/qa/trackCountsParse
@@ -0,0 +1,410 @@
+#!/usr/bin/env python3
+
+import subprocess
+from collections import OrderedDict
+from time import localtime, strftime
+from datetime import datetime
+from dateutil.relativedelta import relativedelta
+import calendar
+import math
+import subprocess,sys,argparse
+
+def parseArgs():
+    """
+    Parse the command line arguments.
+    """
+    parser = argparse.ArgumentParser(description = __doc__,
+                                     formatter_class=argparse.RawDescriptionHelpFormatter)
+    optional = parser._action_groups.pop()
+
+    required = parser.add_argument_group('required arguments')
+
+    required.add_argument ("dbs",
+        help = "Database to query for track counts, e.g. hg19, hg38, mm10.")
+    required.add_argument ("workDir",
+        help = "Work directory to use for processing and final output. Use full path with '/' at the end.")
+    optional.add_argument ("-c", "--cutOffThreshhold", dest = "cutOffThreshhold", default = .3,
+        help = "Optional: The % value, as compared to the trackCounts median, to be used " + \
+            "as a threshhold to choose what tracks should be filtered. Default is .3.")
+    optional.add_argument ("-n", "--numOfMonthsToCompare", dest = "numOfMonthsToCompare", default = 6,
+        help = "Optional: The number of months to compare for filtering. " + \
+            "Default is 6.")
+    optional.add_argument ("-r", "--singleReport", default = False, action = "store_true",
+        help = "Optional: Run as a singleReport. This generates track counts for the specified " + \
+            "dates. This is useful for seeing track counts over a period of time. Requires the vars below")
+    optional.add_argument ("-s", "--startDate", dest = "startDate",
+        help = "Optional: The start date when running in singleReport mode. " + \
+            "Date should be formatted as YYYY-MM-DD.")
+    optional.add_argument ("-e", "--endDate", dest = "endDate",
+        help = "Optional: The end date when running in singleReport mode. " + \
+            "Date should be formatted as YYYY-MM-DD.")
+    if (len(sys.argv) == 1):
+        parser.print_usage()
+        print("\nGenerates track counts based on the log parsing paring script '/hive/users/chmalee/logs/byDate/makeUsageReport'.\n" + \
+              "The default behavior looks at track counts over the last 6 months and generates a list of tracks that\n" + \
+              "were below 30% of the median track usage every month, allowing for a floor(n/4) exception where n is\n" + \
+              "the number of months searched. Using the default 6 months, that allows for 1 exemption.\n" + \
+              "This output can be used in order to identify seldomly used tracks for archiving, retiring or restructuring.\n\n" + \
+              "Alternatively, the script can be run in 'singleReport(-r)' mode where it will generate a sorted list\n" + \
+              "of track counts over a period of time. This could be useful for reporting purposes.\n\n" + \
+
+              "Example runs:\n" + \
+              "    trackCountsParse hg38 /hive/users/lrnassar/trackCounts/\n" + \
+              "    trackCountsParse hg38 /hive/users/lrnassar/trackCounts/ -c .5 -n 12\n" + \
+              "    trackCountsParse hg38 /hive/users/lrnassar/trackCounts/ -r -s 2023-01-01 -e 2023-12-31\n")
+        
+        exit(0)
+    parser._action_groups.append(optional)
+    options = parser.parse_args()
+    return  options
+
+def bash(cmd):
+    """Run the cmd in bash subprocess"""
+    try:
+        rawBashOutput = subprocess.run(cmd, check=True, shell=True,\
+                                       stdout=subprocess.PIPE, universal_newlines=True, stderr=subprocess.STDOUT)
+        bashStdoutt = rawBashOutput.stdout
+    except subprocess.CalledProcessError as e:
+        raise RuntimeError("command '{}' return with error (code {}): {}".format(e.cmd, e.returncode, e.output))
+    return(bashStdoutt)
+
+def generateTrackCounts(dbs,workDir,startDate,endDate):
+    #Generate track usage report binned by month for a specific time frame and dbs
+    #Format date format is XXXX-XX-XX, e.g. 2023-11-01
+    outputFileName = workDir+dbs+"."+startDate+"to"+endDate+".trackCounts.txt"
+    #Run the script and remove the ct line (ct_), hub lines (hub_), dup lines (dup_)
+    #And header line (^#), as well as remove the first column of repeating database
+    bash("/hive/users/chmalee/logs/byDate/makeUsageReport -t -db "+dbs+" --bin-months -s "+startDate+" -e "+endDate+" | grep -v \"hub_\|ct_\|dup_\|^#\" | cut -f2- > "+outputFileName)
+    return(outputFileName)
+
+def createDicFromTrackCountsFile(trackCountsFilePath):
+    """
+    This function reads a file containing track counts and creates a dictionary 
+    with track names as keys and their corresponding counts as values.
+    
+    Args:
+    - trackCountsFilePath (str): Path to the file containing track counts.
+    
+    Returns:
+    - dict: A dictionary containing track names as keys and their counts as values.
+    """
+    trackList = open(trackCountsFilePath, 'r')
+    trackCountsDic = {}
+    for line in trackList:
+        parsedLine = line.rstrip().split("\t")
+        if parsedLine[0].endswith(":"):
+            currentTrackName = parsedLine[0][0:len(parsedLine[0])-1]
+        else:
+            currentTrackName = parsedLine[0]
+        if currentTrackName not in trackCountsDic.keys():
+            trackCountsDic[currentTrackName] = {}
+            trackCountsDic[currentTrackName]["Count"] = parsedLine[1]
+        else:
+            trackCountsDic[currentTrackName]["Count"] = str(int(trackCountsDic[currentTrackName]["Count"]) + int(parsedLine[1]))
+    trackList.close()
+    totalCount = len(trackCountsDic.keys())
+    print("Total tracks to parse: "+str(totalCount))
+    return(trackCountsDic,totalCount)
+
+def checkIfThereIsAHigherLevelParentTrack(parentChildAssociationsDic,trackName,firstTry,dbs):
+    """
+    Recursive function that searches for deeper associations and builds dictionary
+    """
+    if firstTry == False:
+        currentParentName = parentChildAssociationsDic[trackName]['parentName']
+        tdbQuery = bash("tdbQuery \"select * from "+dbs+" where track='"+currentParentName+"'\"").split("\n")
+    else:
+        tdbQuery = bash("tdbQuery \"select * from "+dbs+" where track='"+trackName+"'\"").split("\n")
+    if "compositeTrack" in str(tdbQuery) or "superTrack" in str(tdbQuery)  or "parent" in str(tdbQuery):
+        for entry in tdbQuery:        
+            if entry.startswith("parent"):
+                parentChildAssociationsDic[trackName]['Container'] = True #I ADDED THIS LINE IF THERE ARE ISSUES
+                parentName = entry.split(" ")[1]
+                parentChildAssociationsDic[trackName]['parentName'] = parentName
+            elif entry.startswith("superTrack"):
+                entry = entry.split(" ")
+                if entry[1] != "on":
+                    parentChildAssociationsDic[trackName]['Container'] = True
+                    parentName = entry[1]
+                    parentChildAssociationsDic[trackName]['parentName'] = parentName
+    else:
+        if firstTry != False:
+            parentChildAssociationsDic[trackName]['Container'] = False
+    return(parentChildAssociationsDic)
+
+def lookUpTracksToFindParentChildAssociations(trackCountsDic,totalCount,parentChildAssociationsDic,dbs):
+    """
+    This function takes a dictionary of track names and their details, 
+    queries a database to find parent-child relationships for each track,
+    and updates the dictionary with the associated parent track information.
+    
+    Args:
+    - trackCountsDic (dict): A dictionary containing track names and their details.
+    
+    Returns:
+    - dict: An updated dictionary containing parent-child association information.
+    """
+    n=0
+    for trackName in trackCountsDic.keys():
+        if trackName not in parentChildAssociationsDic.keys():
+            parentChildAssociationsDic[trackName] = {}
+            n+=1
+            if n%2000 == 0:
+                print(str(n)+" out of "+str(totalCount))
+            #Make a first check to see if there are parents
+            parentChildAssociationsDic = checkIfThereIsAHigherLevelParentTrack(parentChildAssociationsDic,trackName,True,dbs)
+            #Check to see if there is a higher level parent
+            if 'parentName' in parentChildAssociationsDic[trackName].keys():
+                parentChildAssociationsDic = checkIfThereIsAHigherLevelParentTrack(parentChildAssociationsDic,trackName,False,dbs)
+            #The top level tracks have container on, but no parent
+            else:
+                parentChildAssociationsDic[trackName]['Container'] = False
+            #Check to see if there is a final higher level parent
+            if 'parentName' in parentChildAssociationsDic[trackName].keys():
+                parentChildAssociationsDic = checkIfThereIsAHigherLevelParentTrack(parentChildAssociationsDic,trackName,False,dbs)        
+
+    return(parentChildAssociationsDic)
+
+def buildFinalDicWithOnlyTopLevelTrackCounts(trackCountsDic,parentChildAssociationsDic):
+    """
+    Iterate through the dictionary of all track counts + parental relationships
+    and create a final dic that only includes all possible top-level tracks
+    with the highest possible count from any of its children.
+    """
+    finalDicOfTopLevelTracksAndCounts = {}
+    for trackName in trackCountsDic.keys():
+        if parentChildAssociationsDic[trackName]['Container'] is False:
+            if trackName not in finalDicOfTopLevelTracksAndCounts.keys():
+                finalDicOfTopLevelTracksAndCounts[trackName] = trackCountsDic[trackName]['Count']
+            elif int(trackCountsDic[trackName]['Count']) > int(finalDicOfTopLevelTracksAndCounts[trackName]):
+                finalDicOfTopLevelTracksAndCounts[trackName] = trackCountsDic[trackName]['Count']
+        else:
+            if parentChildAssociationsDic[trackName]['parentName'] not in finalDicOfTopLevelTracksAndCounts.keys():
+                finalDicOfTopLevelTracksAndCounts[parentChildAssociationsDic[trackName]['parentName']] = trackCountsDic[trackName]['Count']
+            else:
+                if int(trackCountsDic[trackName]['Count']) > int(finalDicOfTopLevelTracksAndCounts[parentChildAssociationsDic[trackName]['parentName']]):
+                    finalDicOfTopLevelTracksAndCounts[parentChildAssociationsDic[trackName]['parentName']] = trackCountsDic[trackName]['Count']
+    return(finalDicOfTopLevelTracksAndCounts)
+
+def makeFinalFileOnTopLevelTrackCounts(finalDicOfTopLevelTracksAndCounts,pathUrl,dbs):
+    """
+    This function creates a final output file containing details of top-level tracks,
+    including their short labels (if available) and counts. The file is saved at the
+    specified path URL.
+    
+    Args:
+    - finalDicOfTopLevelTracksAndCounts (dict): A dictionary containing top-level track names 
+                                                and their respective counts.
+    - pathUrl (str): The path where the final output file will be saved.
+    """
+    outputFile = open(pathUrl, 'w')
+    n=0
+    for key in finalDicOfTopLevelTracksAndCounts.keys():
+        if key != "":
+            tdbQuery = bash("tdbQuery \"select * from "+dbs+" where track='"+key+"'\"").split("\n")
+            if "shortLabel" in str(tdbQuery):
+                for entry in tdbQuery:        
+                    if entry.startswith("shortLabel"):
+                        n+=1
+                        shortLabel = " ".join(entry.split(" ")[1:])
+                        outputFile.write(key+"\t"+shortLabel+"\t"+str(finalDicOfTopLevelTracksAndCounts[key])+"\n")
+            else:
+                n+=1
+                outputFile.write(key+"\t"+key+"\t"+str(finalDicOfTopLevelTracksAndCounts[key])+"\n")
+    print("Final file completed. Tota number of tracks: "+str(n))
+    outputFile.close()
+    #Order and sort final file
+    bash("sort -t $'\t' -k3 -rn "+pathUrl+" > "+pathUrl+".sorted")
+    print("Final sorted file: "+pathUrl+".sorted")
+
+def get_count(dicField):
+    return dicField['trackCounts']
+
+def makeOrderedDicForSpecificTime(finalDicOfTopLevelTracksAndCounts,dbs):
+    orderedDic = OrderedDict()
+    listOfCountsToSort = []
+    n=0
+    #Fetch the shortLabels and make a list where each entry is a dic with trackName, shortLabel, and trackCount
+    for key in finalDicOfTopLevelTracksAndCounts.keys():
+        if key != "":
+            tdbQuery = bash("tdbQuery \"select * from "+dbs+" where track='"+key+"'\"").split("\n")
+            if "shortLabel" in str(tdbQuery):
+                for entry in tdbQuery:        
+                    if entry.startswith("shortLabel"):
+                        n+=1
+                        shortLabel = " ".join(entry.split(" ")[1:])
+                        listOfCountsToSort.append({'trackName':key,'shortLabel':shortLabel,'trackCounts':int(finalDicOfTopLevelTracksAndCounts[key])})
+            else:
+                n+=1
+                listOfCountsToSort.append({'trackName':key,'shortLabel':key,'trackCounts':int(finalDicOfTopLevelTracksAndCounts[key])})
+    print("Tota number of tracks: "+str(n))
+    
+    #Sort the trackCounts list to return in order to find data that meets cutoff threshhold
+    listOfCountsToSort.sort(key=get_count, reverse=True)
+    return(listOfCountsToSort)
+    
+def refineTrackCountsBasedOnCutOff(listOfTracks,cutOffThreshhold,period):
+    """
+    Take an ordered list containing dics of track counts and filter it based
+    on a set threshhold. Then return a new ordered dictionary that contains
+    the tracks below the threshhold with trackNames as keys and shortLabel
+    and counts as values
+    """
+    trackCountCutoff = listOfTracks[int(len(listOfTracks)/2)]['trackCounts']*cutOffThreshhold
+    finalTrackCountsDic = OrderedDict()
+    for track in listOfTracks:
+        if track['trackCounts'] < trackCountCutoff:
+            finalTrackCountsDic[track['trackName']]={'shortLabel':track['shortLabel'],'trackCounts':track['trackCounts'],'countComparedToMaxForPeriod':track['trackCounts']/listOfTracks[0]["trackCounts"],'countComparedToCutoff':track['trackCounts']/trackCountCutoff}
+    print("The trackCount cutoff for "+period+" is: "+str(trackCountCutoff))
+    return(finalTrackCountsDic)
+
+def getDateRangesForComparison(numOfMonthsToCompare):
+    """
+    Based on a number of months to compare given, find the year + month combination
+    followed by the last day of each month to be used in the log query script. Return
+    an ordered dictionary with the date ranges as keys, which will be used as the titles
+    of the respective final ouputs, and the start/end dates as the content.
+    **Note** This subtracts an additional month from the latest month in order
+    to ensure that the logs chosen are complete.
+    """
+    dateRanges = OrderedDict()
+    date = datetime.today().strftime('%Y-%m')
+    for number in range(numOfMonthsToCompare):
+        monthToParse = datetime.strftime(datetime.strptime(date, '%Y-%m') - relativedelta(months=number+1), '%Y-%m')
+        year = monthToParse.split('-')[0]
+        month = monthToParse.split('-')[1]
+        lastDateOfMonth = calendar.monthrange(int(year), int(month))[1]
+        startDate = year+"-"+month+"-01"
+        endDate = year+"-"+month+"-"+str(lastDateOfMonth)
+        dateRanges[startDate+"-"+endDate] = {'startDate':startDate,'endDate':endDate}
+    return(dateRanges)
+
+def createFinalListOfTracksThatMeetCutoffEveryMonth(finalDicWithCutOffDics,numOfMonthsToCompare):
+    """
+    Iterates through all of the monthly dictionaries and creates a final list
+    where only tracks present in every period are present. This is to filter
+    out monthly outliers. It then reports which ones were filtered, if any.
+    Due to data being weird, if we are checking at least 4 months allow
+    for outliers of floor(n/4)
+    """
+    initialTrackList = {}
+    listOfTracksInEligiblePeriods = []
+    listOfTracksFilteredOut = []
+    #Create initial list of all tracks present in these periods
+    for period in finalDicWithCutOffDics.keys():
+        for track in finalDicWithCutOffDics[period].keys():
+            if track not in initialTrackList:
+                initialTrackList[track] = 0
+    #Due to data being weird, if we are checking at least 4 months allow
+    #for outliers of floor(n/4)
+    outlierMonthsExcemption = math.floor(numOfMonthsToCompare/4)
+    #Go over the list and add a penalty of 1 for every period in which
+    #the track is missing, then create a final list of tracks that
+    #pass through the filter
+    for period in finalDicWithCutOffDics.keys():
+        for track in initialTrackList.keys():
+            if track not in finalDicWithCutOffDics[period].keys():
+                initialTrackList[track] = initialTrackList[track] + 1
+    for track in initialTrackList.keys():
+        if initialTrackList[track] <= outlierMonthsExcemption:
+            listOfTracksInEligiblePeriods.append(track)
+        else:
+            listOfTracksFilteredOut.append(track)
+    if listOfTracksFilteredOut != []:
+        print("The following tracks were filtered out because they did not meet")
+        print("the cutoff in all of the months specified:\n")
+        for track in listOfTracksFilteredOut:
+            print(track)
+    return(listOfTracksInEligiblePeriods)
+
+def get_avCount(dicField):
+    """Helper function to help sort dict"""
+    return dicField['averageTrackCount']
+
+def constructSortedFinalTrackDicWithAllData(finalDicWithCutOffDics,listOfTracksInEligiblePeriods,numOfMonthsToCompare,dbs):
+    """
+    Takes in a final list of track names which have met all the conditions for potential archiving
+    and constructs a final dictionary with all of the data sorted. This includes averages over
+    the time period for the track counts as well as the comparison to median/max. The track group
+    is also queried for use in the ultimate decision.
+    """
+    firstPeriod = next(iter(finalDicWithCutOffDics))
+    listOfTracksToReport = []
+    for track in listOfTracksInEligiblePeriods:
+        group = ""
+        addedTrackCounts = 0
+        addedCountComparedToMaxForPeriod = 0
+        addedCountComparedToCutoff = 0      
+        missingMonths = 0 #This checks for the floor(n/4) tolerance that tracks can be missing
+        for period in finalDicWithCutOffDics.keys():
+            if track in finalDicWithCutOffDics[period].keys():
+                if group == "":
+                    shortLabel = finalDicWithCutOffDics[period][track]["shortLabel"]
+                    tdbQuery = bash("tdbQuery \"select group from "+dbs+" where track='"+track+"'\"").split("\n")
+                    if 'group' in str(tdbQuery):
+                        group = tdbQuery[0].split(" ")[1]
+                    else: 
+                        group = "No group"
+                addedTrackCounts+=finalDicWithCutOffDics[period][track]['trackCounts']
+                addedCountComparedToMaxForPeriod+=finalDicWithCutOffDics[period][track]['countComparedToMaxForPeriod']
+                addedCountComparedToCutoff+=finalDicWithCutOffDics[period][track]['countComparedToCutoff']
+            else:
+                missingMonths+=1
+                
+        averageTrackCount = round(addedTrackCounts/(numOfMonthsToCompare-missingMonths),2)
+        averageCountComparedToMaxForPeriod = round(addedCountComparedToMaxForPeriod/(numOfMonthsToCompare-missingMonths),4)
+        averageCountComparedToCutoff = round(addedCountComparedToCutoff/(numOfMonthsToCompare-missingMonths),2)
+        listOfTracksToReport.append({'trackName':track,'shortLabel':shortLabel,'group':group,'averageTrackCount':averageTrackCount,'averageCountComparedToMaxForPeriod':averageCountComparedToMaxForPeriod,'averageCountComparedToCutoff':averageCountComparedToCutoff})
+    listOfTracksToReport.sort(key=get_avCount, reverse=True)
+    return(listOfTracksToReport)
+
+def writeFinalTrackListToFile(finalOutputTrackDicToReport,workDir,dbs,cutOffThreshhold,numOfMonthsToCompare):
+    """
+    Take the final processed dictionary and write it out to a tsv file including the vars
+    used in the data generation.
+    """
+    date = datetime.today().strftime('%Y-%m')
+    monthToParse = datetime.strftime(datetime.strptime(date, '%Y-%m') - relativedelta(months=numOfMonthsToCompare), '%Y-%m')
+    fileNamePathStartToEndDate = workDir+monthToParse+"-"+date+"."+dbs+".tracksToArchive.tsv"
+    finalOutputFile = open(fileNamePathStartToEndDate,'w')
+    finalOutputFile.write("#Variables used in this file generation: dbs="+dbs+" numOfMonthsToCompare="+str(numOfMonthsToCompare)+" cutOffThreshhold="+str(cutOffThreshhold)+"\n")
+    finalOutputFile.write("#trackName\tshortLabel\tgroup\taverageTrackCount\taverageCountComparedToMaxForPeriod\taverageCountComparedToCutoff\n")
+    for track in finalOutputTrackDicToReport:
+        finalOutputFile.write(track['trackName']+"\t"+track['shortLabel']+"\t"+track['group']+"\t"+str(track['averageTrackCount'])+"\t"+str(track['averageCountComparedToMaxForPeriod'])+"\t"+str(track['averageCountComparedToCutoff'])+"\n")
+    finalOutputFile.close()
+    print("\nCutoff tracks file complete: "+fileNamePathStartToEndDate)
+    print("\nYou can pipe this output into ~markd/bin/tabFmt removing the first line to get a nicely formatted output.")
+
+def main():
+    """Initialize options and call other functions"""
+    options = parseArgs()
+    dbs,workDir,cutOffThreshhold,numOfMonthsToCompare = options.dbs,options.workDir,options.cutOffThreshhold,options.numOfMonthsToCompare
+    #Line below exists only for debugging purposes
+    #dbs,workDir,cutOffThreshhold,numOfMonthsToCompare = 'hg38','/hive/users/lrnassar/temp/tmp/',.3,12
+    if options.singleReport == True:
+        startDate,endDate,parentChildAssociationsDic = options.startDate,options.endDate,{}
+        logFile = generateTrackCounts(dbs,workDir,startDate,endDate)
+        trackCountsDic,totalCount = createDicFromTrackCountsFile(logFile)
+        parentChildAssociationsDic = lookUpTracksToFindParentChildAssociations(trackCountsDic,totalCount,parentChildAssociationsDic,dbs)
+        finalDicOfTopLevelTracksAndCounts = buildFinalDicWithOnlyTopLevelTrackCounts(trackCountsDic,parentChildAssociationsDic)
+        makeFinalFileOnTopLevelTrackCounts(finalDicOfTopLevelTracksAndCounts,workDir+"trackCounts.tsv",dbs)    
+
+    else:
+        print("Script started: "+strftime("%Y-%m-%d %H:%M:%S", localtime()))
+        dateRanges = getDateRangesForComparison(numOfMonthsToCompare)
+        finalDicWithCutOffDics,parentChildAssociationsDic = OrderedDict(),{}
+        for period in dateRanges:
+            logFile = generateTrackCounts(dbs,workDir,dateRanges[period]['startDate'],dateRanges[period]['endDate'])
+            trackCountsDic,totalCount = createDicFromTrackCountsFile(logFile)
+            parentChildAssociationsDic = lookUpTracksToFindParentChildAssociations(trackCountsDic,totalCount,parentChildAssociationsDic,dbs)
+            finalDicOfTopLevelTracksAndCounts = buildFinalDicWithOnlyTopLevelTrackCounts(trackCountsDic,parentChildAssociationsDic)
+            listOfTracks = makeOrderedDicForSpecificTime(finalDicOfTopLevelTracksAndCounts,dbs)
+            finalDicWithCutOffDics[period] = refineTrackCountsBasedOnCutOff(listOfTracks,cutOffThreshhold,period)
+            print("The number of tracks that met the criteria for "+period+" is: "+str(len(finalDicWithCutOffDics[period].keys())))
+        
+        listOfTracksInEligiblePeriods = createFinalListOfTracksThatMeetCutoffEveryMonth(finalDicWithCutOffDics,numOfMonthsToCompare)
+        finalOutputTrackDicToReport = constructSortedFinalTrackDicWithAllData(finalDicWithCutOffDics,listOfTracksInEligiblePeriods,numOfMonthsToCompare,dbs)
+        writeFinalTrackListToFile(finalOutputTrackDicToReport,workDir,dbs,cutOffThreshhold,numOfMonthsToCompare)
+        print("Script finished: "+strftime("%Y-%m-%d %H:%M:%S", localtime()))
+
+main()