#!/usr/bin/python
# dateCheck - compare dateSubmitted / dateUnrestricted metadata between the
# new-assembly and old-assembly .ra files and emit the mdbUpdate commands
# needed to carry the older dates over to the new assembly.
#
# Provenance: state of python/programs/dateCheck/dateCheck after commit
# 71d7eb7202ebdd7a35f404ecd48eb6b4d877ff58 (wong, Thu Sep 8 10:36:03 2011 -0700):
# "added in new functions, check date range in old assembly, check if
# dateUnrestricted is within 9 months of dateSubmitted in old assembly and
# check if new assembly already has dateResubmitted".

import sys
import os
import datetime

# Make the kent python library (two directories up, in lib/) importable.
dir = os.path.dirname(__file__)
library = os.path.join(dir, "../../lib")
sys.path.append(library)


def _loadRaFile(path):
    """Open *path* with the kent RaFile parser.

    The import is done lazily so the pure date helpers in this file can be
    used even when the ucscgenomics library is not on the path.
    """
    from ucscgenomics.rafile.RaFile import RaFile
    return RaFile(path)


def _widenRange(dateRange, date, obj):
    """Return dateRange widened so that it includes date.

    dateRange is a tuple (oldestObj, oldestDate, newestObj, newestDate); a
    false oldestDate means "nothing seen yet".  Dates are compared as
    strings, which orders YYYY-MM-DD values chronologically.
    """
    oldestObj, oldestDate, newestObj, newestDate = dateRange
    if not oldestDate:
        return (obj, date, obj, date)
    if date < oldestDate:
        oldestObj, oldestDate = obj, date
    if date > newestDate:
        newestObj, newestDate = obj, date
    return (oldestObj, oldestDate, newestObj, newestDate)


def indexOldAssembly(rafile):
    """Index the old-assembly .ra file by experiment.

    Returns a dict keyed by the concatenation of expId, replicate and view
    ("NA" is used when a stanza has no replicate).  Each value is a dict
    with 'replicate', 'dateSubmitted', 'dateUnrestricted', 'metaObject' and,
    when the stanza has it, 'dateResubmitted'.  The key allows a direct
    lookup of the old-assembly equivalent of a new-assembly object.

    Side effects (stdout):
      * stanzas without dateSubmitted are reported and skipped;
      * stanzas with an expId but no dateUnrestricted are reported and
        skipped;
      * every indexed stanza whose dateUnrestricted is neither the computed
        dateSubmitted + 9 months nor one day before it is reported;
      * the dateSubmitted and dateUnrestricted ranges over all indexed
        stanzas are printed at the end.
    """
    oldAssmRaFile = _loadRaFile(rafile)
    # (oldestObj, oldestDate, newestObj, newestDate); 0 / "" mean "unset".
    subRange = ("", 0, "", 0)
    unRange = ("", 0, "", 0)
    oldAssmExpIds = {}

    for raKey in oldAssmRaFile.keys():
        stanza = oldAssmRaFile[raKey]
        if 'dateSubmitted' not in stanza:
            print("%s has no dateSubmitted" % (stanza['metaObject']))
            continue
        if 'expId' not in stanza:
            continue
        if 'dateUnrestricted' not in stanza:
            # Without this guard a single incomplete stanza aborts the run
            # with a KeyError.
            print("%s has no dateUnrestricted" % (stanza['metaObject']))
            continue

        metaObject = stanza['metaObject']
        dateSub = stanza['dateSubmitted']
        dateUn = stanza['dateUnrestricted']
        rep = stanza['replicate'] if 'replicate' in stanza else "NA"

        info = {
            'replicate': rep,
            'dateSubmitted': dateSub,
            'dateUnrestricted': dateUn,
            'metaObject': metaObject,
        }
        if 'dateResubmitted' in stanza:
            info['dateResubmitted'] = stanza['dateResubmitted']

        unRange = _widenRange(unRange, dateUn, metaObject)
        # The submitted range must be driven by dateSubmitted (it was
        # previously compared against dateUnrestricted by mistake).
        subRange = _widenRange(subRange, dateSub, metaObject)

        oldAssmExpIds["%s%s%s" % (stanza['expId'], rep, stanza['view'])] = info

        localdateun = calculateDateUn(dateSub)
        if dateUn != localdateun and dateUn != minusOneDay(localdateun):
            print("%s: dateSubmitted = %s, calculated dateUnrestricted = %s, real dateUnrestricted = %s" % (metaObject, dateSub, localdateun, dateUn))

    print("DateSubmitted range in old assembly: %s %s to %s %s" % subRange)
    print("DateUnrestricted range in old assembly: %s %s to %s %s" % unRange)

    return oldAssmExpIds


def dayInMon(mon, year):
    """Return the number of days in month mon (1-12) of year.

    February follows the Gregorian leap-year rule.  Any month number other
    than 2, 4, 6, 9 or 11 yields 31.
    """
    if mon == 2:
        isLeap = (year % 4 == 0 and year % 100 != 0) or year % 400 == 0
        if isLeap:
            return 29
        return 28
    if mon in (4, 6, 9, 11):
        return 30
    return 31


def calculateDateUn(dateIn):
    """Return the date nine months after dateIn, both as 'YYYY-MM-DD'.

    NOTE(review): when the day of dateIn does not exist in the target month
    (e.g. the 31st with a 30-day target month) the month is advanced by one
    and the day is set to the length of the original target month, so
    2010-12-31 gives 2011-10-30.  That is neither a clamp to the end of the
    month nor a true roll-over; the behaviour is kept as found because the
    intended rule cannot be determined from this file - confirm against the
    pipeline that assigns dateUnrestricted.
    """
    splitdate = dateIn.split("-")
    year = int(splitdate[0])
    mon = int(splitdate[1])
    day = int(splitdate[2])

    restday = day
    restyear = year
    restmon = mon + 9
    if restmon > 12:
        restyear = year + 1
        restmon = restmon % 12

    daymon = dayInMon(restmon, restyear)
    if day > daymon:
        restmon = restmon + 1
        restday = daymon
    return "%02d-%02d-%02d" % (restyear, restmon, restday)


def minusOneDay(dateIn):
    """Return the day before dateIn, both as 'YYYY-MM-DD'.

    Rolls back over month boundaries and over the year boundary
    (YYYY-01-01 becomes the previous year's 12-31).
    """
    splitdate = dateIn.split("-")
    year = int(splitdate[0])
    mon = int(splitdate[1])
    day = int(splitdate[2])

    newday = day - 1
    newmon = mon
    newyear = year
    if newday == 0:
        newmon = mon - 1
        if newmon == 0:
            # January 1st: step back into December of the previous year.
            newmon = 12
            newyear = year - 1
        newday = dayInMon(newmon, newyear)
    return "%02d-%02d-%02d" % (newyear, newmon, newday)


def getNewAssemblyObjects(rafile):
    """Collect the dated experiment objects of the new-assembly .ra file.

    Returns a dict keyed by metaObject.  Each value is a dict with 'expId',
    'view', 'replicate' ("NA" when the stanza has none), 'dateSubmitted',
    'dateUnrestricted' and, when the stanza has it, 'dateResubmitted'.

    Only stanzas that have expId, dateSubmitted and dateUnrestricted are
    returned, because main() reads all three for every object.
    """
    ra = _loadRaFile(rafile)
    objList = {}
    for raKey in ra.keys():
        stanza = ra[raKey]
        if 'expId' not in stanza:
            continue
        if 'dateSubmitted' not in stanza or 'dateUnrestricted' not in stanza:
            continue

        objInfo = {
            'expId': stanza['expId'],
            'view': stanza['view'],
            'replicate': stanza['replicate'] if 'replicate' in stanza else "NA",
            'dateUnrestricted': stanza['dateUnrestricted'],
            'dateSubmitted': stanza['dateSubmitted'],
        }
        if 'dateResubmitted' in stanza:
            # Record the resubmission date itself; it used to overwrite
            # dateUnrestricted with dateSubmitted, which hid the field from
            # main() and corrupted the dateUnrestricted comparison.
            objInfo['dateResubmitted'] = stanza['dateResubmitted']
        objList[stanza['metaObject']] = objInfo
    return objList


def main():
    """Command-line entry point.

    Usage: dateCheck newAssemblyTrackDb.ra oldAssemblyTrackDb.ra

    For every new-assembly object that has an old-assembly equivalent
    (same expId, replicate and view), writes an mdbUpdate command to stderr
    when the old assembly has an earlier dateSubmitted and/or an earlier
    dateUnrestricted.  Diagnostics go to stdout.
    """
    if len(sys.argv) != 3:
        sys.exit("usage: dateCheck.py newAssemblyTrackDb.ra oldAssemblyTrackDb.ra")

    oldAssmExpIds = indexOldAssembly(sys.argv[2])
    newAssmObjects = getNewAssemblyObjects(sys.argv[1])

    for key in newAssmObjects:
        newInfo = newAssmObjects[key]
        expid = newInfo['expId']
        rep = newInfo['replicate']
        view = newInfo['view']
        key2 = "%s%s%s" % (expid, rep, view)
        if key2 not in oldAssmExpIds:
            continue
        infoDict = oldAssmExpIds[key2]

        if 'dateResubmitted' in newInfo and 'dateResubmitted' in infoDict:
            print("New Assembly already has dateResubmitted for object %s, old assembly object %s" % (key, infoDict['metaObject']))

        # Dates are YYYY-MM-DD strings, so string comparison is chronological.
        setVars = []
        if infoDict['dateSubmitted'] < newInfo['dateSubmitted']:
            setVars.append("dateSubmitted=%s dateResubmitted=%s" % (infoDict['dateSubmitted'], newInfo['dateSubmitted']))
        if infoDict['dateUnrestricted'] < newInfo['dateUnrestricted']:
            setVars.append("dateUnrestricted=%s" % infoDict['dateUnrestricted'])
        if not setVars:
            continue

        if rep == "NA":
            selectVars = "expId=%s view=%s" % (expid, view)
        else:
            selectVars = "expId=%s view=%s replicate=%s" % (expid, view, rep)
        sys.stderr.write("mdbUpdate hg19 -vars=\"%s\" -setVars=\"%s\"\n" % (selectVars, " ".join(setVars)))


if __name__ == "__main__":
    main()