#!/hive/groups/encode/dcc/bin/python
# Provenance: commit a44e3254174152b2d4b3f241d1935654cd139181
# (vsmalladi, Tue May 8 10:11:21 2012 -0700)
# "Renamed library from ucscgenomics to ucscGb. Redmine #7029."
# File: python/lib/ucscGb/ucscUtils.py
import sys, string, os, re, argparse, subprocess, math


def isGbdbFile(file, table, database):
    """Return 1 if `file` can be located as a /gbdb file, otherwise 0.

    The check first looks for /gbdb/<database>/bbi/<file>.  If that path is
    not a regular file, `hgsql` is asked for the `fileName` column of `table`
    and the second line of its output is tested as a path (presumably the
    first line is the column header -- confirm against hgsql's output).

    Parameters:
        file     -- file name expected under /gbdb/<database>/bbi/
        table    -- table name substituted into the SQL query
        database -- database/assembly name passed to hgsql

    Returns 0 (rather than raising) when hgsql cannot be launched or prints
    fewer than two lines.
    """
    if os.path.isfile("/gbdb/%s/bbi/%s" % (database, file)):
        return 1

    query = "select fileName from (%s)" % table
    try:
        # An argument list (no shell) keeps odd characters in the database or
        # table name from being interpreted by /bin/sh.
        p = subprocess.Popen(
            ["hgsql", database, "-e", query],
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
            close_fds=True,
            universal_newlines=True
        )
    except OSError:
        # hgsql is not installed / not executable: nothing can be looked up.
        return 0

    # communicate() drains the pipe, closes it and reaps the child process.
    cmdoutput = p.communicate()[0]
    lines = cmdoutput.split("\n")
    if len(lines) > 1 and os.path.isfile(lines[1]):
        return 1
    return 0


def makeFileSizes(inlist, path=None):
    """Return the combined size of the files in `inlist`, in whole MiB.

    Each entry is prefixed with "<path>/" when `path` is given.  Symlinks are
    resolved before the size is taken.  The total is rounded up, so any
    non-empty set of files reports at least 1.

    Raises OSError (from os.path.getsize) if a file does not exist.
    """
    total = 0
    for name in inlist:
        if path:
            name = "%s/%s" % (path, name)
        total += int(os.path.getsize(os.path.realpath(name)))

    return int(math.ceil(float(total) / (1024 ** 2)))


def printIter(set, path=None):
    """Return the items of `set` as a sorted list of strings.

    When `path` is given every item is rendered as "<path>/<item>".
    (The parameter name shadows the builtin but is kept for callers that
    pass it by keyword.)
    """
    if path:
        return ["%s/%s" % (path, i) for i in sorted(set)]
    return ["%s" % (i,) for i in sorted(set)]


def zeros(m, n):
    """Return an m-by-n matrix (list of independent row lists) of zeros."""
    return [[0] * n for _ in range(m)]


def mergeList(list1, list2):
    """Merge two lists into a single consensus list.

    The lists are aligned with a modified Needleman-Wunsch alignment
    (match +5, mismatch -5, gap -1, scores floored at 0) and the alignment is
    walked back from the bottom-right corner.  Lines prefixed with a double
    comment ('##') mark the changes needed to turn this into Smith-Waterman;
    uncomment them to convert.

    Unaligned leading items of list1, then of list2, are emitted first,
    followed by the consensus of the aligned region.  Finally duplicates are
    dropped (first occurrence wins), except that items starting with '#' and
    blank/whitespace-only items are always kept.

    Items are assumed to be strings (they are matched against regexes), and
    an item equal to '-' is indistinguishable from an alignment gap.

    Returns a new list; empty inputs are handled (the other list is returned
    de-duplicated).
    """
    m, n = len(list1), len(list2)
    score = zeros(m + 1, n + 1)
    pointer = zeros(m + 1, n + 1)
    penalty = -1
    ##max_i = 0
    ##max_j = 0
    ##maxScore = 0

    for i in range(1, m + 1):
        for j in range(1, n + 1):
            scoreUp = score[i - 1][j] + penalty
            scoreDown = score[i][j - 1] + penalty
            scoreDiagonal = score[i - 1][j - 1] - 5
            if list1[i - 1] == list2[j - 1]:
                scoreDiagonal = scoreDiagonal + 10
            best = max(0, scoreUp, scoreDown, scoreDiagonal)
            score[i][j] = best
            # Tie-break priority: diagonal, then left (2), then up (1);
            # 0 (stop) only when no move reaches the best score.
            if best == scoreDiagonal:
                pointer[i][j] = 3
            elif best == scoreDown:
                pointer[i][j] = 2
            elif best == scoreUp:
                pointer[i][j] = 1
            else:
                pointer[i][j] = 0
            ##if score[i][j] >= maxScore:
            ##    max_i = i
            ##    max_j = j
            ##    maxScore = score[i][j]

    align1, align2 = list(), list()
    ##after_i,after_j = max_i,max_j
    # Start the traceback explicitly at the bottom-right cell.  Relying on the
    # leftover loop variables fails when either list is empty, because the
    # loops above never bind them.
    i, j = m, n
    while pointer[i][j] != 0:
        if pointer[i][j] == 3:
            align1.append(str(list1[i - 1]))
            align2.append(str(list2[j - 1]))
            i = i - 1
            j = j - 1
        elif pointer[i][j] == 2:
            align1.append('-')
            align2.append(str(list2[j - 1]))
            j = j - 1
        elif pointer[i][j] == 1:
            align1.append(str(list1[i - 1]))
            align2.append('-')
            i = i - 1

    before_i, before_j = i, j
    align1.reverse()
    align2.reverse()

    consensus = list()
    for a, b in zip(align1, align2):
        if a == b:
            consensus.append(a)
        elif a == "-":
            consensus.append(b)
        elif b == "-":
            consensus.append(a)
        else:
            # Genuine mismatch: keep both, list1's item first.
            consensus.append(a)
            consensus.append(b)

    # Items that precede the aligned region in either list.
    before = list()
    if before_i != 0:
        before.extend(list1[0:before_i])
    if before_j != 0:
        before.extend(list2[0:before_j])
    ##if after_i < len(list1):
    ##    consensus.extend(list1[after_i:])
    ##if after_j < len(list2):
    ##    consensus.extend(list2[after_j:])
    before.extend(consensus)

    setcon = list()
    seen = set()
    commentLine = re.compile(r'^#')
    blankLine = re.compile(r'^\s*$')
    for item in before:
        # Comments and blank lines are structural; never de-duplicate them.
        if commentLine.match(item) or blankLine.match(item):
            setcon.append(item)
            continue
        if item in seen:
            continue
        seen.add(item)
        setcon.append(item)

    return setcon