#!/hive/groups/encode/dcc/bin/python
# Source: python/lib/ucscgenomics/ucscUtils.py as of commit
# a0f9d361a3f365777aab3843309aa534e5277e03
# (wong, Thu Jan 5 10:56:29 2012 -0800, "first draft of raMerging functions").
import sys, string, os, re, argparse, subprocess, math


def isGbdbFile(file, table, database):
    """Return 1 if `file` can be found as a gbdb file, else 0.

    The file is first looked up at /gbdb/<database>/bbi/<file>.  If it is
    not there, `hgsql` is asked for the fileName column of `table` and the
    second line of its output is tested as a path.

    `database` and `table` are interpolated into a shell command, so callers
    must only pass trusted values.
    """
    if os.path.isfile("/gbdb/%s/bbi/%s" % (database, file)):
        return 1

    cmd = "hgsql %s -e \"select fileName from (%s)\"" % (database, table)
    p = subprocess.Popen(cmd, shell=True, stdin=subprocess.PIPE,
                         stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
                         close_fds=True, universal_newlines=True)
    # communicate() closes stdin and waits for hgsql to exit, so the child
    # is reaped and its pipes are released.
    cmdoutput = p.communicate()[0]
    lines = cmdoutput.split("\n")
    # Index 1 is used as the first result; presumably line 0 is the column
    # header printed by hgsql -- confirm.  Output shorter than two lines
    # simply means "not found" instead of raising IndexError.
    if len(lines) > 1 and os.path.isfile(lines[1]):
        return 1
    return 0


# NOTE(review): the patch this file was rebuilt from shows only the signature
# and the tail of makeFileSizes; the lines between them fall outside the diff
# hunks, so the function is not reconstructed here.  Visible fragments:
#
# def makeFileSizes(inlist, path=None):
#     ... (not visible in the patch) ...
#     for i in checklist:
#         realpath = os.path.realpath(i)
#         filesizes = filesizes + int(os.path.getsize(realpath))
#     filesizes = math.ceil(float(filesizes) / (1024**2))
#     return int(filesizes)


def printIter(set, path=None):
    """Return the items of `set` in sorted order, formatted as strings.

    When `path` is truthy every item is rendered as "<path>/<item>",
    otherwise as the item's own "%s" formatting.
    """
    if path:
        return ["%s/%s" % (path, i) for i in sorted(set)]
    return ["%s" % (i) for i in sorted(set)]


def zeros(m, n):
    """Return an m-by-n matrix (list of m independent rows) filled with 0."""
    return [[0] * n for _ in range(m)]


def mergeList(list1, list2):
    """Merge two ordered lists into a single list without duplicates.

    The lists are aligned with a dynamic-programming score matrix: equal
    elements on the diagonal earn +10, a gap costs 1, a mismatch on the
    diagonal costs nothing, and scores never drop below 0.  The alignment
    is traced back from the bottom-right cell, turned into a consensus
    sequence, surrounded by the unaligned head and tail of both lists, and
    finally de-duplicated keeping the first occurrence of each element.

    Either list may be empty; the result is then the other list with
    duplicates removed.

    Elements that end up in the alignment are returned as str(element);
    elements taken from the unaligned head or tail are returned unchanged.
    """
    m, n = len(list1), len(list2)
    score = zeros(m + 1, n + 1)
    pointer = zeros(m + 1, n + 1)
    penalty = -1
    max_i = 0
    max_j = 0
    maxScore = 0

    for i in range(1, m + 1):
        for j in range(1, n + 1):
            scoreUp = score[i - 1][j] + penalty
            scoreDown = score[i][j - 1] + penalty
            scoreDiagonal = score[i - 1][j - 1]
            if list1[i - 1] == list2[j - 1]:
                scoreDiagonal = scoreDiagonal + 10
            best = max(0, scoreUp, scoreDown, scoreDiagonal)
            score[i][j] = best
            # Tie-break order: diagonal, then left (2), then up (1).
            # scoreDiagonal is never negative, so a best score of 0 always
            # equals it; interior cells therefore never get pointer 0 and
            # the traceback only stops on row 0 or column 0.
            if best == scoreDiagonal:
                pointer[i][j] = 3
            elif best == scoreDown:
                pointer[i][j] = 2
            elif best == scoreUp:
                pointer[i][j] = 1
            else:
                pointer[i][j] = 0
            # ">=" keeps the last best-scoring cell in row-major order.
            if best >= maxScore:
                max_i = i
                max_j = j
                maxScore = best

    after_i, after_j = max_i, max_j

    # Start the traceback at the bottom-right cell.  Setting i and j here
    # (instead of relying on the fill loops' leftover values) keeps the
    # function working when either list is empty.
    # NOTE(review): a textbook local alignment would start at
    # (max_i, max_j); the existing starting point is kept so results for
    # non-empty inputs do not change.
    i, j = m, n
    # A private sentinel marks gaps so a real element equal to '-' is never
    # mistaken for one.
    gap = object()
    align1, align2 = [], []

    while pointer[i][j] != 0:
        step = pointer[i][j]
        if step == 3:
            align1.append(str(list1[i - 1]))
            align2.append(str(list2[j - 1]))
            i = i - 1
            j = j - 1
        elif step == 2:
            align1.append(gap)
            align2.append(str(list2[j - 1]))
            j = j - 1
        elif step == 1:
            align1.append(str(list1[i - 1]))
            align2.append(gap)
            i = i - 1

    before_i, before_j = i, j
    align1.reverse()
    align2.reverse()

    consensus = []
    for a, b in zip(align1, align2):
        if a is gap:
            consensus.append(b)
        elif b is gap:
            consensus.append(a)
        elif a == b:
            consensus.append(a)
        else:
            # Mismatched pair: keep both, list1's element first.
            consensus.append(a)
            consensus.append(b)

    # Head of both lists, the consensus, then whatever follows the
    # best-scoring cell.  The tail may repeat aligned elements; the
    # de-duplication below removes them.
    merged = []
    merged.extend(list1[0:before_i])
    merged.extend(list2[0:before_j])
    merged.extend(consensus)
    merged.extend(list1[after_i:])
    merged.extend(list2[after_j:])

    # List membership (not a set) so unhashable elements still work.
    setcon = []
    for item in merged:
        if item not in setcon:
            setcon.append(item)

    return setcon