e975034e529d38834c40b7c5b10519d2c06372f8 max Wed May 13 08:11:17 2020 -0700 making a few fixes to genbankToBigGenePred, refs #25114 diff --git src/utils/genbankToBigGenePred src/utils/genbankToBigGenePred index 60d6279..fb0af70 100755 --- src/utils/genbankToBigGenePred +++ src/utils/genbankToBigGenePred @@ -123,47 +123,47 @@ row.append(geneId) row.append("cmpl") # ??? row.append("cmpl") row.append(exonFrames) # exonFrames row.append("N.a.") # transcript type row.append(geneId) # Primary identifier for gene row.append(geneId) # Alternative/human readable gene name row.append("N.a.") # Gene type row.append(ft.qualifiers.get("note", [""])[0]) # Notes row.append(protId) # Protein product ID row.append(xrefs.get("GeneID", "")) # NCBI Gene ID row.append(str(ft.extract(rec).seq)) # Gene type row.append("") # cDNA PSL row.append(repr(ft.qualifiers)) # protein sequence row.append("") # prot to cDNA PSL - row.append(repr(ft.qualifiers)) # anythign else + #row.append(repr(ft.qualifiers)) # anythign else else: row.append(ft.qualifiers.get("inference", [""])[0]) row.append(ft.qualifiers.get("function", [""])[0]) #row.append(repr(ft.qualifiers)) # anythign else, for debugging row = [str(x) for x in row] ofh.write("\t".join(row)) ofh.write("\n") def bedToBigBed(fname, chromSizes, asName, bbDir): " convert bed to bigBed " cmd = ["bedSort", fname, fname] assert (subprocess.call(cmd)==0) outFname = basename(fname).replace(".bed", ".bb") - cmd = ["/cluster/home/braney/bin/x86_64/bedToBigBed", "-tab", "-type=bed12+", "-as="+asName, fname, chromSizes, join(bbDir, outFname)] + cmd = ["/cluster/home/braney/bin/x86_64/bedToBigBed", "-tab", "-type=bed12+", "-as="+asName, fname, chromSizes, join(bbDir, outFname), "-extraIndex=name", "-allow1bpOverlap"] logging.info("Running: "+(" ".join(cmd))) ret = subprocess.call(cmd) assert(ret==0) def gbToBigGenePred(fname, chromName, chromSizes, outDir, bbDir): outFnames = { "source" : join(outDir, "source.ra"), "genes" : join(outDir, "genes.bed"), "peptides" : join(outDir, "peptides.bed"), "other" : join(outDir, "other.bed"), } outFhs = {} for k, val in outFnames.items(): outFhs[k] = open(val, "w")