ccbcc084780401ca9e070e3ce2e6da99a56db615 chmalee Wed Jun 20 12:03:19 2018 -0700 changes to expMatrixToBarchartBed after code review, refs #21626 diff --git src/utils/expMatrixToBarchartBed/expMatrixToBarchartBed src/utils/expMatrixToBarchartBed/expMatrixToBarchartBed index cb1af09..7154139 100755 --- src/utils/expMatrixToBarchartBed/expMatrixToBarchartBed +++ src/utils/expMatrixToBarchartBed/expMatrixToBarchartBed @@ -206,31 +206,31 @@ groupAverages[columnToGroup[count]].append(float(col)) count += 1 # Store some information on the bed file. Most important is the groupOrder. if getBedInfo: getBedInfo = False groups = "" bedInfo += "#chr\tchromStart\tchromEnd\tname\tscore\tstrand\tname2\texpCount\texpScores;" if (groupOrder is not None): for group in open(groupOrder, "r"): groups += group.strip("\n") + " " else: for key, value in sorted(groupAverages.iteritems()): groups += key + " " - if autoSql: + if autoSql and len(autoSql) != 11: # parseExtraFields requires first 11 fields to be standard bedInfo += groups[:-1] + "\t_offset\t_lineLength\t" + "\t".join(autoSql[11:]) else: bedInfo += groups[:-1] + "\t_offset\t_lineLength" # Write out the transcript name, this is needed to join with coordinates later. bedLikeFile.write(splitLine[0] + "\t") # Create a list of the average scores per group. bedLine = "" # The fullAverage is used to assign a tpm score representative of the entire bed row. fullAverage = 0.0 count = 0.0 if (groupOrder is not None): for group in open(groupOrder, "r"): # Averages if (useMean): @@ -325,58 +325,61 @@ os.system(cmd) # Go through the joined file and re arrange the columns creating a bed 6+5+ file. # Also assign a scaled score 0 - 1000 to each tpm value. bedFile = tempfile.NamedTemporaryFile(mode="w+", bufsize=1) for line in joinedFile: splitLine = line.strip("\n").split("\t") if ("_" in splitLine[0]): sys.stderr.write("This transcript: " + splitLine[0] + " was dropped for having a '_' in the name.\n") continue # Ignore alt sequences. # Drop sequences where start is greater than end. if (float(splitLine[1]) > float(splitLine[2])): sys.stderr.write("This transcript: " + splitLine[0] + " was dropped since chr end, " + \ splitLine[2] + ", is smaller than chr start, " + splitLine[1] + ".\n") continue - if autoSql: score = str(determineScore(tpmCutoffs, float(splitLine[-3]))) - #skip the 4th field since we recalculate it - bedLine = "\t".join(splitLine[:4] + [score] + splitLine[5:]) + "\n" + if autoSql: + #skip the 4th field since we recalculated it + #need a different ordering to account for possible extraFields + bedLine = "\t".join(splitLine[:4] + [score] + splitLine[5:7] + splitLine[-2:] + splitLine[7:-3]) + "\n" else: - bedLine = "\t".join(splitLine[:4] + [str(determineScore(tpmCutoffs, float(splitLine[-3])))] + \ - splitLine[5:7] + splitLine[8:]) + "\n" + #skip the 4th field since we recalculate it + bedLine = "\t".join(splitLine[:4] + [score] + splitLine[5:7] + splitLine[8:]) + "\n" bedFile.write(bedLine) # Run Max's indexing script: TODO: add verbose options indexedBedFile = tempfile.NamedTemporaryFile(mode="w+", bufsize=1) cmd = "bedJoinTabOffset " + options.matrixFile.name + " " + bedFile.name + " " + indexedBedFile.name + if not options.verbose: cmd += " &>/dev/null" os.system(cmd) # Prepend the bed info to the start of the file. cmd = "echo '" + bedInfo + "' > " + options.outputFile.name os.system(cmd) # any extra fields must come after the fields added by bedJoinTabOffset if autoSql: reorderedBedFile = tempfile.NamedTemporaryFile(mode="w+", bufsize=1) - # standard bed6+5 barChart fields + # first print the standard bed6+3 barChart fields + # then print the two fields added by bedJoinTabOffset # then any extra fields at the end: cmd = "awk -F'\\t' -v \"OFS=\\t\" '" + \ - "{for (i = 1; i < 8; i++) {if (i > 1) printf \"\\t\"; printf \"%s\", $i;}; " + \ - "for (i = NF-3; i <= NF; i++) {printf \"\\t%s\", $i;} " + \ - "for (i = 8; i < NF - 4; i++) {printf \"\\t%s\", $i;} " + \ + "{for (i = 1; i < 10; i++) {if (i > 1) printf \"\\t\"; printf \"%s\", $i;}; " + \ + "for (i = NF-1; i <= NF; i++) {printf \"\\t%s\", $i;} " + \ + "for (i = 10; i < NF - 1; i++) {printf \"\\t%s\", $i;} " + \ "printf \"\\n\";}' " + indexedBedFile.name + " > " + reorderedBedFile.name os.system(cmd) cmd = "cat " + reorderedBedFile.name + " >> " + options.outputFile.name os.system(cmd) else: cmd = "cat " + indexedBedFile.name + " >> " + options.outputFile.name os.system(cmd) if options.verbose: print ("The columns and order of the groups are; \n" + bedInfo) def main(args): """ Initialized options and calls other functions. """ options = parseArgs(args)