src/utils/expMatrixToBarchartBed/expMatrixToBarchartBed ccbcc084780401ca9e070e3ce2e6da99a56db615

ccbcc084780401ca9e070e3ce2e6da99a56db615
chmalee
  Wed Jun 20 12:03:19 2018 -0700
changes to expMatrixToBarchartBed after code review, refs #21626

diff --git src/utils/expMatrixToBarchartBed/expMatrixToBarchartBed src/utils/expMatrixToBarchartBed/expMatrixToBarchartBed
index cb1af09..7154139 100755
--- src/utils/expMatrixToBarchartBed/expMatrixToBarchartBed
+++ src/utils/expMatrixToBarchartBed/expMatrixToBarchartBed
@@ -206,31 +206,31 @@
                     groupAverages[columnToGroup[count]].append(float(col))
 
             count += 1
 
         # Store some information on the bed file. Most important is the groupOrder.
         if getBedInfo:
             getBedInfo = False
             groups = ""
             bedInfo += "#chr\tchromStart\tchromEnd\tname\tscore\tstrand\tname2\texpCount\texpScores;"
             if (groupOrder is not None):
                 for group in open(groupOrder, "r"):
                     groups += group.strip("\n") + " "
             else:
                 for key, value in sorted(groupAverages.iteritems()):
                     groups += key + " "
-            if autoSql:
+            if autoSql and len(autoSql) != 11: # parseExtraFields requires first 11 fields to be standard
                 bedInfo += groups[:-1] + "\t_offset\t_lineLength\t" + "\t".join(autoSql[11:])
             else:
                 bedInfo += groups[:-1] + "\t_offset\t_lineLength"
 
         # Write out the transcript name, this is needed to join with coordinates later.
         bedLikeFile.write(splitLine[0] + "\t")
         # Create a list of the average scores per group.
         bedLine = ""
         # The fullAverage is used to assign a tpm score representative of the entire bed row.
         fullAverage = 0.0
         count = 0.0
         if (groupOrder is not None):
             for group in open(groupOrder, "r"):
                 # Averages
                 if (useMean):
@@ -325,58 +325,61 @@
     os.system(cmd)
 
     # Go through the joined file and rearrange the columns, creating a bed 6+5+ file.
     # Also assign a scaled score 0 - 1000 to each tpm value.
     bedFile = tempfile.NamedTemporaryFile(mode="w+", bufsize=1)
     for line in joinedFile:
         splitLine = line.strip("\n").split("\t")
         if ("_" in splitLine[0]):
             sys.stderr.write("This transcript: " + splitLine[0] + " was dropped for having a '_' in the name.\n")
             continue # Ignore alt sequences.
         # Drop sequences where start is greater than end.
         if (float(splitLine[1]) > float(splitLine[2])):
             sys.stderr.write("This transcript: " + splitLine[0] + " was dropped since chr end, " + \
                     splitLine[2] + ", is smaller than chr start, " + splitLine[1] + ".\n")
             continue
-        if autoSql:
         score = str(determineScore(tpmCutoffs, float(splitLine[-3])))
-            #skip the 4th field since we recalculate it
-            bedLine = "\t".join(splitLine[:4] + [score] + splitLine[5:]) + "\n"
+        if autoSql:
+            #skip the 4th field since we recalculated it
+            #need a different ordering to account for possible extraFields
+            bedLine = "\t".join(splitLine[:4] + [score] + splitLine[5:7] + splitLine[-2:] + splitLine[7:-3]) + "\n"
         else:
-            bedLine = "\t".join(splitLine[:4] + [str(determineScore(tpmCutoffs, float(splitLine[-3])))] + \
-                splitLine[5:7] + splitLine[8:]) + "\n"
+            #skip the 4th field since we recalculate it
+            bedLine = "\t".join(splitLine[:4] + [score] + splitLine[5:7] + splitLine[8:]) + "\n"
 
         bedFile.write(bedLine)
 
     # Run Max's indexing script: TODO: add verbose options
     indexedBedFile = tempfile.NamedTemporaryFile(mode="w+", bufsize=1)
     cmd = "bedJoinTabOffset " + options.matrixFile.name + " " + bedFile.name + " " + indexedBedFile.name
+    if not options.verbose: cmd += " &>/dev/null"
     os.system(cmd)
 
     # Prepend the bed info to the start of the file.
     cmd = "echo '" + bedInfo + "' > " + options.outputFile.name
     os.system(cmd)
 
     # any extra fields must come after the fields added by bedJoinTabOffset
     if autoSql:
         reorderedBedFile = tempfile.NamedTemporaryFile(mode="w+", bufsize=1)
-        # standard bed6+5 barChart fields
+        # first print the standard bed6+3 barChart fields
+        # then print the two fields added by bedJoinTabOffset
         # then any extra fields at the end:
         cmd = "awk -F'\\t' -v \"OFS=\\t\" '" + \
-                "{for (i = 1; i < 8; i++) {if (i > 1) printf \"\\t\"; printf \"%s\", $i;}; " + \
-                "for (i = NF-3; i <= NF; i++) {printf \"\\t%s\", $i;} " + \
-                "for (i = 8; i < NF - 4; i++) {printf \"\\t%s\", $i;} " + \
+                "{for (i = 1; i < 10; i++) {if (i > 1) printf \"\\t\"; printf \"%s\", $i;}; " + \
+                "for (i = NF-1; i <= NF; i++) {printf \"\\t%s\", $i;} " + \
+                "for (i = 10; i < NF - 1; i++) {printf \"\\t%s\", $i;} " + \
                 "printf \"\\n\";}' " + indexedBedFile.name + " > " + reorderedBedFile.name
         os.system(cmd)
         cmd = "cat " + reorderedBedFile.name + " >> " + options.outputFile.name
         os.system(cmd)
     else:
         cmd = "cat " + indexedBedFile.name + " >> " + options.outputFile.name
         os.system(cmd)
 
     if options.verbose: print ("The columns and order of the groups are; \n" + bedInfo)
 
 def main(args):
     """
     Initializes options and calls other functions.
     """
     options = parseArgs(args)