src/utils/expMatrixToBarchartBed/expMatrixToBarchartBed e86711604cdb883a23ee8af668b49e4d666dae5c

e86711604cdb883a23ee8af668b49e4d666dae5c
chmalee
  Tue Apr 2 13:58:55 2019 -0700
Letting python handle printing of floats instead of custom float parser, also allowing fix or alt sequences, refs #23218

diff --git src/utils/expMatrixToBarchartBed/expMatrixToBarchartBed src/utils/expMatrixToBarchartBed/expMatrixToBarchartBed
index 7154139..c66c87c 100755
--- src/utils/expMatrixToBarchartBed/expMatrixToBarchartBed
+++ src/utils/expMatrixToBarchartBed/expMatrixToBarchartBed
@@ -95,52 +95,30 @@
 
 def determineScore(tpmCutoffs, tpm):
     """
     Convert a tpm value into a display score that is a multiple of 111.
     The cutoffs are walked in the order given; the score is 111 times the
     position (starting at 0) of the first cutoff strictly greater than tpm.
     When no cutoff is greater than tpm the top score, 999, is returned.
     tpmCutoffs - A list of numeric cutoff values
     tpm - The numeric value to be scored
     """
     # Position of the first cutoff that exceeds tpm, or None if there is none.
     firstBin = next((idx for idx, cutoff in enumerate(tpmCutoffs) if cutoff > tpm), None)
     if firstBin is None:
         return 999
     return firstBin * 111
 
-def floatRound(inFloat):
-    """
-    Return a float that has at most 2 decimal places.
-    """
-    beforeDecimal = True
-    result = ""
-    count = 0
-    for char in inFloat:
-        if char is ".":
-            beforeDecimal = False
-            result += char
-            continue
-        if beforeDecimal:
-            result += char
-        else:
-            if count >= 2:
-                return result
-            else:
-                count += 1
-                result += char
-    return result
-
 def condenseMatrixIntoBedCols(matrix, groupOrder, autoSql, sampleToGroup, validTpms, bedLikeFile, useMean):
     """
     Take an expression matrix and a dictionary that maps the samples to groups.
     Go through the expression matrix and calculate the average for each group, outputting
     it to an intermediate file as they are calculated. The intermediate file has
     three columns, the first is the average tpm for the entire gene, next is the
     number of groups and finally the average tpm for each group as a comma separated list.
 
     matrix - An expression matrix, samples are the x rows, transcripts the y rows.
     groupOrder - Optional order of categories
     autoSql - Optional description of extra fields
     sampleToGroup - A dictionary that maps string samples to string groups.
     validTpms - An empty list of integers.
     bedLikeFile - An intermediate file, looks slightly like a bed.
     """
@@ -225,40 +203,40 @@
 
         # Write out the transcript name, this is needed to join with coordinates later.
         bedLikeFile.write(splitLine[0] + "\t")
         # Create a list of the average scores per group.
         bedLine = ""
         # The fullAverage is used to assign a tpm score representative of the entire bed row.
         fullAverage = 0.0
         count = 0.0
         if (groupOrder is not None):
             for group in open(groupOrder, "r"):
                 # Averages
                 if (useMean):
                     value = groupAverages[group.strip("\n")]
                 else:
                     value = median(groupAverages[group.strip("\n")])
-                bedLine = bedLine + "," + floatRound(str(value))
+                bedLine = bedLine + "," + "%0.2g" % value
                 count += 1.0
                 fullAverage += value
         else:
             for key, value in sorted(groupAverages.iteritems()):
                 if (useMean):
-                    bedLine = bedLine + "," + floatRound(str(value))
+                    bedLine = bedLine + "," + "%0.2g" % value
                     fullAverage += value
                 else:
-                    bedLine = bedLine + "," + floatRound(str(median(value)))
+                    bedLine = bedLine + "," + "%0.2g" % median(value)
                     fullAverage += median(value)
 
                 count += 1.0
         # Create what will be columns 5, 7 and 8 of the final bed.
         bedLine = str(fullAverage/count) + "\t" + str(int(count)) +  "\t" + bedLine[1:] + "\n"
         # If the fullAverage tpm is greater than 0 then consider it in the validTpm list.
         if (fullAverage > 0.0):
             validTpms.append((fullAverage/count))
         # Write the bedLine to the intermediate bed-like file.
         bedLikeFile.write(bedLine)
     # Return the bedInfo so it can be printed right before the script ends.
     return bedInfo
 
 def expMatrixToBarchartBed(options):
     """
@@ -317,33 +295,30 @@
     os.system(cmd)
 
     # Join the bed-like file and the coordinate file, the awk accounts for any extra
     # fields that may be included, and keeps the file in standard bed 6+5 format
     joinedFile = tempfile.NamedTemporaryFile(mode="w+", bufsize=1)
     cmd = "join -t $'\\t' -1 4 -2 1 " + sortedCoords.name + " " + sortedBedLikeFile.name + \
             " | awk -F'\t' -v OFS=\"\\t\" '{printf \"%s\\t%s\\t%s\\t%s\", $2,$3,$4,$1; " + \
             "for (i=5;i<=NF;i++) {printf \"\\t%s\", $i}; printf\"\\n\";}' > " + joinedFile.name
     os.system(cmd)
 
     # Go through the joined file and re arrange the columns creating a bed 6+5+ file.
     # Also assign a scaled score 0 - 1000 to each tpm value.
     bedFile = tempfile.NamedTemporaryFile(mode="w+", bufsize=1)
     for line in joinedFile:
         splitLine = line.strip("\n").split("\t")
-        if ("_" in splitLine[0]):
-            sys.stderr.write("This transcript: " + splitLine[0] + " was dropped for having a '_' in the name.\n")
-            continue # Ignore alt sequences.
         # Drop sequences where start is greater than end.
         if (float(splitLine[1]) > float(splitLine[2])):
             sys.stderr.write("This transcript: " + splitLine[0] + " was dropped since chr end, " + \
                     splitLine[2] + ", is smaller than chr start, " + splitLine[1] + ".\n")
             continue
         score = str(determineScore(tpmCutoffs, float(splitLine[-3])))
         if autoSql:
             #skip the 4th field since we recalculated it
             #need a different ordering to account for possible extraFields
             bedLine = "\t".join(splitLine[:4] + [score] + splitLine[5:7] + splitLine[-2:] + splitLine[7:-3]) + "\n"
         else:
             #skip the 4th field since we recalculate it
             bedLine = "\t".join(splitLine[:4] + [score] + splitLine[5:7] + splitLine[8:]) + "\n"
 
         bedFile.write(bedLine)