22da87555da04013fc59dfb49e167dc35d711f75 chmalee Mon Jun 4 15:34:17 2018 -0700 Remake of miRna tissue atlas barChart with correctly formatted matrix file, refs #18508

diff --git src/utils/expMatrixToBarchartBed/expMatrixToBarchartBed src/utils/expMatrixToBarchartBed/expMatrixToBarchartBed
index 7ef4579..4eff1a8 100755
--- src/utils/expMatrixToBarchartBed/expMatrixToBarchartBed
+++ src/utils/expMatrixToBarchartBed/expMatrixToBarchartBed
@@ -108,31 +108,31 @@ matrix - An expression matrix, samples are the x rows, transcripts the y rows.
 bedLikeFile - An intermediate file, looks slightly like a bed.
 """
     # Store some information on the bed file, most important is the order
     # of the 8th column.
     bedInfo = ""
     firstLine = True
     getBedInfo = True
 
     # Use the first line of the matrix and the sampleToGroup dict to create a dictionary that maps
     # the column to a group.
     columnToGroup = dict()
 
     # Go through the matrix line by line. The first line is used to build an index mapping columns
     # to group blocks, then for each line with TPM values merge the values based on group blocks.
     for line in matrix:
-        splitLine = line.strip("\n").split()
+        splitLine = line.strip("\n").split("\t")
 
         # The first line is the word 'transcript' followed by a list of the sample names.
         if firstLine:
            firstLine = False
            count = 1
            firstCol = True
            for col in splitLine:
                if firstCol:
                    firstCol = False
                    continue
                group = sampleToGroup[col]
                columnToGroup.setdefault(count, group)
                count += 1
            continue
 
@@ -226,31 +226,31 @@ options - The command line options (file names, etc).
 Use the meta data to map the sample names to their groups, then create a dict
 that maps the columns to the groups. Go through the matrix line by line and get the
 median or average for each group. Print this to an intermediate file, then use the
 unix 'join' command to link with the coordinates file via the first matrix column.
 This creates a file with many of the bed fields just in the wrong order. Go through
 this file to re arrange the columns, check for and remove entries where chromsomes
 names include "_" and chr start > chr end. Finally run Max's bedJoinTabOffset to
 index the matrix adding the dataOffset and dataLen columns and creating a bed 6+5 file.
 """
 
     # Create a dictionary that maps the sample names to their group.
     sampleToGroup = dict()
     count = 0
     for item in options.sampleFile:
        count +=1
-        splitLine = item.strip("\n").split()
+        splitLine = item.strip("\n").split("\t")
        if (len(splitLine) is not 2):
            print ("There was an error reading the sample file at line " + str(count))
            exit(1)
        sampleToGroup.setdefault(splitLine[0], splitLine[1])
 
     # Use an intermediate file to hold the average values for each group.
     bedLikeFile = tempfile.NamedTemporaryFile( mode = "w+", bufsize = 1)
 
     # Keep a list of TPM scores greater than 0. This will be used later
     # to assign bed scores.
     validTpms = []
     # Go through the matrix and condense it into a bed like file. Populate
     # the validTpms array and the bedInfo string.
     bedInfo = condenseMatrixIntoBedCols(options.matrixFile, options.groupOrderFile, sampleToGroup, \
            validTpms, bedLikeFile, options.useMean)
@@ -280,31 +280,31 @@
     # Sort the coordinate file to prepare it for the join.
     sortedCoords = tempfile.NamedTemporaryFile( mode = "w+", bufsize = 1)
     cmd = "paste " + coordBedPart1.name + " " + coordBedPart2.name + " | sort > " + sortedCoords.name
     os.system(cmd)
 
     # Join the bed-like file and the coordinate file.
     joinedFile = tempfile.NamedTemporaryFile(mode="w+", bufsize=1)
     cmd = "join " + sortedCoords.name + " " + sortedBedLikeFile.name + " | awk -v " + \
        "OFS=\"\\t\" '$1=$1' > " + joinedFile.name
     os.system(cmd)
 
     # Go through the joined file and re arrange the columns creating a bed 6+2 file.
     # Also assign a scaled score 0 - 1000 to each tpm value.
     bedFile = tempfile.NamedTemporaryFile(mode="w+", bufsize=1)
     for line in joinedFile:
-        splitLine = line.strip("\n").split()
+        splitLine = line.strip("\n").split("\t")
        if ("_" in splitLine[0]):
            sys.stderr.write("This transcript " + splitLine[0] + " was dropped for having a '_' in the name.\n")
            continue # Ignore alt sequences.
        # Drop sequences where start is greater than end.
        if (float(splitLine[2]) > float(splitLine[3])):
            sys.stderr.write("This transcript " + splitLine[0] + " was dropped since chr end, " + \
                splitLine[3] + ", is smaller than chr start, " + splitLine[2] + "\n.")
            continue
        chrom = splitLine[1]
        chrStart = splitLine[2]
        chrEnd = splitLine[3]
        name = splitLine[0]
        score = str(determineScore(tpmCutoffs, float(splitLine[7])))
        strand = splitLine[5]
        name2 = splitLine[6]
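
Note (not part of the commit): all three hunks make the same change, splitting the matrix, sample file, and joined file on "\t" instead of arbitrary whitespace. The sketch below illustrates the failure mode with the old split(); the tissue names used are hypothetical stand-ins for the multi-word sample labels in the miRNA tissue atlas matrix.

# Illustrative only: shows why whitespace splitting breaks a tab-separated
# matrix whose sample names contain spaces.
header = "transcript\tadipocyte\tarachnoid mater\tartery dorsalis pedis\n"

whitespaceSplit = header.strip("\n").split()      # old behavior
tabSplit = header.strip("\n").split("\t")         # new behavior

print(len(whitespaceSplit))  # 7 -- multi-word sample names break into extra columns
print(len(tabSplit))         # 4 -- one field per tab-delimited cell, as expected

# With split(), the column indexes built from the header no longer line up with
# the sample names, so sampleToGroup lookups fail or TPM values are merged into
# the wrong group block; split("\t") keeps each tab-delimited cell intact.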