be4311c07e14feb728abc6425ee606ffaa611a58
markd
  Fri Jan 22 06:46:58 2021 -0800
merge with master

diff --git src/hg/utils/otto/gwas/perlParser.pl src/hg/utils/otto/gwas/perlParser.pl
index 28b3eea..43c554c 100644
--- src/hg/utils/otto/gwas/perlParser.pl
+++ src/hg/utils/otto/gwas/perlParser.pl
@@ -1,64 +1,67 @@
 my @saveIdx = (
     1, # PUBMEDID
     2, # FIRST AUTHOR
     3, # DATE
     4, # JOURNAL
     6, # STUDY
     7, # DISEASE/TRAIT
     8, # INITIAL SAMPLE SIZE
     9, # REPLICATION SAMPLE SIZE
     10, # REGION
     13, # REPORTED GENE(S)
     20, # STRONGEST SNP-RISK ALLELE
     26, # RISK ALLELE FREQUENCY
     27, # P-VALUE
     29, # P-VALUE (TEXT)
     30, # OR or BETA
     31, # 95% CI (TEXT)
     32, # PLATFORM [SNPS PASSING QC]
     33, # CNV
 );
     my $snpIdx = 21;
     my $riskIdx = 20;
 
 # Restore this if they change time formats again
 #    use Time::Piece;
     while (<>) { 
                 next if (/^\s*$/); 
                 s/\r$//; 
                 @w = split("\t"); 
                 # Skip if SNPs column is empty
                 next if ($w[$snpIdx] !~ /^rs\d+/); 
 # Restore this if they change time formats again
 #		$w[3] =  Time::Piece->strptime($w[3], '%d-%b-%Y')->strftime('%Y-%m-%d');
 
                 $w[$snpIdx] =~ s/ //g; 
                 my @snps = split(",", $w[$snpIdx]); 
 
                 $w[$riskIdx] =~ s/\s+//g;
 
                 # Keep only the columns we care about
                 my @savedCols = ();
                 foreach $i (@saveIdx) { 
                   $savedCols[@savedCols] = $w[$i];
+                  if ($i == 33 && $savedCols[$#savedCols] !~ /^\s*Y\s*$/) { # CNV: anything but "Y" becomes "N"; tolerate untrimmed whitespace/newline (trimmed below)
+                    $savedCols[$#savedCols] = "N";
+                  }
                 } 
                 # trim leading/trailing spaces if any; 
                 # convert the Unicode in titles to HTML because non-ASCII gives Galaxy trouble. 
                 # NB: this conversion now handled externally by iconv.  If Galaxy can handle
                 # HTML entities, though, that might be even better - we could replace use of
                 # iconv with calls to HTML::Entities
                 foreach $i (0 .. $#savedCols) { 
                   $savedCols[$i] =~ s/^\s*//;  $savedCols[$i] =~ s/\s*$//; 
                   @chars = split(//, $savedCols[$i]); 
                   $savedCols[$i] = ""; 
                   foreach $c (@chars) { 
                     if (ord($c) > 127) { 
                       $c = sprintf "&#%d;", ord($c); 
                     } 
                   $savedCols[$i] .= $c; 
                   } 
                 } 
                 foreach $s (@snps) { 
                   print join("\t", $s, @savedCols) . "\n"; 
                 } 
               }