06d7be056190c14b85e71bc12523f18ea6815b5e markd Mon Dec 7 00:50:29 2020 -0800 BLAT mmap index support merge with master diff --git src/hg/utils/automation/parseLrgXml.pl src/hg/utils/automation/parseLrgXml.pl index d33dd84..bb28d0f 100755 --- src/hg/utils/automation/parseLrgXml.pl +++ src/hg/utils/automation/parseLrgXml.pl @@ -1,39 +1,44 @@ #!/usr/bin/env perl # DO NOT EDIT the /cluster/bin/scripts copy of this file -- # edit ~/kent/src/hg/utils/automation/parseLrgXml.pl instead. use warnings; use strict; use Encode; # Of the UTF-8 sort, not the ENCODE project. :) use XML::LibXML; use Data::Dumper; #print Data::Dumper->Dump([$dom], [qw(dom)]); my $assemblyPrefix = shift @ARGV; +my $workdir = shift @ARGV; +if (not defined $workdir) { $workdir = "./";} sub usage { my ($status) = @_; my $base = $0; $base =~ s/^(.*\/)?//; print STDERR " -usage: $base assemblyPrefix +usage: $base assemblyPrefix pathToFiles Parses LRG_*.xml files in current directory into BED (LRG regions) and genePred+Cdna+Pep (transcripts in LRG coordinates.) assemblyPrefix is something like \"GRCh37\" to match GRCh37.p5 etc. + If pathToFiles is present, look for pathToFiles/LRG_*.xml files. pathToFiles + defaults to the current working directory. + "; exit $status; } # usage my %gbAccToHg19Alt = ( gl000250 => 'chr6_apd_hap1', gl000251 => 'chr6_cox_hap2', gl000252 => 'chr6_dbb_hap3', gl000253 => 'chr6_mann_hap4', gl000254 => 'chr6_mcf_hap5', gl000255 => 'chr6_qbl_hap6', gl000256 => 'chr6_ssto_hap7', gl000257 => 'chr4_ctg9_hap1', gl000258 => 'chr17_ctg5_hap1', ); @@ -294,33 +299,33 @@ if (@codingRegions) { my @ps = $codingRegions[0]->findnodes('translation'); # I expect one for protein-coding genes, none for non-coding: print $pepF "$txName\t" . $ps[0]->findvalue('sequence') . "\n"; } } # each transcript } # parseOneLrg ################################ MAIN ################################# if (! $assemblyPrefix) { usage(1); } -my @xmlFiles = <LRG_*.xml>; +my @xmlFiles = <"$workdir/LRG_*.xml">; if (@xmlFiles == 0) { - warn "No LRG_*.xml files found in current directory -- need to cd somewhere?\n"; + warn "No LRG_*.xml files found in $workdir -- need to cd somewhere?\n"; } open(my $bedF, "| sort -k1,1 -k2n,2n >lrg.bed") || die "Can't open lrg.bed for (sorting and) writing): $!\n"; open(my $gpF, "| sort -k2,2 -k4n,4n >lrgTranscriptsUnmapped.gp") || die "Can't open lrgTranscriptsUnmapped.gp for (sorting and) writing: $!\n"; open(my $cdnaF, ">lrgCdna.tab") || die "Can't open lrgCdna.tab for writing: $!\n"; open(my $pepF, ">lrgPep.tab") || die "Can't open lrgPep.tab for writing: $!\n"; foreach my $file (@xmlFiles) { parseOneLrg($file, $bedF, $gpF, $cdnaF, $pepF); } close($bedF); close($gpF);