src/hg/utils/automation/asmHubNcbiGene.pl 92509db0231e9b1ae83aa0c85182883dd8eabc9b

92509db0231e9b1ae83aa0c85182883dd8eabc9b
hiram
  Wed Sep 14 13:59:35 2022 -0700
fixup chainNet trackDb and now sending files to dynablat and arranging hub files to place it in use refs #29811

diff --git src/hg/utils/automation/asmHubNcbiGene.pl src/hg/utils/automation/asmHubNcbiGene.pl
index dabe90f..fbc0b3b 100755
--- src/hg/utils/automation/asmHubNcbiGene.pl
+++ src/hg/utils/automation/asmHubNcbiGene.pl
@@ -1,81 +1,85 @@
 #!/usr/bin/env perl
 
 use strict;
 use warnings;
 use FindBin qw($Bin);
 use lib "$Bin";
 use AsmHub;
 use File::Basename;
 
 my $argc = scalar(@ARGV);
 
-if ($argc != 3) {
-  printf STDERR "usage: asmHubNcbiGene.pl asmId asmId.names.tab .../trackData/\n";
+if ($argc != 4) {
+  printf STDERR "usage: asmHubNcbiGene.pl asmId ncbiAsmId asmId.names.tab .../trackData/\n";
   printf STDERR "where asmId is the assembly identifier,\n";
   printf STDERR "and .../trackData/ is the path to the /trackData/ directory.\n";
+  printf STDERR "asmId may be equal to ncbiAsmId if it is a GenArk build\n";
+  printf STDERR "or asmId might be a default dbName if it is a UCSC style\n";
+  printf STDERR "browser build.\n";
   exit 255;
 }
 
 # from Perl Cookbook Recipe 2.17, print out large numbers with comma
 # delimiters:
 sub commify($) {
     my $text = reverse $_[0];
     $text =~ s/(\d\d\d)(?=\d)(?!\d*\.)/$1,/g;
     return scalar reverse $text
 }
 
 my $asmId = shift;
+my $ncbiAsmId = shift;
 my $namesFile = shift;
 my $trackDataDir = shift;
 my $ncbiGeneBbi = "$trackDataDir/ncbiGene/$asmId.ncbiGene.bb";
 my $asmType = "refseq";
 
 if ( ! -s $ncbiGeneBbi ) {
   printf STDERR "ERROR: can not find $asmId.ncbiGene.bb file\n";
   exit 255;
 }
 
-my @partNames = split('_', $asmId);
+my @partNames = split('_', $ncbiAsmId);
 my $ftpDirPath = sprintf("%s/%s/%s/%s/%s", $partNames[0],
    substr($partNames[1],0,3), substr($partNames[1],3,3),
-   substr($partNames[1],6,3), $asmId);
+   substr($partNames[1],6,3), $ncbiAsmId);
 
 $asmType = "genbank" if ($partNames[0] =~ m/GCA/);
 my $totalBases = `ave -col=2 $trackDataDir/../${asmId}.chrom.sizes | grep "^total" | awk '{printf "%d", \$2}'`;
 chomp $totalBases;
 my $geneStats = `cat $trackDataDir/ncbiGene/${asmId}.ncbiGene.stats.txt | awk '{printf "%d\\n", \$2}' | xargs echo`;
 chomp $geneStats;
 my ($itemCount, $basesCovered) = split('\s+', $geneStats);
 my $percentCoverage = sprintf("%.3f", 100.0 * $basesCovered / $totalBases);
 $itemCount = commify($itemCount);
 $basesCovered = commify($basesCovered);
 $totalBases = commify($totalBases);
 
 my $em = "<em>";
 my $noEm = "</em>";
 my $assemblyDate = `grep -v "^#" $namesFile | cut -f9`;
 chomp $assemblyDate;
 my $ncbiAssemblyId = `grep -v "^#" $namesFile | cut -f10`;
 chomp $ncbiAssemblyId;
 my $organism = `grep -v "^#" $namesFile | cut -f5`;
 chomp $organism;
 
 print <<_EOF_
 <h2>Description</h2>
 <p>
-The NCBI Gene track for the $assemblyDate $em${organism}$noEm/$asmId
-genome assembly is constructed from the gff file <b>${asmId}_genomic.gff.gz</b>
+The NCBI Gene track for the $assemblyDate $em${organism}$noEm/$ncbiAsmId
+genome assembly is constructed from the gff file <b>${ncbiAsmId}_genomic.gff.gz</b>
 delivered with the NCBI RefSeq genome assemblies at the FTP location:<br>
 <a href='ftp://ftp.ncbi.nlm.nih.gov/genomes/all/$ftpDirPath/' target='_blank'>ftp://ftp.ncbi.nlm.nih.gov/genomes/all/$ftpDirPath/</a>
 </p>
 
 <h2>Track statistics summary</h2>
 <p>
 <b>Total genome size: </b>$totalBases<br>
 <b>Gene count: </b>$itemCount<br>
 <b>Bases in genes: </b>$basesCovered<br>
 <b>Percent genome coverage: </b>% $percentCoverage<br>
 </p>
 
 _EOF_
    ;