22a67b8eddff877e9265906cfbc4df5a41c3e0db lrnassar Mon Aug 5 12:01:54 2019 -0700 Pointing hard-coded links to hgdownload refs #23734 diff --git src/hg/makeDb/doc/VGP/mkHubIndex.pl src/hg/makeDb/doc/VGP/mkHubIndex.pl index 8e61706..1ff0fff 100755 --- src/hg/makeDb/doc/VGP/mkHubIndex.pl +++ src/hg/makeDb/doc/VGP/mkHubIndex.pl @@ -1,248 +1,248 @@ #!/usr/bin/env perl use strict; use warnings; my @orderList; # asmId of the assemblies in order from the *.list files # the order to read the different .list files: my @classList = qw( mammal bird reptile amphibian fish ); my %class; # key is asmId, value is from class list my $assemblyCount = 0; my %betterName; # key is asmId, value is common name ############################################################################## # from Perl Cookbook Recipe 2.17, print out large numbers with comma delimiters: ############################################################################## sub commify($) { my $text = reverse $_[0]; $text =~ s/(\d\d\d)(?=\d)(?!\d*\.)/$1,/g; return scalar reverse $text } ############################################################################## ### start the HTML output ############################################################################## sub startHtml() { my $timeStamp = `date "+%F"`; chomp $timeStamp; # print <<"END"

VGP - Vertebrate Genomes Project assembly hub

VGP logo

This assembly hub contains assemblies released by the Vertebrate Genomes Project.

How to view the hub

You can load this hub from our Public Hubs page or by clicking these links to any of our official websites:

To manually attach this hub to other genome browsers:

  1. From the blue navigation bar, go to My Data -> Track Hubs
  2. Then select the My Hubs tab and enter this URL into the textbox: -
    http://genome-test.gi.ucsc.edu/hubs/VGP/hub.txt
  3. +
    https://hgdownload.soe.ucsc.edu/hubs/VGP/hub.txt
  4. Once you have added the URL to the entry form, press the Add Hub button to add the hub.

After adding the hub, you will be redirected to the gateway page. The genome assemblies can be selected from the VGP Hub Assembly dropdown menu.

See also: assembly statistics

Data resource links

NOTE: Click on the column headers to sort the table by that column END } ############################################################################## ### start the table output ############################################################################## sub startTable() { print <<"END" END } ############################################################################## ### end the table output ############################################################################## sub endTable() { print <<"END"
common name and
view in browser
scientific name and
data download
NCBI assembly bioSamplebioProject Taxon ID assembly date
VGP link
class
END } ############################################################################## ### end the HTML output ############################################################################## sub endHtml() { print <<"END" END } ############################################################################## ### tableContents() ############################################################################## sub tableContents() { open (CN, "|sort --ignore-case >commonNameOrder.list") or die "can not write to commonNameOrder.list"; foreach my $asmId (@orderList) { # next if ($asmId =~ m/GCF_900963305.1_fEcheNa1.1/); my $asmReport="${asmId}/download/${asmId}_assembly_report.txt"; my ($gcPrefix, $asmAcc, $asmName) = split('_', $asmId, 3); my $chromSizes="${asmId}/${asmId}.chrom.sizes"; my $sciName = "notFound"; my $commonName = "notFound"; my $bioSample = "notFound"; my $bioProject = "notFound"; my $taxId = "notFound"; my $asmDate = "notFound"; my $itemsFound = 0; open (FH, "<$asmReport") or die "can not read $asmReport"; while (my $line = ) { last if ($itemsFound > 5); chomp $line; $line =~ s/ //g;; $line =~ s/\s+$//g;; if ($line =~ m/Date:/) { if ($asmDate =~ m/notFound/) { ++$itemsFound; $asmDate = $line; $asmDate =~ s/.*:\s+//; } } elsif ($line =~ m/BioSample:/) { if ($bioSample =~ m/notFound/) { ++$itemsFound; $bioSample = $line; $bioSample =~ s/.*:\s+//; } } elsif ($line =~ m/BioProject:/) { if ($bioProject =~ m/notFound/) { ++$itemsFound; $bioProject = $line; $bioProject =~ s/.*:\s+//; } } elsif ($line =~ m/Organism name:/) { if ($sciName =~ m/notFound/) { ++$itemsFound; $commonName = $line; $sciName = $line; $commonName =~ s/.*\(//; $commonName =~ s/\)//; $sciName =~ s/.*:\s+//; $sciName =~ s/\s+\(.*//; } } elsif ($line =~ m/Taxid:/) { if ($taxId =~ m/notFound/) { ++$itemsFound; $taxId = $line; $taxId =~ s/.*:\s+//; } } } close (FH); $commonName = $betterName{$asmId} if (exists($betterName{$asmId})); printf CN "%s\t%s\n", $commonName, $asmId; - printf "%s\n", $asmId, $commonName; - printf " %s\n", $asmId, $sciName; + printf "%s\n", $asmId, $commonName; + printf " %s\n", $asmId, $sciName; printf " %s\n", $gcPrefix, $asmAcc, $asmId; printf " %s\n", $bioSample, $bioSample; printf " %s\n", $bioProject, $bioProject; printf " %s\n", $taxId, $taxId; my $sciNameUnderscore = $sciName; $sciNameUnderscore =~ s/ /_/g; $sciNameUnderscore = "Strigops_habroptilus" if ($sciName =~ m/Strigops habroptila/); printf " %s\n", $sciNameUnderscore, $asmDate; printf " %s\n", $class{$asmId}; printf "\n"; } close(CN); } ############################################################################## ### main() ############################################################################## my $home = $ENV{'HOME'}; my $srcDir = "$home/kent/src/hg/makeDb/doc/VGP"; open (FH, "<$srcDir/commonNames.txt") or die "can not read $srcDir/commonNames.txt"; while (my $line = ) { chomp $line; my ($asmId, $name) = split('\t', $line); $betterName{$asmId} = $name; } close (FH); foreach my $species (@classList) { my $listFile = "$srcDir/${species}.list"; open (FH, "<$listFile") or die "can not read $listFile"; while (my $asmId = ) { chomp $asmId; push @orderList, $asmId; $class{$asmId} = $species; ++$assemblyCount; } close (FH); } startHtml(); startTable(); tableContents(); endTable(); endHtml();