27228c7e494674469a64cdeb61d7c4e433102f24 hiram Thu Jan 2 13:16:14 2020 -0800 updated text to global reference wording refs #24534 diff --git src/hg/makeDb/doc/globalReference/mkHubIndex.pl src/hg/makeDb/doc/globalReference/mkHubIndex.pl index 52db04b..60b8cc4 100755 --- src/hg/makeDb/doc/globalReference/mkHubIndex.pl +++ src/hg/makeDb/doc/globalReference/mkHubIndex.pl @@ -1,254 +1,256 @@ #!/usr/bin/env perl use strict; use warnings; my $Name = "GlobalReference"; my $asmHubWorkDir = "globalReference"; my $defaultAssembly = "GCA_001524155.4_NA19240_prelim_3.0"; my @orderList; # asmId of the assemblies in order from the *.list files # the order to read the different .list files: my @classList = qw( human ); my %class; # key is asmId, value is from class list my $assemblyCount = 0; my %coriellLink; # key is asmId, value is isolate ID to construct link # https://www.coriell.org/0/Sections/Search/Sample_Detail.aspx?Ref= my %ethnicGroup; # ksy is asmId, value is ethnicity my %countryOfOrigin; # ksy is asmId, value is country of origin ############################################################################## # from Perl Cookbook Recipe 2.17, print out large numbers with comma delimiters: ############################################################################## sub commify($) { my $text = reverse $_[0]; $text =~ s/(\d\d\d)(?=\d)(?!\d*\.)/$1,/g; return scalar reverse $text } ############################################################################## ### start the HTML output ############################################################################## sub startHtml() { my $timeStamp = `date "+%F"`; chomp $timeStamp; # print <<"END" - + -

Platinum Genomes Project assembly hub

+

Global Reference Genomes assembly hubs

-This assembly hub contains assemblies released -by the -Platinum Genomes Project. +This assembly hub contains ten high-quality, population-identified assemblies released +by the +Reference Genome Improvement project. This population list is not conclusive and +represents reference genomes from 10 different human populations. Thank you to +Washington University in Saint Louis for consolidating these open source reference genomes.

How to view the hub

You can load this hub from our Public Hubs -page or by clicking these links to any of our official websites: +page or by clicking these assembly links to any of our official websites:

To manually attach this hub to other genome browsers:

  1. From the blue navigation bar, go to My Data -> Track Hubs
  2. Then select the My Hubs tab and enter this URL into the textbox:
    https://hgdownload.soe.ucsc.edu/hubs/$asmHubWorkDir/hub.txt
  3. Once you have added the URL to the entry form, press the Add Hub button to add the hub.

After adding the hub, you will be redirected to the gateway page. The -genome assemblies can be selected from the Platinum Genomes Hub Assembly dropdown menu. +genome assemblies can be selected from the Reference Genome Improvement Hub Assembly dropdown menu.

See also: assembly statistics

Data resource links

NOTE: Click on the column headers to sort the table by that column END } ############################################################################## ### start the table output ############################################################################## sub startTable() { print <<"END" END } ############################################################################## ### end the table output ############################################################################## sub endTable() { print <<"END"
ethnicity
link to genome browser
country of origin
and data download
NCBI assembly bioSample bioProject assembly date,
source link
END } ############################################################################## ### end the HTML output ############################################################################## sub endHtml() { print <<"END" END } ############################################################################## ### tableContents() ############################################################################## sub tableContents() { open (CN, "|sort --ignore-case >commonNameOrder.list") or die "can not write to commonNameOrder.list"; foreach my $asmId (@orderList) { # next if ($asmId =~ m/GCF_900963305.1_fEcheNa1.1/); my $asmReport="${asmId}/download/${asmId}_assembly_report.txt"; my ($gcPrefix, $asmAcc, $asmName) = split('_', $asmId, 3); my $chromSizes="${asmId}/${asmId}.chrom.sizes"; my $sciName = "notFound"; my $commonName = "notFound"; my $bioSample = "notFound"; my $bioProject = "notFound"; my $taxId = "notFound"; my $asmDate = "notFound"; my $itemsFound = 0; open (FH, "<$asmReport") or die "can not read $asmReport"; while (my $line = ) { last if ($itemsFound > 5); chomp $line; $line =~ s/ //g;; $line =~ s/\s+$//g;; if ($line =~ m/Date:/) { if ($asmDate =~ m/notFound/) { ++$itemsFound; $line =~ s/.*:\s+//; my @a = split('-', $line); $asmDate = sprintf("%04d-%02d-%02d", $a[0], $a[1], $a[2]); } } elsif ($line =~ m/BioSample:/) { if ($bioSample =~ m/notFound/) { ++$itemsFound; $bioSample = $line; $bioSample =~ s/.*:\s+//; } } elsif ($line =~ m/BioProject:/) { if ($bioProject =~ m/notFound/) { ++$itemsFound; $bioProject = $line; $bioProject =~ s/.*:\s+//; } } elsif ($line =~ m/Organism name:/) { if ($sciName =~ m/notFound/) { ++$itemsFound; $commonName = $line; $sciName = $line; $commonName =~ s/.*\(//; $commonName =~ s/\)//; $sciName =~ s/.*:\s+//; $sciName =~ s/\s+\(.*//; } } elsif ($line =~ m/Taxid:/) { if ($taxId =~ m/notFound/) { ++$itemsFound; $taxId = $line; $taxId =~ s/.*:\s+//; } } } close (FH); printf CN "%s\t%s\n", $commonName, $asmId; printf "%s\n", $asmHubWorkDir, 
$asmId, $ethnicGroup{$asmId}; printf " %s\n", $asmHubWorkDir, $asmId, $countryOfOrigin{$asmId}; printf " %s\n", $gcPrefix, $asmAcc, $asmId; printf " %s\n", $bioSample, $bioSample; printf " %s\n", $bioProject, $bioProject; printf " %s\n", $coriellLink{$asmId}, $asmDate; printf "\n"; } close(CN); } ############################################################################## ### main() ############################################################################## my $home = $ENV{'HOME'}; my $srcDir = "$home/kent/src/hg/makeDb/doc/$asmHubWorkDir"; open (FH, "<$srcDir/isolate.txt") or die "can not read $srcDir/isolate.txt"; while (my $line = ) { chomp $line; my ($asmId, $isolate) = split('\t', $line); $coriellLink{$asmId} = $isolate; } close (FH); open (FH, "<$srcDir/ethnicGroup.txt") or die "can not read $srcDir/ethnicGroup.txt"; while (my $line = ) { chomp $line; my ($asmId, $ethnicGroup) = split('\t', $line); my ($ethnic, $origin) = split(', ', $ethnicGroup); $ethnicGroup{$asmId} = $ethnic; $countryOfOrigin{$asmId} = $origin; } close (FH); foreach my $species (@classList) { my $listFile = "$srcDir/${species}.list"; open (FH, "<$listFile") or die "can not read $listFile"; while (my $asmId = ) { chomp $asmId; push @orderList, $asmId; $class{$asmId} = $species; ++$assemblyCount; } close (FH); } startHtml(); startTable(); tableContents(); endTable(); endHtml();