895c7fceea79e43c9abf53a85b76ce7542cec328 hiram Thu Apr 7 13:26:38 2022 -0700 allow otherAligners directory in data downloads and gff3 gene file refs #29203 diff --git src/hg/utils/automation/asmHubGatewayPage.pl src/hg/utils/automation/asmHubGatewayPage.pl index 84bda67..f27e55d 100755 --- src/hg/utils/automation/asmHubGatewayPage.pl +++ src/hg/utils/automation/asmHubGatewayPage.pl @@ -318,54 +318,62 @@ <b>Assembly date:</b> %s<br> <b>Assembly type:</b> %s<br> <b>Assembly level:</b> %s<br> <b>Biosample:</b> <a href=\"https://www.ncbi.nlm.nih.gov/biosample/?term=%s\" target=\"_blank\">%s</a><br> <b>Assembly accession ID:</b> <a href=\"https://www.ncbi.nlm.nih.gov/assembly/%s\" target=\"_blank\">%s</a><br> <b>Assembly FTP location:</b> <a href='ftp://ftp.ncbi.nlm.nih.gov/genomes/all/%s' target='_blank'>%s</a><br> \n", $commonName, $orgName, $taxId, $taxId, $submitter, $asmDate, $descrAsmType, $asmLevel, $bioSample, $bioSample, $asmAccession, $asmAccession, $newStyleUrl, $newStyleUrl; chromSizes($chromSizes); printf "</p>\n<hr> <h4>Data file downloads</h4> <p> <ul> -<li><a href='https://$sourceServer/hubs/$localDataUrl/$asmAccession.fa.gz' target=_blank>$asmAccession.fa.gz</a> fasta sequence with original assembly sequence names</li> -<li><a href='https://$sourceServer/hubs/$localDataUrl/$asmAccession.2bit' target=_blank>$asmAccession.2bit</a> UCSC 2bit sequence file with original assembly sequence names</li> +<li><a href='https://$sourceServer/hubs/$localDataUrl/$asmAccession.fa.gz' target=_blank>$asmAccession.fa.gz</a> fasta sequence with NCBI GenBank sequence names</li> +<li><a href='https://$sourceServer/hubs/$localDataUrl/$asmAccession.2bit' target=_blank>$asmAccession.2bit</a> UCSC 2bit sequence file with NCBI GenBank sequence names</li> <li><a href='https://$sourceServer/hubs/$localDataUrl/$asmAccession.chromAlias.txt' target=_blank>$asmAccession.chromAlias.txt</a> chromAlias file to relate chromosome names</li> "; if ( -s "$buildDir/$asmId.chrNames.fa.gz") { printf "<li><a href='https://$sourceServer/hubs/$localDataUrl/$asmAccession.chrNames.fa.gz' target=_blank>$asmAccession.chrNames.fa.gz</a> fasta sequence with <b>chrN</b> sequence names</li>\n"; } if ( -s "$buildDir/$asmId.chrNames.2bit") { printf "<li><a href='https://$sourceServer/hubs/$localDataUrl/$asmAccession.chrNames.2bit' target=_blank>$asmAccession.chrNames.2bit</a> UCSC 2bit sequence file with <b>chrN</b> sequence names</li>\n"; } if ( -d "$genesDir" ) { - open (GD, "ls $genesDir/*.gtf.gz 2> /dev/null|") or die "can not ls $genesDir/*.gtf.gz"; + open (GD, "ls $genesDir/*.gtf.gz $genesDir/*.gff3.gz 2> /dev/null|") or die "can not ls $genesDir/*.gtf.gz"; while (my $gtfFile = <GD>) { chomp $gtfFile; my $gtf = basename($gtfFile); + if ($gtf =~ m/gff3.gz/) { + printf "<li><a href='https://$sourceServer/hubs/$localDataUrl/genes/$gtf' target=_blank>$gtf</a> gene GFF3 file</li>\n"; + } else { printf "<li><a href='https://$sourceServer/hubs/$localDataUrl/genes/$gtf' target=_blank>$gtf</a> gene GTF file</li>\n"; } } + close (GD); +} + +if ( -d "${buildDir}/otherAligners" ) { + printf "<li><a href='https://$sourceServer/hubs/$localDataUrl/otherAligners/' target=_blank>pre-computed indices</a> for alignment programs: bowtie2, bwa-mem2, hisat2, minimap2</li>\n"; +} -printf " -<li>explore the hub directory at: <a href='https://$sourceServer/hubs/$localDataUrl/' target=_blank>$sourceServer/hubs/$localDataUrl/</a></li> +printf "<li>explore the hub directory at: <a href='https://$sourceServer/hubs/$localDataUrl/' target=_blank>$sourceServer/hubs/$localDataUrl/</a></li> </ul> "; printf "</p>\n<hr> <h4>Copy this entire assembly hub for local use</h4> <p> This download is only for the purpose of using this assembly hub in your institution which may have firewall access restrictions to this data.<br> To download this assembly data, use this <b>rsync</b> command: <pre> rsync -a -P \\ rsync://$sourceServer/hubs/$localDataUrl/ \\ ./$accessionId/ </pre> @@ -398,31 +406,31 @@ if ($genomeSize < 4294967297) { printf "<hr> <h4>blat service</h4> <p> There is blat service available for this genome assembly. When viewing this assembly in the genome browser, access the blat service via the <em><b>Tools -> Blat</b></em> blue navigation bar menu item. </p> <p> For local command line blat service, access the blat service via the <b>gfClient</b> command line operation.<br> See also: <a href='http://hgdownload.soe.ucsc.edu/admin/exe/linux.x86_64/' target=_blank> hgdownload.soe.ucsc.edu/admin/exe/linux.x86_64/</a> to download command line binaries.<br> <br> -To operate this locally, you will need the <b>%s.2bit</b> file from: +To operate this locally, you will need the <b>$accessionId.2bit</b> file from: <pre> https://$sourceServer/hubs/$localDataUrl/ </pre> Which can be obtained with rsync via: <pre> rsync -a -P \ rsync://hgdownload.soe.ucsc.edu/hubs/$accessionDir/$accessionId.2bit ./ </pre> With that <b>$accessionId.2bit</b> file in your working directory where you run this command, for example, a DNA query with your DNA sequence in the file: <b>someDna.fa</b> with result in the file: <b>$accessionId.someDna.psl</b> <pre> gfClient -t=dna -q=dna -genome=$accessionId -genomeDataDir=$accessionDir \ dynablat-01.soe.ucsc.edu 4040 ./ someDna.fa $accessionId.someDna.psl