6d83c1f80eed30361f2c3a7d4c1282f48592b839
hiram
  Wed Jun 22 15:50:38 2022 -0700
util script to find common name string given an asmId refs #29545

diff --git src/hg/makeDb/doc/asmHubs/commonNames.pl src/hg/makeDb/doc/asmHubs/commonNames.pl
new file mode 100755
index 0000000..7dd46d1
--- /dev/null
+++ src/hg/makeDb/doc/asmHubs/commonNames.pl
@@ -0,0 +1,42 @@
+#!/usr/bin/env perl
+
+use strict;
+use warnings;
+
+my $argc = scalar(@ARGV);
+
+if ($argc != 1){
+  printf STDERR "usage: ./commonNames.pl vgp.2020-04-24.list\n";
+  printf STDERR "will look up the common names from the assembly_report files\n";
+  exit 255;
+}
+
+my $ncbiSrc="/hive/data/outside/ncbi/genomes";
+
+my $listFile = shift;
+open (FH, "<$listFile") or die "can not open $listFile";
+while (my $asmId = <FH>) {
+  next if ($asmId =~ m/^#/);
+  $asmId =~ s/\s+.*//;
+  chomp $asmId;
+  next if (length($asmId) < 1);
+  my $gcx = substr($asmId, 0, 3);
+  my $id0 = substr($asmId, 4, 3);
+  my $id1 = substr($asmId, 7, 3);
+  my $id2 = substr($asmId, 10, 3);
+  my $srcDir = sprintf "%s/%s/%s/%s/%s/%s", $ncbiSrc, $gcx, $id0, $id1, $id2, $asmId;
+  my $asmRpt = "$srcDir/${asmId}_assembly_report.txt";
+  my $orgName = `grep -i -m 1 "Organism name:" "${asmRpt}" | tr -d "
"`;
+  $orgName =~ s/.*\(//;
+  $orgName =~ s/\)//;
+  chomp $orgName;
+  printf "%s\t%s\n", $asmId, $orgName;
+}
+close (FH);
+
+# GCA_003369685.2_UOA_Angus_1_assembly_report.txt
+# Organism name:
+
+# GCF_010993605.1_kPetMar1.pri
+# GCF_900246225.1_fAstCal1.2
+