c28af7c6bbe39c880c7e6dcc0f1c3ff2ee5b427c
angie
  Fri Sep 3 12:11:22 2021 -0700
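Grab lab for European Bioinformatics Institute-owned BioSample records, when the attributes give none, from an obscure location (the 'db' field of the 'Sample name' entry in sampleIds) found by Nextstrain team in ncov-ingest PR#208.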

diff --git src/hg/utils/otto/sarscov2phylo/bioSampleJsonToTab.py src/hg/utils/otto/sarscov2phylo/bioSampleJsonToTab.py
index 74a3680..5d1270a 100755
--- src/hg/utils/otto/sarscov2phylo/bioSampleJsonToTab.py
+++ src/hg/utils/otto/sarscov2phylo/bioSampleJsonToTab.py
@@ -98,33 +98,41 @@
     """Try to extract most complete sample collection date from attrs"""
     collectionDate = tryAttrs(attrs, ['collection date', 'collection_date'])
     receiptDate = tryAttrs(attrs, ['receipt date', 'receipt_date'])
     date = ""
     if receiptDate and collectionDate:
         if len(receiptDate) > len(collectionDate):
             date = receiptDate
         else:
             date = collectionDate
     elif collectionDate:
         date = collectionDate
     else:
         date = receiptDate
     return date
 
-def labFromAttrs(attrs):
-    return tryAttrs(attrs, ['collecting institution', 'collected by', 'INSDC center name',
+def labFromRecordAttrs(record, attrs):
+    lab = tryAttrs(attrs, ['collecting institution', 'collected by', 'INSDC center name',
                            'collected_by'])
+    # HT Jover Lee https://github.com/nextstrain/ncov-ingest/pull/208
+    if not lab and record.get('owner') and record['owner'].get('name') and \
+        record['owner']['name'] == "European Bioinformatics Institute":
+        if record.get('sampleIds'):
+            for sidBlob in record['sampleIds']:
+                if sidBlob.get('label') == "Sample name":
+                    lab = sidBlob['db']
+    return lab
 
 def authorFromAttrs(attrs):
     return tryAttrs(attrs, ['collector name'])
 
 def countryFromAttrs(attrs):
     return tryAttrs(attrs, ['geographic location', 'geo_loc_name'])
 
 def localeFromAttrs(attrs, country):
     locale = tryAttrs(attrs, ['geographic location (region and locality)'])
     if country and not locale and ':' in country:
         country, locale = country.split(':', 2)
         locale = locale.strip()
     return country, locale
 
 def hostIdFromAttrs(attrs):
@@ -160,26 +168,26 @@
 Read in NCBI Datasets / Virus biosample.jsonl file (each line has JSON for a BioSample record),
 extract relevant attributes, and output TSV.
 """
     )
     parser.add_argument('jsonFile', help='File with one line of JSON per BioSample entry')
     args = parser.parse_args()
 
     with open(args.jsonFile) as f:
         for line in f:
             record = json.loads(line)
             attrs = nameValToDDict(record['attributes'])
             acc = record['accession']
             gi = ""
             name = nameFromRecordAttrs(record, attrs)
             date = dateFromAttrs(attrs)
-            lab = labFromAttrs(attrs)
+            lab = labFromRecordAttrs(record, attrs)
             author = authorFromAttrs(attrs)
             country = countryFromAttrs(attrs)
             country, locale = localeFromAttrs(attrs, country)
             hostId = hostIdFromAttrs(attrs)
             sraId = sraIdFromRecord(record)
             epiId = epiIdFromRecordAttrs(record, attrs)
             print('\t'.join([gi, acc, name, date, lab, author, country, locale,
                              hostId, sraId, epiId]))
 
 main()