6f3ab158f7f53c7d3b4b9c07b02de26912c33f0f jcasper Mon Feb 6 14:22:01 2023 -0800 Removing outdated comments, refs #30603

diff --git src/hg/encode/getTrackReferences/getTrackReferences src/hg/encode/getTrackReferences/getTrackReferences
index 909bf53..234dd03 100755
--- src/hg/encode/getTrackReferences/getTrackReferences
+++ src/hg/encode/getTrackReferences/getTrackReferences
@@ -114,31 +114,30 @@
     if doi and 'doi' in infoDict:
         idStr = ("DOI: <a href=\"https://doi.org/%s\" target=\"_blank\">%s</a>; "
                  % (htmlEscape(infoDict['doi']), infoDict['doi'] ) ) + idStr

     # now that the pubmed link has been constructed, we can overwrite the url in infoDict with the original article URL
     # make sure the portlet that generates outlinks for PubMed didn't fail.  If it did, try again until
     # it works or we give up.
     # Note: no longer sure this is necessary - seems like NCBI is doing something different now, but at
     # any rate urllib2 no longer seems to fetch the links list in at least some cases.  Requests works.
     for try_count in range(10):
         origComment = ""
         fetch = requests.get(infoDict['url'])
         m = re.search('<div class="full-text-links-list">\s*<a\s+(class="[^"]*"\s+)?href="(\S+)"', fetch.text)
         if m:
             if m.group(2):
-                # Rhetorical: how can m match without m.group(1) being defined for this regex?  Anyway ....
                 infoDict['url'] = m.group(2).replace("&amp;", "&")
                 break
         else:
             origComment = "<!-- Can't find original article link for %s -->" % infoDict['url']
             #n = re.search('<div class="icons"></div>', doc)  # another possible detection of failed portlet
             p = re.search('Default output of portlet NCBIPageSection', doc)
             if p is None:
                 break
     else:
         print "Failed to fetch complete links from NCBI after 10 tries. Try again later or just use the PubMed paper link."

     htmlLines = list()
     htmlLines.append("<p>")
     htmlLines.append("%s" % authStr)
     htmlLines.append("<a href=\"%s\" target=\"_blank\">" % htmlEscape(infoDict['url']))
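For reference, here is a minimal standalone sketch (Python 3) of the retry-and-scrape pattern this hunk implements: fetch the PubMed page, pull the original-article href out of the full-text-links div, and retry only when the outlinks portlet failed to render. The regex, the ten-try limit, the "Default output of portlet NCBIPageSection" failure marker, and the `&amp;` unescaping are taken from the diff; the names `fetch_fulltext_link` and `max_tries` and the example PMID are illustrative assumptions, not part of getTrackReferences.

```python
#!/usr/bin/env python3
# Sketch only: mirrors the retry loop in the hunk above, not the full script.
import re
import requests

def fetch_fulltext_link(pubmed_url, max_tries=10):
    """Return the original-article href from a PubMed page, or None."""
    # Non-capturing group for the optional class attribute, so the href is
    # always group(1) -- sidestepping the group-numbering question the removed
    # "Rhetorical" comment was poking at.
    link_re = re.compile(
        r'<div class="full-text-links-list">\s*'
        r'<a\s+(?:class="[^"]*"\s+)?href="(\S+)"')
    for _ in range(max_tries):
        page = requests.get(pubmed_url).text
        m = link_re.search(page)
        if m:
            # PubMed serves the href HTML-escaped; undo the ampersands.
            return m.group(1).replace("&amp;", "&")
        if "Default output of portlet NCBIPageSection" not in page:
            # Links section rendered but held no full-text link; retrying
            # will not help.
            return None
        # Otherwise the outlinks portlet failed to render; try again.
    return None

if __name__ == "__main__":
    # Hypothetical PMID, purely for illustration.
    url = "https://pubmed.ncbi.nlm.nih.gov/12345678/"
    print(fetch_fulltext_link(url) or "No link found; use the PubMed URL.")
```

One design note: the script unescapes only `&amp;`; if the href could carry other HTML entities, `html.unescape` from the standard library would be the more general choice.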