d827b292f2da39b290412c402752b1bd68b37d2e lrnassar Wed Oct 25 17:00:56 2023 -0700

Adding a new condition to check for another regex pattern. I found that two of the
broken examples had spaces in the URL, so try to match on that and translate them.
I also found one that incorrectly had three backslashes, so try to fix that too.
I added it in a new loop so as not to affect the original functionality. This seems
to cover all of the broken cases we had noted. Refs #31963

diff --git src/hg/encode/getTrackReferences/getTrackReferences src/hg/encode/getTrackReferences/getTrackReferences
index 234dd03..2b99cb9 100755
--- src/hg/encode/getTrackReferences/getTrackReferences
+++ src/hg/encode/getTrackReferences/getTrackReferences
@@ -108,44 +108,57 @@
     if not re.search("\.$", dateStr):
         dateStr = dateStr + "."
     # construct hyperlinks for PMID and PMCID (if it exists)
     idStr = "PMID: <a href=\"%s\" target=\"_blank\">%s</a>" % (htmlEscape(infoDict['url']), infoDict['pubmed'])
     if 'pmc' in infoDict:
         idStr = idStr + "; PMC: <a href=\"https://www.ncbi.nlm.nih.gov/pmc/articles/%s/\" target=\"_blank\">%s</a>" % (infoDict['pmc'], infoDict['pmc'])
     if doi and 'doi' in infoDict:
         idStr = ("DOI: <a href=\"%s\" target=\"_blank\">%s</a>; " % (htmlEscape(infoDict['doi']), infoDict['doi'])) + idStr
     # now that the pubmed link has been constructed, we can overwrite the url in infoDict with the original article URL
     # make sure the portlet that generates outlinks for PubMed didn't fail. If it did, try again until
     # it works or we give up.
     # Note: no longer sure this is necessary - seems like NCBI is doing something different now, but at
     # any rate urllib2 no longer seems to fetch the links list in at least some cases. Requests works.
+    origUrl = infoDict['url']
     for try_count in range(10):
         origComment = ""
+        infoDict['url'] = origUrl
         fetch = requests.get(infoDict['url'])
+        try:
             m = re.search('...', doc)
             # another possible detection of failed portlet
             p = re.search('Default output of portlet NCBIPageSection', doc)
             if p is None:
                 break
+        except:
+            try:
+                m = re.search('
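
The commit message describes two cleanups of broken outlink URLs: translating literal spaces
and repairing a stray run of three backslashes. The sketch below illustrates that idea outside
the diff; it is an assumption-laden illustration, not the commit's actual code. The helper name
cleanOutlinkUrl, the %20 translation, and the scheme-backslash pattern are guesses at what
"translate them" and "three backslashes" mean here.

import re

def cleanOutlinkUrl(url):
    # Hypothetical helper, not from the commit: normalize the two kinds of
    # breakage mentioned in the commit message.
    # 1) Literal spaces in the outlink URL -- translate them to %20.
    url = url.replace(' ', '%20')
    # 2) A run of exactly three backslashes after the scheme -- rewrite as '://'.
    url = re.sub(r'^(https?):\\{3}', r'\1://', url)
    return url

# Example with assumed input shapes:
#   cleanOutlinkUrl(r'http:\\\example.org/my paper.pdf')
#   -> 'http://example.org/my%20paper.pdf'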