1468e11700d139e452a7e77f9e30b4f87f9f38ec mspeir Fri Jun 13 15:30:58 2025 -0700 Fixing script so that only one entry per date is added to combined.html, added several options to script to control output directories for the various files which is mostly useful for debugging

diff --git ucsc/updateNewsSec ucsc/updateNewsSec
index 857a324..7a10469 100755
--- ucsc/updateNewsSec
+++ ucsc/updateNewsSec
@@ -1,52 +1,62 @@
 #! /usr/bin/env python3
 
 import json, sys, operator, argparse, os, urllib.request, subprocess, time, glob
 from datetime import datetime, timedelta, date
 
 parser = argparse.ArgumentParser(
     formatter_class=argparse.RawDescriptionHelpFormatter,
     description="Update 'News' section in desc.conf. Also updates sitemap and list of datasets currently on the RR.")
 parser.add_argument("-r","--run", action='store_true',
     help='run script to update news, rr datasets list, and sitemap.')
+parser.add_argument("-n","--newsOnly", action='store_true',
+    help='only update news html')
+parser.add_argument("-p","--perDateDir", action='store', default="/hive/data/inside/cells/news/perDate/",
+    help='where to find perDate news file')
+parser.add_argument("-o","--newsOutDir", action='store', default="/hive/data/inside/cells/news/",
+    help='where to output news file')
+parser.add_argument("-d","--rrDatasetsOut", action='store', default="/hive/data/inside/cells/",
+    help='directory where to find/output rr.datasets.txt')
+parser.add_argument("-s","--sitemap", action='store', default="/hive/data/inside/cells/",
+    help='directory where to output sitemap file')
 args = parser.parse_args()
 
 def buildSitemapSet():
     """Builds a set of all URLs currently in the sitemap."""
-    sitemapPath = "/hive/data/inside/cells/sitemap.cells.txt"
+    sitemapPath = args.sitemap + "/sitemap.cells.txt"
     sitemapSet = set()
     # If the sitemap file exists, then open it and add each line to a new set
     if os.path.exists(sitemapPath):
         sitemap = open(sitemapPath, "r")
         for line in sitemap:
             sitemapSet.add(line)
         sitemap.close()
     # Otherwise, just make an empty file?
     # Maybe I should try/except here instead?
     else:
         sitemap = open(sitemapPath, "w")
         sitemap.close()
 
     return sitemapSet
 
 def addSiteToSitemap(entry, sitemapSet):
     """Writes an entry out to the sitemap if it's not already there."""
     # Open sitemap file
-    with open("/hive/data/inside/cells/sitemap.cells.txt", "a") as sitemap:
+    with open(args.sitemap + "/sitemap.cells.txt", "a") as sitemap:
         urlline = "https://" + entry + ".cells.ucsc.edu\n"
         if urlline not in sitemapSet:
             sitemap.write(urlline)
 
 def makeDate(aDate):
     """Will turn a string date separated by "-" into a date object."""
     # to make a date object, we need at least the year, month, and day
     splitDate = aDate.split("-")
     year = int(splitDate[0])
     month = int(splitDate[1])
     day = int(splitDate[2])
     dateObj = date(year, month, day)
     return dateObj
@@ -134,139 +144,138 @@
                 # Do some comparison to see if data for current dataset in collection
                 # is older than the last
                 if day < oldDate:
                     oldDate = day
             # Also add an entry to betaInfo that covers the collection as a whole
             # Cell count for this one is the sum of the cell counts for all subdatasets
             # Date is that for the oldest dataset in the collection
             betaInfo[cname] = [cshort, str(collCellCount), oldDate]
         else:
             dname, bList = processDataset(dataset, bdir)
             betaInfo[dname] = bList
     return betaInfo
 
 def combineNews():
+    """ Combines all of the perDate files into a single html file"""
+
     # From https://stackoverflow.com/questions/3207219/how-do-i-list-all-files-of-a-directory
-    newsFiles = sorted(glob.glob("/hive/data/inside/cells/news/perDate/*.html"), reverse=True)
+    newsFiles = sorted(glob.glob(args.perDateDir+"*.html"), reverse=True)
     # Basically, we're gathering up all of the individual news files to combine them into one
-    filenames = ['/hive/data/inside/cells/news/basic.html'] + newsFiles
-    with open('/hive/data/inside/cells/news/combined.html','w') as outfile:
+    basicNews = args.newsOutDir + "/basic.html"
+    filenames = [basicNews] + newsFiles
+    with open(args.newsOutDir + '/combined.html','w') as outfile:
         for fname in filenames:
             with open(fname) as infile:
                 outfile.write(infile.read())
 
 def writeNewsHtml(toPrint, dateDir):
     """Takes a list of datasets and writes out an html file per day that lists all datasets released that day."""
     for day in toPrint:
         dateOut = dateDir + str(day) + ".html"
-        if os.path.exists(dateOut):
-            htmlOut = open(dateOut, "a")
-        else:
-            htmlOut = open(dateOut, "w")
+        htmlOut = open(dateOut, "w")
         # Do some work to get the date into something we can easily grab pieces of
         betterDate = time.strftime('%d-%b-%Y', day.timetuple())
         splitDay = betterDate.split("-")
         # Separate vars for month/day/year
         month=splitDay[1]
         dayNum=splitDay[0]
         year=splitDay[2]
         # Write bits out to the news file for the specific day
         htmlOut.write("" + month + " " + dayNum + ", " + year + "\n")
         htmlOut.write("New datasets:\n
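
Note on the two changes above: dropping the append ("a") branch means each per-date html file is rewritten from scratch on every run, which is what keeps combined.html down to a single entry per date when the script is run repeatedly. Below is a minimal sketch of a debugging invocation that exercises the new directory options; the /tmp paths are made up for illustration, and whether -r must still accompany -n depends on logic outside this hunk.

    # Hypothetical debug run (illustrative paths): point the news input/output
    # directories at a scratch area so nothing under /hive/data/inside/cells is touched.
    import subprocess

    subprocess.run([
        "ucsc/updateNewsSec",
        "-r",                            # run the update
        "-n",                            # only rebuild the news html
        "-p", "/tmp/newsTest/perDate/",  # perDate *.html inputs
        "-o", "/tmp/newsTest/",          # basic.html is read from, and combined.html written to, this directory
    ], check=True)

Since the script builds paths by plain string concatenation (args.perDateDir + "*.html" and args.newsOutDir + "/basic.html"), the -p value needs a trailing slash while -o does not.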