232df494c4ee8f9e36fbd496b94e46f08a140628
max
Fri Jun 13 08:13:47 2025 -0700
always make a new combined.html
diff --git ucsc/updateNewsSec ucsc/updateNewsSec
index e5fdced..857a324 100755
--- ucsc/updateNewsSec
+++ ucsc/updateNewsSec
@@ -133,64 +133,66 @@
day = info[2]
# Do some comparison to see if data for current dataset in collection
# is older than the last
if day < oldDate:
oldDate = day
# Also add an entry to betaInfo that covers the collection as a whole
# Cell count for this one is the sum of the cell counts for all subdatasets
# Date is that for the oldest dataset in the collection
betaInfo[cname] = [cshort, str(collCellCount), oldDate]
else:
dname, bList = processDataset(dataset, bdir)
betaInfo[dname] = bList
return betaInfo
def combineNews(newsDir="/hive/data/inside/cells/news"):
    """Rebuild combined.html from basic.html plus every per-date news file.

    The output is the content of <newsDir>/basic.html followed by the content
    of each <newsDir>/perDate/*.html file, taken in descending file-name order
    (per-date files appear to be named after their date, so presumably this
    puts the newest first -- confirm against writeNewsHtml).

    All inputs are read before combined.html is opened, so a missing or
    unreadable input raises without truncating an existing combined.html.

    Args:
        newsDir: directory that holds basic.html, the perDate/ subdirectory
            and the combined.html output. The default is the location this
            script has always used.

    Raises:
        IOError / OSError: if an input file cannot be read or the output
            cannot be written.
    """
    # From https://stackoverflow.com/questions/3207219/how-do-i-list-all-files-of-a-directory
    newsFiles = sorted(glob.glob(os.path.join(newsDir, "perDate", "*.html")), reverse=True)
    # Basically, we're gathering up all of the individual news files to combine them into one
    filenames = [os.path.join(newsDir, "basic.html")] + newsFiles
    pieces = []
    for fname in filenames:
        with open(fname) as infile:
            pieces.append(infile.read())
    # Only touch the output once every input has been read successfully
    with open(os.path.join(newsDir, "combined.html"), "w") as outfile:
        outfile.write("".join(pieces))
+
def writeNewsHtml(toPrint, dateDir):
"""Write one HTML news file per day listing the datasets released that day.

toPrint: mapping whose keys are date-like objects (each must provide
.timetuple()) and whose values are lists of strings; every string is
written to that day's file as-is.
dateDir: path prefix for the per-day files. The file name is built by plain
string concatenation (dateDir + str(day) + ".html"), so dateDir needs to
end with a path separator.
"""
for day in toPrint:
dateOut = dateDir + str(day) + ".html"
# An existing per-day file is appended to rather than replaced, so writing
# the same day again adds another block to that file.
# NOTE(review): mode "a" also creates a missing file, so the two branches
# look equivalent -- confirm before simplifying.
if os.path.exists(dateOut):
htmlOut = open(dateOut, "a")
else:
htmlOut = open(dateOut, "w")
# Do some work to get the date into something we can easily grab pieces of
betterDate = time.strftime('%d-%b-%Y', day.timetuple())
splitDay = betterDate.split("-")
# Separate vars for month/day/year
month=splitDay[1]
dayNum=splitDay[0]
year=splitDay[2]
# Write bits out to the news file for the specific day
# NOTE(review): the string literals below are split across lines in this
# rendering; any markup they contained was presumably lost in extraction --
# check against the file in the repository.
htmlOut.write("
" + month + " " + dayNum + ", " + year + "
\n")
htmlOut.write("New datasets:
\n\n")
for line in toPrint[day]:
htmlOut.write(line)
htmlOut.write("
\n")
htmlOut.close()
- # From https://stackoverflow.com/questions/3207219/how-do-i-list-all-files-of-a-directory
- newsFiles = sorted(glob.glob("/hive/data/inside/cells/news/perDate/*.html"), reverse=True)
- # Basically, we're gathering up all of the individual news files to combine them into one
- filenames = ['/hive/data/inside/cells/news/basic.html'] + newsFiles
- with open('/hive/data/inside/cells/news/combined.html','w') as outfile:
- for fname in filenames:
- with open(fname) as infile:
- outfile.write(infile.read())
def main():
if args.run == True:
# From https://stackoverflow.com/questions/19216334/python-give-start-and-end-of-week-data-from-a-given-date
# and https://www.programiz.com/python-programming/datetime/current-datetime
# Get date for Monday, so that all datasets added in the last week show up under the same date
start = mondayBefore(date.today().strftime('%Y-%m-%d'))
# File should contain RR datasets
# First run of this script will generate this file,
# move it out of the way to regenerate, though this means that
# everything will be noted as being released on the same day
rrDatasetsPath = "/hive/data/inside/cells/rr.datasets.txt"
dateDir = "/hive/data/inside/cells/news/perDate/"
@@ -207,30 +209,31 @@
day = betaInfo[entry][2]
line = str(day) + "\t" + entry + "\t" + label + "\t" + count + "\n"
with open(rrDatasetsPath, "a") as rrDatasets:
rrDatasets.write(line)
# Check if '/' in shortname, if so means it's a dataset in a collection
# and we're not outputting it to the sitemap or to the announcements
if "/" not in entry:
addSiteToSitemap(entry, sitemapSet)
outLine = " " + label + "\n"
if day not in toPrint.keys():
toPrint[day] = [outLine]
else:
toPrint[day].append(outLine)
writeNewsHtml(toPrint, dateDir)
+ combineNews()
else: # This is the main part of the function that prints out the html for a news update
betaInfo = parseBetaDatasets()
# Parse the old rr.datasets.txt file so we know what's already out there
oldNames = set()
oldDatasets = open(rrDatasetsPath,"r")
for line in oldDatasets:
splitLine = line.strip().split("\t")
name = splitLine[1]
oldDate = makeDate(splitLine[0])
# Remove entry from betaInfo dict if it existed in rrDatasetsFile
if name in betaInfo.keys():
del betaInfo[name]
oldNames.add(name)
@@ -250,21 +253,22 @@
if entry not in oldNames:
allDatasets.write(line)
# Check if '/' in shortname, if so means it's a dataset in a collection
# and we're not outputting it to the sitemap or to the announcements
if "/" not in entry:
addSiteToSitemap(entry, sitemapSet)
outLine = " " + label + "\n"
# If the day doesn't already exist in toPrint, add it
if day not in toPrint.keys():
toPrint[day] = [outLine]
else:
toPrint[day].append(outLine)
# Print out HTML for new datasets to be put into /hive/data/inside/cells/datasets/desc.conf
writeNewsHtml(toPrint, dateDir)
+ combineNews()
else:
parser.print_help(sys.stderr)
sys.exit(1)
# Script entry point: call main() only when this file is executed directly.
if __name__ == "__main__":
main()