232df494c4ee8f9e36fbd496b94e46f08a140628 max Fri Jun 13 08:13:47 2025 -0700 always make a new combined.html diff --git ucsc/updateNewsSec ucsc/updateNewsSec index e5fdced..857a324 100755 --- ucsc/updateNewsSec +++ ucsc/updateNewsSec @@ -133,64 +133,66 @@ day = info[2] # Do some comparison to see if data for current dataset in collection # is older than the last if day < oldDate: oldDate = day # Also add an entry to betaInfo that covers the collection as a whole # Cell count for this one is the sum of the cell counts for all subdatasets # Date is that for the oldest dataset in the collection betaInfo[cname] = [cshort, str(collCellCount), oldDate] else: dname, bList = processDataset(dataset, bdir) betaInfo[dname] = bList return betaInfo +def combineNews(): + # From https://stackoverflow.com/questions/3207219/how-do-i-list-all-files-of-a-directory + newsFiles = sorted(glob.glob("/hive/data/inside/cells/news/perDate/*.html"), reverse=True) + # Basically, we're gathering up all of the individual news files to combine them into one + filenames = ['/hive/data/inside/cells/news/basic.html'] + newsFiles + with open('/hive/data/inside/cells/news/combined.html','w') as outfile: + for fname in filenames: + with open(fname) as infile: + outfile.write(infile.read()) + def writeNewsHtml(toPrint, dateDir): """Takes a list of datasets and writes out an html file per day that lists all datasets released that day.""" for day in toPrint: dateOut = dateDir + str(day) + ".html" if os.path.exists(dateOut): htmlOut = open(dateOut, "a") else: htmlOut = open(dateOut, "w") # Do some work to get the date into something we can easily grab pieces of betterDate = time.strftime('%d-%b-%Y', day.timetuple()) splitDay = betterDate.split("-") # Separate vars for month/day/year month=splitDay[1] dayNum=splitDay[0] year=splitDay[2] # Write bits out to the news file for the specific day htmlOut.write("

" + month + " " + dayNum + ", " + year + "

\n") htmlOut.write("

New datasets:

\n\n") htmlOut.close() - # From https://stackoverflow.com/questions/3207219/how-do-i-list-all-files-of-a-directory - newsFiles = sorted(glob.glob("/hive/data/inside/cells/news/perDate/*.html"), reverse=True) - # Basically, we're gathering up all of the individual news files to combine them into one - filenames = ['/hive/data/inside/cells/news/basic.html'] + newsFiles - with open('/hive/data/inside/cells/news/combined.html','w') as outfile: - for fname in filenames: - with open(fname) as infile: - outfile.write(infile.read()) def main(): if args.run == True: # From https://stackoverflow.com/questions/19216334/python-give-start-and-end-of-week-data-from-a-given-date # and https://www.programiz.com/python-programming/datetime/current-datetime # Get date for Monday, so that all datasets added in the last week show up under the same date start = mondayBefore(date.today().strftime('%Y-%m-%d')) # File should contain RR datasets # First run of this script will generate this file, # move it out of the way to regenerate, though this means that # everything will be noted as being released on the same day rrDatasetsPath = "/hive/data/inside/cells/rr.datasets.txt" dateDir = "/hive/data/inside/cells/news/perDate/" @@ -207,30 +209,31 @@ day = betaInfo[entry][2] line = str(day) + "\t" + entry + "\t" + label + "\t" + count + "\n" with open(rrDatasetsPath, "a") as rrDatasets: rrDatasets.write(line) # Check if '/' in shortname, if so means it's a dataset in a collection # and we're not outputting it to the sitemap or to the announcements if "/" not in entry: addSiteToSitemap(entry, sitemapSet) outLine = "
  • " + label + "\n" if day not in toPrint.keys(): toPrint[day] = [outLine] else: toPrint[day].append(outLine) writeNewsHtml(toPrint, dateDir) + combineNews() else: # This is the main part of the function that prints out the html for a news update betaInfo = parseBetaDatasets() # Parse the old rr.datasets.txt file so we know what's already out there oldNames = set() oldDatasets = open(rrDatasetsPath,"r") for line in oldDatasets: splitLine = line.strip().split("\t") name = splitLine[1] oldDate = makeDate(splitLine[0]) # Remove entry from betaInfo dict if it existed in rrDatasetsFile if name in betaInfo.keys(): del betaInfo[name] oldNames.add(name) @@ -250,21 +253,22 @@ if entry not in oldNames: allDatasets.write(line) # Check if '/' in shortname, if so means it's a dataset in a collection # and we're not outputting it to the sitemap or to the announcements if "/" not in entry: addSiteToSitemap(entry, sitemapSet) outLine = "
  • " + label + "\n" # If doesn't already in exist in toPrint, add it if day not in toPrint.keys(): toPrint[day] = [outLine] else: toPrint[day].append(outLine) # Print out HTML for new datasets to be put into /hive/data/inside/cells/datasets/desc.conf writeNewsHtml(toPrint, dateDir) + combineNews() else: parser.print_help(sys.stderr) sys.exit(1) if __name__ == "__main__": main()