83e926b3090278ebbbd801e0a7e0ef2f98ef0775 galt Tue Mar 29 01:55:50 2011 -0700 updating paraFetch and paraSync to support -newer option which only downloads newer files, very useful now that the dates are being preserved diff --git src/lib/net.c src/lib/net.c index bec4a47..e169195 100644 --- src/lib/net.c +++ src/lib/net.c @@ -1554,31 +1554,31 @@ unlink(outStat); return FALSE; } *pPcList = pcList; *pUrl = url; *pFileSize = fileSize; *pDateString = dateString; *pTotalDownloaded = totalDownloaded; return TRUE; } -boolean parallelFetch(char *url, char *outPath, int numConnections, int numRetries) +boolean parallelFetch(char *url, char *outPath, int numConnections, int numRetries, boolean newer) /* Open multiple parallel connections to URL to speed downloading */ { char *origPath = outPath; char outTemp[1024]; safef(outTemp, sizeof(outTemp), "%s.paraFetch", outPath); outPath = outTemp; /* get the size of the file to be downloaded */ off_t fileSize = 0; off_t totalDownloaded = 0; ssize_t sinceLastStatus = 0; char *dateString = ""; // TODO handle case-sensitivity of protocols input if (startsWith("http://",url) || startsWith("https://",url)) { struct hash *hash = newHash(0); @@ -1678,30 +1678,68 @@ if (fileSize == -1) restartable = FALSE; struct parallelConn *pcList = NULL, *pc; if (restartable && sameString(url, restartUrl) && fileSize == restartFileSize && sameString(dateString, restartDateString)) { pcList = restartPcList; totalDownloaded = restartTotalDownloaded; } else { + + if (newer) // only download it if it is newer than what we already have + { + /* datestamp mtime from last-modified header */ + struct tm tm; + // Last-Modified: Wed, 15 Nov 1995 04:58:08 GMT + // These strings are always GMT + if (strptime(dateString, "%a, %d %b %Y %H:%M:%S %Z", &tm) == NULL) + { + warn("unable to parse last-modified string [%s]", dateString); + } + else + { + time_t t; + // convert to UTC (GMT) time + t = mktimeFromUtc(&tm); + if (t == -1) + { + warn("mktimeFromUtc failed while converting last-modified string to UTC [%s]", dateString); + } + else + { + // get the file mtime + struct stat mystat; + ZeroVar(&mystat); + if (stat(origPath,&mystat)==0) + { + if (t <= mystat.st_mtime) + { + verbose(2,"Since nothing newer was found, skipping %s\n", origPath); + verbose(3,"t from last-modified = %ld; st_mtime = %ld\n", (long) t, (long)mystat.st_mtime); + return TRUE; + } + } + } + } + } + /* make a list of connections */ for (c = 0; c < numConnections; ++c) { AllocVar(pc); pc->next = NULL; pc->rangeStart = base; base += partSize; pc->partSize = partSize; if (fileSize != -1 && pc->rangeStart+pc->partSize >= fileSize) pc->partSize = fileSize - pc->rangeStart; pc->received = 0; pc->sd = -4; /* no connection tried yet */ slAddHead(&pcList, pc); } slReverse(&pcList);