src/lib/net.c 1.71

1.71 2009/08/19 03:44:27 galt
add https support for redirect
Index: src/lib/net.c
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/lib/net.c,v
retrieving revision 1.70
retrieving revision 1.71
diff -b -B -U 1000000 -r1.70 -r1.71
--- src/lib/net.c	10 Mar 2009 00:31:04 -0000	1.70
+++ src/lib/net.c	19 Aug 2009 03:44:27 -0000	1.71
@@ -1,1518 +1,1519 @@
 /* net.c some stuff to wrap around net communications. 
  *
  * This file is copyright 2002 Jim Kent, but license is hereby
  * granted for all use - public, private or commercial. */
 
 #include "common.h"
 #include <signal.h>
 #include <errno.h>
 #include <string.h>
 #include "internet.h"
 #include "errabort.h"
 #include "hash.h"
 #include "net.h"
 #include "linefile.h"
 #include "base64.h"
 #include "cheapcgi.h"
 #include "https.h"
 
 static char const rcsid[] = "$Id$";
 
 /* Brought errno in to get more useful error messages */
 
 extern int errno;
 
 static int netStreamSocket()
 /* Create a TCP/IP streaming socket.  Complain and return something
  * negative if can't */
 {
 int sd = socket(AF_INET, SOCK_STREAM, 0);
 if (sd < 0)
     warn("Couldn't make AF_INET socket.");
 return sd;
 }
 
 
 int netConnect(char *hostName, int port)
 /* Start connection with a server. */
 {
 int sd, err;
 struct sockaddr_in sai;		/* Some system socket info. */
 
 if (hostName == NULL)
     {
     warn("NULL hostName in netConnect");
     return -1;
     }
 if (!internetFillInAddress(hostName, port, &sai))
     return -1;
 if ((sd = netStreamSocket()) < 0)
     return sd;
 if ((err = connect(sd, (struct sockaddr*)&sai, sizeof(sai))) < 0)
    {
    warn("Couldn't connect to %s %d", hostName, port);
    close(sd);
    return err;
    }
 return sd;
 }
 
 int netMustConnect(char *hostName, int port)
 /* Start connection with server or die. */
 {
 int sd = netConnect(hostName, port);
 if (sd < 0)
    noWarnAbort();
 return sd;
 }
 
 int netMustConnectTo(char *hostName, char *portName)
 /* Start connection with a server and a port that needs to be converted to integer */
 {
 if (!isdigit(portName[0]))
     errAbort("netConnectTo: ports must be numerical, not %s", portName);
 return netMustConnect(hostName, atoi(portName));
 }
 
 
 int netAcceptingSocketFrom(int port, int queueSize, char *host)
 /* Create a socket that can accept connections from a 
  * IP address on the current machine if the current machine
  * has multiple IP addresses. */
 {
 struct sockaddr_in sai;
 int sd;
 int flag = 1;
 
 netBlockBrokenPipes();
 if ((sd = netStreamSocket()) < 0)
     return sd;
 if (!internetFillInAddress(host, port, &sai))
     return -1;
 if (setsockopt(sd, SOL_SOCKET, SO_REUSEADDR, &flag, sizeof(int)))
     return -1;
 if (bind(sd, (struct sockaddr*)&sai, sizeof(sai)) == -1)
     {
     warn("Couldn't bind socket to %d: %s", port, strerror(errno));
     close(sd);
     return -1;
     }
 listen(sd, queueSize);
 return sd;
 }
 
 int netAcceptingSocket(int port, int queueSize)
 /* Create a socket that can accept connections from
  * anywhere. */
 {
 return netAcceptingSocketFrom(port, queueSize, NULL);
 }
 
 int netAccept(int sd)
 /* Accept incoming connection from socket descriptor. */
 {
 socklen_t fromLen;
 return accept(sd, NULL, &fromLen);
 }
 
 int netAcceptFrom(int acceptor, unsigned char subnet[4])
 /* Wait for incoming connection from socket descriptor
  * from IP address in subnet.  Subnet is something
  * returned from netParseSubnet or internetParseDottedQuad. 
  * Subnet may be NULL. */
 {
 struct sockaddr_in sai;		/* Some system socket info. */
 ZeroVar(&sai);
 sai.sin_family = AF_INET;
 for (;;)
     {
     socklen_t addrSize = sizeof(sai);
     int sd = accept(acceptor, (struct sockaddr *)&sai, &addrSize);
     if (sd >= 0)
 	{
 	if (subnet == NULL)
 	    return sd;
 	else
 	    {
 	    unsigned char unpacked[4]; 
 	    internetUnpackIp(ntohl(sai.sin_addr.s_addr), unpacked);
 	    if (internetIpInSubnet(unpacked, subnet))
 		{
 		return sd;
 		}
 	    else
 		{
 		close(sd);
 		}
 	    }
 	}
     }
 }
 
 FILE *netFileFromSocket(int socket)
 /* Wrap a FILE around socket.  This should be fclose'd
  * and separately the socket close'd. */
 {
 FILE *f;
 if ((socket = dup(socket)) < 0)
    errnoAbort("Couldn't dupe socket in netFileFromSocket");
 f = fdopen(socket, "r+");
 if (f == NULL)
    errnoAbort("Couldn't fdopen socket in netFileFromSocket");
 return f;
 }
 
 static boolean plumberInstalled = FALSE;
 
 void netBlockBrokenPipes()
 /* Make it so a broken pipe doesn't kill us. */
 {
 if (!plumberInstalled)
     {
     signal(SIGPIPE, SIG_IGN);       /* Block broken pipe signals. */
     plumberInstalled = TRUE;
     }
 }
 
 size_t netReadAll(int sd, void *vBuf, size_t size)
 /* Read given number of bytes into buffer.
  * Don't give up on first read! */
 {
 char *buf = vBuf;
 size_t totalRead = 0;
 int oneRead;
 
 if (!plumberInstalled)
     netBlockBrokenPipes();
 while (totalRead < size)
     {
     oneRead = read(sd, buf + totalRead, size - totalRead);
     if (oneRead < 0)
 	return oneRead;
     if (oneRead == 0)
         break;
     totalRead += oneRead;
     }
 return totalRead;
 }
 
 int netMustReadAll(int sd, void *vBuf, size_t size)
 /* Read given number of bytes into buffer or die.
  * Don't give up if first read is short! */
 {
 int ret = netReadAll(sd, vBuf, size);
 if (ret < 0)
     errnoAbort("Couldn't finish netReadAll");
 return ret;
 }
 
 static void notGoodSubnet(char *sns)
 /* Complain about subnet format. */
 {
 errAbort("'%s' is not a properly formatted subnet.  Subnets must consist of\n"
          "one to three dot-separated numbers between 0 and 255\n", sns);
 }
 
 void netParseSubnet(char *in, unsigned char out[4])
 /* Parse subnet, which is a prefix of a normal dotted quad form.
  * Out will contain 255's for the don't care bits. */
 {
 out[0] = out[1] = out[2] = out[3] = 255;
 if (in != NULL)
     {
     char *snsCopy = strdup(in);
     char *words[5];
     int wordCount, i;
     wordCount = chopString(snsCopy, ".", words, ArraySize(words));
     if (wordCount > 3 || wordCount < 1)
         notGoodSubnet(in);
     for (i=0; i<wordCount; ++i)
 	{
 	char *s = words[i];
 	int x;
 	if (!isdigit(s[0]))
 	    notGoodSubnet(in);
 	x = atoi(s);
 	if (x > 255)
 	    notGoodSubnet(in);
 	out[i] = x;
 	}
     freez(&snsCopy);
     }
 }
 
 void netParseUrl(char *url, struct netParsedUrl *parsed)
 /* Parse a URL into components.   A full URL is made up as so:
  *   http://user:password@hostName:port/file;byterange=0-499
  * User and password may be cgi-encoded.
  * This is set up so that the http:// and the port are optional. 
  */
 {
 char *s, *t, *u, *v, *w, *x;
 char buf[1024];
 
 /* Make local copy of URL. */
 if (strlen(url) >= sizeof(buf))
     errAbort("Url too long: '%s'", url);
 strcpy(buf, url);
 url = buf;
 
 /* Find out protocol - default to http. */
 s = trimSpaces(url);
 s = stringIn("://", url);
 if (s == NULL)
     {
     strcpy(parsed->protocol, "http");
     s = url;
     }
 else
     {
     *s = 0;
     tolowers(url);
     strncpy(parsed->protocol, url, sizeof(parsed->protocol));
     s += 3;
     }
 
 /* Split off file part. */
 parsed->byteRangeStart = -1;  /* default to no byte range specified */
 parsed->byteRangeEnd = -1;
 u = strchr(s, '/');
 if (u == NULL)
     strcpy(parsed->file, "/");
 else
     {
     x = strrchr(u, ';');
     if (x)
 	{
 	if (startsWith(";byterange=", x))
 	    {
 	    char *y=strchr(x, '=');
 	    ++y;
 	    char *z=strchr(y, '-');
 	    if (z)
 		{
     		++z;
 		*x = 0;
 		// TODO: use something better than atol() ?
 		parsed->byteRangeStart = atoll(y); 
 		parsed->byteRangeEnd = atoll(z);
 	    	}    
 	    }
 	}
 
     /* need to encode spaces, but not ! other characters */
     char *t=replaceChars(u," ","%20");
     strncpy(parsed->file, t, sizeof(parsed->file));
     freeMem(t);
     *u = 0;
     }
 
 
 /* Split off user part */
 v = strchr(s, '@');
 if (v == NULL)
     {
     if (sameWord(parsed->protocol,"http") ||
         sameWord(parsed->protocol,"https"))
 	{
 	strcpy(parsed->user, "");
 	strcpy(parsed->password, "");
 	}
     if (sameWord(parsed->protocol,"ftp"))
 	{
 	strcpy(parsed->user, "anonymous");
 	strcpy(parsed->password, "x@genome.ucsc.edu");
 	}
     }
 else
     {
     *v = 0;
     /* split off password part */
     w = strchr(s, ':');
     if (w == NULL)
 	{
 	strncpy(parsed->user, s, sizeof(parsed->user));
 	strcpy(parsed->password, "");
 	}
     else
 	{
 	*w = 0;
 	strncpy(parsed->user, s, sizeof(parsed->user));
 	strncpy(parsed->password, w+1, sizeof(parsed->password));
 	}
     
     cgiDecode(parsed->user,parsed->user,strlen(parsed->user));
     cgiDecode(parsed->password,parsed->password,strlen(parsed->password));
     s = v+1;
     }
 
 
 /* Save port if it's there.  If not default to 80. */
 t = strchr(s, ':');
 if (t == NULL)
     {
     if (sameWord(parsed->protocol,"http"))
 	strcpy(parsed->port, "80");
     if (sameWord(parsed->protocol,"https"))
 	strcpy(parsed->port, "443");
     if (sameWord(parsed->protocol,"ftp"))
 	strcpy(parsed->port, "21");
     }
 else
     {
     *t++ = 0;
     if (!isdigit(t[0]))
 	errAbort("Non-numeric port name %s", t);
     strncpy(parsed->port, t, sizeof(parsed->port));
     }
 
 /* What's left is the host. */
 strncpy(parsed->host, s, sizeof(parsed->host));
 }
 
 /* this was cloned from rudp.c - move it later for sharing */
 static boolean readReadyWait(int sd, int microseconds)
 /* Wait for descriptor to have some data to read, up to
  * given number of microseconds. */
 {
 struct timeval tv;
 fd_set set;
 int readyCount;
 
 for (;;)
     {
     if (microseconds > 1000000)
 	{
 	tv.tv_sec = microseconds/1000000;
 	tv.tv_usec = microseconds%1000000;
 	}
     else
 	{
 	tv.tv_sec = 0;
 	tv.tv_usec = microseconds;
 	}
     FD_ZERO(&set);
     FD_SET(sd, &set);
     readyCount = select(sd+1, &set, NULL, NULL, &tv);
     if (readyCount < 0) 
 	{
 	if (errno == EINTR)	/* Select interrupted, not timed out. */
 	    continue;
     	else 
     	    warn("select failure in rudp: %s", strerror(errno));
     	}
     else
 	{
     	return readyCount > 0;	/* Zero readyCount indicates time out */
 	}
     }
 }
 
 void sendFtpCommandOnly(int sd, char *cmd)
 /* send command to ftp server */
 {   
 write(sd, cmd, strlen(cmd));
 }
 
 
 struct dyString *receiveFtpReply(int sd, char *cmd, boolean seeResult)
 /* send command to ftp server and check resulting reply code, 
    give error if not desired reply */
 {
 struct dyString *rs = NULL;
 int reply = 0;
 char buf[4*1024];
 int readSize;
 char *startLastLine = NULL;
 long timeOut = 1000000; /* wait in microsec */
 
 rs = newDyString(4*1024);
 while (1)
     {
     while (1)
 	{
 	if (!readReadyWait(sd, timeOut))
 	    {
 	    errAbort("ftp server response timed out > %ld microsec",timeOut);
 	    }
 	if ((readSize = read(sd, buf, sizeof(buf))) == 0)
 	    break;
 
 	dyStringAppendN(rs, buf, readSize);
 	if (endsWith(rs->string,"\n"))
 	    break;
 	}
 	
     /* find the start of the last line in the buffer */
     startLastLine = rs->string+strlen(rs->string)-1;
     if (startLastLine >= rs->string)
 	if (*startLastLine == '\n') 
 	    --startLastLine;
     while ((startLastLine >= rs->string) && (*startLastLine != '\n'))
 	--startLastLine;
     ++startLastLine;
 	
     if (strlen(startLastLine)>4)
       if (
 	isdigit(startLastLine[0]) &&
 	isdigit(startLastLine[1]) &&
 	isdigit(startLastLine[2]) &&
 	startLastLine[3]==' ')
 	break;
 	
     /* must be some text info we can't use, ignore it till we get status code */
 
     }
 
 reply = atoi(startLastLine);
 
 if ((reply < 200) || (reply > 399))
     errAbort("ftp server error on cmd=[%s] response=[%s]\n",cmd,rs->string);
     
 if (!seeResult) dyStringFree(&rs);
 
 return rs;
 }
 
 struct dyString *sendFtpCommand(int sd, char *cmd, boolean seeResult)
 /* send command to ftp server and check resulting reply code, 
    give error if not desired reply */
 {   
 sendFtpCommandOnly(sd, cmd);
 return receiveFtpReply(sd, cmd, seeResult);
 }
 
 int parsePasvPort(char *rs)
 /* parse PASV reply to get the port and return it */
 {
 char *words[7];
 int wordCount;
 char *rsStart = strchr(rs,'(');
 char *rsEnd = strchr(rs,')');
 int result = 0;
 rsStart++;
 *rsEnd=0;
 wordCount = chopString(rsStart, ",", words, ArraySize(words));
 if (wordCount != 6)
     errAbort("PASV reply does not parse correctly");
 result = atoi(words[4])*256+atoi(words[5]);    
 return result;
 }    
 
 
 long long parseFtpSIZE(char *rs)
 /* parse reply to SIZE and return it */
 {
 char *words[3];
 int wordCount;
 char *rsStart = rs;
 long long result = 0;
 wordCount = chopString(rsStart, " ", words, ArraySize(words));
 if (wordCount != 2)
     errAbort("SIZE reply does not parse correctly");
 result = atoll(words[1]);    
 return result;
 }    
 
 
 time_t parseFtpMDTM(char *rs)
 /* parse reply to MDTM and return it
  * 200 YYYYMMDDhhmmss */
 {
 char spread[] = "YYYY MM DD hh mm ss";
 char *to = spread;
 char *from = NULL;
 char *words[3];
 int wordCount;
 char *rsStart = rs;
 int len = strlen(rs);
 if (len == 0) 
     return FALSE;
 char *rsLast = rs + len - 1;
 if (*rsLast == '\n')
     {
     *rsLast = 0;
     --rsLast;
     --len;
     if (len == 0) 
 	return FALSE;
     }
 if (*rsLast == '\r')
     {
     *rsLast = 0;
     --rsLast;
     --len;
     if (len == 0) 
 	return FALSE;
     }
 wordCount = chopString(rsStart, " ", words, ArraySize(words));
 if (wordCount != 2)
     errAbort("MDTM reply does not parse correctly");
 
 //printf("MDTM parse string [%s], length=%lld\n", words[1], (long long) strlen(words[1]));
 
 from = words[1];
 
 *to++ = *from++;
 *to++ = *from++;
 *to++ = *from++;
 *to++ = *from++;
 *to++ = '-';
 *to++ = *from++;
 *to++ = *from++;
 *to++ = '-';
 *to++ = *from++;
 *to++ = *from++;
 *to++ = ' ';
 *to++ = *from++;
 *to++ = *from++;
 *to++ = ':';
 *to++ = *from++;
 *to++ = *from++;
 *to++ = ':';
 *to++ = *from++;
 *to++ = *from++;
 *to++ = 0;
 
 // printf("MDTM to [%s], length=%lld\n", spread, (long long) strlen(spread));
 
 struct tm tm;
 time_t t;
 
 if (strptime(spread, "%Y-%m-%d %H:%M:%S", &tm) == NULL)
     { /* Handle error */;
     errAbort("unable to parse MDTM string [%s]", spread);
     }
 
 //printf("year: %d; month: %d; day: %d;\n",
 //        tm.tm_year, tm.tm_mon, tm.tm_mday);
 //printf("hour: %d; minute: %d; second: %d\n",
 //        tm.tm_hour, tm.tm_min, tm.tm_sec);
 //printf("week day: %d; year day: %d\n", tm.tm_wday, tm.tm_yday);
 
 
 tm.tm_isdst = -1;      /* Not set by strptime(); tells mktime()
                           to determine whether daylight saving time
                           is in effect */
 t = mktime(&tm);
 if (t == -1)
     { /* Handle error */;
     errAbort("mktime failed while parsing last-modified string [%s]", words[1]);
     }
 
 //printf("seconds since the Epoch: %lld\n", (long long) t);"
 
 return t;
 }    
 
 
 
 boolean netGetFtpInfo(char *url, long long *retSize, time_t *retTime)
 /* Return date and size of ftp url file */
 {
 struct netParsedUrl npu;
 struct dyString *rs = NULL;
 int sd;
 long timeOut = 1000000; /* wait in microsec */
 char cmd[256];
 
 // TODO maybe remove this workaround where udc cache wants info on URL "/" ?
 
 /* Parse the URL and connect. */
 netParseUrl(url, &npu);
 
 if (!sameString(npu.protocol, "ftp"))
     errAbort("Sorry, can only netOpen ftp's currently");
 
 if (sameString(npu.file,"/"))
     {
     *retSize = 0;
     *retTime = time(NULL);
     return TRUE;
     }
 
 sd = netMustConnect(npu.host, atoi(npu.port));
 
 /* Ask remote ftp server for file info. */
 
 /* don't send a command, just read the welcome msg */
 if (readReadyWait(sd, timeOut))
     sendFtpCommand(sd, "", FALSE);
 
 safef(cmd,sizeof(cmd),"USER %s\r\n", npu.user);
 sendFtpCommand(sd, cmd, FALSE);
 
 safef(cmd,sizeof(cmd),"PASS %s\r\n", npu.password);
 sendFtpCommand(sd, cmd, FALSE);
 
 sendFtpCommand(sd, "TYPE I\r\n", FALSE);  // Not sure this is required for just size/date
 /* 200 Type set to I */
 /* (send the data as binary, so can support compressed files) */
 
 safef(cmd,sizeof(cmd),"SIZE %s\r\n", npu.file);
 rs = sendFtpCommand(sd, cmd, TRUE);
 *retSize = parseFtpSIZE(rs->string);
 /* 200 12345 */
 
 /* Clean up and return handle. */
 dyStringFree(&rs);
 
 safef(cmd,sizeof(cmd),"MDTM %s\r\n", npu.file);
 rs = sendFtpCommand(sd, cmd, TRUE);
 *retTime = parseFtpMDTM(rs->string);
 /* 200 YYYYMMDDhhmmss */
 
 /* Clean up and return handle. */
 dyStringFree(&rs);
 
 close(sd);   
 
 return TRUE;
 }
 
 
 int netGetOpenFtp(char *url)
 /* Return a file handle that will read the url. */
 {
 struct netParsedUrl npu;
 struct dyString *rs = NULL;
 int sd, sdata;
 long timeOut = 1000000; /* wait in microsec */
 char cmd[256];
 
 /* Parse the URL and connect. */
 netParseUrl(url, &npu);
 if (!sameString(npu.protocol, "ftp"))
     errAbort("Sorry, can only netOpen ftp's currently");
 sd = netMustConnect(npu.host, atoi(npu.port));
 
 /* Ask remote ftp server for a file. */
 
 /* don't send a command, just read the welcome msg */
 if (readReadyWait(sd, timeOut))
     sendFtpCommand(sd, "", FALSE);
 
 safef(cmd,sizeof(cmd),"USER %s\r\n",npu.user);
 sendFtpCommand(sd, cmd, FALSE);
 
 safef(cmd,sizeof(cmd),"PASS %s\r\n",npu.password);
 sendFtpCommand(sd, cmd, FALSE);
 
 sendFtpCommand(sd, "TYPE I\r\n", FALSE);
 /* 200 Type set to I */
 /* (send the data as binary, so can support compressed files) */
 
 rs = sendFtpCommand(sd, "PASV\r\n", TRUE);
 /* 227 Entering Passive Mode (128,231,210,81,222,250) */
 
 if ((npu.byteRangeStart != -1) && (npu.byteRangeEnd != -1))
     {
     safef(cmd,sizeof(cmd),"REST %lld\r\n", (long long) npu.byteRangeStart);
     sendFtpCommand(sd, cmd, FALSE);
     }
 
 safef(cmd,sizeof(cmd),"RETR %s\r\n", npu.file);
 sendFtpCommandOnly(sd, cmd);  
 
 sdata = netMustConnect(npu.host, parsePasvPort(rs->string));
 
 /* Because some FTP servers will kill the data connection
  * as soon as the control connection closes,
  * we have to develop a workaround using a partner process. */
 
 /* see which comes first, an error message on the control conn
  * or data on the data conn */
 
 int secondsWaited = 0;
 while (TRUE)
     {
     if (secondsWaited >= 10)
 	{
 	errAbort("ftp server error on cmd=[%s] timed-out waiting for data or error\n",cmd);
 	}
     timeOut = 1000000; /* wait in microsec */
     if (readReadyWait(sdata, timeOut))
 	{
 	break;   // we have some data
 	}
     if (readReadyWait(sd, 0)) /* wait in microsec */
 	{
 	receiveFtpReply(sd, cmd, FALSE);  // this can see an error like bad filename
 	}
     ++secondsWaited;
     }
     
 
 /* Clean up and return handle. */
 dyStringFree(&rs);
 
 
 fflush(stdin);
 fflush(stdout);
 fflush(stderr);
 
 int pipefd[2];
 
 pipe(pipefd);  /* make a pipe (fds go in pipefd[0] and pipefd[1])  */
 
 int pid = fork();
 
 if (pid < 0)
     errnoAbort("can't fork in netGetOpenFtp");
 if (pid == 0)
     {
     /* child */
 
     fclose(stdin);
     fclose(stdout);
 
     close(pipefd[0]);  /* close unused half of pipe */
 
     char buf[32768];
     int rd = 0;
     long long dataPos = 0; 
     if ((npu.byteRangeStart != -1) && (npu.byteRangeEnd != -1))
 	dataPos = npu.byteRangeStart;
     while((rd = read(sdata, buf, 32768)) > 0) 
 	{
 	if ((npu.byteRangeStart != -1) && (npu.byteRangeEnd != -1))
 	    if ((dataPos + rd) > npu.byteRangeEnd)
 		rd = npu.byteRangeEnd - dataPos + 1;
 	int wt = write(pipefd[1], buf, rd);
 	if (wt == -1)
 	    errnoAbort("error writing ftp data to pipe");
 	dataPos += rd;
 	if ((npu.byteRangeStart != -1) && (npu.byteRangeEnd != -1))
 	    if (dataPos >= npu.byteRangeEnd)
 		break;	    
 	}
     if (rd == -1)
 	errnoAbort("error reading ftp socket");
     close(pipefd[1]);  /* being safe */
     close(sd);
     close(sdata);
 
     exit(0);
 
     /* child will never get to here */
     }
 
 /* parent */
 
 close(pipefd[1]);  /* close unused unput half of pipe */
 
 /* although the parent closes these, the child has them open still */
 close(sd);   
 close(sdata);
 
 return pipefd[0];
 }
 
 int netHttpConnect(char *url, char *method, char *protocol, char *agent)
 /* Parse URL, connect to associated server on port,
  * and send most of the request to the server.  If
  * specified in the url send user name and password
  * too.  This does not send the final \r\n to finish
  * off the request, so that you can send cookies. 
  * Typically the "method" will be "GET" or "POST"
  * and the agent will be the name of your program or
  * library. */
 {
 struct netParsedUrl npu;
 struct dyString *dy = newDyString(512);
 int sd;
 
 /* Parse the URL and connect. */
 netParseUrl(url, &npu);
 if (sameString(npu.protocol, "http"))
     sd = netMustConnect(npu.host, atoi(npu.port));
 else if (sameString(npu.protocol, "https"))
     {
     sd = netMustConnectHttps(npu.host, atoi(npu.port));
     }
 else
     {
     errAbort("Sorry, can only netOpen http's currently");
     return -1;  /* never gets here, fixes compiler complaint */
     }
 
 /* Ask remote server for a file. */
 dyStringPrintf(dy, "%s %s %s\r\n", method, npu.file, protocol);
 dyStringPrintf(dy, "User-Agent: %s\r\n", agent);
 /* do not need the 80 since it is the default */
 if (sameString("80",npu.port))
     dyStringPrintf(dy, "Host: %s\r\n", npu.host);
 else
     dyStringPrintf(dy, "Host: %s:%s\r\n", npu.host, npu.port);
 if (!sameString(npu.user,""))
     {
     char up[256];
     char *b64up = NULL;
     safef(up, sizeof(up), "%s:%s", npu.user, npu.password);
     b64up = base64Encode(up, strlen(up));
     dyStringPrintf(dy, "Authorization: Basic %s\r\n", b64up);
     freez(&b64up);
     }
 dyStringAppend(dy, "Accept: */*\r\n");
 if ((npu.byteRangeStart != -1) && (npu.byteRangeEnd != -1))
     {
     dyStringPrintf(dy, "Range: bytes=%lld-%lld\r\n"
 	, (long long) npu.byteRangeStart
 	, (long long) npu.byteRangeEnd);
     }
 write(sd, dy->string, dy->stringSize);
 
 /* Clean up and return handle. */
 dyStringFree(&dy);
 return sd;
 }
 
 
 
 int netOpenHttpExt(char *url, char *method, boolean end)
 /* Return a file handle that will read the url.  If end is not
  * set then can send cookies and other info to returned file 
  * handle before reading. */
 {
 int sd =  netHttpConnect(url, method, "HTTP/1.0", "genome.ucsc.edu/net.c");
 if (end)
     write(sd, "\r\n", 2);
 return sd;
 }
 
 static int netGetOpenHttp(char *url)
 /* Return a file handle that will read the url.  */
 {
 return netOpenHttpExt(url, "GET", TRUE);
 }
 
 int netUrlHead(char *url, struct hash *hash)
 /* Go get head and return status.  Return negative number if
  * can't get head. If hash is non-null, fill it with header
  * lines, including hopefully Content-Type: */
 {
 int sd = netOpenHttpExt(url, "HEAD", TRUE);
 int status = EIO;
 if (sd >= 0)
     {
     char *line, *word;
     struct lineFile *lf = lineFileAttach(url, TRUE, sd);
 
     if (lineFileNext(lf, &line, NULL))
 	{
 	if (startsWith("HTTP/", line))
 	    {
 	    word = nextWord(&line);
 	    word = nextWord(&line);
 	    if (word != NULL && isdigit(word[0]))
 	        {
 		status = atoi(word);
 		if (hash != NULL)
 		    {
 		    while (lineFileNext(lf, &line, NULL))
 		        {
 			word = nextWord(&line);
 			if (word == NULL)
 			    break;
 			hashAdd(hash, word, cloneString(skipLeadingSpaces(line)));
 			}
 		    }
 		}
 	    }
 	}
     lineFileClose(&lf);
     }
 else
     status = errno;
 return status;
 }
 
 int netUrlOpen(char *url)
 /* Return unix low-level file handle for url. 
  * Just close(result) when done. */
 {
 if (startsWith("http://",url) || startsWith("https://",url) || (stringIn("://", url) == NULL))
     return netGetOpenHttp(url);
 else if (startsWith("ftp://",url))
     return netGetOpenFtp(url);
 else    
     errAbort("Sorry, can only netOpen http and ftp currently");
 return -1;    
 }
 
 struct dyString *netSlurpFile(int sd)
 /* Slurp file into dynamic string and return. */
 {
 char buf[4*1024];
 int readSize;
 struct dyString *dy = newDyString(4*1024);
 
 /* Slurp file into dy and return. */
 while ((readSize = read(sd, buf, sizeof(buf))) > 0)
     dyStringAppendN(dy, buf, readSize);
 return dy;
 }
 
 struct dyString *netSlurpUrl(char *url)
 /* Go grab all of URL and return it as dynamic string. */
 {
 int sd = netUrlOpen(url);
 struct dyString *dy = netSlurpFile(sd);
 close(sd);
 return dy;
 }
 
 
 boolean netSkipHttpHeaderLinesWithRedirect(int sd, char *url, char **redirectedUrl)
 /* Skip http header lines. Return FALSE if there's a problem.
  * The input is a standard sd or fd descriptor.
  * This is meant to be able work even with a re-passable stream handle,
  * e.g. can pass it to the pipes routines, which means we can't
  * attach a linefile since filling its buffer reads in more than just the http header.
  * Handles 300, 301, 302, 303, 307 http redirects by setting *redirectedUrl to
  * the new location. */
 {
 char buf[2000];
 char *line = buf;
 int maxbuf = sizeof(buf);
 int i=0;
 char c = ' ';
 int nread = 0;
 char *sep = NULL;
 char *headerName = NULL;
 char *headerVal = NULL;
 boolean redirect = FALSE;
 while(TRUE)
     {
     i = 0;
     while (TRUE)
 	{
 	nread = read(sd, &c, 1);  /* one char at a time, but http headers are small */
 	if (nread < 0)
 	    return FALSE;  /* err reading descriptor */
 	if (c == 10)
 	    break;
 	if (c != 13)
     	    buf[i++] = c;
 	if (i >= maxbuf)
 	    {
 	    warn("http header line too long > %d chars.",maxbuf);
 	    return FALSE;
 	    }
 	}
     buf[i] = 0;  /* add string terminator */
 
     if (sameString(line,""))
 	{
 	break; /* End of Header found */
 	}
     if (startsWith("HTTP/", line))
         {
 	char *version, *code;
 	version = nextWord(&line);
 	code = nextWord(&line);
 	if (code == NULL)
 	    {
 	    warn("Strange http header on %s\n", url);
 	    return FALSE;
 	    }
 	if (startsWith("30", code) && isdigit(code[2])
 	    && ((code[2] >= '0' && code[2] <= '3') || code[2] == '7') && code[3] == 0)
 	    {
 	    redirect = TRUE;
 	    }
 	else if (!(sameString(code, "200") || sameString(code, "206")))
 	    {
 	    warn("%s: %s %s\n", url, code, line);
 	    return FALSE;
 	    }
 	line = buf;  /* restore it */
 	}
     headerName = line;
     sep = strchr(line,':');
     if (sep)
 	{
 	*sep = 0;
 	headerVal = skipLeadingSpaces(++sep);
 	}
     else
 	{
 	headerVal = NULL;
 	}
     if (sameWord(headerName,"Location"))
 	{
 	if (redirect)
 	    *redirectedUrl = cloneString(headerVal);
 	}
     }
 return TRUE;
 }
 
 
 boolean netSkipHttpHeaderLinesHandlingRedirect(int sd, char *url, int *redirectedSd, char **redirectedUrl)
 /* Skip http headers lines, returning FALSE if there is a problem.  Generally called as
  *    netSkipHttpHeaderLine(sd, url, &sd, &url);
  * where sd is a socket (file) opened with netUrlOpen(url), and url is in dynamic memory.
  * If the http header indicates that the file has moved, then it will update the *redirectedSd and
  * *redirectedUrl with the new socket and URL, first closing sd.
  * If for some reason you want to detect whether the forwarding has occurred you could
  * call this as:
  *    char *newUrl = NULL;
  *    int newSd = 0;
  *    netSkipHttpHeaderLine(sd, url, &newSd, &newUrl);
  *    if (newUrl != NULL)
  *          // Update sd with newSd, free url if appropriate and replace it with newUrl, etc.
  *          //  free newUrl when finished.
  * This routine handles up to 5 steps of redirection.
  * The logic to this routine is also complicated a little to make it work in a pipe, which means we
  * can't attach a lineFile since filling the lineFile buffer reads in more than just the http header. */
 {
 int redirectCount = 0;
 while (TRUE)
     {
     /* url needed for err msgs, and to return redirect location */
     char *newUrl = NULL;
     boolean success = netSkipHttpHeaderLinesWithRedirect(sd, url, &newUrl);
     if (success && !newUrl) /* success after 0 to 5 redirects */
         {
 	if (redirectCount > 0)
 	    {
 	    *redirectedSd = sd;
 	    *redirectedUrl = url;
 	    }
 	return TRUE;
 	}
     close(sd);
     if (redirectCount > 0)
 	freeMem(url);
     if (success)
 	{
 	/* we have a new url to try */
 	++redirectCount;
 	if (redirectCount > 5)
 	    {
 	    warn("code 30x redirects: exceeded limit of 5 redirects, %s", newUrl);
 	    success = FALSE;
 	    }
-	else if (!startsWith("http://",newUrl))
+	else if (!startsWith("http://",newUrl) 
+              && !startsWith("https://",newUrl))
 	    {
-	    warn("redirected to non-http: %s", newUrl);
+	    warn("redirected to non-http(s): %s", newUrl);
 	    success = FALSE;
 	    }
 	else 
 	    {
 	    sd = netUrlOpen(newUrl);
 	    if (sd < 0)
 		{
 		warn("Couldn't open %s", newUrl);
 		success = FALSE;
 		}
 	    }
 	}
     if (!success)
 	{  /* failure after 0 to 5 redirects */
 	if (redirectCount > 0)
 	    freeMem(newUrl);
 	return FALSE;
 	}
     url = newUrl;
     }
 return FALSE;
 }
 
 struct lineFile *netLineFileMayOpen(char *url)
 /* Return a lineFile attached to url. http skips header.
  * Supports some compression formats.
  * Return NULL if there's a problem. */
 {
 int sd = netUrlOpen(url);
 if (sd < 0)
     {
     warn("Couldn't open %s", url);
     return NULL;
     }
 else
     {
     struct lineFile *lf = NULL;
     char *newUrl = NULL;
     int newSd = 0;
     if (startsWith("http://",url))
 	{  
 	if (!netSkipHttpHeaderLinesHandlingRedirect(sd, url, &newSd, &newUrl))
 	    {
 	    return NULL;
 	    }
 	if (newUrl != NULL)
 	    {
     	    /*  Update sd with newSd, replace it with newUrl, etc. */
 	    sd = newSd;
 	    url = newUrl;
 	    }
 	}
     if (endsWith(url, ".gz") ||
 	endsWith(url, ".Z")  ||
     	endsWith(url, ".bz2"))
 	{
 	lf = lineFileDecompressFd(url, TRUE, sd);
            /* url needed only for compress type determination */
 	}
     else
 	{
 	lf = lineFileAttach(url, TRUE, sd);
 	}
     if (newUrl) 
 	freeMem(newUrl); 
     return lf;
     }
 }
 
 
 struct lineFile *netLineFileOpen(char *url)
 /* Return a lineFile attached to url.  This one
  * will skip any headers.   Free this with
  * lineFileClose(). */
 {
 struct lineFile *lf = netLineFileMayOpen(url);
 if (lf == NULL)
     noWarnAbort();
 return lf;
 }
 
 boolean netSendString(int sd, char *s)
 /* Send a string down a socket - length byte first. */
 {
 int length = strlen(s);
 UBYTE len;
 
 if (length > 255)
     errAbort("Trying to send a string longer than 255 bytes (%d bytes)", length);
 len = length;
 if (write(sd, &len, 1)<0)
     {
     warn("Couldn't send string to socket");
     return FALSE;
     }
 if (write(sd, s, length)<0)
     {
     warn("Couldn't send string to socket");
     return FALSE;
     }
 return TRUE;
 }
 
 boolean netSendLongString(int sd, char *s)
 /* Send a long string down socket: two bytes for length. */
 {
 unsigned length = strlen(s);
 UBYTE b[2];
 
 if (length >= 64*1024)
     {
     warn("Trying to send a string longer than 64k bytes (%d bytes)", length);
     return FALSE;
     }
 b[0] = (length>>8);
 b[1] = (length&0xff);
 if (write(sd, b, 2) < 0)
     {
     warn("Couldn't send long string to socket");
     return FALSE;
     }
 if (write(sd, s, length)<0)
     {
     warn("Couldn't send long string to socket");
     return FALSE;
     }
 return TRUE;
 }
 
 boolean netSendHugeString(int sd, char *s)
 /* Send a long string down socket: four bytes for length. */
 {
 unsigned long length = strlen(s);
 unsigned long l = length;
 UBYTE b[4];
 int i;
 for (i=3; i>=0; --i)
     {
     b[i] = l & 0xff;
     l >>= 8;
     }
 if (write(sd, b, 4) < 0)
     {
     warn("Couldn't send huge string to socket");
     return FALSE;
     }
 if (write(sd, s, length) < 0)
     {
     warn("Couldn't send huge string to socket");
     return FALSE;
     }
 return TRUE;
 }
 
 
 char *netGetString(int sd, char buf[256])
 /* Read string into buf and return it.  If buf is NULL
  * an internal buffer will be used. Print warning message
  * and return NULL if any problem. */
 {
 static char sbuf[256];
 UBYTE len = 0;
 int length;
 int sz;
 if (buf == NULL) buf = sbuf;
 sz = netReadAll(sd, &len, 1);
 if (sz == 0)
     return NULL;
 if (sz < 0)
     {
     warn("Couldn't read string length");
     return NULL;
     }
 length = len;
 if (length > 0)
     if (netReadAll(sd, buf, length) < 0)
 	{
 	warn("Couldn't read string body");
 	return NULL;
 	}
 buf[length] = 0;
 return buf;
 }
 
 char *netGetLongString(int sd)
 /* Read string and return it.  freeMem
  * the result when done. */
 {
 UBYTE b[2];
 char *s = NULL;
 int length = 0;
 int sz;
 b[0] = b[1] = 0;
 sz = netReadAll(sd, b, 2);
 if (sz == 0)
     return NULL;
 if (sz < 0)
     {
     warn("Couldn't read long string length");
     return NULL;
     }
 length = (b[0]<<8) + b[1];
 s = needMem(length+1);
 if (length > 0)
     if (netReadAll(sd, s, length) < 0)
 	{
 	warn("Couldn't read long string body");
 	return NULL;
 	}
 s[length] = 0;
 return s;
 }
 
 char *netGetHugeString(int sd)
 /* Read string and return it.  freeMem
  * the result when done. */
 {
 UBYTE b[4];
 char *s = NULL;
 unsigned long length = 0;
 int sz, i;
 sz = netReadAll(sd, b, 4);
 if (sz == 0)
     return NULL;
 if (sz < 4)
     {
     warn("Couldn't read huge string length");
     return NULL;
     }
 for (i=0; i<4; ++i)
     {
     length <<= 8;
     length += b[i];
     }
 s = needMem(length+1);
 if (length > 0)
     {
     if (netReadAll(sd, s, length) < 0)
 	{
 	warn("Couldn't read huge string body");
 	return NULL;
 	}
     }
 s[length] = 0;
 return s;
 }
 
 
 char *netRecieveString(int sd, char buf[256])
 /* Read string into buf and return it.  If buf is NULL
  * an internal buffer will be used. Abort if any problem. */
 {
 char *s = netGetString(sd, buf);
 if (s == NULL)
      noWarnAbort();   
 return s;
 }
 
 char *netRecieveLongString(int sd)
 /* Read string and return it.  freeMem
  * the result when done. Abort if any problem*/
 {
 char *s = netGetLongString(sd);
 if (s == NULL)
      noWarnAbort();   
 return s;
 }
 
 char *netRecieveHugeString(int sd)
 /* Read string and return it.  freeMem
  * the result when done. Abort if any problem*/
 {
 char *s = netGetHugeString(sd);
 if (s == NULL)
      noWarnAbort();   
 return s;
 }
 
 
 struct lineFile *netHttpLineFileMayOpen(char *url, struct netParsedUrl **npu)
 /* Parse URL and open an HTTP socket for it but don't send a request yet. */
 {
 int sd;
 struct lineFile *lf;
 
 /* Parse the URL and try to connect. */
 AllocVar(*npu);
 netParseUrl(url, *npu);
 if (!sameString((*npu)->protocol, "http"))
     errAbort("Sorry, can only netOpen http's currently");
 sd = netConnect((*npu)->host, atoi((*npu)->port));
 if (sd < 0)
     return NULL;
 
 /* Return handle. */
 lf = lineFileAttach(url, TRUE, sd);
 return lf;
 } /* netHttpLineFileMayOpen */
 
 
 void netHttpGet(struct lineFile *lf, struct netParsedUrl *npu,
 		boolean keepAlive)
 /* Send a GET request, possibly with Keep-Alive. */
 {
 struct dyString *dy = newDyString(512);
 
 /* Ask remote server for the file/query. */
 dyStringPrintf(dy, "GET %s HTTP/1.1\r\n", npu->file);
 dyStringPrintf(dy, "User-Agent: genome.ucsc.edu/net.c\r\n");
 dyStringPrintf(dy, "Host: %s:%s\r\n", npu->host, npu->port);
 if (!sameString(npu->user,""))
     {
     char up[256];
     char *b64up = NULL;
     safef(up,sizeof(up), "%s:%s", npu->user, npu->password);
     b64up = base64Encode(up, strlen(up));
     dyStringPrintf(dy, "Authorization: Basic %s\r\n", b64up);
     freez(&b64up);
     }
 dyStringAppend(dy, "Accept: */*\r\n");
 if (keepAlive)
   {
     dyStringAppend(dy, "Connection: Keep-Alive\r\n");
     dyStringAppend(dy, "Connection: Persist\r\n");
   }
 else
     dyStringAppend(dy, "Connection: close\r\n");
 dyStringAppend(dy, "\r\n");
 write(lf->fd, dy->string, dy->stringSize);
 /* Clean up. */
 dyStringFree(&dy);
 } /* netHttpGet */
 
 int netHttpGetMultiple(char *url, struct slName *queries, void *userData,
 		       void (*responseCB)(void *userData, char *req,
 					  char *hdr, struct dyString *body))
 /* Given an URL which is the base of all requests to be made, and a 
  * linked list of queries to be appended to that base and sent in as 
  * requests, send the requests as a batch and read the HTTP response 
  * headers and bodies.  If not all the requests get responses (i.e. if 
  * the server is ignoring Keep-Alive or is imposing a limit), try again 
  * until we can't connect or until all requests have been served. 
  * For each HTTP response, do a callback. */
 {
   struct slName *qStart;
   struct slName *qPtr;
   struct lineFile *lf;
   struct netParsedUrl *npu;
   struct dyString *dyQ    = newDyString(512);
   struct dyString *body;
   char *base;
   char *hdr;
   int qCount;
   int qTotal;
   int numParseFailures;
   int contentLength;
   boolean chunked;
   boolean done;
   boolean keepAlive;
 
   /* Find out how many queries we'll need to do so we know how many times 
    * it's OK to run into end of file in case server ignores Keep-Alive. */
   qTotal = 0;
   for (qPtr = queries;  qPtr != NULL;  qPtr = qPtr->next)
     {
       qTotal++;
     }
 
   done = FALSE;
   qCount = 0;
   numParseFailures = 0;
   qStart = queries;
   while ((! done) && (qStart != NULL))
     {
       lf = netHttpLineFileMayOpen(url, &npu);
       if (lf == NULL)
 	{
 	  done = TRUE;
 	  break;
 	}
       base = cloneString(npu->file);
       /* Send all remaining requests with keep-alive. */
       for (qPtr = qStart;  qPtr != NULL;  qPtr = qPtr->next)
 	{
 	  dyStringClear(dyQ);
 	  dyStringAppend(dyQ, base);
 	  dyStringAppend(dyQ, qPtr->name);
 	  strcpy(npu->file, dyQ->string);
 	  keepAlive = (qPtr->next == NULL) ? FALSE : TRUE;
 	  netHttpGet(lf, npu, keepAlive);
 	}
       /* Get as many responses as we can; call responseCB() and 
        * advance qStart for each. */
       for (qPtr = qStart;  qPtr != NULL;  qPtr = qPtr->next)
         {
 	  if (lineFileParseHttpHeader(lf, &hdr, &chunked, &contentLength))
 	    {
 	      body = lineFileSlurpHttpBody(lf, chunked, contentLength);
 	      dyStringClear(dyQ);
 	      dyStringAppend(dyQ, base);
 	      dyStringAppend(dyQ, qPtr->name);
 	      responseCB(userData, dyQ->string, hdr, body);
 	      qStart = qStart->next;
 	      qCount++;
 	    }
 	  else
 	    {
 	      if (numParseFailures++ > qTotal) {
 		done = TRUE;
 	      }
 	      break;
 	    }
 	}
     }
 
   return qCount;
 } /* netHttpMultipleQueries */