d43ec6011a8adc1ba82e3b2c2473e2043d300c03
hiram
  Thu Apr 19 14:10:07 2012 -0700
UCSC source from CVS source tree 2005 version
diff --git src/utils/cpgIslandExt/readseq.c src/utils/cpgIslandExt/readseq.c
new file mode 100644
index 0000000..67eb85a
--- /dev/null
+++ src/utils/cpgIslandExt/readseq.c
@@ -0,0 +1,191 @@
+/*  File: readseq.c
+ *  Author: Richard Durbin (rd@sanger.ac.uk)
+ *  Copyright (C) R Durbin, 1994
+ *-------------------------------------------------------------------
+ * Description: generic code to read Pearson format files (fasta)
+ 		>header line
+		conv[x] is the internal code for char 'x'
+		conv[x] == -1 means ignore. conv[x] < -1 means error.
+		will work on fil == stdin
+ * Exported functions: readSequence, seqConvert (and the tables dna2textConv, dna2indexConv)
+ * HISTORY:
+ * Last edited: Apr 26 14:44 1994 (rd)
+ * * Dec 29 23:35 1993 (rd): now works off FILE*, returns id and desc
+ * Created: Tue Jan 19 21:14:35 1993 (rd)
+ *-------------------------------------------------------------------
+ */
+
+#include "stdio.h"
+#include "stdlib.h"
+#include "ctype.h"
+
+/* Return a fresh block of n bytes; if malloc fails, report it and exit (-1). */
+static char *messalloc (int n)
+{
+  char *block = malloc (n) ;
+
+  if (block)
+    return block ;
+  fprintf (stderr, "MALLOC failure reqesting %d bytes - aborting\n", n) ;
+  exit (-1) ;
+}
+
+#define messfree(x) free(x)	/* release a block; plain alias for free() */
+
+/* Store c at index n of the growable buffer *buf, enlarging it when full.
+ * *buflen < 0 means nothing is allocated yet and -*buflen is the initial
+ * size.  n must not exceed *buflen (callers append one char at a time). */
+static void add (char c, char* *buf, int *buflen, int n)
+{
+  if (!buf) return ;		/* NULL buf: the caller does not want this output */
+  if (*buflen < 0)		/* first use: allocate the initial size */
+    { *buflen = -*buflen ;
+      *buf = messalloc (*buflen) ;
+    }
+  else if (n >= *buflen)	/* full: double, keeping the contents */
+    { *buflen *= 2 ;
+      *buf = realloc (*buf, *buflen) ;	/* old pointer not kept: failure is fatal */
+      if (!*buf)
+	{ fprintf (stderr, "REALLOC failure requesting %d bytes - aborting\n", *buflen) ;
+	  exit (-1) ;
+	}
+    }
+  (*buf)[n] = c ;
+}
+
+/* Read the next Pearson (fasta) record from fil.
+ *   conv    table indexed by character: >= 0 is the code to store, -1 means
+ *           ignore, < -1 means bad (reported on stderr, then skipped).  Its
+ *           entries for ' ', '\t' and '\n' are overwritten with -1.
+ *   seq, id, desc  may each be NULL to discard that output; otherwise each
+ *           receives a 0-terminated heap buffer that the caller owns.  With
+ *           no '>' header line, id and desc are empty strings.
+ *   length  if non-NULL, receives the number of codes stored.
+ * Stops at EOF or at the next '>', which is pushed back.  Bytes >= 128 are
+ * reported as bad, never used to index conv.  Returns the number stored. */
+int readSequence (FILE *fil, int *conv,
+		  char **seq, char **id, char **desc, int *length)
+{
+  int c, n ;
+  int code, buflen ;
+  static FILE *oldFil = 0 ;
+  static int line ;		/* line number within oldFil, for messages */
+
+  if (fil != oldFil)		/* different stream: restart the count */
+    { line = 1 ;
+      oldFil = fil ;
+    }
+
+  c = fgetc (fil) ;
+  if (c == '>')			/* header line: ">id desc" */
+    { c = fgetc (fil) ;
+
+      /* Test c against EOF, not feof(): fgetc also returns EOF on a read
+	 error, where feof() stays false and the loop would never end. */
+      n = 0 ;			/* id: up to the first white space */
+      buflen = -16 ;
+      while (c != EOF && c != ' ' && c != '\n' && c != '\t')
+	{ add (c, id, &buflen, n++) ;
+	  c = fgetc (fil) ;
+	}
+      add (0, id, &buflen, n++) ;
+
+      while (c == ' ' || c == '\t')	/* white space */
+	c = fgetc (fil) ;
+
+      n = 0 ;			/* desc: the rest of the line */
+      buflen = -32 ;
+      while (c != EOF && c != '\n')
+	{ add (c, desc, &buflen, n++) ;
+	  c = fgetc (fil) ;
+	}
+      add (0, desc, &buflen, n++) ;
+
+      ++line ;
+    }
+  else
+    { ungetc (c, fil) ;		/* no header line; does nothing if c is EOF */
+      buflen = -1 ;		/* the empty strings are allocated as well, */
+      add (0, id, &buflen, 0) ;	/* so the caller can free id and desc */
+      buflen = -1 ;		/* whichever branch was taken */
+      add (0, desc, &buflen, 0) ;
+    }
+
+  conv[' '] = conv['\t'] = conv['\n'] = -1 ;	/* ensure whitespace ignored */
+
+  n = 0 ;			/* sequence */
+  buflen = -1024 ;
+  while ((c = fgetc (fil)) != EOF)
+    { if (c == '>')		/* the next record starts here */
+	{ ungetc (c, fil) ;
+	  break ;
+	}
+      if (c == '\n')
+	++line ;
+      code = (c < 128) ? conv[c] : -2 ;	/* this file's tables hold 128 entries */
+      if (code >= 0)
+	add (code, seq, &buflen, n++) ;
+      else if (code < -1 && id)
+	fprintf (stderr, "Bad char 0x%x = '%c' at line %d, base %d, sequence %s\n",
+		 c, c, line, n, *id) ;
+      else if (code < -1)
+	fprintf (stderr, "Bad char 0x%x = '%c' at line %d, base %d\n",
+		 c, c, line, n) ;
+    }
+  add (0, seq, &buflen, n) ;
+  if (length)
+    *length = n ;
+  return n ;
+}
+
+/*****************************************************/
+
+/* Convert seq in place through conv, dropping ignored (-1) and bad (< -1)
+ * characters; bad ones are reported on stderr.  Stops at a 0 byte or, when
+ * length is non-NULL, after *length input chars.  Returns the number of
+ * codes kept and stores it in *length.  Bytes >= 128 are treated as bad. */
+int seqConvert (char *seq, int *length, int *conv)
+{
+  int i, n = 0 ;
+
+  /* test the limit before seq[i], so an unterminated buffer is not overrun */
+  for (i = 0 ; (!length || i < *length) && seq[i] ; ++i)
+    { int c = (unsigned char) seq[i] ;	/* plain char may be signed */
+      int code = (c < 128) ? conv[c] : -2 ;
+      if (code < -1)
+	fprintf (stderr, "Bad char 0x%x = '%c' at base %d in seqConvert\n", c, c, n) ;
+      else if (code >= 0)
+	seq[n++] = code ;
+    }
+  if (n < i)
+    seq[n] = 0 ;		/* terminate the shortened sequence */
+  if (length)
+    *length = n ;
+  return n ;
+}
+
+/*********** standard conversion tables **************/
+
+int dna2textConv[] = {		/* 128 entries: ASCII code -> upper-case base letter; -1 ignore, -2 bad */
+  -2,  -2,  -2,  -2,  -2,  -2,  -2,  -2,  -2,  -2,  -2,  -2,  -2,  -2,  -2,  -2, 	/* 0x00 - 0x0f */
+  -2,  -2,  -2,  -2,  -2,  -2,  -2,  -2,  -2,  -2,  -2,  -2,  -2,  -2,  -2,  -2, 	/* 0x10 - 0x1f */
+  -2,  -2,  -2,  -2,  -2,  -2,  -2,  -2,  -2,  -2,  -2,  -2,  -2,  -2,  -2,  -2, 	/* 0x20 - 0x2f: ' ' is -2 until readSequence sets it to -1 */
+  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -2,  -2,  -2,  -2,  -2,  -2,  /* 0x30 - 0x3f: ignore digits */
+  -2, 'A',  -2, 'C',  -2,  -2,  -2, 'G',  -2,  -2,  -2,  -2,  -2,  -2, 'N',  -2,	/* 0x40 - 0x4f: A C G N */
+  -2,  -2,  -2,  -2, 'T',  -2,  -2,  -2,  -2,  -2,  -2,  -2,  -2,  -2,  -2,  -2,	/* 0x50 - 0x5f: T */
+  -2, 'A',  -2, 'C',  -2,  -2,  -2, 'G',  -2,  -2,  -2,  -2,  -2,  -2, 'N',  -2,	/* 0x60 - 0x6f: a c g n */
+  -2,  -2,  -2,  -2, 'T',  -2,  -2,  -2,  -2,  -2,  -2,  -2,  -2,  -2,  -2,  -2,	/* 0x70 - 0x7f: t */
+} ;
+
+int dna2indexConv[] = {		/* 128 entries: ASCII code -> base index, A=0 C=1 G=2 T=3 N=4 in either case; -1 ignore, -2 bad */
+  -2,  -2,  -2,  -2,  -2,  -2,  -2,  -2,  -2,  -2,  -2,  -2,  -2,  -2,  -2,  -2, 	/* 0x00 - 0x0f */
+  -2,  -2,  -2,  -2,  -2,  -2,  -2,  -2,  -2,  -2,  -2,  -2,  -2,  -2,  -2,  -2, 	/* 0x10 - 0x1f */
+  -2,  -2,  -2,  -2,  -2,  -2,  -2,  -2,  -2,  -2,  -2,  -2,  -2,  -2,  -2,  -2, 	/* 0x20 - 0x2f: ' ' is -2 until readSequence sets it to -1 */
+  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -2,  -2,  -2,  -2,  -2,  -2,  /* 0x30 - 0x3f: ignore digits */
+  -2,   0,  -2,   1,  -2,  -2,  -2,   2,  -2,  -2,  -2,  -2,  -2,  -2,   4,  -2,	/* 0x40 - 0x4f: A C G N */
+  -2,  -2,  -2,  -2,   3,  -2,  -2,  -2,  -2,  -2,  -2,  -2,  -2,  -2,  -2,  -2,	/* 0x50 - 0x5f: T */
+  -2,   0,  -2,   1,  -2,  -2,  -2,   2,  -2,  -2,  -2,  -2,  -2,  -2,   4,  -2,	/* 0x60 - 0x6f: a c g n */
+  -2,  -2,  -2,  -2,   3,  -2,  -2,  -2,  -2,  -2,  -2,  -2,  -2,  -2,  -2,  -2,	/* 0x70 - 0x7f: t */
+} ;
+  
+/**************** end of file ***************/