src/hg/cgilib/cutter.c 533112afe2a2005e80cdb1f82904ea65032d4302

533112afe2a2005e80cdb1f82904ea65032d4302
braney
  Sat Oct 2 11:37:34 2021 -0700
split hg/lib into two separate libaries, one only used by the cgis

diff --git src/hg/cgilib/cutter.c src/hg/cgilib/cutter.c
new file mode 100644
index 0000000..8c5425e
--- /dev/null
+++ src/hg/cgilib/cutter.c
@@ -0,0 +1,791 @@
+/* cutter.c was originally generated by the autoSql program, which also 
+ * generated cutter.h and cutter.sql.  This module links the database and
+ * the RAM representation of objects. */
+
+/* Copyright (C) 2014 The Regents of the University of California 
+ * See README in this or parent directory for licensing information. */
+
+#include "common.h"
+#include "linefile.h"
+#include "dystring.h"
+#include "jksql.h"
+#include "dnaseq.h"
+#include "dnautil.h"
+#include "bed.h"
+#include "cutter.h"
+
+
+struct cutter *cutterLoad(char **row)
+/* Load a cutter from row fetched with select * from cutter
+ * from database.  Dispose of this with cutterFree(). */
+{
+struct cutter *ret;
+
+AllocVar(ret);
+ret->numSciz = sqlUnsigned(row[8]);
+ret->numCompanies = sqlUnsigned(row[10]);
+ret->numRefs = sqlUnsigned(row[12]);
+ret->name = cloneString(row[0]);
+ret->size = sqlUnsigned(row[1]);
+ret->matchSize = sqlUnsigned(row[2]);
+ret->seq = cloneString(row[3]);
+ret->cut = sqlUnsigned(row[4]);
+ret->overhang = sqlSigned(row[5]);
+ret->palindromic = sqlUnsigned(row[6]);
+ret->semicolon = sqlUnsigned(row[7]);
+{
+int sizeOne;
+sqlStringDynamicArray(row[9], &ret->scizs, &sizeOne);
+assert(sizeOne == ret->numSciz);
+}
+{
+int sizeOne;
+sqlCharDynamicArray(row[11], &ret->companies, &sizeOne);
+assert(sizeOne == ret->numCompanies);
+}
+{
+int sizeOne;
+sqlUnsignedDynamicArray(row[13], &ret->refs, &sizeOne);
+assert(sizeOne == ret->numRefs);
+}
+return ret;
+}
+
+struct cutter *cutterLoadAll(char *fileName) 
+/* Load all cutter from a whitespace-separated file.
+ * Dispose of this with cutterFreeList(). */
+{
+struct cutter *list = NULL, *el;
+struct lineFile *lf = lineFileOpen(fileName, TRUE);
+char *row[14];
+
+while (lineFileRow(lf, row))
+    {
+    el = cutterLoad(row);
+    slAddHead(&list, el);
+    }
+lineFileClose(&lf);
+slReverse(&list);
+return list;
+}
+
+struct cutter *cutterLoadAllByChar(char *fileName, char chopper) 
+/* Load all cutter from a chopper separated file.
+ * Dispose of this with cutterFreeList(). */
+{
+struct cutter *list = NULL, *el;
+struct lineFile *lf = lineFileOpen(fileName, TRUE);
+char *row[14];
+
+while (lineFileNextCharRow(lf, chopper, row, ArraySize(row)))
+    {
+    el = cutterLoad(row);
+    slAddHead(&list, el);
+    }
+lineFileClose(&lf);
+slReverse(&list);
+return list;
+}
+
+struct cutter *cutterLoadByQuery(struct sqlConnection *conn, char *query)
+/* Load all cutter from table that satisfy the query given.  
+ * Where query is of the form 'select * from example where something=something'
+ * or 'select example.* from example, anotherTable where example.something = 
+ * anotherTable.something'.
+ * Dispose of this with cutterFreeList(). */
+{
+struct cutter *list = NULL, *el;
+struct sqlResult *sr;
+char **row;
+
+sr = sqlGetResult(conn, query);
+while ((row = sqlNextRow(sr)) != NULL)
+    {
+    el = cutterLoad(row);
+    slAddHead(&list, el);
+    }
+slReverse(&list);
+sqlFreeResult(&sr);
+return list;
+}
+
+void cutterSaveToDb(struct sqlConnection *conn, struct cutter *el, char *tableName, int updateSize)
+/* Save cutter as a row to the table specified by tableName. 
+ * As blob fields may be arbitrary size updateSize specifies the approx size
+ * of a string that would contain the entire query. Arrays of native types are
+ * converted to comma separated strings and loaded as such, User defined types are
+ * inserted as NULL. Strings are automatically escaped to allow insertion into the database. */
+{
+struct dyString *update = newDyString(updateSize);
+char  *scizsArray, *companiesArray, *refsArray;
+scizsArray = sqlStringArrayToString(el->scizs, el->numSciz);
+companiesArray = sqlCharArrayToString(el->companies, el->numCompanies);
+refsArray = sqlUnsignedArrayToString(el->refs, el->numRefs);
+sqlDyStringPrintf(update, "insert into %s values ( '%s',%u,%u,'%s',%u,%d,%u,%u,%u,'%s',%u,'%s',%u,'%s')", 
+	tableName,  el->name,  el->size,  el->matchSize,  el->seq,  el->cut,  el->overhang,  el->palindromic,  el->semicolon,  el->numSciz,  scizsArray ,  el->numCompanies,  companiesArray ,  el->numRefs,  refsArray );
+sqlUpdate(conn, update->string);
+freeDyString(&update);
+freez(&scizsArray);
+freez(&companiesArray);
+freez(&refsArray);
+}
+
+
+struct cutter *cutterCommaIn(char **pS, struct cutter *ret)
+/* Create a cutter out of a comma separated string. 
+ * This will fill in ret if non-null, otherwise will
+ * return a new cutter */
+{
+char *s = *pS;
+
+if (ret == NULL)
+    AllocVar(ret);
+ret->name = sqlStringComma(&s);
+ret->size = sqlUnsignedComma(&s);
+ret->matchSize = sqlUnsignedComma(&s);
+ret->seq = sqlStringComma(&s);
+ret->cut = sqlUnsignedComma(&s);
+ret->overhang = sqlSignedComma(&s);
+ret->palindromic = sqlUnsignedComma(&s);
+ret->semicolon = sqlUnsignedComma(&s);
+ret->numSciz = sqlUnsignedComma(&s);
+{
+int i;
+s = sqlEatChar(s, '{');
+AllocArray(ret->scizs, ret->numSciz);
+for (i=0; i<ret->numSciz; ++i)
+    {
+    ret->scizs[i] = sqlStringComma(&s);
+    }
+s = sqlEatChar(s, '}');
+s = sqlEatChar(s, ',');
+}
+ret->numCompanies = sqlUnsignedComma(&s);
+{
+int i;
+s = sqlEatChar(s, '{');
+AllocArray(ret->companies, ret->numCompanies);
+for (i=0; i<ret->numCompanies; ++i)
+    {
+    ret->companies[i] = sqlCharComma(&s);
+    }
+s = sqlEatChar(s, '}');
+s = sqlEatChar(s, ',');
+}
+ret->numRefs = sqlUnsignedComma(&s);
+{
+int i;
+s = sqlEatChar(s, '{');
+AllocArray(ret->refs, ret->numRefs);
+for (i=0; i<ret->numRefs; ++i)
+    {
+    ret->refs[i] = sqlUnsignedComma(&s);
+    }
+s = sqlEatChar(s, '}');
+s = sqlEatChar(s, ',');
+}
+*pS = s;
+return ret;
+}
+
+void cutterFree(struct cutter **pEl)
+/* Free a single dynamically allocated cutter such as created
+ * with cutterLoad(). */
+{
+struct cutter *el;
+
+if ((el = *pEl) == NULL) return;
+freeMem(el->name);
+freeMem(el->seq);
+/* All strings in scizs are allocated at once, so only need to free first. */
+if (el->scizs != NULL)
+    freeMem(el->scizs[0]);
+freeMem(el->scizs);
+freeMem(el->companies);
+freeMem(el->refs);
+freez(pEl);
+}
+
+void cutterFreeList(struct cutter **pList)
+/* Free a list of dynamically allocated cutter's */
+{
+struct cutter *el, *next;
+
+for (el = *pList; el != NULL; el = next)
+    {
+    next = el->next;
+    cutterFree(&el);
+    }
+*pList = NULL;
+}
+
+void cutterOutput(struct cutter *el, FILE *f, char sep, char lastSep) 
+/* Print out cutter.  Separate fields with sep. Follow last field with lastSep. */
+{
+if (sep == ',') fputc('"',f);
+fprintf(f, "%s", el->name);
+if (sep == ',') fputc('"',f);
+fputc(sep,f);
+fprintf(f, "%u", el->size);
+fputc(sep,f);
+fprintf(f, "%u", el->matchSize);
+fputc(sep,f);
+if (sep == ',') fputc('"',f);
+fprintf(f, "%s", el->seq);
+if (sep == ',') fputc('"',f);
+fputc(sep,f);
+fprintf(f, "%u", el->cut);
+fputc(sep,f);
+fprintf(f, "%d", el->overhang);
+fputc(sep,f);
+fprintf(f, "%u", el->palindromic);
+fputc(sep,f);
+fprintf(f, "%u", el->semicolon);
+fputc(sep,f);
+fprintf(f, "%u", el->numSciz);
+fputc(sep,f);
+{
+int i;
+if (sep == ',') fputc('{',f);
+for (i=0; i<el->numSciz; ++i)
+    {
+    if (sep == ',') fputc('"',f);
+    fprintf(f, "%s", el->scizs[i]);
+    if (sep == ',') fputc('"',f);
+    fputc(',', f);
+    }
+if (sep == ',') fputc('}',f);
+}
+fputc(sep,f);
+fprintf(f, "%u", el->numCompanies);
+fputc(sep,f);
+{
+int i;
+if (sep == ',') fputc('{',f);
+for (i=0; i<el->numCompanies; ++i)
+    {
+    if (sep == ',') fputc('"',f);
+    fprintf(f, "%c", el->companies[i]);
+    if (sep == ',') fputc('"',f);
+    fputc(',', f);
+    }
+if (sep == ',') fputc('}',f);
+}
+fputc(sep,f);
+fprintf(f, "%u", el->numRefs);
+fputc(sep,f);
+{
+int i;
+if (sep == ',') fputc('{',f);
+for (i=0; i<el->numRefs; ++i)
+    {
+    fprintf(f, "%u", el->refs[i]);
+    fputc(',', f);
+    }
+if (sep == ',') fputc('}',f);
+}
+fputc(lastSep,f);
+}
+
+/* -------------------------------- End autoSql Generated Code -------------------------------- */
+
+boolean isIsosciz(struct cutter *mainEnz, struct cutter *possibleScizEnz)
+/* Is possibleScizEnz an isoscizomer of mainEnz? */
+{
+int i;
+for (i = 0; i < mainEnz->numSciz; i++)
+    if (sameString(possibleScizEnz->name, mainEnz->scizs[i]))
+	return TRUE;
+return FALSE;
+}
+
+struct dnaSeq *stickyEnd(struct cutter *enz)
+/* Return the sticky end sequence of an enzyme. Strand is unspecified.  Free this. */
+{
+struct dnaSeq *ret = NULL;
+if (!enz)
+    return NULL;
+if (enz->overhang > 0)
+    {
+    char *seq = cloneStringZ(enz->seq + enz->cut, enz->overhang);
+    ret = newDnaSeq(seq, enz->overhang, "sticky");
+    }
+else if (enz->overhang < 0)
+    {
+    int size = intAbs(enz->overhang);
+    char *seq = cloneStringZ(enz->seq + enz->cut - size, size);
+    complement(seq, strlen(seq));
+    ret = newDnaSeq(seq, strlen(seq), "sticky");
+    }
+return ret;
+}
+
+int acgtCount(char *seq)
+/* Return the count of canonical bases in a sequence. */
+{
+return countChars(seq,'A') + countChars(seq,'C') +
+    countChars(seq,'G') + countChars(seq,'T');
+}
+
+boolean sameStickyEnd(struct cutter *enz1, struct cutter *enz2)
+/* Check to see if two enzymes make the same sticky ends.  If either of the
+   enzymes have sticky ends that isn't all ACGT, then this returns false. */
+{
+boolean ret = FALSE;
+struct dnaSeq *sticky1 = stickyEnd(enz1);
+struct dnaSeq *sticky2 = stickyEnd(enz2);
+
+if (sticky1 && sticky2)
+if (sticky1 && sticky2 && (sticky1->size == sticky2->size) &&
+    (acgtCount(sticky1->dna) == sticky1->size) && (acgtCount(sticky2->dna) == sticky2->size))
+    {
+    if (sameString(sticky1->dna, sticky2->dna))
+	ret = TRUE;
+    else
+	{
+	reverseComplement(sticky2->dna, sticky2->size);
+	if (sameString(sticky1->dna, sticky2->dna))
+	    ret = TRUE;
+	}
+    }
+freeDnaSeq(&sticky1);
+freeDnaSeq(&sticky2);
+return ret;
+}
+
+struct slName *findIsoligamers(struct cutter *enz, struct cutter *enzList)
+/* Find isoligamers to an enzyme in a list of enzymes.  */
+{
+struct slName *list = NULL;
+struct cutter *cur;
+if (!enz || !enzList)
+    return NULL;
+for (cur = enzList; cur != NULL; cur = cur->next)
+    {
+    if (!sameString(cur->name, enz->name) && (sameStickyEnd(enz, cur)))
+	{
+	struct slName *newname = newSlName(cur->name);
+	slAddHead(&list, newname);
+	}
+    }
+slReverse(&list);
+return list;
+}
+
+boolean isPalindrome(char *seq)
+/* Determine if a sequence is palindromic. */
+{
+int i, j, size = strlen(seq);
+for (i = 0; i < size; i++)
+    switch (seq[i])
+	{
+	case 'H':
+	case 'B':
+	case 'V':
+	case 'D': return FALSE;
+	}
+/* Loop from both ends of the sequence: i from the left, j from the right. */
+for (i = 0, j = size-1; (i < size/2) && (j >= ((size%2==1) ? size/2+1 : size/2)); i++, j--)
+    {
+    switch (seq[i])
+	{
+	case 'A':
+	    {
+	    if (seq[j] != 'T')
+		return FALSE;
+	    break;
+	    }
+	case 'C':
+	    {
+	    if (seq[j] != 'G')
+		return FALSE;
+	    break;
+	    }
+	case 'G':
+	    {
+	    if (seq[j] != 'C')
+		return FALSE;
+	    break;
+	    }
+	case 'T':
+	    {
+	    if (seq[j] != 'A')
+		return FALSE;
+	    break;
+	    }
+	case 'W':
+	    {
+	    if (seq[j] != 'S')
+		return FALSE;
+	    break;
+	    }
+	case 'S':
+	    {
+	    if (seq[j] != 'S')
+		return FALSE;
+	    break;
+	    }
+	case 'Y':
+	    {
+	    if (seq[j] != 'R')
+		return FALSE;
+	    break;
+	    }
+	case 'R':
+	    {
+	    if (seq[j] != 'Y')
+		return FALSE;
+	    break;
+	    }
+	case 'M':
+	    {
+	    if (seq[j] != 'K')
+		return FALSE;
+	    break;
+	    }
+	case 'K':
+	    {
+	    if (seq[j] != 'M')
+		return FALSE;
+	    break;
+	    }
+	case 'N':
+	    {
+	    if (seq[j] != 'N')
+		return FALSE;
+	    break;
+	    }
+	}
+    }
+return TRUE;
+}
+
+struct cutter *readGcg(char *gcgFile)
+/* Parse a GCG file and load it into cutter format. */
+{
+struct lineFile *lf = lineFileOpen(gcgFile,TRUE);
+struct cutter *enzList = NULL;
+char *line = "whatever", *words[10], numWords;
+
+/* Skip to the right line. */
+while (lineFileNext(lf,&line,NULL) && !startsWith("..",line));
+/* */
+while ((numWords=lineFileChop(lf,words)))
+    {
+    struct cutter *newone = NULL;
+    int comIx = (numWords==7) ? 5 : 6;
+    int refIx = (numWords==7) ? 6 : 7;
+    int i;
+    char *items[100];
+
+    /* Skip ones */
+    if (words[4][0] == '?')
+	continue;
+    AllocVar(newone);
+    newone->semicolon = (words[0][0] == ';') ? TRUE : FALSE;
+    /* Deal with the first few columns */
+    if (!isdigit(words[1][0]))
+	errAbort("Error: expecting a number in cut site column on line %d\n", lf->lineIx+1);
+    if (!isdigit(words[3][0]) && words[3][0]!='-')
+	errAbort("Error: expecting a number in the overhang column on line %d\n", lf->lineIx+1);
+    if (words[comIx][0] != '>')
+	errAbort("Error: expecting a \'>\' in the commercial sources column of line %d\n", lf->lineIx+1);
+    newone->name = (words[0][0] == ';') ? cloneString(words[0]+1) : cloneString(words[0]);
+    newone->cut = atoi(words[1]);
+    newone->seq = cloneString(words[2]);
+    touppers(newone->seq);
+    stripChar(newone->seq,'\'');
+    stripChar(newone->seq,'_');
+    newone->size = strlen(newone->seq);
+    newone->matchSize = newone->size - countChars(newone->seq, 'N');
+    newone->palindromic = isPalindrome(newone->seq);
+    newone->overhang = atoi(words[3]);
+    newone->numCompanies = strlen(words[comIx]+1);
+    if (newone->numCompanies > 0)
+	newone->companies = cloneMem(words[comIx]+1, newone->numCompanies*sizeof(char));
+    newone->numRefs = chopString(words[refIx], ",", items, ArraySize(items));
+    AllocArray(newone->refs, newone->numRefs);
+    for (i = 0; i < newone->numRefs; i++) 
+	{
+	if (i == 100)
+	    errAbort("Error: Andy didn't make the array for holding references big enough\n");
+	if (!isdigit(items[i][0]))
+	    errAbort("Error: expecting number in references column in line %d\n", lf->lineIx+1);
+	newone->refs[i] = atoi(items[i]);
+	}
+    /* Deal with isoscizomers. */
+    if (numWords == 8)
+	{
+	newone->numSciz = chopString(words[5], ",", items, ArraySize(items));
+	AllocArray(newone->scizs, newone->numSciz*sizeof(int));
+	for (i = 0; i < newone->numSciz; i++)
+	    {
+	    if (i == 100)
+		errAbort("Error: Andy didn't make the array for having isoscizomers big enough\n");
+	    newone->scizs[i] = cloneString(items[i]);
+	    }
+	}
+    else 
+	newone->numSciz = 0;
+    slAddHead(&enzList, newone);
+    }
+slReverse(&enzList);
+lineFileClose(&lf);
+return enzList;
+}
+
+boolean matchingBase(char enzBase, char seqBase)
+/* An enzyme matching */
+{
+switch (seqBase)
+    {
+    /* Ignore hard-masked and gaps for now. */
+    case 'A': 
+	{
+	switch (enzBase)
+	    {
+	    case 'A': 
+	    case 'R':
+	    case 'M':
+	    case 'W':
+	    case 'H':
+	    case 'V':
+	    case 'D': 
+	    case 'N': return TRUE;
+	    default: return FALSE;
+	    }
+	}
+    case 'C': 
+	{
+	switch (enzBase)
+	    {
+	    case 'C': 
+	    case 'Y':
+	    case 'M':
+	    case 'S':
+	    case 'H':
+	    case 'B':
+	    case 'V': 
+	    case 'N': return TRUE;
+	    default: return FALSE;
+	    }
+	}
+    case 'G':
+	{
+	switch (enzBase)
+	    {
+	    case 'G': 
+	    case 'R':
+	    case 'K':
+	    case 'S':
+	    case 'B':
+	    case 'V':
+	    case 'D': 
+	    case 'N': return TRUE;
+	    default: return FALSE;
+	    }
+	}
+    case 'T':
+	{
+	switch (enzBase)
+	    {
+	    case 'T': 
+	    case 'Y':
+	    case 'K':
+	    case 'W':
+	    case 'H':
+	    case 'B':
+	    case 'D': 
+	    case 'N': return TRUE;
+	    default: return FALSE;
+	    }
+	}
+    }
+return FALSE;
+}
+
+struct bed *allocBedEnz(struct cutter *enz, char *seqName, int seqPos, char strand)
+{
+struct bed *newbed = NULL;
+AllocVar(newbed);
+newbed->chrom = cloneString(seqName);
+newbed->chromStart = (strand == '-') ? seqPos - enz->size : seqPos;
+newbed->chromEnd = (strand == '-') ? seqPos : seqPos + enz->size;
+newbed->name = cloneString(enz->name);
+newbed->score = 1000;
+newbed->strand[0] = strand;
+return newbed;
+}
+
+struct bed *searchStrand(struct hash *sixers, struct cutter *ACGTo[], struct dnaSeq *seq, int startOffset, char strand)
+/* Cheesey function that checks a strand for the enzymes after they're put in the hash/array structures.  
+   This used to be a part of the matchEnzymes function but I do it twice now. */
+{
+struct cutter *enz;
+struct bed *bedList = NULL;
+int seqPos;
+
+if (ACGTo[0] || ACGTo[1] || ACGTo[2] || ACGTo[3] || (sixers->elCount > 0)) 
+    {
+    for (seqPos = 0; seqPos < seq->size; seqPos++)
+	{
+	struct cutter *enzList = NULL;
+	char sixer[7];
+	int bedPos = (strand == '-') ? (seq->size - seqPos) : seqPos;
+	if (seq->size - seqPos >= 6)
+	    {
+	    struct hashEl *el = NULL;
+	    sixer[6] = '\0';
+	    memcpy(sixer, seq->dna+seqPos, 6);
+	    el = hashLookup(sixers, sixer);
+	    if (el)
+		{	    
+		struct bed *add;
+		enz = el->val;
+		add = allocBedEnz(enz, seq->name, bedPos + startOffset, strand);
+		slAddHead(&bedList, add);
+		/* Just in case there's another one with the same sequence. */
+		while ((el = hashLookupNext(el)))
+		    {
+		    enz = el->val;
+		    add = allocBedEnz(enz, seq->name, bedPos + startOffset, strand);
+		    slAddHead(&bedList, add);
+		    }
+		}
+	    }
+	/* Use a certain list depending on which letter we're on in the sequence. */
+	if (seq->dna[seqPos] == 'A')
+	    enzList = ACGTo[0];
+	else if (seq->dna[seqPos] == 'C')
+	    enzList = ACGTo[1];
+	else if (seq->dna[seqPos] == 'G')
+	    enzList = ACGTo[2];
+	else if (seq->dna[seqPos] == 'T')
+	    enzList = ACGTo[3];
+	for (enz = enzList; enz != NULL; enz = enz->next)
+	    {
+	    int enzPos = 0;
+	    int seqCurPos = seqPos;	
+	    while (enzPos < enz->size && seqCurPos < seq->size && matchingBase(enz->seq[enzPos],seq->dna[seqCurPos]))
+		{
+		enzPos++; 
+		seqCurPos++;
+		}
+	    if (enzPos == enz->size)
+		{
+		struct bed *add = allocBedEnz(enz, seq->name, bedPos + startOffset, strand);
+		slAddHead(&bedList, add);
+		}
+	    }
+	}
+    }
+return bedList;
+}
+
+struct bed *matchEnzymes(struct cutter *cutters, struct dnaSeq *seq, int startOffset)
+/* Match the enzymes to sequence and return a bed list in all cases. */
+{
+struct hash *sixers = newHash(8), *palinSixers = newHash(8);
+struct cutter *enz;
+struct cutter *ACGTo[5], *palinACGTo[5];
+struct bed *bedList = NULL, *tmp;
+int i;
+if (!cutters)
+    return NULL;
+for (i = 0; i < 5; i++)
+    ACGTo[i] = palinACGTo[i] = NULL;
+/* Put each of the enzymes in either a hash table of six-cutters or */
+enz = cutters;
+while (enz != NULL)
+    {
+    int acgtCount = 0;
+    struct cutter *next = enz->next;
+    acgtCount = countChars(enz->seq,'A') + countChars(enz->seq,'C') + 
+                countChars(enz->seq,'G') + countChars(enz->seq,'T');
+    /* Super dumb coding here but it's quick. */
+     if (enz->palindromic)
+	{
+	if (enz->size==6 && acgtCount==6)
+	    hashAdd(palinSixers, enz->seq, enz);
+	else
+	    {
+	    if (enz->seq[0] == 'A')
+		slAddHead(&palinACGTo[0], enz);
+	    else if (enz->seq[0] == 'C')
+		slAddHead(&palinACGTo[1], enz);
+	    else if (enz->seq[0] == 'G')
+		slAddHead(&palinACGTo[2], enz);
+	    else if (enz->seq[0] == 'T')
+		slAddHead(&palinACGTo[3], enz);
+	    else 
+		{
+		slAddHead(&palinACGTo[4], enz);
+		}
+	    }
+	}
+    else
+	{
+	if (enz->size==6 && acgtCount==6)
+	    hashAdd(sixers, enz->seq, enz);
+	else
+	    {
+	    if (enz->seq[0] == 'A')
+		slAddHead(&ACGTo[0], enz);
+	    else if (enz->seq[0] == 'C')
+		slAddHead(&ACGTo[1], enz);
+	    else if (enz->seq[0] == 'G')
+		slAddHead(&ACGTo[2], enz);
+	    else if (enz->seq[0] == 'T')
+		slAddHead(&ACGTo[3], enz);
+	    else 
+		slAddHead(&ACGTo[4], enz);
+	    }	
+	}
+    enz = next;
+    }
+/* At this point we got a hash for the palindromes and non-palindromic six-cutters, 
+   plus an array for each too.  The array is set up so the enzymes starting with 'A' go into [0], 'C'
+   into [1], 'G' into [2], 'T' into [3], and other bases into [4].  */
+if (ACGTo[4])
+    {
+    ACGTo[0] = slCat(ACGTo[0], ACGTo[4]);
+    ACGTo[1] = slCat(ACGTo[1], ACGTo[4]);
+    ACGTo[2] = slCat(ACGTo[2], ACGTo[4]);
+    ACGTo[3] = slCat(ACGTo[3], ACGTo[4]);
+    }
+if (palinACGTo[4])
+    {
+    palinACGTo[0] = slCat(palinACGTo[0], palinACGTo[4]);
+    palinACGTo[1] = slCat(palinACGTo[1], palinACGTo[4]);
+    palinACGTo[2] = slCat(palinACGTo[2], palinACGTo[4]);
+    palinACGTo[3] = slCat(palinACGTo[3], palinACGTo[4]);
+    }
+/* Search the DNA in three ways: on the plus strand for both palindromes and nonpalindromes, and then
+   just nonpalindromes on the minus strand. */
+bedList = searchStrand(palinSixers, palinACGTo, seq, startOffset, '+');
+tmp = searchStrand(sixers, ACGTo, seq, startOffset, '+');
+bedList = slCat(bedList, tmp);
+reverseComplement(seq->dna, seq->size);
+tmp = searchStrand(sixers, ACGTo, seq, startOffset, '-');
+bedList = slCat(bedList, tmp);
+if (bedList)
+    slReverse(&bedList);
+return bedList;
+}
+
+void cullCutters(struct cutter **enzList, boolean semicolon, struct slName *justThese, int matchSize)
+/* Reduce the list of enzymes based on different options. */
+{
+struct cutter *enz = *enzList, *next;
+while (enz != NULL)
+    {
+    next = enz->next;
+    if ((justThese && !slNameInList(justThese, enz->name)) ||  
+	(enz->matchSize < matchSize) || (!semicolon && enz->semicolon))
+	{	
+	slRemoveEl(enzList, enz);
+	cutterFree(&enz);
+	}
+    enz = next;
+    }
+}