ad3a176457ad1d21a0fedc47f349ec3484e751f1 angie Fri Feb 9 16:27:51 2018 -0800 Cleaning up some old ugliness about the size parameter to isAllDna and isAllNt. hgc's printSnpAlignment code that parsed snpNNN.fa was using lineSize as length but lineSize is length+1. Then isAllDna was written with "i<size-1" as the loop test instead of "i < size". I didn't fix that properly when I separated out isAllNt from isAllDna. Later, I (re?)discovered that isAllNt needed length+1 as its size and just added some FIXME comments. Thanks Brian R for prodding me to actually fix it. refs #20895 diff --git src/hg/hgc/hgc.c src/hg/hgc/hgc.c index 835a66a..e7c8d5d 100644 --- src/hg/hgc/hgc.c +++ src/hg/hgc/hgc.c @@ -245,30 +245,31 @@ #include "mdb.h" #include "yaleGencodeAssoc.h" #include "itemDetailsHtml.h" #include "trackVersion.h" #include "numtsClick.h" #include "geneReviewsClick.h" #include "bigBed.h" #include "bigPsl.h" #include "bedTabix.h" #include "longRange.h" #include "hmmstats.h" #include "aveStats.h" #include "trix.h" #include "bPlusTree.h" #include "customFactory.h" +#include "iupac.h" static char *rootDir = "hgcData"; #define LINESIZE 70 /* size of lines in comp seq feature */ struct cart *cart; /* User's settings. */ char *seqName; /* Name of sequence we're working on. */ int winStart, winEnd; /* Bounds of sequence. */ char *database; /* Name of mySQL database. */ char *organism; /* Colloquial name of organism. */ char *genome; /* common name, e.g. Mouse, Human */ char *scientificName; /* Scientific name of organism. */ struct hash *trackHash; /* A hash of all tracks - trackDb valued */ @@ -16216,97 +16217,89 @@ boldStart, boldEnd, tIsRc, tSeq->size, qSeq->size); } axtFree(&axt); hPrintf("</PRE>"); } void printSnpAlignment(struct trackDb *tdb, struct snp *snp, int version) /* Get flanking sequences from table; align and print */ { char *fileName = NULL; char *variation = NULL; char *line; struct lineFile *lf = NULL; -int lineSize; static int maxFlank = 1000; static int lineWidth = 100; boolean gotVar = FALSE; -boolean isNucleotide = TRUE; boolean leftFlankTrimmed = FALSE; boolean rightFlankTrimmed = FALSE; struct dyString *seqDbSnp5 = newDyString(512); struct dyString *seqDbSnp3 = newDyString(512); struct dyString *seqDbSnpTemp = newDyString(512); char *leftFlank = NULL; char *rightFlank = NULL; struct dnaSeq *dnaSeqDbSnp5 = NULL; struct dnaSeq *dnaSeqDbSnpO = NULL; struct dnaSeq *dnaSeqDbSnp3 = NULL; struct dnaSeq *seqDbSnp = NULL; struct dnaSeq *seqNib = NULL; -int spaces = 0; int len5 = 0; int len3 = 0; int start = 0; int end = 0; int skipCount = 0; off_t offset = 0; fileName = getSnpSeqFile(tdb, version); if (!fileName) return; offset = getSnpSeqFileOffset(tdb, snp); if (offset == -1) return; lf = lineFileOpen(fileName, TRUE); lineFileSeek(lf, offset, SEEK_SET); /* skip the header line */ -lineFileNext(lf, &line, &lineSize); +lineFileNext(lf, &line, NULL); if (!startsWith(">rs", line)) errAbort("Expected FASTA header, got this line:\n%s\nat offset %lld " "in file %s", line, (long long)offset, fileName); -while (lineFileNext(lf, &line, &lineSize)) +while (lineFileNext(lf, &line, NULL)) { - spaces = countChars(line, ' '); stripString(line, " "); - lineSize = lineSize - spaces; - if (sameString(line, "N")) - isNucleotide = FALSE; - else - isNucleotide = isAllDna(line, lineSize); - if (lineSize > 2 && gotVar) - dyStringAppend(seqDbSnp3,line); - else if (lineSize > 2 && !gotVar) - dyStringAppend(seqDbSnp5,line); - else if (lineSize == 2 && !isNucleotide) + int len = strlen(line); + if (len == 0) + break; + else if (len == 1 && isIupacAmbiguous(line[0])) { gotVar = TRUE; variation = cloneString(line); } - else if (lineSize == 1) - break; + else if (gotVar) + dyStringAppend(seqDbSnp3, line); + else + dyStringAppend(seqDbSnp5, line); } lineFileClose(&lf); if (variation == NULL) { printf("<P>Could not parse ambiguous SNP base out of dbSNP " "sequence, so can't display re-alignment of flanking sequences.\n"); return; } /* trim */ /* axtAffine has a limit of 100,000,000 bases for query x target */ leftFlank = dyStringCannibalize(&seqDbSnp5); rightFlank = dyStringCannibalize(&seqDbSnp3); len5 = strlen(leftFlank);