589685da775f5a122bb23d2b8eca372f53e4b5bc angie Wed Sep 10 16:34:34 2014 -0700 Changes to snpNcbiToUcsc to handle b141 on hg19 and hg38:- When checking range{I,D,S}* locTypes, use strlen(refNCBI) as length only if refNCBI contains only nucleotides. - Checking for overlapping variants at the same location (checkCluster -> DuplicateObserved, MixedObserved) used to be limited to insertions because those are easier to check. Now we check all variants that have all-nucleotide observed. Libified some code from hgTracks that can reverse-complement a slash-separated string of alleles, e.g. G/T -> A/C, -/AG -> -/CT. - SNP IDs are getting too huge (and sparse) to use as indexes into a statically allocated array; use a hash. - Erroneous NULL frequencies in dbSNP's SNPAlleleFreq need to be detected and ignored. refs #13309 diff --git src/inc/dnautil.h src/inc/dnautil.h index 94b25af..7282d43 100644 --- src/inc/dnautil.h +++ src/inc/dnautil.h @@ -77,30 +77,35 @@ void reverseComplement(DNA *dna, long length); /* Reverse offset - return what will be offset (0 based) to * same member of array after array is reversed. */ long reverseOffset(long offset, long arraySize); /* Switch start/end (zero based half open) coordinates * to opposite strand. */ void reverseIntRange(int *pStart, int *pEnd, int size); /* Switch start/end (zero based half open) coordinates * to opposite strand. */ void reverseUnsignedRange(unsigned *pStart, unsigned *pEnd, int size); +char *reverseComplementSlashSeparated(char *alleleStr); +/* Given a slash-separated series of sequences (a common representation of variant alleles), + * returns a slash-sep series with the reverse complement of each sequence (if it is a + * nucleotide sequence), also reversing the order of sequences. */ + enum dnaCase {dnaUpper,dnaLower,dnaMixed,}; /* DNA upper, lower, or mixed case? */ /* Convert T's to U's */ void toRna(DNA *dna); int cmpDnaStrings(DNA *a, DNA *b); /* Compare using screwy non-alphabetical DNA order TCGA */ typedef char Codon; /* Our codon type. */ /* Return single letter code (upper case) for protein. * Returns X for bad input, 0 for stop codon. * The "Standard" Code */ AA lookupCodon(DNA *dna);