589685da775f5a122bb23d2b8eca372f53e4b5bc
angie
Wed Sep 10 16:34:34 2014 -0700
Changes to snpNcbiToUcsc to handle b141 on hg19 and hg38:
- When checking range{I,D,S}* locTypes, use strlen(refNCBI) as length only if
refNCBI contains only nucleotides.
- Checking for overlapping variants at the same location (checkCluster ->
DuplicateObserved, MixedObserved) used to be limited to insertions because
those are easier to check. Now we check all variants that have
all-nucleotide observed. Libified some code from hgTracks that can
reverse-complement a slash-separated string of alleles, e.g. G/T -> A/C,
-/AG -> -/CT.
- SNP IDs are getting too huge (and sparse) to use as indexes into a
statically allocated array; use a hash.
- Erroneous NULL frequencies in dbSNP's SNPAlleleFreq need to be detected
and ignored.
refs #13309
diff --git src/lib/dnautil.c src/lib/dnautil.c
index d02806d..7d8a665 100644
--- src/lib/dnautil.c
+++ src/lib/dnautil.c
@@ -462,30 +462,65 @@
temp = *pStart;
*pStart = size - *pEnd;
*pEnd = size - temp;
}
/* Flip a start/end pair (zero based, half open) onto the opposite strand.
 * The new start is size minus the old end and the new end is size minus
 * the old start; both values are rewritten in place. */
void reverseUnsignedRange(unsigned *pStart, unsigned *pEnd, int size)
{
unsigned oldStart = *pStart;
*pStart = size - *pEnd;
*pEnd = size - oldStart;
}
+char *reverseComplementSlashSeparated(char *alleleStr)
+/* Given a slash-separated series of sequences (a common representation of variant alleles),
+ * return a newly allocated slash-separated series holding the reverse complement of each
+ * sequence (a sequence that is not all-nucleotide is copied unchanged).
+ * Special behavior to support dbSNP's variant allele conventions:
+ * 1. The order of sequences is reversed (to maintain alphabetical ordering).
+ * 2. If alleleStr begins with "-/", the "-/" stays at the beginning of the result.
+ * alleleStr is not modified and must not be NULL.  The result has the same length as
+ * alleleStr and is allocated with needMem; the caller owns it.
+ * Empty alleles are preserved, so "" gives "" and "/" gives "/". */
+{
+int len = strlen(alleleStr);
+char *outStr = needMem(len+1);
+char *out = outStr;
+char *firstAllele = alleleStr;
+char *end = alleleStr + len;
+if (startsWith("-/", alleleStr))
+    {
+    // Keep "-/" at the beginning; only the alleles after it are reordered.
+    *out++ = '-';
+    *out++ = '/';
+    firstAllele += 2;
+    }
+// Walk the alleles from last to first, writing each one straight into the output.
+// This needs no scratch copy and no fixed-size field array, so no allele can be dropped.
+for (;;)
+    {
+    char *start = end;
+    while (start > firstAllele && start[-1] != '/')
+        start--;
+    int alLen = end - start;
+    memcpy(out, start, alLen);
+    // An empty allele has nothing to complement.
+    if (alLen > 0 && isAllNt(out, alLen))
+        reverseComplement(out, alLen);
+    out += alLen;
+    if (start == firstAllele)
+        break;
+    *out++ = '/';
+    end = start - 1;  // step back over the slash that precedes this allele
+    }
+// Terminate explicitly rather than depending on the allocator's initial contents.
+*out = '\0';
+return outStr;
+}
+
int cmpDnaStrings(DNA *a, DNA *b)
/* Compare two zero-terminated DNA strings one base at a time.  At the first
 * position where they differ, return the difference of the two bases' ntVal[]
 * entries -- the "screwy non-alphabetical DNA order TCGA" rather than
 * alphabetical order.  Return 0 if the strings match through the terminator. */
{
DNA baseA, baseB;
do
    {
    baseA = *a++;
    baseB = *b++;
    if (baseA != baseB)
        return ntVal[(int)baseA] - ntVal[(int)baseB];
    }
while (baseA != 0);
return 0;
}