589685da775f5a122bb23d2b8eca372f53e4b5bc
angie
  Wed Sep 10 16:34:34 2014 -0700
Changes to snpNcbiToUcsc to handle b141 on hg19 and hg38:
- When checking range{I,D,S}* locTypes, use strlen(refNCBI) as length only if
refNCBI contains only nucleotides.
- Checking for overlapping variants at the same location (checkCluster ->
DuplicateObserved, MixedObserved) used to be limited to insertions because
those are easier to check.  Now we check all variants that have
all-nucleotide observed.  Libified some code from hgTracks that can
reverse-complement a slash-separated string of alleles, e.g. G/T -> A/C,
-/AG -> -/CT.
- SNP IDs are getting too huge (and sparse) to use as indexes into a
statically allocated array; use a hash.
- Erroneous NULL frequencies in dbSNP's SNPAlleleFreq need to be detected
and ignored.
refs #13309

diff --git src/lib/dnautil.c src/lib/dnautil.c
index d02806d..7d8a665 100644
--- src/lib/dnautil.c
+++ src/lib/dnautil.c
@@ -462,30 +462,65 @@
 temp = *pStart;
 *pStart = size - *pEnd;
 *pEnd = size - temp;
 }
 
 /* Switch start/end (zero based half open) coordinates
  * to opposite strand: *pStart becomes size - *pEnd, *pEnd becomes size - the old *pStart. */
 void reverseUnsignedRange(unsigned *pStart, unsigned *pEnd, int size)
 {
 unsigned temp;
 temp = *pStart;   /* save the old start; *pStart is overwritten before *pEnd is computed */
 *pStart = size - *pEnd;
 *pEnd = size - temp;
 }
 
+char *reverseComplementSlashSeparated(char *alleleStr)
+/* Given a slash-separated series of sequences (a common representation of variant alleles),
+ * returns a newly allocated (needMem) slash-sep series with the reverse complement of each
+ * sequence (if it is a nucleotide sequence).  alleleStr is not modified and must not be NULL.
+ * Special behavior to support dbSNP's variant allele conventions:
+ * 1. Reverse the order of sequences (to maintain alphabetical ordering).
+ * 2. If alleleStr begins with "-/", then after reversing, move "-/" back to the beginning. */
+{
+int len = strlen(alleleStr);
+char choppyCopy[len+1];  // scratch copy that gets chopped up, so alleleStr stays intact
+safecpy(choppyCopy, sizeof(choppyCopy), alleleStr);
+// len+1 slots: k slashes make k+1 alleles, and a zero-length VLA (empty input) is undefined.
+char *alleles[len+1];
+int alCount = chopByChar(choppyCopy, '/', alleles, ArraySize(alleles));
+char *outStr = needMem(len+1);  // safecat below assumes this starts out as an empty string
+for (int i = alCount-1;  i >= 0;  i--)
+    {
+    char *allele = alleles[i];
+    int alLen = strlen(allele);
+    if (isAllNt(allele, alLen))
+        reverseComplement(allele, alLen);
+    if (i != alCount-1)
+        safecat(outStr, len+1, "/");
+    safecat(outStr, len+1, allele);
+    }
+if (startsWith("-/", alleleStr))
+    {
+    // Keep "-/" at the beginning: the "-" allele was emitted last, so shift the rest right by 2.
+    memmove(outStr+2, outStr, len-2);
+    outStr[0] = '-';
+    outStr[1] = '/';
+    }
+return outStr;
+}
+
 int cmpDnaStrings(DNA *a, DNA *b)
 /* Compare zero-terminated strings by ntVal[] (screwy non-alphabetical DNA order TCGA) */
 {
 for (;;)
     {
     DNA aa = *a++;
     DNA bb = *b++;
     if (aa != bb)   /* first differing position decides the result */
         return ntVal[(int)aa] - ntVal[(int)bb];
     if (aa == 0)    /* chars are equal here, so both strings ended together */
 	break;
     }
 return 0;
 }