79124adf60b5a8e99e4025e8f982389e13db955e
braney
  Wed Sep 4 15:09:02 2019 -0700
starting V32 of hg38 knownGene

diff --git src/hg/makeDb/kgAllocId/kgAllocId.c src/hg/makeDb/kgAllocId/kgAllocId.c
new file mode 100644
index 0000000..9b1ea5f
--- /dev/null
+++ src/hg/makeDb/kgAllocId/kgAllocId.c
@@ -0,0 +1,154 @@
+/* kgAllocId - Assign new knownGene ids to Gencode IDs. */
+#include "common.h"
+#include "linefile.h"
+#include "hash.h"
+#include "options.h"
+#include "txCommon.h"
+
+void usage()
+/* Explain usage and exit. */
+{
+errAbort(
+  "kgAllocId - Assign new knownGene ids to Gencode IDs\n"
+  "usage:\n"
+  "   kgAllocId oldMap newIds startIdx newMap\n"
+  "oldMap is a two-column file (versioned Gencode id, knownGene id), newIds has one\n"
+  "Gencode id per line, new ids are numbered from startIdx+1, and newMap is the output.\n"
+  );
+}
+
+/* Command line validation table handed to optionInit() in main().
+ * It holds only the NULL terminating entry: no options are declared. */
+static struct optionSpec options[] =
+    { {NULL, 0}, };
+
+struct version   /* One old-map entry, stored as a value in the hash built by readMapNoVersion(). */
+{
+unsigned number;   /* atoi() of the text after the last '.' of the first column */
+char *id;          /* clone of the second column */
+};
+
+struct hash *readMapNoVersion(char *name)
+/* Read two-column file (id ending in ".N", mapped id) into a hash keyed by the first
+ * column with ".N" cut off; values are struct version {N, clone of second column}. */
+{
+struct hash *hash = newHash(10);
+struct lineFile *lf = lineFileOpen(name, TRUE);
+char *row[2];
+while (lineFileRow(lf, row))
+    {
+    char *ptr = strrchr(row[0], '.');
+    if (ptr == NULL)   // unversioned id: abort instead of writing through NULL
+        errAbort("no '.' version separator in id '%s' in %s", row[0], name);
+    *ptr++ = 0;        // cut row[0] at its last '.'; ptr now points at the version text
+    struct version *version;
+    AllocVar(version);
+    version->number = atoi(ptr);
+    version->id = cloneString(row[1]);
+    hashAdd(hash, row[0], version);
+    }
+lineFileClose(&lf);
+return hash;
+}
+
+struct hash *readMap(char *name)
+/* Load two-column file into a new hash: first column is key, clone of second is value. */
+{
+char *cols[2];
+struct hash *map = newHash(10);
+struct lineFile *in = lineFileOpen(name, TRUE);
+while (lineFileRow(in, cols))
+    {
+    hashAdd(map, cols[0], cloneString(cols[1]));
+    }
+lineFileClose(&in);
+return map;
+}
+
+unsigned txId;   /* Id counter: set in kgAllocId(), pre-incremented by every newId() call. */
+char *newId()
+/* Bump txId and return a new 100-byte buffer holding what txGeneAccFromId() wrote plus ".1". */
+{
+char *acc = needMem(100);
+txGeneAccFromId(++txId, acc);
+return strcat(acc, ".1");
+}
+
+char *addOne(char *id)
+/* Return a newly allocated copy of id in which the number after the last '.'
+ * is incremented by one (e.g. "abc.2" becomes "abc.3").  id itself is not
+ * modified.  Aborts if id is NULL or contains no '.'. */
+{
+char *dot = (id != NULL) ? strrchr(id, '.') : NULL;
+if (dot == NULL)
+    errAbort("addOne: id '%s' has no '.' version suffix", (id != NULL) ? id : "(null)");
+unsigned number = atoi(dot + 1) + 1;
+char buffer[4096];
+// %.*s prints just the part before the last '.', so no scratch copy is needed
+safef(buffer, sizeof buffer, "%.*s.%u", (int)(dot - id), id, number);
+return cloneString(buffer);
+}
+
+void kgAllocId(char *oldMap, char *newIds, char * startIdStr, char *newMap)
+/* kgAllocId - Assign new knownGene ids to Gencode IDs.  Writes one
+ * "id<tab>knownGeneId" line to newMap per id in newIds: an id found in oldMap
+ * keeps its knownGene id; one found only with other versions gets the id of its
+ * highest old version bumped by addOne(); the rest get newId().  startIdStr
+ * seeds the txId counter, whose final value is reported on stderr. */
+{
+txId = atoi(startIdStr);
+struct hash *oldMapHash = readMap(oldMap);
+struct hash *oldMapHashNoVer = readMapNoVersion(oldMap);
+struct lineFile *lf = lineFileOpen(newIds, TRUE);
+FILE *out = mustOpen(newMap, "w");
+char *row[1];
+while (lineFileRow(lf, row))
+    {
+    // first look to see if this id already in map
+    char *val = hashFindVal(oldMapHash, row[0]);
+    if (val)
+        {
+        fprintf(out, "%s\t%s\n", row[0], val);
+        continue;
+        }
+    // check to see if we have the id with a different version
+    char *dot = strrchr(row[0], '.');
+    if (dot == NULL)    // unversioned id: abort instead of writing through NULL
+        errAbort("no '.' version separator in id '%s' in %s", row[0], newIds);
+    char *base = cloneString(row[0]);
+    base[dot - row[0]] = 0;    // base is the id without its version
+    char *id = NULL;
+    unsigned max = 0;
+    struct hashEl *hel = hashLookup(oldMapHashNoVer, base);
+    for (; hel != NULL; hel = hel->next)
+        {
+        // entries reached through ->next can carry other names, skip those
+        if (differentString(hel->name, base))
+            continue;
+        struct version *version = (struct version *)hel->val;
+        // testing id == NULL lets an entry whose version is 0 be picked too
+        if (id == NULL || version->number > max)
+            {
+            max = version->number;
+            id = version->id;
+            }
+        }
+    char *newVal = (id != NULL) ? addOne(id) : newId();   // NULL id: nothing in oldMap
+    fprintf(out, "%s\t%s\n", row[0], newVal);
+    freeMem(newVal);
+    freeMem(base);
+    }
+lineFileClose(&lf);
+carefulClose(&out);
+fprintf(stderr,"lastId %u\n", txId);
+}
+
+int main(int argc, char *argv[])
+/* Entry point: run optionInit(), then require exactly four positional arguments. */
+{
+optionInit(&argc, argv, options);
+if (argc - 1 != 4)
+    usage();
+kgAllocId(argv[1], argv[2], argv[3], argv[4]);
+return 0;
+}