bc21bd3d27fe3d29971231955b3fc544fa1c3d1e
angie
  Wed Oct 16 11:51:39 2013 -0700
Two new tracks for Locus Reference Genomic (LRG) (#11863) with custom handlers: LRG Regions and LRG Transcripts.
LRGs are frozen reference sequences for a particular gene plus some
upstream and downstream sequence.  They are intended to provide a
stable coordinate system for gene annotations that won't change
with every new genome assembly, but can be mapped to each genome
assembly.  Since there is a lot of metadata associated with each
region, I made LRG Regions a bigBed 12 + with fields describing
mismatches and indels, so that PSL can be derived from the bigBed
and the original LRG sequence can be reconstructed using genome
assembly sequence and the mismatch/indel info.  hgTracks shows
differences and LRG insertions into the reference assembly using
the cds.c baseColor code.  (LRG deletions from the reference appear
as gaps, which we get for free with bed12 info).
For LRG Transcripts, I found the genePred codon-coloring code
inadequate for showing an insertion into hg19 (or even mismatches),
so instead of genePred I ended up using PSL + sequence, more like
the mRNA track representation and display.

diff --git src/hg/inc/lrg.h src/hg/inc/lrg.h
new file mode 100644
index 0000000..f66b892
--- /dev/null
+++ src/hg/inc/lrg.h
@@ -0,0 +1,109 @@
+/* lrg.h was originally generated by the autoSql program, which also 
+ * generated lrg.c and lrg.sql.  This header links the database and
+ * the RAM representation of objects. */
+
+#ifndef LRG_H
+#define LRG_H
+
+#define LRG_NUM_COLS 21	/* Column count; matches the 21 data fields of struct lrg (next excluded) */
+
+extern char *lrgCommaSepFieldNames;
+
+struct lrg
+/* Locus Reference Genomic regions: BED12-style fields followed by LRG diff and metadata fields */
+    {
+    struct lrg *next;  /* Next in singly linked list. */
+    char *chrom;	/* Reference assembly chromosome or scaffold */
+    unsigned chromStart;	/* Start position in chromosome */
+    unsigned chromEnd;	/* End position in chromosome */
+    char *name;	/* LRG ID */
+    unsigned score;	/* Placeholder for BED format compatibility (0) */
+    char strand[2];	/* Orientation of LRG to reference assembly: + or - */
+    unsigned thickStart;	/* Placeholder for BED format compatibility (same as chromStart) */
+    unsigned thickEnd;	/* Placeholder for BED format compatibility (same as chromEnd) */
+    unsigned reserved;	/* Placeholder for BED format compatibility (0) */
+    int blockCount;	/* Number of gapless aligned blocks */
+    int *blockSizes;	/* Comma separated list of block sizes */
+    int *chromStarts;	/* Start positions relative to chromStart */
+    char *mismatches;	/* List of bases that differ between LRG and reference assembly; parse with lrgParseMismatches() */
+    char *indels;	/* List of insertions/deletions in LRG and reference assembly; parse with lrgParseIndels() */
+    unsigned lrgSize;	/* Length in bases of LRG sequence */
+    int hgncId;	/* Numeric HGNC gene identifier */
+    char *hgncSymbol;	/* HGNC gene symbol */
+    char *ncbiAcc;	/* NCBI accession of LRG sequence */
+    char *lrgSource;	/* Source of LRG sequence */
+    char *lrgSourceUrl;	/* URL of lrgSource */
+    char *creationDate;	/* Date on which this sequence was added as a LRG */
+    };
+
+struct lrg *lrgLoad(char **row);
+/* Load a lrg from row fetched with select * from lrg
+ * from database.  Dispose of this with lrgFree(). */
+
+struct lrg *lrgLoadAll(char *fileName);
+/* Load all lrg from whitespace-separated file.
+ * Dispose of this with lrgFreeList(). */
+
+struct lrg *lrgLoadAllByChar(char *fileName, char chopper);
+/* Load all lrg from chopper separated file.
+ * Dispose of this with lrgFreeList(). */
+
+#define lrgLoadAllByTab(a) lrgLoadAllByChar(a, '\t')
+/* Load all lrg from tab separated file.  Expands to an expression (no trailing ';').
+ * Dispose of this with lrgFreeList(). */
+
+struct lrg *lrgCommaIn(char **pS, struct lrg *ret);
+/* Create a lrg out of a comma separated string. 
+ * This will fill in ret if non-null, otherwise will
+ * return a new lrg */
+
+void lrgFree(struct lrg **pEl);
+/* Free a single dynamically allocated lrg such as created
+ * with lrgLoad(). */
+
+void lrgFreeList(struct lrg **pList);
+/* Free a list of dynamically allocated lrg's */
+
+void lrgOutput(struct lrg *el, FILE *f, char sep, char lastSep);
+/* Print out lrg.  Separate fields with sep. Follow last field with lastSep. */
+
+#define lrgTabOut(el,f) lrgOutput(el,f,'\t','\n')
+/* Print out lrg as a line in a tab-separated file.  Caller supplies the terminating ';'. */
+
+#define lrgCommaOut(el,f) lrgOutput(el,f,',',',')
+/* Print out lrg as a comma separated list including final comma.  Caller supplies the ';'. */
+
+/* -------------------------------- End autoSql Generated Code -------------------------------- */
+
+
+struct psl *lrgToPsl(struct lrg *lrg, uint chromSize);
+/* Use lrg's mismatches and indels to make a PSL. */
+
+struct dnaSeq *lrgReconstructSequence(struct lrg *lrg, char *db);
+/* Use genomic sequence, lrg->mismatches and lrg->indels to reconstruct LRG sequence */
+
+struct lrgDiff
+/* Locus Reference Genomic <mapping><diff> item condensed into coordinates and sequences:
+ * an alignment gap block.  When lrg->strand is "-", chromSeq is reverse-complemented
+ * but chromosome coordinates are still on + strand. */
+{
+    struct lrgDiff *next;	/* Next in singly linked list; free whole list with lrgDiffFreeList(). */
+    uint chromStart;	/* Start of diff in chromosome coordinates (always + strand) */
+    uint chromEnd;	/* End of diff in chromosome coordinates (always + strand) */
+    char *chromSeq;	/* Reference assembly bases; reverse-complemented when lrg->strand is "-" */
+    uint lrgStart;	/* Presumably start of diff in LRG sequence coordinates -- confirm in lrg.c */
+    uint lrgEnd;	/* Presumably end of diff in LRG sequence coordinates -- confirm in lrg.c */
+    char *lrgSeq;	/* Presumably the LRG bases for this diff -- confirm in lrg.c */
+};
+
+struct lrgDiff *lrgParseMismatches(struct lrg *lrg);
+/* Parse lrg->mismatches and return a list of lrgDiffs. */
+
+struct lrgDiff *lrgParseIndels(struct lrg *lrg);
+/* Parse lrg->indels and return a list of lrgDiffs. */
+
+void lrgDiffFreeList(struct lrgDiff **pDiff);
+/* Free up a list of parsed diffs. */
+
+#endif /* LRG_H */
+