90f26ce1f4bd4f69b53d11569d4c4efc0c3386b7
braney
  Tue Dec 20 12:41:19 2016 -0800
make some utilities to turn mafs into bigMafs

diff --git src/inc/maf.h src/inc/maf.h
index 3e3f06b..095a514 100644
--- src/inc/maf.h
+++ src/inc/maf.h
@@ -1,264 +1,267 @@
 /* maf.h - Multiple alignment format.  */
 #ifndef MAF_H
 #define MAF_H
 
 #ifndef COMMON_H
 #include "common.h"
 #endif
 
 #ifndef AXT_H
 #include "axt.h"
 #endif
 
 struct mafFile
 /* A file full of multiple alignments. */
     {
     struct mafFile *next; /* Next maf file in list. */
     int version;	 /* Required */
     char *scoring;	 /* Optional (may be NULL). Name of scoring scheme. */
     struct mafAli *alignments;	/* Possibly empty list of alignments. */
     struct lineFile *lf; /* Open line file if any. NULL except while parsing. */
     };
 
 void mafFileFree(struct mafFile **pObj);
 /* Free up a maf file including closing file handle if necessary. */
 
 void mafFileFreeList(struct mafFile **pList);
 /* Free up a list of maf files. */
 
 struct mafAli
 /* A multiple alignment. */
     {
     struct mafAli *next; /* Next alignment in list. */
     double score;        /* Score.  Meaning depends on mafFile.scoring.  0.0 if no scoring. */
     struct mafComp *components;	/* List of components of alignment. */
     int textSize;         /* Size of text in each component. */
     struct mafRegDef *regDef; /* Source of region definitions (r line). */
     };
 
 void mafAliFree(struct mafAli **pObj);
 /* Free up a maf alignment. */
 
 void mafAliFreeList(struct mafAli **pList);
 /* Free up a list of maf alignments. */
 
 /* the set of syntenic relationships that the previous and
  * following alignments have with the current one */
 #define	MAF_INVERSE_STATUS		'V'
 #define	MAF_INSERT_STATUS		'I'
 #define	MAF_CONTIG_STATUS		'C'
 #define	MAF_CONTIG_NESTED_STATUS	'c'
 #define	MAF_NEW_STATUS			'N'
 #define	MAF_NEW_NESTED_STATUS		'n'
 #define	MAF_MAYBE_NEW_STATUS		'S'
 #define	MAF_MAYBE_NEW_NESTED_STATUS	's'
 #define	MAF_MISSING_STATUS		'M'
 #define	MAF_TANDEM_STATUS		'T'
 
 struct mafComp
 /* A component of a multiple alignment. */
     {
     struct mafComp *next; /* Next component in list. */
     char *src;	 /* Name of sequence source.  */
     int srcSize; /* Size of sequence source.  */
     char strand; /* Strand of sequence.  Either + or -. */
     int start;	 /* Start within sequence. Zero based. If strand is -, start is relative to src end. */
     int size;	 /* Size in sequence (does not include dashes).  */
     char *text;  /* The sequence including dashes. */
     char *quality;  /* The quality data (same length as text, or NULL). */
     char leftStatus; /* the syntenic status of the alignment before us vis a vis ourselves (see MAF_*_STATUS) */
     int leftLen;     /* length related information for the previous alignment for the species */
     char rightStatus; /* the syntenic status of the alignment after us vis a vis ourselves (see MAF_*_STATUS) */
     int rightLen;     /* length related information for the following alignment for the species */
     };
 
 void mafCompFree(struct mafComp **pObj);
 /* Free up a maf component. */
 
 void mafCompFreeList(struct mafComp **pList);
 /* Free up a list of maf components. */
 
 char *mafCompGetSrcDb(struct mafComp *mc, char *buf, int bufSize);
 /* parse the srcDb name from the mafComp src name, return NULL if no srcDb */
 
 char *mafCompGetSrcName(struct mafComp *mc);
 /* parse the src sequence name from the mafComp src name */
 
 struct mafRegDef
 /* MAF region definition (r line) */
 {
     char *type;   // type of definition, one of the constants declared below (not malloced)
     int size;     // region size
     char *id;     // identifiers
 };
 extern char *mafRegDefTxUpstream;  // transcription start size upstream region
 
 struct mafRegDef *mafRegDefNew(char *type, int size, char *id);
 /* construct a new mafRegDef object */
 
 void mafRegDefFree(struct mafRegDef **mrdPtr);
 /* Free a mafRegDef object */
 
 int mafPlusStart(struct mafComp *comp);
 /* Return start relative to plus strand of src. */
 
 struct mafFile *mafOpen(char *fileName);
 /* Open up a .maf file for reading.  Read header and
  * verify. Prepare for subsequent calls to mafNext().
  * Prints error message and aborts if there's a problem. */
 
 struct mafFile *mafMayOpen(char *fileName);
 /* Like mafOpen above, but returns NULL rather than aborting 
  * if file does not exist. */
 
 void mafRewind(struct mafFile *mf);
 /* Seek to beginning of open maf file */
 
 struct mafAli *mafNext(struct mafFile *mafFile);
 /* Return next alignment in file or NULL if at end. 
  * This will close the open file handle at end as well. */
 
 struct mafAli *mafNextWithPos(struct mafFile *mf, off_t *retOffset);
 /* Return next alignment in FILE or NULL if at end.  If retOffset is
  * non-NULL, return start offset of record in file. */
 
 struct mafFile *mafReadAll(char *fileName);
 /* Read in full maf file */
 
 void mafWriteStart(FILE *f, char *scoring);
 /* Write maf header and scoring scheme name (may be null) */
 
 void mafWrite(FILE *f, struct mafAli *maf);
 /* Write next alignment to file. */
 
+void mafWriteDelimiter(FILE *f, struct mafAli *maf, char delimiter);
+/* Write next alignment to file using delimiter instead of newline. */
+
 void mafWriteEnd(FILE *f);
 /* Write end tag of maf file. */
 
 void mafWriteAll(struct mafFile *mf, char *fileName);
 /* Write out full mafFile. */
 
 struct mafComp *mafMayFindComponent(struct mafAli *maf, char *src);
 /* Find component of given source. Return NULL if not found. */
 
 struct mafComp *mafMayFindComponentDb(struct mafAli *maf, char *db);
 /* Find component of given database or source. Return NULL if not found. */
 
 struct mafComp *mafFindComponent(struct mafAli *maf, char *src);
 /* Find component of given source or die trying. */
 
 struct mafComp *mafMayFindCompSpecies(struct mafAli *maf, char *species, char sepChar);
 /* Find component of given source that starts with species followed by sepChar or '\0'
    Return NULL if not found. */
 
 struct mafComp *mafFindCompSpecies(struct mafAli *maf, char *species, char sepChar);
 /* Find component of given source that starts with species followed by sepChar or '\0'
    or die trying. */
 
 struct mafComp *mafMayFindCompPrefix(struct mafAli *maf, char *pre, char *sep);
 /* Find component of given source that starts with pre followed by sep.
    Return NULL if not found. */
 
 struct mafComp *mafFindCompPrefix(struct mafAli *maf, char *pre, char *sep);
 /* Find component of given source that starts with pre followed by sep
    or die trying. */
 
 boolean mafMayFindAllComponents(struct mafAli *maf, struct hash *cHash);
 /* Check to see if all components in hash are in maf block.  Return FALSE if not found. */
 
 struct mafComp *mafMayFindComponentInHash(struct mafAli *maf, struct hash *cHash);
 /* Find arbitrary component of given source that matches any string in the cHash.
    Return NULL if not found. */
 
 struct mafComp *mafMayFindSpeciesInHash(struct mafAli *maf, struct hash *cHash, char sepChar);
 /* Find arbitrary component of given maf whose source prefix (ended by sepChar)
    matches any string in the cHash.  Return NULL if not found. */
 
 void mafMoveComponentToTop(struct mafAli *maf, char *componentSource);
 /* Move given component to head of component list. */
 
 struct mafAli *mafFromAxt(struct axt *pAxt, int tSize, 
 	char *tPrefix, int qSize, char *qPrefix);
 /* Make up a maf file from axt.  Slower than mafFromAxtTemp,
  * but the axt and maf are independent afterwards. */
 
 void mafFromAxtTemp(struct axt *axt, int tSize, int qSize,
 	struct mafAli *temp);
 /* Make a maf out of axt,  parasiting on the memory in axt.
  * Do *not* mafFree this temp.  The memory it has in pointers
  * is still owned by the axt.  Furthermore the next call to
  * this function will invalidate the previous temp value.
  * It's sort of a kludge, but quick to run and easy to implement. */
 
 struct mafAli *mafSubset(struct mafAli *maf, char *componentSource,
 	int newStart, int newEnd);
 /* See mafSubsetE below (called with getInitialDashes = FALSE). */
 
 struct mafAli *mafSubsetE(struct mafAli *maf, char *componentSource,
 	int newStart, int newEnd, bool getInitialDashes);
 /* Extract subset of maf that intersects a given range
  * in a component sequence.  The newStart and newEnd
  * are given in the forward strand coordinates of the
  * component sequence.  The componentSource is typically
  * something like 'mm3.chr1'.  This will return NULL
  * if maf does not intersect range.  The score field
  * in the returned maf will not be filled in (since
  * we don't know which scoring scheme to use). 
  * If getInitialDashes is TRUE then the initial -'s
  * in the reference sequence are *not* removed. */
 
 boolean mafNeedSubset(struct mafAli *maf, char *componentSource,
 	int newStart, int newEnd);
 /* Return TRUE if maf only partially fits between newStart/newEnd
  * in given component. */
 
 double mafScoreMultiz(struct mafAli *maf);
 /* Return score of a maf (calculated rather than what is
  * stored in the structure). */
 
 double mafScoreRangeMultiz(struct mafAli *maf, int start, int size);
 /* Return score of a subset of an alignment.  Parameters are:
  *    maf - the alignment
  *    start - the (zero based) offset to start calculating score
  *    size - the size of the subset
  * The following relationship should hold:
  *   scoreRange(maf,start,size) =
  *	scoreRange(maf,0,start+size) - scoreRange(maf,0,start)
  */
 
 void mafScoreUseSimple();
 /* use a simple scoring system useful for finding mismatches */
 
 void mafScoreUseTraditional();
 /* use the traditional HOX scoring system */
 
 double mafScoreMultizMaxCol(int species);
 /* Return maximum possible score for a column. */
 
 void mafColMinMaxScore(struct mafAli *maf, 
 	double *retMin, double *retMax);
 /* Get min/max maf scores for a column. */
 
 void mafFlipStrand(struct mafAli *maf);
 /* Reverse complement maf. */
 
 void mafSrcDb(char *name, char *retDb, int retDbSize);
 /* Parse out just database part of name (up to but not including
  * first dot). If no dot found, return entire name */
 
 boolean mafColumnEmpty(struct mafAli *maf, int col);
 /* Return TRUE if the column is all '-' or '.' */
 
 void mafStripEmptyColumns(struct mafAli *maf);
 /* Remove columns that are all '-' or '.' from  maf. */
 
 boolean isContigOrTandem(char status);
 /* is status MAF_CONTIG_STATUS or MAF_TANDEM_STATUS */
 
 struct mafComp *mafCompClone(struct mafComp *srcComp);
 /* clone a mafComp */
 
 struct mafAli *mafAliClone(struct mafAli *srcAli);
 /* clone a mafAli */
 
 #endif /* MAF_H */