src/inc/psl.h 1.41
1.41 2009/03/10 20:30:14 markd
fixed typo in comment
Index: src/inc/psl.h
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/inc/psl.h,v
retrieving revision 1.40
retrieving revision 1.41
diff -b -B -U 1000000 -r1.40 -r1.41
--- src/inc/psl.h 5 Mar 2009 00:41:20 -0000 1.40
+++ src/inc/psl.h 10 Mar 2009 20:30:14 -0000 1.41
@@ -1,321 +1,321 @@
/* psl.h was originally generated by the autoSql program, which also
* generated psl.c and psl.sql. This header links the database and
* the RAM representation of objects. Additional functions were
* added later.
*
* This file is copyright 2002 Jim Kent, but license is hereby
* granted for all use - public, private or commercial. */
#ifndef PSL_H
#define PSL_H
#ifndef LOCALMEM_H
#include "localmem.h"
#endif
#ifndef LINEFILE_H
#include "linefile.h"
#endif
#ifndef FUZZYFIND_H
#include "fuzzyFind.h"
#endif
#ifndef DNASEQ_H
#include "dnaseq.h"
#endif
/* Some forward declarations of structures used but not defined here. */
struct rbTree;
#define PSL_NUM_COLS 21 /* number of columns in a PSL */
#define PSLX_NUM_COLS 23 /* number of columns in a PSLX */
/* Options to pslGetCreateSql */
#define PSL_TNAMEIX 0x01 /* create target name index */
#define PSL_WITH_BIN 0x02 /* add bin column */
#define PSL_XA_FORMAT 0x04 /* add XA format columns */
/* options for pslFromAlign */
#define PSL_IS_SOFTMASK 0x01 /* lower case are mask */
struct psl
/* Summary info about a patSpace alignment. Per-block data is held in the
* arrays blockSizes, qStarts and tStarts, which are indexed by block number
* (see pslQEnd() and pslTEnd()); presumably each holds blockCount entries. */
{
struct psl *next; /* Next in singly linked list. */
unsigned match; /* Number of bases that match that aren't repeats */
unsigned misMatch; /* Number of bases that don't match */
unsigned repMatch; /* Number of bases that match but are part of repeats */
unsigned nCount; /* Number of 'N' bases */
unsigned qNumInsert; /* Number of inserts in query */
int qBaseInsert; /* Number of bases inserted in query */
unsigned tNumInsert; /* Number of inserts in target */
int tBaseInsert; /* Number of bases inserted in target */
char strand[3]; /* + or - for strand. strand[0] is the query strand; strand[1]
                 * is the target strand, read as '+' unless it is '-'
                 * (see pslQStrand/pslTStrand). */
char *qName; /* Query sequence name */
unsigned qSize; /* Query sequence size */
int qStart; /* Alignment start position in query */
int qEnd; /* Alignment end position in query */
char *tName; /* Target sequence name */
unsigned tSize; /* Target sequence size */
int tStart; /* Alignment start position in target */
int tEnd; /* Alignment end position in target */
unsigned blockCount; /* Number of blocks in alignment */
unsigned *blockSizes; /* Size of each block */
unsigned *qStarts; /* Start of each block in query. */
unsigned *tStarts; /* Start of each block in target. */
char **qSequence; /* query sequence for each block
                   * NOTE(review): presumably only filled in for pslx - confirm */
char **tSequence; /* target sequence for each block
                   * NOTE(review): presumably only filled in for pslx - confirm */
};
struct psl *pslxLoad(char **row);
/* Load a pslx from row fetched with select * from psl
* from database. Dispose of this with pslFree(). */
struct psl *pslLoad(char **row);
/* Load a psl from row fetched with select * from psl
* from database. Dispose of this with pslFree(). */
struct psl *pslCommaIn(char **pS, struct psl *ret);
/* Create a psl out of a comma separated string.
* This will fill in ret if non-null, otherwise will
* return a new psl */
void pslFree(struct psl **pEl);
/* Free a single dynamically allocated psl such as created
* with pslLoad(). */
void pslFreeList(struct psl **pList);
/* Free a list of dynamically allocated psl's */
void pslOutput(struct psl *el, FILE *f, char sep, char lastSep);
/* Print out psl. Separate fields with sep. Follow last field with lastSep. */
#define pslTabOut(el,f) pslOutput(el,f,'\t','\n')
/* Print out psl as a line in a tab-separated file: fields are separated
* by tabs and the last field is followed by a newline. */
#define pslCommaOut(el,f) pslOutput(el,f,',',',')
/* Print out psl as a comma separated list including final comma. */
/* ----- end autoSql generated part --------------- */
void pslOutFormat(struct psl *el, FILE *f, char sep, char lastSep);
/* Print out selected psl values. Separate fields with sep. Follow last field with lastSep. */
/* Prints out a better format with bold field headings followed by value */
/* Requires further upstream work to ensure that only the field headers */
/* declared here are printed if replacing an existing psl print function*/
struct psl *pslLoadAll(char *fileName);
/* Load all psl's in file. */
struct psl *pslNext(struct lineFile *lf);
/* Read next line from file and convert it to psl. Return
* NULL at eof. */
struct psl *pslxLoadLm(char **row, struct lm *lm);
/* Load row into local memory pslx. */
struct psl *pslLoadLm(char **row, struct lm *lm);
/* Load row into local memory psl. */
void pslWriteHead(FILE *f);
/* Write head of psl. */
void pslxWriteHead(FILE *f, enum gfType qType, enum gfType tType);
/* Write head of pslx (extended psl). */
void pslWriteAll(struct psl *pslList, char *fileName, boolean writeHeader);
/* Write a psl file from list. */
struct lineFile *pslFileOpen(char *fileName);
/* Read header part of psl and make sure it's right.
* Return line file handle to it. */
struct lineFile *pslFileOpenWithMeta(char *fileName, FILE *f);
/* Read header part of psl and make sure it's right.
* Return line file handle to it and send meta data to output file f */
struct lineFile *pslFileOpenWithUniqueMeta(char *fileName, FILE *f);
/* Read header part of psl and make sure it's right.
* Set flag to suppress duplicate header comments.
* Return line file handle to it. */
void pslxFileOpen(char *fileName, enum gfType *retQueryType,
enum gfType *retTargetType, struct lineFile **retLf);
/* Read header part of psl and make sure it's right. Return
* sequence types and file handle. */
void pslxFileOpenWithMeta(char *fileName, enum gfType *retQueryType, enum gfType *retTargetType, struct lineFile **retLf, FILE *f);
/* Read header part of psl and make sure it's right. Return
* sequence types and file handle and send meta data to output file f */
void pslxFileOpenWithUniqueMeta(char *fileName, enum gfType *retQueryType, enum gfType *retTargetType, struct lineFile **retLf, FILE *f);
/* Read header part of psl and make sure it's right. Return
* sequence types and file handle and send only unique meta data to output f */
int pslCmpQuery(const void *va, const void *vb);
/* Compare to sort based on query. */
int pslCmpTarget(const void *va, const void *vb);
/* Compare to sort based on target. */
int pslCmpTargetAndStrand(const void *va, const void *vb);
/* Compare to sort based on target, strand, tStart. */
int pslCmpScore(const void *va, const void *vb);
/* Compare to sort based on score (descending). */
int pslCmpQueryScore(const void *va, const void *vb);
/* Compare to sort based on query then score (descending). */
int pslCalcMilliBad(struct psl *psl, boolean isMrna);
/* Calculate badness in parts per thousand. */
int pslCmpScoreDesc(const void *va, const void *vb);
/* Compare to sort based on score descending. */
int pslCmpMatch(const void *va, const void *vb);
/* Compare to sort based on match. */
int pslScore(const struct psl *psl);
/* Return score for psl. */
struct ffAli *pslToFfAli(struct psl *psl, struct dnaSeq *query, struct dnaSeq *target,
int targetOffset);
/* Convert from psl to ffAli format. */
struct ffAli *pslToFakeFfAli(struct psl *psl, DNA *needle, DNA *haystack);
/* Convert from psl to ffAli format. In some cases you can pass NULL
* for needle and haystack - depending what the post-processing is going
* to be. */
struct psl *pslFromFakeFfAli(struct ffAli *ff,
DNA *needle, DNA *haystack, char strand,
char *qName, int qSize, char *tName, int tSize);
/* This will create a basic psl structure from a sorted series of ffAli
* blocks. The fields that would need actual sequence to be filled in
* are left zero however - fields including match, repMatch, mismatch. */
int pslOrientation(struct psl *psl);
/* Translate psl strand + or - to orientation +1 or -1 */
/* Macros to get the query and target strand. The target strand is
* reported as an implied '+' whenever it is not explicitly '-'. */
#define pslQStrand(p) ((p)->strand[0])
#define pslTStrand(p) (((p)->strand[1] == '-') ? '-' : '+')
int pslWeightedIntronOrientation(struct psl *psl, struct dnaSeq *genoSeq, int offset);
/* Return >0 if introns make it look like alignment is on + strand,
* <0 if introns make it look like alignment is on - strand,
* 0 if can't tell. The absolute value of the return indicates
* how many splice sites we've seen supporting the orientation.
* Sequence should NOT be reverse complemented. */
int pslIntronOrientation(struct psl *psl, struct dnaSeq *genoSeq, int offset);
/* Return 1 if introns make it look like alignment is on + strand,
* -1 if introns make it look like alignment is on - strand,
* 0 if can't tell.
* Sequence should NOT be reverse complemented. */
boolean pslHasIntron(struct psl *psl, struct dnaSeq *seq, int seqOffset);
/* Return TRUE if there's a probable intron. Sequence should NOT be
* reverse complemented. */
void pslTailSizes(struct psl *psl, int *retStartTail, int *retEndTail);
/* Find the length of "tails" (rather than extensions) implied by psl. */
void pslRc(struct psl *psl);
/* Reverse-complement a PSL alignment. This makes the target strand explicit. */
void pslSwap(struct psl *psl, boolean noRc);
/* swap query and target in psl. If noRc is TRUE, don't reverse-complement
* PSL if needed, instead make target strand explicit. */
void pslTargetOffset(struct psl *psl, int offset);
/* Add offset to target positions in psl. */
void pslDump(struct psl *psl, FILE *f);
/* Dump most of PSL to file - for debugging. */
struct psl *pslTrimToTargetRange(struct psl *oldPsl, int tMin, int tMax);
/* Return psl trimmed to fit inside tMin/tMax. Note this does not
* update the match/misMatch and related fields. */
struct psl *pslTrimToQueryRange(struct psl *oldPsl, int qMin, int qMax);
/* Return psl trimmed to fit inside qMin/qMax. Note this does not
* update the match/misMatch and related fields. */
char* pslGetCreateSql(char* table, unsigned options, int tNameIdxLen);
/* Get SQL required to create PSL table. Options is a bit set consisting
* of PSL_TNAMEIX, PSL_WITH_BIN, and PSL_XA_FORMAT. tNameIdxLen is
* the number of characters in target name to index. If greater than
* zero, must specify PSL_TNAMEIX. If zero and PSL_TNAMEIX is specified,
* it will default to 8. */
int pslCheck(char *pslDesc, FILE* out, struct psl* psl);
/* Validate a PSL for consistency. pslDesc is printed with the error messages
* to file out (open /dev/null to discard). Return count of errors. */
int pslCountBlocks(struct psl *target, struct psl *query, int maxBlockGap);
/* count the number of blocks in the query that overlap the target */
/* merge blocks that are closer than maxBlockGap */
struct hash *readPslToBinKeeper(char *sizeFileName, char *pslFileName);
/* read a list of psls and return results in hash of binKeeper structure for fast query*/
boolean pslIsProtein(const struct psl *psl);
/* is psl a protein psl (are its blockSizes and scores in protein space) */
struct psl* pslFromAlign(char *qName, int qSize, int qStart, int qEnd, char *qString,
char *tName, int tSize, int tStart, int tEnd, char *tString,
char* strand, unsigned options);
/* Create a PSL from an alignment. Options PSL_IS_SOFTMASK if lower case
* bases indicate repeat masking. Returns NULL if alignment is empty after
* trimming leading and trailing indels. */
int pslShowAlignment(struct psl *psl, boolean isProt,
char *qName, bioSeq *qSeq, int qStart, int qEnd,
char *tName, bioSeq *tSeq, int tStart, int tEnd, FILE *f);
/* Show protein/DNA alignment or translated DNA alignment in HTML format. */
int pslGenoShowAlignment(struct psl *psl, boolean isProt,
char *qName, bioSeq *qSeq, int qStart, int qEnd,
char *tName, bioSeq *tSeq, int tStart, int tEnd, int exnStarts[], int exnEnds[], int exnCnt, FILE *f);
/* Show protein/DNA alignment or translated DNA alignment in HTML format. */
struct psl* pslNew(char *qName, unsigned qSize, int qStart, int qEnd,
char *tName, unsigned tSize, int tStart, int tEnd,
char *strand, unsigned blockSpace, unsigned opts);
/* create a new psl with space for the specified number of blocks allocated.
* pslGrow may be used to expand this space if needed. Valid options are
* PSL_XA_FORMAT. */
void pslGrow(struct psl *psl, int *blockSpacePtr);
/* Increase memory allocated to a psl to hold more blocks. blockSpacePtr
* should point to the current maximum number of blocks and will be
* updated with the new amount of space. */
int pslRangeTreeOverlap(struct psl *psl, struct rbTree *rangeTree);
/* Return amount that psl overlaps (on target side) with rangeTree. */
float pslIdent(struct psl *psl);
/* compute fraction identity */
float pslQueryAligned(struct psl *psl);
/* compute fraction of query that was aligned */
INLINE unsigned pslQEnd(struct psl *psl, int blkIdx)
-/* return query target end for the given block */
+/* return query end for the given block */
{
/* End is the block's query start plus its size; blkIdx is used as an
* array index without any range check. */
return psl->qStarts[blkIdx] + psl->blockSizes[blkIdx];
}
INLINE unsigned pslTEnd(struct psl *psl, int blkIdx)
/* Return the end position on the target of block blkIdx, computed as the
* block's target start plus its size. blkIdx is not range checked. */
{
unsigned blockStart = psl->tStarts[blkIdx];
unsigned blockLen = psl->blockSizes[blkIdx];
return blockStart + blockLen;
}
#endif /* PSL_H */