0a10e299786e9b470b55b2f02b9718ec6c4e4bb4 markd Sat Jun 27 15:33:26 2020 -0700 move genoFind build, save, and load to a library diff --git src/inc/genoFind.h src/inc/genoFind.h index f74c209..c9aac48 100644 --- src/inc/genoFind.h +++ src/inc/genoFind.h @@ -107,36 +107,62 @@ bits32 *listSizes; /* Size of list for each N-mer */ void *allocated; /* Storage space for all lists. */ bits32 **lists; /* A list for each N-mer. Used if * isSegmented is false. */ bits16 **endLists; /* A more complex list for each N-mer. * Used if isSegmented is true. * Values come in groups of threes. * The first is the packed last few * letters of the tile. The next two * are the offset in the genome. This * would be a struct but that would take * 8 bytes instead of 6, or nearly an * extra gigabyte of RAM. */ }; + void genoFindFree(struct genoFind **pGenoFind); /* Free up a genoFind index. */ struct gfSeqSource *gfFindNamedSource(struct genoFind *gf, char *name); /* Find target of given name. Return NULL if none. */ +struct genoFindIndex +/* Container for genoFind indexes, storing either an untranslated index or six translated indexes. + * These can be created in memory or saved to a file to quickly mmap. */ +{ + void *memMapped; /* memory mapped if non-NULL, with amount allocated */ + size_t memLength; /* amount allocated for memMapped */ + bool isTrans; /* is this translated? */ + struct genoFind *untransGf; /* the untranslated index; NOTE(review): presumably set only when isTrans is false -- confirm */ + struct genoFind *transGf[2][3]; /* the six translated indexes; NOTE(review): presumably indexed [strand][frame] -- confirm */ +}; + +struct genoFindIndex* genoFindIndexBuild(int fileCount, char *seqFiles[], + int minMatch, int maxGap, int tileSize, + int repMatch, boolean doTrans, char *oocFile, + boolean allowOneMismatch, boolean doMask, + int stepSize, boolean noSimpRepMask); +/* build an untranslated or translated index */ + +void genoFindIndexWrite(struct genoFindIndex *gfIdx, char *fileName); +/* write index to file that can be mapped */ + +struct genoFindIndex* genoFindIndexLoad(char *fileName, boolean isTrans); +/* load indexes from file. 
*/ + + /* --- Stuff for saving results ---- */ struct gfOutput /* A polymorphic object to help us write many file types. */ { struct gfOutput *next; void *data; /* Type-specific data pointer. Must be freeMem'able */ void (*out)(char *chromName, int chromSize, int chromOffset, struct ffAli *ali, bioSeq *tSeq, struct hash *t3Hash, bioSeq *qSeq, boolean qIsRc, boolean tIsRc, enum ffStringency stringency, int minMatch, struct gfOutput *out); /* This is the type of a client provided function to save an alignment. * The parameters are: * chromName - name of target (aka genomic or database) sequence.