0ede364a2dcb452681933d5a3579b4f05d90a245 markd Sun Jul 5 15:16:27 2020 -0700 fixed bug where totalSeqSize was not stored in index, now hgBlat works diff --git src/inc/genoFind.h src/inc/genoFind.h index bdf59e8..fd2209d 100644 --- src/inc/genoFind.h +++ src/inc/genoFind.h @@ -93,61 +93,62 @@ { int maxPat; /* Max # of times pattern can occur * before it is ignored. */ int minMatch; /* Minimum number of tile hits needed * to trigger a clump hit. */ int maxGap; /* Max gap between tiles in a clump. */ int tileSize; /* Size of each N-mer. */ int stepSize; /* Spacing between N-mers. */ int tileSpaceSize; /* Number of N-mer values. */ int tileMask; /* 1-s for each N-mer. */ int sourceCount; /* Count of source files. */ bool isPep; /* Is a peptide. */ bool allowOneMismatch; /* Allow a single mismatch? */ bool noSimpRepMask; /* Dis-Allow simple repeat masking. */ int segSize; /* Index is segmented if non-zero. */ - struct gfSeqSource *sources; /* List of sequence sources. */ bits32 totalSeqSize; /* Total size of all sequences. */ + struct gfSeqSource *sources; /* List of sequence sources. */ bits32 *listSizes; /* Size of list for each N-mer */ void *allocated; /* Storage space for all lists. */ bits32 **lists; /* A list for each N-mer. Used if * isSegmented is false. */ bits16 **endLists; /* A more complex list for each N-mer. * Used if isSegmented is true. * Values come in groups of threes. * The first is the packed last few * letters of the tile. The next two * are the offset in the genome. This * would be a struct but that would take * 8 bytes instead of 6, or nearly an * extra gigabyte of RAM. */ }; void genoFindFree(struct genoFind **pGenoFind); /* Free up a genoFind index. */ struct gfSeqSource *gfFindNamedSource(struct genoFind *gf, char *name); /* Find target of given name. Return NULL if none. */ struct genoFindIndex /* container for genoFind indexes, storing either an untranslated index or six translated indexes. 
* these can be created in memory or saved to a file to quickly mmap */ { void *memMapped; /* memory mapped if non-NULL, with amount allocated */ size_t memLength; bool isTrans; /* is this translated? */ + bool noSimpRepMask; /* Suppresses simple repeat masking for very small genomes */ struct genoFind *untransGf; struct genoFind *transGf[2][3]; }; struct genoFindIndex* genoFindIndexBuild(int fileCount, char *seqFiles[], int minMatch, int maxGap, int tileSize, int repMatch, boolean doTrans, char *oocFile, boolean allowOneMismatch, boolean doMask, int stepSize, boolean noSimpRepMask); /* build an untranslated or translated index */ void genoFindIndexWrite(struct genoFindIndex *gfIdx, char *fileName); /* write index to file that can be mapped */ struct genoFindIndex* genoFindIndexLoad(char *fileName, boolean isTrans);