b6d7e4446453c3ab019b30024741247171a2f040 markd Mon Jun 29 20:04:40 2020 +0000 make sure data is 64-bit aligned, store offsets in headers to simplify diff --git src/inc/genoFind.h src/inc/genoFind.h index f894a25..bdf59e8 100644 --- src/inc/genoFind.h +++ src/inc/genoFind.h @@ -74,47 +74,50 @@ bits32 qStart, qEnd; /* Position in query. */ struct gfSeqSource *target; /* Target source sequence. */ bits32 tStart, tEnd; /* Position in target. */ int hitCount; /* Number of hits. */ struct gfHit *hitList; /* List of hits. Not allocated here. */ int queryCoverage; /* Number of bases covered in query (thx AG!) */ }; void gfClumpFree(struct gfClump **pClump); /* Free a single clump. */ void gfClumpFreeList(struct gfClump **pList); /* Free a list of dynamically allocated gfClump's */ struct genoFind -/* An index of all K-mers in the genome. */ +/* An index of all K-mers in the genome. + * WARNING: MUST MODIFY CODE TO STORE/LOAD INDEX TO FILES IF THIS STRUCTURE IS + * MODIFIED!!! + */ { int maxPat; /* Max # of times pattern can occur * before it is ignored. */ int minMatch; /* Minimum number of tile hits needed * to trigger a clump hit. */ int maxGap; /* Max gap between tiles in a clump. */ int tileSize; /* Size of each N-mer. */ int stepSize; /* Spacing between N-mers. */ int tileSpaceSize; /* Number of N-mer values. */ int tileMask; /* 1-s for each N-mer. */ int sourceCount; /* Count of source files. */ - struct gfSeqSource *sources; /* List of sequence sources. */ bool isPep; /* Is a peptide. */ bool allowOneMismatch; /* Allow a single mismatch? */ bool noSimpRepMask; /* Dis-Allow simple repeat masking. */ int segSize; /* Index is segmented if non-zero. */ + struct gfSeqSource *sources; /* List of sequence sources. */ bits32 totalSeqSize; /* Total size of all sequences. */ bits32 *listSizes; /* Size of list for each N-mer */ void *allocated; /* Storage space for all lists. */ bits32 **lists; /* A list for each N-mer. Used if * isSegmented is false. */ bits16 **endLists; /* A more complex list for each N-mer. * Used if isSegmented is true. * Values come in groups of threes. * The first is the packed last few * letters of the tile. The next two * are the offset in the genome. This * would be a struct but that would take * 8 bytes instead of 6, or nearly an * extra gigabyte of RAM. */ };