bbeeeeb5d888089a025aa547c05bfc9b443dc39f
angie
  Thu Dec 20 14:06:39 2018 -0800
Adding support for files that may have a mix of newline styles (\r\n, \r, \n), enabled by calling lineFileCarefulNewlines.  refs #22638
Scanning for any type of newline is not quite as efficient as scanning for only one pre-determined type, but it's necessary to deal with the kind of garbage data that has snuck into some saved sessions.
I also fixed a couple of subtle cases that have not caused any trouble in our day-to-day dealings with nice \n-separated input with line sizes shorter than the default lf->buf size (64k):
* determineNlType initialized lf->nlType to UNIX, but if the first non-empty buffer did not contain any newline, UNIX may or may not have been the correct type.
* The second time determineNlType was called, it was using an outdated endIx.  Note the second instance of scanning for newlines used < sizeLeft as a test instead of endIx; that needed to be applied to determineNlType too.
* determineNlType was called with buf+endIx, but with a byte limit that didn't account for endIx.
I tested lineFile with an initial buf size of 16 (in lineFileAttach) to test the looping on gotLf.

diff --git src/inc/linefile.h src/inc/linefile.h
index 1481c98..0c0092e 100644
--- src/inc/linefile.h
+++ src/inc/linefile.h
@@ -13,31 +13,32 @@
 #define tabix_t tbx_t
 #define ti_iter_t hts_itr_t
 #define ti_open hts_open
 #define ti_index_load tbx_index_load
 #define ti_close tbx_destroy
 #define ti_get_tid tbx_name2id
 #define ti_queryi tbx_itr_queryi
 #define ti_iter_destroy tbx_itr_destroy
 
 #define LF_BOGUS_FILE_PREFIX "somefile."
 
 enum nlType {
  nlt_undet, /* undetermined */
  nlt_unix,  /* lf   */
  nlt_dos,   /* crlf */
- nlt_mac    /* cr   */
+ nlt_mac,   /* cr   */
+ nlt_mixed  /* could be any or all of the above */
 };
 
 struct metaOutput
 /* struct to store list of file handles to output meta data to
  * meta data is text after # */
     {
     struct metaOutput *next;    /* next file handle */
     FILE *metaFile;             /* file to write metadata to */
     };
 
 struct lineFile
 /* Structure to handle fast, line oriented
  * fileIo. */
     {
     struct lineFile *next;	/* Might need to be on a list. */
@@ -291,18 +292,22 @@
  * It must be readable in addition to fileOrUrl. If there's a problem, warn & return NULL.
  * This works only if kent/src has been compiled with USE_TABIX=1 and linked
  * with the tabix C library. */
 
 struct lineFile *lineFileTabixMayOpen(char *fileOrUrl, bool zTerm);
 /* Wrap a line file around a data file that has been compressed and indexed
  * by the tabix command line program.  The index file <fileName>.tbi must be
  * readable in addition to fileName. If there's a problem, warn & return NULL.
  * This works only if kent/src has been compiled with USE_TABIX=1 and linked
  * with the tabix C library. */
 
 boolean lineFileSetTabixRegion(struct lineFile *lf, char *seqName, int start, int end);
 /* Assuming lf was created by lineFileTabixMayOpen, tell tabix to seek to the specified region
  * and return TRUE (or if there are no items in region, return FALSE). */
 
+void lineFileCarefulNewlines(struct lineFile *lf);
+/* Tell lf to use a less efficient method of scanning for the next newline that can handle
+ * files with a mix of newline conventions. */
+
 #endif /* LINEFILE_H */