ea380ca54ffb45c5ca7a19560d0e0bfe5d0be274 kent Tue Apr 3 13:41:30 2012 -0700 Adding recommendations against long lines and long functions. diff --git src/README src/README index 974e006..95c37ba 100644 --- src/README +++ src/README @@ -161,31 +161,45 @@ level as the block they enclose: if (someTest) { doSomething(); doSomethingElse(); } Each block of code is indented by 4 from the previous block. As per Unix standard practice, tab stops are set to 8, not 4 as is the common practice in Windows, so some care must be taken to use tabs for indenting. Since tabs are especially problematic for Python code, and we are starting to use Python a fair bit as well, tabs are best avoided altogether. The proper settings for the vi editor to interpret tabs correctly in existing code, and avoid tabs in new code are: set ts=8 set sw=4 set expandtab -Lines should be no more than 100 characters wide. + +Lines should be no more than 100 characters wide. Lines that are +longer than this are broken and indented at least 8 spaces +more than the original line to indicate the line continuation. +Line continuations may be unavoidable when calling functions with long +parameter lists and in a few other situations. Where possible, though, +simplifying techniques should be applied to the code in preference +to the line continuations. Complex expressions can be broken into +parts that are assigned to intermediate variables. Long variable names +can be revisited and sometimes shortened. Deep indenting can be +avoided by simplifying logic and moving blocks into their own functions. +These are just some ways you can productively shorten long lines without +resorting to continuations, which, no matter how they are done, will tend +to obscure the logic apparent in the indentation. + NAMES Symbol names begin with a lower-case letter. The second and subsequent words in a name begin with a capital letter to help visually separate the words. Abbreviation of words is strongly discouraged. 
Words of five letters and less should generally not be abbreviated. If a word is abbreviated in general it is abbreviated to the first three letters: tabSeparatedFile -> tabSepFile In some cases, for local variables abbreviating to a single letter for each word is ok: tabSeparatedFile -> tsf In rare, complex, cases you may treat the abbreviation itself as a word, and only the @@ -285,47 +299,52 @@ AllocVar(twoBit); int ubyteSize = packedSize(seq->size); UBYTE *pt = AllocArray(twoBit->data, ubyteSize); twoBit->name = cloneString(seq->name); twoBit->size = seq->size; /* Convert to 4-bases per byte representation. */ char *dna = seq->dna; int i, end; end = seq->size - 4; for (i=0; i<end; i += 4) { *pt++ = packDna4(dna+i); } /* Take care of conversion of last few bases. Here we have to take care not to go past end. */ char last4[4]; last4[0] = last4[1] = last4[2] = last4[3] = 'T'; memcpy(last4, dna+i, seq->size-i); *pt = packDna4(last4); -/* Deal with blocks of N. */ +/* Deal with blocks of N, saving end points of blocks. */ twoBit->nBlockCount = countBlocksOfN(dna, seq->size); if (twoBit->nBlockCount > 0) { AllocArray(twoBit->nStarts, twoBit->nBlockCount); AllocArray(twoBit->nSizes, twoBit->nBlockCount); storeBlocksOfN(dna, seq->size, twoBit->nStarts, twoBit->nSizes); } -/* Deal with masking */ +/* Deal with masking, saving end points of blocks. */ if (doMask) { twoBit->maskBlockCount = countBlocksOfLower(dna, seq->size); if (twoBit->maskBlockCount > 0) { AllocArray(twoBit->maskStarts, twoBit->maskBlockCount); AllocArray(twoBit->maskSizes, twoBit->maskBlockCount); storeBlocksOfLower(dna, seq->size, twoBit->maskStarts, twoBit->maskSizes); } } return twoBit; } +Though code paragraphs help make long functions readable, in general +smaller functions are preferred. It is rare that a longer function +couldn't be improved by moving some blocks of code into new functions +or simplifying. + ==================================================================== This file last updated: $Date: 2010/06/03 16:48:53 $