# Provenance (commit/diff header this text was extracted from):
#   commit e5a054a02e7a80af675a99ba146c27465837f1d8
#   markd Thu Dec 21 10:13:23 2023 -0800
#   more work on running tests in parallel
#   diff --git src/hg/lib/tests/genePredTests.mk src/hg/lib/tests/genePredTests.mk
#   index 3ba6cd2..5722b70 100644, hunk @@ -1,257 +1,259 @@
#
# makefile with genePred and genePredReader tests
include ../../../inc/common.mk

# table is used by multiple tests, so they must not run concurrently.
# (Declared once; a second identical declaration further down was redundant.)
.NOTPARALLEL:

BIN_DIR = bin/$(MACHTYPE)
GENE_PRED_TESTER = ${BIN_DIR}/genePredTester
EXP_DIR = expected/genePred
OUT_DIR = output/genePred
DB=hg15
TEST_TBL=gpTest_${USER}

# Data file used by tests:
#  - refSeq.psl - selected RefSeq PSLs used to generate genePred, some chosen
#    because of frame problems, etc.
#  - refSeq.cds - CDS for above.
#  - refSeqMin.gp - minimal genePred, only required columns
#  - refSeqId.gp - with score
#  - refSeqIdName2.gp - with score, name2
#  - refSeqFrame.gp - with id, name2, cdsStat and frame fields (from mrnaToGene)
#  - acembly.gff - data files used to build various tracks
#  - tigr.gff
#  - twinscan.gtf
#  - vegaGene.gtf
#  - vegaPseudo.gtf
#  - ncbi.gff - part GTF part GFF generated by NCBI
#  - incmpl.psl,cds - test creating genePreds where CDS of gene
#    is incomplete on one end or the other.
#  - flybase.gtf - used to test -impliedStopAfterCds
#  - frameBug.gff - VEGA gtf that broke frame assignment
#  - nscan.gtf - N-SCAN GTF with non-standard 5UTR features and bogus
#    frame on them.
#  - spaceInName.gff - exon lines ended with extra space, which CDS lines didn't, resulting in two
#    genePred records per gene.

# Top-level entry point: runs every test group.  Phony so that a stray file
# named "test" can never make the target look up to date.
.PHONY: test
test: fileTests tableTests fromPslTests compatTblTests fromGxfTests

###
# test of reading/writing tab-separated files.
###
# These targets name tests, not files; declare them phony so an existing file
# of the same name cannot cause a test to be skipped.
.PHONY: fileTests fileMinTest fileIdTest fileIdName2Test fileFrameTest \
        fileFrameStatTest doFileTest

fileTests: fileMinTest fileIdTest fileIdName2Test fileFrameTest fileFrameStatTest

# Command prefix that re-invokes this makefile on the doFileTest target.
doFileTest = ${MAKE} -f genePredTests.mk doFileTest

# The leading '+' marks each line as a recursive make invocation.  ${MAKE} is
# only reached through ${doFileTest}, so make cannot detect the recursion by
# itself; without '+' the sub-make gets no jobserver under -j and is not
# entered under -n.
fileMinTest:
	+${doFileTest} id=$@ inGp=refSeqMin.gp
fileIdTest:
	+${doFileTest} id=$@ inGp=refSeqId.gp opts="-scoreFld"
fileIdName2Test:
	+${doFileTest} id=$@ inGp=refSeqIdName2.gp opts="-scoreFld -name2Fld"
fileFrameTest:
	+${doFileTest} id=$@ inGp=refSeqFrame.gp opts="-cdsStatFld -exonFramesFld"

# regression test for a bug parsing unk cds status, also tests auto-increment (ids are zero)
fileFrameStatTest:
	+${doFileTest} id=$@ inGp=refSeqFrameStat.gp opts="-cdsStatFld -exonFramesFld"

# Recursive target to run a file read/write test.  Will diff the output
# with the input, which should be identical, and the -info output with the
# expected .info file.
# Expects the following variables to be set:
#   id - test id
#   inGp - genePred (omitting dir)
#   opts - genePredTester options to add
doFileTest: mkout
	${GENE_PRED_TESTER} ${opts} -needRows=5 -output=${OUT_DIR}/${id}.gp -info=${OUT_DIR}/${id}.info readFile input/genePred/${inGp}
	diff -u input/genePred/${inGp} ${OUT_DIR}/${id}.gp
	diff -u expected/genePred/${id}.info ${OUT_DIR}/${id}.info
	genePredCheck -verbose=0 ${OUT_DIR}/${id}.gp

###
# Test of loading and reading database tables.  When ids are not auto-assigned,
# then compare with input file, otherwise, we need an expected file.
###
# These targets name tests, not files; declare them phony so an existing file
# of the same name cannot cause a test to be skipped.
.PHONY: tableTests tableMinTest tableIdTest tableIdName2Test tableFrameTest \
        tableFrameStatTest doTableTest \
        fromPslTests fromPslMinTest fromPslCdsFrameTest fromPslIncmplTest

tableTests: tableMinTest tableIdTest tableIdName2Test tableFrameTest tableFrameStatTest

# Command prefix that re-invokes this makefile on the doTableTest target.
doTableTest = ${MAKE} -f genePredTests.mk doTableTest

# The leading '+' marks each line as a recursive make invocation.  ${MAKE} is
# only reached through ${doTableTest}, so make cannot detect the recursion by
# itself; without '+' the sub-make gets no jobserver under -j and is not
# entered under -n.
tableMinTest:
	+${doTableTest} id=$@ inGp=refSeqMin.gp expGp=input/genePred/refSeqMin.gp
tableIdTest:
	+${doTableTest} id=$@ inGp=refSeqId.gp opts="-scoreFld" expGp=input/genePred/refSeqId.gp
tableIdName2Test:
	+${doTableTest} id=$@ inGp=refSeqIdName2.gp opts="-scoreFld -name2Fld" expGp=input/genePred/refSeqIdName2.gp
tableFrameTest:
	+${doTableTest} id=$@ inGp=refSeqFrame.gp opts="-cdsStatFld -exonFramesFld" expGp=expected/genePred/tableFrameTest.gp
tableFrameStatTest:
	+${doTableTest} id=$@ inGp=refSeqFrameStat.gp opts="-cdsStatFld -exonFramesFld" expGp=expected/genePred/tableFrameStatTest.gp

# Recursive target to run a table read/write test.  Runs genePredTester
# loadTable on the input into ${TEST_TBL}, then readTable from it, and diffs
# what was read against ${expGp} (the input file itself for the tests that
# pass it as expGp, an expected file otherwise).
# Expects the following variables to be set:
#   id - test id
#   inGp - genePred (omitting dir)
#   expGp - expected gp, full path.
#   opts - genePredTester options to add
doTableTest: mkout
	${GENE_PRED_TESTER} ${opts} -needRows=5 -output=${OUT_DIR}/${id}.gp loadTable ${DB} ${TEST_TBL} input/genePred/${inGp}
	${GENE_PRED_TESTER} ${opts} -needRows=5 -output=${OUT_DIR}/${id}.gp -info=${OUT_DIR}/${id}.info readTable ${DB} ${TEST_TBL}
	diff -u ${expGp} ${OUT_DIR}/${id}.gp
	diff -u expected/genePred/${id}.info ${OUT_DIR}/${id}.info
	genePredCheck -verbose=0 ${OUT_DIR}/${id}.gp

###
# test of genePredFromPsl2 with various optional fields
###
fromPslTests: fromPslMinTest fromPslCdsFrameTest fromPslIncmplTest

# Command prefix that re-invokes this makefile on the doFromPslTest target.
doFromPslTest = ${MAKE} -f genePredTests.mk doFromPslTest

# '+' for the same reason as above: ${MAKE} is hidden inside ${doFromPslTest}.
fromPslMinTest:
	+${doFromPslTest} id=$@ inBase=refSeq
fromPslCdsFrameTest:
	+${doFromPslTest} id=$@ inBase=refSeq opts="-cdsStatFld -exonFramesFld"

# CDS incomplete on one end or the other
fromPslIncmplTest:
	+${doFromPslTest} id=$@ inBase=incmpl opts="-cdsStatFld -exonFramesFld"

# Recursive target to run a file psl to genePred tests
#   id - test id
#   inBase - base name for psl and
#     cds files
#   opts - genePredTester options to add
# Converts input/genePred/${inBase}.psl plus .cds with the fromPsl command,
# re-reads the result with readFile, and diffs it against the expected file.
.PHONY: doFromPslTest
doFromPslTest: mkout
	${GENE_PRED_TESTER} ${opts} -output=${OUT_DIR}/${id}.gp fromPsl input/genePred/${inBase}.psl input/genePred/${inBase}.cds
	${GENE_PRED_TESTER} ${opts} readFile ${OUT_DIR}/${id}.gp
	diff -u ${EXP_DIR}/${id}.gp ${OUT_DIR}/${id}.gp
	genePredCheck -verbose=0 ${OUT_DIR}/${id}.gp

# tests of reading existing tables; these don't diff output, since tables
# are not stable, but at least see if we can read them
# (phony: these name tests, not files)
.PHONY: compatTblTests refFlatTest refGeneTest knownGeneTest doCompatTblTest
compatTblTests: refFlatTest refGeneTest knownGeneTest

# Command prefix that re-invokes this makefile on the doCompatTblTest target.
doCompatTblTest = ${MAKE} -f genePredTests.mk doCompatTblTest

# The leading '+' marks each line as a recursive make invocation.  ${MAKE} is
# only reached through ${doCompatTblTest}, so make cannot detect the recursion
# by itself; without '+' the sub-make gets no jobserver under -j and is not
# entered under -n.
refFlatTest:
	+${doCompatTblTest} id=$@ tbl=refFlat
refGeneTest:
	+${doCompatTblTest} id=$@ tbl=refGene
knownGeneTest:
	+${doCompatTblTest} id=$@ tbl=knownGene

# recursive target to read table
#   id - test id
#   tbl - name of the table in ${DB} to read
doCompatTblTest: mkout
	${GENE_PRED_TESTER} -maxRows=250 -needRows=250 -info=${OUT_DIR}/${id}.info readTable ${DB} ${tbl}
	diff -u ${EXP_DIR}/${id}.info ${OUT_DIR}/${id}.info

##
# GFF/GTF conversion tests; try bits of files from several different
# sources.
##
fromGxfTests: fromGxfMinTest fromGxfFrameTest \
        fromGxfExonSelTest fromGxfExonSelFrameTest \
        fromGxfVegaTest fromGxfVegaFrameTest fromGxfVegaPseudoTest \
        fromGxfAcemblyTest fromGxfAcemblyFrameTest \
        fromGxfNcbiTest fromGtfRegressTest fromGffRegressTest \
        fromGffCeSangerTest fromGffCeSangerTypeTest \
        fromGtfImpliedStopTest fromGffFrameTest fromGtfNscanTest \
        fromGffSpaceInNameTest

# These targets name tests, not files; declare them phony so an existing file
# of the same name cannot cause a test to be skipped.
.PHONY: fromGxfTests fromGxfMinTest fromGxfFrameTest \
        fromGxfExonSelTest fromGxfExonSelFrameTest \
        fromGxfVegaTest fromGxfVegaFrameTest fromGxfVegaPseudoTest \
        fromGxfAcemblyTest fromGxfAcemblyFrameTest \
        fromGxfNcbiTest fromGtfRegressTest fromGffRegressTest

# Command prefix that re-invokes this makefile on the doFromGxfTest target.
doFromGxfTest = ${MAKE} -f genePredTests.mk doFromGxfTest

# The leading '+' marks each line as a recursive make invocation.  ${MAKE} is
# only reached through ${doFromGxfTest}, so make cannot detect the recursion
# by itself; without '+' the sub-make gets no jobserver under -j and is not
# entered under -n.
fromGxfMinTest:
	+${doFromGxfTest} id=$@ what=fromGtf inBase=twinscan.gtf
fromGxfFrameTest:
	+${doFromGxfTest} id=$@ what=fromGtf inBase=twinscan.gtf opts="-cdsStatFld -exonFramesFld -name2Fld"
fromGxfExonSelTest:
	+${doFromGxfTest} id=$@ what=fromGff inBase=tigr.gff opts="-exonSelectWord=TC"
fromGxfExonSelFrameTest:
	+${doFromGxfTest} id=$@ what=fromGff inBase=tigr.gff opts="-exonSelectWord=TC -cdsStatFld -exonFramesFld"
fromGxfVegaTest:
	+${doFromGxfTest} id=$@ what=fromGtf inBase=vegaGene.gtf opts="-name2Fld"
fromGxfVegaFrameTest:
	+${doFromGxfTest} id=$@ what=fromGtf inBase=vegaGene.gtf opts="-name2Fld -cdsStatFld -exonFramesFld -scoreFld"
fromGxfVegaPseudoTest:
	+${doFromGxfTest} id=$@ what=fromGtf inBase=vegaPseudo.gtf opts="-name2Fld"
fromGxfAcemblyTest:
	+${doFromGxfTest} id=$@ what=fromGff inBase=acembly.gff
fromGxfAcemblyFrameTest:
	+${doFromGxfTest} id=$@ what=fromGff inBase=acembly.gff opts="-name2Fld -cdsStatFld -exonFramesFld -scoreFld"

# this crashed ldHgLoad gene
fromGxfNcbiTest:
	+${doFromGxfTest} id=$@ what=fromGff inBase=ncbi.gff opts="-name2Fld -cdsStatFld -exonFramesFld -scoreFld -exonSelectWord=exon"

# GTF bug regression:
#  - stop_codon wiping out frame on last CDS
#  - stop_codon being the only codon in an exon didn't get frame right
fromGtfRegressTest:
	+${doFromGxfTest} id=$@ what=fromGtf inBase=regress.gtf opts="-cdsStatFld -exonFramesFld"

# GFF bug regressions
#  - none right now
fromGffRegressTest:
	+${doFromGxfTest} id=$@ what=fromGff inBase=regress.gff opts="-exonSelectWord=exon"

# ce2 sangerGene GFF bugs
#  - single exon CDS
#    genes with frame column not being set (from ce2/sangerGene)
#  - bogus key name fields that get converted to ids 'ZC101.2c ; Confirmed_by_EST'
# FIXME: this is not currently producing the correct frame information without specifying -exonSelectWord=coding_exon
# probably need some serious work to be able to take in the majority of GFFs.
# FIXME: just disabled checking until fixed
#
# The leading '+' on the ${doFromGxfTest} lines below marks them as recursive
# make invocations.  ${MAKE} is only reached through that variable, so make
# cannot detect the recursion by itself; without '+' the sub-make gets no
# jobserver under -j and is not entered under -n.
fromGffCeSangerTest:
	+${doFromGxfTest} id=$@ what=fromGff inBase=ceSangerGene.gff opts="-noCheck -cdsStatFld -exonFramesFld -exonSelectWord=exon -ignoreUnconverted" readOpts=-noCheck

# ce2 sangerGene GFF bugs
# specifying exon type of coding_exon created only one exon.
fromGffCeSangerTypeTest:
	+${doFromGxfTest} id=$@ what=fromGff inBase=ceSangerGene.gff opts="-cdsStatFld -exonFramesFld -ignoreUnconverted -exonSelectWord=coding_exon"

# adding implied stop
fromGtfImpliedStopTest:
	+${doFromGxfTest} id=$@ what=fromGtf inBase=flybase.gtf opts="-cdsStatFld -impliedStopAfterCds"

# case where frame was lost
fromGffFrameTest:
	+${doFromGxfTest} id=$@ what=fromGff inBase=frameBug.gff opts="-genePredExt -exonSelectWord=exon"

fromGtfNscanTest:
	+${doFromGxfTest} id=$@ what=fromGtf inBase=nscan.gtf opts="-genePredExt"

# space at end of some lines caused two genePreds per gene
fromGffSpaceInNameTest:
	+${doFromGxfTest} id=$@ what=fromGff inBase=spaceInName.gff

# These targets name actions, not files; declare them phony so an existing
# file of the same name cannot cause them to be skipped.
.PHONY: fromGffCeSangerTest fromGffCeSangerTypeTest fromGtfImpliedStopTest \
        fromGffFrameTest fromGtfNscanTest fromGffSpaceInNameTest \
        doFromGxfTest mkout

# recursive target for GFF/GTF tests
#   id - test id
#   what - fromGtf or fromGff
#   inBase - name of the GFF/GTF file under input/genePred
#   opts - genePredTester options to add
#   readOpts - special options for readFile
doFromGxfTest: mkout
	${GENE_PRED_TESTER} ${opts} -output=${OUT_DIR}/${id}.gp ${what} input/genePred/${inBase}
	${GENE_PRED_TESTER} ${readOpts} readFile ${OUT_DIR}/${id}.gp
	diff -u ${EXP_DIR}/${id}.gp ${OUT_DIR}/${id}.gp
	genePredCheck -verbose=0 ${OUT_DIR}/${id}.gp

# Create the test output directory (prerequisite of every do*Test target).
mkout:
	@${MKDIR} ${OUT_DIR}

# Remove test output and drop the per-user scratch table.
clean::
	@rm -rf ${OUT_DIR}
	hgsql -e "drop table if exists ${TEST_TBL}" ${DB}