051d8d750d9ee269ea2a26575a90a8591fb4cd84 markd Mon Jun 8 09:34:32 2026 -0700 added option to get accession with version from blast xml diff --git src/hg/blastToPsl/tests/makefile src/hg/blastToPsl/tests/makefile index bdd58c92770..e49b2164bd1 100644 --- src/hg/blastToPsl/tests/makefile +++ src/hg/blastToPsl/tests/makefile @@ -1,212 +1,218 @@ kentSrc = ../../.. include ../../../inc/common.mk #blastToPsl = valgrind --tool=memcheck blastToPsl blastToPsl = ${DESTBINDIR}/blastToPsl #blastXmlToPsl = valgrind --tool=memcheck blastXmlToPsl blastXmlToPsl = ${DESTBINDIR}/blastXmlToPsl pslCheck = ${DESTBINDIR}/pslCheck all: test: testBlastToPsl testBlastXmlToPsl ### # blastToPsl ### testBlastToPsl: protMrnaTest protDnaTest mrnaDnaTest mrnaDnaxTest \ blockBugTest identityBugTest protMrnaEval1Test protMrnaEval2Test \ longDbNameTest multiLineDescTest tblastxTest tblastnTest \ blastpPsiTest psiNoHitsTest psiBadFormatTest blastpPsiWrapTest \ protMrnaTsvTest protMrnaTest: mkdirs ${blastToPsl} -scores=output/$@.bscores input/kgProtMRna1.blast output/$@.psl ${pslCheck} -verbose=0 output/$@.psl diff -u expected/$@.psl output/$@.psl diff -u expected/$@.bscores output/$@.bscores # -tsv writes scores with a TSV header (no leading #) protMrnaTsvTest: mkdirs ${blastToPsl} -tsv -scores=output/$@.tsv input/kgProtMRna1.blast output/$@.psl ${pslCheck} -verbose=0 output/$@.psl diff -u expected/$@.psl output/$@.psl diff -u expected/$@.tsv output/$@.tsv protDnaTest: mkdirs ${blastToPsl} -scores=output/$@.bscores input/kgProtDna1.blast output/$@.psl ${pslCheck} -verbose=0 output/$@.psl diff -u expected/$@.psl output/$@.psl diff -u expected/$@.bscores output/$@.bscores mrnaDnaTest: mkdirs ${blastToPsl} input/mrnaDna1.blast output/$@.psl ${pslCheck} -verbose=0 output/$@.psl diff -u expected/$@.psl output/$@.psl mrnaDnaxTest: mkdirs ${blastToPsl} -pslx input/mrnaDna1.blast output/$@.psl ${pslCheck} -verbose=0 output/$@.psl diff -u expected/$@.psl output/$@.psl # test case that cause huge target block start. blockBugTest: mkdirs ${blastToPsl} input/blockBug.blast output/$@.psl ${pslCheck} -verbose=0 output/$@.psl diff -u expected/$@.psl output/$@.psl # handle id identityBugTest: mkdirs ${blastToPsl} input/identityBug.blast output/$@.psl ${pslCheck} -verbose=0 output/$@.psl diff -u expected/$@.psl output/$@.psl # test E-value threshold filter protMrnaEval1Test: mkdirs ${blastToPsl} -eVal=1 -scores=output/$@.bscores input/kgProtMRna1.blast output/$@.psl ${pslCheck} -verbose=0 output/$@.psl diff -u expected/$@.psl output/$@.psl diff -u expected/$@.bscores output/$@.bscores protMrnaEval2Test: mkdirs ${blastToPsl} -eVal=1e-10 -scores=output/$@.bscores input/kgProtMRna1.blast output/$@.psl ${pslCheck} -verbose=0 output/$@.psl diff -u expected/$@.psl output/$@.psl diff -u expected/$@.bscores output/$@.bscores longDbNameTest: mkdirs ${blastToPsl} input/longDbName.blast output/$@.psl ${pslCheck} -verbose=0 output/$@.psl diff -u expected/$@.psl output/$@.psl # multiline description broke parser multiLineDescTest: mkdirs ${blastToPsl} input/multiLineDesc.blast output/$@.psl ${pslCheck} -verbose=0 output/$@.psl diff -u expected/$@.psl output/$@.psl # tblastx tblastxTest: mkdirs ${blastToPsl} -scores=output/$@.bscores input/tblastx.blast output/$@.psl ${pslCheck} -verbose=0 output/$@.psl diff -u expected/$@.psl output/$@.psl diff -u expected/$@.bscores output/$@.bscores # tblastn tblastnTest: mkdirs ${blastToPsl} -scores=output/$@.bscores input/tblastn.blast output/$@.psl ${pslCheck} -verbose=0 output/$@.psl diff -u expected/$@.psl output/$@.psl diff -u expected/$@.bscores output/$@.bscores # this test is currently disabled. This file has Database: before Query= and # there are multiple queries for each Database: tblastnDbFirstTest: mkdirs ${blastToPsl} input/tblastnDbFirst.blast output/$@.psl ${pslCheck} -verbose=0 output/$@.psl diff -u expected/$@.psl output/$@.psl # this test is currently disabled, mega-blast is painful because # Database: comes before Query= and there are multiple queries for each # Database:. megaBlastTest: mkdirs ${blastToPsl} input/970.blast output/$@.psl ${pslCheck} -verbose=0 output/$@.psl diff -u expected/$@.psl output/$@.psl # PSI-BLASTP blastpPsiTest: mkdirs ${blastToPsl} -scores=output/$@.bscores input/blastpPsi.blast output/$@.psl ${pslCheck} -verbose=0 output/$@.psl diff -u expected/$@.psl output/$@.psl diff -u expected/$@.bscores output/$@.bscores # PSI-BLASTP with Identities line wrapping blastpPsiWrapTest: mkdirs ${blastToPsl} -scores=output/$@.bscores input/blastpPsiWrap.blast output/$@.psl ${pslCheck} -verbose=0 output/$@.psl diff -u expected/$@.psl output/$@.psl diff -u expected/$@.bscores output/$@.bscores # PSI-BLASTP with no hits psiNoHitsTest: mkdirs ${blastToPsl} -scores=output/$@.bscores input/psiNoHits.blast output/$@.psl diff -u expected/$@.psl output/$@.psl diff -u expected/$@.bscores output/$@.bscores # PSI-BLASTP produces bogus blocks where query starts at 0 and there is not # ending number on the line. Sbjct may be empty or have sequence. These # are ignored psiBadFormatTest: mkdirs ${blastToPsl} -scores=output/$@.bscores input/psiBadFormat.blast output/$@.psl >output/$@.err 2>&1 ${pslCheck} -verbose=0 output/$@.psl diff -u expected/$@.psl output/$@.psl diff -u expected/$@.bscores output/$@.bscores diff -u expected/$@.err output/$@.err ### # blastXmlToPsl ### -testBlastXmlToPsl: mrna1Test mrna1TsvTest prot1Test prot2Test prot1NucCoordTest sp1PsiBlastTest qtName1Test qtName2Test misclassedPsiBlastTest +testBlastXmlToPsl: mrna1Test mrna1TsvTest prot1Test prot2Test prot1NucCoordTest sp1PsiBlastTest qtName1Test qtName2Test hitIdIdTest misclassedPsiBlastTest mrna1Test: mkdirs ${blastXmlToPsl} -scores=output/$@.bscores input/mrna1.blastn.xml output/$@.psl ${pslCheck} -verbose=0 output/$@.psl diff -u expected/$@.psl output/$@.psl diff -u expected/$@.bscores output/$@.bscores # -tsv writes scores with a TSV header (no leading #) mrna1TsvTest: mkdirs ${blastXmlToPsl} -tsv -scores=output/$@.tsv input/mrna1.blastn.xml output/$@.psl ${pslCheck} -verbose=0 output/$@.psl diff -u expected/$@.psl output/$@.psl diff -u expected/$@.tsv output/$@.tsv prot1Test: mkdirs ${blastXmlToPsl} -scores=output/$@.bscores input/prot1.tblastn.xml output/$@.psl ${pslCheck} -verbose=0 output/$@.psl diff -u expected/$@.psl output/$@.psl diff -u expected/$@.bscores output/$@.bscores prot2Test: mkdirs ${blastXmlToPsl} -scores=output/$@.bscores input/prot2.psiblast.xml output/$@.psl ${pslCheck} -verbose=0 output/$@.psl diff -u expected/$@.psl output/$@.psl diff -u expected/$@.bscores output/$@.bscores prot1NucCoordTest: mkdirs ${blastXmlToPsl} -convertToNucCoords input/prot1.tblastn.xml output/$@.psl ${pslCheck} -verbose=0 output/$@.psl diff -u expected/$@.psl output/$@.psl sp1PsiBlastTest: mkdirs ${blastXmlToPsl} -tName=Hit_accession input/sp1.psiBlast.xml output/$@.psl ${pslCheck} -verbose=0 output/$@.psl diff -u expected/$@.psl output/$@.psl qtName1Test: mkdirs ${blastXmlToPsl} -qName=query-ID -tName=Hit_id input/sp1.psiBlast.xml output/$@.psl ${pslCheck} -verbose=0 output/$@.psl diff -u expected/$@.psl output/$@.psl qtName2Test: mkdirs ${blastXmlToPsl} -qName=query-def0 -tName=Hit_def0 input/sp1.psiBlast.xml output/$@.psl ${pslCheck} -verbose=0 output/$@.psl diff -u expected/$@.psl output/$@.psl +# -tName=Hit_id_id pulls accession out of an id like 'gb|CM102538.1|' +hitIdIdTest: mkdirs + ${blastXmlToPsl} -tName=Hit_id_id input/blastn-ncbi-seqs.xml output/$@.psl + ${pslCheck} -verbose=0 output/$@.psl + diff -u expected/$@.psl output/$@.psl + # psiblast file that claimes to be blastp misclassedPsiBlastTest: mkdirs ${blastXmlToPsl} -forcePsiBlast -tName=Hit_accession -scores=output/$@.bscores input/misclassed.psiBlast.xml output/$@.psl ${pslCheck} -verbose=0 output/$@.psl diff -u expected/$@.psl output/$@.psl diff -u expected/$@.bscores output/$@.bscores clean:: rm -rf output mkdirs: @${MKDIR} -p output