This is an automated email from the git hooks/post-receive script. afif pushed a commit to branch master in repository blasr.
commit 317d7ebbdd81af097a2baf0531b723b816bc7717 Author: Afif Elghraoui <[email protected]> Date: Sun Oct 23 15:45:00 2016 -0700 Imported Upstream version 5.3 --- Blasr.cpp | 36 ++++++++++++--- cram.mk | 11 +++-- ctest/affineAlign.t | 6 +-- ctest/aggressiveIntervalCut.t | 4 +- ctest/alignScore.t | 2 +- ctest/bamConcordant.t | 6 +-- ctest/bamIn.t | 39 ++-------------- ctest/bamOut.t | 4 +- ctest/bug25328.t | 2 +- ctest/bug25741.t | 9 ---- ctest/bug25766.t | 4 +- ctest/ccsH5.t | 4 +- ctest/cigarAdjecentIndels.t | 8 ++-- ctest/concordant.t | 14 +++--- ctest/dataset.t | 16 +++---- ctest/deterministic.t | 6 +-- ctest/ecoli.t | 10 ++-- ctest/fastMaxInterval.t | 4 +- ctest/filtercriteria.t | 6 +-- ctest/fofn.t | 4 +- ctest/hitpolicy.t | 18 ++++---- ctest/holeNumbers.t | 4 +- ctest/m0-5.t | 10 ++-- ctest/multipart.t | 2 +- ctest/noSplitSubreads.t | 6 +-- ctest/samNM.t | 13 ------ ctest/setup.sh | 3 +- ctest/unaligned.t | 4 +- ctest/useccsallBestN1.t | 5 +- ctest/useccsallLargeGenome.t | 4 +- ctest/verbose.t | 2 +- iblasr/BlasrHeaders.h | 1 + iblasr/BlasrUtils.hpp | 7 +-- iblasr/BlasrUtilsImpl.hpp | 11 +++-- iblasr/MappingParameters.h | 49 ++++++++++++++++++-- iblasr/RegisterBlasrOptions.h | 10 ++-- makefile | 2 +- rules.mk | 2 +- utils/SamFilter.cpp | 2 +- utils/bam2bax/makefile | 3 +- utils/bam2bax/src/Bam2BaxConverterImpl.hpp | 14 ------ utils/bam2bax/src/Bam2BaxMain.cpp | 4 +- utils/bam2bax/src/Bam2PlxMain.cpp | 4 +- utils/bam2bax/src/Converter.cpp | 16 ------- utils/bam2bax/src/Settings.cpp | 12 +---- utils/bam2bax/tests/cram/bam2bax.t | 22 ++++----- utils/bam2bax/tests/cram/bam2plx.t | 12 ++--- utils/bax2bam/CMakeLists.txt | 2 + utils/bax2bam/README.md | 4 +- utils/bax2bam/makefile | 5 +- utils/bax2bam/src/Bax2Bam.cpp | 72 ++++++++++++++++++++++++++--- utils/bax2bam/src/CMakeLists.txt | 2 + utils/bax2bam/src/ConverterBase.h | 16 ++++--- utils/bax2bam/src/Settings.cpp | 12 ++++- utils/bax2bam/src/Settings.h | 4 ++ utils/bax2bam/src/main.cpp | 15 +++++- utils/bax2bam/tests/CMakeLists.txt | 1 + utils/bax2bam/tests/bax2bam.t | 2 +- utils/bax2bam/tests/src/TestData.h.in | 2 +- utils/bax2bam/tests/src/test_ccs.cpp | 2 +- utils/bax2bam/tests/src/test_hqregions.cpp | 13 +++--- utils/bax2bam/tests/src/test_polymerase.cpp | 18 ++++---- utils/bax2bam/tests/src/test_subreads.cpp | 12 +++-- utils/ctest/samFilter.t | 24 +++++----- 64 files changed, 366 insertions(+), 277 deletions(-) diff --git a/Blasr.cpp b/Blasr.cpp index 85a928a..bee08c8 100644 --- a/Blasr.cpp +++ b/Blasr.cpp @@ -16,24 +16,33 @@ using namespace std; MappingSemaphores semaphores; ostream *outFilePtr = NULL; #ifdef USE_PBBAM -PacBio::BAM::BamWriter * bamWriterPtr = NULL; +PacBio::BAM::IRecordWriter * bamWriterPtr = NULL; // use IRecordWriter for both SAM ands BAM #endif HDFRegionTableReader *regionTableReader = NULL; ReaderAgglomerate *reader = NULL; +// Add comment to version history for each version change ! +// +// Version history +// +// 5.0 - a new major version number +// 5.1 - transiotion to POSIX notation - double sashes before multi-character flags +// 5.2 - --sam no longer supported +// 5.3 - --sam supported via pbbam/IRecordWriter +// const string GetMajorVersion() { - return "5.2"; + return "5.3"; } // version format is 3 numbers sparated by dots : Version.Subversion.SHA1 const string GetVersion(void) { string gitVersionString(SHA1_7); // gitVersionString is first 7 characters of SHA1 string version = GetMajorVersion(); - if (gitVersionString.size() == 7) { + // if (gitVersionString.size() == 7) { version.append("."); version.append(gitVersionString); - } + // } return version; } @@ -1286,12 +1295,23 @@ int main(int argc, char* argv[]) { commandLineString); string headerString = shp.ToString();// SAM/BAM header if (params.printSAM) { + // this is not going to be executed since sam is printed via bam *outFilePtr << headerString; } else if (params.printBAM) { + // here both bam and sam are handled #ifdef USE_PBBAM PacBio::BAM::BamHeader header = PacBio::BAM::BamHeader(headerString); - // Both file name and SAMHeader are required in order to create a BamWriter. - bamWriterPtr = new PacBio::BAM::BamWriter(params.outFileName, header); + // Create bam header + // Both file name and SAMHeader are required in order to create a BamWriter. + // sam_via_bam changes + if (params.sam_via_bam) + { + bamWriterPtr = new PacBio::BAM::SamWriter(params.outFileName, header); + } + else + { + bamWriterPtr = new PacBio::BAM::BamWriter(params.outFileName, header); + } #else REQUIRE_PBBAM_ERROR(); #endif @@ -1508,7 +1528,9 @@ int main(int argc, char* argv[]) { #ifdef USE_PBBAM assert(bamWriterPtr); try { - bamWriterPtr->TryFlush(); + if (!params.sam_via_bam) { // no need to flush for SAM , but need to understand why + bamWriterPtr->TryFlush(); + } delete bamWriterPtr; bamWriterPtr = NULL; } catch (std::exception e) { diff --git a/cram.mk b/cram.mk index e928689..35f62e8 100644 --- a/cram.mk +++ b/cram.mk @@ -1,14 +1,19 @@ FAST_CTESTS := \ -ctest/affineAlign.t ctest/bamOut.t ctest/ccsH5.t ctest/filtercriteria.t ctest/m0-5.t ctest/samNM.t \ +ctest/affineAlign.t ctest/bamOut.t ctest/ccsH5.t ctest/filtercriteria.t ctest/m0-5.t \ ctest/aggressiveIntervalCut.t ctest/fofn.t ctest/multipart.t \ -ctest/alignScore.t ctest/bug25741.t ctest/ecoli.t ctest/hitpolicy.t ctest/noSplitSubreads.t \ +ctest/alignScore.t ctest/hitpolicy.t ctest/noSplitSubreads.t \ ctest/bamIn.t ctest/fastMaxInterval.t ctest/open_fail.t ctest/verbose.t ctest/deterministic.t + MILD_CTESTS := \ - ctest/useccsallBestN1.t ctest/concordant.t ctest/bug25766.t ctest/holeNumbers.t + ctest/bug25766.t ctest/holeNumbers.t SLOW_CTESTS := ctest/bug25328.t ctest/useccsallLargeGenome.t +# XXX: following tests sidelined, needs bam input after --sam option removed +# FAST: ctest/ecoli.t +# MILD: ctest/useccsallBestN1.t ctest/concordant.t + #BLASR_PATH=/mnt/secondary/builds/full/3.0.0/prod/current-build_smrtanalysis/private/otherbins/internalall/bin/ #export BLASR_PATH diff --git a/ctest/affineAlign.t b/ctest/affineAlign.t index 71e9bbe..df03825 100644 --- a/ctest/affineAlign.t +++ b/ctest/affineAlign.t @@ -3,15 +3,15 @@ Set up Test affineAlign $ rm -rf $OUTDIR/affineAlign.m0 - $ $EXEC $DATDIR/affineAlign.fofn $DATDIR/substr_with_ins.fasta -m 0 -out $OUTDIR/affineAlign.m0 -affineAlign -holeNumbers 493 -insertion 100 -deletion 100 + $ $EXEC $DATDIR/affineAlign.fofn $DATDIR/substr_with_ins.fasta -m 0 --out $OUTDIR/affineAlign.m0 --affineAlign --holeNumbers 493 --insertion 100 --deletion 100 [INFO]* (glob) [INFO]* (glob) $ diff $OUTDIR/affineAlign.m0 $STDDIR/affineAlign_2014_06_10.m0 $ rm -rf $OUTDIR/ecoli_affine.m0 - $ $EXEC $DATDIR/ecoli_affine.fasta $DATDIR/ecoli_reference.fasta -m 0 -out $OUTDIR/ecoli_affine.m0 -affineAlign -insertion 100 -deletion 100 + $ $EXEC $DATDIR/ecoli_affine.fasta $DATDIR/ecoli_reference.fasta -m 0 --out $OUTDIR/ecoli_affine.m0 --affineAlign --insertion 100 --deletion 100 [INFO]* (glob) [INFO]* (glob) $ diff $OUTDIR/ecoli_affine.m0 $STDDIR/ecoli_affine_2014_06_10.m0 -# Note that MapQV for -affineAlign has been fixed in 2014 04 18, bug 24363 +# Note that MapQV for --affineAlign has been fixed in 2014 04 18, bug 24363 diff --git a/ctest/aggressiveIntervalCut.t b/ctest/aggressiveIntervalCut.t index 571e8fe..8743782 100644 --- a/ctest/aggressiveIntervalCut.t +++ b/ctest/aggressiveIntervalCut.t @@ -1,11 +1,11 @@ Set up $ . $TESTDIR/setup.sh -Test -aggressiveIntervalCut. +Test --aggressiveIntervalCut. $ rm -f $TMP1 $ BASFILE=/mnt/data3/vol53/2450598/0001/Analysis_Results/m130812_185809_42141_c100533960310000001823079711101380_s1_p0.bas.h5 $ REFFA=/mnt/secondary/Smrtpipe/repository/Ecoli_BL21_O26/sequence/Ecoli_BL21_O26.fasta - $ $EXEC $BASFILE $REFFA -holeNumbers 1-100 -out $TMP1 -aggressiveIntervalCut + $ $EXEC $BASFILE $REFFA --holeNumbers 1--100 --out $TMP1 --aggressiveIntervalCut [INFO] * [blasr] started. (glob) [INFO] * [blasr] ended. (glob) $ echo $? diff --git a/ctest/alignScore.t b/ctest/alignScore.t index 492f92e..d3eea9e 100644 --- a/ctest/alignScore.t +++ b/ctest/alignScore.t @@ -3,7 +3,7 @@ Set up Test alignment score $ rm -rf $OUTDIR/testscore.m0 - $ $EXEC $DATDIR/read.fasta $DATDIR/ref.fasta -minReadLength 1 -m 0 -out $OUTDIR/testscore.m0 + $ $EXEC $DATDIR/read.fasta $DATDIR/ref.fasta --minReadLength 1 -m 0 --out $OUTDIR/testscore.m0 [INFO]* (glob) [INFO]* (glob) $ diff $OUTDIR/testscore.m0 $STDDIR/testscore.m0 diff --git a/ctest/bamConcordant.t b/ctest/bamConcordant.t index 93d5691..fd910d6 100644 --- a/ctest/bamConcordant.t +++ b/ctest/bamConcordant.t @@ -1,8 +1,8 @@ Set up $ . $TESTDIR/setup.sh -Test using bam as input, use -concordant - $ $EXEC $DATDIR/test_bam/tiny_bam.fofn $DATDIR/bamConcordantRef.fasta -bam -concordant -refineConcordantAlignments -bestn 1 -out $OUTDIR/bamConcordant.bam +Test using bam as input, use --concordant + $ $EXEC $DATDIR/test_bam/tiny_bam.fofn $DATDIR/bamConcordantRef.fasta --bam --concordant --refineConcordantAlignments --bestn 1 --out $OUTDIR/bamConcordant.bam [INFO]* (glob) [INFO]* (glob) @@ -25,7 +25,7 @@ Check whether sam out and bam out have identical alignments, not checking qvs 86?? (glob) 86?? (glob) - $ $EXEC /pbi/dept/secondary/siv/testdata/SA3-RS/lambda/2372215/0007_tiny/Analysis_Results/m150404_101626_42267_c100807920800000001823174110291514_s1_p0.1.subreads.bam $DATDIR/lambda_ref.fasta -m 4 -concordant -bestn 1 -holeNumbers 17417 -out $OUTDIR/tmp.m4 -V 2 > $OUTDIR/bamConcordant.log + $ $EXEC /pbi/dept/secondary/siv/testdata/SA3-RS/lambda/2372215/0007_tiny/Analysis_Results/m150404_101626_42267_c100807920800000001823174110291514_s1_p0.1.subreads.bam $DATDIR/lambda_ref.fasta -m 4 --concordant --bestn 1 --holeNumbers 17417 --out $OUTDIR/tmp.m4 -V 2 > $OUTDIR/bamConcordant.log [INFO]* (glob) [INFO]* (glob) diff --git a/ctest/bamIn.t b/ctest/bamIn.t index d518c35..970759d 100644 --- a/ctest/bamIn.t +++ b/ctest/bamIn.t @@ -2,51 +2,20 @@ Set up $ . $TESTDIR/setup.sh Test using bam as input - $ $EXEC $DATDIR/test_bam/tiny_bam.fofn $DATDIR/lambda_ref.fasta -m 4 -out $OUTDIR/tiny_bam_in.m4 + $ $EXEC $DATDIR/test_bam/tiny_bam.fofn $DATDIR/lambda_ref.fasta -m 4 --out $OUTDIR/tiny_bam_in.m4 [INFO]* (glob) [INFO]* (glob) Check whether blasr produces identical results taking fasta sequences of the bam as input - $ $EXEC $DATDIR/test_bam/tiny_fasta.fofn $DATDIR/lambda_ref.fasta -m 4 -out $OUTDIR/tiny_fasta_in.m4 + $ $EXEC $DATDIR/test_bam/tiny_fasta.fofn $DATDIR/lambda_ref.fasta -m 4 --out $OUTDIR/tiny_fasta_in.m4 [INFO]* (glob) [INFO]* (glob) $ diff $OUTDIR/tiny_bam_in.m4 $OUTDIR/tiny_fasta_in.m4 -Test bam in, sam out - $ $EXEC $DATDIR/test_bam/tiny_bam.fofn $DATDIR/lambda_ref.fasta -sam -out $OUTDIR/tiny_bam_in.sam -printSAMQV -clipping subread -cigarUseSeqMatch - [INFO]* (glob) - [INFO]* (glob) - Test bam in, bam out - $ $EXEC $DATDIR/test_bam/tiny_bam.fofn $DATDIR/lambda_ref.fasta -bam -out $OUTDIR/tiny_bam_in.bam -clipping subread - [INFO]* (glob) - [INFO]* (glob) - -Check whether sam out and bam out have identical alignments, not checking qvs - $ $SAMTOOLS view -h $OUTDIR/tiny_bam_in.bam -o $OUTDIR/tiny_bam_in.bam.sam - $ cut -f 2-11 $OUTDIR/tiny_bam_in.bam.sam |sed -n '6,$p' > $TMP1.aln - $ cut -f 2-11 $OUTDIR/tiny_bam_in.sam |sed -n '6,$p' > $TMP2.aln - $ diff $TMP1.aln $TMP2.aln - -Check whether sam out and bam out have identical read groups @RG - $ awk '/^@RG/' $OUTDIR/tiny_bam_in.bam.sam > $TMP1.rg - $ awk '/^@RG/' $OUTDIR/tiny_bam_in.sam > $TMP2.rg - $ diff $TMP1.rg $TMP2.rg - -Compare iq produced with stdout - $ sed -n '6,$p' $OUTDIR/tiny_bam_in.bam.sam | awk '{gsub(/\t/,"\n");}1' | awk '/^iq:Z:/' > $TMP1.iq - $ sed -n '6,$p' $STDDIR/$UPDATEDATE/tiny_bam_in.bam.sam | awk '{gsub(/\t/,"\n");}1' | awk '/^iq:Z:/' > $TMP2.iq - $ diff $TMP1.iq $TMP2.iq - -TODO:Check whether sam out and bam out have identical insertion qvs -Currently QVs in bam are in 'native' orientation, and QVs in sam are in 'genomic' orientation. This needs to be fixed. -$ sed -n '6,$p' $OUTDIR/tiny_bam_in.sam | awk '{gsub(/\t/,"\n");}1' | awk '/^iq:Z:/' > $TMP2.iq - -Test with multiple nproc - $ $EXEC $DATDIR/test_bam/two_bam.fofn $DATDIR/lambda_ref.fasta -bam -nproc 15 -out $OUTDIR/two_bam_in.bam + $ $EXEC $DATDIR/test_bam/tiny_bam.fofn $DATDIR/lambda_ref.fasta --bam --out $OUTDIR/tiny_bam_in.bam --clipping subread [INFO]* (glob) [INFO]* (glob) - $ $SAMTOOLS view -h $OUTDIR/two_bam_in.bam -o $OUTDIR/two_bam_in.bam.sam -TODO: test -concordant, when pbbam API to query over ZMWs is available. +TODO: test --concordant, when pbbam API to query over ZMWs is available. TODO: test bam with ccs reads diff --git a/ctest/bamOut.t b/ctest/bamOut.t index a15b811..0e8c6fa 100644 --- a/ctest/bamOut.t +++ b/ctest/bamOut.t @@ -4,11 +4,11 @@ Set up Test generating bam output Input is bam, clipping=soft and subread should produce identical results - $ $EXEC $DATDIR/test_bam/tiny_bam.fofn $DATDIR/lambda_ref.fasta -bam -out $OUTDIR/tiny_bam_in_soft.bam -clipping soft + $ $EXEC $DATDIR/test_bam/tiny_bam.fofn $DATDIR/lambda_ref.fasta --bam --out $OUTDIR/tiny_bam_in_soft.bam --clipping soft [INFO]* (glob) [INFO]* (glob) - $ $EXEC $DATDIR/test_bam/tiny_bam.fofn $DATDIR/lambda_ref.fasta -bam -out $OUTDIR/tiny_bam_in_subread.bam -clipping subread + $ $EXEC $DATDIR/test_bam/tiny_bam.fofn $DATDIR/lambda_ref.fasta --bam --out $OUTDIR/tiny_bam_in_subread.bam --clipping subread [INFO]* (glob) [INFO]* (glob) diff --git a/ctest/bug25328.t b/ctest/bug25328.t index 2127433..12bb118 100644 --- a/ctest/bug25328.t +++ b/ctest/bug25328.t @@ -5,7 +5,7 @@ bug_25328, unrolled resequencing test $ INFA=$DATDIR/bug_25328_zmw_38131.fasta $ REF=$DATDIR/All4mers_circular_72x_l50256.fasta $ OUTFA=$OUTDIR/bug_25328.m4 - $ $EXEC $INFA $REF -bestn 1 -nCandidates 1 -forwardOnly -maxMatch 14 -m 4 -out $OUTFA + $ $EXEC $INFA $REF --bestn 1 --nCandidates 1 --forwardOnly --maxMatch 14 -m 4 --out $OUTFA [INFO]* (glob) [INFO]* (glob) diff --git a/ctest/bug25741.t b/ctest/bug25741.t deleted file mode 100644 index d29829a..0000000 --- a/ctest/bug25741.t +++ /dev/null @@ -1,9 +0,0 @@ -Set up - $ . $TESTDIR/setup.sh - -bug_25741, if input bas.h5 does not contain mergeQV, blasr with -printSAMQV, -nproc>1 should not write garbage 'mq' values to output. - $ $EXEC $DATDIR/bas_wo_mergeQV.fofn $DATDIR/lambda_ref.fasta -printSAMQV -sam -clipping subread -out $OUTDIR/out_printSAMQV.sam -nproc 12 - [INFO]* (glob) - [INFO]* (glob) - $ grep 'mq' $OUTDIR/out_printSAMQV.sam |wc -l - 1 diff --git a/ctest/bug25766.t b/ctest/bug25766.t index eaf9e06..3d05b2a 100644 --- a/ctest/bug25766.t +++ b/ctest/bug25766.t @@ -1,10 +1,10 @@ Set up $ . $TESTDIR/setup.sh -bug_25766, added an option -minRawSubreadScore +bug_25766, added an option --minRawSubreadScore $ BASFILE=$DATDIR/lambda_bax.fofn $ REF=$DATDIR/lambda_ref.fasta - $ $EXEC $BASFILE $REF -out $TMP1 -minRawSubreadScore 700 -nproc 18 + $ $EXEC $BASFILE $REF --out $TMP1 --minRawSubreadScore 700 --nproc 18 [INFO]* (glob) [INFO]* (glob) $ echo $? diff --git a/ctest/ccsH5.t b/ctest/ccsH5.t index b3e0477..63e4217 100644 --- a/ctest/ccsH5.t +++ b/ctest/ccsH5.t @@ -3,9 +3,9 @@ Set up Test using *.ccs.h5 as input # The results should be exactly the same as -# blasr $DATDIR/ccsasinput_bas.fofn $DATDIR/ccsasinput.fasta -m 4 -out tmp.m4 -useccsdenovo +# blasr $DATDIR/ccsasinput_bas.fofn $DATDIR/ccsasinput.fasta -m 4 --out tmp.m4 --useccsdenovo $ rm -rf $OUTDIR/ccsasinput.m4 - $ $EXEC $DATDIR/ccsasinput.fofn $DATDIR/ccsasinput.fasta -m 4 -out $OUTDIR/ccsasinput.m4 + $ $EXEC $DATDIR/ccsasinput.fofn $DATDIR/ccsasinput.fasta -m 4 --out $OUTDIR/ccsasinput.m4 [INFO]* (glob) [INFO]* (glob) $ diff $OUTDIR/ccsasinput.m4 $STDDIR/ccsasinput_2014_06_10.m4 diff --git a/ctest/cigarAdjecentIndels.t b/ctest/cigarAdjecentIndels.t index 698f3a3..c861b8a 100644 --- a/ctest/cigarAdjecentIndels.t +++ b/ctest/cigarAdjecentIndels.t @@ -1,8 +1,8 @@ Set up $ . $TESTDIR/setup.sh -Without -allowAdjacentIndels, adjacent indels should not exist in SAM/BAM CIGAR strings - $ $EXEC $DATDIR/test_dataset/nofilter.subreadset.xml $DATDIR/ecoli_reference.fasta -bam -out $OUTDIR/noAdjacentIndels.bam -concordant -refineConcordantAlignments -bestn 1 && echo $? +Without --allowAdjacentIndels, adjacent indels should not exist in SAM/BAM CIGAR strings + $ $EXEC $DATDIR/test_dataset/nofilter.subreadset.xml $DATDIR/ecoli_reference.fasta --bam --out $OUTDIR/noAdjacentIndels.bam --concordant --refineConcordantAlignments --bestn 1 && echo $? [INFO]* (glob) [INFO]* (glob) 0 @@ -15,8 +15,8 @@ Without -allowAdjacentIndels, adjacent indels should not exist in SAM/BAM CIGAR $ grep 'DI' $TMP1 |wc -l 0 -With -allowAdjacentIndels - $ $EXEC $DATDIR/test_dataset/nofilter.subreadset.xml $DATDIR/ecoli_reference.fasta -bam -out $OUTDIR/allowAdjacentIndels.bam -concordant -bestn 1 -allowAdjacentIndels && echo $? +With --allowAdjacentIndels + $ $EXEC $DATDIR/test_dataset/nofilter.subreadset.xml $DATDIR/ecoli_reference.fasta --bam --out $OUTDIR/allowAdjacentIndels.bam --concordant --bestn 1 --allowAdjacentIndels && echo $? [INFO]* (glob) [INFO]* (glob) 0 diff --git a/ctest/concordant.t b/ctest/concordant.t index 5b3b6aa..e0b0225 100644 --- a/ctest/concordant.t +++ b/ctest/concordant.t @@ -1,11 +1,13 @@ Set up $ . $TESTDIR/setup.sh -Test -concordant +Test --concordant + $ rm -rf $OUTDIR/concordant_subset.bam $ rm -rf $OUTDIR/concordant_subset.sam - $ $EXEC $DATDIR/ecoli_lp.fofn $DATDIR/ecoli_reference.fasta -concordant -refineConcordantAlignments -sam -out $OUTDIR/concordant_subset.sam -nproc 12 -holeNumbers 1-10000 -sa $DATDIR/ecoli_reference.sa + $ $EXEC $DATDIR/ecoli_lp.fofn $DATDIR/ecoli_reference.fasta --concordant --refineConcordantAlignments --bam --out $OUTDIR/concordant_subset.bam --nproc 12 --holeNumbers 1--10000 --sa $DATDIR/ecoli_reference.sa [INFO]* (glob) [INFO]* (glob) + $ $SAMTOOLS view $OUTDIR/concordant_subset.bam > $OUTDIR/concordant_subset.sam $ sed -n 6,110864p $OUTDIR/concordant_subset.sam > $OUTDIR/tmp1 $ sort $OUTDIR/tmp1 > $OUTDIR/tmp11 $ sed -n 6,110864p $STDDIR/$UPDATEDATE/concordant_subset.sam > $OUTDIR/tmp2 @@ -15,19 +17,19 @@ Test -concordant #2014_05_28 --> changelist 135254, use MAX_BAND_SIZE to contrain GuidedAlign #2014_08_21 --> changelist 138516, added YS, YE, ZM tags. #2014_08_28 --> changelist 139176, update SAM MD5 -#2014_09_12 --> changelist 140410, changed the default value of '-concordantTemplate' from 'longestsubread' to 'typicalsubread' +#2014_09_12 --> changelist 140410, changed the default value of '--concordantTemplate' from 'longestsubread' to 'typicalsubread' #2014_09_17 --> changelist 140573, changed SDPFragment LessThan to make sure blasr compiled with gcc 4.4 and 4.8 can produce identical results. -#2014_10_16 --> changelist 141378, changed the default value of '-concordantTemplate' from 'typicalsubread' to 'mediansubread' +#2014_10_16 --> changelist 141378, changed the default value of '--concordantTemplate' from 'typicalsubread' to 'mediansubread' #2015_03_01 --> changelist 146599, reads from the same movie should have unique readGroupId #2015_03_28 --> changelist 148101, 148080 updated read group id, 148100 updated TLEN #2015_04_09 --> changelist 148796, updated read group id #2015_04_25 --> changelist 149721, update CIGAR string, replace M with X=. #2015_04_25 --> changelist ?, force refine all concordant alignments -Test -concordant FMR1 case (the 'typical subread' is selected as template for concordant mapping) +Test --concordant FMR1 case (the 'typical subread' is selected as template for concordant mapping) $ FOFN=$DATDIR/FMR1_concordant.fofn $ REF=$DATDIR/FMR1_130CGG.fasta - $ $EXEC $FOFN $REF -concordant -refineConcordantAlignments -out $OUTDIR/FMR1_zmw_37927.m4 -m 4 -holeNumbers 37927 + $ $EXEC $FOFN $REF --concordant --refineConcordantAlignments --out $OUTDIR/FMR1_zmw_37927.m4 -m 4 --holeNumbers 37927 [INFO]* (glob) [INFO]* (glob) $ diff $OUTDIR/FMR1_zmw_37927.m4 $STDDIR/$UPDATEDATE/FMR1_zmw_37927.m4 diff --git a/ctest/dataset.t b/ctest/dataset.t index 9a14ec8..873d50e 100644 --- a/ctest/dataset.t +++ b/ctest/dataset.t @@ -2,7 +2,7 @@ Set up $ . $TESTDIR/setup.sh Test dataset.xml as input - $ $EXEC $DATDIR/test_dataset/chunking.subreadset.xml $DATDIR/ecoli_reference.fasta -m 4 -out $OUTDIR/chunking.m4 -bestn 1 && echo $? + $ $EXEC $DATDIR/test_dataset/chunking.subreadset.xml $DATDIR/ecoli_reference.fasta -m 4 --out $OUTDIR/chunking.m4 --bestn 1 && echo $? [INFO]* (glob) [INFO]* (glob) 0 @@ -10,20 +10,20 @@ Test filters in dataset.xml is respected. $ cat $OUTDIR/chunking.m4 | wc -l 9 -Test dataset.xml -bam output - $ $EXEC $DATDIR/test_dataset/chunking.subreadset.xml $DATDIR/ecoli_reference.fasta -bam -out $OUTDIR/chunking.bam && echo $? +Test dataset.xml --bam output + $ $EXEC $DATDIR/test_dataset/chunking.subreadset.xml $DATDIR/ecoli_reference.fasta --bam --out $OUTDIR/chunking.bam && echo $? [INFO]* (glob) [INFO]* (glob) 0 -Test dataset.xml -concordant - $ $EXEC $DATDIR/test_dataset/chunking.subreadset.xml $DATDIR/ecoli_reference.fasta -bam -out $OUTDIR/chunking.concordant.bam -concordant && echo $? +Test dataset.xml --concordant + $ $EXEC $DATDIR/test_dataset/chunking.subreadset.xml $DATDIR/ecoli_reference.fasta --bam --out $OUTDIR/chunking.concordant.bam --concordant && echo $? [INFO]* (glob) [INFO]* (glob) 0 Test dataset with no filters (to make sure that an empty filter does not discard all bam records.) - $ $EXEC $DATDIR/test_dataset/nofilter.subreadset.xml $DATDIR/ecoli_reference.fasta -bam -out $OUTDIR/nofilter.bam -concordant -bestn 1 && echo $? + $ $EXEC $DATDIR/test_dataset/nofilter.subreadset.xml $DATDIR/ecoli_reference.fasta --bam --out $OUTDIR/nofilter.bam --concordant --bestn 1 && echo $? [INFO]* (glob) [INFO]* (glob) 0 @@ -32,8 +32,8 @@ Test dataset with no filters (to make sure that an empty filter does not discard 131 -Test dataset with -concordant is on - $ $EXEC $DATDIR/test_dataset/nofilter.subreadset.xml $DATDIR/bamConcordantRef.fasta -bam -concordant -refineConcordantAlignments -bestn 1 -out $OUTDIR/datasetConcordant.bam -holeNumbers 1898 && echo $? +Test dataset with --concordant is on + $ $EXEC $DATDIR/test_dataset/nofilter.subreadset.xml $DATDIR/bamConcordantRef.fasta --bam --concordant --refineConcordantAlignments --bestn 1 --out $OUTDIR/datasetConcordant.bam --holeNumbers 1898 && echo $? [INFO]* (glob) [INFO]* (glob) 0 diff --git a/ctest/deterministic.t b/ctest/deterministic.t index 8a1214c..16bfe4a 100644 --- a/ctest/deterministic.t +++ b/ctest/deterministic.t @@ -13,7 +13,7 @@ and then check if output is determined. $ outfile=$OUTDIR/$name.m4 $ stdfile=$STDDIR/$name.m4 $ rm -f $outfile - $ $EXEC $infile $DATDIR/lambda_ref.fasta -m 4 -out $outfile && echo $? + $ $EXEC $infile $DATDIR/lambda_ref.fasta -m 4 --out $outfile && echo $? [INFO]* (glob) [INFO]* (glob) 0 @@ -26,7 +26,7 @@ and then check if output is determined. $ outfile=$OUTDIR/$name.m4 $ stdfile=$STDDIR/$name.m4 $ rm -f $outfile - $ $EXEC $infile $DATDIR/lambda_ref.fasta -m 4 -out $outfile && echo $? + $ $EXEC $infile $DATDIR/lambda_ref.fasta -m 4 --out $outfile && echo $? [INFO]* (glob) [INFO]* (glob) 0 @@ -39,7 +39,7 @@ and then check if output is determined. $ outfile=$OUTDIR/$name.m4 $ stdfile=$STDDIR/$name.m4 $ rm -f $outfile - $ $EXEC $infile $DATDIR/lambda_ref.fasta -m 4 -out $outfile && echo $? + $ $EXEC $infile $DATDIR/lambda_ref.fasta -m 4 --out $outfile && echo $? [INFO]* (glob) [INFO]* (glob) 0 diff --git a/ctest/ecoli.t b/ctest/ecoli.t index c6291a2..07940fc 100644 --- a/ctest/ecoli.t +++ b/ctest/ecoli.t @@ -2,18 +2,20 @@ Set up $ . $TESTDIR/setup.sh Test blasr on ecoli. -Test blasr with -sam +Test blasr with --bam # The following job takes a very long time to finish, let us use a subset of reads instead #See $STDOUT/ecoli_v1.4.sam for 1.4 output. # $STDOUT/ecoli_2014_03_28.sam for bug before mapQV for affineAlign/align without QV is fixed. + $ rm -rf $OUTDIR/ecoli_subset.bam $ rm -rf $OUTDIR/ecoli_subset.sam - $ $EXEC $DATDIR/ecoli_subset.fasta $DATDIR/ecoli_reference.fasta -sam -out $OUTDIR/ecoli_subset.sam -nproc 15 + $ $EXEC $DATDIR/ecoli_subset.fasta $DATDIR/ecoli_reference.fasta --bam --out $OUTDIR/ecoli_subset.bam --nproc 15 [INFO]* (glob) [INFO]* (glob) - $ sed -n '5,$ p' $OUTDIR/ecoli_subset.sam | sort | cut -f 1-11 > $TMP1 - $ sed -n '5,$ p' $STDDIR/$UPDATEDATE/ecoli_subset.sam | sort | cut -f 1-11 > $TMP2 + $ $SAMTOOLS view $OUTDIR/ecoli_subset.bam > $OUTDIR/ecoli_subset.sam + $ sed -n '5,$ p' $OUTDIR/ecoli_subset.sam | sort | cut -f 1--11 > $TMP1 + $ sed -n '5,$ p' $STDDIR/$UPDATEDATE/ecoli_subset.sam | sort | cut -f 1--11 > $TMP2 $ diff $TMP1 $TMP2 $ rm $TMP1 $TMP2 # 2015_03_08 --> changelist 148101, 148080 updated read group id; 148100 updated TLEN diff --git a/ctest/fastMaxInterval.t b/ctest/fastMaxInterval.t index 323fab1..67a687b 100644 --- a/ctest/fastMaxInterval.t +++ b/ctest/fastMaxInterval.t @@ -1,11 +1,11 @@ Set up $ . $TESTDIR/setup.sh -Test -fastMaxInterval. +Test --fastMaxInterval. $ rm -f $TMP1 $ BASFILE=/mnt/data3/vol53/2450598/0001/Analysis_Results/m130812_185809_42141_c100533960310000001823079711101380_s1_p0.bas.h5 $ REFFA=/mnt/secondary/Smrtpipe/repository/Ecoli_BL21_O26/sequence/Ecoli_BL21_O26.fasta - $ $EXEC $BASFILE $REFFA -holeNumbers 1-100 -out $TMP1 -fastMaxInterval + $ $EXEC $BASFILE $REFFA --holeNumbers 1--100 --out $TMP1 --fastMaxInterval [INFO] * [blasr] started. (glob) [INFO] * [blasr] ended. (glob) $ echo $? diff --git a/ctest/filtercriteria.t b/ctest/filtercriteria.t index 9cec9dc..a0f4460 100644 --- a/ctest/filtercriteria.t +++ b/ctest/filtercriteria.t @@ -7,12 +7,12 @@ Set up $ STDDIR=$STDDIR/$NAME $ mkdir -p $OUTDIR -Test -minPctSimilarity +Test --minPctSimilarity $ I=$DATDIR/tiny_bam.fofn $ R=$DATDIR/lambdaNEB.fa $ O=$OUTDIR/min_pct_similarity_90.m4 - $ $EXEC $I $R -out $O -m 4 -minPctSimilarity 90 + $ $EXEC $I $R --out $O -m 4 --minPctSimilarity 90 [INFO]* (glob) [INFO]* (glob) $ echo $? @@ -21,7 +21,7 @@ Test -minPctSimilarity 0 $ O=$OUTDIR/min_aln_len_1000.m4 - $ $EXEC $I $R -out $O -m 4 -minAlnLength 1000 + $ $EXEC $I $R --out $O -m 4 --minAlnLength 1000 [INFO]* (glob) [INFO]* (glob) $ echo $? diff --git a/ctest/fofn.t b/ctest/fofn.t index 348b6f7..dfdbd42 100644 --- a/ctest/fofn.t +++ b/ctest/fofn.t @@ -3,7 +3,7 @@ Set up Test blasr with *.fofn input # $ rm -rf $OUTDIR/lambda_bax.m4 -# $ $EXEC $DATDIR/lambda_bax.fofn $DATDIR/lambda_ref.fasta -m 4 -out lambda_bax_tmp.m4 -nproc 15 -minMatch 14 +# $ $EXEC $DATDIR/lambda_bax.fofn $DATDIR/lambda_ref.fasta -m 4 --out lambda_bax_tmp.m4 --nproc 15 --minMatch 14 # [INFO]* (glob) # [INFO]* (glob) # $ sort lambda_bax_tmp.m4 > $OUTDIR/lambda_bax.m4 @@ -11,7 +11,7 @@ Test blasr with *.fofn input # This test takes a long time, use a subset instad. $ rm -rf $OUTDIR/lambda_bax_subset.m4 - $ $EXEC $DATDIR/lambda_bax.fofn $DATDIR/lambda_ref.fasta -m 4 -out $OUTDIR/lambda_bax_tmp_subset.m4 -nproc 15 -minMatch 14 -holeNumbers 1-1000 -sa $DATDIR/lambda_ref.sa + $ $EXEC $DATDIR/lambda_bax.fofn $DATDIR/lambda_ref.fasta -m 4 --out $OUTDIR/lambda_bax_tmp_subset.m4 --nproc 15 --minMatch 14 --holeNumbers 1--1000 --sa $DATDIR/lambda_ref.sa [INFO]* (glob) [INFO]* (glob) $ sort $OUTDIR/lambda_bax_tmp_subset.m4 > $OUTDIR/lambda_bax_subset.m4 diff --git a/ctest/hitpolicy.t b/ctest/hitpolicy.t index 2a90e19..becf826 100644 --- a/ctest/hitpolicy.t +++ b/ctest/hitpolicy.t @@ -13,7 +13,7 @@ Set up $ X=$STDDIR/hitpolicy_all.m4 Test hitpolicy all - $ $EXEC $I $R -out $O -m 4 -hitPolicy all + $ $EXEC $I $R --out $O -m 4 --hitPolicy all [INFO]* (glob) [INFO]* (glob) $ echo $? @@ -24,7 +24,7 @@ Test hitpolicy all Test hitpolicy allbest $ O=$OUTDIR/hitpolicy_allbest.m4 $ X=$STDDIR/hitpolicy_allbest.m4 - $ $EXEC $I $R -out $O -m 4 -hitPolicy allbest && sort $O > $TMP1 && mv $TMP1 $O + $ $EXEC $I $R --out $O -m 4 --hitPolicy allbest && sort $O > $TMP1 && mv $TMP1 $O [INFO]* (glob) [INFO]* (glob) $ echo $? @@ -37,10 +37,10 @@ Test hitpolicy random $ O=$OUTDIR/hitpolicy_random.m4 $ O2=$OUTDIR/hitpolicy_random_2.m4 $ X=$STDDIR/hitpolicy_random.m4 - $ $EXEC $I $R -out $O -m 4 -hitPolicy random -randomSeed 1 + $ $EXEC $I $R --out $O -m 4 --hitPolicy random --randomSeed 1 [INFO]* (glob) [INFO]* (glob) - $ $EXEC $I $R -out $O2 -m 4 -hitPolicy random -randomSeed 1 + $ $EXEC $I $R --out $O2 -m 4 --hitPolicy random --randomSeed 1 [INFO]* (glob) [INFO]* (glob) $ sort $O > $TMP1 && mv $TMP1 $O @@ -52,10 +52,10 @@ Test hitpolicy randombest bam inputs, nproc > 1, fixed seed $ O=$OUTDIR/hitpolicy_randombest_bam_in.m4 $ O2=$OUTDIR/hitpolicy_randombest_bam_in_2.m4 $ X=$STDDIR/hitpolicy_randombest_bam_in.m4 - $ $EXEC $I $R -out $O -m 4 -hitPolicy randombest -randomSeed 1 -nproc 10 + $ $EXEC $I $R --out $O -m 4 --hitPolicy randombest --randomSeed 1 --nproc 10 [INFO]* (glob) [INFO]* (glob) - $ $EXEC $I $R -out $O2 -m 4 -hitPolicy randombest -randomSeed 1 -nproc 10 + $ $EXEC $I $R --out $O2 -m 4 --hitPolicy randombest --randomSeed 1 --nproc 10 [INFO]* (glob) [INFO]* (glob) $ sort $O > $TMP1 && mv $TMP1 $O @@ -67,7 +67,7 @@ Test hitpolicy randombest bax inputs, nproc > 1, fixed seed $ I=$DATDIR/tiny_bax.fofn $ O=$OUTDIR/hitpolicy_randombest_bax_in.m4 $ X=$STDDIR/hitpolicy_randombest_bax_in.m4 - $ $EXEC $I $R -out $O -m 4 -hitPolicy randombest -randomSeed 1 -nproc 10 + $ $EXEC $I $R --out $O -m 4 --hitPolicy randombest --randomSeed 1 --nproc 10 [INFO]* (glob) [INFO]* (glob) $ sort $O > $TMP1 && mv $TMP1 $O @@ -78,7 +78,7 @@ Test hitpolicy randombest fasta inputs, nproc > 1, fixed seed $ I=$DATDIR/tiny_fasta.fofn $ O=$OUTDIR/hitpolicy_randombest_fasta_in.m4 $ X=$STDDIR/hitpolicy_randombest_fasta_in.m4 - $ $EXEC $I $R -out $O -m 4 -hitPolicy randombest -randomSeed 1 -nproc 10 + $ $EXEC $I $R --out $O -m 4 --hitPolicy randombest --randomSeed 1 --nproc 10 [INFO]* (glob) [INFO]* (glob) $ sort $O > $TMP1 && mv $TMP1 $O @@ -88,7 +88,7 @@ Test hitpolicy randombest fasta inputs, nproc > 1, fixed seed Test hitpolicy leftmost $ O=$OUTDIR/hitpolicy_leftmost.m4 $ X=$STDDIR/hitpolicy_leftmost.m4 - $ $EXEC $I $R -out $O -m 4 -hitPolicy leftmost -nproc 10 + $ $EXEC $I $R --out $O -m 4 --hitPolicy leftmost --nproc 10 [INFO]* (glob) [INFO]* (glob) $ # target is lambda x 6, leftmost -> only map to the very first x. diff --git a/ctest/holeNumbers.t b/ctest/holeNumbers.t index 427f31a..7f2e292 100644 --- a/ctest/holeNumbers.t +++ b/ctest/holeNumbers.t @@ -1,9 +1,9 @@ Set up $ . $TESTDIR/setup.sh -Test -holeNumbers +Test --holeNumbers $ rm -f $OUTDIR/holeNumbers.m4 - $ $EXEC $DATDIR/lambda_bax.fofn $DATDIR/lambda_ref.fasta -m 4 -out $OUTDIR/holeNumbers.m4 -holeNumbers 14798,55000-55100 -nproc 8 + $ $EXEC $DATDIR/lambda_bax.fofn $DATDIR/lambda_ref.fasta -m 4 --out $OUTDIR/holeNumbers.m4 --holeNumbers 14798,55000--55100 --nproc 8 [INFO]* (glob) [INFO]* (glob) $ sort $OUTDIR/holeNumbers.m4 > $TMP1 diff --git a/ctest/m0-5.t b/ctest/m0-5.t index 84182b3..34c12e0 100644 --- a/ctest/m0-5.t +++ b/ctest/m0-5.t @@ -3,31 +3,31 @@ Set up Test blasr with -m 0 ~ 5 $ rm -rf $OUTDIR/read.m0 - $ $EXEC $DATDIR/read.fasta $DATDIR/ref.fasta -m 0 -out $OUTDIR/read.m0 + $ $EXEC $DATDIR/read.fasta $DATDIR/ref.fasta -m 0 --out $OUTDIR/read.m0 [INFO]* (glob) [INFO]* (glob) $ diff $OUTDIR/read.m0 $STDDIR/read.m0 $ rm -rf $OUTDIR/read.m1 - $ $EXEC $DATDIR/read.fasta $DATDIR/ref.fasta -m 1 -out $OUTDIR/read.m1 + $ $EXEC $DATDIR/read.fasta $DATDIR/ref.fasta -m 1 --out $OUTDIR/read.m1 [INFO]* (glob) [INFO]* (glob) $ diff $OUTDIR/read.m1 $STDDIR/read_2014_05_29.m1 $ rm -rf $OUTDIR/read.m2 - $ $EXEC $DATDIR/read.fasta $DATDIR/ref.fasta -m 2 -out $OUTDIR/read.m2 + $ $EXEC $DATDIR/read.fasta $DATDIR/ref.fasta -m 2 --out $OUTDIR/read.m2 [INFO]* (glob) [INFO]* (glob) $ diff $OUTDIR/read.m2 $STDDIR/read.m2 $ rm -rf $OUTDIR/read.m3 - $ $EXEC $DATDIR/read.fasta $DATDIR/ref.fasta -m 3 -out $OUTDIR/read.m3 + $ $EXEC $DATDIR/read.fasta $DATDIR/ref.fasta -m 3 --out $OUTDIR/read.m3 [INFO]* (glob) [INFO]* (glob) $ diff $OUTDIR/read.m3 $STDDIR/read.m3 $ rm -rf $OUTDIR/read.m4 - $ $EXEC $DATDIR/read.fasta $DATDIR/ref.fasta -m 4 -out $OUTDIR/read.m4 + $ $EXEC $DATDIR/read.fasta $DATDIR/ref.fasta -m 4 --out $OUTDIR/read.m4 [INFO]* (glob) [INFO]* (glob) $ diff $OUTDIR/read.m4 $STDDIR/read.m4 diff --git a/ctest/multipart.t b/ctest/multipart.t index 2cf6e81..5e824d7 100644 --- a/ctest/multipart.t +++ b/ctest/multipart.t @@ -6,7 +6,7 @@ contain any /PulseData, instead contains /MultiPart/Parts. $ rm -f $TMP1 $ BASFILE=/mnt/data3/vol53/2450598/0001/Analysis_Results/m130812_185809_42141_c100533960310000001823079711101380_s1_p0.bas.h5 $ REFFA=/mnt/secondary/Smrtpipe/repository/Ecoli_BL21_O26/sequence/Ecoli_BL21_O26.fasta - $ $EXEC $BASFILE $REFFA -holeNumbers 1-100 -out $TMP1 + $ $EXEC $BASFILE $REFFA --holeNumbers 1--100 --out $TMP1 [INFO] * [blasr] started. (glob) [INFO] * [blasr] ended. (glob) $ echo $? diff --git a/ctest/noSplitSubreads.t b/ctest/noSplitSubreads.t index 3667cbd..ed4a192 100644 --- a/ctest/noSplitSubreads.t +++ b/ctest/noSplitSubreads.t @@ -1,9 +1,9 @@ Set up $ . $TESTDIR/setup.sh -Test blasr with -noSplitSubreads +Test blasr with --noSplitSubreads # $ rm -rf $OUTDIR/lambda_bax_noSplitSubreads.m4 -# $ $EXEC $DATDIR/lambda_bax.fofn $DATDIR/lambda_ref.fasta -noSplitSubreads -m 4 -out lambda_bax_noSplitSubreads_tmp.m4 -nproc 15 +# $ $EXEC $DATDIR/lambda_bax.fofn $DATDIR/lambda_ref.fasta --noSplitSubreads -m 4 --out lambda_bax_noSplitSubreads_tmp.m4 --nproc 15 # [INFO]* (glob) # [INFO]* (glob) # $ sort lambda_bax_noSplitSubreads_tmp.m4 > $OUTDIR/lambda_bax_noSplitSubreads.m4 @@ -11,7 +11,7 @@ Test blasr with -noSplitSubreads # This test takes a long time, use a subset instad. $ rm -rf $OUTDIR/lambda_bax_noSplitSubreads_subset.m4 - $ $EXEC $DATDIR/lambda_bax.fofn $DATDIR/lambda_ref.fasta -noSplitSubreads -m 4 -out $OUTDIR/lambda_bax_noSplitSubreads_tmp_subset.m4 -nproc 15 -holeNumbers 1-1000 -sa $DATDIR/lambda_ref.sa + $ $EXEC $DATDIR/lambda_bax.fofn $DATDIR/lambda_ref.fasta --noSplitSubreads -m 4 --out $OUTDIR/lambda_bax_noSplitSubreads_tmp_subset.m4 --nproc 15 --holeNumbers 1--1000 --sa $DATDIR/lambda_ref.sa [INFO]* (glob) [INFO]* (glob) $ sort $OUTDIR/lambda_bax_noSplitSubreads_tmp_subset.m4 > $OUTDIR/lambda_bax_noSplitSubreads_subset.m4 diff --git a/ctest/samNM.t b/ctest/samNM.t deleted file mode 100644 index 93ae89c..0000000 --- a/ctest/samNM.t +++ /dev/null @@ -1,13 +0,0 @@ -Set up - $ . $TESTDIR/setup.sh - -Test Sam out nm tag - $ rm -rf $OUTDIR/read.sam - $ $EXEC $DATDIR/read.fasta $DATDIR/ref.fasta -sam -out $OUTDIR/read.sam - [INFO]* (glob) - [INFO]* (glob) - $ tail -n+5 $OUTDIR/read.sam |cut -f 21 - NM:i:2 - NM:i:3 - NM:i:2 - NM:i:4 diff --git a/ctest/setup.sh b/ctest/setup.sh index f75e4fb..8ae52b1 100755 --- a/ctest/setup.sh +++ b/ctest/setup.sh @@ -6,7 +6,8 @@ OUTDIR=$CURDIR/out STDDIR=$REMOTEDIR/stdout # Set up the executable: blasr. -EXEC=${BLASR_PATH}/blasr +#EXEC=${BLASR_PATH}/blasr +EXEC=blasr # Define tmporary files TMP1=$OUTDIR/$$.tmp.out diff --git a/ctest/unaligned.t b/ctest/unaligned.t index 3340b0c..08b47f2 100644 --- a/ctest/unaligned.t +++ b/ctest/unaligned.t @@ -2,7 +2,7 @@ Set up $ . $TESTDIR/setup.sh Test dataset.xml as input - $ $EXEC $DATDIR/test_dataset/chunking.subreadset.xml $DATDIR/ecoli_reference.fasta -unaligned $OUTDIR/unaligned.txt -noPrintUnalignedSeqs -concordant 1>/dev/null && echo $? + $ $EXEC $DATDIR/test_dataset/chunking.subreadset.xml $DATDIR/ecoli_reference.fasta --unaligned $OUTDIR/unaligned.txt --noPrintUnalignedSeqs --concordant 1>/dev/null && echo $? [INFO]* (glob) [INFO]* (glob) 0 @@ -13,7 +13,7 @@ Test dataset.xml as input m150404_101626_42267_c100807920800000001823174110291514_s1_p0/480/12033_13456 m150404_101626_42267_c100807920800000001823174110291514_s1_p0/480/13519_14067 - $ $EXEC $DATDIR/ecoli_subset.fasta $DATDIR/ecoli_reference.fasta -unaligned $OUTDIR/unaligned.txt -noPrintUnalignedSeqs 1>/dev/null && echo $? + $ $EXEC $DATDIR/ecoli_subset.fasta $DATDIR/ecoli_reference.fasta --unaligned $OUTDIR/unaligned.txt --noPrintUnalignedSeqs 1>/dev/null && echo $? [INFO]* (glob) [INFO]* (glob) 0 diff --git a/ctest/useccsallBestN1.t b/ctest/useccsallBestN1.t index 32fafc7..659b3b8 100644 --- a/ctest/useccsallBestN1.t +++ b/ctest/useccsallBestN1.t @@ -1,10 +1,11 @@ Set up $ . $TESTDIR/setup.sh -Test -useccsall with bestn = 1 - $ $EXEC $DATDIR/ccstest.fofn $DATDIR/ccstest_ref.fasta -bestn 1 -useccsall -sam -out $OUTDIR/useccsall.sam -holeNumbers 76772 +Test --useccsall with bestn = 1 + $ $EXEC $DATDIR/ccstest.fofn $DATDIR/ccstest_ref.fasta --bestn 1 --useccsall --bam --out $OUTDIR/useccsall.bam --holeNumbers 76772 [INFO]* (glob) [INFO]* (glob) + $ $SAMTOOLS view $OUTDIR/useccsall.bam > $OUTDIR/useccsall.sam $ sed -n '9,$ p' $OUTDIR/useccsall.sam |cut -f 1-4 > $TMP1 $ sed -n '9,$ p' $STDDIR/$UPDATEDATE/useccsall.sam | cut -f 1-4 > $TMP2 $ diff $TMP1 $TMP2 diff --git a/ctest/useccsallLargeGenome.t b/ctest/useccsallLargeGenome.t index ac93834..dfded47 100644 --- a/ctest/useccsallLargeGenome.t +++ b/ctest/useccsallLargeGenome.t @@ -1,13 +1,13 @@ Set up $ . $TESTDIR/setup.sh -Test -useccsall with Large genome. +Test --useccsall with Large genome. $ BASFILE=/mnt/data3/vol53/2450530/0014/Analysis_Results/m130507_052228_42161_c100519212550000001823079909281305_s1_p0.3.bax.h5 $ REFDIR=/mnt/secondary/Smrtpipe/repository/hg19_M_sorted/sequence $ REFFA=$REFDIR/hg19_M_sorted.fasta $ REFSA=$REFDIR/hg19_M_sorted.fasta.sa $ OUTFILE=$OUTDIR/intflow.m4 - $ $EXEC $BASFILE $REFFA -out $OUTFILE -m 4 -sa $REFSA -holeNumbers 109020 + $ $EXEC $BASFILE $REFFA --out $OUTFILE -m 4 --sa $REFSA --holeNumbers 109020 [INFO]* (glob) [INFO]* (glob) $ sort $OUTFILE > $TMP1 && sort $STDDIR/intflow_2014_06_10.m4 > $TMP2 && diff $TMP1 $TMP2 && echo $? diff --git a/ctest/verbose.t b/ctest/verbose.t index 19e508a..a36074c 100644 --- a/ctest/verbose.t +++ b/ctest/verbose.t @@ -2,7 +2,7 @@ Set up $ . $TESTDIR/setup.sh Test alignment score - $ $EXEC $DATDIR/lambda_bax.fofn $DATDIR/lambda_ref.fasta -holeNumbers 1-200 -V 3 > $TMP1 + $ $EXEC $DATDIR/lambda_bax.fofn $DATDIR/lambda_ref.fasta --holeNumbers 1--200 -V 3 > $TMP1 [INFO]* (glob) [INFO]* (glob) $ echo $? diff --git a/iblasr/BlasrHeaders.h b/iblasr/BlasrHeaders.h index 8a08c27..80d1fb1 100644 --- a/iblasr/BlasrHeaders.h +++ b/iblasr/BlasrHeaders.h @@ -24,6 +24,7 @@ using namespace std; #include <libconfig.h> #ifdef USE_PBBAM #include <pbbam/BamWriter.h> +#include <pbbam/SamWriter.h> #endif #include <CCSSequence.hpp> diff --git a/iblasr/BlasrUtils.hpp b/iblasr/BlasrUtils.hpp index ea7e4de..0176733 100644 --- a/iblasr/BlasrUtils.hpp +++ b/iblasr/BlasrUtils.hpp @@ -52,6 +52,7 @@ void SumMismatches(SMRTSequence &read, T_AlignmentCandidate &alignment, int mismatchScore, int fullIntvStart, int fullIntvEnd, + MappingParameters ¶ms, int &sum); //FIXME: move to class T_AlignmentCandidate @@ -120,7 +121,7 @@ void PrintAlignment(T_AlignmentCandidate &alignment, ostream &outFile #ifdef USE_PBBAM , SMRTSequence &subread - , PacBio::BAM::BamWriter * bamWriterPtr + , PacBio::BAM::IRecordWriter * bamWriterPtr #endif ); @@ -131,7 +132,7 @@ void PrintAlignments(vector<T_AlignmentCandidate*> alignmentPtrs, AlignmentContext alignmentContext, #ifdef USE_PBBAM SMRTSequence &subread, - PacBio::BAM::BamWriter * bamWriterPtr, + PacBio::BAM::IRecordWriter * bamWriterPtr, #endif MappingSemaphores & semaphores); @@ -162,7 +163,7 @@ void PrintAllReadAlignments(ReadAlignments & allReadAlignments, MappingParameters & params, vector<SMRTSequence> & subreads, #ifdef USE_PBBAM - PacBio::BAM::BamWriter * bamWriterPtr, + PacBio::BAM::IRecordWriter * bamWriterPtr, #endif MappingSemaphores & semaphores); diff --git a/iblasr/BlasrUtilsImpl.hpp b/iblasr/BlasrUtilsImpl.hpp index 6839590..49ac6b3 100644 --- a/iblasr/BlasrUtilsImpl.hpp +++ b/iblasr/BlasrUtilsImpl.hpp @@ -222,7 +222,7 @@ void StoreMapQVs(SMRTSequence &read, // bug 24363, use updated SumMismatches to compute mismatch score when // no QV is available. SumMismatches(read, *alignmentPtrs[*partIt], 15, - partitionBeginPos[p], partitionEndPos[p], mismatchSum); + partitionBeginPos[p], partitionEndPos[p], params, mismatchSum); } // // Random sequence can be aligned with about 50% similarity due @@ -345,13 +345,14 @@ void SumMismatches(SMRTSequence &read, T_AlignmentCandidate &alignment, int mismatchScore, int fullIntvStart, int fullIntvEnd, + MappingParameters ¶ms, int &sum) { int alnStart, alnEnd; alignment.GetQIntervalOnForwardStrand(alnStart, alnEnd); int p; sum = 0; - if (read.substitutionQV.Empty() == false) { + if (not params.ignoreQualities and read.substitutionQV.Empty() == false) { for (p = fullIntvStart; p < alnStart; p++) { sum += read.substitutionQV[p]; } @@ -953,7 +954,7 @@ void PrintAlignment(T_AlignmentCandidate &alignment, ostream &outFile #ifdef USE_PBBAM , SMRTSequence & subread - , PacBio::BAM::BamWriter * bamWriterPtr + , PacBio::BAM::IRecordWriter * bamWriterPtr #endif ) { try { @@ -1013,7 +1014,7 @@ void PrintAlignments(vector<T_AlignmentCandidate*> alignmentPtrs, AlignmentContext alignmentContext, #ifdef USE_PBBAM SMRTSequence &subread, - PacBio::BAM::BamWriter * bamWriterPtr, + PacBio::BAM::IRecordWriter * bamWriterPtr, #endif MappingSemaphores & semaphores) { if (params.nProc > 1) { @@ -1130,7 +1131,7 @@ void PrintAllReadAlignments(ReadAlignments & allReadAlignments, MappingParameters & params, vector<SMRTSequence> & subreads, #ifdef USE_PBBAM - PacBio::BAM::BamWriter * bamWriterPtr, + PacBio::BAM::IRecordWriter * bamWriterPtr, #endif MappingSemaphores & semaphores) { diff --git a/iblasr/MappingParameters.h b/iblasr/MappingParameters.h index ed64e80..271fce4 100644 --- a/iblasr/MappingParameters.h +++ b/iblasr/MappingParameters.h @@ -88,6 +88,7 @@ public: bool printSAM; bool cigarUseSeqMatch; bool printBAM; + bool sam_via_bam; // for SAM output via pbbam using IRecordWriter bool storeMapQV; bool useRandomSeed; int randomSeed; @@ -271,6 +272,7 @@ public: readsFileIndex = 0; printSAM = false; printBAM = false; + sam_via_bam = false; useRandomSeed = false; randomSeed = 0; placeRandomly = false; @@ -557,10 +559,6 @@ public: if (randomSeed != 0) { useRandomSeed = true; } - if (printSAM) { - cerr << "ERROR: --sam is no longer supported, use --bam, then translate from .bam to .sam" << endl; - exit(1); - } // // Parse the clipping. // @@ -581,7 +579,43 @@ public: exit(1); } - if (printBAM) { + if (printSAM) { // since sam is printed via bam we need to use ifndef USE_PBBAM here +#ifndef USE_PBBAM + REQUIRE_PBBAM_ERROR(); +#else + printSAM = false; + printBAM = true; + sam_via_bam = true; // set to true for constructors and to avoid entering if (printBAM + cigarUseSeqMatch = true; // ALWAYS true for BAM + printFormat = BAM; // Not sure for sam_via_bam + samQVList.SetDefaultQV(); + printSAMQV = true; + if (clipping != SAMOutput::soft) { + // Only support two clipping methods: soft or subread. + clipping = SAMOutput::subread; + } + // Turn on fa fa -> bam pipe + /* + if (queryFileType != FileType::PBBAM and queryFileType != FileType::PBDATASET and not enableHiddenPaths) { + // bax|fasta|fastq -> bam paths are turned off by default + cout << "ERROR, could not output alignments in BAM unless input reads are in PacBio BAM or DATASET files." << endl; + exit(1); + } + */ + if (outFileName == "") { + cout << "ERROR, SAM output file must be specified." << endl; + exit(1); + } + // VR Need to see what happens if printing SAM + // VR Check with Derek regarding sam_via_bam + if (outputByThread) { + cout << "ERROR, could not output alignments by threads in BAM format." << endl; + exit(1); + } +#endif + } + + if (printBAM && !sam_via_bam) { // Need to check settings for SAM, #ifndef USE_PBBAM REQUIRE_PBBAM_ERROR(); #else @@ -594,15 +628,20 @@ public: // Only support two clipping methods: soft or subread. clipping = SAMOutput::subread; } + // Turn on fa fa -> bam pipe + /* if (queryFileType != FileType::PBBAM and queryFileType != FileType::PBDATASET and not enableHiddenPaths) { // bax|fasta|fastq -> bam paths are turned off by default cout << "ERROR, could not output alignments in BAM unless input reads are in PacBio BAM or DATASET files." << endl; exit(1); } + */ if (outFileName == "") { cout << "ERROR, BAM output file must be specified." << endl; exit(1); } + // VR Need to see what happens if printing SAM + // VR Check with Derek regarding sam_via_bam if (outputByThread) { cout << "ERROR, could not output alignments by threads in BAM format." << endl; exit(1); diff --git a/iblasr/RegisterBlasrOptions.h b/iblasr/RegisterBlasrOptions.h index 0425db5..af7770a 100644 --- a/iblasr/RegisterBlasrOptions.h +++ b/iblasr/RegisterBlasrOptions.h @@ -466,12 +466,12 @@ const string BlasrDiscussion(void) { ss << "NAME" << endl << " blasr - Map SMRT Sequences to a reference genome."<< endl << endl << "SYNOPSIS" << endl - << " blasr reads.bam genome.fasta -bam -out out.bam" << endl << endl + << " blasr reads.bam genome.fasta --bam --out out.bam" << endl << endl << " blasr reads.fasta genome.fasta " << endl << endl - << " blasr reads.fasta genome.fasta -sa genome.fasta.sa" << endl << endl - << " blasr reads.bax.h5 genome.fasta [-sa genome.fasta.sa] " << endl << endl - << " blasr reads.bax.h5 genome.fasta -sa genome.fasta.sa -maxScore -100 -minMatch 15 ... " << endl << endl - << " blasr reads.bax.h5 genome.fasta -sa genome.fasta.sa -nproc 24 -out alignment.out ... " << endl << endl + << " blasr reads.fasta genome.fasta --sa genome.fasta.sa" << endl << endl + << " blasr reads.bax.h5 genome.fasta [--sa genome.fasta.sa] " << endl << endl + << " blasr reads.bax.h5 genome.fasta --sa genome.fasta.sa --maxScore 100 --minMatch 15 ... " << endl << endl + << " blasr reads.bax.h5 genome.fasta --sa genome.fasta.sa --nproc 24 --out alignment.out ... " << endl << endl << "DESCRIPTION " << endl << " blasr is a read mapping program that maps reads to positions " << endl << " in a genome by clustering short exact matches between the read and" << endl diff --git a/makefile b/makefile index 9182630..e2d53fc 100644 --- a/makefile +++ b/makefile @@ -9,6 +9,7 @@ foo: echo $(MAKEFILE_LIST) echo ${SRCDIR} +GET_SHA1 := $(shell git -C ${SRCDIR} describe --always --dirty='*') CXXFLAGS += -O3 -g -DSHA1_7=\"${GET_SHA1}\" CXXOPTS += \ -std=c++0x -pedantic \ @@ -28,7 +29,6 @@ override CXXFLAGS += ${CXXOPTS} ${GCXXFLAGS} SRCS := Blasr.cpp OBJS := ${SRCS:.cpp=.o} DEPS := ${SRCS:.cpp=.d} -GET_SHA1 := $(shell git rev-parse --short HEAD) override BLASR_PATH=${SRCDIR}/ export BLASR_PATH diff --git a/rules.mk b/rules.mk index c705565..a7adcc7 100644 --- a/rules.mk +++ b/rules.mk @@ -33,5 +33,5 @@ LDLIBS+= \ # We repeat LIBPBIHDF_LIBFLAGS because of a circular dependency. See #77. CPPFLAGS+=$(patsubst %,-I%,${INCDIRS}) -CPPFLAGS+=$(patsubst %,-isystem%,${SYSINCDIRS}) +CPPFLAGS+=$(patsubst %,-I%,${SYSINCDIRS}) LDFLAGS+=$(patsubst %,-L%,${LIBDIRS}) diff --git a/utils/SamFilter.cpp b/utils/SamFilter.cpp index c484618..30653ca 100644 --- a/utils/SamFilter.cpp +++ b/utils/SamFilter.cpp @@ -271,7 +271,7 @@ int main(int argc, char* argv[]) { string holeNumberStr; Ranges holeNumberRanges; - clp.RegisterStringOption("holeNumbers", &holeNumberStr, + clp.RegisterStringOption("-holeNumbers", &holeNumberStr, "A string of comma-delimited hole number ranges to output hits, " "such as '1,2,10-12'. " "This requires hit titles to be in SMRT read title format."); diff --git a/utils/bam2bax/makefile b/utils/bam2bax/makefile index f4b4245..277e605 100644 --- a/utils/bam2bax/makefile +++ b/utils/bam2bax/makefile @@ -7,7 +7,8 @@ include ${SRCDIR}/../../rules.mk all: ${CURDIR}/src/*.cpp ${CURDIR}/src/*.h ${CURDIR}/tests/src/*.cpp ${CURDIR}/tests/src/*.h @mkdir -p ${CURDIR}/build && \ cd ${CURDIR}/build && \ - cmake -DPacBioBAM_INCLUDE_DIRS=${PBBAM_INC} \ + cmake -DBOOST_ROOT=${BOOST_ROOT} \ + -DPacBioBAM_INCLUDE_DIRS=${PBBAM_INC} \ -DHTSLIB_INCLUDE_DIRS=${HTSLIB_INC} \ -DPacBioBAM_LIBRARIES=${PBBAM_LIB}/libpbbam${SH_LIB_EXT} \ -DHTSLIB_LIBRARIES=${HTSLIB_LIB}/libhts${SH_LIB_EXT} \ diff --git a/utils/bam2bax/src/Bam2BaxConverterImpl.hpp b/utils/bam2bax/src/Bam2BaxConverterImpl.hpp index 016b3fc..ab11a8d 100644 --- a/utils/bam2bax/src/Bam2BaxConverterImpl.hpp +++ b/utils/bam2bax/src/Bam2BaxConverterImpl.hpp @@ -50,20 +50,6 @@ bool Bam2BaxConverter<T_HDFWRITER>::ConvertFile(void) { } if (not settings_.ignoreQV) writer.WriteFakeDataSets(); for (auto error: writer.Errors()) { AddErrorMessage(error); } - } else if (not settings_.polymeraseBamFilename.empty()) { - // Read polymerase reads from polymerase.bam directly. - PacBio::BAM::EntireFileQuery query(bamfile); - for (auto record: query) { - SMRTSequence smrt; - smrt.Copy(record, true); - RegionAnnotation ra(record.HoleNumber(), - RegionTypeAdapter::ToRegionTypeIndex(PacBio::BAM::VirtualRegionType::HQREGION, regionTypes), - 0, 0, 0); - std::vector<RegionAnnotation> ras({ra}); - if (not writer.WriteOneZmw(smrt, ras) or not writer.Errors().empty()) { break; } - } - if (not settings_.ignoreQV) writer.WriteFakeDataSets(); - for (auto error: writer.Errors()) { AddErrorMessage(error); } } return errors_.empty(); diff --git a/utils/bam2bax/src/Bam2BaxMain.cpp b/utils/bam2bax/src/Bam2BaxMain.cpp index e8e3895..f29f83e 100644 --- a/utils/bam2bax/src/Bam2BaxMain.cpp +++ b/utils/bam2bax/src/Bam2BaxMain.cpp @@ -21,8 +21,8 @@ int main(int argc, char* argv[]) auto ioGroup = optparse::OptionGroup(parser, "Input/output files"); ioGroup.add_option("") .dest(Settings::Option::input_) - .metavar("movie.subreads.bam movie.scraps.bam | movie.polymerase.bam") - .help("Input a movie.polymerase.bam. Or a movie.subreads.bam and a movie.scraps.bam"); + .metavar("movie.subreads.bam movie.scraps.bam") + .help("A movie.subreads.bam and a movie.scraps.bam"); ioGroup.add_option("--trace") .dest(Settings::Option::trace_) .metavar("movie.trc.h5") diff --git a/utils/bam2bax/src/Bam2PlxMain.cpp b/utils/bam2bax/src/Bam2PlxMain.cpp index 8d0f48f..3be76a0 100644 --- a/utils/bam2bax/src/Bam2PlxMain.cpp +++ b/utils/bam2bax/src/Bam2PlxMain.cpp @@ -21,8 +21,8 @@ int main(int argc, char* argv[]) auto ioGroup = optparse::OptionGroup(parser, "Input/output files"); ioGroup.add_option("") .dest(Settings::Option::input_) - .metavar("movie.subreads.bam movie.scraps.bam | movie.polymerase.bam") - .help("Input a movie.polymerase.bam. Or a movie.subreads.bam and a movie.scraps.bam"); + .metavar("movie.subreads.bam movie.scraps.bam") + .help("A movie.subreads.bam and a movie.scraps.bam"); ioGroup.add_option("-o") .dest(Settings::Option::output_) .metavar("STRING") diff --git a/utils/bam2bax/src/Converter.cpp b/utils/bam2bax/src/Converter.cpp index 1926b19..8fa98b6 100644 --- a/utils/bam2bax/src/Converter.cpp +++ b/utils/bam2bax/src/Converter.cpp @@ -7,8 +7,6 @@ Converter::Converter(Settings const& settings) std::string infn = settings_.subreadsBamFilename; - if (infn.empty()) infn = settings_.polymeraseBamFilename; - bamfile_ = new PacBio::BAM::BamFile(infn); PacBio::BAM::BamHeader bamheader = bamfile_->Header(); @@ -75,20 +73,6 @@ bool Converter::Run() { } if (not settings_.ignoreQV) writer_->WriteFakeDataSets(); for (auto error: writer_->Errors()) { AddErrorMessage(error); } - } else if (not settings_.polymeraseBamFilename.empty()) { - // Read polymerase reads from polymerase.bam directly. - PacBio::BAM::EntireFileQuery query(*bamfile_); - for (auto record: query) { - SMRTSequence smrt; - smrt.Copy(record, true); - RegionAnnotation ra(record.HoleNumber(), - RegionTypeAdapter::ToRegionTypeIndex(PacBio::BAM::VirtualRegionType::HQREGION, regionTypes), - 0, 0, 0); - std::vector<RegionAnnotation> ras({ra}); - if (not writer_->WriteOneZmw(smrt, ras) or not writer_->Errors().empty()) { break; } - } - if (not settings_.ignoreQV) writer_->WriteFakeDataSets(); - for (auto error: writer_->Errors()) { AddErrorMessage(error); } } return errors_.empty(); diff --git a/utils/bam2bax/src/Settings.cpp b/utils/bam2bax/src/Settings.cpp index 5d54d56..a5a68d0 100644 --- a/utils/bam2bax/src/Settings.cpp +++ b/utils/bam2bax/src/Settings.cpp @@ -154,11 +154,7 @@ Settings Settings::FromCommandLine(optparse::OptionParser& parser, // input settings.inputBamFilenames = parser.args(); - if (settings.inputBamFilenames.size() == 1) { - settings.polymeraseBamFilename = settings.inputBamFilenames[0]; - if (settings.polymeraseBamFilename.find("polymerase.bam") == std::string::npos) - settings.errors_.push_back("missing input *.polymerase.bam."); - } else if (settings.inputBamFilenames.size() == 2) { + if (settings.inputBamFilenames.size() == 2) { settings.subreadsBamFilename = settings.inputBamFilenames[0]; settings.scrapsBamFilename = settings.inputBamFilenames[1]; if (settings.subreadsBamFilename.find("subreads.bam") == std::string::npos) @@ -166,7 +162,7 @@ Settings Settings::FromCommandLine(optparse::OptionParser& parser, if (settings.scrapsBamFilename.find("scraps.bam") == std::string::npos) settings.errors_.push_back("missing input *.scraps.bam."); } else { - settings.errors_.push_back("missing input (polymerase.bam or subreads+scraps.bam."); + settings.errors_.push_back("missing input subreads+scraps.bam."); } if (options.is_set(Settings::Option::trace_)) { @@ -180,8 +176,6 @@ Settings Settings::FromCommandLine(optparse::OptionParser& parser, if (settings.outputBaxPrefix.empty()) { // if output prefix not set. if (not settings.subreadsBamFilename.empty()) { settings.outputBaxPrefix = internal::GetMovienameFromFilename(settings.subreadsBamFilename); - } else if (not settings.polymeraseBamFilename.empty()) { - settings.outputBaxPrefix = internal::GetMovienameFromFilename(settings.polymeraseBamFilename); } } @@ -219,8 +213,6 @@ Settings Settings::FromCommandLine(optparse::OptionParser& parser, cerr << " subreads : " << settings.subreadsBamFilename << endl; if (not settings.scrapsBamFilename.empty()) cerr << " scraps : " << settings.scrapsBamFilename << endl; - if (not settings.polymeraseBamFilename.empty()) - cerr << " polymerase: " << settings.polymeraseBamFilename << endl; if (not settings.traceFilename.empty()) cerr << " trace : " << settings.traceFilename << endl; cerr << "Output h5 : " << settings.outputBaxFilename << endl diff --git a/utils/bam2bax/tests/cram/bam2bax.t b/utils/bam2bax/tests/cram/bam2bax.t index 735ff3e..d7b94f0 100755 --- a/utils/bam2bax/tests/cram/bam2bax.t +++ b/utils/bam2bax/tests/cram/bam2bax.t @@ -65,18 +65,6 @@ diff input with output $ diff $I_SR_SAM.tmp $O_SR_SAM.tmp || echo I.subreads.bam and O.subreads.bam are not identical $ diff $I_SC_SAM.tmp $O_SC_SAM.tmp || echo I.subreads.bam and O.subreads.bam are not identical - -polymerase.bam to bax.h5 - $ $BAM2BAX /pbi/dept/secondary/siv/testdata/bam2bax/bam2plx/small.polymerase.bam -o Analysis_Results/polymerase 1>/dev/null 2>/dev/null && echo $? - 0 - - $ h5ls -r Analysis_Results/polymerase.bax.h5 |grep PulseData/Regions |wc -l - 1 - - $ h5dump -d PulseData/Regions Analysis_Results/polymerase.bax.h5 |grep '133229, 2, 0, 0, 0' |wc -l - 1 - - ZMW with no HQ region $ $BAM2BAX /pbi/dept/secondary/siv/testdata/bam2bax/all_lq/all_lq.subreads.bam /pbi/dept/secondary/siv/testdata/bam2bax/all_lq/all_lq.scraps.bam -o Analysis_Results/all_lq 1>/dev/null 2>/dev/null && echo $? 0 @@ -84,3 +72,13 @@ ZMW with no HQ region $ h5dump -d /PulseData/Regions Analysis_Results/all_lq.bax.h5 |grep "(0,0): 47775928, 2, 0, 0, 700" (0,0): 47775928, 2, 0, 0, 700, +Check ZMW HoleXY + $ HOLEXY_PATH=/pbi/dept/secondary/siv/testdata/bam2bax/holexy + $ $BAM2BAX $HOLEXY_PATH/rt_m54075_160614_021811_4194561.subreads.bam $HOLEXY_PATH/rt_m54075_160614_021811_4194561.scraps.bam -o Analysis_Results/test_m54075_160614_021811_4194561 1>/dev/null 2>/dev/null && echo $? + 0 + + $ h5dump -d /PulseData/BaseCalls/ZMW/HoleXY $HOLEXY_PATH/poc_m54075_160614_021811_4194561.bax.h5 | grep "(0,0): " + (0,0): 64, 257 + $ h5dump -d /PulseData/BaseCalls/ZMW/HoleXY Analysis_Results/test_m54075_160614_021811_4194561.bax.h5 | grep "(0,0): " + (0,0): 64, 257 + diff --git a/utils/bam2bax/tests/cram/bam2plx.t b/utils/bam2bax/tests/cram/bam2plx.t index 07669c4..e0d4235 100755 --- a/utils/bam2bax/tests/cram/bam2plx.t +++ b/utils/bam2bax/tests/cram/bam2plx.t @@ -115,15 +115,15 @@ Check PulseCalls/MeanSignal DATATYPE H5T_STD_U16LE DATASPACE SIMPLE { ( 15581, 4 ) / ( H5S_UNLIMITED, 4 ) } DATA { - (0,0): 0, 0, 0, 112, - (1,0): 0, 75, 0, 0, - (2,0): 0, 0, 0, 65, - (3,0): 0, 60, 0, 0, - (4,0): 0, 0, 0, 62, + (0,0): 0, 0, 0, 11, + (1,0): 0, 8, 0, 0, + (2,0): 0, 0, 0, 7, + (3,0): 0, 6, 0, 0, + (4,0): 0, 0, 0, 6, Check PulseCalls/MidSignal $ h5dump -d /PulseData/PulseCalls/MidSignal $O_PLX_H5 |grep "(0):" - (0): 0, 0, 0, 51, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 82, 0, 82, + (0): 0, 0, 0, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 0, 8, 0, Check PulseCalls/WidthInFrames $ h5dump -d /PulseData/PulseCalls/WidthInFrames $O_PLX_H5 | grep "(0):" diff --git a/utils/bax2bam/CMakeLists.txt b/utils/bax2bam/CMakeLists.txt index 8ffb153..14f4697 100644 --- a/utils/bax2bam/CMakeLists.txt +++ b/utils/bax2bam/CMakeLists.txt @@ -77,6 +77,8 @@ if (NOT ZLIB_LIBRARIES OR NOT ZLIB_INCLUDE_DIRS) find_package(ZLIB REQUIRED) endif() +find_package(Threads) + # shared CXX flags for src & tests include(CheckCXXCompilerFlag) set(Bax2Bam_CXX_FLAGS "-g -std=c++11 -Wall") diff --git a/utils/bax2bam/README.md b/utils/bax2bam/README.md index 62bdcbd..20832d9 100644 --- a/utils/bax2bam/README.md +++ b/utils/bax2bam/README.md @@ -40,7 +40,7 @@ Options: DeletionTag dt Y InsertionQV iq Y IPD ip Y - PulseWidth pw N + PulseWidth pw Y MergeQV mq Y SubstitutionQV sq Y SubstitutionTag st N @@ -59,4 +59,4 @@ Options: that non-sequencing ZMWs should be included in the output scraps BAM file, if applicable. -``` \ No newline at end of file +``` diff --git a/utils/bax2bam/makefile b/utils/bax2bam/makefile index c50e02c..4c395bb 100644 --- a/utils/bax2bam/makefile +++ b/utils/bax2bam/makefile @@ -1,13 +1,14 @@ .PHONY=all SRCDIR:=$(dir $(realpath $(lastword $(MAKEFILE_LIST)))) --include ${CURDIR}/../../defines.mk +include ${CURDIR}/../../defines.mk include ${SRCDIR}/../../rules.mk all: ${CURDIR}/src/* ${CURDIR}/tests/src/* @mkdir -p ${CURDIR}/build && \ cd ${CURDIR}/build && \ - cmake -DPacBioBAM_INCLUDE_DIRS=${PBBAM_INC} \ + cmake -DBOOST_ROOT=${BOOST_ROOT} \ + -DPacBioBAM_INCLUDE_DIRS=${PBBAM_INC} \ -DHTSLIB_INCLUDE_DIRS=${HTSLIB_INC} \ -DPacBioBAM_LIBRARIES=${PBBAM_LIB}/libpbbam${SH_LIB_EXT} \ -DHTSLIB_LIBRARIES=${HTSLIB_LIB}/libhts${SH_LIB_EXT} \ diff --git a/utils/bax2bam/src/Bax2Bam.cpp b/utils/bax2bam/src/Bax2Bam.cpp index 70fc196..6f66acb 100644 --- a/utils/bax2bam/src/Bax2Bam.cpp +++ b/utils/bax2bam/src/Bax2Bam.cpp @@ -38,17 +38,77 @@ bool WriteDatasetXmlOutput(const Settings& settings, assert(errors); try { + + // determine output details based on mode + // initialize with SUBREAD data (most common) + DataSet::TypeEnum outputDataSetType; + string outputDataSetMetaType; + string outputTimestampPrefix; + string outputBamFileType; + string outputScrapsFileType; + string outputXmlSuffix; + + switch(settings.mode) + { + case Settings::SubreadMode : + { + outputDataSetType = DataSet::SUBREAD; + outputDataSetMetaType = "PacBio.DataSet.SubreadSet"; + outputTimestampPrefix = "pacbio_dataset_subreadset-"; + outputBamFileType = "PacBio.SubreadFile.SubreadBamFile"; + outputScrapsFileType = "PacBio.SubreadFile.ScrapsBamFile"; + outputXmlSuffix = ".subreadset.xml"; + break; + } + + case Settings::CCSMode : + { + outputDataSetType = DataSet::CONSENSUS_READ; + outputDataSetMetaType = "PacBio.DataSet.ConsensusReadSet"; + outputTimestampPrefix = "pacbio_dataset_consensusreadset-"; + outputBamFileType = "PacBio.ConsensusReadFile.ConsensusReadBamFile"; + outputScrapsFileType = ""; + outputXmlSuffix = ".consensusreadset.xml"; + break; + + } + case Settings::HQRegionMode : + { + outputDataSetType = DataSet::SUBREAD; + outputDataSetMetaType = "PacBio.DataSet.SubreadSet"; + outputTimestampPrefix = "pacbio_dataset_subreadset-"; + outputBamFileType = "PacBio.SubreadFile.HqRegionBamFile"; + outputScrapsFileType = "PacBio.SubreadFile.HqScrapsBamFile";; + outputXmlSuffix = ".subreadset.xml"; + break; + } + case Settings::PolymeraseMode : + { + outputDataSetType = DataSet::SUBREAD; + outputDataSetMetaType = "PacBio.DataSet.SubreadSet"; + outputTimestampPrefix = "pacbio_dataset_subreadset-"; + outputBamFileType = "PacBio.SubreadFile.PolymeraseBamFile"; + outputScrapsFileType = "PacBio.SubreadFile.PolymeraseScrapsBamFile"; + outputXmlSuffix = ".subreadset.xml"; + break; + } + + default: + assert(false); // should already be checked upstream + throw std::runtime_error("unknown mode selected"); + } + DataSet dataset(settings.datasetXmlFilename); assert(dataset.Type() == DataSet::HDF_SUBREAD); // change type - dataset.Type(DataSet::SUBREAD); - dataset.MetaType("PacBio.DataSet.SubreadSet"); + dataset.Type(outputDataSetType); + dataset.MetaType(outputDataSetMetaType); time_t currentTime = time(NULL); //const string& timestamp = CurrentTimestamp(); dataset.CreatedAt(ToIso8601(currentTime)); - dataset.TimeStampedName(string{"pacbio_dataset_subreadset-"}+ToDataSetFormat(currentTime)); + dataset.TimeStampedName(outputTimestampPrefix+ToDataSetFormat(currentTime)); // change files: remove BAX, add BAM std::vector<ExternalResource> toRemove; @@ -86,7 +146,7 @@ bool WriteDatasetXmlOutput(const Settings& settings, // Combine the scheme and filepath and store in the dataset mainBamFilepath = scheme + mainBamFilepath; - ExternalResource mainBam{ "PacBio.SubreadFile.SubreadBamFile", mainBamFilepath }; + ExternalResource mainBam{ outputBamFileType, mainBamFilepath }; FileIndex mainPbi{ "PacBio.Index.PacBioIndex", mainBamFilepath + ".pbi" }; mainBam.FileIndices().Add(mainPbi); @@ -108,7 +168,7 @@ bool WriteDatasetXmlOutput(const Settings& settings, scrapsBamFilepath.append(settings.scrapsBamFilename); } - ExternalResource scrapsBam{ "PacBio.SubreadFile.ScrapsBamFile", scrapsBamFilepath }; + ExternalResource scrapsBam{ outputScrapsFileType, scrapsBamFilepath }; FileIndex scrapsPbi{ "PacBio.Index.PacBioIndex", scrapsBamFilepath + ".pbi" }; scrapsBam.FileIndices().Add(scrapsPbi); mainBam.ExternalResources().Add(scrapsBam); @@ -139,7 +199,7 @@ bool WriteDatasetXmlOutput(const Settings& settings, // save to file string xmlFn = settings.outputXmlFilename; // try user-provided explicit filename first if (xmlFn.empty()) - xmlFn = settings.outputBamPrefix + ".dataset.xml"; // prefix set w/ moviename elsewhere if not user-provided + xmlFn = settings.outputBamPrefix + outputXmlSuffix; // prefix set w/ moviename elsewhere if not user-provided dataset.Save(xmlFn); return true; diff --git a/utils/bax2bam/src/CMakeLists.txt b/utils/bax2bam/src/CMakeLists.txt index 9fa7243..aa8b803 100644 --- a/utils/bax2bam/src/CMakeLists.txt +++ b/utils/bax2bam/src/CMakeLists.txt @@ -53,5 +53,7 @@ target_link_libraries(bax2bam ${PacBioBAM_LIBRARIES} ${HTSLIB_LIBRARIES} ${ZLIB_LIBRARIES} + ${CMAKE_THREAD_LIBS_INIT} ${MY_LIBRT} + "-ldl" ) diff --git a/utils/bax2bam/src/ConverterBase.h b/utils/bax2bam/src/ConverterBase.h index e2cafd5..19a1d6c 100644 --- a/utils/bax2bam/src/ConverterBase.h +++ b/utils/bax2bam/src/ConverterBase.h @@ -818,12 +818,16 @@ bool ConverterBase<RecordType, HdfReader>::LoadChemistryFromMetadataXML( sequencingKit_ = pt.get<std::string>("Metadata.SequencingKit.PartNumber"); basecallerVersion_ = pt.get<std::string>("Metadata.InstCtrlVer"); - // throws if invalid chemistry triple - // we'll take the opportunity to exit early with error message - using PacBio::BAM::ReadGroupInfo; - auto chemistryCheck = ReadGroupInfo::SequencingChemistryFromTriple(bindingKit_, - sequencingKit_, - basecallerVersion_); + if (!settings_.isIgnoringChemistryCheck) { + + // throws if invalid chemistry triple + // we'll take the opportunity to exit early with error message + using PacBio::BAM::ReadGroupInfo; + auto chemistryCheck = ReadGroupInfo::SequencingChemistryFromTriple(bindingKit_, + sequencingKit_, + basecallerVersion_); + } + return true; } catch (PacBio::BAM::InvalidSequencingChemistryException& e) { diff --git a/utils/bax2bam/src/Settings.cpp b/utils/bax2bam/src/Settings.cpp index a6daa01..6b827c3 100644 --- a/utils/bax2bam/src/Settings.cpp +++ b/utils/bax2bam/src/Settings.cpp @@ -88,17 +88,19 @@ const char* Settings::Option::ccsMode_ = "ccsMode"; const char* Settings::Option::internalMode_ = "internalMode"; const char* Settings::Option::outputXml_ = "outputXml"; const char* Settings::Option::sequelPlatform_ = "sequelPlatform"; +const char* Settings::Option::allowUnsupportedChem_ = "allowUnsupportedChem"; Settings::Settings(void) : mode(Settings::SubreadMode) , isInternal(false) , isSequelInput(false) + , isIgnoringChemistryCheck(false) , usingDeletionQV(true) , usingDeletionTag(true) , usingInsertionQV(true) , usingIPD(true) , usingMergeQV(true) - , usingPulseWidth(false) + , usingPulseWidth(true) , usingSubstitutionQV(true) , usingSubstitutionTag(false) , losslessFrames(false) @@ -190,6 +192,10 @@ Settings Settings::FromCommandLine(optparse::OptionParser& parser, settings.isInternal = options.is_set(Settings::Option::internalMode_) ? options.get(Settings::Option::internalMode_) : false; + // strict/relaxed chemistry check + settings.isIgnoringChemistryCheck = options.is_set(Settings::Option::allowUnsupportedChem_) ? options.get(Settings::Option::allowUnsupportedChem_) + : false; + // platform settings.isSequelInput = options.is_set(Settings::Option::sequelPlatform_) ? options.get(Settings::Option::sequelPlatform_) : false; @@ -228,6 +234,10 @@ Settings Settings::FromCommandLine(optparse::OptionParser& parser, } } + // always disable PulseWidth tag in CCS mode + if (isCCS) + settings.usingPulseWidth = false; + #ifdef DEBUG_SETTINGS string modeString; diff --git a/utils/bax2bam/src/Settings.h b/utils/bax2bam/src/Settings.h index c060620..ab44923 100644 --- a/utils/bax2bam/src/Settings.h +++ b/utils/bax2bam/src/Settings.h @@ -30,6 +30,7 @@ public: static const char* internalMode_; static const char* outputXml_; static const char* sequelPlatform_; + static const char* allowUnsupportedChem_; }; public: @@ -56,6 +57,9 @@ public: // platform bool isSequelInput; + // chemistry checking? + bool isIgnoringChemistryCheck; + // features bool usingDeletionQV; bool usingDeletionTag; diff --git a/utils/bax2bam/src/main.cpp b/utils/bax2bam/src/main.cpp index 223004f..8cbb7e8 100644 --- a/utils/bax2bam/src/main.cpp +++ b/utils/bax2bam/src/main.cpp @@ -75,10 +75,13 @@ int main(int argc, char* argv[]) " DeletionTag dt Y\n" " InsertionQV iq Y\n" " IPD ip Y\n" - " PulseWidth pw N\n" + " PulseWidth pw Y*\n" " MergeQV mq Y\n" " SubstitutionQV sq Y\n" " SubstitutionTag st N\n" + "\n" + "* - PulseWidth will be ignored in CCS mode.\n" + "\n" "If this option is used, then only those features listed will be included, " "regardless of the default state." ); @@ -102,6 +105,16 @@ int main(int argc, char* argv[]) ); parser.add_option_group(bamModeGroup); + auto additionalGroup = optparse::OptionGroup(parser, "Additional options"); + additionalGroup.add_option("--allowUnrecognizedChemistryTriple") + .dest(Settings::Option::allowUnsupportedChem_) + .action("store_true") + .help("By default, bax2bam only allows the conversion of files " + "with chemistries that are supported in SMRT Analysis 3. " + "Set this flag to disable the strict check and allow " + "generation of BAM files containing legacy chemistries."); + parser.add_option_group(additionalGroup); + // parse command line Settings settings = Settings::FromCommandLine(parser, argc, argv); if (!settings.errors.empty()) { diff --git a/utils/bax2bam/tests/CMakeLists.txt b/utils/bax2bam/tests/CMakeLists.txt index 8045024..4a3680a 100644 --- a/utils/bax2bam/tests/CMakeLists.txt +++ b/utils/bax2bam/tests/CMakeLists.txt @@ -79,6 +79,7 @@ target_link_libraries(test_bax2bam ${ZLIB_LIBRARIES} ${CMAKE_THREAD_LIBS_INIT} # quirky pthreads ${MY_LIBRT} + "-ldl" ) # add_test(test_bax2bam test_bax2bam) add_test( diff --git a/utils/bax2bam/tests/bax2bam.t b/utils/bax2bam/tests/bax2bam.t index 625ee6f..9f13a6e 100644 --- a/utils/bax2bam/tests/bax2bam.t +++ b/utils/bax2bam/tests/bax2bam.t @@ -4,4 +4,4 @@ Simple test to make sure bax2bam runs properly. $ cd $WORKSPACE $ cd smrtanalysis/_output/modulebuilds/bioinformatics/staging/PostPrimary/bax2bam/_output/install/binwrap-build $ rm -f tst1.*.bam - $ ./bax2bam -o tst1 /pbi/dept/secondary/siv/testdata/SA3-RS/lambda/2372215/0007_tiny/Analysis_Results/m150404_101626_42267_c100807920800000001823174110291514_s1_p0.1.bax.h5 + $ ./bax2bam -o tst1 /pbi/dept/secondary/siv/testdata/bax2bam/m160823_221224_ethan_c010091942559900001800000112311890_s1_p0.1.bax.h5 diff --git a/utils/bax2bam/tests/src/TestData.h.in b/utils/bax2bam/tests/src/TestData.h.in index 11fa1f8..53f0a89 100644 --- a/utils/bax2bam/tests/src/TestData.h.in +++ b/utils/bax2bam/tests/src/TestData.h.in @@ -10,7 +10,7 @@ namespace tests { const std::string Bax2Bam_Exe = std::string("@Bax2Bam_BinDir@/bax2bam"); const std::string Source_Dir = std::string("@Bax2Bam_TestsDir@"); const std::string Bin_Dir = std::string("@CMAKE_CURRENT_BINARY_DIR@"); -const std::string Data_Dir = std::string("/pbi/dept/secondary/siv/testdata/bax2bam/data"); +const std::string Data_Dir = std::string("/pbi/dept/secondary/siv/testdata/bax2bam"); } // namespace tests diff --git a/utils/bax2bam/tests/src/test_ccs.cpp b/utils/bax2bam/tests/src/test_ccs.cpp index f388bca..4851be0 100644 --- a/utils/bax2bam/tests/src/test_ccs.cpp +++ b/utils/bax2bam/tests/src/test_ccs.cpp @@ -26,7 +26,7 @@ TEST(CcsTest, EndToEnd_Multiple) const string movieName = "m131018_081703_42161_c100585152550000001823088404281404_s1_p0"; vector<string> baxFilenames; - baxFilenames.push_back(tests::Data_Dir + "/" + movieName + ".1.ccs.h5"); + baxFilenames.push_back(tests::Data_Dir + "/data/" + movieName + ".1.ccs.h5"); const string generatedBam = movieName + ".ccs.bam"; diff --git a/utils/bax2bam/tests/src/test_hqregions.cpp b/utils/bax2bam/tests/src/test_hqregions.cpp index c443850..fba41f1 100644 --- a/utils/bax2bam/tests/src/test_hqregions.cpp +++ b/utils/bax2bam/tests/src/test_hqregions.cpp @@ -25,13 +25,13 @@ TEST(HqRegionsTest, EndToEnd_Single) const string movieName = "m140905_042212_sidney_c100564852550000001823085912221377_s1_X0"; vector<string> baxFilenames; - baxFilenames.push_back(tests::Data_Dir + "/" + movieName + ".1.bax.h5"); + baxFilenames.push_back(tests::Data_Dir + "/data/" + movieName + ".1.bax.h5"); const string generatedBam = movieName + ".hqregions.bam"; const string scrapBam = movieName + ".lqregions.bam"; - // run conversion - const int result = RunBax2Bam(baxFilenames, "--hqregion"); + // run conversion (manually disabling PW check... we can restore this later with a BAX that has both HQRegions & PW data) + const int result = RunBax2Bam(baxFilenames, "--hqregion --pulsefeatures=\"DeletionQV,DeletionTag,InsertionQV,IPD,MergeQV,SubstitutionQV\""); EXPECT_EQ(0, result); { // ensure PBIs exist @@ -51,7 +51,7 @@ TEST(HqRegionsTest, EndToEnd_Single) baxReader.IncludeField("MergeQV"); baxReader.IncludeField("SubstitutionQV"); baxReader.IncludeField("HQRegionSNR"); - // not using SubTag or PulseWidth here + // not using SubTag or PulseWidth string baxBasecallerVersion; string baxBindingKit; @@ -136,7 +136,7 @@ TEST(HqRegionsTest, EndToEnd_Single) EXPECT_EQ(baxBasecallerVersion, rg.BasecallerVersion()); EXPECT_EQ(baxBindingKit, rg.BindingKit()); EXPECT_EQ(baxSequencingKit, rg.SequencingKit()); - EXPECT_EQ(75, std::stod(rg.FrameRateHz())); + EXPECT_FLOAT_EQ(75.0, std::stof(rg.FrameRateHz())); EXPECT_EQ("dq", rg.BaseFeatureTag(BaseFeature::DELETION_QV)); EXPECT_EQ("dt", rg.BaseFeatureTag(BaseFeature::DELETION_TAG)); EXPECT_EQ("iq", rg.BaseFeatureTag(BaseFeature::INSERTION_QV)); @@ -293,7 +293,7 @@ TEST(HqRegionsTest, EndToEnd_Single) EXPECT_EQ(baxBasecallerVersion, rg.BasecallerVersion()); EXPECT_EQ(baxBindingKit, rg.BindingKit()); EXPECT_EQ(baxSequencingKit, rg.SequencingKit()); - EXPECT_EQ(75, std::stod(rg.FrameRateHz())); + EXPECT_FLOAT_EQ(75.0, std::stof(rg.FrameRateHz())); EXPECT_EQ("dq", rg.BaseFeatureTag(BaseFeature::DELETION_QV)); EXPECT_EQ("dt", rg.BaseFeatureTag(BaseFeature::DELETION_TAG)); EXPECT_EQ("iq", rg.BaseFeatureTag(BaseFeature::INSERTION_QV)); @@ -419,3 +419,4 @@ TEST(HqRegionsTest, EndToEnd_Single) RemoveFile(scrapBam + ".pbi"); }); } + diff --git a/utils/bax2bam/tests/src/test_polymerase.cpp b/utils/bax2bam/tests/src/test_polymerase.cpp index 611962a..73c50a6 100644 --- a/utils/bax2bam/tests/src/test_polymerase.cpp +++ b/utils/bax2bam/tests/src/test_polymerase.cpp @@ -20,7 +20,7 @@ using namespace PacBio::BAM; TEST(PolymeraseTest, EndToEnd_Single) { // setup - const string movieName = "m140905_042212_sidney_c100564852550000001823085912221377_s1_X0"; + const string movieName = "m160823_221224_ethan_c010091942559900001800000112311890_s1_p0"; vector<string> baxFilenames; baxFilenames.push_back(tests::Data_Dir + "/" + movieName + ".1.bax.h5"); @@ -46,7 +46,8 @@ TEST(PolymeraseTest, EndToEnd_Single) baxReader.IncludeField("MergeQV"); baxReader.IncludeField("SubstitutionQV"); baxReader.IncludeField("HQRegionSNR"); - // not using SubTag or PulseWidth here + baxReader.IncludeField("WidthInFrames"); + // not using SubTag string baxBasecallerVersion; string baxBindingKit; @@ -118,19 +119,20 @@ TEST(PolymeraseTest, EndToEnd_Single) EXPECT_EQ(baxBasecallerVersion, rg.BasecallerVersion()); EXPECT_EQ(baxBindingKit, rg.BindingKit()); EXPECT_EQ(baxSequencingKit, rg.SequencingKit()); - EXPECT_EQ(75, std::stod(rg.FrameRateHz())); + EXPECT_FLOAT_EQ(75.00577, std::stof(rg.FrameRateHz())); EXPECT_EQ("dq", rg.BaseFeatureTag(BaseFeature::DELETION_QV)); EXPECT_EQ("dt", rg.BaseFeatureTag(BaseFeature::DELETION_TAG)); EXPECT_EQ("iq", rg.BaseFeatureTag(BaseFeature::INSERTION_QV)); EXPECT_EQ("ip", rg.BaseFeatureTag(BaseFeature::IPD)); EXPECT_EQ("mq", rg.BaseFeatureTag(BaseFeature::MERGE_QV)); EXPECT_EQ("sq", rg.BaseFeatureTag(BaseFeature::SUBSTITUTION_QV)); + EXPECT_EQ("pw", rg.BaseFeatureTag(BaseFeature::PULSE_WIDTH)); EXPECT_FALSE(rg.HasBaseFeature(BaseFeature::SUBSTITUTION_TAG)); EXPECT_EQ(FrameCodec::V1, rg.IpdCodec()); // compare 1st record from each file SMRTSequence baxRecord; - EXPECT_TRUE(baxReader.GetNext(baxRecord) > 0); + EXPECT_TRUE(baxReader.GetReadAt(8, baxRecord) > 0); vector<float> hqSnr; hqSnr.push_back(baxRecord.HQRegionSnr('A')); @@ -138,10 +140,10 @@ TEST(PolymeraseTest, EndToEnd_Single) hqSnr.push_back(baxRecord.HQRegionSnr('G')); hqSnr.push_back(baxRecord.HQRegionSnr('T')); - EXPECT_GT(hqSnr[0], 0); - EXPECT_GT(hqSnr[1], 0); - EXPECT_GT(hqSnr[2], 0); - EXPECT_GT(hqSnr[3], 0); + EXPECT_FLOAT_EQ(0.0, hqSnr[0]); + EXPECT_FLOAT_EQ(0.0, hqSnr[1]); + EXPECT_FLOAT_EQ(0.0, hqSnr[2]); + EXPECT_FLOAT_EQ(0.0, hqSnr[3]); bool firstRecord = true; EntireFileQuery entireFile(bamFile); diff --git a/utils/bax2bam/tests/src/test_subreads.cpp b/utils/bax2bam/tests/src/test_subreads.cpp index 681a17a..6394f4a 100644 --- a/utils/bax2bam/tests/src/test_subreads.cpp +++ b/utils/bax2bam/tests/src/test_subreads.cpp @@ -121,7 +121,7 @@ void ComputeSubreadIntervals(vector<SubreadInterval>* const intervals, TEST(SubreadsTest, EndToEnd_Multiple) { // setup - const string movieName = "m140905_042212_sidney_c100564852550000001823085912221377_s1_X0"; + const string movieName = "m160823_221224_ethan_c010091942559900001800000112311890_s1_p0"; vector<string> baxFilenames; baxFilenames.push_back(tests::Data_Dir + "/" + movieName + ".1.bax.h5"); @@ -150,7 +150,8 @@ TEST(SubreadsTest, EndToEnd_Multiple) baxReader.IncludeField("MergeQV"); baxReader.IncludeField("SubstitutionQV"); baxReader.IncludeField("HQRegionSNR"); - // not using SubTag or PulseWidth here + baxReader.IncludeField("WidthInFrames"); + // not using SubTag here string baxBasecallerVersion; string baxBindingKit; @@ -230,13 +231,14 @@ TEST(SubreadsTest, EndToEnd_Multiple) EXPECT_EQ(baxBasecallerVersion, rg.BasecallerVersion()); EXPECT_EQ(baxBindingKit, rg.BindingKit()); EXPECT_EQ(baxSequencingKit, rg.SequencingKit()); - EXPECT_EQ(75, std::stod(rg.FrameRateHz())); + EXPECT_FLOAT_EQ(75.00577, std::stof(rg.FrameRateHz())); EXPECT_EQ("dq", rg.BaseFeatureTag(BaseFeature::DELETION_QV)); EXPECT_EQ("dt", rg.BaseFeatureTag(BaseFeature::DELETION_TAG)); EXPECT_EQ("iq", rg.BaseFeatureTag(BaseFeature::INSERTION_QV)); EXPECT_EQ("ip", rg.BaseFeatureTag(BaseFeature::IPD)); EXPECT_EQ("mq", rg.BaseFeatureTag(BaseFeature::MERGE_QV)); EXPECT_EQ("sq", rg.BaseFeatureTag(BaseFeature::SUBSTITUTION_QV)); + EXPECT_EQ("pw", rg.BaseFeatureTag(BaseFeature::PULSE_WIDTH)); EXPECT_FALSE(rg.HasBaseFeature(BaseFeature::SUBSTITUTION_TAG)); EXPECT_EQ(FrameCodec::V1, rg.IpdCodec()); @@ -251,6 +253,10 @@ TEST(SubreadsTest, EndToEnd_Multiple) size_t numTested = 0; EntireFileQuery entireFile(bamFile); for (BamRecord& bamRecord : entireFile) { + + if (numTested > 30) + goto cleanup; + if (intervalIdx >= subreadIntervals.size()) { while (baxReader.GetNext(baxRecord)) diff --git a/utils/ctest/samFilter.t b/utils/ctest/samFilter.t index 9dfed3b..9bbabe0 100644 --- a/utils/ctest/samFilter.t +++ b/utils/ctest/samFilter.t @@ -11,15 +11,15 @@ Set up the executable: samFilter. $ TMP2=$OUTDIR/$$.tmp.stdout $ rm -f $OUTFILE - $ $EXEC $DATDIR/lambda_bax.sam $DATDIR/lambda_ref.fasta $OUTFILE -minAccuracy 70 -minPctSimilarity 30 -hitPolicy all + $ $EXEC $DATDIR/lambda_bax.sam $DATDIR/lambda_ref.fasta $OUTFILE --minAccuracy 70 --minPctSimilarity 30 --hitPolicy all $ tail -n+7 $OUTFILE |sort > $TMP1 $ tail -n+7 $STDFILE |sort > $TMP2 $ diff $TMP1 $TMP2 $ rm $TMP1 $TMP2 -#Test whether minAccuracy and minPctSimilarity can be float. +#Test whether --minAccuracy and --minPctSimilarity can be float. # $ rm -f $OUTFILE -# $ $EXEC $DATDIR/lambda_bax.sam $DATDIR/lambda_ref.fasta $OUTFILE -minAccuracy 70.0 -minPctSimilarity 30.0 -hitPolicy all +# $ $EXEC $DATDIR/lambda_bax.sam $DATDIR/lambda_ref.fasta $OUTFILE --minAccuracy 70.0 --minPctSimilarity 30.0 --hitPolicy all # $ tail -n+7 $OUTFILE | sort > $TMP1 # $ tail -n+7 $STDFILE | sort > $TMP2 # $ diff $TMP1 $TMP2 @@ -30,40 +30,40 @@ Set up the executable: samFilter. $ STDFILE=$STDDIR/lambda_bax_filter_2.sam $ rm -f $OUTFILE - $ $EXEC $DATDIR/lambda_bax.sam $DATDIR/lambda_ref.fasta $OUTFILE -hitPolicy allbest + $ $EXEC $DATDIR/lambda_bax.sam $DATDIR/lambda_ref.fasta $OUTFILE --hitPolicy allbest $ tail -n+7 $OUTFILE > $TMP1 $ tail -n+7 $STDFILE > $TMP2 $ diff $TMP1 $TMP2 $ rm $TMP1 $TMP2 -#Test samFilter with -hitPolicy random +#Test samFilter with --hitPolicy random $ OUTFILE=$OUTDIR/lambda_bax_filter_3.sam $ STDFILE=$STDDIR/lambda_bax_filter_3.sam $ rm -f $OUTFILE - $ $EXEC $DATDIR/lambda_bax.sam $DATDIR/lambda_ref.fasta $OUTFILE -hitPolicy random + $ $EXEC $DATDIR/lambda_bax.sam $DATDIR/lambda_ref.fasta $OUTFILE --hitPolicy random $ tail -n+7 $OUTFILE > $TMP1 $ tail -n+7 $STDFILE > $TMP2 $ diff $TMP1 $TMP2 $ rm $TMP1 $TMP2 -#Test samFilter with -hitPolicy randombest +#Test samFilter with --hitPolicy randombest $ OUTFILE=$OUTDIR/lambda_bax_filter_4.sam $ STDFILE=$STDDIR/lambda_bax_filter_4.sam $ rm -f $OUTFILE - $ $EXEC $DATDIR/lambda_bax.sam $DATDIR/lambda_ref.fasta $OUTFILE -hitPolicy randombest + $ $EXEC $DATDIR/lambda_bax.sam $DATDIR/lambda_ref.fasta $OUTFILE --hitPolicy randombest $ tail -n+7 $OUTFILE > $TMP1 $ tail -n+7 $STDFILE > $TMP2 $ diff $TMP1 $TMP2 $ rm $TMP1 $TMP2 -# Test samFilter with -hitPolicy leftmost +# Test samFilter with --hitPolicy leftmost $ OUTFILE=$OUTDIR/test_leftmost_out.sam $ rm -f $OUTFILE - $ $EXEC $DATDIR/test_leftmost.sam $DATDIR/test_leftmost_target.fasta $OUTFILE -hitPolicy leftmost + $ $EXEC $DATDIR/test_leftmost.sam $DATDIR/test_leftmost_target.fasta $OUTFILE --hitPolicy leftmost $ tail -n+6 $OUTFILE |cut -f 4 1 @@ -77,12 +77,12 @@ Set up the executable: samFilter. $ diff $TMP1 $TMP2 $ rm $TMP1 $TMP2 -#Test samFilter with -holeNumbers +#Test samFilter with --holeNumbers $ OUTFILE=$OUTDIR/lambda_bax_filter_6.sam $ STDFILE=$STDDIR/lambda_bax_filter_6.sam $ rm -f $OUTFILE - $ $EXEC $DATDIR/lambda_bax.sam $DATDIR/lambda_ref.fasta $OUTFILE -holeNumbers 101350-105000,21494 + $ $EXEC $DATDIR/lambda_bax.sam $DATDIR/lambda_ref.fasta $OUTFILE --holeNumbers 101350-105000,21494 $ tail -n+7 $OUTFILE > $TMP1 $ tail -n+7 $STDFILE > $TMP2 $ diff $TMP1 $TMP2 -- Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/blasr.git _______________________________________________ debian-med-commit mailing list [email protected] http://lists.alioth.debian.org/cgi-bin/mailman/listinfo/debian-med-commit
