This is an automated email from the git hooks/post-receive script. afif pushed a commit to branch master in repository pbseqlib.
commit 849e1e0c48d4426de32561e5840a4c13d39b5f9a Author: Afif Elghraoui <[email protected]> Date: Mon Dec 19 23:46:45 2016 -0800 Imported Upstream version 0~20161219 --- .../algorithms/sorting/LightweightSuffixArray.cpp | 10 ++-- alignment/files/ReaderAgglomerate.cpp | 8 ++-- alignment/files/ReaderAgglomerate.hpp | 7 ++- alignment/format/BAMPrinter.hpp | 2 +- alignment/format/BAMPrinterImpl.hpp | 2 +- alignment/format/SAMHeaderPrinter.cpp | 56 +++++++++------------- hdf/HDFZMWWriter.cpp | 4 +- pbdata/SMRTSequence.cpp | 4 +- 8 files changed, 41 insertions(+), 52 deletions(-) diff --git a/alignment/algorithms/sorting/LightweightSuffixArray.cpp b/alignment/algorithms/sorting/LightweightSuffixArray.cpp index cb881ed..bc57ecf 100644 --- a/alignment/algorithms/sorting/LightweightSuffixArray.cpp +++ b/alignment/algorithms/sorting/LightweightSuffixArray.cpp @@ -286,7 +286,7 @@ bool LightweightSuffixSort(unsigned char text[], UInt textLength, UInt *index, i } } UInt dSetSize = dIndex; - std::cerr << "Sorting " << diffCoverSize << "-prefixes of the genome." << std::endl; + std::cout << "Sorting " << diffCoverSize << "-prefixes of the genome." << std::endl; MediankeyBoundedQuicksort(text, index, dIndex, 0, dSetSize, 0, diffCoverSize); UInt i; @@ -300,7 +300,7 @@ bool LightweightSuffixSort(unsigned char text[], UInt textLength, UInt *index, i DiffCoverMu mu; mu.Initialize(diffCover, diffCoverLength, diffCoverSize, textLength); UInt largestLexName; - std::cerr << "Enumerating " << diffCoverSize << "-prefixes." << std::endl; + std::cout << "Enumerating " << diffCoverSize << "-prefixes." << std::endl; largestLexName = DiffCoverBuildLexNaming(text, textLength, index, dSetSize, diffCoverLength, diffCoverSize, mu.diffCoverReverseLookup, lexVNaming); @@ -348,7 +348,7 @@ bool LightweightSuffixSort(unsigned char text[], UInt textLength, UInt *index, i // [0,n-v], the relative order of the suffixes starting at // i+\delta(,j) and j+\delta(i,j) is already known. // - std::cerr << "Sorting suffices." << std::endl; + std::cout << "Sorting suffices." << std::endl; // Step 2.1 v-order suffices using multikey quicksort for (i = 0; i < textLength; i++ ){ index[i] = i; @@ -369,14 +369,14 @@ bool LightweightSuffixSort(unsigned char text[], UInt textLength, UInt *index, i lOrderComparator.diffCoverReverseLookup = mu.diffCoverReverseLookup; UInt setBegin, setEnd; setBegin = setEnd = 0; - std::cerr << "Sorting buckets." << std::endl; + std::cout << "Sorting buckets." << std::endl; int percentDone = 0; int curPercentage = 0; while(setBegin < textLength) { setEnd = setBegin; percentDone = (int)(((1.0*setBegin) / textLength) * 100); if ( percentDone > curPercentage) { - std::cerr << " " << percentDone << "% of buckets sorted." << std::endl; + std::cout << " " << percentDone << "% of buckets sorted." << std::endl; curPercentage = percentDone; } while(setEnd < textLength and diff --git a/alignment/files/ReaderAgglomerate.cpp b/alignment/files/ReaderAgglomerate.cpp index 26d29b3..d9ef711 100644 --- a/alignment/files/ReaderAgglomerate.cpp +++ b/alignment/files/ReaderAgglomerate.cpp @@ -253,7 +253,7 @@ int ReaderAgglomerate::Initialize(bool unrolled_mode) { if (unrolled) { if (fileType == FileType::PBBAM) { // Handle PBBAM here , use scrapFileName - VPReader = new PacBio::BAM::VirtualPolymeraseReader(fileName, scrapsFileName); + VPReader = new PacBio::BAM::ZmwReadStitcher(fileName, scrapsFileName); assert(VPReader != nullptr); // } else if (fileType == FileType::PBDATASET) { @@ -261,7 +261,7 @@ int ReaderAgglomerate::Initialize(bool unrolled_mode) { // No need in setting filters for PolymeraseReads // prefiltering, in a form it is currently implemented migght crate Polymerase reads // with skipped subreads, which defies the whole purpose of unrolled mode - VPCReader = new PacBio::BAM::VirtualPolymeraseCompositeReader(*dataSetPtr); + VPCReader = new PacBio::BAM::ZmwReadStitcher(*dataSetPtr); assert(VPCReader != nullptr); } } @@ -506,7 +506,7 @@ int ReaderAgglomerate::GetNext(SMRTSequence &seq) { if ( VPCReader->HasNext() ) { // TODO check for length mismatch (as temporary fix) - PacBio::BAM::VirtualPolymeraseBamRecord record = VPCReader->Next(); + PacBio::BAM::VirtualZmwBamRecord record = VPCReader->Next(); numRecords = 1; // a single record only seq.Copy(record); // need to copy into seq @@ -525,7 +525,7 @@ int ReaderAgglomerate::GetNext(SMRTSequence &seq) { if ( VPReader->HasNext() ) { // TODO check for length mismatch (as temporary fix) - PacBio::BAM::VirtualPolymeraseBamRecord record = VPReader->Next(); + PacBio::BAM::VirtualZmwBamRecord record = VPReader->Next(); numRecords = 1; // a single record only seq.Copy(record); // need to copy into seq diff --git a/alignment/files/ReaderAgglomerate.hpp b/alignment/files/ReaderAgglomerate.hpp index 36fd83c..9db65d3 100644 --- a/alignment/files/ReaderAgglomerate.hpp +++ b/alignment/files/ReaderAgglomerate.hpp @@ -24,8 +24,7 @@ #include "../query/PbiFilterZmwGroupQuery.h" #include <pbbam/BamRecord.h> // the following added to support Polymerase read for unrolled mode -#include <pbbam/virtual/VirtualPolymeraseCompositeReader.h> -#include <pbbam/virtual/VirtualPolymeraseReader.h> +#include <pbbam/virtual/ZmwReadStitcher.h> // new interface #endif class ReaderAgglomerate : public BaseSequenceIO { @@ -139,8 +138,8 @@ public: PacBio::BAM::PbiFilterZmwGroupQuery * pbiFilterZmwQueryPtr; PacBio::BAM::PbiFilterZmwGroupQuery::iterator pbiFilterZmwIterator; // the following to added to support Polymerase reads in unrolled mode - PacBio::BAM::VirtualPolymeraseReader * VPReader; - PacBio::BAM::VirtualPolymeraseCompositeReader * VPCReader; + PacBio::BAM::ZmwReadStitcher * VPReader; // new interface + PacBio::BAM::ZmwReadStitcher * VPCReader; // new interface #endif }; diff --git a/alignment/format/BAMPrinter.hpp b/alignment/format/BAMPrinter.hpp index a0d9be7..1962a8a 100644 --- a/alignment/format/BAMPrinter.hpp +++ b/alignment/format/BAMPrinter.hpp @@ -21,7 +21,7 @@ namespace BAMOutput { template<typename T_Sequence> void PrintAlignment(T_AlignmentCandidate &alignment, T_Sequence &read, T_Sequence & subread, - PacBio::BAM::BamWriter &bamWriter, AlignmentContext &context, + PacBio::BAM::IRecordWriter &bamWriter, AlignmentContext &context, SupplementalQVList & qvList, Clipping clipping, bool cigarUseSeqMatch=false, const bool allowAdjacentIndels=true); } diff --git a/alignment/format/BAMPrinterImpl.hpp b/alignment/format/BAMPrinterImpl.hpp index 26103e2..95c332a 100644 --- a/alignment/format/BAMPrinterImpl.hpp +++ b/alignment/format/BAMPrinterImpl.hpp @@ -156,7 +156,7 @@ void AlignmentToBamRecord(T_AlignmentCandidate & alignment, template<typename T_Sequence> void BAMOutput::PrintAlignment(T_AlignmentCandidate &alignment, T_Sequence &read, T_Sequence & subread, - PacBio::BAM::BamWriter &bamWriter, AlignmentContext &context, + PacBio::BAM::IRecordWriter &bamWriter, AlignmentContext &context, SupplementalQVList & qvList, Clipping clipping, bool cigarUseSeqMatch, const bool allowAdjacentIndels) { diff --git a/alignment/format/SAMHeaderPrinter.cpp b/alignment/format/SAMHeaderPrinter.cpp index f5674bd..866a24c 100644 --- a/alignment/format/SAMHeaderPrinter.cpp +++ b/alignment/format/SAMHeaderPrinter.cpp @@ -345,42 +345,32 @@ SAMHeaderRGs SAMHeaderPrinter::MakeRGs(const std::vector<std::string> & readsFil delete reader; } else { #ifdef USE_PBBAM - if (fileType == FileType::PBBAM) { - // TODO: use Derek's API to merge bamHeaders from different files when - // it is in place. Use the following code for now. - std::vector<std::string>::const_iterator rfit; - for(rfit = readsFiles.begin(); rfit != readsFiles.end(); rfit++) { - try { - PacBio::BAM::BamFile bamFile(*rfit); - PacBio::BAM::BamHeader header = bamFile.Header(); - // Get read groups from bam header. - std::vector<PacBio::BAM::ReadGroupInfo> vrgs = header.ReadGroups(); - std::vector<PacBio::BAM::ReadGroupInfo>::iterator rgit; - for (rgit = vrgs.begin(); rgit != vrgs.end(); rgit++) { - rgs.Add(SAMHeaderRG((*rgit).ToSam())); - } - } catch (std::exception e) { - cout << "ERROR, unable to open bam file " << (*rfit) << endl; - exit(1); - } - } - } else if (fileType == FileType::PBDATASET) { + if (fileType == FileType::PBDATASET or fileType == FileType::PBBAM) { + // use + operator to merge headers + bool first = true; + PacBio::BAM::BamHeader mergedHeader; + // + // First stage : merge headers - loop thru all file headers and create a merged header + // for (auto xmlfn: readsFiles) { for (auto bamFile: PacBio::BAM::DataSet(xmlfn).BamFiles()) { - for (auto rg: bamFile.Header().ReadGroups()) - { - if (readType == ReadType::POLYMERASE) { - // fix for 27505 - rg.ReadType("POLYMERASE"); - rg.Id(rg.MovieName(),rg.ReadType()); - rgs.Add(SAMHeaderRG(rg.ToSam())); - } - else { - // For later, Investigate why no ReadType is used for REG and CCS - rgs.Add(SAMHeaderRG(rg.ToSam())); - } - } + if (first) { + mergedHeader = bamFile.Header(); // assign first header to mergedHeader + first = false; + } else + mergedHeader += bamFile.Header(); // add subsequent headers and to mergedHeader + } + } + + // + // Second stage : Loop thru all read groups, must modify ReadType if POLYMERASE + // + for (PacBio::BAM::ReadGroupInfo rg : mergedHeader.ReadGroups()) { + if (readType == ReadType::POLYMERASE) { + rg.ReadType("POLYMERASE"); + rg.Id(rg.MovieName(), rg.ReadType()); } + rgs.Add(SAMHeaderRG(rg.ToSam())); } } #else diff --git a/hdf/HDFZMWWriter.cpp b/hdf/HDFZMWWriter.cpp index e94413e..8f36f93 100644 --- a/hdf/HDFZMWWriter.cpp +++ b/hdf/HDFZMWWriter.cpp @@ -68,8 +68,8 @@ bool HDFZMWWriter::WriteOneZmw(const PacBio::BAM::BamRecord & read) { uint32_t hn_ = read.HoleNumber(); _WriteHoleNumber(hn_); - _WriteHoleXY(static_cast<int16_t>(hn_ & 0x0000FFFF), - static_cast<int16_t>(hn_ >> 16)); + _WriteHoleXY(static_cast<int16_t>(hn_ >> 16), + static_cast<int16_t>(hn_ & 0x0000FFFF)); _WriteHoleStatus(PacBio::AttributeValues::ZMW::HoleStatus::sequencingzmw); _WriteBaseLineSigma(read); return Errors().empty(); diff --git a/pbdata/SMRTSequence.cpp b/pbdata/SMRTSequence.cpp index 18756ee..ee1d3d9 100644 --- a/pbdata/SMRTSequence.cpp +++ b/pbdata/SMRTSequence.cpp @@ -467,8 +467,8 @@ void SMRTSequence::Copy(const PacBio::BAM::BamRecord & record, this->HoleNumber(hn). // Assumption: holeStatus of a bam record must be 'SEQUENCING' HoleStatus(static_cast<unsigned char> (PacBio::AttributeValues::ZMW::HoleStatus::sequencingzmw)). - // x = lower 16 bit, y = upper 16 bit - HoleXY(hn & 0x0000FFFF, hn >> 16); + // x = upper 16 bit, y = lower 16 bit + HoleXY(hn >> 16, hn & 0x0000FFFF); // Set hq region read score if (record.HasReadAccuracy()) { -- Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/pbseqlib.git _______________________________________________ debian-med-commit mailing list [email protected] http://lists.alioth.debian.org/cgi-bin/mailman/listinfo/debian-med-commit
