This is an automated email from the git hooks/post-receive script. tille pushed a commit to branch master in repository libbio-graphics-perl.
commit 9eaceee862978f3108b0af30bfb5087724463163 Author: Andreas Tille <[email protected]> Date: Sat Dec 17 19:37:04 2016 +0100 New upstream version 2.40 --- Build.PL | 10 +- Changes | 4 + MANIFEST | 9 +- META.json | 11 +- META.yml | 137 ++------- Makefile.PL | 2 +- README | 1 + lib/Bio/Graphics.pm | 3 +- lib/Bio/Graphics/Glyph/gene.pm | 20 +- lib/Bio/Graphics/Glyph/rainbow_gene.pm | 4 +- lib/Bio/Graphics/Glyph/segments.pm | 33 +-- lib/Bio/Graphics/Glyph/topoview.pm | 496 +++++++++++++++++++++++++++++++++ scripts/bam_coverage_windows.pl | 131 +++++++++ scripts/coverage_to_topoview.pl | 247 ++++++++++++++++ 14 files changed, 940 insertions(+), 168 deletions(-) diff --git a/Build.PL b/Build.PL index 9e6c9cf..e89c025 100755 --- a/Build.PL +++ b/Build.PL @@ -12,11 +12,13 @@ my $build = Module::Build->new( requires => { 'Bio::Root::Version' => '1.005009001', 'GD' => 2.30, - 'Statistics::Descriptive' => 2.6, # required for Bio::Graphics::Wiggle::Loader - #and tests fail without it + 'CGI' => 0, + 'Bio::Coordinate::Pair' => '1.005009001', + 'Statistics::Descriptive' => 2.6, # required for Bio::Graphics::Wiggle::Loader + #and tests fail without it }, recommends => { - 'GD::SVG' => 0.32, + 'GD::SVG' => 0.32, 'Text::ParseWords' => 3.26, # required for Bio::Graphics::Wiggle::Loader # 'Bio::SCF' => 1.01, # required for Bio::Graphics::Glyph::trace }, @@ -26,6 +28,8 @@ my $build = Module::Build->new( 'scripts/glyph_help.pl', 'scripts/search_overview.pl', 'scripts/render_msa.pl', + 'scripts/bam_coverage_windows.pl', + 'scripts/coverage_to_topoview.pl' ], create_makefile_pl => 'passthrough', build_class => 'Module::Build', diff --git a/Changes b/Changes index 50ed607..dd66663 100755 --- a/Changes +++ b/Changes @@ -1,4 +1,8 @@ Revision history for Perl extension Bio::Graphics. +2.40 + - Add CGI as a requirement as it is not part of core perl distributions, post-5.21 + - Add Bio::Coordinate::Pair as a requirement (now released separately from BioPerl core) + 2.39 - Turn off image-based regression tests becuase libgd has started to produce visually-identical images that are different at the byte level. diff --git a/MANIFEST b/MANIFEST index afbf76f..25f62a5 100644 --- a/MANIFEST +++ b/MANIFEST @@ -90,6 +90,7 @@ lib/Bio/Graphics/Glyph/text_in_box.pm lib/Bio/Graphics/Glyph/three_letters.pm lib/Bio/Graphics/Glyph/tic_tac_toe.pm lib/Bio/Graphics/Glyph/toomany.pm +lib/Bio/Graphics/Glyph/topoview.pm lib/Bio/Graphics/Glyph/trace.pm lib/Bio/Graphics/Glyph/track.pm lib/Bio/Graphics/Glyph/transcript.pm @@ -114,14 +115,13 @@ lib/Bio/Graphics/RendererI.pm lib/Bio/Graphics/Util.pm lib/Bio/Graphics/Wiggle.pm lib/Bio/Graphics/Wiggle/Loader.pm -Makefile.PL MANIFEST MANIFEST.SKIP -META.json -META.yml README README.bioperl +scripts/bam_coverage_windows.pl scripts/contig_draw.pl +scripts/coverage_to_topoview.pl scripts/feature_draw.pl scripts/frend.pl scripts/glyph_help.pl @@ -223,3 +223,6 @@ t/data/t3/version9.png t/data/wig_data.wig t/decorated_transcript_t1.pl t/Wiggle.t +Makefile.PL +META.yml +META.json diff --git a/META.json b/META.json index 4b679b6..a066e90 100644 --- a/META.json +++ b/META.json @@ -4,7 +4,7 @@ "Lincoln Stein <[email protected]>" ], "dynamic_config" : 1, - "generated_by" : "Module::Build version 0.4205", + "generated_by" : "Module::Build version 0.4218", "license" : [ "perl_5" ], @@ -25,7 +25,9 @@ "Text::ParseWords" : "3.26" }, "requires" : { + "Bio::Coordinate::Pair" : "1.005009001", "Bio::Root::Version" : "1.005009001", + "CGI" : "0", "GD" : "2.3", "Statistics::Descriptive" : "2.6" } @@ -34,7 +36,7 @@ "provides" : { "Bio::Graphics" : { "file" : "lib/Bio/Graphics.pm", - "version" : "2.39" + "version" : "2.40" }, "Bio::Graphics::ConfiguratorI" : { "file" : "lib/Bio/Graphics/ConfiguratorI.pm" @@ -289,6 +291,9 @@ "Bio::Graphics::Glyph::toomany" : { "file" : "lib/Bio/Graphics/Glyph/toomany.pm" }, + "Bio::Graphics::Glyph::topoview" : { + "file" : "lib/Bio/Graphics/Glyph/topoview.pm" + }, "Bio::Graphics::Glyph::trace" : { "file" : "lib/Bio/Graphics/Glyph/trace.pm" }, @@ -376,5 +381,5 @@ "http://dev.perl.org/licenses/" ] }, - "version" : "2.39" + "version" : "2.40" } diff --git a/META.yml b/META.yml index 7f229d0..f69ec9e 100644 --- a/META.yml +++ b/META.yml @@ -4,355 +4,252 @@ author: - 'Lincoln Stein <[email protected]>' build_requires: {} configure_requires: - Module::Build: 0.42 + Module::Build: '0.42' dynamic_config: 1 -generated_by: 'Module::Build version 0.4205, CPAN::Meta::Converter version 2.120351' +generated_by: 'Module::Build version 0.4218, CPAN::Meta::Converter version 2.150001' license: perl meta-spec: url: http://module-build.sourceforge.net/META-spec-v1.4.html - version: 1.4 + version: '1.4' name: Bio-Graphics provides: Bio::Graphics: file: lib/Bio/Graphics.pm - version: 2.39 + version: '2.40' Bio::Graphics::ConfiguratorI: file: lib/Bio/Graphics/ConfiguratorI.pm - version: 0 Bio::Graphics::DrawTransmembrane: file: lib/Bio/Graphics/DrawTransmembrane.pm - version: 0 Bio::Graphics::Feature: file: lib/Bio/Graphics/Feature.pm - version: 0 Bio::Graphics::FeatureBase: file: lib/Bio/Graphics/FeatureBase.pm - version: 0 Bio::Graphics::FeatureDir: file: lib/Bio/Graphics/FeatureDir.pm - version: 0 Bio::Graphics::FeatureFile: file: lib/Bio/Graphics/FeatureFile.pm - version: 0 Bio::Graphics::FeatureFile::Iterator: file: lib/Bio/Graphics/FeatureFile/Iterator.pm - version: 0 Bio::Graphics::FileSplitter: file: lib/Bio/Graphics/FeatureDir.pm - version: 0 Bio::Graphics::GDWrapper: file: lib/Bio/Graphics/GDWrapper.pm - version: 0 Bio::Graphics::Glyph: file: lib/Bio/Graphics/Glyph.pm - version: 0 Bio::Graphics::Glyph::Factory: file: lib/Bio/Graphics/Glyph/Factory.pm - version: 0 Bio::Graphics::Glyph::alignment: file: lib/Bio/Graphics/Glyph/alignment.pm - version: 0 Bio::Graphics::Glyph::allele_tower: file: lib/Bio/Graphics/Glyph/allele_tower.pm - version: 0 Bio::Graphics::Glyph::anchored_arrow: file: lib/Bio/Graphics/Glyph/anchored_arrow.pm - version: 0 Bio::Graphics::Glyph::arrow: file: lib/Bio/Graphics/Glyph/arrow.pm - version: 0 Bio::Graphics::Glyph::box: file: lib/Bio/Graphics/Glyph/box.pm - version: 0 Bio::Graphics::Glyph::broken_line: file: lib/Bio/Graphics/Glyph/broken_line.pm - version: 0 Bio::Graphics::Glyph::cds: file: lib/Bio/Graphics/Glyph/cds.pm - version: 0 Bio::Graphics::Glyph::christmas_arrow: file: lib/Bio/Graphics/Glyph/christmas_arrow.pm - version: 0 Bio::Graphics::Glyph::cross: file: lib/Bio/Graphics/Glyph/cross.pm - version: 0 Bio::Graphics::Glyph::crossbox: file: lib/Bio/Graphics/Glyph/crossbox.pm - version: 0 Bio::Graphics::Glyph::dashed_line: file: lib/Bio/Graphics/Glyph/dashed_line.pm - version: 0 Bio::Graphics::Glyph::decorated_gene: file: lib/Bio/Graphics/Glyph/decorated_gene.pm - version: 0 Bio::Graphics::Glyph::decorated_transcript: file: lib/Bio/Graphics/Glyph/decorated_transcript.pm - version: 0 Bio::Graphics::Glyph::diamond: file: lib/Bio/Graphics/Glyph/diamond.pm - version: 0 Bio::Graphics::Glyph::dna: file: lib/Bio/Graphics/Glyph/dna.pm - version: 0 Bio::Graphics::Glyph::dot: file: lib/Bio/Graphics/Glyph/dot.pm - version: 0 Bio::Graphics::Glyph::dumbbell: file: lib/Bio/Graphics/Glyph/dumbbell.pm - version: 0 Bio::Graphics::Glyph::ellipse: file: lib/Bio/Graphics/Glyph/ellipse.pm - version: 0 Bio::Graphics::Glyph::ex: file: lib/Bio/Graphics/Glyph/ex.pm - version: 0 Bio::Graphics::Glyph::extending_arrow: file: lib/Bio/Graphics/Glyph/extending_arrow.pm - version: 0 Bio::Graphics::Glyph::fb_shmiggle: file: lib/Bio/Graphics/Glyph/fb_shmiggle.pm - version: 0 Bio::Graphics::Glyph::fixedwidth: file: lib/Bio/Graphics/Glyph/fixedwidth.pm - version: 0 Bio::Graphics::Glyph::flag: file: lib/Bio/Graphics/Glyph/flag.pm - version: 0 Bio::Graphics::Glyph::gene: file: lib/Bio/Graphics/Glyph/gene.pm - version: 0 Bio::Graphics::Glyph::generic: file: lib/Bio/Graphics/Glyph/generic.pm - version: 0 Bio::Graphics::Glyph::graded_segments: file: lib/Bio/Graphics/Glyph/graded_segments.pm - version: 0 Bio::Graphics::Glyph::group: file: lib/Bio/Graphics/Glyph/group.pm - version: 0 Bio::Graphics::Glyph::hat: file: lib/Bio/Graphics/Glyph/hat.pm - version: 0 Bio::Graphics::Glyph::heat_map: file: lib/Bio/Graphics/Glyph/heat_map.pm - version: 0 Bio::Graphics::Glyph::heat_map_ideogram: file: lib/Bio/Graphics/Glyph/heat_map_ideogram.pm - version: 0 Bio::Graphics::Glyph::heterogeneous_segments: file: lib/Bio/Graphics/Glyph/heterogeneous_segments.pm - version: 0 Bio::Graphics::Glyph::hidden: file: lib/Bio/Graphics/Glyph/hidden.pm - version: 0 Bio::Graphics::Glyph::hybrid_plot: file: lib/Bio/Graphics/Glyph/hybrid_plot.pm - version: 1.0 + version: '1.0' Bio::Graphics::Glyph::ideogram: file: lib/Bio/Graphics/Glyph/ideogram.pm - version: 0 Bio::Graphics::Glyph::image: file: lib/Bio/Graphics/Glyph/image.pm - version: 0 Bio::Graphics::Glyph::lightning: file: lib/Bio/Graphics/Glyph/lightning.pm - version: 0 Bio::Graphics::Glyph::line: file: lib/Bio/Graphics/Glyph/line.pm - version: 0 Bio::Graphics::Glyph::merge_parts: file: lib/Bio/Graphics/Glyph/merge_parts.pm - version: 0 Bio::Graphics::Glyph::merged_alignment: file: lib/Bio/Graphics/Glyph/merged_alignment.pm - version: 0 Bio::Graphics::Glyph::minmax: file: lib/Bio/Graphics/Glyph/minmax.pm - version: 0 Bio::Graphics::Glyph::operon: file: lib/Bio/Graphics/Glyph/operon.pm - version: 0 Bio::Graphics::Glyph::oval: file: lib/Bio/Graphics/Glyph/oval.pm - version: 0 Bio::Graphics::Glyph::pairplot: file: lib/Bio/Graphics/Glyph/pairplot.pm - version: 0 Bio::Graphics::Glyph::pentagram: file: lib/Bio/Graphics/Glyph/pentagram.pm - version: 0 Bio::Graphics::Glyph::phylo_align: file: lib/Bio/Graphics/Glyph/phylo_align.pm - version: 0 Bio::Graphics::Glyph::pinsertion: file: lib/Bio/Graphics/Glyph/pinsertion.pm - version: 0 Bio::Graphics::Glyph::point_glyph: file: lib/Bio/Graphics/Glyph/point_glyph.pm - version: 0 Bio::Graphics::Glyph::primers: file: lib/Bio/Graphics/Glyph/primers.pm - version: 0 Bio::Graphics::Glyph::processed_transcript: file: lib/Bio/Graphics/Glyph/processed_transcript.pm - version: 0 Bio::Graphics::Glyph::protein: file: lib/Bio/Graphics/Glyph/protein.pm - version: 0 Bio::Graphics::Glyph::ragged_ends: file: lib/Bio/Graphics/Glyph/ragged_ends.pm - version: 0 Bio::Graphics::Glyph::rainbow_gene: file: lib/Bio/Graphics/Glyph/rainbow_gene.pm - version: 0 Bio::Graphics::Glyph::read_pair: file: lib/Bio/Graphics/Glyph/read_pair.pm - version: 0 Bio::Graphics::Glyph::redgreen_box: file: lib/Bio/Graphics/Glyph/redgreen_box.pm - version: 0 Bio::Graphics::Glyph::redgreen_segment: file: lib/Bio/Graphics/Glyph/redgreen_segment.pm - version: 0 Bio::Graphics::Glyph::repeating_shape: file: lib/Bio/Graphics/Glyph/repeating_shape.pm - version: 0 Bio::Graphics::Glyph::rndrect: file: lib/Bio/Graphics/Glyph/rndrect.pm - version: 0 Bio::Graphics::Glyph::ruler_arrow: file: lib/Bio/Graphics/Glyph/ruler_arrow.pm - version: 0 Bio::Graphics::Glyph::saw_teeth: file: lib/Bio/Graphics/Glyph/saw_teeth.pm - version: 0 Bio::Graphics::Glyph::scale: file: lib/Bio/Graphics/Glyph/scale.pm - version: 0 Bio::Graphics::Glyph::segmented_keyglyph: file: lib/Bio/Graphics/Glyph/segmented_keyglyph.pm - version: 0 Bio::Graphics::Glyph::segments: file: lib/Bio/Graphics/Glyph/segments.pm - version: 0 Bio::Graphics::Glyph::smoothing: file: lib/Bio/Graphics/Glyph/smoothing.pm - version: 0 Bio::Graphics::Glyph::so_transcript: file: lib/Bio/Graphics/Glyph/so_transcript.pm - version: 0 Bio::Graphics::Glyph::span: file: lib/Bio/Graphics/Glyph/span.pm - version: 0 Bio::Graphics::Glyph::spectrogram: file: lib/Bio/Graphics/Glyph/spectrogram.pm - version: 0 Bio::Graphics::Glyph::splice_site: file: lib/Bio/Graphics/Glyph/splice_site.pm - version: 0 Bio::Graphics::Glyph::stackedplot: file: lib/Bio/Graphics/Glyph/stackedplot.pm - version: 0 Bio::Graphics::Glyph::ternary_plot: file: lib/Bio/Graphics/Glyph/ternary_plot.pm - version: 0 Bio::Graphics::Glyph::text_in_box: file: lib/Bio/Graphics/Glyph/text_in_box.pm - version: 0 Bio::Graphics::Glyph::three_letters: file: lib/Bio/Graphics/Glyph/three_letters.pm - version: 0 Bio::Graphics::Glyph::tic_tac_toe: file: lib/Bio/Graphics/Glyph/tic_tac_toe.pm - version: 0 Bio::Graphics::Glyph::toomany: file: lib/Bio/Graphics/Glyph/toomany.pm - version: 0 + Bio::Graphics::Glyph::topoview: + file: lib/Bio/Graphics/Glyph/topoview.pm Bio::Graphics::Glyph::trace: file: lib/Bio/Graphics/Glyph/trace.pm - version: 0 Bio::Graphics::Glyph::track: file: lib/Bio/Graphics/Glyph/track.pm - version: 0 Bio::Graphics::Glyph::transcript: file: lib/Bio/Graphics/Glyph/transcript.pm - version: 0 Bio::Graphics::Glyph::transcript2: file: lib/Bio/Graphics/Glyph/transcript2.pm - version: 0 Bio::Graphics::Glyph::translation: file: lib/Bio/Graphics/Glyph/translation.pm - version: 0 Bio::Graphics::Glyph::triangle: file: lib/Bio/Graphics/Glyph/triangle.pm - version: 0 Bio::Graphics::Glyph::two_bolts: file: lib/Bio/Graphics/Glyph/two_bolts.pm - version: 0 Bio::Graphics::Glyph::vista_plot: file: lib/Bio/Graphics/Glyph/vista_plot.pm - version: 1.0 + version: '1.0' Bio::Graphics::Glyph::wave: file: lib/Bio/Graphics/Glyph/wave.pm - version: 0 Bio::Graphics::Glyph::weighted_arrow: file: lib/Bio/Graphics/Glyph/weighted_arrow.pm - version: 0 Bio::Graphics::Glyph::whiskerplot: file: lib/Bio/Graphics/Glyph/whiskerplot.pm - version: 0 Bio::Graphics::Glyph::wiggle_box: file: lib/Bio/Graphics/Glyph/wiggle_box.pm - version: 0 Bio::Graphics::Glyph::wiggle_data: file: lib/Bio/Graphics/Glyph/wiggle_data.pm - version: 0 Bio::Graphics::Glyph::wiggle_density: file: lib/Bio/Graphics/Glyph/wiggle_density.pm - version: 0 Bio::Graphics::Glyph::wiggle_whiskers: file: lib/Bio/Graphics/Glyph/wiggle_whiskers.pm - version: 0 Bio::Graphics::Glyph::wiggle_xyplot: file: lib/Bio/Graphics/Glyph/wiggle_xyplot.pm - version: 0 Bio::Graphics::Glyph::xyplot: file: lib/Bio/Graphics/Glyph/xyplot.pm - version: 0 Bio::Graphics::Layout: file: lib/Bio/Graphics/Layout.pm - version: 0 Bio::Graphics::Layout::Contour: file: lib/Bio/Graphics/Layout.pm - version: 0 Bio::Graphics::Math: file: lib/Bio/Graphics/Layout.pm - version: 0 Bio::Graphics::Panel: file: lib/Bio/Graphics/Panel.pm - version: 0 Bio::Graphics::Pictogram: file: lib/Bio/Graphics/Pictogram.pm - version: 0 Bio::Graphics::RendererI: file: lib/Bio/Graphics/RendererI.pm - version: 0 Bio::Graphics::Util: file: lib/Bio/Graphics/Util.pm - version: 0 Bio::Graphics::Wiggle: file: lib/Bio/Graphics/Wiggle.pm - version: 1.0 + version: '1.0' Bio::Graphics::Wiggle::Loader: file: lib/Bio/Graphics/Wiggle/Loader.pm - version: 0 recommends: - GD::SVG: 0.32 - Text::ParseWords: 3.26 + GD::SVG: '0.32' + Text::ParseWords: '3.26' requires: - Bio::Root::Version: 1.005009001 - GD: 2.3 - Statistics::Descriptive: 2.6 + Bio::Coordinate::Pair: '1.005009001' + Bio::Root::Version: '1.005009001' + CGI: '0' + GD: '2.3' + Statistics::Descriptive: '2.6' resources: license: http://dev.perl.org/licenses/ -version: 2.39 +version: '2.40' diff --git a/Makefile.PL b/Makefile.PL index 34def82..5e79e67 100644 --- a/Makefile.PL +++ b/Makefile.PL @@ -1,4 +1,4 @@ -# Note: this file was auto-generated by Module::Build::Compat version 0.4205 +# Note: this file was auto-generated by Module::Build::Compat version 0.4218 unless (eval "use Module::Build::Compat 0.02; 1" ) { print "This module requires Module::Build to install itself.\n"; diff --git a/README b/README index 4cf6084..f0a99a2 100755 --- a/README +++ b/README @@ -1,5 +1,6 @@ Bio::Graphics - Generate GD images of Bio::Seq objects + SYNOPSIS This is a simple GD-based renderer (diagram drawer) for DNA and diff --git a/lib/Bio/Graphics.pm b/lib/Bio/Graphics.pm index a02c97a..3fb5a68 100755 --- a/lib/Bio/Graphics.pm +++ b/lib/Bio/Graphics.pm @@ -2,7 +2,7 @@ package Bio::Graphics; use strict; use Bio::Graphics::Panel; -our $VERSION = '2.39'; +our $VERSION = '2.40'; 1; @@ -129,4 +129,3 @@ it under the same terms as Perl itself. See DISCLAIMER.txt for disclaimers of warranty. =cut - diff --git a/lib/Bio/Graphics/Glyph/gene.pm b/lib/Bio/Graphics/Glyph/gene.pm index e9ef7ce..7e21623 100755 --- a/lib/Bio/Graphics/Glyph/gene.pm +++ b/lib/Bio/Graphics/Glyph/gene.pm @@ -70,7 +70,7 @@ sub extra_arrow_length { sub pad_left { my $self = shift; my $type = $self->feature->primary_tag; - return 0 unless $type =~ /gene|mRNA|transcript/; + return 0 unless $type =~ /gene|rna|transcript/i; $self->SUPER::pad_left; } @@ -148,6 +148,12 @@ sub maxdepth { return 2; } +sub fixup_glyph { + my $self = shift; + return unless $self->level == 1; + $self->create_implied_utrs if $self->option('implied_utrs'); + $self->adjust_exons if $self->option('implied_utrs') || $self->option('adjust_exons'); +} sub _subfeat { my $class = shift; @@ -155,7 +161,7 @@ sub _subfeat { if ($feature->primary_tag =~ /^gene/i) { my @transcripts; - for my $t (qw/mRNA tRNA snRNA snoRNA miRNA ncRNA pseudogene/) { + for my $t (qw/mRNA tRNA snRNA snoRNA miRNA ncRNA pseudogene transcript/) { push @transcripts, $feature->get_SeqFeatures($t); } return @transcripts if @transcripts; @@ -176,7 +182,11 @@ sub _subfeat { @subparts = $feature->get_SeqFeatures($class->option('sub_part')); } elsif ($feature->primary_tag =~ /^mRNA/i) { - @subparts = $feature->get_SeqFeatures(qw(CDS five_prime_UTR three_prime_UTR UTR)); + if ($class->option('implied_utrs') || $class->option('adjust_exons')) { + @subparts = $feature->get_SeqFeatures(qw(CDS exon five_prime_UTR three_prime_UTR UTR)); + } else { + @subparts = $feature->get_SeqFeatures(qw(CDS five_prime_UTR three_prime_UTR UTR)); + } } else { @subparts = $feature->get_SeqFeatures('exon'); @@ -299,10 +309,6 @@ options: Draw strand with little arrows undef (false) on the intron. -The B<-adjust_exons> and B<-implied_utrs> options are inherited from -processed_transcript, but are quietly ignored. Please use the -processed_transcript glyph for this type of processing. - =head1 BUGS The SO terms are hard-coded. They should be more flexible and should diff --git a/lib/Bio/Graphics/Glyph/rainbow_gene.pm b/lib/Bio/Graphics/Glyph/rainbow_gene.pm index a7df05c..62eb929 100755 --- a/lib/Bio/Graphics/Glyph/rainbow_gene.pm +++ b/lib/Bio/Graphics/Glyph/rainbow_gene.pm @@ -82,7 +82,7 @@ sub extra_arrow_length { sub pad_left { my $self = shift; my $type = $self->feature->primary_tag; - return 0 unless $type =~ /gene|mRNA/; + return 0 unless $type =~ /gene|rna|transcript/i; $self->SUPER::pad_left; } @@ -167,7 +167,7 @@ sub _subfeat { if ($feature->primary_tag =~ /^gene/i) { my @transcripts; - for my $t (qw/mRNA tRNA snRNA snoRNA miRNA ncRNA pseudogene/) { + for my $t (qw/mRNA tRNA snRNA snoRNA miRNA ncRNA pseudogene transcript/) { push @transcripts, $feature->get_SeqFeatures($t); } return @transcripts if @transcripts; diff --git a/lib/Bio/Graphics/Glyph/segments.pm b/lib/Bio/Graphics/Glyph/segments.pm index 7626fbb..49df27e 100755 --- a/lib/Bio/Graphics/Glyph/segments.pm +++ b/lib/Bio/Graphics/Glyph/segments.pm @@ -268,15 +268,16 @@ sub _split_on_cigar { my $source_end = $feature->end; my $ss = $feature->strand; my $ts = $feature->hit->strand; + + # Render -/- alignments on the + strand + if ($ss == -1 && $ts == -1) { + $ss = 1; + } + my $target_start = eval {$feature->hit->start} || return $feature; my (@parts); - # BUG: we handle +/+ and -/+ alignments, but not +/- or -/- - # (i.e. the target has got to have forward strand coordinates) - - # forward strand - if ($ss >= 0) { for my $event (@$cigar) { my ($op,$count) = @$event; if ($op eq 'I' || $op eq 'S' || $op eq 'H') { @@ -297,28 +298,6 @@ sub _split_on_cigar { } } - # minus strand - } else { - for my $event (@$cigar) { - my ($op,$count) = @$event; - if ($op eq 'I' || $op eq 'S' || $op eq 'H') { - $target_start += $count; - } - elsif ($op eq 'D' || $op eq 'N') { - $source_end -= $count; - } - elsif ($op eq 'P') { - # do nothing for pads - } else { # everything else is assumed to be a match -- revisit - push @parts,[$source_end-$count+1,$source_end, - $target_start,$target_start+$count-1]; - $source_end -= $count; - $target_start += $count; - } - } - - } - my $id = $feature->seq_id; my $tid = $feature->hit->seq_id; my @result = map { diff --git a/lib/Bio/Graphics/Glyph/topoview.pm b/lib/Bio/Graphics/Glyph/topoview.pm new file mode 100755 index 0000000..607cb2b --- /dev/null +++ b/lib/Bio/Graphics/Glyph/topoview.pm @@ -0,0 +1,496 @@ +package Bio::Graphics::Glyph::topoview; + +# Based on the fb_shmiggle glyph by Victor Strelets, FlyBase.org +# 2009-2010 Victor Strelets, FlyBase.org +# Sheldon McKay <[email protected]> 2015 + + +# "topoview.pm" the TopoView glyph was developed for fast +# 3D-like demonstration of RNA-seq data consisting of multiple +# individual subsets. The main purposes were to compact presentation +# as much as possible (in one reasonably sized track) and +# to allow easy visual detection of coordinated behavior +# of the expression profiles of different subsets. + +# See http://gmod.org/wiki/Using_the_topoview_Glyph for complete documentation + +use strict; +use GD; +use base 'Bio::Graphics::Glyph::generic'; +use Text::ParseWords 'shellwords'; +use Data::Dumper; +use List::Util qw/min max shuffle/; +use constant DEBUG => 0; + +use vars qw/$colors_selected $black $red $white $grey @colors %Indices + $black $darkgrey $lightgrey $charcoal/; + +sub draw { + my $self = shift; + my $gd = shift; + my ($left,$top,$partno,$total_parts) = @_; + my $ft = $self->feature; + + $black = $gd->colorClosest(0,0,0); + $lightgrey = $gd->colorClosest(225,225,225); + $grey = $gd->colorClosest(200,200,200); + $darkgrey = $gd->colorClosest(125,125,125); + $charcoal = $gd->colorClosest(75,75,75); + + # User specified edge color or else charcoal gray (black is very harsh) + if (my $edge_color = $self->option('edge color')) { + my $col = $self->factory->translate_color($edge_color); + $self->{fgcolor} = $col; + } + else { + $self->{fgcolor} = $charcoal; + } + + my($pnstart,$pnstop) = ($self->panel->start,$self->panel->end); # in seq coordinates + my($xf1,$yf1,$xf2,$yf2) = $self->calculate_boundaries($left,$top); + my $nseqpoints = $pnstop - $pnstart + 1; + my $leftpad = $self->panel->pad_left; + $ft->{datadir} ||= $self->option('datadir'); + my $datadir = $ENV{SERVER_PATH} . $ft->{datadir}; + + my($start,$stop) = $self->panel->location2pixel(($ft->{start},$ft->{end})); + my $ftscrstop = $stop + $leftpad; + my $ftscrstart = $start + $leftpad; + + my $chromosome = $ft->{ref}; + my $flipped = $self->{flip} ? 1 : 0; + my($subsets,$subsetsnames,$signals) = $self->getData($ft,$datadir,$chromosome,$pnstart,$pnstop,$xf1,$xf2,$flipped,$gd); + + my $poly_pkg = $self->polygon_package; + + my @orderedsubsets = @{$subsets}; + my $nsets = $#orderedsubsets+1; + + # x and y steps + my $xstep = $self->option('x_step'); + my $ystep = $self->option('y_step'); + unless ($ystep) { + $ystep = int(100/$nsets); + $ystep = 7 if $ystep >= 7; # empiricaly found - to read lines of tiny fonts + $ystep = 12 if $ystep > 12; # empirically found - to preserve topo feel when number of subsets is small + } + unless ($xstep) { + $xstep = 4; + } + my($xw,$yw) = ( $nsets*$xstep, ($nsets-1)*$ystep ); + + my $polybg = $poly_pkg->new(); + $polybg->addPt($xf1,$yf2-$yw); + $polybg->addPt($xf2,$yf2-$yw); + $polybg->addPt($xf2-$xw, $yf2); + $polybg->addPt($xf1-$xw, $yf2); + $gd->polygon($polybg,$lightgrey); # background + for( my $xx = $xf1+2; $xx<$xf2; $xx+=6 ) { $gd->line($xx,$yf2-$yw,$xx-$xw,$yf2,$grey); } # grid-helper + + my $xshift = 0; + my $yshift = $nsets * $ystep; + + my @screencoords = @{$signals->{screencoords}}; + my $max_signal = 30; + my $koeff = 4; + if( my $max = $self->max_score ) { + $max_signal = $max; + $koeff = 80.0/$max_signal; + } + my $predictor_cutoff = int($max_signal*0.95); # empirically found + my @prevx = (); + my @prevy = (); + my @prevvals = (); + my $profilen = $self->{no_max} ? 1 : 0; + my %SPEEDUP = (); + my $scrx = 0; + my $no_fill = defined $self->option('fill') && !$self->option('fill'); + + foreach my $subset ( @orderedsubsets ) { + my ($color,$bgcolor,$edgecolor); + if ( $self->{bgcolor} ) { + $bgcolor = $self->{bgcolor}->{$subset}; + } + + if ($profilen == 0) { + ($color,$edgecolor) = ($darkgrey,$charcoal); + } + else { + $color = $bgcolor; + $edgecolor = $self->{fgcolor}; + } + + $xshift -= $xstep; + $yshift -= $ystep; + my @values = @{$signals->{$subset}}; + my($xold,$yold)= ($xf1+$xshift,$yf2-$yshift+1); + my $xpos = 0; + my $poly = $poly_pkg->new(); + $poly->addPt($xold,$yold+1); + my @allx = ($xold); + my @ally = ($yold); + my @allvals = (0); + my $runx = $xf1 + $xshift; + + foreach my $val ( @values ) { + $scrx += $leftpad; + my $x = $screencoords[$xpos] + $xshift; + my $visval; + if( exists $SPEEDUP{$val} ) { $visval = $SPEEDUP{$val}; } + else { $visval = int($val*$koeff); $SPEEDUP{$val}= $visval; } + my $y = $yf2 - $yshift - $visval; + push(@allx,$x); + push(@ally,$y); + push(@allvals,$visval); + if( $xpos>0 ) { + $poly->addPt($x,$y+1); + } + ($xold,$yold)= ($x,$y); + $xpos++; + } + $poly->addPt($xf2+$xshift, $yf2-$yshift+1); + unless ($profilen == 0 || $no_fill) { + $gd->filledPolygon($poly,$color); + } + ($xold,$yold)= ($allx[0],$ally[0]); + for( my $en =1; $en<=$#allx; $en++ ) { + my $x = $allx[$en]; + my $y = $ally[$en]; + $gd->line($xold,$yold,$x,$y,$edgecolor); + ($xold,$yold)= ($x,$y); + } + + # add scale bar to left and mid-point + if ($profilen == 1) { + my($xmin,$yyy,$ymax) = ($allx[1]-1,$yf2-$yw,$ally[0]); + my $xmid = int(($allx[-1] - $xmin)/2); + $self->add_scale_bar([$xmin,$xmid],$yyy,$max_signal,$gd); + $self->{_xmid} = $xmid; + $self->{_ymin} = $ymax; + } + + $self->{_key}->{$subsetsnames->{$subset}} = $color; + + $gd->string(GD::Font->Tiny,$xf2+$xshift+3, $yf2-$yshift-5,$subsetsnames->{$subset},$color); + + unless( $profilen ==0 ) { @prevx = @allx; @prevy = @ally; @prevvals = @allvals; } + $profilen++; + } + + my $hide_key = defined $self->option('show key') && !$self->option('show key'); + $self->add_subset_key($gd,$subsets) unless $hide_key; + + $gd->flipVertical() if $self->option('flip vertical'); + + return; +} + +sub add_scale_bar { + my ($self,$xx,$y,$max_signal,$gd) = @_; + return if $self->option('flip vertical'); + for my $x (@$xx) { + $gd->string(GD::Font->Tiny,$x-12, $y-3,'0',$black); + $gd->line($x-2,$y,$x-2,$y-50,$black); + $gd->line($x-4,$y-47,$x-2,$y-50,$black); + $gd->line($x,$y-47,$x-2,$y-50,$black); + $gd->line($x-4,$y-44,$x-2,$y-44,$black); + $gd->string(GD::Font->Tiny,$x-18, $y-47,int($max_signal+0.5),$black); + } +} + +sub add_subset_key { + my ($self,$gd,$subsets) = @_; + my @subsets = grep {!/MAX/} @$subsets; + return if $self->option('flip vertical'); + my $first_x = $self->{_xmid}-250; + my $first_y = 12; + my $key_colors = $self->{_key}; + my $font = GD::Font->MediumBold; + my $width = $self->width; + my $total_key_width = 18; + + my ($longest_string) = sort {$b <=> $a} + map {$self->string_width($_,$font)} @subsets; + + my $count; + my $x = $first_x; + my $y = $first_y; + + my $cutoff = 100; + if (@subsets > 8 && !(@subsets %2)) { + $cutoff = @subsets/2 + 1; + } + elsif (@subsets > 8) { + $cutoff = int(@subsets/2 + 0.5); + } + + for my $subset (@subsets) { + if (++$count == $cutoff) { + $x = $first_x; + $y = $first_y + 12; + } + my $color = $key_colors->{$subset}; + my $edgecolor = $self->{fgcolor}; + my $string_width = $self->string_width($subset,$font); + $gd->rectangle($x,$y,$x+10,$y+10,$edgecolor); + $gd->filledRectangle($x,$y,$x+10,$y+10,$color); + $x += 14; + $gd->string($font,$x,$y,$subset,$black); + $x += $longest_string + 8; + } +} + +#-------------------------- +sub getData { + my $self = shift; + my($ft,$datadir,$chromosome,$start,$stop,$scrstart,$scrstop,$flipped,$gd) = @_; + my $global_max_signal = $self->option('max_score') || 0; + my %Signals = (); + $self->openDataFiles($datadir); + + my $subset_text = $self->option('subset order'); + if ($subset_text) { + my @words = shellwords($subset_text); + + # subset + color + if (!(@words %2) && $words[1] =~ /^[0-9A-F]{6}$/ && $words[2] !~ /^[.0-9]+$/) { + while (@words) { + push @{$ft->{subsetsorder}}, [splice(@words,0,2)]; + } + } + # subset + color + alpha + elsif (!(@words %3) && $words[1] =~ /^[0-9A-F]{6}$/) { + while (@words) { + push @{$ft->{subsetsorder}}, [splice(@words,0,3)]; + } + } + # no color specified? Random color for you. Good luck! + else { + for my $word (@words) { + push @{$ft->{subsetsorder}}, [$word,$self->random_color()]; + } + } + + } + + my @subsets = (exists $ft->{'subsetsorder'}) ? @{$ft->{'subsetsorder'}} : sort split(/\t+/,$Indices{'subsets'}); + + my $user_max = $self->option('max_score'); + + # This bit of code reads in user-specified bgcolor, if provided + if ( ref $subsets[0] eq 'ARRAY' ) { + for (@subsets) { + next unless ref $_ eq 'ARRAY'; + my ($subset,$color,$alpha) = @$_; + $alpha ||= $self->option('fill opacity') || 1.0; + + if ($alpha && $alpha > 1) { + die "Alpha must be between zero and 1"; + } + + # make it hex if it looks like hex + if ((length $color == 6) && $color =~ /^[0-9A-F]+$/) { + $color = '#'.$color; + } + my $bgcolor = $self->factory->transparent_color($alpha,$color); + my $fgcolor = $self->translate_color($color); + $self->{bgcolor}->{$subset} = $bgcolor; + + # We will re-use this array later + $_ = $subset; + } + } + + shift(@subsets) if $subsets[0] eq 'MAX'; + warn("subsets: @subsets\n") if DEBUG; + + my %SubsetsNames = (exists $ft->{'subsetsnames'}) ? %{$ft->{'subsetsnames'}} : map { $_, $_ } @subsets; + $SubsetsNames{MAX}= 'MAX'; + my $screenstep = ($scrstop-$scrstart+1) * 1.0 / ($stop-$start+1); + my $donecoords = 0; + my $local_max_signal = 0; + + foreach my $subset ( @subsets ) { + my $nstrings = 0; + # scan seq ranges offsets to see where to start reading + my $key = $subset.':'.$chromosome; + my $poskey = $key.':offsets'; + my $ranges_pos = (exists $Indices{$poskey}) ? int($Indices{$poskey}) : -1; + if( $ranges_pos == -1 ) { next; } # no such signal.. + warn(" positioning for $poskey starts at $ranges_pos\n") if DEBUG; + if( $start>=1000000 ) { + my $bigstep = int($start/1000000.0); + if( exists $Indices{$key.':offsets:'.$bigstep} ) { + my $jumpval = $Indices{$key.':offsets:'.$bigstep}; + warn(" jump in offset search to $jumpval\n") if DEBUG; + $ranges_pos = int($jumpval); } + } + seek(DATF,$ranges_pos,0); + my($offset,$offset1)= (0,0); + my $lastseqloc = -999999999; + my $useoffset = 0; + while( (my $strs =<DATF>) ) { + $nstrings++ if DEBUG; + if( DEBUG ) { + chop($strs); warn(" positioning read for coord $start ($strs)\n"); } + last unless $strs =~m/^(-?\d+)[ \t]+(\d+)/; + my($seqloc,$fileoffset)= ($1,$2); + if( DEBUG ) { + chop($strs); warn(" positioning read for $poskey => $seqloc, $fileoffset ($strs)\n"); } + $offset1 = $offset; + $offset = $fileoffset; + $lastseqloc = $seqloc; + if( $seqloc > $start ) { $useoffset = int($offset1); last; } + } + warn(" will use offset $useoffset\n") if DEBUG; + warn(" (scanned $nstrings offset strings)\n") if DEBUG; + if( $useoffset ==0 ) { # data offset cannot be 0 - means didn't find where to read required data.. + next; + my @emptyvals = (); + for( my $ii = $scrstart; $ii++ <= $scrstop; ) { push(@emptyvals,0); } + $Signals{$subset}= \@emptyvals; + } + $nstrings = 0; + # read signal profile + seek(DATF,$useoffset,0); + $lastseqloc = -999999999; + my $lastsignal = 0; + my($scrx,$scrxold)= ($scrstart,$scrstart-1); + my $runmax = 0; + my @values = (); + my @xscreencoords = (); + + while( (my $str =<DATF>) ) { + $nstrings++ if DEBUG; + unless( $str =~m/^(-?\d+)[ \t]+(\d+)/ ) { + warn(" header read: $str") if DEBUG; + last; # because no headers were indexed at the beginning of data packs + } + my($seqloc,$signal)= ($1,$2); + my $real_signal = $signal; + $signal = $user_max if $user_max && $signal > $user_max; + $local_max_signal = $signal if $signal > $local_max_signal; + + warn(" signal read: $seqloc, $signal line: $str") if DEBUG; + last if $lastseqloc > $seqloc; # just in case, as all sits merged in one file.. + if( $seqloc>=$start ) { # current is the next one after the one we need to start from.. + unless( $lastseqloc == -999999999 ) { # expand previous + $lastseqloc = $start-2 if $lastseqloc<$start; # limit empty steps (they may start from -200000) + while( $lastseqloc < $seqloc ) { # until another (one we just retrieved) wiggle reading + last if $lastseqloc > $stop; # end of subset data + next if $lastseqloc++ < $start; + # we have actual new seq position in our required range + my $scrpos = int($scrx); + $runmax = $lastsignal if $runmax < $lastsignal; + if( $scrpos != $scrxold ) { # we have actual new seq _and_ screen position + push(@values,$runmax); + push(@xscreencoords,$scrpos) unless $donecoords; + #print STDERR Dumper \@xscreencoords unless $donecoords; + $scrxold = $scrpos; + $runmax = 0; + } + $scrx += $screenstep; # remember - it is not integer + } + } + } + ($lastseqloc,$lastsignal)= ($seqloc,$signal); + last if $seqloc > $stop; # end of subset data + } + if( $lastseqloc < $stop ) { # if on the end of signal profile, but still in screen range + # just assume that we are getting one more reading with signal == 0 + my $signal = 0; + while( $lastseqloc++ < $stop ) { + my $scrpos = int($scrx); + if( $scrpos != $scrxold ) { # we have actual new seq _and_ screen position + push(@values,$signal); + push(@xscreencoords,$scrpos) unless $donecoords; + $scrxold = $scrpos; + } + $scrx += $screenstep; + } + } + warn(" (scanned $nstrings signal strings)\n") if DEBUG; + $nstrings = 0; + if( $flipped ) { + my @ch = reverse @values; @values = @ch; + } + warn(" ".$subset."=> ".@values." values @values\n") if DEBUG && $#values<1000; + $Signals{$subset}= \@values; + $Signals{screencoords}= \@xscreencoords unless $donecoords; + $donecoords = 1; + } # foreach my $subset ( @subsets ) { + + # scaling can be local, user-defined max or global max + my $scale = $self->option('autoscale') || 'global'; + if ($scale eq 'local' && !$user_max) { + $self->max_score($local_max_signal); + } + else { + $self->max_score($user_max || $Indices{max_signal}); + } + + warn(" max_signal => ".$self->max_score." \n") if DEBUG; + + # prepare MAX profile - will be used as a base for exon/UTR prediction + $self->{no_max} = defined $self->option('show max') && ! $self->option('show max'); + unless ($self->{no_max}) { + my @maxprofile = (); + my @ruler = @{$Signals{screencoords}}; + for( my $npos = 0; $npos<=$#ruler; $npos++ ) { + my $maxval = 0; + foreach my $subset ( @subsets ) { + my $p = $Signals{$subset}; + my $val = $p->[$npos]; + $maxval = $val if $maxval < $val; + } + push(@maxprofile,$maxval); + } + $Signals{MAX}= \@maxprofile; + warn(" MAX => ".@maxprofile." values @maxprofile\n") if DEBUG && $#maxprofile<1000; + unshift(@subsets,'MAX'); + } + + return(\@subsets,\%SubsetsNames, \%Signals); +} + +#-------------------------- +sub openDataFiles { + my $self = shift; + my $datadir = shift; + $datadir.= '/' unless $datadir =~m|/$|; + my $datafile = $datadir.'data.cat'; + open(DATF,$datafile) || die("cannot open $datafile\n"); + use BerkeleyDB; # caller should already used proper 'use lib' command with path + my $bdbfile = $datadir . 'index.bdbhash'; + tie %Indices, "BerkeleyDB::Hash", -Filename => $bdbfile, -Flags => DB_RDONLY || warn("can't read BDBHash $bdbfile\n"); + if( DEBUG ) { foreach my $kk ( sort keys %Indices ) { warn(" $kk => ".$Indices{$kk}."\n"); } } + return; +} + +sub min_score { + # not implemented +} + +sub max_score { + my $self = shift; + my $score = shift; + $self->{max_score} ||= $score; + return $self->{max_score}; +} + +sub random_color { + my $self = shift; + my @nums = 0..9,'A'..'F'; + my $color; + for (0..5) { + my @array = shuffle(@nums); + my $char = shift @array; + $color .= $char; + } + return $color; +} + + +1; + + diff --git a/scripts/bam_coverage_windows.pl b/scripts/bam_coverage_windows.pl new file mode 100755 index 0000000..f593df3 --- /dev/null +++ b/scripts/bam_coverage_windows.pl @@ -0,0 +1,131 @@ +#!/usr/bin/perl -w +use strict; +use List::Util 'sum'; +use Getopt::Long; + +use vars qw/$start $end $current_chr @scores %seen %highest $win $normal $bam/; + +use constant WIN => 25; + +# A script to make bam coverage windows in WIG/BED 4 format +# requires that samtools be installed +# This script operates on one bam file at a time. If you are comparing +# across bam files of diffenrent sizes (read numbers), take note +# of the normalization option. +# Sheldon McKay ([email protected]) + + +BEGIN { + die "samtools must be installed!" unless `which samtools`; +} + + +GetOptions ("normalize=i" => \$normal, + "bam=s" => \$bam, + "window=i" => \$win); + +$bam or usage(); + +open BAM, "samtools depth $bam |"; + +$win ||= WIN; +$start = 1; +$end = $win; + +my $factor = normalization_factor($normal); + +chomp(my $name = `basename $bam .bam`); +print qq(track type=wiggle_0 name="$name" description="read coverage for $bam (window size $win)"\n); + +while (<BAM>) { + chomp; + my ($chr,$coord,$score) = split; + $current_chr ||= $chr; + + check_sorted($chr,$coord); + + if ( $chr ne $current_chr || + $coord > $end ) { + open_window($chr,$coord); + } + + push @scores, $score; + $current_chr = $chr; +} + + +sub open_window { + my ($chr,$coord) = @_; + + if ($chr ne $current_chr) { + $seen{$current_chr}++; + } + + # close the last window, if needed + if (@scores > 0) { + close_window(); + } + + $start = nearest_start($coord); + $end = $start + $win; +} + +sub close_window { + my $sum = sum(@scores) or return 0; + my $score = $sum/$win; + $score *= $factor; + print join("\t",$current_chr,$start,$end,$score), "\n"; + @scores = (); + exit 0 unless $score; +} + +sub nearest_start { + my $start = shift; + + return 1 if $start < $win; + + while ($start % $win) { + $start--; + } + + return $start; +} + +sub normalization_factor { + return 1 unless my $nahm = shift; + print STDERR "Calculating total number of reads in $bam\n"; + chomp(my $total = `samtools view -c $bam`); + print STDERR "$bam has $total reads\n"; + return $total/$nahm; +} + +# sanity check for unsorted bam +sub check_sorted { + my ($chr,$coord) = @_; + + return 1 if $coord > ($highest{$chr} || 0); + $highest{$chr} = $coord; + + return 1 unless $seen{$chr}; + + die_unsorted($chr,$coord); +} + +sub die_unsorted { + my ($chr,$coord) = @_; + die "$chr $coord: $bam does not appear to be sorted\n", + "Please try 'samtools sort' first"; +} + +sub usage { + die ' +Usage: perl bam_coverage_windows.pl -b bamfile -n 10_000_000 -w 25 + -b name of bam file to read REQUIRED + -w window size (default 25) + -n normalized read number -- if you will be comparing multiple bam files + select the read number to normalize against. + all counts will be adjusted by a factor of: + actual readnum/normalized read num + +' +} diff --git a/scripts/coverage_to_topoview.pl b/scripts/coverage_to_topoview.pl new file mode 100755 index 0000000..cc432d6 --- /dev/null +++ b/scripts/coverage_to_topoview.pl @@ -0,0 +1,247 @@ +#!/usr/bin/perl -w +use strict; +use BerkeleyDB; +use Data::Dumper; +use Getopt::Long; + +# Based on http:/flybase.org/static_pages/docs/software/index_cov_files.pl +# This script will process the output of bam_coverage_windows.pl +# to the data structure needed by the topoview glyph +# Sheldon McKay ([email protected]) + +my ($log,$outdir,$help); +GetOptions ( + "log" => \$log, + "output-dir=s" => \$outdir, + "help" => \$help +); + +my $usage = q( +Usage: perl coverage_to_topoview.pl [-o output_dir] [-h] [-l] file1.wig.gz file2.wig.gz ... + -o output directory (default 'topoview') + -l use log2 for read counts (recommended) + -h this help message +); + +die $usage if !@ARGV || $help; + +$outdir ||= 'topoview'; + +my (%bdb_hash,$max_signal,@SubsetNames); + +system "mkdir -p $outdir"; + +my $outfile = "$outdir/data.cat"; +unlink $outfile if -e $outfile; + +open( COV, '>' . $outfile ) || die "Cannot open $outfile!"; +my $hashfile = "$outdir/index.bdbhash"; +unlink $hashfile if -e $hashfile; + +%bdb_hash = (); +tie(%bdb_hash, "BerkeleyDB::Hash", + -Filename => $hashfile, + -Flags => DB_CREATE); + +$max_signal = 0; +@SubsetNames = (); + +for my $file ( sort @ARGV ) { + next unless is_bed4($file); + indexCoverageFile($file); +} + +$bdb_hash{'subsets'} = join( "\t", @SubsetNames ); +$bdb_hash{'max_signal'} = $max_signal; + +my @all_keys = keys %bdb_hash; + +for my $kkey ( sort @all_keys ) { + print "\t$kkey => " . $bdb_hash{$kkey} . "\n"; +} + +if ( $max_signal > 10000 ) { + warn "WARNING: max_signal=$max_signal - TOO HIGH. Consider log2?\n"; +} + +untie %bdb_hash; +chmod( 0666, $hashfile ); # ! sometimes very important + +close COV; + + +sub is_bed4 { + my $file = shift; + my $cat = $file =~ /gz$/ ? 'zcat' + : $file =~ /bz2/ ? 'bzcat' + : 'cat'; + open WIG, "$cat $file |" or die "could not open $file: $!"; + + my $idx; + while (<WIG>) { + next if /^track/; + last if ++$idx > 9; + + my ($ref,$start,$end,$score,@other) = split "\t"; + if (@other > 0) { + die "Extra fields, I was expecting BED4"; + } + unless ($ref && $start && $end && $score) { + die "Not enough fields, I was expecting BED4"; + } + unless (is_numeric($start) && is_numeric($end)) { + die "start ($start) and end ($end) are supposed to be numbers"; + } + unless (is_numeric($score)) { + die "score ($score) is not numeric" + } + } + + return 1; +} + +sub is_numeric { + no warnings; + return defined eval { $_[ 0] == 0 }; +} + +sub indexCoverageFile { + my $file = shift; + my $zcat = get_zcat($file); + + open( INF, "$zcat $file |" ) || die "Can't open $file"; + + chomp(my $SubsetName = `basename $file .wig.gz`); + + print STDERR "Subset=$SubsetName\n"; + + push( @SubsetNames, $SubsetName ); + + my $old_ref = ""; + my @offsets = (); + + my $step = 1000; + my $coordstep = 5000; + my $counter = 0; + my $offset = tell(COV); + + $bdb_hash{$SubsetName} = $offset; # record offset where new subset data starts + + my $old_signal = 0; + my $old_coord = -200000; + my $start = 0; + my $lastRecordedCoord = -200000; + + my @signals; + while (<INF>) { + $offset = tell(COV); + + next if /^$/ || /^\#/; + my ($ref,$start,$end,$signal) = split; + + $signal = log($signal)/log(2) if $log; + + $start += 1; # zero-based, half-open coords in BED + + $signal = 0 if $signal < 0; + + # New chromosome + if ( $ref ne $old_ref ) { + print STDERR "chromosome = $ref\n"; + dumpOffsets( $start, $SubsetName . ':' . $old_ref, @offsets ) + unless $old_ref eq ""; # previous subset:arm + $old_ref = $ref; + print COV "# subset=$SubsetName chromosome=$old_ref\n"; + $offset = tell(COV); + $bdb_hash{ $SubsetName . ':' . $old_ref } = + $offset; # record offset where new subset:arm data starts + @offsets = ("-200000\t$offset"); + print COV "-200000\t0\n"; # insert one fictive zero read + $offset = tell(COV); + print COV "0\t0\n"; # insert one more fictive zero read + push( @offsets, "0\t$offset" ); + $counter = 0; + $old_signal = 0; + $old_coord = 0; + $lastRecordedCoord = 0; + } + + + # fill in holes in coverage with 0 + if ($start > $old_coord+1 && $old_signal > 0) { + print COV join("\t",++$old_coord,0), "\n"; + $old_coord++; + $counter++; + $offset = tell(COV); + $old_signal = 0; + } + + if ( $signal == $old_signal) { + $old_coord = $end; + next; + } + + $max_signal = $signal if $max_signal < $signal; + if ( $counter++ > $step + || $start - $lastRecordedCoord > $coordstep ) + { + push( @offsets, "$start\t$offset" ); + $counter = 0; + $lastRecordedCoord = $start; + } + + $old_coord = $end; + $old_signal = $signal; + + print COV join("\t",$start,$signal), "\n"; + } + + # don't forget to dump offsets data on file end.. + dumpOffsets( $start,$SubsetName . ':' . $old_ref, @offsets ) + unless $old_ref eq ""; # previous subset:arm + close(INF); + return; +} + +sub dumpOffsets { + my ( $start, $key, @offsetlines ) = @_; + print COV "# offsets for $key\n"; + my $offset = tell(COV); + my $prevoffset = $offset; + $bdb_hash{ $key . ':offsets' } = $offset + ; # record offset where offsets VALUES for subset:arm data start (skip header) + my $oldbigstep = 0; + foreach my $str (@offsetlines) { + print COV $str . "\n"; + my ( $start, $floffset ) = split( /[ \t]+/, $str ); + + # following wasn't working properly.. + my $newbigstep = int( $start / 1000000.0 ); + if ( $newbigstep > $oldbigstep ) { + $bdb_hash{ $key . ':offsets:' . $newbigstep } = + $prevoffset; # one before is the right start + $oldbigstep = $newbigstep; + } + $prevoffset = $offset; + $offset = tell(COV); + } + return; +} + +#*********************************************************** +# +#*********************************************************** + +sub get_zcat { + my $fullfile = shift; + if ( $fullfile =~ /\.gz$/i ) { + my $zcat = `which zcat`; + if ( $? != 0 ) { $zcat = `which gzcat`; } + chomp($zcat); + return ($zcat); + } + elsif ( $fullfile =~ /\.bz2$/i ) { return ('bzcat'); } + return ('/bin/cat'); +} + +#******* -- Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/libbio-graphics-perl.git _______________________________________________ debian-med-commit mailing list [email protected] http://lists.alioth.debian.org/cgi-bin/mailman/listinfo/debian-med-commit
