[Bioperl-guts-l] [14535] bioperl-live/trunk/Bio: improved behavior of xy and density plots when spanning regions that contained undefined scores ; now just skips over these regions
Lincoln Stein
lstein at dev.open-bio.org
Fri Feb 22 11:09:23 EST 2008
Revision: 14535
Author: lstein
Date: 2008-02-22 11:09:22 -0500 (Fri, 22 Feb 2008)
Log Message:
-----------
improved behavior of xy and density plots when spanning regions that contained undefined scores; now just skips over these regions
Modified Paths:
--------------
bioperl-live/trunk/Bio/DB/SeqFeature/Store/DBI/mysql.pm
bioperl-live/trunk/Bio/DB/SeqFeature/Store/GFF3Loader.pm
bioperl-live/trunk/Bio/DB/SeqFeature/Store/berkeleydb.pm
bioperl-live/trunk/Bio/DB/SeqFeature/Store/memory.pm
bioperl-live/trunk/Bio/DB/SeqFeature/Store.pm
bioperl-live/trunk/Bio/Graphics/FeatureBase.pm
bioperl-live/trunk/Bio/Graphics/FeatureFile.pm
bioperl-live/trunk/Bio/Graphics/Glyph/generic.pm
bioperl-live/trunk/Bio/Graphics/Glyph/xyplot.pm
Modified: bioperl-live/trunk/Bio/DB/SeqFeature/Store/DBI/mysql.pm
===================================================================
--- bioperl-live/trunk/Bio/DB/SeqFeature/Store/DBI/mysql.pm 2008-02-22 08:09:01 UTC (rev 14534)
+++ bioperl-live/trunk/Bio/DB/SeqFeature/Store/DBI/mysql.pm 2008-02-22 16:09:22 UTC (rev 14535)
@@ -81,6 +81,9 @@
$db->insert_sequence('Chr1','GATCCCCCGGGATTCCAAAA...');
my $sequence = $db->fetch_sequence('Chr1',5000=>6000);
+ # what feature types are defined in the database?
+ my @types = $db->types;
+
# create a new feature in the database
my $feature = $db->new_feature(-primary_tag => 'mRNA',
-seq_id => 'chr3',
@@ -1254,6 +1257,38 @@
$self->flag_for_indexing($dbh->{mysql_insertid}) if $self->{bulk_update_in_progress};
}
+=head2 types
+
+ Title : types
+ Usage : @type_list = $db->types
+ Function: Get all the types in the database
+ Returns : array of Bio::DB::GFF::Typename objects
+ Args : none
+ Status : public
+
+=cut
+
+sub types {
+ my $self = shift;
+ eval "require Bio::DB::GFF::Typename"
+ unless Bio::DB::GFF::Typename->can('new');
+ my $typelist_table = $self->_typelist_table;
+ my $sql = <<END;
+SELECT tag from $typelist_table
+END
+;
+ $self->_print_query($sql) if DEBUG || $self->debug;
+ my $sth = $self->_prepare($sql);
+ $sth->execute() or $self->throw($sth->errstr);
+
+ my @results;
+ while (my($tag) = $sth->fetchrow_array) {
+ push @results,Bio::DB::GFF::Typename->new($tag);
+ }
+ $sth->finish;
+ return @results;
+}
+
###
# Insert a bit of DNA or protein into the database
#
Modified: bioperl-live/trunk/Bio/DB/SeqFeature/Store/GFF3Loader.pm
===================================================================
--- bioperl-live/trunk/Bio/DB/SeqFeature/Store/GFF3Loader.pm 2008-02-22 08:09:01 UTC (rev 14534)
+++ bioperl-live/trunk/Bio/DB/SeqFeature/Store/GFF3Loader.pm 2008-02-22 16:09:22 UTC (rev 14535)
@@ -64,13 +64,9 @@
use strict;
use Carp 'croak';
-use IO::File;
use Bio::DB::GFF::Util::Rearrange;
-use Bio::DB::SeqFeature::Store;
-use File::Spec;
-use base 'Bio::Root::Root';
+use base 'Bio::DB::SeqFeature::Store::Loader';
-use constant DEFAULT_SEQ_CHUNK_SIZE => 2000;
my %Special_attributes =(
Gap => 1, Target => 1,
@@ -155,60 +151,8 @@
=cut
-sub new {
- my $self = shift;
- my ($store,$seqfeature_class,$tmpdir,$verbose,$fast,$seq_chunk_size) = rearrange(['STORE',
- ['SF_CLASS','SEQFEATURE_CLASS'],
- ['TMP','TMPDIR'],
- 'VERBOSE',
- 'FAST',
- 'CHUNK_SIZE',
- ], at _);
+# sub new { } inherited
- $seqfeature_class ||= $self->default_seqfeature_class;
- eval "require $seqfeature_class" unless $seqfeature_class->can('new');
- $self->throw($@) if $@;
-
- my $normalized = $seqfeature_class->can('subfeatures_are_normalized')
- && $seqfeature_class->subfeatures_are_normalized;
-
- my $in_table = $seqfeature_class->can('subfeatures_are_stored_in_a_table')
- && $seqfeature_class->subfeatures_are_stored_in_a_table;
-
- if ($fast) {
- my $canfast = $normalized && $in_table;
- warn <<END unless $canfast;
-Only features that support the Bio::DB::SeqFeature::NormalizedTableFeature interface
-can be loaded using the -fast method. Reverting to slower feature-by-feature method.
-END
- $fast &&= $canfast;
- }
-
- # try to bring in highres time() function
- eval "require Time::HiRes";
-
- $tmpdir ||= File::Spec->tmpdir();
-
- my $tmp_store = Bio::DB::SeqFeature::Store->new(-adaptor => 'berkeleydb',
- -temporary=> 1,
- -dsn => $tmpdir,
- -cache => 1,
- -write => 1)
- unless $normalized;
-
- return bless {
- store => $store,
- tmp_store => $tmp_store,
- seqfeature_class => $seqfeature_class,
- fast => $fast,
- seq_chunk_size => $seq_chunk_size || DEFAULT_SEQ_CHUNK_SIZE,
- verbose => $verbose,
- load_data => {},
- subfeatures_normalized => $normalized,
- subfeatures_in_table => $in_table,
- },ref($self) || $self;
-}
-
=head2 load
Title : load
@@ -231,20 +175,8 @@
=cut
-sub load {
- my $self = shift;
- my $start = $self->time();
- my $count = 0;
+# sub load { } inherited
- for my $file_or_fh (@_) {
- $self->msg("loading $file_or_fh...\n");
- my $fh = $self->open_fh($file_or_fh) or $self->throw("Couldn't open $file_or_fh: $!");
- $count += $self->load_fh($fh);
- $self->msg(sprintf "load time: %5.2fs\n",$self->time()-$start);
- }
- $count;
-}
-
=head2 accessors
The following read-only accessors return values passed or created during new():
@@ -264,12 +196,12 @@
=cut
-sub store { shift->{store} }
-sub tmp_store { shift->{tmp_store} }
-sub sfclass { shift->{seqfeature_class} }
-sub fast { shift->{fast} }
-sub seq_chunk_size { shift->{seq_chunk_size} }
-sub verbose { shift->{verbose} }
+# sub store inherited
+# sub tmp_store inherited
+# sub sfclass inherited
+# sub fast inherited
+# sub seq_chunk_size inherited
+# sub verbose inherited
=head2 Internal Methods
@@ -286,10 +218,7 @@
=cut
-sub default_seqfeature_class {
- my $self = shift;
- return 'Bio::DB::SeqFeature';
-}
+# sub default_seqfeature_class { } inherited
=item subfeatures_normalized
@@ -300,12 +229,7 @@
=cut
-sub subfeatures_normalized {
- my $self = shift;
- my $d = $self->{subfeatures_normalized};
- $self->{subfeatures_normalized} = shift if @_;
- $d;
-}
+# sub subfeatures_normalized { } inherited
=item subfeatures_in_table
@@ -317,12 +241,7 @@
=cut
-sub subfeatures_in_table {
- my $self = shift;
- my $d = $self->{subfeatures_in_table};
- $self->{subfeatures_in_table} = shift if @_;
- $d;
-}
+# sub subfeatures_in_table { } inherited
=item load_fh
@@ -337,36 +256,29 @@
=cut
-sub load_fh {
- my $self = shift;
- my $fh = shift;
- $self->start_load();
- my $count = $self->do_load($fh);
- $self->finish_load();
- $count;
-}
+# sub load_fh { } inherited
-
=item start_load, finish_load
These methods are called at the start and end of a filehandle load.
=cut
-sub start_load {
+sub create_load_data { #overridden
my $self = shift;
+ $self->SUPER::create_load_data;
$self->{load_data}{Parent2Child} = {};
- $self->{load_data}{Local2GlobalID} = {};
$self->{load_data}{TemporaryID} = "GFFLoad0000000";
$self->{load_data}{IndexSubfeatures} = 1;
- $self->{load_data}{CurrentFeature} = undef;
- $self->{load_data}{CurrentID} = undef;
- $self->store->start_bulk_update() if $self->fast;
+ $self->{load_data}{mode} = 'gff';
}
-sub finish_load {
+sub finish_load { #overridden
my $self = shift;
+ $self->store_current_feature(); # during fast loading, we will have a feature left at the very end
+ $self->start_or_finish_sequence(); # finish any half-loaded sequences
+
$self->msg("Building object tree...");
my $start = $self->time();
$self->build_object_tree;
@@ -391,58 +303,66 @@
=cut
-sub do_load {
- my $self = shift;
- my $fh = shift;
+# sub do_load { } inherited
- my $start = $self->time();
- my $count = 0;
- my $mode = 'gff'; # or 'fasta'
+=item load_line
- while (<$fh>) {
- chomp;
+ $loader->load_line($data);
- next unless /^\S/; # blank line
- $mode = 'gff' if /\t/; # if it has a tab in it, switch to gff mode
+Load a line of a GFF3 file. You must bracket this with calls to
+start_load() and finish_load()!
- if (/^\#\s?\#\s*(.+)/) { ## meta instruction
- $mode = 'gff';
+ $loader->start_load();
+ $loader->load_line($_) while <FH>;
+ $loader->finish_load();
+
+=cut
+
+sub load_line { #overridden
+ my $self = shift;
+ my $line = shift;
+
+ chomp($line);
+ return unless $line =~ /^\S/; # blank line
+ my $load_data = $self->{load_data};
+
+ $load_data->{mode} = 'gff' if /\t/; # if it has a tab in it, switch to gff mode
+
+ if ($line =~ /^\#\s?\#\s*(.+)/) { ## meta instruction
+ $load_data->{mode} = 'gff';
$self->handle_meta($1);
- } elsif (/^\#/) {
- $mode = 'gff'; # just to be safe
- next; # comment
+ } elsif ($line =~ /^\#/) {
+ $load_data->{mode} = 'gff'; # just to be safe
+ return; # comment
}
- elsif (/^>\s*(\S+)/) { # FASTA lines are coming
- $mode = 'fasta';
+ elsif ($line =~ /^>\s*(\S+)/) { # FASTA lines are coming
+ $load_data->{mode} = 'fasta';
$self->start_or_finish_sequence($1);
}
- elsif ($mode eq 'fasta') {
- $self->load_sequence($_);
+ elsif ($load_data->{mode} eq 'fasta') {
+ $self->load_sequence($line);
}
- elsif ($mode eq 'gff') {
- $self->handle_feature($_);
- if (++$count % 1000 == 0) {
+ elsif ($load_data->{mode} eq 'gff') {
+ $self->handle_feature($line);
+ if (++$load_data->{count} % 1000 == 0) {
my $now = $self->time();
my $nl = -t STDOUT && !$ENV{EMACS} ? "\r" : "\n";
- $self->msg(sprintf("%d features loaded in %5.2fs...$nl",$count,$now - $start));
- $start = $now;
+ $self->msg(sprintf("%d features loaded in %5.2fs...$nl",
+ $load_data->{count},$now - $load_data->{start_time}));
+ $load_data->{start_time} = $now;
}
}
else {
- $self->throw("I don't know what to do with this line:\n$_");
+ $self->throw("I don't know what to do with this line:\n$line");
}
- }
- $self->store_current_feature(); # during fast loading, we will have a feature left at the very end
- $self->start_or_finish_sequence(); # finish any half-loaded sequences
- $self->msg(' 'x80,"\n"); #clear screen
- $count;
}
+
=item handle_meta
$loader->handle_meta($meta_directive)
@@ -483,14 +403,14 @@
=cut
-sub handle_feature {
+sub handle_feature { #overridden
my $self = shift;
my $gff_line = shift;
my $ld = $self->{load_data};
my @columns = map {$_ eq '.' ? undef : $_ } split /\t/,$gff_line;
return unless @columns >= 8;
- my ($refname,$source,$method,$start,$end, $score,$strand,$phase,$attributes) = @columns;
@@ Diff output truncated at 10000 characters. @@
More information about the Bioperl-guts-l
mailing list