[Bioperl-guts-l] [14535] bioperl-live/trunk/Bio: improved behavior of xy and density plots when spanning regions that contained undefined scores ; now just skips over these regions

Lincoln Stein lstein at dev.open-bio.org
Fri Feb 22 11:09:23 EST 2008


Revision: 14535
Author:   lstein
Date:     2008-02-22 11:09:22 -0500 (Fri, 22 Feb 2008)

Log Message:
-----------
improved behavior of xy and density plots when spanning regions that contained undefined scores; now just skips over these regions

Modified Paths:
--------------
    bioperl-live/trunk/Bio/DB/SeqFeature/Store/DBI/mysql.pm
    bioperl-live/trunk/Bio/DB/SeqFeature/Store/GFF3Loader.pm
    bioperl-live/trunk/Bio/DB/SeqFeature/Store/berkeleydb.pm
    bioperl-live/trunk/Bio/DB/SeqFeature/Store/memory.pm
    bioperl-live/trunk/Bio/DB/SeqFeature/Store.pm
    bioperl-live/trunk/Bio/Graphics/FeatureBase.pm
    bioperl-live/trunk/Bio/Graphics/FeatureFile.pm
    bioperl-live/trunk/Bio/Graphics/Glyph/generic.pm
    bioperl-live/trunk/Bio/Graphics/Glyph/xyplot.pm

Modified: bioperl-live/trunk/Bio/DB/SeqFeature/Store/DBI/mysql.pm
===================================================================
--- bioperl-live/trunk/Bio/DB/SeqFeature/Store/DBI/mysql.pm	2008-02-22 08:09:01 UTC (rev 14534)
+++ bioperl-live/trunk/Bio/DB/SeqFeature/Store/DBI/mysql.pm	2008-02-22 16:09:22 UTC (rev 14535)
@@ -81,6 +81,9 @@
   $db->insert_sequence('Chr1','GATCCCCCGGGATTCCAAAA...');
   my $sequence = $db->fetch_sequence('Chr1',5000=>6000);
 
+  # what feature types are defined in the database?
+  my @types    = $db->types;
+
   # create a new feature in the database
   my $feature = $db->new_feature(-primary_tag => 'mRNA',
                                  -seq_id      => 'chr3',
@@ -1254,6 +1257,38 @@
   $self->flag_for_indexing($dbh->{mysql_insertid}) if $self->{bulk_update_in_progress};
 }
 
+=head2 types
+
+ Title   : types
+ Usage   : @type_list = $db->types
+ Function: Get all the types in the database
+ Returns : array of Bio::DB::GFF::Typename objects
+ Args    : none
+ Status  : public
+
+=cut
+
+sub types {
+    my $self = shift;
+    eval "require Bio::DB::GFF::Typename" 
+	unless Bio::DB::GFF::Typename->can('new');
+    my $typelist_table      = $self->_typelist_table;
+    my $sql = <<END;
+SELECT tag from $typelist_table
+END
+;
+    $self->_print_query($sql) if DEBUG || $self->debug;
+    my $sth = $self->_prepare($sql);
+    $sth->execute() or $self->throw($sth->errstr);
+
+    my @results;
+    while (my($tag) = $sth->fetchrow_array) {
+	push @results,Bio::DB::GFF::Typename->new($tag);
+    }
+    $sth->finish;
+    return @results;
+}
+
 ###
 # Insert a bit of DNA or protein into the database
 #

Modified: bioperl-live/trunk/Bio/DB/SeqFeature/Store/GFF3Loader.pm
===================================================================
--- bioperl-live/trunk/Bio/DB/SeqFeature/Store/GFF3Loader.pm	2008-02-22 08:09:01 UTC (rev 14534)
+++ bioperl-live/trunk/Bio/DB/SeqFeature/Store/GFF3Loader.pm	2008-02-22 16:09:22 UTC (rev 14535)
@@ -64,13 +64,9 @@
 
 use strict;
 use Carp 'croak';
-use IO::File;
 use Bio::DB::GFF::Util::Rearrange;
-use Bio::DB::SeqFeature::Store;
-use File::Spec;
-use base 'Bio::Root::Root';
+use base 'Bio::DB::SeqFeature::Store::Loader';
 
-use constant DEFAULT_SEQ_CHUNK_SIZE => 2000;
 
 my %Special_attributes =(
 			 Gap    => 1, Target => 1,
@@ -155,60 +151,8 @@
 
 =cut
 
-sub new {
-  my $self = shift;
-  my ($store,$seqfeature_class,$tmpdir,$verbose,$fast,$seq_chunk_size) = rearrange(['STORE',
-										    ['SF_CLASS','SEQFEATURE_CLASS'],
-										    ['TMP','TMPDIR'],
-										    'VERBOSE',
-										    'FAST',
-										    'CHUNK_SIZE',
-										   ], at _);
+# sub new { } inherited
 
-  $seqfeature_class ||= $self->default_seqfeature_class;
-  eval "require $seqfeature_class" unless $seqfeature_class->can('new');
-  $self->throw($@) if $@;
-
-  my $normalized = $seqfeature_class->can('subfeatures_are_normalized')
-    && $seqfeature_class->subfeatures_are_normalized;
-
-  my $in_table = $seqfeature_class->can('subfeatures_are_stored_in_a_table')
-    && $seqfeature_class->subfeatures_are_stored_in_a_table;
-
-  if ($fast) {
-    my $canfast = $normalized && $in_table;
-    warn <<END unless $canfast;
-Only features that support the Bio::DB::SeqFeature::NormalizedTableFeature interface
-can be loaded using the -fast method. Reverting to slower feature-by-feature method.
-END
-    $fast &&= $canfast;
-  }
-
-  # try to bring in highres time() function
-  eval "require Time::HiRes";
-
-  $tmpdir ||= File::Spec->tmpdir();
-
-  my $tmp_store = Bio::DB::SeqFeature::Store->new(-adaptor  => 'berkeleydb',
-						  -temporary=> 1,
-						  -dsn      => $tmpdir,
-						  -cache    => 1,
-						  -write    => 1)
-    unless $normalized;
-
-  return bless {
-		store            => $store,
-		tmp_store        => $tmp_store,
-		seqfeature_class => $seqfeature_class,
-		fast             => $fast,
-		seq_chunk_size   => $seq_chunk_size || DEFAULT_SEQ_CHUNK_SIZE,
-		verbose          => $verbose,
-		load_data        => {},
-		subfeatures_normalized => $normalized,
-		subfeatures_in_table   => $in_table,
-	       },ref($self) || $self;
-}
-
 =head2 load
 
  Title   : load
@@ -231,20 +175,8 @@
 
 =cut
 
-sub load {
-  my $self       = shift;
-  my $start      = $self->time();
-  my $count = 0;
+# sub load { } inherited
 
-  for my $file_or_fh (@_) {
-    $self->msg("loading $file_or_fh...\n");
-    my $fh = $self->open_fh($file_or_fh) or $self->throw("Couldn't open $file_or_fh: $!");
-    $count += $self->load_fh($fh);
-    $self->msg(sprintf "load time: %5.2fs\n",$self->time()-$start);
-  }
-  $count;
-}
-
 =head2 accessors
 
 The following read-only accessors return values passed or created during new():
@@ -264,12 +196,12 @@
 
 =cut
 
-sub store          { shift->{store}            }
-sub tmp_store      { shift->{tmp_store}        }
-sub sfclass        { shift->{seqfeature_class} }
-sub fast           { shift->{fast}             }
-sub seq_chunk_size { shift->{seq_chunk_size}             }
-sub verbose        { shift->{verbose}          }
+# sub store           inherited
+# sub tmp_store       inherited
+# sub sfclass         inherited
+# sub fast            inherited
+# sub seq_chunk_size  inherited
+# sub verbose         inherited
 
 =head2 Internal Methods
 
@@ -286,10 +218,7 @@
 
 =cut
 
-sub default_seqfeature_class {
-  my $self = shift;
-  return 'Bio::DB::SeqFeature';
-}
+# sub default_seqfeature_class { } inherited
 
 =item subfeatures_normalized
 
@@ -300,12 +229,7 @@
 
 =cut
 
-sub subfeatures_normalized {
-  my $self = shift;
-  my $d    = $self->{subfeatures_normalized};
-  $self->{subfeatures_normalized} = shift if @_;
-  $d;
-}
+# sub subfeatures_normalized { } inherited
 
 =item subfeatures_in_table
 
@@ -317,12 +241,7 @@
 
 =cut
 
-sub subfeatures_in_table {
-  my $self = shift;
-  my $d    = $self->{subfeatures_in_table};
-  $self->{subfeatures_in_table} = shift if @_;
-  $d;
-}
+# sub subfeatures_in_table { } inherited
 
 =item load_fh
 
@@ -337,36 +256,29 @@
 
 =cut
 
-sub load_fh {
-  my $self = shift;
-  my $fh   = shift;
-  $self->start_load();
-  my $count = $self->do_load($fh);
-  $self->finish_load();
-  $count;
-}
+# sub load_fh { } inherited
 
-
 =item start_load, finish_load
 
 These methods are called at the start and end of a filehandle load.
 
 =cut
 
-sub start_load {
+sub create_load_data { #overridden
   my $self = shift;
+  $self->SUPER::create_load_data;
   $self->{load_data}{Parent2Child}     = {};
-  $self->{load_data}{Local2GlobalID}   = {};
   $self->{load_data}{TemporaryID}      = "GFFLoad0000000";
   $self->{load_data}{IndexSubfeatures} = 1;
-  $self->{load_data}{CurrentFeature}   = undef;
-  $self->{load_data}{CurrentID}        = undef;
-  $self->store->start_bulk_update() if $self->fast;
+  $self->{load_data}{mode}             = 'gff';
 }
 
-sub finish_load {
+sub finish_load { #overridden
   my $self  = shift;
 
+  $self->store_current_feature();      # during fast loading, we will have a feature left at the very end
+  $self->start_or_finish_sequence();   # finish any half-loaded sequences
+
   $self->msg("Building object tree...");
   my $start = $self->time();
   $self->build_object_tree;
@@ -391,58 +303,66 @@
 
 =cut
 
-sub do_load {
-  my $self = shift;
-  my $fh   = shift;
+# sub do_load { } inherited
 
-  my $start = $self->time();
-  my $count = 0;
-  my $mode  = 'gff';  # or 'fasta'
+=item load_line
 
-  while (<$fh>) {
-    chomp;
+    $loader->load_line($data);
 
-    next unless /^\S/;     # blank line
-    $mode = 'gff' if /\t/;  # if it has a tab in it, switch to gff mode
+Load a line of a GFF3 file. You must bracket this with calls to
+start_load() and finish_load()!
 
-    if (/^\#\s?\#\s*(.+)/) {  ## meta instruction
-      $mode = 'gff';
+    $loader->start_load();
+    $loader->load_line($_) while <FH>;
+    $loader->finish_load();
+
+=cut
+
+sub load_line { #overridden
+    my $self = shift;
+    my $line = shift;
+
+    chomp($line);
+    return unless $line =~ /^\S/;     # blank line
+    my $load_data = $self->{load_data};
+
+    $load_data->{mode} = 'gff' if /\t/;  # if it has a tab in it, switch to gff mode
+
+    if ($line =~ /^\#\s?\#\s*(.+)/) {  ## meta instruction
+      $load_data->{mode} = 'gff';
       $self->handle_meta($1);
 
-    } elsif (/^\#/) {
-      $mode = 'gff';  # just to be safe
-      next;  # comment
+    } elsif ($line =~ /^\#/) {
+      $load_data->{mode} = 'gff';  # just to be safe
+      return; # comment
     }
 
-    elsif (/^>\s*(\S+)/) { # FASTA lines are coming
-      $mode = 'fasta';
+    elsif ($line =~ /^>\s*(\S+)/) { # FASTA lines are coming
+      $load_data->{mode} = 'fasta';
       $self->start_or_finish_sequence($1);
     }
 
-    elsif ($mode eq 'fasta') {
-      $self->load_sequence($_);
+    elsif ($load_data->{mode} eq 'fasta') {
+      $self->load_sequence($line);
     }
 
-    elsif ($mode eq 'gff') {
-      $self->handle_feature($_);
-      if (++$count % 1000 == 0) {
+    elsif ($load_data->{mode} eq 'gff') {
+      $self->handle_feature($line);
+      if (++$load_data->{count} % 1000 == 0) {
 	my $now = $self->time();
 	my $nl = -t STDOUT && !$ENV{EMACS} ? "\r" : "\n";
-	$self->msg(sprintf("%d features loaded in %5.2fs...$nl",$count,$now - $start));
-	$start = $now;
+	$self->msg(sprintf("%d features loaded in %5.2fs...$nl",
+			   $load_data->{count},$now - $load_data->{start_time}));
+	$load_data->{start_time} = $now;
       }
     }
 
     else {
-      $self->throw("I don't know what to do with this line:\n$_");
+      $self->throw("I don't know what to do with this line:\n$line");
     }
-  }
-  $self->store_current_feature();      # during fast loading, we will have a feature left at the very end
-  $self->start_or_finish_sequence();   # finish any half-loaded sequences
-  $self->msg(' 'x80,"\n"); #clear screen
-  $count;
 }
 
+
 =item handle_meta
 
   $loader->handle_meta($meta_directive)
@@ -483,14 +403,14 @@
 
 =cut
 
-sub handle_feature {
+sub handle_feature { #overridden
   my $self     = shift;
   my $gff_line = shift;
   my $ld       = $self->{load_data};
 
   my @columns = map {$_ eq '.' ? undef : $_ } split /\t/,$gff_line;
   return unless @columns >= 8;
-  my ($refname,$source,$method,$start,$end, $score,$strand,$phase,$attributes)      = @columns;

@@ Diff output truncated at 10000 characters. @@



More information about the Bioperl-guts-l mailing list