[Bioperl-guts-l] bioperl-live/Bio/DB/GFF/Adaptor berkeleydb.pm, 1.5, 1.6

Lincoln Stein lstein at pub.open-bio.org
Wed Jul 27 18:49:16 EDT 2005


Update of /home/repository/bioperl/bioperl-live/Bio/DB/GFF/Adaptor
In directory pub.open-bio.org:/tmp/cvs-serv31244/Bio/DB/GFF/Adaptor

Modified Files:
	berkeleydb.pm 
Log Message:
fixed bug in types request; notes search is quite slow


Index: berkeleydb.pm
===================================================================
RCS file: /home/repository/bioperl/bioperl-live/Bio/DB/GFF/Adaptor/berkeleydb.pm,v
retrieving revision 1.5
retrieving revision 1.6
diff -C2 -d -r1.5 -r1.6
*** berkeleydb.pm	27 Jul 2005 22:15:15 -0000	1.5
--- berkeleydb.pm	27 Jul 2005 22:49:14 -0000	1.6
***************
*** 27,42 ****
  sub new {
    my $class = shift ;
!   my ($dbdir,$preferred_groups) = rearrange([
! 					     [qw(DSN DB DIR DIRECTORY)],
! 					     'PREFERRED_GROUPS',
  					    ], at _);
    $dbdir ||= $ENV{TMPDIR} ? "$ENV{TMPDIR}/test" : "/tmp/test";
    my $self = bless {},$class;
    $self->dsn($dbdir);
    $self->preferred_groups($preferred_groups) if defined $preferred_groups;
    $self->_open_databases();
    return $self;
  }
  
  sub _open_databases {
    my $self = shift;
--- 27,85 ----
  sub new {
    my $class = shift ;
!   my ($dbdir,$preferred_groups,$autoindex) = rearrange([
! 							[qw(DSN DB)],
! 							'PREFERRED_GROUPS',
! 							[qw(DIR AUTOINDEX)],
  					    ], at _);
+   if (defined $dbdir && defined $autoindex) {
+     $class->throw("If both -dsn and -dir (or -autoindex) are specified, they must point to the same directory")
+       unless $dbdir eq $autoindex;
+   }
+ 
+   $dbdir ||= $autoindex;
    $dbdir ||= $ENV{TMPDIR} ? "$ENV{TMPDIR}/test" : "/tmp/test";
+ 
    my $self = bless {},$class;
    $self->dsn($dbdir);
    $self->preferred_groups($preferred_groups) if defined $preferred_groups;
+   $self->_autoindex                          if $autoindex;
    $self->_open_databases();
    return $self;
  }
  
+ sub _autoindex {
+   my $self = shift;
+   my $dir    = $self->dsn;
+   my %ignore = map {$_=>1} ($self->_index_file,$self->_hash_file,$self->_fasta_file,$self->_temp_file,$self->_timestamp_file);
+ 
+   my $maxtime = 0;
+   opendir (D,$dir) or $self->throw("Couldn't open directory $dir for reading: $!");
+ 
+   while (defined (my $node = readdir(D))) {
+     next if $node =~ /^\./;
+     my $path      = "$dir/$node";
+     next if $ignore{$path};
+     next unless -f $path;
+     my $mtime = _mtime(\*_);  # not a typo
+     $maxtime  = $mtime if $mtime > $maxtime;
+   }
+ 
+   close D;
+ 
+   my $timestamp_time  = _mtime($self->_timestamp_file) || 0;
+   my $all_files_exist = -e $self->_index_file && -e $self->_hash_file;
+ 
+   if ($maxtime > $timestamp_time || !$all_files_exist) {
+     $self->do_initialize(1);
+     $self->load_gff($dir);
+     $self->load_fasta($dir);
+   }
+ 
+   else {
+     $self->_open_databases();
+   }
+ 
+ }
+ 
  sub _open_databases {
    my $self = shift;
***************
*** 116,119 ****
--- 159,163 ----
      unlink $self->_fasta_file;
      unlink $self->_fasta_file.'.index';
+     unlink $self->_timestamp_file;
      $self->_open_databases;
      $self->_next_id(0);
***************
*** 142,145 ****
--- 186,190 ----
    my $dna_db = Bio::DB::Fasta->new($file) or $self->throw("Can't reindex sequence file: $@");
    $self->dna_db($dna_db);
+   $self->_touch_timestamp;
    return $loaded;
  }
***************
*** 160,181 ****
  }
  
  sub _index_file {
    my $self = shift;
!   return $self->dsn . "/features.btree";
  }
  
  sub _hash_file {
    my $self = shift;
!   return $self->dsn . "/features.hash";
  }
  
  sub _fasta_file {
    my $self = shift;
!   return $self->dsn . "/sequence.fa";
  }
  
  sub _temp_file {
    my $self = shift;
!   return $self->dsn ."/temporary_results.btree";
  }
  
--- 205,237 ----
  }
  
+ sub _mtime {
+   my $file = shift;
+   my @stat = stat($file);
+   return $stat[9];
+ }
+ 
  sub _index_file {
    my $self = shift;
!   return $self->dsn . "/_bdb_features.btree";
  }
  
  sub _hash_file {
    my $self = shift;
!   return $self->dsn . "/_bdb_features.hash";
  }
  
  sub _fasta_file {
    my $self = shift;
!   return $self->dsn . "/_bdb_sequence.fa";
  }
  
  sub _temp_file {
    my $self = shift;
!   return $self->dsn ."/_bdb_temporary_results.btree";
! }
! 
! sub _timestamp_file {
!   my $self = shift;
!   return $self->dsn ."/_bdb_timestamp";
  }
  
***************
*** 218,233 ****
  
    my $db = $self->{db};
!   $db->{"__class__". $feat->{gclass}}   = $id;
!   $db->{"__name__".(lc $feat->{gname})} = $id;
! 
!   $db->{"__bin__"."$feat->{ref}$;$bin"}   = $id;
!   $db->{"__type__".$type}                 = $id;
! 
!   for my $attr (@{$feat->{attributes}}) {
!     my ($attr_name,$attr_value) = @$attr;
!     $db->{"__attr__".$attr_name."__".$attr_value} = $id;
    }
  
-   #warn "Storing start $start, stop $stop, bin $bin, id $id";
    $self->{iddb}{$id} = freeze($feat);
  }
--- 274,281 ----
  
    my $db = $self->{db};
!   for my $skey ($self->_secondary_keys($feat)) {
!     $db->{$skey} = $id;
    }
  
    $self->{iddb}{$id} = freeze($feat);
  }
***************
*** 237,243 ****
--- 285,300 ----
    $self->iddb->sync;
    $self->db->sync;
+   $self->_touch_timestamp;
    1;
  }
  
+ sub _touch_timestamp {
+   my $self = shift;
+   my $tsf = $self->_timestamp_file;
+   open (F,">$tsf") or $self->throw("Couldn't open $tsf: $!");
+   print F scalar(localtime);
+   close F;
+ }
+ 
  
  # given sequence name, return (reference,start,stop,strand)
***************
*** 577,595 ****
  
      if (defined $refseq) {
!       next unless lc $refseq eq lc $ref;
!     }
! 
!     if (defined $start or defined $stop) {
        $start = 0               unless defined($start);
        $stop  = MAX_SEGMENT     unless defined($stop);
  
        if ($rangetype eq 'overlaps') {
! 	return unless $feature_stop >= $start && $feature_start <= $stop;
        } elsif ($rangetype eq 'contains') {
! 	return unless $feature_start >= $start && $feature_stop <= $stop;
        } elsif ($rangetype eq 'contained_in') {
! 	return unless $feature_start <= $start && $feature_stop >= $stop;
        } else {
! 	return unless $feature_start == $start && $feature_stop == $stop;
        }
      }
--- 634,649 ----
  
      if (defined $refseq) {
!       return 0 unless lc $refseq eq lc $ref;
        $start = 0               unless defined($start);
        $stop  = MAX_SEGMENT     unless defined($stop);
  
        if ($rangetype eq 'overlaps') {
! 	return 0 unless $feature_stop >= $start && $feature_start <= $stop;
        } elsif ($rangetype eq 'contains') {
! 	return 0 unless $feature_start >= $start && $feature_stop <= $stop;
        } elsif ($rangetype eq 'contained_in') {
! 	return 0 unless $feature_start <= $start && $feature_stop >= $stop;
        } else {
! 	return 0 unless $feature_start == $start && $feature_stop == $stop;
        }
      }
***************
*** 599,608 ****
  
      if (defined $types && @$types){
!       return unless $self->_matching_typelist($feature_method,$feature_source,$types);
      }
  
      my $feature_attributes = $feature->{attributes};
      if (defined $attributes){
!       return unless $self->_matching_attributes($feature_attributes,$attributes);
      }
  
--- 653,662 ----
  
      if (defined $types && @$types){
!       return 0 unless $self->_matching_typelist($feature_method,$feature_source,$types);
      }
  
      my $feature_attributes = $feature->{attributes};
      if (defined $attributes){
!       return 0 unless $self->_matching_attributes($feature_attributes,$attributes);
      }
  
***************
*** 638,646 ****
        $self->retrieve_features_range(-table    => 'type',
  				     -start    => $type,
  				     -do_while => sub { my $f = shift;
! 							$self->_matching_typelist(
! 										  $f->{method},
! 										  $f->{source},
! 										  [$_]) },
  				     -result => $results);
      }
--- 692,701 ----
        $self->retrieve_features_range(-table    => 'type',
  				     -start    => $type,
+ 				     -filter   => $filter,
  				     -do_while => sub { my $f = shift;
! 							lc($f->{method}) eq lc($_->[0]) 
! 							  &&
! 							lc($f->{source}||$_->[1]||'') eq lc($_->[1]||'')
! 						      },
  				     -result => $results);
      }



More information about the Bioperl-guts-l mailing list