[Bioperl-guts-l]
bioperl-live/Bio/DB/GFF/Adaptor berkeleydb.pm, 1.5, 1.6
Lincoln Stein
lstein at pub.open-bio.org
Wed Jul 27 18:49:16 EDT 2005
Update of /home/repository/bioperl/bioperl-live/Bio/DB/GFF/Adaptor
In directory pub.open-bio.org:/tmp/cvs-serv31244/Bio/DB/GFF/Adaptor
Modified Files:
berkeleydb.pm
Log Message:
fixed bug in types request; notes search is quite slow
Index: berkeleydb.pm
===================================================================
RCS file: /home/repository/bioperl/bioperl-live/Bio/DB/GFF/Adaptor/berkeleydb.pm,v
retrieving revision 1.5
retrieving revision 1.6
diff -C2 -d -r1.5 -r1.6
*** berkeleydb.pm 27 Jul 2005 22:15:15 -0000 1.5
--- berkeleydb.pm 27 Jul 2005 22:49:14 -0000 1.6
***************
*** 27,42 ****
sub new {
my $class = shift ;
! my ($dbdir,$preferred_groups) = rearrange([
! [qw(DSN DB DIR DIRECTORY)],
! 'PREFERRED_GROUPS',
], at _);
$dbdir ||= $ENV{TMPDIR} ? "$ENV{TMPDIR}/test" : "/tmp/test";
my $self = bless {},$class;
$self->dsn($dbdir);
$self->preferred_groups($preferred_groups) if defined $preferred_groups;
$self->_open_databases();
return $self;
}
sub _open_databases {
my $self = shift;
--- 27,85 ----
sub new {
my $class = shift ;
! my ($dbdir,$preferred_groups,$autoindex) = rearrange([
! [qw(DSN DB)],
! 'PREFERRED_GROUPS',
! [qw(DIR AUTOINDEX)],
], at _);
+ if (defined $dbdir && defined $autoindex) {
+ $class->throw("If both -dsn and -dir (or -autoindex) are specified, they must point to the same directory")
+ unless $dbdir eq $autoindex;
+ }
+
+ $dbdir ||= $autoindex;
$dbdir ||= $ENV{TMPDIR} ? "$ENV{TMPDIR}/test" : "/tmp/test";
+
my $self = bless {},$class;
$self->dsn($dbdir);
$self->preferred_groups($preferred_groups) if defined $preferred_groups;
+ $self->_autoindex if $autoindex;
$self->_open_databases();
return $self;
}
+ sub _autoindex {
+ my $self = shift;
+ my $dir = $self->dsn;
+ my %ignore = map {$_=>1} ($self->_index_file,$self->_hash_file,$self->_fasta_file,$self->_temp_file,$self->_timestamp_file);
+
+ my $maxtime = 0;
+ opendir (D,$dir) or $self->throw("Couldn't open directory $dir for reading: $!");
+
+ while (defined (my $node = readdir(D))) {
+ next if $node =~ /^\./;
+ my $path = "$dir/$node";
+ next if $ignore{$path};
+ next unless -f $path;
+ my $mtime = _mtime(\*_); # not a typo
+ $maxtime = $mtime if $mtime > $maxtime;
+ }
+
+ close D;
+
+ my $timestamp_time = _mtime($self->_timestamp_file) || 0;
+ my $all_files_exist = -e $self->_index_file && -e $self->_hash_file;
+
+ if ($maxtime > $timestamp_time || !$all_files_exist) {
+ $self->do_initialize(1);
+ $self->load_gff($dir);
+ $self->load_fasta($dir);
+ }
+
+ else {
+ $self->_open_databases();
+ }
+
+ }
+
sub _open_databases {
my $self = shift;
***************
*** 116,119 ****
--- 159,163 ----
unlink $self->_fasta_file;
unlink $self->_fasta_file.'.index';
+ unlink $self->_timestamp_file;
$self->_open_databases;
$self->_next_id(0);
***************
*** 142,145 ****
--- 186,190 ----
my $dna_db = Bio::DB::Fasta->new($file) or $self->throw("Can't reindex sequence file: $@");
$self->dna_db($dna_db);
+ $self->_touch_timestamp;
return $loaded;
}
***************
*** 160,181 ****
}
sub _index_file {
my $self = shift;
! return $self->dsn . "/features.btree";
}
sub _hash_file {
my $self = shift;
! return $self->dsn . "/features.hash";
}
sub _fasta_file {
my $self = shift;
! return $self->dsn . "/sequence.fa";
}
sub _temp_file {
my $self = shift;
! return $self->dsn ."/temporary_results.btree";
}
--- 205,237 ----
}
+ sub _mtime {
+ my $file = shift;
+ my @stat = stat($file);
+ return $stat[9];
+ }
+
sub _index_file {
my $self = shift;
! return $self->dsn . "/_bdb_features.btree";
}
sub _hash_file {
my $self = shift;
! return $self->dsn . "/_bdb_features.hash";
}
sub _fasta_file {
my $self = shift;
! return $self->dsn . "/_bdb_sequence.fa";
}
sub _temp_file {
my $self = shift;
! return $self->dsn ."/_bdb_temporary_results.btree";
! }
!
! sub _timestamp_file {
! my $self = shift;
! return $self->dsn ."/_bdb_timestamp";
}
***************
*** 218,233 ****
my $db = $self->{db};
! $db->{"__class__". $feat->{gclass}} = $id;
! $db->{"__name__".(lc $feat->{gname})} = $id;
!
! $db->{"__bin__"."$feat->{ref}$;$bin"} = $id;
! $db->{"__type__".$type} = $id;
!
! for my $attr (@{$feat->{attributes}}) {
! my ($attr_name,$attr_value) = @$attr;
! $db->{"__attr__".$attr_name."__".$attr_value} = $id;
}
- #warn "Storing start $start, stop $stop, bin $bin, id $id";
$self->{iddb}{$id} = freeze($feat);
}
--- 274,281 ----
my $db = $self->{db};
! for my $skey ($self->_secondary_keys($feat)) {
! $db->{$skey} = $id;
}
$self->{iddb}{$id} = freeze($feat);
}
***************
*** 237,243 ****
--- 285,300 ----
$self->iddb->sync;
$self->db->sync;
+ $self->_touch_timestamp;
1;
}
+ sub _touch_timestamp {
+ my $self = shift;
+ my $tsf = $self->_timestamp_file;
+ open (F,">$tsf") or $self->throw("Couldn't open $tsf: $!");
+ print F scalar(localtime);
+ close F;
+ }
+
# given sequence name, return (reference,start,stop,strand)
***************
*** 577,595 ****
if (defined $refseq) {
! next unless lc $refseq eq lc $ref;
! }
!
! if (defined $start or defined $stop) {
$start = 0 unless defined($start);
$stop = MAX_SEGMENT unless defined($stop);
if ($rangetype eq 'overlaps') {
! return unless $feature_stop >= $start && $feature_start <= $stop;
} elsif ($rangetype eq 'contains') {
! return unless $feature_start >= $start && $feature_stop <= $stop;
} elsif ($rangetype eq 'contained_in') {
! return unless $feature_start <= $start && $feature_stop >= $stop;
} else {
! return unless $feature_start == $start && $feature_stop == $stop;
}
}
--- 634,649 ----
if (defined $refseq) {
! return 0 unless lc $refseq eq lc $ref;
$start = 0 unless defined($start);
$stop = MAX_SEGMENT unless defined($stop);
if ($rangetype eq 'overlaps') {
! return 0 unless $feature_stop >= $start && $feature_start <= $stop;
} elsif ($rangetype eq 'contains') {
! return 0 unless $feature_start >= $start && $feature_stop <= $stop;
} elsif ($rangetype eq 'contained_in') {
! return 0 unless $feature_start <= $start && $feature_stop >= $stop;
} else {
! return 0 unless $feature_start == $start && $feature_stop == $stop;
}
}
***************
*** 599,608 ****
if (defined $types && @$types){
! return unless $self->_matching_typelist($feature_method,$feature_source,$types);
}
my $feature_attributes = $feature->{attributes};
if (defined $attributes){
! return unless $self->_matching_attributes($feature_attributes,$attributes);
}
--- 653,662 ----
if (defined $types && @$types){
! return 0 unless $self->_matching_typelist($feature_method,$feature_source,$types);
}
my $feature_attributes = $feature->{attributes};
if (defined $attributes){
! return 0 unless $self->_matching_attributes($feature_attributes,$attributes);
}
***************
*** 638,646 ****
$self->retrieve_features_range(-table => 'type',
-start => $type,
-do_while => sub { my $f = shift;
! $self->_matching_typelist(
! $f->{method},
! $f->{source},
! [$_]) },
-result => $results);
}
--- 692,701 ----
$self->retrieve_features_range(-table => 'type',
-start => $type,
+ -filter => $filter,
-do_while => sub { my $f = shift;
! lc($f->{method}) eq lc($_->[0])
! &&
! lc($f->{source}||$_->[1]||'') eq lc($_->[1]||'')
! },
-result => $results);
}
More information about the Bioperl-guts-l
mailing list