[Bioperl-guts-l] [15010] bioperl-live/trunk: Add test for bug 2339, and fix

Brian Osborne bosborne at dev.open-bio.org
Sun Nov 23 15:16:53 EST 2008


Revision: 15010
Author:   bosborne
Date:     2008-11-23 15:16:53 -0500 (Sun, 23 Nov 2008)

Log Message:
-----------
Add test for bug 2339, and fix

Modified Paths:
--------------
    bioperl-live/trunk/Bio/DB/Flat/BinarySearch.pm
    bioperl-live/trunk/t/flat.t

Modified: bioperl-live/trunk/Bio/DB/Flat/BinarySearch.pm
===================================================================
--- bioperl-live/trunk/Bio/DB/Flat/BinarySearch.pm	2008-11-23 19:42:29 UTC (rev 15009)
+++ bioperl-live/trunk/Bio/DB/Flat/BinarySearch.pm	2008-11-23 20:16:53 UTC (rev 15010)
@@ -779,112 +779,113 @@
 =cut
 
 sub _index_file {
-    my ($self,$file) = @_;
-    my $v = $self->verbose;
-    open(my $FILE,"<", $file) || $self->throw("Can't open file [$file]");
+	my ($self,$file) = @_;
+	my $v = $self->verbose;
+	open(my $FILE,"<", $file) || $self->throw("Can't open file [$file]");
 
-    my $recstart = 0;
-    my $fileid = $self->get_fileid_by_filename($file);
-    my $found = 0;
-    my $id;
-    my $count = 0;
+	my $recstart = 0;
+	my $fileid = $self->get_fileid_by_filename($file);
+	my $found = 0;
+	my $id;
+	my $count = 0;
 
-    my $primary       = $self->primary_pattern;
-    my $start_pattern = $self->start_pattern;
+	my $primary       = $self->primary_pattern;
+	my $start_pattern = $self->start_pattern;
 
-    my $pos = 0;
+	my $pos = 0;
 
-    my $new_primary_entry;
+	my $new_primary_entry;
 
-    my $length;
-    #my $pos = 0;
-    my $fh = $FILE;
+	my $length;
 
-    my $done = -1;
+	my $fh = $FILE;
 
-    my @secondary_names = $self->secondary_namespaces;
-    my %secondary_id;
-    my $last_one;
+	my $done = -1;
 
-    while (<$fh>) {
+	my @secondary_names = $self->secondary_namespaces;
+	my %secondary_id;
+	my $last_one;
+
+	while (<$fh>) {
       $last_one = $_;
       $self->{alphabet} ||= $self->guess_alphabet($_);		
       if ($_ =~ /$start_pattern/) {
-	 if ($done == 0) {
-	    $id = $new_primary_entry;
-	    $self->{alphabet} ||= $self->guess_alphabet($_);
+			if ($done == 0) {
+				$id = $new_primary_entry;
+				$self->{alphabet} ||= $self->guess_alphabet($_);
 	  
-	  my $tmplen = (tell $fh) - length($_);
+				my $tmplen = (tell $fh) - length($_);
 
-	  $length = $tmplen  - $pos;
+				$length = $tmplen  - $pos;
 		
-	  unless( defined($id)) {
-	    $self->throw("No id defined for sequence");
-	  }
-	  unless( defined($fileid)) {
-	    $self->throw("No fileid defined for file $file");
-	  }
-	  unless( defined($pos)) {
-	    $self->throw("No position defined for " . $id . "\n");
-	  }
-	  unless( defined($length)) {
-	    $self->throw("No length defined for " . $id . "\n");
-	  }
-	  $self->_add_id_position($id,$pos,$fileid,$length,\%secondary_id);
+				unless( defined($id)) {
+					$self->throw("No id defined for sequence");
+				}
+				unless( defined($fileid)) {
+					$self->throw("No fileid defined for file $file");
+				}
+				unless( defined($pos)) {
+					$self->throw("No position defined for " . $id . "\n");
+				}
+				unless( defined($length)) {
+					$self->throw("No length defined for " . $id . "\n");
+				}
+				$self->_add_id_position($id,$pos,$fileid,$length,\%secondary_id);
 
-	  $pos   = $tmplen;
+				$pos   = $tmplen;
 		
-	  if ($count > 0 && $count%1000 == 0) {
-	    $self->debug( "Indexed $count ids\n") if $v > 0;
-	  }
+				if ($count > 0 && $count%1000 == 0) {
+					$self->debug( "Indexed $count ids\n") if $v > 0;
+				}
 	    
-	  $count++;
-	} else {
-	  $done = 0;
-	}
+				$count++;
+			} else {
+				$done = 0;
+			}
       }
 
       if ($_ =~ /$primary/) {
-	$new_primary_entry = $1;    
+			$new_primary_entry = $1;    
       }
 
       my $secondary_patterns = $self->secondary_patterns;
 
       foreach my $sec (@secondary_names) {
-	my $pattern = $secondary_patterns->{$sec};
+			my $pattern = $secondary_patterns->{$sec};
 
-	if ($_ =~ /$pattern/) {
-	  $secondary_id{$sec} = $1;
+			if ($_ =~ /$pattern/) {
+				$secondary_id{$sec} = $1;
+			}
+      }
+		
 	}
-      }
-	
-    }
 
-    # Remember to add in the last one
+	# Remember to add in the last one
 
-    $id = $new_primary_entry;
-    my $tmplen = (tell $fh) - length($last_one);
+	$id = $new_primary_entry;
+	# my $tmplen = (tell $fh) - length($last_one);
+	my $tmplen = (tell $fh);
 
-    $length = $tmplen  - $pos;
+	$length = $tmplen - $pos;
     
-    if (!defined($id)) {
-	$self->throw("No id defined for sequence");
-    }
-    if (!defined($fileid)) {
-	$self->throw("No fileid defined for file $file");
-    }
-    if (!defined($pos)) {
-	$self->throw("No position defined for " . $id . "\n");
-    }
-    if (!defined($length)) {
-	$self->throw("No length defined for " . $id . "\n");
-    }
+	if (!defined($id)) {
+		$self->throw("No id defined for sequence");
+	}
+	if (!defined($fileid)) {
+		$self->throw("No fileid defined for file $file");
+	}
+	if (!defined($pos)) {
+		$self->throw("No position defined for " . $id . "\n");
+	}
+	if (!defined($length)) {
+		$self->throw("No length defined for " . $id . "\n");
+	}
     
-    $self->_add_id_position($id,$pos,$fileid,$length,\%secondary_id);
-    $count++;
+	$self->_add_id_position($id,$pos,$fileid,$length,\%secondary_id);
+	$count++;
     
-    close(FILE);
-    $count;
+	close(FILE);
+	$count;
 }
 
 =head2 write_primary_index
@@ -900,42 +901,41 @@
 =cut
 
 sub write_primary_index {
-    my ($self) = @_;
+	my ($self) = @_;
 
-    my @ids = keys %{$self->{_id}};
+	my @ids = keys %{$self->{_id}};
 
-    @ids = sort {$a cmp $b} @ids;
+	@ids = sort {$a cmp $b} @ids;
 
-    open (my $INDEX,">" . $self->primary_index_file) || 
-	$self->throw("Can't open primary index file [" . 
-		     $self->primary_index_file . "]");
+	open (my $INDEX,">" . $self->primary_index_file) || 
+	  $self->throw("Can't open primary index file [" . 
+						$self->primary_index_file . "]");
 
-    my $recordlength = $self->{_maxidlength} +
-	               $self->{_maxfileidlength} + 
-	               $self->{_maxposlength} +
-   		       $self->{_maxlengthlength} + 3;
-	
+	my $recordlength = $self->{_maxidlength} +
+	                   $self->{_maxfileidlength} + 
+		                $self->{_maxposlength} +
+			             $self->{_maxlengthlength} + 3;
     
-    print $INDEX sprintf("%4d",$recordlength);
+	print $INDEX sprintf("%04d",$recordlength);
 
-    foreach my $id (@ids) {
+	foreach my $id (@ids) {
 
-	if (!defined($self->{_id}{$id}{_fileid})) {
-	    $self->throw("No fileid for $id\n");
-	}
-	if (!defined($self->{_id}{$id}{_pos})) {
-	    $self->throw("No position for $id\n");
-	}
-	if (!defined($self->{_id}{$id}{_length})) {
-	    $self->throw("No length for $id");
-	}
+		if (!defined($self->{_id}{$id}{_fileid})) {
+			$self->throw("No fileid for $id\n");
+		}
+		if (!defined($self->{_id}{$id}{_pos})) {
+			$self->throw("No position for $id\n");
+		}
+		if (!defined($self->{_id}{$id}{_length})) {
+			$self->throw("No length for $id");
+		}
 
-	my $record =  $id              . "\t" . 
-	    $self->{_id}{$id}{_fileid} . "\t" .
-	    $self->{_id}{$id}{_pos}    . "\t" .
-	    $self->{_id}{$id}{_length};
+		my $record =  $id              . "\t" . 
+		  $self->{_id}{$id}{_fileid} . "\t" .
+			 $self->{_id}{$id}{_pos}    . "\t" .
+				$self->{_id}{$id}{_length};
 
-	print $INDEX sprintf("%-${recordlength}s",$record);
+		print $INDEX sprintf("%-${recordlength}s",$record);
 
     }
 }
@@ -983,7 +983,7 @@
 	
 	my $fh = $self->new_secondary_filehandle($name);	
 
-	print $fh sprintf("%4d",$length);
+	print $fh sprintf("%04d",$length);
 	@seconds = sort @seconds;
 	
 	foreach my $second (@seconds) {

Modified: bioperl-live/trunk/t/flat.t
===================================================================
--- bioperl-live/trunk/t/flat.t	2008-11-23 19:42:29 UTC (rev 15009)
+++ bioperl-live/trunk/t/flat.t	2008-11-23 20:16:53 UTC (rev 15010)
@@ -8,8 +8,8 @@
     use lib 't/lib';
 	use BioperlTest;
 	
-	test_begin(-tests => 17,
-			   -requires_module => 'DB_File');
+	test_begin(-tests => 18,
+				  -requires_module => 'DB_File');
 	
 	use_ok('Bio::DB::Flat');
 }
@@ -32,7 +32,7 @@
 my $result = $db->build_index(glob($dir));
 ok($result);
 
-#Now let's get the sequence out again
+# Now let's get the sequence out again
 my $seq = $db->get_Seq_by_id('AAC12660');
 ok($seq);
 is($seq->length,504);
@@ -41,7 +41,7 @@
 $db = Bio::DB::Flat->new(-directory  => $tmpdir,
                          -index      => 'bdb',
                          -format     => 'embl',
-			 -dbname     => 'myembl',
+								 -dbname     => 'myembl',
                          -verbose    => $verbose,
                          -write_flag => 1
 			 );
@@ -97,3 +97,16 @@
 ok($seq && ref($seq));
 
 undef $db;
+
+$db = Bio::DB::Flat->new(-directory  => $tmpdir,
+                         -index      => 'binarysearch',
+                         -format     => 'fasta',
+								 -dbname     => 'myfasta',
+                         -verbose    => $verbose,
+                         -write_flag => 1
+			 );
+
+$dir = test_input_file('tmp.fst');
+$result = $db->build_index(glob($dir));
+ok($result);
+




More information about the Bioperl-guts-l mailing list