[Bioperl-guts-l] bioperl commit

Lincoln Stein lstein at dev.open-bio.org
Mon Feb 17 21:30:36 EST 2003


Mon Feb 17 21:30:36 EST 2003
Update of /home/repository/bioperl/bioperl-live/Bio/DB/Flat
In directory dev:/tmp/cvs-serv981/Bio/DB/Flat

Modified Files:
	BDB.pm BinarySearch.pm 
Log Message:
fixed multiple feature inconsistencies between the bdb and binary search versions of the flat databases
bioperl-live/Bio/DB/Flat BDB.pm,1.7,1.8 BinarySearch.pm,1.1,1.2
===================================================================
RCS file: /home/repository/bioperl/bioperl-live/Bio/DB/Flat/BDB.pm,v
retrieving revision 1.7
retrieving revision 1.8
diff -u -r1.7 -r1.8
--- /tmp/T0D6aG.b	2003-02-17 21:30:36.450005148 -0500
+++ /tmp/T1E6aG.b	2003-02-17 21:30:36.460001206 -0500
@@ -134,7 +134,18 @@
 # fetch array of Bio::Seq objects
 sub get_Seq_by_acc {
   my $self = shift;
-  return $self->get_Seq_by_id(shift) if @_ == 1;
+  unshift @_,'ACC' if @_==1;
+  my ($ns,$key) = @_;
+  my @primary_ids = $self->expand_ids($ns => $key);
+  $self->throw("more than one sequences correspond to this accession")
+    if @primary_ids > 1 && !wantarray;
+  return map {$self->get_Seq_by_id($_)} @primary_ids;
+}
+
+# fetch array of Bio::Seq objects
+sub get_Seq_by_version {
+  my $self = shift;
+  unshift @_,'VERSION' if @_==1;
   my ($ns,$key) = @_;
   my @primary_ids = $self->expand_ids($ns => $key);
   $self->throw("more than one sequences correspond to this accession")

===================================================================
RCS file: /home/repository/bioperl/bioperl-live/Bio/DB/Flat/BinarySearch.pm,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -r1.1 -r1.2
--- /tmp/T0t7ai_b	2003-02-17 21:30:36.540000810 -0500
+++ /tmp/T1u7ai_b	2003-02-17 21:30:36.550005958 -0500
@@ -39,7 +39,7 @@
     my $index = new Bio::DB::Flat::BinarySearch(
 	     -start_pattern   => $start_pattern,
 	     -primary_pattern => $primary_pattern,
-             -primary_namespace => "ACC",
+             -primary_namespace => "ID",
 					     );
 
 To actually write it out to disk we need to enter a directory where the 
@@ -91,7 +91,7 @@
     my $index = new Bio::DB::Flat::BinarySearch(
                 -start_pattern     => $start_pattern,
                 -primary_pattern   => $primary_pattern,
-                -primary_namespace  => 'ACC',
+                -primary_namespace  => 'ID',
                 -secondary_patterns => \%secondary_patterns);
 
     $index->build_index("/Users/michele/indices","mydb",($seqfile));
@@ -486,7 +486,7 @@
 
     while ($current_id eq $newid) {
 	$record = $self->read_record($fh,$pos,$recsize);
-	print "Record is :$record:\n";
+	# print "Record is :$record:\n";
 	my ($secid,$primary_id) = split(/\t/,$record,2);
 	$current_id = $secid;
 
@@ -504,12 +504,12 @@
       return;
     }
 
-    my $entry;
+    my @entry;
 
     foreach my $id (keys %primary_id) {
-	$entry .= $self->get_Seq_by_id($id);
+      push @entry,$self->get_Seq_by_id($id);
     }
-    return $entry;
+    return wantarray ? @entry : $entry[0];
 
 }
 
@@ -585,12 +585,13 @@
 
     my ($record) = $self->read_record($fh,$pos,$recsize);
     my ($entryid,$rest)  = split(/\t/,$record,2);
+    $rest =~ s/\s+$//;
 
 #    print "Mid $recsize $mid $pos:$entryid:$rest:$record\n";
 #    print "Entry :$id:$entryid:$rest\n";
 
     
-    my ($first,$second) = sort { $a cmp $b} ($id,$entryid);
+    my ($first,$second) = $id le $entryid ? ($id,$entryid) : ($entryid,$id);
 
     if ($id eq $entryid) {
 
@@ -1207,11 +1208,11 @@
 	}
 
 	# Look for namespace lines
-	if ($_ =~ /(.*)_namespace.*\t(\S+)/) {
+	if (/(.*)_namespaces?\t(.+)/) {
 	    if ($1 eq "primary") {
 		$self->primary_namespace($2);
 	    } elsif ($1 eq "secondary") {
-		$self->secondary_namespaces($2);
+		$self->secondary_namespaces(split "\t",$2);
 	    } else {
 		$self->throw("Unknown namespace name in config file [$1");
 	    }
@@ -1563,13 +1564,13 @@
 =cut
 
 sub secondary_namespaces{
-   my ($obj,$value) = @_;
+   my ($obj, at values) = @_;
 
    if (!defined($obj->{secondary_namespaces})) {
        $obj->{secondary_namespaces} = [];
    }
-   if( defined $value) {
-       push(@{$obj->{'secondary_namespaces'}},$value);
+   if (@values) {
+       push(@{$obj->{'secondary_namespaces'}}, at values);
     }
    return @{$obj->{'secondary_namespaces'}};
 
@@ -1652,24 +1653,42 @@
   my $self = shift;
   my $format = shift;
   if ($format eq 'swissprot') {
-    return ('ACC',
-	    "^AC   (\\S+)\\;",
+    return ('ID',
+	    "^ID   (\\S+)",
 	    "^ID   (\\S+)",
-	    {ID  => "^ID   (\\S+)"});
+	    {
+	     ACC  => "^AC   (\\S+);"
+	    });
   }
 
   if ($format eq 'embl') {
-    return ('ACC',
-	    "^AC   (\\S+)\\;",
+    return ('ID',
+	    "^ID   (\\S+)",
 	    "^ID   (\\S+)",
-	    {ID  => "^ID   (\\S+)"});
+	    {
+	     ACC     => q/^AC   (\S+);/,
+	     VERSION => q/^SV\s+(\S+)/
+	    });
+  }
+
+  if ($format eq 'genbank') {
+    return ('ID',
+	    q/^LOCUS\s+(\S+)/,
+	    q/^LOCUS/,
+	    {
+	     ACC     => q/^ACCESSION\s+(\S+)/,
+	     VERSION => q/^VERSION\s+(\S+)/
+	    });
   }
 
   if ($format eq 'fasta') {
     return ('ACC',
 	    "^>(\\S+)",
 	    "^>",
-	    {ID => "^>\\S+ +(\\S+)"});
+	    {
+	     ID => "^>\\S+ +(\\S+)"
+	    }
+	   );
   }
 
   $self->throw("I can't handle format $format");



More information about the Bioperl-guts-l mailing list