[Bioperl-guts-l] [16739] bioperl-live/trunk/Bio/AlignIO: Can pass in -alphabet as init argument to Bio::AlignIO to prevent alphabet guessing code in Bio::PrimarySeq from being invoked.

Jason Stajich jason at dev.open-bio.org
Fri Jan 22 14:17:14 EST 2010


Revision: 16739
Author:   jason
Date:     2010-01-22 14:17:13 -0500 (Fri, 22 Jan 2010)
Log Message:
-----------
Can pass in -alphabet as init argument to Bio::AlignIO to prevent alphabet guessing code in Bio::PrimarySeq from being invoked. This should speed things up and remove warnings when alignments contain very gappy (or all-gap) sequences.

Modified Paths:
--------------
    bioperl-live/trunk/Bio/AlignIO/arp.pm
    bioperl-live/trunk/Bio/AlignIO/clustalw.pm
    bioperl-live/trunk/Bio/AlignIO/emboss.pm
    bioperl-live/trunk/Bio/AlignIO/fasta.pm
    bioperl-live/trunk/Bio/AlignIO/largemultifasta.pm
    bioperl-live/trunk/Bio/AlignIO/maf.pm
    bioperl-live/trunk/Bio/AlignIO/mase.pm
    bioperl-live/trunk/Bio/AlignIO/mega.pm
    bioperl-live/trunk/Bio/AlignIO/meme.pm
    bioperl-live/trunk/Bio/AlignIO/metafasta.pm
    bioperl-live/trunk/Bio/AlignIO/msf.pm
    bioperl-live/trunk/Bio/AlignIO/nexml.pm
    bioperl-live/trunk/Bio/AlignIO/nexus.pm
    bioperl-live/trunk/Bio/AlignIO/pfam.pm
    bioperl-live/trunk/Bio/AlignIO/phylip.pm
    bioperl-live/trunk/Bio/AlignIO/po.pm
    bioperl-live/trunk/Bio/AlignIO/proda.pm
    bioperl-live/trunk/Bio/AlignIO/prodom.pm
    bioperl-live/trunk/Bio/AlignIO/psi.pm
    bioperl-live/trunk/Bio/AlignIO/selex.pm
    bioperl-live/trunk/Bio/AlignIO/xmfa.pm

Modified: bioperl-live/trunk/Bio/AlignIO/arp.pm
===================================================================
--- bioperl-live/trunk/Bio/AlignIO/arp.pm	2010-01-22 19:16:39 UTC (rev 16738)
+++ bioperl-live/trunk/Bio/AlignIO/arp.pm	2010-01-22 19:17:13 UTC (rev 16739)
@@ -133,7 +133,7 @@
                 $self->{state}->{in_curly_block} = 1;
                 next SCAN;
             }
-            $cur_data =~ s{["']}{}g;
+            $cur_data =~ s{[\"\']}{}g;
             $cur_data =~ s{\s*$}{};
             # per alignment annotation data (i.e. Sample Blocks) or
             # annotation data retained for each alignment?
@@ -209,9 +209,10 @@
     return unless defined $raw;
     $raw =~ s{(?:^\s+|\s+$)}{}g;
     my ($id, $samples, $seq) = split(' ', $raw);
-    my $ls = Bio::LocatableSeq->new(-seq => $seq,
-                                    -start => 1,
-                                    -id => $id);
+    my $ls = Bio::LocatableSeq->new('-seq'        => $seq,
+                                    '-start'      => 1,
+                                    '-display_id' => $id,
+				    '-alphabet'   => $self->alphabet);
     return($ls, $samples);
 }
 

Modified: bioperl-live/trunk/Bio/AlignIO/clustalw.pm
===================================================================
--- bioperl-live/trunk/Bio/AlignIO/clustalw.pm	2010-01-22 19:16:39 UTC (rev 16738)
+++ bioperl-live/trunk/Bio/AlignIO/clustalw.pm	2010-01-22 19:17:13 UTC (rev 16739)
@@ -201,11 +201,13 @@
             $str =~ s/[^A-Za-z]//g;
             $end = length($str);
         }
-        my $seq = Bio::LocatableSeq->new(
-            -seq   => $alignments{$name},
-            -id    => $sname,
-            -start => $start,
-            -end   => $end
+        my $seq = Bio::LocatableSeq->new
+	    (
+	     '-seq'         => $alignments{$name},
+	     '-display_id'  => $sname,
+	     '-start'       => $start,
+	     '-end'         => $end,
+	    '-alphabet'     => $self->alphabet,
         );
         $aln->add_seq($seq);
     }

Modified: bioperl-live/trunk/Bio/AlignIO/emboss.pm
===================================================================
--- bioperl-live/trunk/Bio/AlignIO/emboss.pm	2010-01-22 19:16:39 UTC (rev 16738)
+++ bioperl-live/trunk/Bio/AlignIO/emboss.pm	2010-01-22 19:17:13 UTC (rev 16739)
@@ -227,11 +227,13 @@
     foreach my $seqname ( qw(seq1 seq2) ) {
 	return unless ( defined $data{$seqname} );
 	$data{$seqname}->{'name'} ||= $seqname;
-	my $seq = Bio::LocatableSeq->new('-seq' => $data{$seqname}->{'data'},
-					'-id'  => $data{$seqname}->{'name'},
-					'-start'=> $data{$seqname}->{'start'},
-					'-end' => $data{$seqname}->{'end'},
-					);
+	my $seq = Bio::LocatableSeq->new
+	    ('-seq'         => $data{$seqname}->{'data'},
+	     '-display_id'  => $data{$seqname}->{'name'},
+	     '-start'       => $data{$seqname}->{'start'},
+	     '-end'         => $data{$seqname}->{'end'},
+	     '-alphabet'    => $self->alphabet,
+	     );
 	$aln->add_seq($seq);
     }
     return $aln;

Modified: bioperl-live/trunk/Bio/AlignIO/fasta.pm
===================================================================
--- bioperl-live/trunk/Bio/AlignIO/fasta.pm	2010-01-22 19:16:39 UTC (rev 16738)
+++ bioperl-live/trunk/Bio/AlignIO/fasta.pm	2010-01-22 19:17:13 UTC (rev 16739)
@@ -105,13 +105,15 @@
 			$start = 1;
 			$end = $self->_get_len($seqchar);
 		    }
-		    $seq = Bio::LocatableSeq->new(
-						  -seq         => $seqchar,
-						  -display_id  => $seqname,
-						  -description => $desc,
-						  -start       => $start,
-						  -end         => $end,
-						  );
+		    $seq = Bio::LocatableSeq->new
+			( 
+			  '-seq'         => $seqchar,
+			  '-display_id'  => $seqname,
+			  '-description' => $desc,
+			  '-start'       => $start,
+			  '-end'         => $end,
+			  '-alphabet'    => $self->alphabet,
+			  );
 		    $aln->add_seq($seq);
 		    $self->debug("Reading $seqname\n");
 		}
@@ -146,12 +148,14 @@
 	# This logic now also reads empty lines at the 
 	# end of the file. Skip this is seqchar and seqname is null
 	unless ( length($seqchar) == 0 && length($seqname) == 0 ) {
-	    $seq = Bio::LocatableSeq->new(-seq         => $seqchar,
-					  -display_id  => $seqname,
-					  -description => $desc,
-					  -start       => $start,
-					  -end         => $end,
-					  );
+	    $seq = Bio::LocatableSeq->new
+		('-seq'         => $seqchar,
+		 '-display_id'  => $seqname,
+		 '-description' => $desc,
+		 '-start'       => $start,
+		 '-end'         => $end,
+		 '-alphabet'    => $self->alphabet,
+		 );
 	    $aln->add_seq($seq);
 	    $self->debug("Reading $seqname\n");
 	}

Modified: bioperl-live/trunk/Bio/AlignIO/largemultifasta.pm
===================================================================
--- bioperl-live/trunk/Bio/AlignIO/largemultifasta.pm	2010-01-22 19:16:39 UTC (rev 16738)
+++ bioperl-live/trunk/Bio/AlignIO/largemultifasta.pm	2010-01-22 19:17:13 UTC (rev 16739)
@@ -101,7 +101,7 @@
 
 sub next_seq {
     my ($self) = @_;
-    my $largeseq = $self->sequence_factory->create();
+    my $largeseq = $self->sequence_factory->create(-alphabet=>$self->alphabet);
     my ($id,$fulldesc,$entry);
     my $count = 0;
     my $seen = 0;

Modified: bioperl-live/trunk/Bio/AlignIO/maf.pm
===================================================================
--- bioperl-live/trunk/Bio/AlignIO/maf.pm	2010-01-22 19:16:39 UTC (rev 16738)
+++ bioperl-live/trunk/Bio/AlignIO/maf.pm	2010-01-22 19:17:13 UTC (rev 16739)
@@ -171,11 +171,12 @@
 	# adjust coordinates to be one-based inclusive
         $start = $start + 1;
     $strand = $strand eq '+' ? 1 : $strand eq '-' ? -1 : 0;
-	my $seq = Bio::LocatableSeq->new('-seq'    => $text,
-					'-id'     => $src,
-					'-start'  => $start,
-					'-end'    => $start + $size - 1,
-					'-strand' => $strand,
+	my $seq = Bio::LocatableSeq->new('-seq'          => $text,
+					 '-display_id'   => $src,
+					 '-start'        => $start,
+					 '-end'          => $start + $size - 1,
+					 '-strand'       => $strand,
+					 '-alphabet'     => $self->alphabet,
 					);
 	$aln->add_seq($seq);
     }

Modified: bioperl-live/trunk/Bio/AlignIO/mase.pm
===================================================================
--- bioperl-live/trunk/Bio/AlignIO/mase.pm	2010-01-22 19:16:39 UTC (rev 16738)
+++ bioperl-live/trunk/Bio/AlignIO/mase.pm	2010-01-22 19:17:13 UTC (rev 16739)
@@ -121,11 +121,12 @@
 	    $end = length($seq_residues);
 	}
 
-	$add = Bio::LocatableSeq->new('-seq'=>$seq,
-			    '-id'=>$name,
-			    '-start'=>$start,
-			    '-end'=>$end,
-			    );
+	$add = Bio::LocatableSeq->new('-seq'        => $seq,
+				      '-display_id' => $name,
+				      '-start'      => $start,
+				      '-end'        => $end,
+				      '-alphabet'   => $self->alphabet,
+				      );
 
 
        $aln->add_seq($add);

Modified: bioperl-live/trunk/Bio/AlignIO/mega.pm
===================================================================
--- bioperl-live/trunk/Bio/AlignIO/mega.pm	2010-01-22 19:16:39 UTC (rev 16738)
+++ bioperl-live/trunk/Bio/AlignIO/mega.pm	2010-01-22 19:17:13 UTC (rev 16739)
@@ -163,11 +163,11 @@
        my $s = $seqs{$seqname};
        $s =~ s/[$Bio::LocatableSeq::GAP_SYMBOLS]+//g;
        my $end = length($s);
-       my $seq = Bio::LocatableSeq->new(-alphabet => $alphabet,
-				       -id => $seqname,
-				       -seq => $seqs{$seqname},
-				       -start => 1,
-				       -end   => $end);
+       my $seq = Bio::LocatableSeq->new('-alphabet'   => $alphabet,
+					'-display_id' => $seqname,
+					'-seq'        => $seqs{$seqname},
+					'-start'      => 1,
+					'-end'        => $end);
 
        $aln->add_seq($seq);
    }

Modified: bioperl-live/trunk/Bio/AlignIO/meme.pm
===================================================================
--- bioperl-live/trunk/Bio/AlignIO/meme.pm	2010-01-22 19:16:39 UTC (rev 16738)
+++ bioperl-live/trunk/Bio/AlignIO/meme.pm	2010-01-22 19:17:13 UTC (rev 16739)
@@ -154,12 +154,14 @@
 			# Make the sequence.  Meme gives the start coordinate at the left
 			# hand side of the motif relative to the INPUT sequence.
 			my $end_pos = $start_pos + length($central) - 1;
-			my $seq = Bio::LocatableSeq->new(-seq    => $central,
-													  -id     => $seq_name,
-													  -start  => $start_pos,
-													  -end    => $end_pos,
-													  -strand => $strand
-													 );
+			my $seq = Bio::LocatableSeq->new
+			    ('-seq'            => $central,
+			     '-display_id'     => $seq_name,
+			     '-start'          => $start_pos,
+			     '-end'            => $end_pos,
+			     '-strand'         => $strand,
+			     '-alphabet'       => $self->alphabet,
+			     );
 			# Add the sequence motif to the alignment
 			$aln->add_seq($seq);
 		} elsif (($line =~ /^\-/) || ($line =~ /Sequence name/)){

Modified: bioperl-live/trunk/Bio/AlignIO/metafasta.pm
===================================================================
--- bioperl-live/trunk/Bio/AlignIO/metafasta.pm	2010-01-22 19:16:39 UTC (rev 16738)
+++ bioperl-live/trunk/Bio/AlignIO/metafasta.pm	2010-01-22 19:17:13 UTC (rev 16739)
@@ -141,11 +141,12 @@
 
         defined $sequence && $sequence =~ s/\s//g; # Remove whitespace
 
-        $seq = Bio::Seq::Meta->new('-seq'=>$sequence,
-				   '-id'=>$id,
-				   '-start'=>$start,
-				   '-end'=>$end
-				  );
+        $seq = Bio::Seq::Meta->new('-seq'        => $sequence,
+				   '-display_id' => $id,
+				   '-start'      => $start,
+				   '-end'        => $end,
+				   '-alphabet'   => $self->alphabet,
+				   );
 
         foreach my $meta (@metas) {
             my ($name,$string) = split /\n/, $meta;

Modified: bioperl-live/trunk/Bio/AlignIO/msf.pm
===================================================================
--- bioperl-live/trunk/Bio/AlignIO/msf.pm	2010-01-22 19:16:39 UTC (rev 16738)
+++ bioperl-live/trunk/Bio/AlignIO/msf.pm	2010-01-22 19:17:13 UTC (rev 16739)
@@ -89,72 +89,73 @@
 =cut
 
 sub next_aln {
-	my $self = shift;
-	my $entry;
-	my (%hash,$name,$str,@names,$seqname,$start,$end,$count,$seq);
+    my $self = shift;
+    my $entry;
+    my (%hash,$name,$str,@names,$seqname,$start,$end,$count,$seq);
 
-	my $aln =  Bio::SimpleAlign->new(-source => 'gcg' );
+    my $aln =  Bio::SimpleAlign->new(-source => 'gcg' );
 

@@ Diff output truncated at 10000 characters. @@


More information about the Bioperl-guts-l mailing list