[Bioperl-guts-l] [16739] bioperl-live/trunk/Bio/AlignIO: Can pass in -alphabet as init argument to Bio::AlignIO to prevent alphabet guessing code in Bio::PrimarySeq from being invoked.
Jason Stajich
jason at dev.open-bio.org
Fri Jan 22 14:17:14 EST 2010
Revision: 16739
Author: jason
Date: 2010-01-22 14:17:13 -0500 (Fri, 22 Jan 2010)
Log Message:
-----------
Can pass in -alphabet as init argument to Bio::AlignIO to prevent alphabet guessing code in Bio::PrimarySeq from being invoked. This should be a speed-up, and it removes warnings when alignments contain very gappy (or all-gap) sequences.
Modified Paths:
--------------
bioperl-live/trunk/Bio/AlignIO/arp.pm
bioperl-live/trunk/Bio/AlignIO/clustalw.pm
bioperl-live/trunk/Bio/AlignIO/emboss.pm
bioperl-live/trunk/Bio/AlignIO/fasta.pm
bioperl-live/trunk/Bio/AlignIO/largemultifasta.pm
bioperl-live/trunk/Bio/AlignIO/maf.pm
bioperl-live/trunk/Bio/AlignIO/mase.pm
bioperl-live/trunk/Bio/AlignIO/mega.pm
bioperl-live/trunk/Bio/AlignIO/meme.pm
bioperl-live/trunk/Bio/AlignIO/metafasta.pm
bioperl-live/trunk/Bio/AlignIO/msf.pm
bioperl-live/trunk/Bio/AlignIO/nexml.pm
bioperl-live/trunk/Bio/AlignIO/nexus.pm
bioperl-live/trunk/Bio/AlignIO/pfam.pm
bioperl-live/trunk/Bio/AlignIO/phylip.pm
bioperl-live/trunk/Bio/AlignIO/po.pm
bioperl-live/trunk/Bio/AlignIO/proda.pm
bioperl-live/trunk/Bio/AlignIO/prodom.pm
bioperl-live/trunk/Bio/AlignIO/psi.pm
bioperl-live/trunk/Bio/AlignIO/selex.pm
bioperl-live/trunk/Bio/AlignIO/xmfa.pm
Modified: bioperl-live/trunk/Bio/AlignIO/arp.pm
===================================================================
--- bioperl-live/trunk/Bio/AlignIO/arp.pm 2010-01-22 19:16:39 UTC (rev 16738)
+++ bioperl-live/trunk/Bio/AlignIO/arp.pm 2010-01-22 19:17:13 UTC (rev 16739)
@@ -133,7 +133,7 @@
$self->{state}->{in_curly_block} = 1;
next SCAN;
}
- $cur_data =~ s{["']}{}g;
+ $cur_data =~ s{[\"\']}{}g;
$cur_data =~ s{\s*$}{};
# per alignment annotation data (i.e. Sample Blocks) or
# annotation data retained for each alignment?
@@ -209,9 +209,10 @@
return unless defined $raw;
$raw =~ s{(?:^\s+|\s+$)}{}g;
my ($id, $samples, $seq) = split(' ', $raw);
- my $ls = Bio::LocatableSeq->new(-seq => $seq,
- -start => 1,
- -id => $id);
+ my $ls = Bio::LocatableSeq->new('-seq' => $seq,
+ '-start' => 1,
+ '-display_id' => $id,
+ '-alphabet' => $self->alphabet);
return($ls, $samples);
}
Modified: bioperl-live/trunk/Bio/AlignIO/clustalw.pm
===================================================================
--- bioperl-live/trunk/Bio/AlignIO/clustalw.pm 2010-01-22 19:16:39 UTC (rev 16738)
+++ bioperl-live/trunk/Bio/AlignIO/clustalw.pm 2010-01-22 19:17:13 UTC (rev 16739)
@@ -201,11 +201,13 @@
$str =~ s/[^A-Za-z]//g;
$end = length($str);
}
- my $seq = Bio::LocatableSeq->new(
- -seq => $alignments{$name},
- -id => $sname,
- -start => $start,
- -end => $end
+ my $seq = Bio::LocatableSeq->new
+ (
+ '-seq' => $alignments{$name},
+ '-display_id' => $sname,
+ '-start' => $start,
+ '-end' => $end,
+ '-alphabet' => $self->alphabet,
);
$aln->add_seq($seq);
}
Modified: bioperl-live/trunk/Bio/AlignIO/emboss.pm
===================================================================
--- bioperl-live/trunk/Bio/AlignIO/emboss.pm 2010-01-22 19:16:39 UTC (rev 16738)
+++ bioperl-live/trunk/Bio/AlignIO/emboss.pm 2010-01-22 19:17:13 UTC (rev 16739)
@@ -227,11 +227,13 @@
foreach my $seqname ( qw(seq1 seq2) ) {
return unless ( defined $data{$seqname} );
$data{$seqname}->{'name'} ||= $seqname;
- my $seq = Bio::LocatableSeq->new('-seq' => $data{$seqname}->{'data'},
- '-id' => $data{$seqname}->{'name'},
- '-start'=> $data{$seqname}->{'start'},
- '-end' => $data{$seqname}->{'end'},
- );
+ my $seq = Bio::LocatableSeq->new
+ ('-seq' => $data{$seqname}->{'data'},
+ '-display_id' => $data{$seqname}->{'name'},
+ '-start' => $data{$seqname}->{'start'},
+ '-end' => $data{$seqname}->{'end'},
+ '-alphabet' => $self->alphabet,
+ );
$aln->add_seq($seq);
}
return $aln;
Modified: bioperl-live/trunk/Bio/AlignIO/fasta.pm
===================================================================
--- bioperl-live/trunk/Bio/AlignIO/fasta.pm 2010-01-22 19:16:39 UTC (rev 16738)
+++ bioperl-live/trunk/Bio/AlignIO/fasta.pm 2010-01-22 19:17:13 UTC (rev 16739)
@@ -105,13 +105,15 @@
$start = 1;
$end = $self->_get_len($seqchar);
}
- $seq = Bio::LocatableSeq->new(
- -seq => $seqchar,
- -display_id => $seqname,
- -description => $desc,
- -start => $start,
- -end => $end,
- );
+ $seq = Bio::LocatableSeq->new
+ (
+ '-seq' => $seqchar,
+ '-display_id' => $seqname,
+ '-description' => $desc,
+ '-start' => $start,
+ '-end' => $end,
+ '-alphabet' => $self->alphabet,
+ );
$aln->add_seq($seq);
$self->debug("Reading $seqname\n");
}
@@ -146,12 +148,14 @@
# This logic now also reads empty lines at the
# end of the file. Skip this is seqchar and seqname is null
unless ( length($seqchar) == 0 && length($seqname) == 0 ) {
- $seq = Bio::LocatableSeq->new(-seq => $seqchar,
- -display_id => $seqname,
- -description => $desc,
- -start => $start,
- -end => $end,
- );
+ $seq = Bio::LocatableSeq->new
+ ('-seq' => $seqchar,
+ '-display_id' => $seqname,
+ '-description' => $desc,
+ '-start' => $start,
+ '-end' => $end,
+ '-alphabet' => $self->alphabet,
+ );
$aln->add_seq($seq);
$self->debug("Reading $seqname\n");
}
Modified: bioperl-live/trunk/Bio/AlignIO/largemultifasta.pm
===================================================================
--- bioperl-live/trunk/Bio/AlignIO/largemultifasta.pm 2010-01-22 19:16:39 UTC (rev 16738)
+++ bioperl-live/trunk/Bio/AlignIO/largemultifasta.pm 2010-01-22 19:17:13 UTC (rev 16739)
@@ -101,7 +101,7 @@
sub next_seq {
my ($self) = @_;
- my $largeseq = $self->sequence_factory->create();
+ my $largeseq = $self->sequence_factory->create(-alphabet=>$self->alphabet);
my ($id,$fulldesc,$entry);
my $count = 0;
my $seen = 0;
Modified: bioperl-live/trunk/Bio/AlignIO/maf.pm
===================================================================
--- bioperl-live/trunk/Bio/AlignIO/maf.pm 2010-01-22 19:16:39 UTC (rev 16738)
+++ bioperl-live/trunk/Bio/AlignIO/maf.pm 2010-01-22 19:17:13 UTC (rev 16739)
@@ -171,11 +171,12 @@
# adjust coordinates to be one-based inclusive
$start = $start + 1;
$strand = $strand eq '+' ? 1 : $strand eq '-' ? -1 : 0;
- my $seq = Bio::LocatableSeq->new('-seq' => $text,
- '-id' => $src,
- '-start' => $start,
- '-end' => $start + $size - 1,
- '-strand' => $strand,
+ my $seq = Bio::LocatableSeq->new('-seq' => $text,
+ '-display_id' => $src,
+ '-start' => $start,
+ '-end' => $start + $size - 1,
+ '-strand' => $strand,
+ '-alphabet' => $self->alphabet,
);
$aln->add_seq($seq);
}
Modified: bioperl-live/trunk/Bio/AlignIO/mase.pm
===================================================================
--- bioperl-live/trunk/Bio/AlignIO/mase.pm 2010-01-22 19:16:39 UTC (rev 16738)
+++ bioperl-live/trunk/Bio/AlignIO/mase.pm 2010-01-22 19:17:13 UTC (rev 16739)
@@ -121,11 +121,12 @@
$end = length($seq_residues);
}
- $add = Bio::LocatableSeq->new('-seq'=>$seq,
- '-id'=>$name,
- '-start'=>$start,
- '-end'=>$end,
- );
+ $add = Bio::LocatableSeq->new('-seq' => $seq,
+ '-display_id' => $name,
+ '-start' => $start,
+ '-end' => $end,
+ '-alphabet' => $self->alphabet,
+ );
$aln->add_seq($add);
Modified: bioperl-live/trunk/Bio/AlignIO/mega.pm
===================================================================
--- bioperl-live/trunk/Bio/AlignIO/mega.pm 2010-01-22 19:16:39 UTC (rev 16738)
+++ bioperl-live/trunk/Bio/AlignIO/mega.pm 2010-01-22 19:17:13 UTC (rev 16739)
@@ -163,11 +163,11 @@
my $s = $seqs{$seqname};
$s =~ s/[$Bio::LocatableSeq::GAP_SYMBOLS]+//g;
my $end = length($s);
- my $seq = Bio::LocatableSeq->new(-alphabet => $alphabet,
- -id => $seqname,
- -seq => $seqs{$seqname},
- -start => 1,
- -end => $end);
+ my $seq = Bio::LocatableSeq->new('-alphabet' => $alphabet,
+ '-display_id' => $seqname,
+ '-seq' => $seqs{$seqname},
+ '-start' => 1,
+ '-end' => $end);
$aln->add_seq($seq);
}
Modified: bioperl-live/trunk/Bio/AlignIO/meme.pm
===================================================================
--- bioperl-live/trunk/Bio/AlignIO/meme.pm 2010-01-22 19:16:39 UTC (rev 16738)
+++ bioperl-live/trunk/Bio/AlignIO/meme.pm 2010-01-22 19:17:13 UTC (rev 16739)
@@ -154,12 +154,14 @@
# Make the sequence. Meme gives the start coordinate at the left
# hand side of the motif relative to the INPUT sequence.
my $end_pos = $start_pos + length($central) - 1;
- my $seq = Bio::LocatableSeq->new(-seq => $central,
- -id => $seq_name,
- -start => $start_pos,
- -end => $end_pos,
- -strand => $strand
- );
+ my $seq = Bio::LocatableSeq->new
+ ('-seq' => $central,
+ '-display_id' => $seq_name,
+ '-start' => $start_pos,
+ '-end' => $end_pos,
+ '-strand' => $strand,
+ '-alphabet' => $self->alphabet,
+ );
# Add the sequence motif to the alignment
$aln->add_seq($seq);
} elsif (($line =~ /^\-/) || ($line =~ /Sequence name/)){
Modified: bioperl-live/trunk/Bio/AlignIO/metafasta.pm
===================================================================
--- bioperl-live/trunk/Bio/AlignIO/metafasta.pm 2010-01-22 19:16:39 UTC (rev 16738)
+++ bioperl-live/trunk/Bio/AlignIO/metafasta.pm 2010-01-22 19:17:13 UTC (rev 16739)
@@ -141,11 +141,12 @@
defined $sequence && $sequence =~ s/\s//g; # Remove whitespace
- $seq = Bio::Seq::Meta->new('-seq'=>$sequence,
- '-id'=>$id,
- '-start'=>$start,
- '-end'=>$end
- );
+ $seq = Bio::Seq::Meta->new('-seq' => $sequence,
+ '-display_id' => $id,
+ '-start' => $start,
+ '-end' => $end,
+ '-alphabet' => $self->alphabet,
+ );
foreach my $meta (@metas) {
my ($name,$string) = split /\n/, $meta;
Modified: bioperl-live/trunk/Bio/AlignIO/msf.pm
===================================================================
--- bioperl-live/trunk/Bio/AlignIO/msf.pm 2010-01-22 19:16:39 UTC (rev 16738)
+++ bioperl-live/trunk/Bio/AlignIO/msf.pm 2010-01-22 19:17:13 UTC (rev 16739)
@@ -89,72 +89,73 @@
=cut
sub next_aln {
- my $self = shift;
- my $entry;
- my (%hash,$name,$str,@names,$seqname,$start,$end,$count,$seq);
+ my $self = shift;
+ my $entry;
+ my (%hash,$name,$str,@names,$seqname,$start,$end,$count,$seq);
- my $aln = Bio::SimpleAlign->new(-source => 'gcg' );
+ my $aln = Bio::SimpleAlign->new(-source => 'gcg' );
@@ Diff output truncated at 10000 characters. @@
More information about the Bioperl-guts-l
mailing list