[Bioperl-guts-l] [15033] bioperl-live/trunk/Bio/AlignIO/stockholm.pm: [bug 2567]
Christopher John Fields
cjfields at dev.open-bio.org
Wed Nov 26 16:03:17 EST 2008
Revision: 15033
Author: cjfields
Date: 2008-11-26 16:03:17 -0500 (Wed, 26 Nov 2008)
Log Message:
-----------
[bug 2567]
* store sequence by placement in block (not by name)
* this will not fix locatableseqs with same id/start-end (which are replaced in a SimpleAlign with a warning)
Modified Paths:
--------------
bioperl-live/trunk/Bio/AlignIO/stockholm.pm
Modified: bioperl-live/trunk/Bio/AlignIO/stockholm.pm
===================================================================
--- bioperl-live/trunk/Bio/AlignIO/stockholm.pm 2008-11-26 18:19:45 UTC (rev 15032)
+++ bioperl-live/trunk/Bio/AlignIO/stockholm.pm 2008-11-26 21:03:17 UTC (rev 15033)
@@ -282,10 +282,10 @@
my $self = shift;
my $line;
- my ($id, $name, $seqname, $seq, $count, $tag, $data);
+ #my ( $seqname, $seq, $count, $tag, $data);
my $seen_rc;
- my ($refct, $bct, $lnkct) = (0,0,0);
- my @c2name;
+ my ($refct, $bct, $lnkct, $cpos) = (0,0,0,0);
+ my %name2c;
my (%align, %accession, %desc, %seq_meta, %aln_meta, %annotation);
# in stockholm format, every non-blank line that does not start
@@ -311,7 +311,11 @@
READLINE:
while( defined($line = $self->_readline) ) {
#skip empty lines
- next if $line =~ /^\s+$/;
+ my ($name, $id, $tag, $data, $seq);
+ if ($line =~ /^\s+$/) {
+ $cpos = 0;
+ next;
+ }
# Double slash (//) signals end of file.
last if $line =~ m{^//};
@@ -381,9 +385,9 @@
}
# Bio::Seq::Meta is not AnnotationI, so can't add seq-based
# Annotations yet; uncomment to see what is passed by
- #else {
- # $self->debug("Missed data: $entry");
- #}
+ else {
+ $self->debug("Missed data: $line");
+ }
} elsif( $line =~ m{^\#=GR\s+(\S+)\s+(\S+)\s+([^\n]+)} ) {
# meta strings per sequence
($name, $tag, $data) = ($1, $2, $3);
@@ -393,20 +397,24 @@
($tag, $data) = ($1, $2);
$aln_meta{$tag} .= $data;
} elsif( $line =~ m{^([^\#]\S+)\s+([A-Za-z.\-\*]+)\s*}xms ) {
- ($name,$seq) = ($1,$2);
- if( ! exists $align{$name} ) {
- push @c2name, $name;
+ ($name, $seq) = ($1,$2);
+ if( ! exists $name2c{++$cpos} ) {
+ $name2c{$cpos} = $name;
}
- $align{$name} .= $seq;
+ $align{ $cpos } .= $seq;
} else {
# debugging to catch missed data; uncomment to turn on
- #$self->debug("Missed Data: $line");
+ $self->debug("Missed Data: $line");
}
}
+ #$self->debug(Dumper(\%align));
+ #$self->debug(Dumper(\%name2c));
+
# ok... now we can make the sequences
- for my $name ( @c2name ) {
+ for my $data ( map {[$name2c{$_}, $_] } sort { $a <=> $b } keys %name2c ) {
+ my ($name, $pos) = @$data;
my ($start, $end, $stranded, $seqname);
my $alphabet = $self->alphabet || 'dna';
my $strand = ($alphabet eq 'dna' || $alphabet eq 'rna') ? 1 : 0;
@@ -422,8 +430,9 @@
$start = 1;
# if end is not specified, allow LocatableSeq to calculate it
}
- $seq = Bio::Seq::Meta->new
- ('-seq' => $align{$name},
+ my $loc = Bio::Seq::Meta->new
+ (
+ '-seq' => $align{$pos},
'-display_id' => $seqname,
'-start' => $start,
'-end' => $end,
@@ -433,10 +442,10 @@
);
if (exists $seq_meta{$name}) {
for my $tag (sort keys %{ $seq_meta{$name} }) {
- $seq->named_meta($tag, $seq_meta{$name}->{$tag});
+ $loc->named_meta($tag, $seq_meta{$name}->{$tag});
}
}
- $aln->add_seq($seq);
+ $aln->add_seq($loc);
}
# add meta strings w/o sequence for consensus meta data. This is a hack and
@@ -506,10 +515,11 @@
}
# add annotations
- $aln->annotation($coll);
+ $aln->annotation($coll);
# If $end <= 0, we have either reached the end of
# file in <fh> or we have encountered some other error
+
return $aln;
}
More information about the Bioperl-guts-l
mailing list