[Bioperl-guts-l] [15093] bioperl-live/trunk: [bug 2450]
Christopher John Fields
cjfields at dev.open-bio.org
Thu Dec 4 21:46:40 EST 2008
Revision: 15093
Author: cjfields
Date: 2008-12-04 21:46:40 -0500 (Thu, 04 Dec 2008)
Log Message:
-----------
[bug 2450]
* aln-specific annotation in SimpleAlign->annotation
* seq-specific annotation in SimpleAlign's FeatureHolderI (and tied to related sequence)
* Bio::Annotation::Target (which was DBLink-y) now inherits DBLink (so we now have a DBLink that's RangeI)
* tests for above
Modified Paths:
--------------
bioperl-live/trunk/Bio/AlignIO/Handler/GenericAlignHandler.pm
bioperl-live/trunk/Bio/AlignIO/stockholm.pm
bioperl-live/trunk/Bio/Annotation/Target.pm
bioperl-live/trunk/Bio/SimpleAlign.pm
bioperl-live/trunk/t/AlignIO/stockholm.t
Modified: bioperl-live/trunk/Bio/AlignIO/Handler/GenericAlignHandler.pm
===================================================================
--- bioperl-live/trunk/Bio/AlignIO/Handler/GenericAlignHandler.pm 2008-12-04 19:02:16 UTC (rev 15092)
+++ bioperl-live/trunk/Bio/AlignIO/Handler/GenericAlignHandler.pm 2008-12-05 02:46:40 UTC (rev 15093)
@@ -22,9 +22,11 @@
use Bio::Annotation::Collection;
use Bio::Annotation::Comment;
use Bio::Annotation::SimpleValue;
+use Bio::Annotation::Target;
use Bio::Annotation::DBLink;
use Bio::Annotation::Reference;
use Bio::SimpleAlign;
+use Data::Dumper;
use base qw(Bio::Root::Root Bio::HandlerBaseI);
@@ -40,7 +42,7 @@
'ID' => \&_generic_store,
'DESCRIPTION' => \&_generic_store,
'REFERENCE' => \&_generic_reference,
- 'DBLINK' => \&_stockholm_dblink,
+ 'DBLINK' => \&_stockholm_target,
'DATABASE_COMMENT' => \&_generic_comment,
'ALIGNMENT_COMMENT' => \&_generic_comment,
'_DEFAULT_' => \&_generic_simplevalue
@@ -145,6 +147,8 @@
sub reset_parameters {
my $self = shift;
$self->{'_params'} = undef;
+ $self->{'_nse_cache'} = undef;
+ $self->{'_features'} = undef;
}
=head2 format
@@ -194,6 +198,7 @@
}
$data->{$id} = $self->{'_params'}->{$id} if (exists $self->{'_params'}->{$id});
}
+ $data ||= {};
} else {
$data = $self->{'_params'};
}
@@ -333,6 +338,16 @@
$param{'-'.lc $p} = $seq->{$p} if exists $seq->{$p};
}
my $ls = $class->new(%param);
+ # a little switcheroo to attach the sequence
+ # (though using it to get seq() doesn't work correctly yet!)
+ if (defined $seq->{NSE} &&
+ exists $self->{'_features'} &&
+ exists $self->{'_features'}->{ $seq->{NSE} }) {
+ for my $feat (@{ $self->{'_features'}->{ $seq->{NSE} } }) {
+ push @{ $self->{'_params'}->{'-features'} }, $feat;
+ $feat->attach_seq($ls);
+ }
+ }
$seq = $ls;
}
}
@@ -425,31 +440,46 @@
}
# Some DBLinks in Stockholm format are unique, so a unique handler for them
-sub _stockholm_dblink {
+sub _stockholm_target {
my ($self, $data) = @_;
# process database info
$self->_from_stk_dblink($data);
my $comment;
- # Note that DBLink has no start/end methods, so storing this in comment for
- # now
- if ($data->{DBLINK_START} || $data->{DBLINK_END}) {
- $comment = "Start: ".$data->{DBLINK_START}." End: ".$data->{DBLINK_END};
- }
- my $dblink = Bio::Annotation::DBLink->new(
+ # Bio::Annotation::Target is now a DBLink, but has additional (RangeI)
+ # capabilities (for PDB data)
+ my $dblink = Bio::Annotation::Target->new(
-database => $data->{DBLINK_DB},
-primary_id => $data->{DBLINK_ACC},
-optional_id => $data->{DBLINK_OPT},
- -tagname => lc $data->{NAME},
+ -start => $data->{DBLINK_START},
+ -end => $data->{DBLINK_END},
+ -strand => $data->{DBLINK_STRAND},
+ -comment => $comment,
+ -tagname => 'dblink',
);
if ($data->{ALIGNMENT}) {
- # alignment DBLink
- $dblink->comment($comment);
+ # Alignment-specific DBLinks
$self->annotation_collection->add_Annotation($dblink);
} else {
- # Sequence DBLink
- $comment = "NSE: ".($data->{NSE} || '').' '.$comment;
- $dblink->comment($comment);
- $self->seq_annotation_collection->add_Annotation($dblink);
+ # Sequence-specific DBLinks
+ # These should come with identifying information of some sort
+ # (ID/START/END/STRAND). Make into a SeqFeature (SimpleAlign is
+ # FeatureHolderI) spanning the length acc. to the NSE. Add the DBLink as
+ # Annotation specific to that SeqFeature, store in an internal hash by
+ # NSE so we can tie the LocatableSeq to the proper Features
+ $self->_from_nse($data) if $data->{NSE};
+ $self->throw("Must supply an sequence DISPLAY_ID or NSE for sequence-related
+ DBLinks") unless $data->{ACCESSION_NUMBER} || $data->{DISPLAY_ID};
+ my $sf = Bio::SeqFeature::Generic->new(-seq_id => $data->{DISPLAY_ID},
+ -accession_number => $data->{ACCESSION_NUMBER},
+ -start => $data->{START},
+ -end => $data->{END},
+ -strand => $data->{STRAND}
+ );
+ $sf->annotation->add_Annotation($dblink);
+ # index by NSE
+ push @{ $self->{'_features'}->{ $data->{NSE} } }, $sf;
+ #$self->seq_annotation_collection->add_Annotation($dblink);
}
}
@@ -467,7 +497,7 @@
if (exists $self->{'_params'}->{'-seq_accession'}) {
$new_acc = $self->{'_params'}->{'-seq_accession'}->{$data->{NSE}};
}
- if ($nse =~ m{(\S+?)\.?(\d+)?/(\d+)-(\d+)}xmso) {
+ if ($nse =~ m{(\S+?)(?:\.(\d+))?/(\d+)-(\d+)}xmso) {
my $strand = $data->{ALPHABET} eq 'dna' || $data->{ALPHABET} eq 'rna' ? 1 : undef;
my ($start, $end) = ($3, $4);
if ($start > $end) {
@@ -483,9 +513,9 @@
# we can parse for version here if needed
$data->{ACCESSION_NUMBER} = $data->{NSE};
}
- #delete $data->{NSE};
}
+# this will probably be split up into subhandlers based on Record/DB
sub _from_stk_dblink {
my ($self, $data) = @_;
return unless my $raw = $data->{DATA};
Modified: bioperl-live/trunk/Bio/AlignIO/stockholm.pm
===================================================================
--- bioperl-live/trunk/Bio/AlignIO/stockholm.pm 2008-12-04 19:02:16 UTC (rev 15092)
+++ bioperl-live/trunk/Bio/AlignIO/stockholm.pm 2008-12-05 02:46:40 UTC (rev 15093)
@@ -90,7 +90,7 @@
PI SimpleValue previous_ids value
DC Comment database_comment comment
CC Comment alignment_comment comment
- DR DBLink aln_dblink database
+ DR Target dblink database
primary_id
comment
AM SimpleValue build_method value
@@ -205,9 +205,10 @@
reference
database_comment
custom
- aln_dblink
+ dblink
alignment_comment
num_sequences
+ seq_annotation
);
# This maps the tagname back to a tagname-annotation value combination.
@@ -228,7 +229,7 @@
'num_sequences' => 'SQ/SimpleValue',
'previous_ids' => 'PI/SimpleValue',
'database_comment' => 'DC/SimpleValue',
- 'aln_dblink' => 'DR/DBLink',
+ 'dblink' => 'DR/DBLink',
'reference' => 'RX/Reference',
'ref_number' => 'RN/number',
'ref_comment' => 'RC/comment',
@@ -237,6 +238,7 @@
'ref_authors' => 'RA/authors',
'ref_location' => 'RL/location',
'alignment_comment' => 'CC/Comment',
+ 'seq_annotation' => 'DR/Collection',
#Pfam-specific
'build_method' => 'AM/SimpleValue',
'pfam_family_accession' => 'NE/SimpleValue',
@@ -258,17 +260,22 @@
-file => '>file');
Function: Initialize a new L<Bio::AlignIO::phylip> reader or writer
Returns : L<Bio::AlignIO> object
- Args : -linelength : length of the line for the alignment block
- -alphabet : symbol alphabet to set the sequences to. If not set,
- the parser will try to guess based on the alignment
- accession (if present), defaulting to 'dna'.
+ Args : -line_length : length of the line for the alignment block
+ -alphabet : symbol alphabet to set the sequences to. If not set,
+ the parser will try to guess based on the alignment
+ accession (if present), defaulting to 'dna'.
+ -spaces : (optional, def = 1) boolean to add a space in between
+ the "# STOCKHOLM 1.0" header and the annotation and
+ the annotation and the alignment.
=cut
sub _initialize {
my ( $self, @args ) = @_;
$self->SUPER::_initialize(@args);
- my ($handler, $linelength) = $self->_rearrange([qw(HANDLER LINE_LENGTH)],@args);
+ my ($handler, $linelength, $spaces) = $self->_rearrange([qw(HANDLER LINE_LENGTH SPACES)],@args);
+ $spaces = defined $spaces ? $spaces : 1;
+ $self->spaces($spaces);
# hash for functions for decoding keys.
$handler ? $self->alignhandler($handler) :
$self->alignhandler(Bio::AlignIO::Handler::GenericAlignHandler->new(
@@ -409,6 +416,20 @@
=cut
+{
+ my %LINK_CB = (
+ 'PDB' => sub {join('; ',($_[0]->database,
+ $_[0]->primary_id.' '.
+ ($_[0]->optional_id || ''),
+ $_[0]->start,
+ $_[0]->end)).';'},
+ 'SCOP' => sub {join('; ',($_[0]->database,
+ $_[0]->primary_id || '',
+ $_[0]->optional_id)).';'},
+ '_DEFAULT_' => sub {join('; ',($_[0]->database,
+ $_[0]->primary_id)).';'},
+ );
+
sub write_aln {
# enable array of SimpleAlign objects as well (see clustalw write_aln())
my ($self, @aln) = @_;
@@ -419,10 +440,11 @@
my $coll = $aln->annotation;
my ($aln_ann, $seq_ann) =
('#=GF ', '#=GS ');
- $self->_print("# $STKVERSION\n\n") || return 0;
-
+ $self->_print("# $STKVERSION\n") || return 0;
@@ Diff output truncated at 10000 characters. @@
More information about the Bioperl-guts-l
mailing list