[Bioperl-guts-l] [15093] bioperl-live/trunk: [bug 2450]

Christopher John Fields cjfields at dev.open-bio.org
Thu Dec 4 21:46:40 EST 2008


Revision: 15093
Author:   cjfields
Date:     2008-12-04 21:46:40 -0500 (Thu, 04 Dec 2008)

Log Message:
-----------
[bug 2450] 
* aln-specific annotation in SimpleAlign->annotation
* seq-specific annotation in SimpleAlign's FeatureHolderI (and tied to related sequence)
* Bio::Annotation::Target (which was DBLink-y) now inherits DBLink (so we now have a DBLink that's RangeI)
* tests for above

Modified Paths:
--------------
    bioperl-live/trunk/Bio/AlignIO/Handler/GenericAlignHandler.pm
    bioperl-live/trunk/Bio/AlignIO/stockholm.pm
    bioperl-live/trunk/Bio/Annotation/Target.pm
    bioperl-live/trunk/Bio/SimpleAlign.pm
    bioperl-live/trunk/t/AlignIO/stockholm.t

Modified: bioperl-live/trunk/Bio/AlignIO/Handler/GenericAlignHandler.pm
===================================================================
--- bioperl-live/trunk/Bio/AlignIO/Handler/GenericAlignHandler.pm	2008-12-04 19:02:16 UTC (rev 15092)
+++ bioperl-live/trunk/Bio/AlignIO/Handler/GenericAlignHandler.pm	2008-12-05 02:46:40 UTC (rev 15093)
@@ -22,9 +22,11 @@
 use Bio::Annotation::Collection;
 use Bio::Annotation::Comment;
 use Bio::Annotation::SimpleValue;
+use Bio::Annotation::Target;
 use Bio::Annotation::DBLink;
 use Bio::Annotation::Reference;
 use Bio::SimpleAlign;
+use Data::Dumper;
 
 use base qw(Bio::Root::Root Bio::HandlerBaseI);
 
@@ -40,7 +42,7 @@
         'ID'                => \&_generic_store,
         'DESCRIPTION'       => \&_generic_store,
         'REFERENCE'         => \&_generic_reference,
-        'DBLINK'            => \&_stockholm_dblink,
+        'DBLINK'            => \&_stockholm_target,
         'DATABASE_COMMENT'  => \&_generic_comment,
         'ALIGNMENT_COMMENT' => \&_generic_comment,
         '_DEFAULT_'         => \&_generic_simplevalue
@@ -145,6 +147,8 @@
 sub reset_parameters {
     my $self = shift;
     $self->{'_params'} = undef;
+    $self->{'_nse_cache'} = undef;
+    $self->{'_features'} = undef;
 }
 
 =head2 format
@@ -194,6 +198,7 @@
             }
             $data->{$id} = $self->{'_params'}->{$id} if (exists $self->{'_params'}->{$id});
         }
+        $data ||= {};
     } else {
         $data = $self->{'_params'};
     }
@@ -333,6 +338,16 @@
                 $param{'-'.lc $p} = $seq->{$p} if exists $seq->{$p};
             }
             my $ls = $class->new(%param);
+            # a little switcheroo to attach the sequence
+            # (though using it to get seq() doesn't work correctly yet!)
+            if (defined $seq->{NSE} &&
+                exists $self->{'_features'} &&
+                exists $self->{'_features'}->{ $seq->{NSE} }) {
+                for my $feat (@{ $self->{'_features'}->{ $seq->{NSE} } }) {
+                    push @{ $self->{'_params'}->{'-features'} }, $feat;
+                    $feat->attach_seq($ls);
+                }
+            }
             $seq = $ls;
         }
     }
@@ -425,31 +440,46 @@
 }
 
 # Some DBLinks in Stockholm format are unique, so a unique handler for them
-sub _stockholm_dblink {
+sub _stockholm_target {
     my ($self, $data) = @_;
     # process database info
     $self->_from_stk_dblink($data);
     my $comment;
-    # Note that DBLink has no start/end methods, so storing this in comment for
-    # now
-    if ($data->{DBLINK_START} || $data->{DBLINK_END}) {
-        $comment = "Start: ".$data->{DBLINK_START}." End: ".$data->{DBLINK_END};
-    }
-    my $dblink = Bio::Annotation::DBLink->new(
+    # Bio::Annotation::Target is now a DBLink, but has additional (RangeI) 
+    # capabilities (for PDB data)
+    my $dblink = Bio::Annotation::Target->new(
         -database => $data->{DBLINK_DB},
         -primary_id => $data->{DBLINK_ACC},
         -optional_id => $data->{DBLINK_OPT},
-        -tagname => lc $data->{NAME},
+        -start => $data->{DBLINK_START},
+        -end => $data->{DBLINK_END},
+        -strand => $data->{DBLINK_STRAND},
+        -comment => $comment,
+        -tagname => 'dblink',
     );
     if ($data->{ALIGNMENT}) {
-        # alignment DBLink
-        $dblink->comment($comment);
+        # Alignment-specific DBLinks
         $self->annotation_collection->add_Annotation($dblink);
     } else {
-        # Sequence DBLink
-        $comment = "NSE: ".($data->{NSE} || '').' '.$comment;
-        $dblink->comment($comment);
-        $self->seq_annotation_collection->add_Annotation($dblink);
+        # Sequence-specific DBLinks
+        # These should come with identifying information of some sort
+        # (ID/START/END/STRAND).  Make into a SeqFeature (SimpleAlign is
+        # FeatureHolderI) spanning the length acc. to the NSE. Add the DBLink as
+        # Annotation specific to that SeqFeature, store in an internal hash by
+        # NSE so we can tie the LocatableSeq to the proper Features
+        $self->_from_nse($data) if $data->{NSE};
+        $self->throw("Must supply an sequence DISPLAY_ID or NSE for sequence-related
+            DBLinks") unless $data->{ACCESSION_NUMBER} || $data->{DISPLAY_ID};
+        my $sf = Bio::SeqFeature::Generic->new(-seq_id => $data->{DISPLAY_ID},
+                                               -accession_number => $data->{ACCESSION_NUMBER},
+                                               -start => $data->{START},
+                                               -end => $data->{END},
+                                               -strand => $data->{STRAND}
+                                               );
+        $sf->annotation->add_Annotation($dblink);
+        # index by NSE
+        push @{ $self->{'_features'}->{ $data->{NSE} } }, $sf;
+        #$self->seq_annotation_collection->add_Annotation($dblink);
     }
 }
 
@@ -467,7 +497,7 @@
     if (exists $self->{'_params'}->{'-seq_accession'}) {
         $new_acc = $self->{'_params'}->{'-seq_accession'}->{$data->{NSE}};
     }        
-    if ($nse =~ m{(\S+?)\.?(\d+)?/(\d+)-(\d+)}xmso) {
+    if ($nse =~ m{(\S+?)(?:\.(\d+))?/(\d+)-(\d+)}xmso) {
         my $strand = $data->{ALPHABET} eq 'dna' || $data->{ALPHABET} eq 'rna' ? 1 : undef;
         my ($start, $end) = ($3, $4);
         if ($start > $end) {
@@ -483,9 +513,9 @@
         # we can parse for version here if needed
         $data->{ACCESSION_NUMBER} = $data->{NSE};
     }
-    #delete $data->{NSE};
 }
 
+# this will probably be split up into subhandlers based on Record/DB 
 sub _from_stk_dblink {
     my ($self, $data) = @_;
     return unless my $raw = $data->{DATA};

Modified: bioperl-live/trunk/Bio/AlignIO/stockholm.pm
===================================================================
--- bioperl-live/trunk/Bio/AlignIO/stockholm.pm	2008-12-04 19:02:16 UTC (rev 15092)
+++ bioperl-live/trunk/Bio/AlignIO/stockholm.pm	2008-12-05 02:46:40 UTC (rev 15093)
@@ -90,7 +90,7 @@
      PI        SimpleValue       previous_ids               value
      DC        Comment           database_comment           comment
      CC        Comment           alignment_comment          comment
-     DR        DBLink            aln_dblink                 database
+     DR        Target            dblink                     database
                                                             primary_id
                                                             comment
      AM        SimpleValue       build_method               value
@@ -205,9 +205,10 @@
   reference
   database_comment
   custom
-  aln_dblink
+  dblink
   alignment_comment
   num_sequences
+  seq_annotation
   );
 
 # This maps the tagname back to a tagname-annotation value combination.
@@ -228,7 +229,7 @@
             'num_sequences'         =>  'SQ/SimpleValue',
             'previous_ids'          =>  'PI/SimpleValue',
             'database_comment'      =>  'DC/SimpleValue',
-            'aln_dblink'            =>  'DR/DBLink',
+            'dblink'                =>  'DR/DBLink',
             'reference'             =>  'RX/Reference',
             'ref_number'            =>  'RN/number',
             'ref_comment'           =>  'RC/comment',
@@ -237,6 +238,7 @@
             'ref_authors'           =>  'RA/authors',
             'ref_location'          =>  'RL/location',
             'alignment_comment'     =>  'CC/Comment',
+            'seq_annotation'        =>  'DR/Collection',
             #Pfam-specific 
             'build_method'          =>  'AM/SimpleValue',
             'pfam_family_accession' =>  'NE/SimpleValue',
@@ -258,17 +260,22 @@
 					  -file   => '>file');
  Function: Initialize a new L<Bio::AlignIO::phylip> reader or writer
  Returns : L<Bio::AlignIO> object
- Args    : -linelength :  length of the line for the alignment block
-           -alphabet   :  symbol alphabet to set the sequences to.  If not set,
-                          the parser will try to guess based on the alignment
-                          accession (if present), defaulting to 'dna'.
+ Args    : -line_length :  length of the line for the alignment block
+           -alphabet    :  symbol alphabet to set the sequences to.  If not set,
+                           the parser will try to guess based on the alignment
+                           accession (if present), defaulting to 'dna'.
+           -spaces      :  (optional, def = 1) boolean to add a space in between
+                           the "# STOCKHOLM 1.0" header and the annotation and
+                           the annotation and the alignment.
 
 =cut
 
 sub _initialize {
     my ( $self, @args ) = @_;
     $self->SUPER::_initialize(@args);
-    my ($handler, $linelength) = $self->_rearrange([qw(HANDLER LINE_LENGTH)],@args);
+    my ($handler, $linelength, $spaces) = $self->_rearrange([qw(HANDLER LINE_LENGTH SPACES)],@args);
+    $spaces = defined $spaces ? $spaces : 1;
+    $self->spaces($spaces);
     # hash for functions for decoding keys.
     $handler ? $self->alignhandler($handler) :
     $self->alignhandler(Bio::AlignIO::Handler::GenericAlignHandler->new(
@@ -409,6 +416,20 @@
 
 =cut
 
+{
+    my %LINK_CB = (
+        'PDB' => sub {join('; ',($_[0]->database,
+                                 $_[0]->primary_id.' '.
+                                 ($_[0]->optional_id || ''),
+                                 $_[0]->start,
+                                 $_[0]->end)).';'},
+        'SCOP' => sub {join('; ',($_[0]->database,
+                                 $_[0]->primary_id || '',
+                                 $_[0]->optional_id)).';'},
+        '_DEFAULT_' => sub {join('; ',($_[0]->database,
+                                 $_[0]->primary_id)).';'},
+    );
+
 sub write_aln {
     # enable array of SimpleAlign objects as well (see clustalw write_aln())
     my ($self, @aln) = @_;
@@ -419,10 +440,11 @@
     my $coll = $aln->annotation;
     my ($aln_ann, $seq_ann) =
        ('#=GF ', '#=GS ');
-    $self->_print("# $STKVERSION\n\n") || return 0;
-    
+    $self->_print("# $STKVERSION\n") || return 0;

@@ Diff output truncated at 10000 characters. @@



More information about the Bioperl-guts-l mailing list