[Bioperl-guts-l] [15619] bioperl-dev/trunk: added slim seqfeatures array-based instead of hash-based, get about 30-40% speedup if I remember correctly

Jason Stajich jason at dev.open-bio.org
Sat Mar 28 12:51:21 EDT 2009


Revision: 15619
Author:   jason
Date:     2009-03-28 12:51:20 -0400 (Sat, 28 Mar 2009)

Log Message:
-----------
added slim seqfeatures array-based instead of hash-based, get about 30-40% speedup if I remember correctly

Added Paths:
-----------
    bioperl-dev/trunk/Bio/SeqFeature/
    bioperl-dev/trunk/Bio/SeqFeature/Slim.pm
    bioperl-dev/trunk/t/SeqFeature_Slim.t

Added: bioperl-dev/trunk/Bio/SeqFeature/Slim.pm
===================================================================
--- bioperl-dev/trunk/Bio/SeqFeature/Slim.pm	                        (rev 0)
+++ bioperl-dev/trunk/Bio/SeqFeature/Slim.pm	2009-03-28 16:51:20 UTC (rev 15619)
@@ -0,0 +1,912 @@
+# $Id: Slim.pm 11754 2007-11-07 19:44:54Z jason $
+#
+# BioPerl module for Bio::SeqFeature::Slim
+#
+# Cared for by Jason Stajich <jason_AT_bioperl.org>
+#
+# Copyright Jason Stajich
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::SeqFeature::Slim - A very lightweight Bio::SeqFeatureI implementation
+
+=head1 SYNOPSIS
+
+use Bio::SeqFeature::Slim;
+
+=head1 DESCRIPTION
+
+Lightweight Bio::SeqFeatureI implemention.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org              - General discussion
+  http://bioperl.org/MailList.shtml  - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via
+the web:
+
+  http://bugzilla.bioperl.org/
+
+=head1 AUTHOR - Jason Stajich
+
+Email jason_AT_bioperl.org
+
+Describe contact details here
+
+=head1 CONTRIBUTORS
+
+Additional contributors names and emails here
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::SeqFeature::Slim;
+use strict;
+use base 'Bio::SeqFeatureI';
+
+use constant {
+    SEQ_ID      => 0,
+    SOURCE      => 1,
+    PRIMARY     => 2,
+    START       => 3,
+    STOP        => 4,
+    SCORE       => 5,
+    STRAND      => 6,
+    FRAME       => 7,
+    TAGS        => 8,
+    NAME        => 9,
+    PARENT      => 10,
+    GFF_TYPE    => 11,
+    SUBFEATURES => 12,
+    SEQ_OBJ     => 13,
+    VERBOSE     => 14,
+};
+
+
+=head2 new
+
+ Title   : new
+ Usage   : my $obj = new Bio::SeqFeature::Slim();
+ Function: Builds a new Bio::SeqFeature::Slim object 
+ Returns : an instance of Bio::SeqFeature::Slim
+ Args    :
+
+
+=cut
+
+sub new {
+  my($class) = shift;
+  my ($start, $end, $strand, $primary_tag, $source_tag, $primary, 
+      $source, $frame,$phase, $score, $tag, $gff_string, $gff1_string,
+      $seqname, $seqid, $annot, $location,$display_name,$pid,$id,
+      $parent_id,$parent) =
+	  Bio::Root::RootI->_rearrange([qw
+					(START
+					 END
+					 STRAND
+					 PRIMARY_TAG
+					 SOURCE_TAG
+					 PRIMARY
+					 SOURCE
+					 FRAME
+					 PHASE
+					 SCORE
+					 TAG
+					 GFF_STRING
+					 GFF1_STRING
+					 SEQNAME
+					 SEQ_ID
+					 ANNOTATION
+					 LOCATION
+					 DISPLAY_NAME
+					 PRIMARY_ID
+					 ID
+					 PARENT_ID
+					 PARENT
+					)], @_);
+  if( defined $primary_tag && defined $primary ) {
+      Bio::Root::RootI->warn("Both primary and primary_tag are defined, only use one");
+  } 
+  if( defined $source_tag && defined $source ) {
+      Bio::Root::RootI->warn("Both source and source_tag are defined, only use one");
+  } 
+
+  $primary_tag = $primary if defined $primary && ! defined $primary_tag;
+  $source_tag  = $source  if defined $source && ! defined $source_tag;
+  $frame = $phase if ! defined $frame && defined $phase;
+  my $self = bless [$seqid,        #0
+		    $source_tag,   #1
+		    $primary_tag,  #2
+		    $start,        #3
+		    $end,          #4
+		    $score,        #5
+		    $strand,       #6
+		    $frame,        #7
+		    {},            #8 tags
+		    $display_name, #9 display name
+		    undef,         #10 parent
+		    undef,         #11 gff_type
+		    [],            #12 seqfeatures
+		    undef,         #13 seqobj
+      ], $class;
+
+  $tag            && do {
+      foreach my $t ( keys %$tag ) {
+	  $self->add_tag_value($t, UNIVERSAL::isa($tag->{$t}, "ARRAY") ? 
+			       @{$tag->{$t}} : $tag->{$t});
+      }
+  };
+  if( defined $pid && defined $id ) {
+      Bio::Root::RootI->warn("Both primary_id and id are defined, only use one");
+  }
+  # save primary ID if it exists
+  $pid = $id if ! defined $pid;
+  defined $pid && $self->primary_id($pid);
+
+  if( defined $parent && defined $parent_id ) {
+      Bio::Root::RootI->warn("Both parent_id and parent are defined, only use one");
+  }
+
+  # save parent ID if it exists
+  $parent_id = $parent if ! defined $parent_id;
+  $parent_id && $self->parent_id($parent_id);
+
+  return $self;
+}
+
+sub verbose {
+    my ($self,$value) = @_;
+    
+    if (defined $value || ! defined $self->[VERBOSE]) {
+       $self->[VERBOSE] = $value || 0;
+    }
+    return $self->[VERBOSE];
+}
+
+=head1 Bio::SeqFeatureI specific methods
+
+New method interfaces.
+
+=cut
+
+=head2 display_name
+
+ Title   : display_name
+ Usage   : $name = $feat->display_name()
+ Function: Returns the human-readable name of the feature for displays.
+ Returns : a string
+ Args    : none
+
+=cut
+
+sub display_name {
+    my ($self) = shift;
+    if( @_) {
+	($self->[NAME]) = shift @_;
+    }
+    return $self->[NAME];
+}
+
+=head2 primary_tag
+
+ Title   : primary_tag
+ Usage   : $tag = $feat->primary_tag()
+ Function: Returns the primary tag for a feature,
+           eg 'exon'
+ Returns : a string
+ Args    : none
+
+
+=cut
+
+sub primary_tag{
+   my ($self) = shift;
+    if( @_) {
+	($self->[PRIMARY]) = shift @_;
+    }
+    return $self->[PRIMARY];
+}
+
+=head2 source_tag
+
+ Title   : source_tag
+ Usage   : $tag = $feat->source_tag()
+ Function: Returns the source tag for a feature,
+           eg, 'genscan'
+ Returns : a string
+ Args    : none
+
+
+=cut
+
+sub source_tag{
+   my ($self) = shift;
+    if( @_) {
+	($self->[SOURCE]) = shift @_;
+    }
+    return $self->[SOURCE];
+}
+
+
+=head2 frame
+
+ Title   : frame
+ Usage   : $frame = $feat->frame()
+ Function: Returns the frame for a feature,
+           eg, '1'
+ Returns : '.', 0,1,2
+ Args    : none
+
+
+=cut
+
+sub frame {
+   my ($self) = shift;
+    if( @_) {
+	($self->[FRAME]) = shift @_;
+    }
+    return $self->[FRAME];
+}
+
+*phase = \&frame;
+
+=head2 has_tag
+
+ Title   : has_tag
+ Usage   : $tag_exists = $self->has_tag('some_tag')
+ Function: 
+ Returns : TRUE if the specified tag exists, and FALSE otherwise
+ Args    :
+
+
+=cut
+
+sub has_tag{
+   my ($self,$tag) = @_;
+   return unless defined $tag;   
+   return exists($self->[TAGS]->{$tag});
+}
+
+=head2 score
+
+ Title   : score
+ Usage   : $score = $feat->score()
+ Function: Returns the score
+ Returns : a string/number
+ Args    : none
+
+=cut
+
+sub score {
+    my ($self) = shift;
+    if( @_) {
+	($self->[SCORE]) = shift @_;
+    }
+    return $self->[SCORE];
+}
+
+=head2 get_tag_values
+
+ Title   : get_tag_values
+ Usage   : @values = $self->get_tag_values('some_tag')
+ Function: 
+ Returns : An array comprising the values of the specified tag.
+ Args    : a string
+
+throws an exception if there is no such tag
+
+=cut
+
+sub get_tag_values {
+   my ($self,$tag) = @_;
+   return() unless defined $tag; 
+   return @{$self->[TAGS]->{$tag} || []};
+}
+
+=head2 get_tagset_values
+
+ Title   : get_tagset_values
+ Usage   : @values = $self->get_tagset_values(qw(label transcript_id product))
+ Function: 
+ Returns : An array comprising the values of the specified tags, in order of tags
+ Args    : An array of strings
+
+does NOT throw an exception if none of the tags are not present
+
+this method is useful for getting a human-readable label for a
+SeqFeatureI; not all tags can be assumed to be present, so a list of
+possible tags in preferential order is provided
+
+=cut
+
+# interface + abstract method
+sub get_tagset_values {
+    my ($self, @args) = @_;
+    my @vals = ();
+    foreach my $arg (@args) {
+        if ($self->has_tag($arg)) {
+            push(@vals, $self->get_tag_values($arg));
+        }
+    }
+    return @vals;
+}
+
+=head2 get_all_tags
+
+ Title   : get_all_tags
+ Usage   : @tags = $feat->get_all_tags()
+ Function: gives all tags for this feature
+ Returns : an array of strings
+ Args    : none
+
+
+=cut
+
+sub get_all_tags{
+    my ($self) = shift;
+   return keys %{$self->[TAGS] || {}};
+}
+
+=head2 attach_seq
+
+ Title   : attach_seq
+ Usage   : $sf->attach_seq($seq)
+ Function: Attaches a Bio::Seq object to this feature. This
+           Bio::Seq object is for the *entire* sequence: ie
+           from 1 to 10000
+
+           Note that it is not guaranteed that if you obtain a feature from
+           an object in bioperl, it will have a sequence attached. Also,
+           implementors of this interface can choose to provide an empty
+           implementation of this method. I.e., there is also no guarantee
+           that if you do attach a sequence, seq() or entire_seq() will not
+           return undef.
+
+           The reason that this method is here on the interface is to enable
+           you to call it on every SeqFeatureI compliant object, and
+           that it will be implemented in a useful way and set to a useful
+           value for the great majority of use cases. Implementors who choose
+           to ignore the call are encouraged to specifically state this in
+           their documentation.
+
+ Example :
+ Returns : TRUE on success
+ Args    : a Bio::PrimarySeqI compliant object
+
+
+=cut
+
+sub attach_seq {
+    my ($self) = shift;
+    if(@_) {
+	$self->[SEQ_OBJ] = shift @_;
+	return 1 if defined $self->[SEQ_OBJ];
+    }
+    return 0;
+}
+
+=head2 seq
+
+ Title   : seq
+ Usage   : $tseq = $sf->seq()
+ Function: returns the truncated sequence (if there is a sequence attached)
+           for this feature
+ Example :
+ Returns : sub seq (a Bio::PrimarySeqI compliant object) on attached sequence
+           bounded by start & end, or undef if there is no sequence attached
+ Args    : none
+
+
+=cut
+
+sub seq {
+    my ($self) = shift;
+    if(defined $self->[SEQ_OBJ] ) {
+	if( ! ref($self->[SEQ_OBJ]) ||
+	    ! $self->[SEQ_OBJ]->isa('Bio::PrimarySeqI') ) {
+	    $self->throw("Have a seq_obj which is not Bio::PrimarySeqI compliant");
+	} else {
+	    return $self->[SEQ_OBJ]->trunc($self->start, $self->end);
+	}
+    }
+    return undef;
+}
+

@@ Diff output truncated at 10000 characters. @@



More information about the Bioperl-guts-l mailing list