[Bioperl-guts-l] [15619] bioperl-dev/trunk: added slim seqfeatures array-based instead of hash-based, get about 30-40% speedup if I remember correctly
Jason Stajich
jason at dev.open-bio.org
Sat Mar 28 12:51:21 EDT 2009
Revision: 15619
Author: jason
Date: 2009-03-28 12:51:20 -0400 (Sat, 28 Mar 2009)
Log Message:
-----------
added slim seqfeatures array-based instead of hash-based, get about 30-40% speedup if I remember correctly
Added Paths:
-----------
bioperl-dev/trunk/Bio/SeqFeature/
bioperl-dev/trunk/Bio/SeqFeature/Slim.pm
bioperl-dev/trunk/t/SeqFeature_Slim.t
Added: bioperl-dev/trunk/Bio/SeqFeature/Slim.pm
===================================================================
--- bioperl-dev/trunk/Bio/SeqFeature/Slim.pm (rev 0)
+++ bioperl-dev/trunk/Bio/SeqFeature/Slim.pm 2009-03-28 16:51:20 UTC (rev 15619)
@@ -0,0 +1,912 @@
+# $Id: Slim.pm 11754 2007-11-07 19:44:54Z jason $
+#
+# BioPerl module for Bio::SeqFeature::Slim
+#
+# Cared for by Jason Stajich <jason_AT_bioperl.org>
+#
+# Copyright Jason Stajich
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::SeqFeature::Slim - A very lightweight Bio::SeqFeatureI implementation
+
+=head1 SYNOPSIS
+
+use Bio::SeqFeature::Slim;
+
+=head1 DESCRIPTION
+
+Lightweight Bio::SeqFeatureI implemention.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list. Your participation is much appreciated.
+
+ bioperl-l at bioperl.org - General discussion
+ http://bioperl.org/MailList.shtml - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via
+the web:
+
+ http://bugzilla.bioperl.org/
+
+=head1 AUTHOR - Jason Stajich
+
+Email jason_AT_bioperl.org
+
+Describe contact details here
+
+=head1 CONTRIBUTORS
+
+Additional contributors names and emails here
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+
+# Let the code begin...
+
+
+package Bio::SeqFeature::Slim;
+use strict;
+use base 'Bio::SeqFeatureI';
+
+use constant {
+ SEQ_ID => 0,
+ SOURCE => 1,
+ PRIMARY => 2,
+ START => 3,
+ STOP => 4,
+ SCORE => 5,
+ STRAND => 6,
+ FRAME => 7,
+ TAGS => 8,
+ NAME => 9,
+ PARENT => 10,
+ GFF_TYPE => 11,
+ SUBFEATURES => 12,
+ SEQ_OBJ => 13,
+ VERBOSE => 14,
+};
+
+
+=head2 new
+
+ Title : new
+ Usage : my $obj = new Bio::SeqFeature::Slim();
+ Function: Builds a new Bio::SeqFeature::Slim object
+ Returns : an instance of Bio::SeqFeature::Slim
+ Args :
+
+
+=cut
+
+sub new {
+ my($class) = shift;
+ my ($start, $end, $strand, $primary_tag, $source_tag, $primary,
+ $source, $frame,$phase, $score, $tag, $gff_string, $gff1_string,
+ $seqname, $seqid, $annot, $location,$display_name,$pid,$id,
+ $parent_id,$parent) =
+ Bio::Root::RootI->_rearrange([qw
+ (START
+ END
+ STRAND
+ PRIMARY_TAG
+ SOURCE_TAG
+ PRIMARY
+ SOURCE
+ FRAME
+ PHASE
+ SCORE
+ TAG
+ GFF_STRING
+ GFF1_STRING
+ SEQNAME
+ SEQ_ID
+ ANNOTATION
+ LOCATION
+ DISPLAY_NAME
+ PRIMARY_ID
+ ID
+ PARENT_ID
+ PARENT
+ )], @_);
+ if( defined $primary_tag && defined $primary ) {
+ Bio::Root::RootI->warn("Both primary and primary_tag are defined, only use one");
+ }
+ if( defined $source_tag && defined $source ) {
+ Bio::Root::RootI->warn("Both source and source_tag are defined, only use one");
+ }
+
+ $primary_tag = $primary if defined $primary && ! defined $primary_tag;
+ $source_tag = $source if defined $source && ! defined $source_tag;
+ $frame = $phase if ! defined $frame && defined $phase;
+ my $self = bless [$seqid, #0
+ $source_tag, #1
+ $primary_tag, #2
+ $start, #3
+ $end, #4
+ $score, #5
+ $strand, #6
+ $frame, #7
+ {}, #8 tags
+ $display_name, #9 display name
+ undef, #10 parent
+ undef, #11 gff_type
+ [], #12 seqfeatures
+ undef, #13 seqobj
+ ], $class;
+
+ $tag && do {
+ foreach my $t ( keys %$tag ) {
+ $self->add_tag_value($t, UNIVERSAL::isa($tag->{$t}, "ARRAY") ?
+ @{$tag->{$t}} : $tag->{$t});
+ }
+ };
+ if( defined $pid && defined $id ) {
+ Bio::Root::RootI->warn("Both primary_id and id are defined, only use one");
+ }
+ # save primary ID if it exists
+ $pid = $id if ! defined $pid;
+ defined $pid && $self->primary_id($pid);
+
+ if( defined $parent && defined $parent_id ) {
+ Bio::Root::RootI->warn("Both parent_id and parent are defined, only use one");
+ }
+
+ # save parent ID if it exists
+ $parent_id = $parent if ! defined $parent_id;
+ $parent_id && $self->parent_id($parent_id);
+
+ return $self;
+}
+
+sub verbose {
+ my ($self,$value) = @_;
+
+ if (defined $value || ! defined $self->[VERBOSE]) {
+ $self->[VERBOSE] = $value || 0;
+ }
+ return $self->[VERBOSE];
+}
+
+=head1 Bio::SeqFeatureI specific methods
+
+New method interfaces.
+
+=cut
+
+=head2 display_name
+
+ Title : display_name
+ Usage : $name = $feat->display_name()
+ Function: Returns the human-readable name of the feature for displays.
+ Returns : a string
+ Args : none
+
+=cut
+
+sub display_name {
+ my ($self) = shift;
+ if( @_) {
+ ($self->[NAME]) = shift @_;
+ }
+ return $self->[NAME];
+}
+
+=head2 primary_tag
+
+ Title : primary_tag
+ Usage : $tag = $feat->primary_tag()
+ Function: Returns the primary tag for a feature,
+ eg 'exon'
+ Returns : a string
+ Args : none
+
+
+=cut
+
+sub primary_tag{
+ my ($self) = shift;
+ if( @_) {
+ ($self->[PRIMARY]) = shift @_;
+ }
+ return $self->[PRIMARY];
+}
+
+=head2 source_tag
+
+ Title : source_tag
+ Usage : $tag = $feat->source_tag()
+ Function: Returns the source tag for a feature,
+ eg, 'genscan'
+ Returns : a string
+ Args : none
+
+
+=cut
+
+sub source_tag{
+ my ($self) = shift;
+ if( @_) {
+ ($self->[SOURCE]) = shift @_;
+ }
+ return $self->[SOURCE];
+}
+
+
+=head2 frame
+
+ Title : frame
+ Usage : $frame = $feat->frame()
+ Function: Returns the frame for a feature,
+ eg, '1'
+ Returns : '.', 0,1,2
+ Args : none
+
+
+=cut
+
+sub frame {
+ my ($self) = shift;
+ if( @_) {
+ ($self->[FRAME]) = shift @_;
+ }
+ return $self->[FRAME];
+}
+
+*phase = \&frame;
+
+=head2 has_tag
+
+ Title : has_tag
+ Usage : $tag_exists = $self->has_tag('some_tag')
+ Function:
+ Returns : TRUE if the specified tag exists, and FALSE otherwise
+ Args :
+
+
+=cut
+
+sub has_tag{
+ my ($self,$tag) = @_;
+ return unless defined $tag;
+ return exists($self->[TAGS]->{$tag});
+}
+
+=head2 score
+
+ Title : score
+ Usage : $score = $feat->score()
+ Function: Returns the score
+ Returns : a string/number
+ Args : none
+
+=cut
+
+sub score {
+ my ($self) = shift;
+ if( @_) {
+ ($self->[SCORE]) = shift @_;
+ }
+ return $self->[SCORE];
+}
+
+=head2 get_tag_values
+
+ Title : get_tag_values
+ Usage : @values = $self->get_tag_values('some_tag')
+ Function:
+ Returns : An array comprising the values of the specified tag.
+ Args : a string
+
+throws an exception if there is no such tag
+
+=cut
+
+sub get_tag_values {
+ my ($self,$tag) = @_;
+ return() unless defined $tag;
+ return @{$self->[TAGS]->{$tag} || []};
+}
+
+=head2 get_tagset_values
+
+ Title : get_tagset_values
+ Usage : @values = $self->get_tagset_values(qw(label transcript_id product))
+ Function:
+ Returns : An array comprising the values of the specified tags, in order of tags
+ Args : An array of strings
+
+does NOT throw an exception if none of the tags are not present
+
+this method is useful for getting a human-readable label for a
+SeqFeatureI; not all tags can be assumed to be present, so a list of
+possible tags in preferential order is provided
+
+=cut
+
+# interface + abstract method
+sub get_tagset_values {
+ my ($self, @args) = @_;
+ my @vals = ();
+ foreach my $arg (@args) {
+ if ($self->has_tag($arg)) {
+ push(@vals, $self->get_tag_values($arg));
+ }
+ }
+ return @vals;
+}
+
+=head2 get_all_tags
+
+ Title : get_all_tags
+ Usage : @tags = $feat->get_all_tags()
+ Function: gives all tags for this feature
+ Returns : an array of strings
+ Args : none
+
+
+=cut
+
+sub get_all_tags{
+ my ($self) = shift;
+ return keys %{$self->[TAGS] || {}};
+}
+
+=head2 attach_seq
+
+ Title : attach_seq
+ Usage : $sf->attach_seq($seq)
+ Function: Attaches a Bio::Seq object to this feature. This
+ Bio::Seq object is for the *entire* sequence: ie
+ from 1 to 10000
+
+ Note that it is not guaranteed that if you obtain a feature from
+ an object in bioperl, it will have a sequence attached. Also,
+ implementors of this interface can choose to provide an empty
+ implementation of this method. I.e., there is also no guarantee
+ that if you do attach a sequence, seq() or entire_seq() will not
+ return undef.
+
+ The reason that this method is here on the interface is to enable
+ you to call it on every SeqFeatureI compliant object, and
+ that it will be implemented in a useful way and set to a useful
+ value for the great majority of use cases. Implementors who choose
+ to ignore the call are encouraged to specifically state this in
+ their documentation.
+
+ Example :
+ Returns : TRUE on success
+ Args : a Bio::PrimarySeqI compliant object
+
+
+=cut
+
+sub attach_seq {
+ my ($self) = shift;
+ if(@_) {
+ $self->[SEQ_OBJ] = shift @_;
+ return 1 if defined $self->[SEQ_OBJ];
+ }
+ return 0;
+}
+
+=head2 seq
+
+ Title : seq
+ Usage : $tseq = $sf->seq()
+ Function: returns the truncated sequence (if there is a sequence attached)
+ for this feature
+ Example :
+ Returns : sub seq (a Bio::PrimarySeqI compliant object) on attached sequence
+ bounded by start & end, or undef if there is no sequence attached
+ Args : none
+
+
+=cut
+
+sub seq {
+ my ($self) = shift;
+ if(defined $self->[SEQ_OBJ] ) {
+ if( ! ref($self->[SEQ_OBJ]) ||
+ ! $self->[SEQ_OBJ]->isa('Bio::PrimarySeqI') ) {
+ $self->throw("Have a seq_obj which is not Bio::PrimarySeqI compliant");
+ } else {
+ return $self->[SEQ_OBJ]->trunc($self->start, $self->end);
+ }
+ }
+ return undef;
+}
+
@@ Diff output truncated at 10000 characters. @@
More information about the Bioperl-guts-l
mailing list