[Bioperl-guts-l] [15018] bioperl-live/trunk/Bio/PrimarySeq.pm: [bug 2682]

Christopher John Fields cjfields at dev.open-bio.org
Tue Nov 25 12:16:38 EST 2008


Revision: 15018
Author:   cjfields
Date:     2008-11-25 12:16:38 -0500 (Tue, 25 Nov 2008)

Log Message:
-----------
[bug 2682]
* patch substr() to accept named arguments, optionally strip gaps
* patch courtesy of Mark Jensen

Modified Paths:
--------------
    bioperl-live/trunk/Bio/PrimarySeq.pm

Modified: bioperl-live/trunk/Bio/PrimarySeq.pm
===================================================================
--- bioperl-live/trunk/Bio/PrimarySeq.pm	2008-11-25 15:40:07 UTC (rev 15017)
+++ bioperl-live/trunk/Bio/PrimarySeq.pm	2008-11-25 17:16:38 UTC (rev 15018)
@@ -110,10 +110,11 @@
 
 
 package Bio::PrimarySeq;
-use vars qw($MATCHPATTERN);
+use vars qw($MATCHPATTERN $GAP_SYMBOLS);
 use strict;
 
 $MATCHPATTERN = 'A-Za-z\-\.\*\?=~';
+$GAP_SYMBOLS = '-~';
 
 use base qw(Bio::Root::Root Bio::PrimarySeqI
 	    Bio::IdentifiableI Bio::DescribableI);
@@ -332,27 +333,42 @@
 
  Title   : subseq
  Usage   : $substring = $obj->subseq(10,40);
- Function: returns the subseq from start to end, where the first base
-           is 1 and the number is inclusive, ie 1-2 are the first two
-           bases of the sequence
+           $substring = $obj->subseq(10,40,NOGAP)
+           $substring = $obj->subseq(-START=>10,-END=>40,-REPLACE_WITH=>'tga')
+ Function: returns the subseq from start to end, where the first sequence
+           character has coordinate 1 number is inclusive, ie 1-2 are the 
+           first two characters of the sequence
  Returns : a string
  Args    : integer for start position
            integer for end position
                  OR
            Bio::LocationI location for subseq (strand honored)
+           Specify -NOGAP=>1 to return subseq with gap characters removed
+           Specify -REPLACE_WITH=>$new_subseq to replace the subseq returned
+           with $new_subseq in the sequence object
 
 =cut
 
 sub subseq {
-   my ($self,$start,$end,$replace) = @_;
+   my $self = shift;
+   my @args = @_;
+   my ($start,$end,$nogap,$replace) = $self->_rearrange([qw(START 
+                                                            END
+                                                            NOGAP
+                                                            REPLACE_WITH)], at args);
+   
+   # if $replace is specified, have the constructor validate it as seq
+   my $dummy = new Bio::PrimarySeq(-seq=>$replace, -alphabet=>$self->alphabet) if defined($replace);
 
    if( ref($start) && $start->isa('Bio::LocationI') ) {
        my $loc = $start;
-       $replace = $end; # do we really use this anywhere? scary. HL
        my $seq = "";
        foreach my $subloc ($loc->each_Location()) {
-	   my $piece = $self->subseq($subloc->start(),
-				     $subloc->end(), $replace);
+	   my $piece = $self->subseq(-START=>$subloc->start(),
+				     '-END'=>$subloc->end(), 
+				     -REPLACE_WITH=>$replace,
+	                             -NOGAP=>$nogap);
+	   $piece =~ s/[$GAP_SYMBOLS]//g if $nogap;
 	   if($subloc->strand() < 0) {
 	       $piece = Bio::PrimarySeq->new('-seq' => $piece)->revcom()->seq();
 	   }
@@ -373,13 +389,13 @@
 
        # remove one from start, and then length is end-start
        $start--;
-       if( defined $replace ) {
-	   return substr( $self->seq(), $start, ($end-$start), $replace);
-       } else {
-	   return substr( $self->seq(), $start, ($end-$start));
-       }
+       my @ss_args = map { eval "defined $_"  ? $_ : () } qw( $self->{seq} $start $end-$start $replace);
+       my $seqstr = eval join( '', "substr(", join(',', at ss_args), ")");
+       $seqstr =~ s/[$GAP_SYMBOLS]//g if ($nogap);
+       return $seqstr;
+
    } else {
-       $self->warn("Incorrect parameters to subseq - must be two integers or a Bio::LocationI object. Got:", $self,$start,$end,$replace);
+       $self->warn("Incorrect parameters to subseq - must be two integers or a Bio::LocationI object. Got:", $self,$start,$end,$replace,$nogap);
        return;
    }
 }




More information about the Bioperl-guts-l mailing list