[Bioperl-guts-l] bioperl-live/Bio/Factory FTLocationFactory.pm, 1.24, 1.25
Christopher John Fields
cjfields at dev.open-bio.org
Thu Sep 14 01:26:15 EDT 2006
Update of /home/repository/bioperl/bioperl-live/Bio/Factory
In directory dev.open-bio.org:/tmp/cvs-serv24152
Modified Files:
FTLocationFactory.pm
Log Message:
Refactored code now passes all tests; let others try it out
Index: FTLocationFactory.pm
===================================================================
RCS file: /home/repository/bioperl/bioperl-live/Bio/Factory/FTLocationFactory.pm,v
retrieving revision 1.24
retrieving revision 1.25
diff -C2 -d -r1.24 -r1.25
*** FTLocationFactory.pm 14 Sep 2006 02:11:30 -0000 1.24
--- FTLocationFactory.pm 14 Sep 2006 05:26:13 -0000 1.25
***************
*** 8,12 ****
#
# You may distribute this module under the same terms as perl itself
-
#
# (c) Hilmar Lapp, hlapp at gnf.org, 2002.
--- 8,11 ----
***************
*** 66,69 ****
--- 65,69 ----
Jason Stajich, jason-at-bioperl-dot-org
+ Chris Fields, cjfields-at-uiuc-dot-edu
=head1 APPENDIX
***************
*** 77,83 ****
# Let the code begin...
-
package Bio::Factory::FTLocationFactory;
! use vars qw(@ISA);
use strict;
--- 77,82 ----
# Let the code begin...
package Bio::Factory::FTLocationFactory;
! use vars qw(@ISA $LOCREG);
use strict;
***************
*** 93,96 ****
--- 92,108 ----
@ISA = qw(Bio::Root::Root Bio::Factory::LocationFactoryI);
+ BEGIN {
+ # the below is an optimized regex obj. from J. Freidl's Mastering Reg Exp.
+ $LOCREG = qr{
+ (?>
+ [^()]+
+ |
+ \(
+ (??{$LOCREG})
+ \)
+ )*
+ }x;
+ }
+
=head2 new
***************
*** 118,201 ****
=cut
! sub from_string{
! # the third parameter is purely optional and indicates a recursive
! # call if set
! my ($self,$locstr,$is_rec) = @_;
my $loc;
! # there is no place in FT-formatted location strings where whitespace
! # carries meaning, so strip it off entirely upfront
! $locstr =~ s/\s+//g if ! $is_rec;
! # does it contain an operator?
! if($locstr =~ /^(\w+)\((.*)\)$/) {
! # yes:
! my $op = lc($1);
! my $oparg = $2;
! if($op eq "complement") {
! # parse the argument recursively, then set the strand to -1
! $loc = $self->from_string($oparg, 1);
! $loc->strand(-1);
! } elsif($op eq "join" || $op eq "order" || $op eq "bond" ) {
! # This is a split location. Split into components and parse each
! # one recursively, then gather into a SplitLocationI instance.
! #
! # Note: The following code will /not/ work with nested
! # joins (you want to have grammar-based parsing for that).
! $loc = Bio::Location::Split->new(-verbose => $self->verbose,
! -splittype => $op);
! # have to do this to capture nested joins, something like this
! # join(11..21,join(100..300,complement(150..230)))
! # This fixes bug #1674
! my $re;
! $re = qr{
! \( # <<--- paired parens required
! (?:
! (?> [^()]+ ) # Non-parens without backtracking
! |
! (??{ $re }) # Group with matching parens
! )*
! \) # ---->> paired parens required
! }x;
! my $oparg_orig = $oparg;
! my @sections;
! # lets capture and remove all the sections which
! # are groups
! while( $oparg =~ s/(join|complement|bond|order)$re//ig ) {
! # oh man this is SUUUCCCH a hack
! # I don't know what else to do though
! # s// seems to be dropping the whole
! # warn("rematch is $& $` $'\n");
! # the code use to just be this line
! push @sections, $&;
! # but I recognized join(...,complement(join(..)))
! # was failling
! my $before = $`;
! if( $oparg ne $before . $') { #'
! $oparg = $before . $'; # '
}
}
! push @sections, split(/,/,$oparg) if length($oparg);
! # because we don't necessarily process the string in-order
! # as we are pulling the data from the string out for
! # groups first, then pulling out data, comma delimited
! # I am re-sorting the sections based on their position
! # in the original string, using the index function to figure
! # out their position in the string
! # --jason
! # resort based on input order, schwartzian style!
! @sections = map { shift @$_ } sort { $a->[1] <=> $b->[1] }
! map { [$_, index($oparg_orig, $_)] } @sections;
! # end of fix for bug #1674
! foreach my $s (@sections) {
! next unless length($s);
! $loc->add_sub_Location($self->from_string($s, 1));
! }
} else {
! $self->throw("operator \"$op\" unrecognized by parser");
}
! } else {
! # no operator, parse away
! $loc = $self->_parse_location($locstr);
}
return $loc;
--- 130,215 ----
=cut
! sub from_string {
! my ($self,$locstr,$op) = @_;
my $loc;
! #$self->debug("$locstr\n");
!
! # $op for operator (error handling)
!
! # run on first pass only
! # Note : These location types are now deprecated in GenBank (Oct. 2006)
! if (!defined($op)) {
! # convert all (X.Y) to [X.Y]
! $locstr =~ s{\((\d+\.\d+)\)}{\[$1\]}g;
! # convert ABC123:(X..Y) to ABC123:[X..Y]
! # we should never see the above
! $locstr =~ s{:\((\d+\.{2}\d+)\)}{:\[$1\]}g;
! }
!
! if ($locstr =~ m{(.*?)\(($LOCREG)\)(.*)}o) { # any matching parentheses?
! my ($beg, $mid, $end) = ($1, $2, $3);
! my (@sublocs) = (split(q(,),$beg), $mid, split(q(,),$end));
!
! my @loc_objs;
! my $loc_obj;
!
! SUBLOCS:
! while (@sublocs) {
! my $subloc = shift @sublocs;
! next if !$subloc;
! my $oparg = ($subloc eq 'join' || $subloc eq 'bond' ||
! $subloc eq 'order' || $subloc eq 'complement') ? $subloc : undef;
!
! # has operator, requires further work (recurse)
! if ($oparg) {
! my $sub = shift @sublocs;
! if (($oparg eq 'join' || $oparg eq 'order' || $oparg eq 'bond' )
! && $sub !~ m{$oparg}) {
! my @splitlocs = split(q(,), $sub);
! $loc_obj = Bio::Location::Split->new();
! while (my $splitloc = shift @splitlocs) {
! next unless $splitloc;
! #$loc_obj->add_sub_Location($self->from_string($splitloc, 1));
! # this should work but doesn't
! my $sobj;
! if ($splitloc =~ m{\(($LOCREG)\)}) {
! my $comploc = $1;
! $sobj = $self->_parse_location($comploc);
! $sobj->strand(-1);
! } else {
! $sobj = $self->_parse_location($splitloc);
! }
! $loc_obj->add_sub_Location($sobj);
! }
! } else {
! $loc_obj = $self->from_string($sub, $oparg);
}
}
! # no operator, simple or fuzzy
! else {
! $loc_obj = $self->from_string($subloc,1);
! }
! $loc_obj->strand(-1) if ($op && $op eq 'complement');
! push @loc_objs, $loc_obj;
! }
! my $ct = @loc_objs;
! if ($op && !($op eq 'join' || $op eq 'order' || $op eq 'bond')
! && $ct > 1 ) {
! $self->throw("Bad operator $op: had multiple locations ".
! scalar(@loc_objs).", should be SplitLocationI");
! }
! if ($ct > 1) {
! $loc = Bio::Location::Split->new();
! $loc->add_sub_Location(shift @loc_objs) while (@loc_objs);
! return $loc;
} else {
! $loc = shift @loc_objs;
! return $loc;
}
! } else { # simple location(s)
! $loc = $self->_parse_location($locstr);
! $loc->strand(-1) if ($op && $op eq 'complement');
}
return $loc;
***************
*** 218,225 ****
my ($self, $locstr) = @_;
my ($loc, $seqid);
-
#$self->debug( "Location parse, processing $locstr\n");
# 'remote' location?
! if($locstr =~ /^(\S+):(.*)$/) {
# yes; memorize remote ID and strip from location string
$seqid = $1;
--- 232,238 ----
my ($self, $locstr) = @_;
my ($loc, $seqid);
#$self->debug( "Location parse, processing $locstr\n");
# 'remote' location?
! if($locstr =~ m{^(\S+):(.*)$}o) {
# yes; memorize remote ID and strip from location string
$seqid = $1;
***************
*** 232,239 ****
# possibly surrounding the entire location the parentheses around start
# and/or may be asymmetrical
! $start =~ s/^\(+//;
! $start =~ s/\)+$//;
! $end =~ s/^\(+// if $end;
! $end =~ s/\)+$// if $end;
# Is this a simple (exact) or a fuzzy location? Simples have exact start
--- 245,251 ----
# possibly surrounding the entire location the parentheses around start
# and/or may be asymmetrical
! # Note: these are from X.Y fuzzy locations, which are deprecated!
! $start =~ s/(?:^\[+|\]+$)//g if $start;
! $end =~ s/(?:^\[+|\]+$)//g if $end;
# Is this a simple (exact) or a fuzzy location? Simples have exact start
***************
*** 241,248 ****
my $loctype = ".."; # exact with start and end as default
! $loctype = '?' if ( ($locstr =~ /\?/) &&
! ($locstr !~ /\?\d+/) );
! $loctype = '?' if ( ($locstr =~ /\?/) &&
! ($locstr !~ /\?\d+/) );
my $locclass = "Bio::Location::Simple";
--- 253,257 ----
my $loctype = ".."; # exact with start and end as default
! $loctype = '?' if ( ($locstr =~ /\?/) && ($locstr !~ /\?\d+/) );
my $locclass = "Bio::Location::Simple";
More information about the Bioperl-guts-l
mailing list