[Bioperl-guts-l] bioperl-live/Bio/AlignIO fasta.pm,1.18,1.19
Brian Osborne
bosborne at pub.open-bio.org
Thu Oct 20 14:12:19 EDT 2005
Update of /home/repository/bioperl/bioperl-live/Bio/AlignIO
In directory pub.open-bio.org:/tmp/cvs-serv29528/Bio/AlignIO
Modified Files:
fasta.pm
Log Message:
Start and End refer to length of sequence without gaps
Index: fasta.pm
===================================================================
RCS file: /home/repository/bioperl/bioperl-live/Bio/AlignIO/fasta.pm,v
retrieving revision 1.18
retrieving revision 1.19
diff -C2 -d -r1.18 -r1.19
*** fasta.pm 9 Oct 2005 14:53:06 -0000 1.18
--- fasta.pm 20 Oct 2005 18:12:16 -0000 1.19
***************
*** 2,32 ****
#
# BioPerl module for Bio::AlignIO::fasta
-
- # based on the Bio::SeqIO::fasta module
- # by Ewan Birney <birney at sanger.ac.uk>
- # and Lincoln Stein <lstein at cshl.org>
- #
- # and the SimpleAlign.pm module of Ewan Birney
#
# Copyright Peter Schattner
#
# You may distribute this module under the same terms as perl itself
- # _history
- # September 5, 2000
# POD documentation - main docs before the code
=head1 NAME
! Bio::AlignIO::fasta - FastA MSA Sequence input/output stream
=head1 SYNOPSIS
! Do not use this module directly. Use it via the L<Bio::AlignIO> class.
=head1 DESCRIPTION
This object can transform L<Bio::SimpleAlign> objects to and from
! fasta flat file databases. This is for the fasta sequence format NOT
! FastA analysis program. To process the pairwise alignments from a
FastA (FastX, FastN, FastP, tFastA, etc) use the Bio::SearchIO module.
--- 2,25 ----
#
# BioPerl module for Bio::AlignIO::fasta
#
# Copyright Peter Schattner
#
# You may distribute this module under the same terms as perl itself
# POD documentation - main docs before the code
=head1 NAME
! Bio::AlignIO::fasta - fasta MSA Sequence input/output stream
=head1 SYNOPSIS
! Do not use this module directly. Use it via the L<Bio::AlignIO>
! class.
=head1 DESCRIPTION
This object can transform L<Bio::SimpleAlign> objects to and from
! fasta flat file databases. This is for the fasta alignment format, not
! for the FastA sequence analysis program. To process the alignments from
FastA (FastX, FastN, FastP, tFastA, etc) use the Bio::SearchIO module.
***************
*** 41,48 ****
http://bugzilla.bioperl.org/
! =head1 AUTHORS - Peter Schattner
!
! Email: schattner at alum.mit.edu
=head1 APPENDIX
--- 34,40 ----
http://bugzilla.bioperl.org/
! =head1 AUTHORS
+ Peter Schattner
=head1 APPENDIX
***************
*** 71,76 ****
Usage : $aln = $stream->next_aln()
Function: returns the next alignment in the stream.
! Returns : L<Bio::Align::AlignI> object - returns 0 on end of file
! or on error
Args : NONE
--- 63,68 ----
Usage : $aln = $stream->next_aln()
Function: returns the next alignment in the stream.
! Returns : Bio::Align::AlignI object - returns 0 on end of file
! or on error
Args : NONE
***************
*** 78,174 ****
sub next_aln {
! my $self = shift;
! my $entry;
! my ($start,$end,$name,$seqname,$seq,$seqchar,$tempname,$tempdesc,
! %align,$desc);
! my $aln = Bio::SimpleAlign->new();
! my $maxlen;
! while(defined ($entry = $self->_readline) ) {
! if( $entry =~ s/^>(\S+)\s*// ) {
! $tempname = $1;
! chomp($entry);
! $tempdesc = $entry;
! if( defined $name ) {
! # put away last name and sequence
! if( $name =~ /(\S+)\/(\d+)-(\d+)/ ) {
! $seqname = $1;
! $start = $2;
! $end = $3;
! } else {
! $seqname=$name;
! $start = 1;
! $end = length($seqchar); #ps 9/6/00
}
! # print STDERR "Going to add with $seqchar $seqname\n";
! $seq = new Bio::LocatableSeq('-seq' =>$seqchar,
! '-display_id' =>$seqname,
! '-description'=>$desc,
! '-start' =>$start,
! '-end' =>$end,
! );
! $aln->add_seq($seq);
! $self->debug("Reading $seqname");
! }
! $desc = $tempdesc;
! $name = $tempname;
! $desc = $entry;
! $seqchar = "";
! next;
}
- $entry =~ s/[$MATCHPATTERN]//g;
- $seqchar .= $entry;
- }
- #
- # Next two lines are to silence warnings that
- # otherwise occur at EOF when using <$fh>
-
- if (!defined $name) {$name="";}
- if (!defined $seqchar) {$seqchar="";}
-
- # Put away last name and sequence
- if( $name =~ /(\S+)\/(\d+)-(\d+)/ ) {
- $seqname = $1;
- $start = $2;
- $end = $3;
- } else {
- $seqname=$name;
- $start = 1;
- $end = length($seqchar); #ps 9/6/00
- # $end = length($align{$name});
- }
-
-
- # If $end <= 0, we have either reached the end of
- # file in <> or we have encountered some other error
- #
- if ($end <= 0) { undef $aln; return $aln;}
-
- # This logic now also reads empty lines at the
- # end of the file. Skip this is seqchar and seqname is null
! if( length($seqchar) == 0 && length($seqname) == 0 ) {
! # skip
! } else {
! # print STDERR "end to add with $seqchar $seqname\n";
! $seq = new Bio::LocatableSeq('-seq' => $seqchar,
! '-display_id' => $seqname,
! '-description'=> $desc,
! '-start' => $start,
! '-end' => $end,
! );
! $aln->add_seq($seq);
! }
! $self->debug("Reading $seqname");
! my $alnlen = $aln->length;
! foreach my $seq ( $aln->each_seq ) {
! if( $seq->length < $alnlen ) {
! my ($diff) = ($alnlen - $seq->length);
! $seq->seq( $seq->seq() . "-" x $diff);
}
- }
return $aln;
-
}
--- 70,157 ----
sub next_aln {
! my $self = shift;
! my ($start, $end, $name, $seqname, $seq, $seqchar, $entry,
! $tempname, $tempdesc, %align, $desc, $maxlen);
! my $aln = Bio::SimpleAlign->new();
!
! while (defined ($entry = $self->_readline) ) {
! if ( $entry =~ s/^>\s*(\S+)\s*// ) {
! $tempname = $1;
! chomp($entry);
! $tempdesc = $entry;
! if ( defined $name ) {
! # put away last name and sequence
! if ( $name =~ /(\S+)\/(\d+)-(\d+)/ ) {
! $seqname = $1;
! $start = $2;
! $end = $3;
! } else {
! $seqname = $name;
! $start = 1;
! $end = $self->_get_len($seqchar);
! }
! $seq = new Bio::LocatableSeq(
! -seq => $seqchar,
! -display_id => $seqname,
! -description => $desc,
! -start => $start,
! -end => $end,
! );
! $aln->add_seq($seq);
! $self->debug("Reading $seqname");
! }
! $desc = $tempdesc;
! $name = $tempname;
! $desc = $entry;
! $seqchar = "";
! next;
}
! $entry =~ s/[$MATCHPATTERN]//g;
! $seqchar .= $entry;
}
! # Next two lines are to silence warnings that
! # otherwise occur at EOF when using <$fh>
! $name = "" if (!defined $name);
! $seqchar="" if (!defined $seqchar);
! # Put away last name and sequence
! if ( $name =~ /(\S+)\/(\d+)-(\d+)/ ) {
! $seqname = $1;
! $start = $2;
! $end = $3;
! } else {
! $seqname = $name;
! $start = 1;
! $end = $self->_get_len($seqchar);
}
+ # If $end <= 0, we have either reached the end of
+ # file in <> or we have encountered some other error
+ if ( $end <= 0 ) {
+ undef $aln;
+ return $aln;
+ }
+
+ # This logic now also reads empty lines at the
+ # end of the file. Skip this is seqchar and seqname is null
+ unless ( length($seqchar) == 0 && length($seqname) == 0 ) {
+ $seq = new Bio::LocatableSeq(-seq => $seqchar,
+ -display_id => $seqname,
+ -description => $desc,
+ -start => $start,
+ -end => $end,
+ );
+ $aln->add_seq($seq);
+ }
+ $self->debug("Reading $seqname");
+ my $alnlen = $aln->length;
+ foreach my $seq ( $aln->each_seq ) {
+ if ( $seq->length < $alnlen ) {
+ my ($diff) = ($alnlen - $seq->length);
+ $seq->seq( $seq->seq() . "-" x $diff);
+ }
+ }
return $aln;
}
***************
*** 180,185 ****
Function: writes the $aln object into the stream in fasta format
Returns : 1 for success and 0 for error
! Args : L<Bio::Align::AlignI> object
=cut
--- 163,169 ----
Function: writes the $aln object into the stream in fasta format
Returns : 1 for success and 0 for error
! Args : Bio::Align::AlignI object
+ See <Bio::Align::AlignI>
=cut
***************
*** 213,216 ****
--- 197,216 ----
$self->flush if $self->_flush_on_write && defined $self->_fh;
return 1;
+ }
+
+ =head2 _get_len
+
+ Title : _get_len
+ Usage :
+ Function: determine number of alphabetic chars
+ Returns : integer
+ Args : sequence string
+
+ =cut
+
+ sub _get_len {
+ my ($self,$seq) = @_;
+ $seq =~ s/[^A-Z]//gi;
+ return CORE::length($seq);
}
More information about the Bioperl-guts-l
mailing list