[Bioperl-guts-l] bioperl-live/Bio/AlignIO fasta.pm,1.18,1.19

Brian Osborne bosborne at pub.open-bio.org
Thu Oct 20 14:12:19 EDT 2005


Update of /home/repository/bioperl/bioperl-live/Bio/AlignIO
In directory pub.open-bio.org:/tmp/cvs-serv29528/Bio/AlignIO

Modified Files:
	fasta.pm 
Log Message:
Start and End refer to length of sequence without gaps


Index: fasta.pm
===================================================================
RCS file: /home/repository/bioperl/bioperl-live/Bio/AlignIO/fasta.pm,v
retrieving revision 1.18
retrieving revision 1.19
diff -C2 -d -r1.18 -r1.19
*** fasta.pm	9 Oct 2005 14:53:06 -0000	1.18
--- fasta.pm	20 Oct 2005 18:12:16 -0000	1.19
***************
*** 2,32 ****
  #
  # BioPerl module for Bio::AlignIO::fasta
- 
- #	based on the Bio::SeqIO::fasta module
- #       by Ewan Birney <birney at sanger.ac.uk>
- #       and Lincoln Stein  <lstein at cshl.org>
- #
- #       and the SimpleAlign.pm module of Ewan Birney
  #
  # Copyright Peter Schattner
  #
  # You may distribute this module under the same terms as perl itself
- # _history
- # September 5, 2000
  # POD documentation - main docs before the code
  
  =head1 NAME
  
! Bio::AlignIO::fasta - FastA MSA Sequence input/output stream
  
  =head1 SYNOPSIS
  
! Do not use this module directly.  Use it via the L<Bio::AlignIO> class.
  
  =head1 DESCRIPTION
  
  This object can transform L<Bio::SimpleAlign> objects to and from
! fasta flat file databases.  This is for the fasta sequence format NOT
! FastA analysis program.  To process the pairwise alignments from a
  FastA (FastX, FastN, FastP, tFastA, etc) use the Bio::SearchIO module.
  
--- 2,25 ----
  #
  # BioPerl module for Bio::AlignIO::fasta
  #
  # Copyright Peter Schattner
  #
  # You may distribute this module under the same terms as perl itself
  # POD documentation - main docs before the code
  
  =head1 NAME
  
! Bio::AlignIO::fasta - fasta MSA Sequence input/output stream
  
  =head1 SYNOPSIS
  
! Do not use this module directly.  Use it via the L<Bio::AlignIO> 
! class.
  
  =head1 DESCRIPTION
  
  This object can transform L<Bio::SimpleAlign> objects to and from
! fasta flat file databases.  This is for the fasta alignment format, not
! for the FastA sequence analysis program.  To process the alignments from
  FastA (FastX, FastN, FastP, tFastA, etc) use the Bio::SearchIO module.
  
***************
*** 41,48 ****
    http://bugzilla.bioperl.org/
  
! =head1 AUTHORS - Peter Schattner
! 
! Email: schattner at alum.mit.edu
  
  
  =head1 APPENDIX
--- 34,40 ----
    http://bugzilla.bioperl.org/
  
! =head1 AUTHORS
  
+ Peter Schattner
  
  =head1 APPENDIX
***************
*** 71,76 ****
   Usage   : $aln = $stream->next_aln()
   Function: returns the next alignment in the stream.
!  Returns : L<Bio::Align::AlignI> object - returns 0 on end of file
! 	    or on error
   Args    : NONE
  
--- 63,68 ----
   Usage   : $aln = $stream->next_aln()
   Function: returns the next alignment in the stream.
!  Returns : Bio::Align::AlignI object - returns 0 on end of file
! 	        or on error
   Args    : NONE
  
***************
*** 78,174 ****
  
  sub next_aln {
!     my $self = shift;
!     my $entry;
!     my ($start,$end,$name,$seqname,$seq,$seqchar,$tempname,$tempdesc,
! 	%align,$desc);
!     my $aln =  Bio::SimpleAlign->new();
!     my $maxlen;
!     while(defined ($entry = $self->_readline) ) {
! 	if( $entry =~ s/^>(\S+)\s*// ) {
! 	    $tempname  = $1;
! 	    chomp($entry);
! 	    $tempdesc  = $entry;
! 	    if( defined $name ) {
! 		# put away last name and sequence
! 		if( $name =~ /(\S+)\/(\d+)-(\d+)/ ) {
! 		    $seqname = $1;
! 		    $start = $2;
! 		    $end = $3;
! 		} else {
! 		    $seqname=$name;
! 		    $start = 1;
! 		    $end = length($seqchar); #ps 9/6/00
  		}
! #		print STDERR  "Going to add with $seqchar $seqname\n";
! 		$seq = new Bio::LocatableSeq('-seq'        =>$seqchar,
! 					     '-display_id' =>$seqname,
! 					     '-description'=>$desc,
! 					     '-start'      =>$start,
! 					     '-end'        =>$end,
! 					     );
! 		$aln->add_seq($seq);
!                 $self->debug("Reading $seqname");
! 	    }
! 	    $desc = $tempdesc;	
! 	    $name = $tempname;
! 	    $desc = $entry;
! 	    $seqchar  = "";
! 	    next;
  	}
- 	$entry =~ s/[$MATCHPATTERN]//g;
- 	$seqchar .= $entry;	
-     }
- #
- #  Next two lines are to silence warnings that
- #  otherwise occur at EOF when using <$fh>
- 
-    if (!defined $name) {$name="";}
-    if (!defined $seqchar) {$seqchar="";}
- 
- #  Put away last name and sequence
-     if( $name =~ /(\S+)\/(\d+)-(\d+)/ ) {
- 	$seqname = $1;
- 	$start = $2;
- 	$end = $3;
-     } else {
- 	$seqname=$name;
- 	$start = 1;
- 	$end = length($seqchar);   #ps 9/6/00
- #	$end = length($align{$name});
-     }
- 
- 
- #  If $end <= 0, we have either reached the end of
- #  file in <> or we have encountered some other error
- #
-    if ($end <= 0) { undef $aln; return $aln;}
- 
- # This logic now also reads empty lines at the 
- # end of the file. Skip this is seqchar and seqname is null
  
!     if( length($seqchar) == 0 && length($seqname) == 0 ) {
! 	# skip
!     } else {
! #	print STDERR "end to add with $seqchar $seqname\n";
! 	$seq = new Bio::LocatableSeq('-seq'        => $seqchar,
! 				     '-display_id' => $seqname,
! 				     '-description'=> $desc,
! 				     '-start'      => $start,
! 				     '-end'        => $end,
! 				     );
  	
! 	$aln->add_seq($seq);
!     }
!     $self->debug("Reading $seqname");
!     my $alnlen = $aln->length;
!     foreach my $seq ( $aln->each_seq ) {
! 	if( $seq->length < $alnlen ) {
! 	    my ($diff) = ($alnlen - $seq->length);
! 	    $seq->seq( $seq->seq() . "-" x $diff);
  	}
-     }
  
      return $aln;
- 
  }
  	
--- 70,157 ----
  
  sub next_aln {
! 	my $self = shift;
! 	my ($start, $end, $name, $seqname, $seq, $seqchar, $entry, 
! 		 $tempname, $tempdesc, %align, $desc, $maxlen);
! 	my $aln =  Bio::SimpleAlign->new();
! 
! 	while (defined ($entry = $self->_readline) ) {
! 		if ( $entry =~ s/^>\s*(\S+)\s*// ) {
! 			$tempname  = $1;
! 			chomp($entry);
! 			$tempdesc = $entry;
! 			if ( defined $name ) {
! 				# put away last name and sequence
! 				if ( $name =~ /(\S+)\/(\d+)-(\d+)/ ) {
! 					$seqname = $1;
! 					$start = $2;
! 					$end = $3;
! 				} else {
! 					$seqname = $name;
! 					$start = 1;
! 					$end = $self->_get_len($seqchar);
! 				}
! 				$seq = new Bio::LocatableSeq(
! 						  -seq         => $seqchar,
! 					     -display_id  => $seqname,
! 					     -description => $desc,
! 					     -start       => $start,
! 					     -end         => $end,
! 													 );
! 				$aln->add_seq($seq);
! 				$self->debug("Reading $seqname");
! 			}
! 			$desc = $tempdesc;	
! 			$name = $tempname;
! 			$desc = $entry;
! 			$seqchar  = "";
! 			next;
  		}
! 		$entry =~ s/[$MATCHPATTERN]//g;
! 		$seqchar .= $entry;	
  	}
  
! 	#  Next two lines are to silence warnings that
! 	#  otherwise occur at EOF when using <$fh>
!    $name = "" if (!defined $name);
!    $seqchar="" if (!defined $seqchar);
  	
! 	#  Put away last name and sequence
! 	if ( $name =~ /(\S+)\/(\d+)-(\d+)/ ) {
! 		$seqname = $1;
! 		$start = $2;
! 		$end = $3;
! 	} else {
! 		$seqname = $name;
! 		$start = 1;
! 		$end = $self->_get_len($seqchar);
  	}
  
+ 	#  If $end <= 0, we have either reached the end of
+ 	#  file in <> or we have encountered some other error
+    if ( $end <= 0 ) { 
+ 		undef $aln; 
+ 		return $aln;
+ 	}
+ 	
+ 	# This logic now also reads empty lines at the 
+ 	# end of the file. Skip this is seqchar and seqname is null
+ 	unless ( length($seqchar) == 0 && length($seqname) == 0 ) {
+ 		$seq = new Bio::LocatableSeq(-seq         => $seqchar,
+ 											  -display_id  => $seqname,
+ 											  -description => $desc,
+ 											  -start       => $start,
+ 											  -end         => $end,
+ 											 );
+ 		$aln->add_seq($seq);
+ 	}
+ 	$self->debug("Reading $seqname");
+ 	my $alnlen = $aln->length;
+ 	foreach my $seq ( $aln->each_seq ) {
+ 		if ( $seq->length < $alnlen ) {
+ 			my ($diff) = ($alnlen - $seq->length);
+ 			$seq->seq( $seq->seq() . "-" x $diff);
+ 		}
+ 	}
      return $aln;
  }
  	
***************
*** 180,185 ****
   Function: writes the $aln object into the stream in fasta format
   Returns : 1 for success and 0 for error
!  Args    : L<Bio::Align::AlignI> object
  
  
  =cut
--- 163,169 ----
   Function: writes the $aln object into the stream in fasta format
   Returns : 1 for success and 0 for error
!  Args    : Bio::Align::AlignI object
  
+ See <Bio::Align::AlignI>
  
  =cut
***************
*** 213,216 ****
--- 197,216 ----
      $self->flush if $self->_flush_on_write && defined $self->_fh;
      return 1;
+ }
+ 
+ =head2 _get_len
+ 
+  Title   : _get_len
+  Usage   : 
+  Function: determine number of alphabetic chars
+  Returns : integer
+  Args    : sequence string
+ 
+ =cut
+ 
+ sub _get_len {
+ 	my ($self,$seq) = @_;
+ 	$seq =~ s/[^A-Z]//gi;
+ 	return CORE::length($seq);
  }
  



More information about the Bioperl-guts-l mailing list