[Bioperl-guts-l] bioperl-live/Bio/SearchIO blastxml.pm,1.42,1.43

Christopher John Fields cjfields at dev.open-bio.org
Thu Dec 28 17:34:09 EST 2006


Update of /home/repository/bioperl/bioperl-live/Bio/SearchIO
In directory dev.open-bio.org:/tmp/cvs-serv15636

Modified Files:
	blastxml.pm 
Log Message:
Moving BLAST XML SAX methods to separate class

Index: blastxml.pm
===================================================================
RCS file: /home/repository/bioperl/bioperl-live/Bio/SearchIO/blastxml.pm,v
retrieving revision 1.42
retrieving revision 1.43
diff -C2 -d -r1.42 -r1.43
*** blastxml.pm	5 Dec 2006 22:31:22 -0000	1.42
--- blastxml.pm	28 Dec 2006 22:34:07 -0000	1.43
***************
*** 103,192 ****
  
  package Bio::SearchIO::blastxml;
- use vars qw($DTD %MAPPING %MODEMAP $DEBUG);
  use strict;
- 
- $DTD = 'ftp://ftp.ncbi.nlm.nih.gov/blast/documents/NCBI_BlastOutput.dtd';
  # Object preamble - inherits from Bio::Root::Root
  
  use Bio::Root::Root;
  use XML::SAX;
- use HTML::Entities;
  use IO::File;
  
! BEGIN {
!     # uncomment only for testing; trying to get XML::SAX::Expat to play nice...
!     #$XML::SAX::ParserPackage = 'XML::SAX::PurePerl';
!     # mapping of NCBI Blast terms to Bioperl hash keys
!     %MODEMAP = ('BlastOutput' => 'result',
! 		'Hit'         => 'hit',
! 		'Hsp'         => 'hsp'
! 		);
! 
!     %MAPPING = ( 
! 		 # HSP specific fields
! 		 'Hsp_bit-score'  => 'HSP-bits',
! 		 'Hsp_score'      => 'HSP-score',
! 		 'Hsp_evalue'     => 'HSP-evalue',
! 		 'Hsp_query-from' => 'HSP-query_start',
! 		 'Hsp_query-to'   => 'HSP-query_end',
! 		 'Hsp_hit-from'   => 'HSP-hit_start',
! 		 'Hsp_hit-to'     => 'HSP-hit_end',
! 		 'Hsp_positive'   => 'HSP-conserved',
! 		 'Hsp_identity'   => 'HSP-identical',
! 		 'Hsp_gaps'       => 'HSP-gaps',
! 		 'Hsp_hitgaps'    => 'HSP-hit_gaps',
! 		 'Hsp_querygaps'  => 'HSP-query_gaps',
! 		 'Hsp_qseq'       => 'HSP-query_seq',
! 		 'Hsp_hseq'       => 'HSP-hit_seq',
! 		 'Hsp_midline'    => 'HSP-homology_seq',
! 		 'Hsp_align-len'  => 'HSP-hsp_length',
! 		 'Hsp_query-frame'=> 'HSP-query_frame',
! 		 'Hsp_hit-frame'  => 'HSP-hit_frame',
! 
! 		 # these are ignored for now
! 		 'Hsp_num'          => 'HSP-order',
! 		 'Hsp_pattern-from' => 'patternend',
! 		 'Hsp_pattern-to'   => 'patternstart',
! 		 'Hsp_density'      => 'hspdensity',
! 
! 		 # Hit specific fields
! 		 'Hit_id'               => 'HIT-name',
! 		 'Hit_len'              => 'HIT-length',
! 		 'Hit_accession'        => 'HIT-accession',
! 		 'Hit_def'              => 'HIT-description',
! 		 'Hit_num'              => 'HIT-order',
! 		 'Iteration_iter-num'   => 'HIT-iteration',
! 		 'Iteration_stat'       => 'HIT-iteration_statistic',
! 		 
! 		 'BlastOutput_program'   => 'RESULT-algorithm_name',
! 		 'BlastOutput_version'   => 'RESULT-algorithm_version',
! 		 'BlastOutput_query-def' => 'RESULT-query_description',
! 		 'BlastOutput_query-len' => 'RESULT-query_length',
! 		 'BlastOutput_db'        => 'RESULT-database_name',
! 		 'BlastOutput_reference' => 'RESULT-program_reference',
! 		 'BlastOutput_query-ID'  => 'runid',
! 		 
! 		 'Parameters_matrix'    => { 'RESULT-parameters' => 'matrix'},
! 		 'Parameters_expect'    => { 'RESULT-parameters' => 'expect'},
! 		 'Parameters_include'   => { 'RESULT-parameters' => 'include'},
! 		 'Parameters_sc-match'  => { 'RESULT-parameters' => 'match'},
! 		 'Parameters_sc-mismatch' => { 'RESULT-parameters' => 'mismatch'},
! 		 'Parameters_gap-open'  => { 'RESULT-parameters' => 'gapopen'},
! 		 'Parameters_gap-extend'=> { 'RESULT-parameters' => 'gapext'},
! 		 'Parameters_filter'    => {'RESULT-parameters' => 'filter'},
! 		 'Statistics_db-num'    => 'RESULT-database_entries',
! 		 'Statistics_db-len'    => 'RESULT-database_letters',
! 		 'Statistics_hsp-len'   => { 'RESULT-statistics' => 'hsplength'},
! 		 'Statistics_eff-space' => { 'RESULT-statistics' => 'effectivespace'},
! 		 'Statistics_kappa'     => { 'RESULT-statistics' => 'kappa' },
! 		 'Statistics_lambda'    => { 'RESULT-statistics' => 'lambda' },
! 		 'Statistics_entropy'   => { 'RESULT-statistics' => 'entropy'},
! 		 );
!     eval {  require Time::HiRes };	
!     if( $@ ) { $DEBUG = 0; }
! }
  
  
! use base qw(Bio::SearchIO);
  
  =head2 new
--- 103,120 ----
  
  package Bio::SearchIO::blastxml;
  use strict;
  # Object preamble - inherits from Bio::Root::Root
  
+ use base qw(Bio::SearchIO);
  use Bio::Root::Root;
  use XML::SAX;
  use IO::File;
+ use Bio::SearchIO::XML::BlastHandler;
  
! our $DTD = 'ftp://ftp.ncbi.nlm.nih.gov/blast/documents/NCBI_BlastOutput.dtd';
  
+ our $DEBUG;
  
! # mapping of NCBI Blast terms to Bioperl hash keys
  
  =head2 new
***************
*** 199,205 ****
   Returns : Bio::SearchIO::blastxml object
   Args    : One additional argument from the format and file/fh parameters.
!            -tempfile => boolean.  Defaults to false.  Write out XML data
!                                   to a temporary filehandle to send to 
!                                   PerlSAX parser.
  =cut
  
--- 127,133 ----
   Returns : Bio::SearchIO::blastxml object
   Args    : One additional argument from the format and file/fh parameters.
!            -tempfile    => boolean.  Defaults to false.  Write out XML data
!                            to a temporary filehandle to send to PerlSAX parser.
!                     
  =cut
  
***************
*** 217,227 ****
      my ($usetempfile) = $self->_rearrange([qw(TEMPFILE)], at args);
      defined $usetempfile && $self->use_tempfile($usetempfile);
!     $self->{'_xmlparser'} = XML::SAX::ParserFactory->parser(Handler => $self);
!     my $local_parser = ref($self->{'_xmlparser'});
!     if ($local_parser eq 'XML::SAX::Expat') {
          $self->throw('XML::SAX::Expat not supported as it is no '.
                       'longer maintained.  Please use any other XML::SAX '.
                       'backend (such as XML::SAX::ExpatXS or XML::LibXML)');
!     } elsif ($local_parser eq 'XML::SAX::PurePerl') {
          $self->warn("XML::SAX::PurePerl installed as default XML::SAX parser.\n".
                       "This works but has a small bug which breaks ".
--- 145,168 ----
      my ($usetempfile) = $self->_rearrange([qw(TEMPFILE)], at args);
      defined $usetempfile && $self->use_tempfile($usetempfile);
!     
!     # uncomment only for testing XML::SAX backend parsers
!     #$XML::SAX::ParserPackage = 'XML::SAX::PurePerl';
!     
!     # BlastHandler does the heavy lifting 
!     my $xmlhandler = Bio::SearchIO::XML::BlastHandler->new(-verbose => $self->verbose);
!     
!     # Pass the SearchIO eventhandler to the XML handler
!     # The XML handler does the heavy work, passes data to object handler
!     $xmlhandler->_eventHandler($self->_eventHandler());
!     
!     # start up the parser factory
!     my $parserfactory = XML::SAX::ParserFactory->parser(
!         Handler => $xmlhandler);
!     
!     if (ref($parserfactory) eq 'XML::SAX::Expat') {
          $self->throw('XML::SAX::Expat not supported as it is no '.
                       'longer maintained.  Please use any other XML::SAX '.
                       'backend (such as XML::SAX::ExpatXS or XML::LibXML)');
!     } elsif (ref($parserfactory) eq 'XML::SAX::PurePerl' && $self->verbose > -1) {
          $self->warn("XML::SAX::PurePerl installed as default XML::SAX parser.\n".
                       "This works but has a small bug which breaks ".
***************
*** 230,234 ****
                       "backend (such as XML::SAX::ExpatXS or XML::LibXML)");
      }
!     $DEBUG = 1 if( ! defined $DEBUG && $self->verbose > 0);
  }
  
--- 171,180 ----
                       "backend (such as XML::SAX::ExpatXS or XML::LibXML)");
      }
!     
!     $self->{'_xmlparser'} = $parserfactory;
!     $self->{'_result_cache'} = [];
!     eval {  require Time::HiRes };	
!     if( $@ ) { $DEBUG = 0; }
!     $DEBUG = 1 if( ! defined $DEBUG && ($self->verbose > 0));
  }
  
***************
*** 245,251 ****
  sub next_result {
      my ($self) = @_;
      local $/ = "\n";
      local $_;
!  
      my $data = '';
      my $firstline = 1;
--- 191,204 ----
  sub next_result {
      my ($self) = @_;
+     
+     my $result;
+     
+     if ($result = shift @{$self->{'_result_cache'} }) {
+         return $result;
+     }
+     
      local $/ = "\n";
      local $_;
! 
      my $data = '';
      my $firstline = 1;
***************
*** 268,272 ****
  	    }
  	    $sawxmlheader = 1;
! 	} 
  	# for the non xml version prefixed in each section
  	if( /DOCTYPE/ ) { #|| /<BlastOutput>/
--- 221,225 ----
  	    }
  	    $sawxmlheader = 1;
! 	}
  	# for the non xml version prefixed in each section
  	if( /DOCTYPE/ ) { #|| /<BlastOutput>/
***************
*** 303,453 ****
  	%parser_args = ('Source' => { 'String' => $data });
      }
-     my $result;
-     my $starttime;
-     #if(  $DEBUG ) {  $starttime = [ Time::HiRes::gettimeofday() ]; }
  
!     eval { 
! 	$result = $self->{'_xmlparser'}->parse(%parser_args);
!         $self->{'_result_count'}++;
      };
      if( $@ ) {
  	$self->warn("error in parsing a report:\n $@");
  	$result = undef;
      }    
!     #if( $DEBUG ) {
! 	#$self->debug( sprintf("parsing took %f seconds\n", Time::HiRes::tv_interval($starttime)));
!     #}
      # parsing magic here - but we call event handlers rather than 
      # instantiating things 
!     return $result;
! }
! 
! =head2 SAX methods
! 
! =cut
! 
! =head2 start_document
! 
!  Title   : start_document
!  Usage   : $parser->start_document;
!  Function: SAX method to indicate starting to parse a new document
!  Returns : none
!  Args    : none
! 
! 
! =cut
! 
! sub start_document{
!     my ($self) = @_;
!     $self->{'_lasttype'} = '';
!     $self->{'_values'} = {};
!     $self->{'_result'}= undef;
! }
! 
! =head2 end_document
! 
!  Title   : end_document
!  Usage   : $parser->end_document;
!  Function: SAX method to indicate finishing parsing a new document
!  Returns : Bio::Search::Result::ResultI object
!  Args    : none
! 
! =cut
! 
! sub end_document{
!    my ($self, at args) = @_;
!    return $self->{'_result'};
! }
! 
! =head2 start_element
! 
!  Title   : start_element
!  Usage   : $parser->start_element($data)
!  Function: SAX method to indicate starting a new element
!  Returns : none
!  Args    : hash ref for data
! 
! =cut
! 
! sub start_element{
!     my ($self,$data) = @_;
!     # we currently don't care about attributes
!     my $nm = $data->{'Name'};    
! 
!     if( my $type = $MODEMAP{$nm} ) {
! 	if( $self->_eventHandler->will_handle($type) ) {
! 	    my $func = sprintf("start_%s",lc $type);
! 	    $self->_eventHandler->$func($data->{'Attributes'});
! 	}						     
!     }
! 
!     if($nm eq 'BlastOutput') {
! 	$self->{'_values'} = {};
! 	$self->{'_result'}= undef;
!     }
! }
! 
! =head2 end_element
! 
!  Title   : end_element
!  Usage   : $parser->end_element($data)
!  Function: Signals finishing an element
!  Returns : Bio::Search object dpending on what type of element
!  Args    : hash ref for data
! 
! =cut
! 
! sub end_element{
!     my ($self,$data) = @_;
! 
!     my $nm = $data->{'Name'};
!     my $rc;
!     if($nm eq 'BlastOutput_program' &&
!        $self->{'_last_data'} =~ /(t?blast[npx])/i ) {
! 	$self->{'_type'} = uc $1; 
!     }
! 
!     if( my $type = $MODEMAP{$nm} ) {
! 	if( $self->_eventHandler->will_handle($type) ) {
! 	    my $func = sprintf("end_%s",lc $type);
! 	    $rc = $self->_eventHandler->$func($self->{'_type'},
! 					      $self->{'_values'});
! 	}
!     } elsif( $MAPPING{$nm} ) { 
! 	if ( ref($MAPPING{$nm}) =~ /hash/i ) {
! 	    my $key = (keys %{$MAPPING{$nm}})[0];
! 	    $self->{'_values'}->{$key}->{$MAPPING{$nm}->{$key}} = $self->{'_last_data'};
! 	} else {
! 	    $self->{'_values'}->{$MAPPING{$nm}} = $self->{'_last_data'};
! 	}
!     } elsif( $nm eq 'Iteration' || $nm eq 'Hit_hsps' || $nm eq 'Parameters' ||
! 	     $nm eq 'BlastOutput_param' || $nm eq 'Iteration_hits' || 
! 	     $nm eq 'Statistics' || $nm eq 'BlastOutput_iterations' ){
!         # ignores these elements for now; no iteration parsing
!     } else { 	
! 	
! 	$self->debug("ignoring unrecognized element type $nm\n");
!     }
!     $self->{'_last_data'} = ''; # remove read data if we are at 
! 				# end of an element
!     $self->{'_result'} = $rc if( $nm eq 'BlastOutput' );
!     return $rc;
! }
! 
! =head2 characters
! 
!  Title   : characters
!  Usage   : $parser->characters($data)
!  Function: Signals new characters to be processed
!  Returns : characters read
!  Args    : hash ref with the key 'Data'
! 
! 
! =cut
! 
! sub characters{
!    my ($self,$data) = @_;   
!    return unless ( defined $data->{'Data'} && $data->{'Data'} !~ /^\s+$/ );
!    $self->{'_last_data'} = &decode_entities($data->{'Data'}); 
  }
  
--- 256,280 ----
  	%parser_args = ('Source' => { 'String' => $data });
      }
  
!     my $starttime;
!     if(  $DEBUG ) {  $starttime = [ Time::HiRes::gettimeofday() ]; }
!     
!     eval {
! 	$self->{'_result_cache'} = $self->{'_xmlparser'}->parse(%parser_args);
!         $self->{'_result_count'} += scalar(@{ $self->{'_result_cache'} });
!         # remove result refs from handler
!         $self->{'_xmlparser'}->get_handler->reset_results;
      };
+     
      if( $@ ) {
  	$self->warn("error in parsing a report:\n $@");
  	$result = undef;
      }    
!     if( $DEBUG ) {
! 	$self->debug( sprintf("parsing took %f seconds\n", Time::HiRes::tv_interval($starttime)));
!     }
      # parsing magic here - but we call event handlers rather than 
      # instantiating things 
!     return shift @{ $self->{'_result_cache'} };
  }
  
***************
*** 477,479 ****
--- 304,317 ----
  }
  
+ sub no_preparse {
+     my $self = shift;
+     return $self->{'_result_count'} = shift if @_;
+     return $self->{'_result_count'};
+ }
+ 
+ sub saxparser {
+     my $self = shift;
+     return ref($self->{'_xmlparser'});
+ }
+ 
  1;



More information about the Bioperl-guts-l mailing list