[Bioperl-guts-l] bioperl-live/Bio/SearchIO/XML BlastHandler.pm, 1.1, 1.2

Christopher John Fields cjfields at dev.open-bio.org
Sat Dec 30 15:36:47 EST 2006


Update of /home/repository/bioperl/bioperl-live/Bio/SearchIO/XML
In directory dev.open-bio.org:/tmp/cvs-serv7329/XML

Modified Files:
	BlastHandler.pm 
Log Message:
tentative fix for bug 2166

Index: BlastHandler.pm
===================================================================
RCS file: /home/repository/bioperl/bioperl-live/Bio/SearchIO/XML/BlastHandler.pm,v
retrieving revision 1.1
retrieving revision 1.2
diff -C2 -d -r1.1 -r1.2
*** BlastHandler.pm	28 Dec 2006 22:34:07 -0000	1.1
--- BlastHandler.pm	30 Dec 2006 20:36:45 -0000	1.2
***************
*** 72,153 ****
  use base qw(Bio::Root::Root XML::SAX::Base);
  
! our %MODEMAP = ('BlastOutput' => 'result',
! 		'Hit'         => 'hit',
! 		'Hsp'         => 'hsp'
! 		);
  
  our %MAPPING = ( 
! 		 # HSP specific fields
! 		 'Hsp_bit-score'  => 'HSP-bits',
! 		 'Hsp_score'      => 'HSP-score',
! 		 'Hsp_evalue'     => 'HSP-evalue',
! 		 'Hsp_query-from' => 'HSP-query_start',
! 		 'Hsp_query-to'   => 'HSP-query_end',
! 		 'Hsp_hit-from'   => 'HSP-hit_start',
! 		 'Hsp_hit-to'     => 'HSP-hit_end',
! 		 'Hsp_positive'   => 'HSP-conserved',
! 		 'Hsp_identity'   => 'HSP-identical',
! 		 'Hsp_gaps'       => 'HSP-gaps',
! 		 'Hsp_hitgaps'    => 'HSP-hit_gaps',
! 		 'Hsp_querygaps'  => 'HSP-query_gaps',
! 		 'Hsp_qseq'       => 'HSP-query_seq',
! 		 'Hsp_hseq'       => 'HSP-hit_seq',
! 		 'Hsp_midline'    => 'HSP-homology_seq',
! 		 'Hsp_align-len'  => 'HSP-hsp_length',
! 		 'Hsp_query-frame'=> 'HSP-query_frame',
! 		 'Hsp_hit-frame'  => 'HSP-hit_frame',
  
! 		 # these are ignored for now
! 		 'Hsp_num'          => 'HSP-order',
! 		 'Hsp_pattern-from' => 'patternend',
! 		 'Hsp_pattern-to'   => 'patternstart',
! 		 'Hsp_density'      => 'hspdensity',
  
! 		 # Hit specific fields
! 		 'Hit_id'               => 'HIT-name',
! 		 'Hit_len'              => 'HIT-length',
! 		 'Hit_accession'        => 'HIT-accession',
! 		 'Hit_def'              => 'HIT-description',
! 		 'Hit_num'              => 'HIT-order',
! 		 'Iteration_iter-num'   => 'HIT-iteration',
! 		 'Iteration_stat'       => 'HIT-iteration_statistic',
! 		 
! 		 'BlastOutput_program'   => 'RESULT-algorithm_name',
! 		 'BlastOutput_version'   => 'RESULT-algorithm_version',
! 		 'BlastOutput_query-def' => 'RESULT-query_description',
! 		 'BlastOutput_query-len' => 'RESULT-query_length',
! 		 'BlastOutput_db'        => 'RESULT-database_name',
! 		 'BlastOutput_reference' => 'RESULT-program_reference',
! 		 'BlastOutput_query-ID'  => 'runid',
! 		 
! 		 'Parameters_matrix'    => { 'RESULT-parameters' => 'matrix'},
! 		 'Parameters_expect'    => { 'RESULT-parameters' => 'expect'},
! 		 'Parameters_include'   => { 'RESULT-parameters' => 'include'},
! 		 'Parameters_sc-match'  => { 'RESULT-parameters' => 'match'},
! 		 'Parameters_sc-mismatch' => { 'RESULT-parameters' => 'mismatch'},
! 		 'Parameters_gap-open'  => { 'RESULT-parameters' => 'gapopen'},
! 		 'Parameters_gap-extend'=> { 'RESULT-parameters' => 'gapext'},
! 		 'Parameters_filter'    => {'RESULT-parameters' => 'filter'},
! 		 'Statistics_db-num'    => 'RESULT-database_entries',
! 		 'Statistics_db-len'    => 'RESULT-database_letters',
! 		 'Statistics_hsp-len'   => { 'RESULT-statistics' => 'hsplength'},
! 		 'Statistics_eff-space' => { 'RESULT-statistics' => 'effectivespace'},
! 		 'Statistics_kappa'     => { 'RESULT-statistics' => 'kappa' },
! 		 'Statistics_lambda'    => { 'RESULT-statistics' => 'lambda' },
! 		 'Statistics_entropy'   => { 'RESULT-statistics' => 'entropy'},
! 		 );
  
! our %IGNOREDTAGS = map {$_ => 1}
!         qw(Iteration
!            Hit_hsps
!            Parameters
!            BlastOutput_param
!            Iteration_hits
!            Statistics
!            BlastOutput_iterations
!            Iteration_query-ID
!            Iteration_query-def
!            Iteration_query-len
!            );
  
  =head2 SAX methods
--- 72,171 ----
  use base qw(Bio::Root::Root XML::SAX::Base);
  
! our %MODEMAP = (
!                 'Iteration'   => 'result',
!                 'Hit'         => 'hit',
!                 'Hsp'         => 'hsp'
! );
! 
! # major post 2.2.12 BLAST XML changes
! # 1) moved XML Handler to its own class
! # 2) reconfigure blastxml to deal with old and new BLAST XML output
! 
! # Some tagged data prior to Iteration must be retained throughout the entire parse,
! # then added back to $self->{'_values'} prior to end_result (from 'Iteration').
! # Ugly yes, but it works for now, until ExpatXS implements a parse_chunk() method
! 
! our %HEADER = (  # Result-specific fields
! );
  
  our %MAPPING = ( 
!                 # HSP specific fields
  
!                 'Hsp_bit-score'  => 'HSP-bits',
!                 'Hsp_score'      => 'HSP-score',
!                 'Hsp_evalue'     => 'HSP-evalue',
!                 'Hsp_query-from' => 'HSP-query_start',
!                 'Hsp_query-to'   => 'HSP-query_end',
!                 'Hsp_hit-from'   => 'HSP-hit_start',
!                 'Hsp_hit-to'     => 'HSP-hit_end',
!                 'Hsp_positive'   => 'HSP-conserved',
!                 'Hsp_identity'   => 'HSP-identical',
!                 'Hsp_gaps'       => 'HSP-gaps',
!                 'Hsp_hitgaps'    => 'HSP-hit_gaps',
!                 'Hsp_querygaps'  => 'HSP-query_gaps',
!                 'Hsp_qseq'       => 'HSP-query_seq',
!                 'Hsp_hseq'       => 'HSP-hit_seq',
!                 'Hsp_midline'    => 'HSP-homology_seq',
!                 'Hsp_align-len'  => 'HSP-hsp_length',
!                 'Hsp_query-frame'=> 'HSP-query_frame',
!                 'Hsp_hit-frame'  => 'HSP-hit_frame',
  
!                 # Hit specific fields
!                 'Hit_id'               => 'HIT-name',
!                 'Hit_len'              => 'HIT-length',
!                 'Hit_accession'        => 'HIT-accession',
!                 'Hit_def'              => 'HIT-description',
!                 'Hit_num'              => 'HIT-order',
!                 'Iteration_iter-num'   => 'HIT-iteration',
!                 'Iteration_stat'       => 'HIT-iteration_statistic',
!                 
!                 # Result-specific fields
!                 'Statistics_db-num'    => 'RESULT-database_entries',
!                 'Statistics_db-len'    => 'RESULT-database_letters',
!                 'Statistics_hsp-len'   => { 'RESULT-statistics' => 'hsplength'},
!                 'Statistics_eff-space' => { 'RESULT-statistics' => 'effectivespace'},
!                 'Statistics_kappa'     => { 'RESULT-statistics' => 'kappa' },
!                 'Statistics_lambda'    => { 'RESULT-statistics' => 'lambda' },
!                 'Statistics_entropy'   => { 'RESULT-statistics' => 'entropy'},
!                 
!                 'BlastOutput_query-def' => 'RESULT-query_description',
!                 'BlastOutput_query-len' => 'RESULT-query_length',
!                 'BlastOutput_query-ID'  => 'runid',
  
!                 'BlastOutput_program'   => 'RESULT-algorithm_name',
!                 'BlastOutput_version'   => 'RESULT-algorithm_version',
!                 'BlastOutput_db'        => 'RESULT-database_name',
!                 'BlastOutput_reference' => 'RESULT-program_reference',
!                 'Parameters_matrix'    => { 'RESULT-parameters' => 'matrix'},
!                 'Parameters_expect'    => { 'RESULT-parameters' => 'expect'},
!                 'Parameters_include'   => { 'RESULT-parameters' => 'include'},
!                 'Parameters_sc-match'  => { 'RESULT-parameters' => 'match'},
!                 'Parameters_sc-mismatch' => { 'RESULT-parameters' => 'mismatch'},
!                 'Parameters_gap-open'  => { 'RESULT-parameters' => 'gapopen'},
!                 'Parameters_gap-extend'=> { 'RESULT-parameters' => 'gapext'},
!                 'Parameters_filter'    => {'RESULT-parameters' => 'filter'},
!                 
!                 # if these tags are present, they will overwrite the
!                 # above with more current data (i.e. multiquery hits)
!                 'Iteration_query-def'   => 'RESULT-query_description',
!                 'Iteration_query-len'   => 'RESULT-query_length',       
!                 'Iteration_query-ID'    => 'runid',
!                );
! 
! # these XML tags are ignored for now
! our %IGNOREDTAGS = (
!                 'Hsp_num'              => 1,#'HSP-order',
!                 'Hsp_pattern-from'     => 1,#'patternend',
!                 'Hsp_pattern-to'       => 1,#'patternstart',
!                 'Hsp_density'          => 1,#'hspdensity',
!                 'Iteration_message'    => 1,
!                 'Hit_hsps'             => 1,
!                 'BlastOutput_param'    => 1,
!                 'Iteration_hits'       => 1,
!                 'Statistics'           => 1,
!                 'Parameters'           => 1,
!                 'BlastOutput'          => 1,
!                 'BlastOutput_iterations' => 1,     
!                    );
  
  =head2 SAX methods
***************
*** 155,158 ****
--- 173,186 ----
  =cut
  
+ =head2 parse
+ 
+  Title   : parse
+  Usage   : $parser->parse(%params);
+  Function: SAX method to indicate starting to parse a new document
+  Returns : Bio::Result::ResultI
+  Args    : parameters.  For full list see XML::SAX::Base::parse()
+ 
+ =cut
+ 
  =head2 start_document
  
***************
*** 162,166 ****
   Returns : none
   Args    : none
!  
  =cut
  
--- 190,194 ----
   Returns : none
   Args    : none
! 
  =cut
  
***************
*** 184,187 ****
--- 212,220 ----
  sub end_document{
     my ($self,@args) = @_;
+    
+    # reset data carried throughout parse
+    $self->{'_header'} = undef;
+    
+    # pass back ref to results queue; caller must reset handler results queue
     return $self->{'_result'};
  }
***************
*** 200,214 ****
      my ($self,$data) = @_;
      # we currently don't care about attributes
!     my $nm = $data->{'Name'};    
  
      if( my $type = $MODEMAP{$nm} ) {
! 	if( $self->_eventHandler->will_handle($type) ) {
! 	    my $func = sprintf("start_%s",lc $type);
! 	    $self->_eventHandler->$func($data->{'Attributes'});
! 	}						     
!     }
! 
!     if($nm eq 'BlastOutput') {
!         $self->{'_values'} = {};
      }
  }
--- 233,243 ----
      my ($self,$data) = @_;
      # we currently don't care about attributes
!     my $nm = $data->{'Name'};
  
      if( my $type = $MODEMAP{$nm} ) {
!         if( $self->_eventHandler->will_handle($type) ) {
!             my $func = sprintf("start_%s",lc $type);
!             $self->_eventHandler->$func($data->{'Attributes'});
!         }                                                    
      }
  }
***************
*** 231,258 ****
      if($nm eq 'BlastOutput_program' &&
         $self->{'_last_data'} =~ /(t?blast[npx])/i ) {
! 	$self->{'_type'} = uc $1; 
      }
- 
      if( my $type = $MODEMAP{$nm} ) {
! 	if( $self->_eventHandler->will_handle($type) ) {
! 	    my $func = sprintf("end_%s",lc $type);
! 	    $rc = $self->_eventHandler->$func($self->{'_type'},
! 					      $self->{'_values'});
! 	}
!     } elsif( $MAPPING{$nm} ) { 
! 	if ( ref($MAPPING{$nm}) =~ /hash/i ) {
! 	    my $key = (keys %{$MAPPING{$nm}})[0];
! 	    $self->{'_values'}->{$key}->{$MAPPING{$nm}->{$key}} = $self->{'_last_data'};
! 	} else {
! 	    $self->{'_values'}->{$MAPPING{$nm}} = $self->{'_last_data'};
! 	}
!     } elsif(exists $IGNOREDTAGS{$nm}){
!         # ignores these elements for now; no iteration parsing
!     } else { 	
          $self->debug("ignoring unrecognized element type $nm\n");
      }
      $self->{'_last_data'} = ''; # remove read data if we are at 
! 				# end of an element
!     push @{ $self->{'_result'} }, $rc if( $nm eq 'BlastOutput' );
  }
  
--- 260,303 ----
      if($nm eq 'BlastOutput_program' &&
         $self->{'_last_data'} =~ /(t?blast[npx])/i ) {
!         $self->{'_type'} = uc $1; 
      }
      if( my $type = $MODEMAP{$nm} ) {
!         if( $self->_eventHandler->will_handle($type) ) {
!             my $func = sprintf("end_%s",lc $type);
!             $rc = $self->_eventHandler->$func($self->{'_type'},
!                                               $self->{'_values'});
!         }
!     }
!     elsif( exists $MAPPING{$nm} ) { 
!         if ( ref($MAPPING{$nm}) =~ /hash/i ) {
!             my $key = (keys %{$MAPPING{$nm}})[0];
!             $self->{'_values'}->{$key}->{$MAPPING{$nm}->{$key}} = $self->{'_last_data'};
!         } else {
!             $self->{'_values'}->{$MAPPING{$nm}} = $self->{'_last_data'};
!         }
!     }
!     elsif( exists $HEADER{$nm} ){
!         if ( ref($HEADER{$nm}) =~ /hash/i ) {
!             my $key = (keys %{$HEADER{$nm}})[0];
!             $self->{'_header'}->{$key}->{$HEADER{$nm}->{$key}} = $self->{'_last_data'};
!         } else {
!             $self->{'_header'}->{$HEADER{$nm}} = $self->{'_last_data'};
!         }
!     }
!     elsif( exists $IGNOREDTAGS{$nm} ){
!         # ignores these elements for now
!     }
!     else {      
          $self->debug("ignoring unrecognized element type $nm\n");
      }
      $self->{'_last_data'} = ''; # remove read data if we are at 
!                                 # end of an element
!                                 
!     # add to ResultI array
!     push @{ $self->{'_result'} }, $rc if( $nm eq 'Iteration' );
!     # reset values for each Result round
!     if ($nm eq 'Iteration') {
!         $self->{'_values'} = {};
!     }
  }
  
***************
*** 285,288 ****
  }
  
- 
  1;
\ No newline at end of file
--- 330,332 ----



More information about the Bioperl-guts-l mailing list