[Bioperl-guts-l] bioperl-live/Bio/SearchIO blastxml.pm,1.42,1.43
Christopher John Fields
cjfields at dev.open-bio.org
Thu Dec 28 17:34:09 EST 2006
Update of /home/repository/bioperl/bioperl-live/Bio/SearchIO
In directory dev.open-bio.org:/tmp/cvs-serv15636
Modified Files:
blastxml.pm
Log Message:
Moving BLAST XML SAX methods to separate class
Index: blastxml.pm
===================================================================
RCS file: /home/repository/bioperl/bioperl-live/Bio/SearchIO/blastxml.pm,v
retrieving revision 1.42
retrieving revision 1.43
diff -C2 -d -r1.42 -r1.43
*** blastxml.pm 5 Dec 2006 22:31:22 -0000 1.42
--- blastxml.pm 28 Dec 2006 22:34:07 -0000 1.43
***************
*** 103,192 ****
package Bio::SearchIO::blastxml;
- use vars qw($DTD %MAPPING %MODEMAP $DEBUG);
use strict;
-
- $DTD = 'ftp://ftp.ncbi.nlm.nih.gov/blast/documents/NCBI_BlastOutput.dtd';
# Object preamble - inherits from Bio::Root::Root
use Bio::Root::Root;
use XML::SAX;
- use HTML::Entities;
use IO::File;
! BEGIN {
! # uncomment only for testing; trying to get XML::SAX::Expat to play nice...
! #$XML::SAX::ParserPackage = 'XML::SAX::PurePerl';
! # mapping of NCBI Blast terms to Bioperl hash keys
! %MODEMAP = ('BlastOutput' => 'result',
! 'Hit' => 'hit',
! 'Hsp' => 'hsp'
! );
!
! %MAPPING = (
! # HSP specific fields
! 'Hsp_bit-score' => 'HSP-bits',
! 'Hsp_score' => 'HSP-score',
! 'Hsp_evalue' => 'HSP-evalue',
! 'Hsp_query-from' => 'HSP-query_start',
! 'Hsp_query-to' => 'HSP-query_end',
! 'Hsp_hit-from' => 'HSP-hit_start',
! 'Hsp_hit-to' => 'HSP-hit_end',
! 'Hsp_positive' => 'HSP-conserved',
! 'Hsp_identity' => 'HSP-identical',
! 'Hsp_gaps' => 'HSP-gaps',
! 'Hsp_hitgaps' => 'HSP-hit_gaps',
! 'Hsp_querygaps' => 'HSP-query_gaps',
! 'Hsp_qseq' => 'HSP-query_seq',
! 'Hsp_hseq' => 'HSP-hit_seq',
! 'Hsp_midline' => 'HSP-homology_seq',
! 'Hsp_align-len' => 'HSP-hsp_length',
! 'Hsp_query-frame'=> 'HSP-query_frame',
! 'Hsp_hit-frame' => 'HSP-hit_frame',
!
! # these are ignored for now
! 'Hsp_num' => 'HSP-order',
! 'Hsp_pattern-from' => 'patternend',
! 'Hsp_pattern-to' => 'patternstart',
! 'Hsp_density' => 'hspdensity',
!
! # Hit specific fields
! 'Hit_id' => 'HIT-name',
! 'Hit_len' => 'HIT-length',
! 'Hit_accession' => 'HIT-accession',
! 'Hit_def' => 'HIT-description',
! 'Hit_num' => 'HIT-order',
! 'Iteration_iter-num' => 'HIT-iteration',
! 'Iteration_stat' => 'HIT-iteration_statistic',
!
! 'BlastOutput_program' => 'RESULT-algorithm_name',
! 'BlastOutput_version' => 'RESULT-algorithm_version',
! 'BlastOutput_query-def' => 'RESULT-query_description',
! 'BlastOutput_query-len' => 'RESULT-query_length',
! 'BlastOutput_db' => 'RESULT-database_name',
! 'BlastOutput_reference' => 'RESULT-program_reference',
! 'BlastOutput_query-ID' => 'runid',
!
! 'Parameters_matrix' => { 'RESULT-parameters' => 'matrix'},
! 'Parameters_expect' => { 'RESULT-parameters' => 'expect'},
! 'Parameters_include' => { 'RESULT-parameters' => 'include'},
! 'Parameters_sc-match' => { 'RESULT-parameters' => 'match'},
! 'Parameters_sc-mismatch' => { 'RESULT-parameters' => 'mismatch'},
! 'Parameters_gap-open' => { 'RESULT-parameters' => 'gapopen'},
! 'Parameters_gap-extend'=> { 'RESULT-parameters' => 'gapext'},
! 'Parameters_filter' => {'RESULT-parameters' => 'filter'},
! 'Statistics_db-num' => 'RESULT-database_entries',
! 'Statistics_db-len' => 'RESULT-database_letters',
! 'Statistics_hsp-len' => { 'RESULT-statistics' => 'hsplength'},
! 'Statistics_eff-space' => { 'RESULT-statistics' => 'effectivespace'},
! 'Statistics_kappa' => { 'RESULT-statistics' => 'kappa' },
! 'Statistics_lambda' => { 'RESULT-statistics' => 'lambda' },
! 'Statistics_entropy' => { 'RESULT-statistics' => 'entropy'},
! );
! eval { require Time::HiRes };
! if( $@ ) { $DEBUG = 0; }
! }
! use base qw(Bio::SearchIO);
=head2 new
--- 103,120 ----
package Bio::SearchIO::blastxml;
use strict;
# Object preamble - inherits from Bio::Root::Root
+ use base qw(Bio::SearchIO);
use Bio::Root::Root;
use XML::SAX;
use IO::File;
+ use Bio::SearchIO::XML::BlastHandler;
! our $DTD = 'ftp://ftp.ncbi.nlm.nih.gov/blast/documents/NCBI_BlastOutput.dtd';
+ our $DEBUG;
! # mapping of NCBI Blast terms to Bioperl hash keys
=head2 new
***************
*** 199,205 ****
Returns : Bio::SearchIO::blastxml object
Args : One additional argument from the format and file/fh parameters.
! -tempfile => boolean. Defaults to false. Write out XML data
! to a temporary filehandle to send to
! PerlSAX parser.
=cut
--- 127,133 ----
Returns : Bio::SearchIO::blastxml object
Args : One additional argument from the format and file/fh parameters.
! -tempfile => boolean. Defaults to false. Write out XML data
! to a temporary filehandle to send to PerlSAX parser.
!
=cut
***************
*** 217,227 ****
my ($usetempfile) = $self->_rearrange([qw(TEMPFILE)], at args);
defined $usetempfile && $self->use_tempfile($usetempfile);
! $self->{'_xmlparser'} = XML::SAX::ParserFactory->parser(Handler => $self);
! my $local_parser = ref($self->{'_xmlparser'});
! if ($local_parser eq 'XML::SAX::Expat') {
$self->throw('XML::SAX::Expat not supported as it is no '.
'longer maintained. Please use any other XML::SAX '.
'backend (such as XML::SAX::ExpatXS or XML::LibXML)');
! } elsif ($local_parser eq 'XML::SAX::PurePerl') {
$self->warn("XML::SAX::PurePerl installed as default XML::SAX parser.\n".
"This works but has a small bug which breaks ".
--- 145,168 ----
my ($usetempfile) = $self->_rearrange([qw(TEMPFILE)], at args);
defined $usetempfile && $self->use_tempfile($usetempfile);
!
! # uncomment only for testing XML::SAX backend parsers
! #$XML::SAX::ParserPackage = 'XML::SAX::PurePerl';
!
! # BlastHandler does the heavy lifting
! my $xmlhandler = Bio::SearchIO::XML::BlastHandler->new(-verbose => $self->verbose);
!
! # Pass the SearchIO eventhandler to the XML handler
! # The XML handler does the heavy work, passes data to object handler
! $xmlhandler->_eventHandler($self->_eventHandler());
!
! # start up the parser factory
! my $parserfactory = XML::SAX::ParserFactory->parser(
! Handler => $xmlhandler);
!
! if (ref($parserfactory) eq 'XML::SAX::Expat') {
$self->throw('XML::SAX::Expat not supported as it is no '.
'longer maintained. Please use any other XML::SAX '.
'backend (such as XML::SAX::ExpatXS or XML::LibXML)');
! } elsif (ref($parserfactory) eq 'XML::SAX::PurePerl' && $self->verbose > -1) {
$self->warn("XML::SAX::PurePerl installed as default XML::SAX parser.\n".
"This works but has a small bug which breaks ".
***************
*** 230,234 ****
"backend (such as XML::SAX::ExpatXS or XML::LibXML)");
}
! $DEBUG = 1 if( ! defined $DEBUG && $self->verbose > 0);
}
--- 171,180 ----
"backend (such as XML::SAX::ExpatXS or XML::LibXML)");
}
!
! $self->{'_xmlparser'} = $parserfactory;
! $self->{'_result_cache'} = [];
! eval { require Time::HiRes };
! if( $@ ) { $DEBUG = 0; }
! $DEBUG = 1 if( ! defined $DEBUG && ($self->verbose > 0));
}
***************
*** 245,251 ****
sub next_result {
my ($self) = @_;
local $/ = "\n";
local $_;
!
my $data = '';
my $firstline = 1;
--- 191,204 ----
sub next_result {
my ($self) = @_;
+
+ my $result;
+
+ if ($result = shift @{$self->{'_result_cache'} }) {
+ return $result;
+ }
+
local $/ = "\n";
local $_;
!
my $data = '';
my $firstline = 1;
***************
*** 268,272 ****
}
$sawxmlheader = 1;
! }
# for the non xml version prefixed in each section
if( /DOCTYPE/ ) { #|| /<BlastOutput>/
--- 221,225 ----
}
$sawxmlheader = 1;
! }
# for the non xml version prefixed in each section
if( /DOCTYPE/ ) { #|| /<BlastOutput>/
***************
*** 303,453 ****
%parser_args = ('Source' => { 'String' => $data });
}
- my $result;
- my $starttime;
- #if( $DEBUG ) { $starttime = [ Time::HiRes::gettimeofday() ]; }
! eval {
! $result = $self->{'_xmlparser'}->parse(%parser_args);
! $self->{'_result_count'}++;
};
if( $@ ) {
$self->warn("error in parsing a report:\n $@");
$result = undef;
}
! #if( $DEBUG ) {
! #$self->debug( sprintf("parsing took %f seconds\n", Time::HiRes::tv_interval($starttime)));
! #}
# parsing magic here - but we call event handlers rather than
# instantiating things
! return $result;
! }
!
! =head2 SAX methods
!
! =cut
!
! =head2 start_document
!
! Title : start_document
! Usage : $parser->start_document;
! Function: SAX method to indicate starting to parse a new document
! Returns : none
! Args : none
!
!
! =cut
!
! sub start_document{
! my ($self) = @_;
! $self->{'_lasttype'} = '';
! $self->{'_values'} = {};
! $self->{'_result'}= undef;
! }
!
! =head2 end_document
!
! Title : end_document
! Usage : $parser->end_document;
! Function: SAX method to indicate finishing parsing a new document
! Returns : Bio::Search::Result::ResultI object
! Args : none
!
! =cut
!
! sub end_document{
! my ($self, at args) = @_;
! return $self->{'_result'};
! }
!
! =head2 start_element
!
! Title : start_element
! Usage : $parser->start_element($data)
! Function: SAX method to indicate starting a new element
! Returns : none
! Args : hash ref for data
!
! =cut
!
! sub start_element{
! my ($self,$data) = @_;
! # we currently don't care about attributes
! my $nm = $data->{'Name'};
!
! if( my $type = $MODEMAP{$nm} ) {
! if( $self->_eventHandler->will_handle($type) ) {
! my $func = sprintf("start_%s",lc $type);
! $self->_eventHandler->$func($data->{'Attributes'});
! }
! }
!
! if($nm eq 'BlastOutput') {
! $self->{'_values'} = {};
! $self->{'_result'}= undef;
! }
! }
!
! =head2 end_element
!
! Title : end_element
! Usage : $parser->end_element($data)
! Function: Signals finishing an element
! Returns : Bio::Search object dpending on what type of element
! Args : hash ref for data
!
! =cut
!
! sub end_element{
! my ($self,$data) = @_;
!
! my $nm = $data->{'Name'};
! my $rc;
! if($nm eq 'BlastOutput_program' &&
! $self->{'_last_data'} =~ /(t?blast[npx])/i ) {
! $self->{'_type'} = uc $1;
! }
!
! if( my $type = $MODEMAP{$nm} ) {
! if( $self->_eventHandler->will_handle($type) ) {
! my $func = sprintf("end_%s",lc $type);
! $rc = $self->_eventHandler->$func($self->{'_type'},
! $self->{'_values'});
! }
! } elsif( $MAPPING{$nm} ) {
! if ( ref($MAPPING{$nm}) =~ /hash/i ) {
! my $key = (keys %{$MAPPING{$nm}})[0];
! $self->{'_values'}->{$key}->{$MAPPING{$nm}->{$key}} = $self->{'_last_data'};
! } else {
! $self->{'_values'}->{$MAPPING{$nm}} = $self->{'_last_data'};
! }
! } elsif( $nm eq 'Iteration' || $nm eq 'Hit_hsps' || $nm eq 'Parameters' ||
! $nm eq 'BlastOutput_param' || $nm eq 'Iteration_hits' ||
! $nm eq 'Statistics' || $nm eq 'BlastOutput_iterations' ){
! # ignores these elements for now; no iteration parsing
! } else {
!
! $self->debug("ignoring unrecognized element type $nm\n");
! }
! $self->{'_last_data'} = ''; # remove read data if we are at
! # end of an element
! $self->{'_result'} = $rc if( $nm eq 'BlastOutput' );
! return $rc;
! }
!
! =head2 characters
!
! Title : characters
! Usage : $parser->characters($data)
! Function: Signals new characters to be processed
! Returns : characters read
! Args : hash ref with the key 'Data'
!
!
! =cut
!
! sub characters{
! my ($self,$data) = @_;
! return unless ( defined $data->{'Data'} && $data->{'Data'} !~ /^\s+$/ );
! $self->{'_last_data'} = &decode_entities($data->{'Data'});
}
--- 256,280 ----
%parser_args = ('Source' => { 'String' => $data });
}
! my $starttime;
! if( $DEBUG ) { $starttime = [ Time::HiRes::gettimeofday() ]; }
!
! eval {
! $self->{'_result_cache'} = $self->{'_xmlparser'}->parse(%parser_args);
! $self->{'_result_count'} += scalar(@{ $self->{'_result_cache'} });
! # remove result refs from handler
! $self->{'_xmlparser'}->get_handler->reset_results;
};
+
if( $@ ) {
$self->warn("error in parsing a report:\n $@");
$result = undef;
}
! if( $DEBUG ) {
! $self->debug( sprintf("parsing took %f seconds\n", Time::HiRes::tv_interval($starttime)));
! }
# parsing magic here - but we call event handlers rather than
# instantiating things
! return shift @{ $self->{'_result_cache'} };
}
***************
*** 477,479 ****
--- 304,317 ----
}
+ sub no_preparse {
+ my $self = shift;
+ return $self->{'_result_count'} = shift if @_;
+ return $self->{'_result_count'};
+ }
+
+ sub saxparser {
+ my $self = shift;
+ return ref($self->{'_xmlparser'});
+ }
+
1;
More information about the Bioperl-guts-l
mailing list