[Bioperl-guts-l] bioperl-live/Bio/SearchIO blast.pm,1.111,1.112

Christopher John Fields cjfields at dev.open-bio.org
Wed Jan 17 20:58:21 EST 2007


Update of /home/repository/bioperl/bioperl-live/Bio/SearchIO
In directory dev.open-bio.org:/tmp/cvs-serv4699

Modified Files:
	blast.pm 
Log Message:
Bug 2189; did some pre-spring cleaning

Index: blast.pm
===================================================================
RCS file: /home/repository/bioperl/bioperl-live/Bio/SearchIO/blast.pm,v
retrieving revision 1.111
retrieving revision 1.112
diff -C2 -d -r1.111 -r1.112
*** blast.pm	17 Jan 2007 14:11:48 -0000	1.111
--- blast.pm	18 Jan 2007 01:58:19 -0000	1.112
***************
*** 437,440 ****
--- 437,441 ----
                               # lambda, K, H
      local $_ = "\n";   #consistency
+     PARSER:
      while ( defined( $_ = $self->_readline ) ) {
          next if (/^\s+$/);       # skip empty lines
***************
*** 453,464 ****
                  # This handles multi-result input streams
                  $self->_pushback($_);
!                 $self->in_element('hsp')
!                   && $self->end_element( { 'Name' => 'Hsp' } );
!                 $self->in_element('hit')
!                   && $self->end_element( { 'Name' => 'Hit' } );
!                 $self->within_element('iteration')
!                   && $self->end_element( { 'Name' => 'Iteration' } );
!                 $self->end_element( { 'Name' => 'BlastOutput' } );
!                 return $self->end_document();
              }
              $self->_start_blastoutput;
--- 454,458 ----
                  # This handles multi-result input streams
                  $self->_pushback($_);
!                 last PARSER;
              }
              $self->_start_blastoutput;
***************
*** 514,533 ****
                  $self->_pushback($reportline) if $reportline;
                  $self->_pushback($_);
!                 $self->in_element('hsp')
!                   && $self->end_element( { 'Name' => 'Hsp' } );
!                 $self->in_element('hit')
!                   && $self->end_element( { 'Name' => 'Hit' } );
!                 $self->within_element('iteration')
!                   && $self->end_element( { 'Name' => 'Iteration' } );
!                 if ($bl2seq_fix) {
!                     $self->element(
!                         {
!                             'Name' => 'BlastOutput_program',
!                             'Data' => $reporttype
!                         }
!                     );
!                 }
!                 $self->end_element( { 'Name' => 'BlastOutput' } );
!                 return $self->end_document();
              }
              else {
--- 508,512 ----
                  $self->_pushback($reportline) if $reportline;
                  $self->_pushback($_);
!                 last PARSER;
              }
              else {
***************
*** 616,630 ****
            descline:
              while ( defined( $_ = $self->_readline() ) ) {
!                 if (/^>/ 
!                     || /^\s+Database:\s+?/
!                     || /^Parameters:/
!                     || /^\s+Subset/
!                     || /^\s*Lambda/
!                     || /^\s*Histogram/
!                     ) {
!                     $self->_pushback($_); # Catch leading > (end of section)
!                     last descline;
!                 }
!                 elsif (/(?<!cor)([\d\.\+\-eE]+)\s+([\d\.\+\-eE]+)(\s+\d+)?\s*$/) {
  
                      # the last match is for gapped BLAST output
--- 595,600 ----
            descline:
              while ( defined( $_ = $self->_readline() ) ) {
!                 
!                 if (/(?<!cor)([\d\.\+\-eE]+)\s+([\d\.\+\-eE]+)(\s+\d+)?\s*$/) {
  
                      # the last match is for gapped BLAST output
***************
*** 658,664 ****
                          }
                      );
                  }
-                 @hit_signifs = sort {$a->[0] <=> $b->[0]} @hit_signifs;
              }
          }
          elsif (/Sequences producing High-scoring Segment Pairs:/) {
--- 628,644 ----
                          }
                      );
+                 } elsif (/^>/ 
+                     || /^\s+Database:\s+?/
+                     || /^Parameters:/
+                     || /^\s+Subset/
+                     || /^\s*Lambda/
+                     || /^\s*Histogram/
+                     || /^Query=/
+                     ) {
+                     $self->_pushback($_); # Catch leading > (end of section)
+                     last descline;
                  }
              }
+             @hit_signifs = sort {$a->[0] <=> $b->[0]} @hit_signifs;
          }
          elsif (/Sequences producing High-scoring Segment Pairs:/) {
***************
*** 1141,1186 ****
              # This is for the case when we specify -b 0 (or B=0 for WU-BLAST)
              # and still want to construct minimal Hit objects
!             while ( my $v = shift @hit_signifs ) {
!                 next unless defined $v;
!                 $self->start_element( { 'Name' => 'Hit' } );
!                 my $id   = $v->[2];
!                 my $desc = $v->[3];
!                 $self->element(
!                     {
!                         'Name' => 'Hit_id',
!                         'Data' => $id
!                     }
!                 );
!                 my ( $acc, $version ) = &_get_accession_version($id);
!                 $self->element(
!                     {
!                         'Name' => 'Hit_accession',
!                         'Data' => $acc
!                     }
!                 );
! 
!                 if ( defined $v ) {
!                     $self->element(
!                         {
!                             'Name' => 'Hit_signif',
!                             'Data' => $v->[0]
!                         }
!                     );
!                     $self->element(
!                         {
!                             'Name' => 'Hit_score',
!                             'Data' => $v->[1]
!                         }
!                     );
!                 }
!                 $self->element(
!                     {
!                         'Name' => 'Hit_def',
!                         'Data' => $desc
!                     }
!                 );
!                 $self->end_element( { 'Name' => 'Hit' } );
!             }
! 
              $self->within_element('iteration')
                && $self->end_element( { 'Name' => 'Iteration' } );
--- 1121,1125 ----
              # This is for the case when we specify -b 0 (or B=0 for WU-BLAST)
              # and still want to construct minimal Hit objects
!             $self->_cleanup_hits(\@hit_signifs) if scalar(@hit_signifs);
              $self->within_element('iteration')
                && $self->end_element( { 'Name' => 'Iteration' } );
***************
*** 1216,1237 ****
                      $self->_pushback($reportline) if $reportline;
                      $self->_pushback($_);
! 
!                     # -- Superfluous I think, but adding nonetheless
!                     $self->in_element('hsp')
!                       && $self->end_element( { 'Name' => 'Hsp' } );
!                     $self->in_element('hit')
!                       && $self->end_element( { 'Name' => 'Hit' } );
! 
!                     # --
!                     if ($bl2seq_fix) {
!                         $self->element(
!                             {
!                                 'Name' => 'BlastOutput_program',
!                                 'Data' => $reporttype
!                             }
!                         );
!                     }
!                     $self->end_element( { 'Name' => 'BlastOutput' } );
!                     return $self->end_document();
                  }
  
--- 1155,1159 ----
                      $self->_pushback($reportline) if $reportline;
                      $self->_pushback($_);
!                     last PARSER;
                  }
  
***************
*** 1847,1850 ****
--- 1769,1774 ----
          $self->within_element('hit')
            && $self->end_element( { 'Name' => 'Hit' } );
+         # cleanup extra hits
+         $self->_cleanup_hits(\@hit_signifs) if scalar(@hit_signifs);
          $self->within_element('iteration')
            && $self->end_element( { 'Name' => 'Iteration' } );
***************
*** 2087,2091 ****
  sub element {
      my ( $self, $data ) = @_;
!     $self->start_element($data);
      $self->characters($data);
      $self->end_element($data);
--- 2011,2015 ----
  sub element {
      my ( $self, $data ) = @_;
!     #$self->start_element($data);
      $self->characters($data);
      $self->end_element($data);
***************
*** 2389,2392 ****
--- 2313,2365 ----
  }
  
+ # general private method used to make minimal hits from leftover
+ # data in the hit table
+ 
+ sub _cleanup_hits {
+     my ($self, $hits) = @_;
+     while ( my $v = shift @{ $hits }) {
+         next unless defined $v;
+         $self->start_element( { 'Name' => 'Hit' } );
+         my $id   = $v->[2];
+         my $desc = $v->[3];
+         $self->element(
+             {
+                 'Name' => 'Hit_id',
+                 'Data' => $id
+             }
+         );
+         my ( $acc, $version ) = &_get_accession_version($id);
+         $self->element(
+             {
+                 'Name' => 'Hit_accession',
+                 'Data' => $acc
+             }
+         );
+     
+         if ( defined $v ) {
+             $self->element(
+                 {
+                     'Name' => 'Hit_signif',
+                     'Data' => $v->[0]
+                 }
+             );
+             $self->element(
+                 {
+                     'Name' => 'Hit_score',
+                     'Data' => $v->[1]
+                 }
+             );
+         }
+         $self->element(
+             {
+                 'Name' => 'Hit_def',
+                 'Data' => $desc
+             }
+         );
+         $self->end_element( { 'Name' => 'Hit' } );
+     }
+ }
+ 
+ 
  1;
  



More information about the Bioperl-guts-l mailing list