[Bioperl-guts-l] bioperl-live/Bio/Search/HSP GenericHSP.pm, 1.70, 1.71

Senduran Balasubramaniam sendu at dev.open-bio.org
Tue Sep 19 04:31:11 EDT 2006


Update of /home/repository/bioperl/bioperl-live/Bio/Search/HSP
In directory dev.open-bio.org:/tmp/cvs-serv1561/Bio/Search/HSP

Modified Files:
	GenericHSP.pm 
Log Message:
merge Bio::SearchIO speedup changes from branch-1-5-2

Index: GenericHSP.pm
===================================================================
RCS file: /home/repository/bioperl/bioperl-live/Bio/Search/HSP/GenericHSP.pm,v
retrieving revision 1.70
retrieving revision 1.71
diff -C2 -d -r1.70 -r1.71
*** GenericHSP.pm	25 Aug 2006 17:23:47 -0000	1.70
--- GenericHSP.pm	19 Sep 2006 08:31:09 -0000	1.71
***************
*** 90,93 ****
--- 90,97 ----
  Email sac-at-bioperl.org
  
+ =head1 CONTRIBUTORS
+ 
+ Sendu Bala, bix at sendu.me.uk
+ 
  =head1 APPENDIX
  
***************
*** 97,104 ****
  =cut
  
- 
  # Let the code begin...
  
- 
  package Bio::Search::HSP::GenericHSP;
  use vars qw(@ISA $GAP_SYMBOL);
--- 101,106 ----
***************
*** 138,148 ****
             -query_start => HSP Query start (in original query sequence coords)
             -query_end   => HSP Query end (in original query sequence coords)
             -hit_name    => HSP Hit sequence name (if available)
             -hit_start   => HSP Hit start (in original hit sequence coords)
             -hit_end     => HSP Hit end (in original hit sequence coords)
             -hit_length  => total length of the hit sequence
-            -query_length=> total length of the query sequence
-            -query_seq   => query sequence portion of the HSP
             -hit_seq     => hit sequence portion of the HSP
             -homology_seq=> homology sequence for the HSP
             -hit_frame   => hit frame (only if hit is translated protein)
--- 140,152 ----
             -query_start => HSP Query start (in original query sequence coords)
             -query_end   => HSP Query end (in original query sequence coords)
+            -query_length=> total length of the query sequence
+            -query_seq   => query sequence portion of the HSP
+            -query_desc  => textual description of the query
             -hit_name    => HSP Hit sequence name (if available)
             -hit_start   => HSP Hit start (in original hit sequence coords)
             -hit_end     => HSP Hit end (in original hit sequence coords)
             -hit_length  => total length of the hit sequence
             -hit_seq     => hit sequence portion of the HSP
+            -hit_desc    => textual description of the hit
             -homology_seq=> homology sequence for the HSP
             -hit_frame   => hit frame (only if hit is translated protein)
***************
*** 155,356 ****
  sub new {
      my($class, at args) = @_;
- 
-     my $self = $class->SUPER::new(@args);
-     my ($algo, $evalue, $pvalue, $identical, $percent_id,$conserved, 
-         $gaps, $query_gaps, $hit_gaps,
-         $hit_seq, $query_seq, $homology_seq,
-         $hsp_len, $query_len,$hit_len,
-         $hit_name,$query_name,$bits,$score,
-         $hs,$he,$qs,$qe,
-         $qframe,$hframe, $links,$hsp_group,
-         $rank) = $self->_rearrange([qw(ALGORITHM
-                                        EVALUE
-                                        PVALUE				       
-                                        IDENTICAL
- 				       PERCENT_IDENTITY
-                                        CONSERVED
-                                        HSP_GAPS
-                                        QUERY_GAPS
-                                        HIT_GAPS
-                                        HIT_SEQ
-                                        QUERY_SEQ
-                                        HOMOLOGY_SEQ
-                                        HSP_LENGTH
-                                        QUERY_LENGTH
-                                        HIT_LENGTH
-                                        HIT_NAME
-                                        QUERY_NAME
-                                        BITS
-                                        SCORE
-                                        HIT_START
-                                        HIT_END
-                                        QUERY_START
-                                        QUERY_END
-                                        QUERY_FRAME
-                                        HIT_FRAME
- 				       LINKS
-                                        HSP_GROUP
-                                        RANK
-                                        )], @args);
- 
-     $algo = 'GENERIC' unless defined $algo;
-     $self->algorithm($algo);
- 
- #    defined $evalue    && $self->evalue($evalue)
- #    $hsp->significance is initialized by the 
- #    the SimilarityPair object - let's only keep one
- #    value, don't need 2 slots.
- 
-     defined $pvalue    && $self->pvalue($pvalue);
-     defined $bits      && $self->bits($bits);
-     defined $score     && $self->score($score);
-     my ($queryfactor, $hitfactor) = (0,0);
-     if( $algo =~ /^(PSI)?T(BLAST|FAST|SW)[NY]/oi ) {
- 	$hitfactor = 1;	
-     } elsif ($algo =~ /^(FAST|BLAST)(X|Y|XY)/oi ||
- 	     $algo =~ /^P?GENEWISE/oi ) {
- 	$queryfactor = 1;	
-     } elsif ($algo =~ /^T(BLAST|FAST|SW)(X|Y|XY)/oi ||
- 	     $algo =~ /^(BLAST|FAST|SW)N/oi ||
- 	     $algo =~ /^WABA|AXT|BLAT|BLASTZ|PSL|MEGABLAST|EXONERATE|SW|SMITH\-WATERMAN|SIM4$/ 
- 	     ){
- 	$hitfactor = 1;
- 	$queryfactor = 1;
-     } elsif( $algo =~ /^RPS-BLAST/ ) {
- 	    $queryfactor = ( $algo =~ /^RPS-BLAST\(BLASTX\)/ ) ? 1 : 0;
-         $hframe = 0;
-     }
-     # Store the aligned query as sequence feature
-     my $strand;
-     unless( defined $qe && defined $qs ) { $self->throw("Did not specify a Query End or Query Begin @args (qs='".$qs||''."',qe='?".$qe."')"); }
-     unless( defined $he && defined $hs ) { $self->throw("Did not specify a Hit End or Hit Begin"); }
-     if ($qe > $qs) {  # normal query: start < end
-         if ($queryfactor) { $strand = 1; } else { $strand = undef; }        
-     } else { # reverse query (i dont know if this is possible, 
-              # but feel free to correct)
-         if ($queryfactor) { $strand = -1; } else { $strand = undef; }
-         ($qs,$qe) = ($qe,$qs);
      
!     }
! 
!     # Note: many of these data are not query- and hit-specific.
!     # Only start, end, name, length are.
!     # We could be more efficient by only storing this info once.
!     # steve chervitz --- Sat Apr  5 00:55:07 2003
! 
!     $self->query( new  Bio::SeqFeature::Similarity
!                   ('-primary'  => $self->primary_tag,
!                    '-start'    => $qs,
!                    '-expect'   => $evalue,
!                    '-bits'     => $bits,
!                    '-score'    => $score,
!                    '-end'      => $qe,
!                    '-strand'   => $strand,
!                    '-seq_id'   => $query_name,
!                    '-seqlength'=> $query_len,
!                    '-source'   => $algo,
!                    ) );
      
!     # to determine frame from something like FASTXY which doesn't
!     # report the frame
!     if( defined $strand && ! defined $qframe && $queryfactor ) {
!         $qframe = ( $self->query->start % 3 ) * $strand;
!     } elsif( ! defined $strand ) { 
!         $qframe = 0;
!     }
!     # store the aligned subject as sequence feature
!     if ($he > $hs) {                # normal subject
!         if ($hitfactor) { $strand = 1; } else { $strand = undef; }
!     } else {
!         if ($hitfactor) { $strand = -1; } else { $strand = undef; }
!         ($hs,$he) = ( $he,$hs); # reverse subject: start bigger than end
      }
! 
!     $self->hit( Bio::SeqFeature::Similarity->new
!                 ('-start'     => $hs,
!                  '-end'       => $he,
!                  '-strand'    => $strand,
!                  '-expect'    => $evalue,
!                  '-bits'      => $bits,
!                  '-score'     => $score,
!                  '-source'    => $algo,
!                  '-seq_id'    => $hit_name,
!                  '-seqlength' => $hit_len,
!                  '-primary'   => $self->primary_tag ));
      
!     if( defined $strand && ! defined $hframe && $hitfactor ) {
!         $hframe = ( $hs % 3 ) * $strand;
!     } elsif( ! defined $strand ) { 
!         $hframe = 0;
!     }
! 
!     $self->frame($qframe,$hframe);
! 
!     if( ! defined $query_len || ! defined $hit_len ) { 
!         $self->throw("Must defined hit and query length");
!     }
! 
!     if( ! defined $identical  ) { 
! 	if( ! defined $percent_id ) {
! 	    $self->warn("Did not defined the number of identical matches or overall percent identity in the HSP assuming 0");
! 	    $identical = 0;
! 	} else { 
! 	    $identical = int($percent_id * $hsp_len);
! 	}
!     } 
!     if( ! defined $conserved ) {
! 	$self->warn("Did not defined the number of conserved matches in the HSP assuming conserved == identical ($identical)") 
! 	    if( $algo !~ /^((FAST|BLAST)N)|EXONERATE|SIM4|AXT|PSL|BLAT|BLASTZ|WABA/oi);
! 	$conserved = $identical;
!     } 
!     # protect for divide by zero if user does not specify 
!     # hsp_len, query_len, or hit_len
!     $self->num_identical($identical);
!     $self->num_conserved($conserved);
      
!     my $logical;
!     if( $hsp_len ) {
!         $self->length('total', $hsp_len);
!         $logical = $self->_logical_length('total');
!         $self->frac_identical( 'total', $identical / $hsp_len);
!         $self->frac_conserved( 'total', $conserved / $hsp_len);
!     }
!     if( $hit_len ) {
! #        $self->length('hit', $self->hit->length);
!         $logical = $self->_logical_length('hit');
!         $self->frac_identical( 'hit', $identical / $logical);
!         $self->frac_conserved( 'hit', $conserved / $logical);
!     }
!     if( $query_len ) {
! #        $self->length('query', $self->query->length);        
!         $logical = $self->_logical_length('query');
!         $self->frac_identical( 'query', $identical / $logical) ;
!         $self->frac_conserved( 'query', $conserved / $logical);
!     }
!     $self->query_string($query_seq);
!     $self->hit_string($hit_seq);
!     $self->homology_string($homology_seq);
      
!     if( defined $query_gaps ) {
!         $self->gaps('query', $query_gaps);
!     } elsif( defined $query_seq ) {
!         $self->gaps('query', scalar ( $query_seq =~ tr/\-//));
!     }
!     if( defined $hit_gaps ) {
!         $self->gaps('hit', $hit_gaps);
!     } elsif( defined $hit_seq ) {
!         $self->gaps('hit', scalar ( $hit_seq =~ tr/\-//));
!     }
!     if( ! defined $gaps ) {
!         $gaps = $self->gaps("query") + $self->gaps("hit");
      }
!     $self->gaps('total', $gaps);
! 
!     $self->percent_identity($percent_id ||
! 			    $self->frac_identical('total')*100) if( $hsp_len > 0 );
! 
!     defined $rank && $self->rank($rank);
!     defined $links && $self->links($links);    
!     defined $hsp_group && $self->hsp_group($hsp_group);
      return $self;
  }
--- 159,187 ----
  sub new {
      my($class, at args) = @_;
      
!     # don't pass anything to SUPER; complex heirarchy results in lots of work
!     # for nothing
!     my $self = $class->SUPER::new();
      
!     # for speed, don't use _rearrange and just store all input data directly
!     # with no method calls and no work done. work can be carried
!     # out just-in-time later if desired
!     my %args = @args;
!     while (my ($arg, $value) = each %args) {
!         $arg =~ tr/a-z\055/A-Z/d;
!         $self->{$arg} = $value;
      }
!     my $bits = $self->{BITS};
      
!     defined $self->{VERBOSE} && $self->verbose($self->{VERBOSE});
      
!     $self->{ALGORITHM} ||= 'GENERIC';
      
!     if (! defined $self->{QUERY_LENGTH} || ! defined $self->{HIT_LENGTH}) { 
!         $self->throw("Must define hit and query length");
      }
!     
!     $self->{'_sequenceschanged'} = 1;
!     
      return $self;
  }
***************
*** 388,395 ****
  sub algorithm{
      my ($self,$value) = @_;
!     my $previous = $self->{'_algorithm'};
      if( defined $value || ! defined $previous ) { 
          $value = $previous = '' unless defined $value;
!         $self->{'_algorithm'} = $value;
      } 
  
--- 219,226 ----
  sub algorithm{
      my ($self,$value) = @_;
!     my $previous = $self->{'ALGORITHM'};
      if( defined $value || ! defined $previous ) { 
          $value = $previous = '' unless defined $value;
!         $self->{'ALGORITHM'} = $value;
      } 
  
***************
*** 410,416 ****
  sub pvalue {
      my ($self,$value) = @_;
!     my $previous = $self->{'_pvalue'};
      if( defined $value  ) {         
!         $self->{'_pvalue'} = $value;
      } 
      return $previous;   
--- 241,247 ----
  sub pvalue {
      my ($self,$value) = @_;
!     my $previous = $self->{'PVALUE'};
      if( defined $value  ) {         
!         $self->{'PVALUE'} = $value;
      } 
      return $previous;   
***************
*** 456,460 ****
  sub frac_identical {
     my ($self, $type,$value) = @_;
! 
     $type = lc $type if defined $type;
     $type = 'hit' if( defined $type && 
--- 287,295 ----
  sub frac_identical {
     my ($self, $type,$value) = @_;
!     
!     unless ($self->{_did_prefrac}) {
!         $self->_pre_frac;
!     }
!     
     $type = lc $type if defined $type;
     $type = 'hit' if( defined $type && 
***************
*** 494,497 ****
--- 329,337 ----
  sub frac_conserved {
      my ($self, $type,$value) = @_;
+     
+     unless ($self->{_did_prefrac}) {
+         $self->_pre_frac;
+     }
+     
      $type = lc $type if defined $type;
      $type = 'hit' if( defined $type && $type =~ /subject|sbjct/);
***************
*** 520,525 ****
  =cut
  
! sub gaps        {
      my ($self, $type,$value) = @_;
      $type = lc $type if defined $type;
      $type = 'total' if( ! defined $type || $type eq 'hsp' ||
--- 360,370 ----
  =cut
  
! sub gaps {
      my ($self, $type,$value) = @_;
+     
+     unless ($self->{_did_pregaps}) {
+         $self->_pre_gaps;
+     }
+     
      $type = lc $type if defined $type;
      $type = 'total' if( ! defined $type || $type eq 'hsp' ||
***************
*** 547,554 ****
  sub query_string{
      my ($self,$value) = @_;
!     my $previous = $self->{'_query_string'};
      if( defined $value || ! defined $previous ) { 
          $value = $previous = '' unless defined $value;
!         $self->{'_query_string'} = $value;
          # do some housekeeping so we know when to 
          # re-run _calculate_seq_positions
--- 392,399 ----
  sub query_string{
      my ($self,$value) = @_;
!     my $previous = $self->{QUERY_SEQ};
      if( defined $value || ! defined $previous ) { 
          $value = $previous = '' unless defined $value;
!         $self->{QUERY_SEQ} = $value;
          # do some housekeeping so we know when to 
          # re-run _calculate_seq_positions
***************
*** 571,578 ****
  sub hit_string{
      my ($self,$value) = @_;
!     my $previous = $self->{'_hit_string'};
      if( defined $value || ! defined $previous ) { 
          $value = $previous = '' unless defined $value;
!         $self->{'_hit_string'} = $value;
          # do some housekeeping so we know when to 
          # re-run _calculate_seq_positions
--- 416,423 ----
  sub hit_string{
      my ($self,$value) = @_;
!     my $previous = $self->{HIT_SEQ};
      if( defined $value || ! defined $previous ) { 
          $value = $previous = '' unless defined $value;
!         $self->{HIT_SEQ} = $value;
          # do some housekeeping so we know when to 
          # re-run _calculate_seq_positions
***************
*** 597,604 ****
  sub homology_string{
      my ($self,$value) = @_;
!     my $previous = $self->{'_homology_string'};
      if( defined $value || ! defined $previous ) { 
          $value = $previous = '' unless defined $value;
!         $self->{'_homology_string'} = $value;
          # do some housekeeping so we know when to 
          # re-run _calculate_seq_positions
--- 442,449 ----
  sub homology_string{
      my ($self,$value) = @_;
!     my $previous = $self->{HOMOLOGY_SEQ};
      if( defined $value || ! defined $previous ) { 
          $value = $previous = '' unless defined $value;
!         $self->{HOMOLOGY_SEQ} = $value;
          # do some housekeeping so we know when to 
          # re-run _calculate_seq_positions
***************
*** 640,646 ****
          my $v = shift;
          if( defined $v ) { 
!             $self->{'_hsplength'} = $v;
          }
!         return $self->{'_hsplength'};
     }
      return 0; # should never get here
--- 485,491 ----
          my $v = shift;
          if( defined $v ) { 
!             $self->{HSP_LENGTH} = $v;
          }
!         return $self->{HSP_LENGTH};
     }
      return 0; # should never get here
***************
*** 670,673 ****
--- 515,527 ----
  =cut
  
+ sub percent_identity {
+     my $self = shift;
+     
+     unless ($self->{_did_prepi}) {
+         $self->_pre_pi;
+     }
+     
+     return $self->SUPER::percent_identity(@_);
+ }
  
  =head2 frame
***************
*** 689,693 ****
  
  sub frame {
!     my ($self, $qframe, $sframe) = @_;
      if( defined $qframe ) {
          if( $qframe == 0 ) {
--- 543,554 ----
  
  sub frame {
!     my $self = shift;
!     
!     unless (defined $self->{_did_preframe}) {
!         $self->_pre_frame;
!     }
!     my $qframe = $self->{QUERY_FRAME};
!     my $sframe = $self->{HIT_FRAME};
!     
      if( defined $qframe ) {
          if( $qframe == 0 ) {
***************
*** 803,811 ****
  
  sub num_conserved{
!    my ($self,$value) = @_;
!    if( defined $value) {
!        $self->{'num_conserved'} = $value;
!    }
!    return $self->{'num_conserved'};
  }
  
--- 664,677 ----
  
  sub num_conserved{
!     my ($self,$value) = @_;
!     
!     unless ($self->{_did_presimilar}) {
!         $self->_pre_similar_stats;
!     }
!     
!     if (defined $value) {
!         $self->{CONSERVED} = $value;
!     }
!     return $self->{CONSERVED};
  }
  
***************
*** 823,830 ****
  sub num_identical{
     my ($self,$value) = @_;
     if( defined $value) {
!        $self->{'_num_identical'} = $value;
     }
!    return $self->{'_num_identical'};
  }
  
--- 689,701 ----
  sub num_identical{
     my ($self,$value) = @_;
+    
+    unless ($self->{_did_presimilar}) {
+         $self->_pre_similar_stats;
+     }
+    
     if( defined $value) {
!        $self->{IDENTICAL} = $value;
     }
!    return $self->{IDENTICAL};
  }
  
***************
*** 842,848 ****
      my ($self,$value) = @_;
      if( defined $value) {
!         $self->{'_rank'} = $value;
      }
!     return $self->{'_rank'};
  }
  
--- 713,719 ----
      my ($self,$value) = @_;
      if( defined $value) {
!         $self->{RANK} = $value;
      }
!     return $self->{RANK};
  }
  
***************
*** 957,960 ****
--- 828,840 ----
   Args    : [optional] new value to set
  
+ =cut
+ 
+ sub query {
+     my $self = shift;
+     unless ($self->{_created_qff}) {
+         $self->_query_seq_feature;
+     }
+     return $self->SUPER::query(@_);
+ }
  
  =head2 hit
***************
*** 966,969 ****
--- 846,858 ----
   Args    : [optional] new value to set
  
+ =cut
+ 
+ sub hit {
+     my $self = shift;
+     unless ($self->{_created_sff}) {
+         $self->_subject_seq_feature;
+     }
+     return $self->SUPER::hit(@_);
+ }
  
  =head2 significance
***************
*** 975,978 ****
--- 864,878 ----
   Returns : numeric
   Args    : [optional] new value to set
+  
+  
+ =head2 score
+ 
+  Title   : score
+  Usage   : $score = $obj->score();
+            $obj->score($value);
+  Function: Get/Set the score value
+  Returns : numeric
+  Args    : [optional] new value to set
+ 
  
  =head2 bits
***************
*** 1113,1117 ****
  =cut
  
- #-----
  sub n { 
      my $self = shift; 
--- 1013,1016 ----
***************
*** 1126,1132 ****
  =cut
  
- #----------
  sub range {
- #----------
      my ($self, $seqType) = @_;
  
--- 1025,1029 ----
***************
*** 1162,1167 ****
      my $self = shift;
  
!     return $self->{'links'} = shift if @_;
!     return $self->{'links'};
  }
  
--- 1059,1064 ----
      my $self = shift;
  
!     return $self->{LINKS} = shift if @_;
!     return $self->{LINKS};
  }
  
***************
*** 1181,1186 ****
      my $self = shift;
  
!     return $self->{'_hsp_group'} = shift if @_;
!     return $self->{'_hsp_group'};
  }
  
--- 1078,1083 ----
      my $self = shift;
  
!     return $self->{HSP_GROUP} = shift if @_;
!     return $self->{HSP_GROUP};
  }
  
***************
*** 1305,1307 ****
--- 1202,1452 ----
  }
  
+ 
+ 
+ # needed before seqfeatures can be made
+ sub _pre_seq_feature {
+     my $self = shift;
+     my $algo = $self->{ALGORITHM};
+     
+     my ($queryfactor, $hitfactor) = (0,0);
+     if( $algo =~ /^(PSI)?T(BLAST|FAST|SW)[NY]/oi ) {
+         $hitfactor = 1;	
+     }
+     elsif ($algo =~ /^(FAST|BLAST)(X|Y|XY)/oi || $algo =~ /^P?GENEWISE/oi ) {
+         $queryfactor = 1;	
+     }
+     elsif ($algo =~ /^T(BLAST|FAST|SW)(X|Y|XY)/oi || $algo =~ /^(BLAST|FAST|SW)N/oi || $algo =~ /^WABA|AXT|BLAT|BLASTZ|PSL|MEGABLAST|EXONERATE|SW|SMITH\-WATERMAN|SIM4$/){
+         $hitfactor = 1;
+         $queryfactor = 1;
+     }
+     elsif ($algo =~ /^RPS-BLAST/) {
+         $queryfactor = ($algo =~ /^RPS-BLAST\(BLASTX\)/) ? 1 : 0;
+         $hitfactor = 0;
+     }
+     $self->{_query_factor} = $queryfactor;
+     $self->{_hit_factor} = $hitfactor;
+ }
+ 
+ # make query seq feature
+ sub _query_seq_feature {
+     my $self = shift;
+     my $qs = $self->{QUERY_START};
+     my $qe = $self->{QUERY_END};
+     unless (defined $self->{_query_factor}) {
+         $self->_pre_seq_feature;
+     }
+     my $queryfactor = $self->{_query_factor};
+     
+     unless( defined $qe && defined $qs ) { $self->throw("Did not specify a Query End or Query Begin"); }
+     
+     my $strand;
+     if ($qe > $qs) {  # normal query: start < end
+         if ($queryfactor) {
+             $strand = 1;
+         }
+         else {
+             $strand = undef;
+         }        
+     }
+     else {
+         if ($queryfactor) {
+             $strand = -1;
+         }
+         else {
+             $strand = undef;
+         }
+         ($qs,$qe) = ($qe,$qs);
+     }
+     
+     # Note: many of these data are not query- and hit-specific.
+     # Only start, end, name, length are.
+     # We could be more efficient by only storing this info once.
+     # steve chervitz --- Sat Apr  5 00:55:07 2003
+     
+     $self->SUPER::query( new  Bio::SeqFeature::Similarity
+                   ('-primary'  => $self->primary_tag,
+                    '-start'    => $qs,
+                    '-expect'   => $self->{EVALUE},
+                    '-bits'     => $self->{BITS},
+                    '-score'    => $self->{SCORE},
+                    '-end'      => $qe,
+                    '-strand'   => $strand,
+                    '-seq_id'   => $self->{QUERY_NAME},
+                    '-seqlength'=> $self->{QUERY_LENGTH},
+                    '-source'   => $self->{ALGORITHM},
+                    '-seqdesc'  => $self->{QUERY_DESC} 
+                    ) );
+     
+     # to determine frame from something like FASTXY which doesn't
+     # report the frame
+     my $qframe = $self->{QUERY_FRAME};
+     if (defined $strand && ! defined $qframe && $queryfactor) {
+         $qframe = ( $self->query->start % 3 ) * $strand;
+     }
+     elsif (! defined $strand) { 
+         $qframe = 0;
+     }
+     $self->{QUERY_FRAME} = $qframe;
+     
+     $self->{_created_qff} = 1;
+     $self->_pre_frame;
+ }
+ 
+ # make subject seq feature
+ sub _subject_seq_feature {
+     my $self = shift;
+     my $hs = $self->{HIT_START};
+     my $he = $self->{HIT_END};
+     unless (defined $self->{_hit_factor}) {
+         $self->_pre_seq_feature;
+     }
+     my $hitfactor = $self->{_hit_factor};
+     
+     unless( defined $he && defined $hs ) { $self->throw("Did not specify a Hit End or Hit Begin"); }
+     
+     my $strand;
+     if ($he > $hs) { # normal subject
+         if ($hitfactor) {
+             $strand = 1;
+         }
+         else {
+             $strand = undef;
+         }
+     }
+     else {
+         if ($hitfactor) {
+             $strand = -1;
+         }
+         else {
+             $strand = undef;
+         }
+         ($hs,$he) = ( $he,$hs); # reverse subject: start bigger than end
+     }
+     
+     $self->SUPER::hit( Bio::SeqFeature::Similarity->new
+                 ('-start'     => $hs,
+                  '-end'       => $he,
+                  '-strand'    => $strand,
+                  '-expect'    => $self->{EVALUE},
+                  '-bits'      => $self->{BITS},
+                  '-score'     => $self->{SCORE},
+                  '-source'    => $self->{ALGORITHM},
+                  '-seq_id'    => $self->{HIT_NAME},
+                  '-seqlength' => $self->{HIT_LENGTH},
+                  '-primary'   => $self->primary_tag,
+                  '-seqdesc'   => $self->{HIT_DESC}
+                  ));
+     
+     my $hframe = $self->{HIT_FRAME};
+     if (defined $strand && ! defined $hframe && $hitfactor) {
+         $hframe = ( $hs % 3 ) * $strand;
+     }
+     elsif (! defined $strand) { 
+         $hframe = 0;
+     }
+     $self->{HIT_FRAME} = $hframe;
+     
+     $self->{_created_sff} = 1;
+     $self->_pre_frame;
+ }
+ 
+ # know the frame following seq feature creation
+ sub _pre_frame {
+     my $self = shift;
+     $self->{_created_qff} || $self->_query_seq_feature;
+     $self->{_created_sff} || $self->_subject_seq_feature;
+     $self->{_did_preframe} = 1;
+     $self->frame;
+ }
+ 
+ # before calling the num_* methods
+ sub _pre_similar_stats {
+     my $self = shift;
+     my $identical = $self->{IDENTICAL};
+     my $conserved = $self->{CONSERVED};
+     my $percent_id = $self->{PERCENT_IDENTITY};
+     
+     if (! defined $identical) {
+         if (! defined $percent_id) {
+             $self->warn("Did not defined the number of identical matches or overall percent identity in the HSP assuming 0");
+             $identical = 0;
+         }
+         else { 
+             $identical = int($percent_id * $self->{HSP_LENGTH});
+         }
+     }
+     
+     if (! defined $conserved) {
+         $self->warn("Did not defined the number of conserved matches in the HSP assuming conserved == identical ($identical)") 
+             if( $self->{ALGORITHM} !~ /^((FAST|BLAST)N)|EXONERATE|SIM4|AXT|PSL|BLAT|BLASTZ|WABA/oi);
+         $conserved = $identical;
+     }
+     $self->{IDENTICAL} = $identical;
+     $self->{CONSERVED} = $conserved;
+     $self->{_did_presimilar} = 1;
+ }
+ 
+ # before calling the frac_* methods
+ sub _pre_frac {
+     my $self = shift;
+     my $hsp_len = $self->{HSP_LENGTH};
+     my $hit_len = $self->{HIT_LENGTH};
+     my $query_len = $self->{QUERY_LENGTH};
+     
+     my $identical = $self->num_identical;
+     my $conserved = $self->num_conserved;
+     
+     $self->{_did_prefrac} = 1;
+     my $logical;
+     if( $hsp_len ) {
+         $self->length('total', $hsp_len);
+         $logical = $self->_logical_length('total');
+         $self->frac_identical( 'total', $identical / $hsp_len);
+         $self->frac_conserved( 'total', $conserved / $hsp_len);
+     }
+     if( $hit_len ) {
+         $logical = $self->_logical_length('hit');
+         $self->frac_identical( 'hit', $identical / $logical);
+         $self->frac_conserved( 'hit', $conserved / $logical);
+     }
+     if( $query_len ) {     
+         $logical = $self->_logical_length('query');
+         $self->frac_identical( 'query', $identical / $logical) ;
+         $self->frac_conserved( 'query', $conserved / $logical);
+     }
+ }
+ 
+ # before calling gaps()
+ sub _pre_gaps {
+     my $self = shift;
+     my $query_gaps = $self->{QUERY_GAPS};
+     my $query_seq = $self->{QUERY_SEQ};
+     my $hit_gaps = $self->{HIT_GAPS};
+     my $hit_seq = $self->{HIT_SEQ};
+     my $gaps = $self->{HSP_GAPS};
+     
+     $self->{_did_pregaps} = 1; # well, we're in the process; avoid recursion
+     if( defined $query_gaps ) {
+         $self->gaps('query', $query_gaps);
+     } elsif( defined $query_seq ) {
+         $self->gaps('query', scalar ( $query_seq =~ tr/\-//));
+     }
+     if( defined $hit_gaps ) {
+         $self->gaps('hit', $hit_gaps);
+     } elsif( defined $hit_seq ) {
+         $self->gaps('hit', scalar ( $hit_seq =~ tr/\-//));
+     }
+     if( ! defined $gaps ) {
+         $gaps = $self->gaps("query") + $self->gaps("hit");
+     }
+     $self->gaps('total', $gaps);
+ }
+ 
+ # before percent_identity
+ sub _pre_pi {
+     my $self = shift;
+     $self->{_did_prepi} = 1;
+     $self->percent_identity($self->{PERCENT_IDENTITY} || $self->frac_identical('total')*100) if( $self->{HSP_LENGTH} > 0 );
+ }
+ 
  1;



More information about the Bioperl-guts-l mailing list