[Bioperl-guts-l] [14978] bioperl-live/trunk: * parse 'Features in this part of subject sequence' data from some NCBI output (string can be retrieved using new GenericHSP::hit_features method)

Christopher John Fields cjfields at dev.open-bio.org
Tue Nov 4 14:08:00 EST 2008


Revision: 14978
Author:   cjfields
Date:     2008-11-04 14:07:59 -0500 (Tue, 04 Nov 2008)

Log Message:
-----------
* parse 'Features in this part of subject sequence' data from some NCBI output (string can be retrieved using new GenericHSP::hit_features method)
* add test

Modified Paths:
--------------
    bioperl-live/trunk/Bio/Search/HSP/GenericHSP.pm
    bioperl-live/trunk/t/SearchIO.t

Modified: bioperl-live/trunk/Bio/Search/HSP/GenericHSP.pm
===================================================================
--- bioperl-live/trunk/Bio/Search/HSP/GenericHSP.pm	2008-11-01 18:46:30 UTC (rev 14977)
+++ bioperl-live/trunk/Bio/Search/HSP/GenericHSP.pm	2008-11-04 19:07:59 UTC (rev 14978)
@@ -150,6 +150,9 @@
            -links       => HSP links information (WU-BLAST only)
            -hsp_group   => HSP Group informat (WU-BLAST only)
            -gap_symbol  => symbol representing a gap (default = '-')
+           -hit_features=> string of features found in or near HSP hit
+                           region (reported in some BLAST text output,
+                           v. 2.2.13 and up)
            -stranded    => If the algorithm isn't known (i.e. defaults to
                            'generic'), setting this will indicate start/end
                            coordinates are to be used to determine the strand
@@ -1176,6 +1179,26 @@
     return $self->{HSP_GROUP};
 }
 
+=head2 hit_features
+
+ Title   : hit_features
+ Usage   : $obj->hit_features($newval)
+ Function: Get/Set the HSP hit feature string (from some BLAST 2.2.13 text
+           output), which is a string of overlapping or nearby features in HSP
+           hit
+ Returns : Value of hit features, if present
+ Args    : On set, new value (a scalar or undef, optional)
+
+
+=cut
+
+sub hit_features {
+    my $self = shift;
+
+    return $self->{HIT_FEATURES} = shift if @_;
+    return $self->{HIT_FEATURES};
+}
+
 # The cigar string code is written by Juguang Xiao <juguang at fugu-sg.org>
 
 =head1 Brief introduction on cigar string

Modified: bioperl-live/trunk/t/SearchIO.t
===================================================================
--- bioperl-live/trunk/t/SearchIO.t	2008-11-01 18:46:30 UTC (rev 14977)
+++ bioperl-live/trunk/t/SearchIO.t	2008-11-04 19:07:59 UTC (rev 14978)
@@ -7,7 +7,7 @@
 	use lib 't/lib';
     use BioperlTest;
     
-    test_begin(-tests => 1811);
+    test_begin(-tests => 1812);
 	
 	use_ok('Bio::SearchIO');
 	use_ok('Bio::SearchIO::Writer::HitTableWriter');
@@ -247,9 +247,12 @@
     is($result->get_statistic('kappa') , 0.0475563);
     cmp_ok($result->get_statistic('lambda'), '==', 0.267);
     cmp_ok($result->get_statistic('entropy'), '==', 0.14);
-    #is($result->get_statistic('dbletters'), 31984247);
-    #is($result->get_statistic('dbentries'), 88780);
-    #is($result->get_statistic('effective_hsplength'), 49);
+    #TODO: {
+    #    local $TODO = 'Some stats not working';
+    #    is($result->get_statistic('dbletters'), 31984247);
+    #    is($result->get_statistic('dbentries'), 88780);
+    #    is($result->get_statistic('effective_hsplength'), 49);
+    #}
     is($result->get_statistic('effectivespace'), '6.44279e+07');
     is($result->get_parameter('matrix'), 'BLOSUM62');
     is($result->get_parameter('gapopen'), 11);
@@ -766,12 +769,12 @@
             is(sprintf("%.2f",$hsp->percent_identity), '57.30');
             is(sprintf("%.4f",$hsp->frac_identical('query')), 0.5907); 
             is(sprintf("%.4f",$hsp->frac_identical('hit')), 0.5752);
-	    # these are really UNGAPPED values not CONSERVED
-	    # otherwise ident and conserved would be identical for
-	    # nucleotide alignments
-	    is(sprintf("%.4f",$hsp->frac_conserved('total')), 0.5955); 
-	    is(sprintf("%.4f",$hsp->frac_conserved('query')), 0.6139); 
-	    is(sprintf("%.4f",$hsp->frac_conserved('hit')), 0.5977); 
+			# these are really UNGAPPED values not CONSERVED
+			# otherwise ident and conserved would be identical for
+			# nucleotide alignments
+			is(sprintf("%.4f",$hsp->frac_conserved('total')), 0.5955); 
+			is(sprintf("%.4f",$hsp->frac_conserved('query')), 0.6139); 
+			is(sprintf("%.4f",$hsp->frac_conserved('hit')), 0.5977); 
             is($hsp->query->frame(), 0);
             is($hsp->hit->frame(), 0);
             is($hsp->gaps, 159);
@@ -1024,7 +1027,7 @@
 $searchio = Bio::SearchIO->new(-format => 'fasta',
 			      -file   => test_input_file('BOSS_DROME.FASTP_v35_04'));
 $result = $searchio->next_result;
-like($result->database_name, qr/wormpep190/);
+like($result->database_name, qr/wormpep190/, 'TFASTXY');
 is($result->database_letters, 10449259);
 is($result->database_entries, 23771);
 is($result->algorithm, 'FASTA');
@@ -2205,7 +2208,6 @@
 is($result->get_parameter('expect'), '1e-23');
 is($result->get_statistic('num_extensions'), '117843');
 
-
 @valid = ( [ 'gi|41400296|gb|AE016958.1|', 4829781, 'AE016958', 41400296, '6e-059', 119, 236],
 	      [ 'gi|54013472|dbj|AP006618.1|', 6021225, 'AP006618', 54013472, '4e-026', 64, 127],
 	      [ 'gi|57546753|dbj|BA000030.2|', 9025608, 'BA000030', 57546753, '1e-023', 60, 119]);
@@ -2229,6 +2231,7 @@
             is($hsp->hit->start, 1166897);
             is($hsp->hit->end, 1167187);
             is($hsp->length('hsp'), 291);
+            is($hsp->hit_features, 'PyrR');
             is($hsp->start('hit'), $hsp->hit->start);
             is($hsp->end('query'), $hsp->query->end);
             is($hsp->strand('sbjct'), $hsp->subject->strand);# alias for hit




More information about the Bioperl-guts-l mailing list