[Bioperl-guts-l] [14442] bioperl-live/trunk: first version of Gerp parser, usable
Senduran Balasubramaniam
sendu at dev.open-bio.org
Wed Jan 16 05:51:53 EST 2008
Revision: 14442
Author: sendu
Date: 2008-01-16 05:51:53 -0500 (Wed, 16 Jan 2008)
Log Message:
-----------
first version of Gerp parser, usable
Added Paths:
-----------
bioperl-live/trunk/Bio/Tools/Phylo/Gerp.pm
bioperl-live/trunk/t/data/ENr111.mfa.example.elems
bioperl-live/trunk/t/gerp.t
Added: bioperl-live/trunk/Bio/Tools/Phylo/Gerp.pm
===================================================================
--- bioperl-live/trunk/Bio/Tools/Phylo/Gerp.pm (rev 0)
+++ bioperl-live/trunk/Bio/Tools/Phylo/Gerp.pm 2008-01-16 10:51:53 UTC (rev 14442)
@@ -0,0 +1,147 @@
+# $Id: Gumby.pm,v 1.2 2007/06/14 18:01:52 nathan Exp $
+#
+# BioPerl module for Bio::Tools::Phylo::Gerp
+#
+# Cared for by Sendu Bala <bix at sendu.me.uk>
+#
+# Copyright Sendu Bala
+#
+# You may distribute this module under the same terms as perl itself
+
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+Bio::Tools::Phylo::Gerp - Parses output from GERP
+
+=head1 SYNOPSIS
+
+ use strict;
+
+ use Bio::Tools::Phylo::Gerp;
+
+ my $parser = Bio::Tools::Phylo::Gerp->new(-file => "alignment.rates.elems");
+
+ while (my $feat = $parser->next_result) {
+ my $start = $feat->start;
+ my $end = $feat->end;
+ my $rs_score = $feat->score;
+ my $p_value = ($feat->annotation->get_Annotations('p-value'))[0]->value;
+ }
+
+=head1 DESCRIPTION
+
+This module is used to parse the output from 'GERP' (v2) by Eugene Davydov
+(originally Gregory M. Cooper et al.). You can get details here:
+http://mendel.stanford.edu/sidowlab/
+
+It works on the .elems files produced by gerpelem.
+
+Each result is a Bio::SeqFeature::Annotated representing a single contrained
+element.
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other
+Bioperl modules. Send your comments and suggestions preferably to
+the Bioperl mailing list. Your participation is much appreciated.
+
+ bioperl-l at bioperl.org - General discussion
+ http://bioperl.org/wiki/Mailing_lists - About the mailing lists
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track
+of the bugs and their resolution. Bug reports can be submitted via the
+web:
+
+ http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Sendu Bala
+
+Email bix at sendu.me.uk
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods.
+Internal methods are usually preceded with a _
+
+=cut
+
+# Let the code begin...
+
+package Bio::Tools::Phylo::Gerp;
+use strict;
+
+use Bio::SeqFeature::Annotated;
+use Bio::Annotation::SimpleValue;
+
+use base qw(Bio::Root::Root Bio::Root::IO);
+
+
+=head2 new
+
+ Title : new
+ Usage : my $obj = Bio::Tools::Phylo::Gerp->new();
+ Function: Builds a new Bio::Tools::Phylo::Gerp object
+ Returns : Bio::Tools::Phylo::Gerp
+ Args : -file (or -fh) should contain the contents of a gerpelem .elems file
+
+=cut
+
+sub new {
+ my ($class, @args) = @_;
+ my $self = $class->SUPER::new(@args);
+
+ $self->_initialize_io(@args);
+
+ return $self;
+}
+
+=head2 next_result
+
+ Title : next_result
+ Usage : $result = $obj->next_result();
+ Function: Returns the next result available from the input, or undef if there
+ are no more results.
+ Returns : Bio::SeqFeature::Annotated object. Features are annotated with a tag
+ for 'pvalue', and a 'predicted' tag. They have no sequence id unless
+ the input GERP file is non-standard, with the seq id as the 6th
+ column.
+ Args : none
+
+=cut
+
+sub next_result {
+ my ($self) = @_;
+
+ my $line = $self->_readline || return;
+
+ while ($line !~ /^\d+\s+\d+\s+\d+\s+\S+\s+\S+\s*(?:\S+\s*)?$/) {
+ $line = $self->_readline || return;
+ }
+
+ #start end length RS-score p-value
+ # code elsewhere adds seq_id on the end (not valid GERP), so we capture that
+ # if present
+ my ($start, $end, undef, $rs_score, $p_value, $seq_id) = split(/\s+/, $line);
+ my $feat = Bio::SeqFeature::Annotated->new(
+ $seq_id ? (-seq_id => $seq_id) : (),
+ -start => $start,
+ -end => $end,
+ -strand => 1,
+ -score => $rs_score,
+ #-type => 'conserved_region', ***causes 740x increase in SeqFeatureDB storage requirments!
+ -source => 'GERP');
+
+ my $sv = Bio::Annotation::SimpleValue->new(-tagname => 'predicted', -value => 1);
+ $feat->annotation->add_Annotation($sv);
+ $sv = Bio::Annotation::SimpleValue->new(-tagname => 'pvalue', -value => $p_value);
+ $feat->annotation->add_Annotation($sv);
+
+ return $feat;
+}
+
+1;
Property changes on: bioperl-live/trunk/Bio/Tools/Phylo/Gerp.pm
___________________________________________________________________
Name: svn:executable
+ *
Added: bioperl-live/trunk/t/data/ENr111.mfa.example.elems
===================================================================
--- bioperl-live/trunk/t/data/ENr111.mfa.example.elems (rev 0)
+++ bioperl-live/trunk/t/data/ENr111.mfa.example.elems 2008-01-16 10:51:53 UTC (rev 14442)
@@ -0,0 +1,10 @@
+## Note that element detection relies on computing a false positive rate that uses random permutations of the original data.
+## Due to the stochastic nature of the permutations, you may see subtle differences from run to run.
+## Do not expect these test results to be precisely what you will get when you do your own run on the test dataset.
+
+500000 3.85
+334180 334352 173 449 1.03744e-165
+337735 337915 181 458.2 5.02405e-164
+262604 262861 258 473.1 3.64789e-117
+285427 285608 182 386.1 8.42494e-113
+309563 309744 182 383.6 2.88895e-111
Added: bioperl-live/trunk/t/gerp.t
===================================================================
--- bioperl-live/trunk/t/gerp.t (rev 0)
+++ bioperl-live/trunk/t/gerp.t 2008-01-16 10:51:53 UTC (rev 14442)
@@ -0,0 +1,36 @@
+# -*-Perl-*- Test Harness script for Bioperl
+# $Id: gerp.t,v 1.15 2007/06/27 10:16:38 sendu Exp $
+
+use strict;
+
+BEGIN {
+ use lib 't/lib';
+ use BioperlTest;
+
+ test_begin(-tests => 33);
+
+ use_ok('Bio::Tools::Phylo::Gerp');
+}
+
+ok my $parser = Bio::Tools::Phylo::Gerp->new(-file => test_input_file('ENr111.mfa.example.elems'));
+
+my $count = 0;
+my @expected = ([qw(334180 334352 449 1.03744e-165)],
+ [qw(337735 337915 458.2 5.02405e-164)],
+ [qw(262604 262861 473.1 3.64789e-117)],
+ [qw(285427 285608 386.1 8.42494e-113)],
+ [qw(309563 309744 383.6 2.88895e-111)]);
+while (my $feat = $parser->next_result) {
+ $count++;
+ my @exp = @{shift(@expected)};
+
+ isa_ok $feat, 'Bio::SeqFeature::Annotated';
+ is $feat->source->value, 'GERP', 'correct source';
+ is $feat->start, shift(@exp), 'feature start correct';
+ is $feat->end, shift(@exp), 'feature end correct';
+ is $feat->score, shift(@exp), 'feature score correct';
+ my ($p_value) = $feat->get_Annotations('pvalue');
+ is ref $p_value ? $p_value->value : $p_value, shift(@exp), 'feature pvalue correct';
+}
+
+is $count, 5, "correct number of results parsed out";
More information about the Bioperl-guts-l
mailing list