[Bioperl-guts-l] [15074] bioperl-live/trunk: Add method that finds the most common codons for the aa's, uses those to reverse-translate a nucleotide sequence
Brian Osborne
bosborne at dev.open-bio.org
Tue Dec 2 23:57:50 EST 2008
Revision: 15074
Author: bosborne
Date: 2008-12-02 23:57:49 -0500 (Tue, 02 Dec 2008)
Log Message:
-----------
Add method that finds the most common codons for the aa's, uses those to reverse-translate a nucleotide sequence
Modified Paths:
--------------
bioperl-live/trunk/Bio/CodonUsage/Table.pm
bioperl-live/trunk/Bio/Tools/CodonTable.pm
bioperl-live/trunk/t/CodonTable.t
Modified: bioperl-live/trunk/Bio/CodonUsage/Table.pm
===================================================================
--- bioperl-live/trunk/Bio/CodonUsage/Table.pm 2008-12-03 03:40:20 UTC (rev 15073)
+++ bioperl-live/trunk/Bio/CodonUsage/Table.pm 2008-12-03 04:57:49 UTC (rev 15074)
@@ -27,7 +27,7 @@
-gc => 1);
# Or from local file
- my $io = Bio::CodonUsage::IO->new(-file=>"file");
+ my $io = Bio::CodonUsage::IO->new(-file => "file");
my $cdtable = $io->next_data();
# Or create your own from a Bio::PrimarySeq compliant object,
@@ -275,7 +275,45 @@
}
+=head2 most_common_codons
+ Title : most_common_codons
+ Usage : my $common_codons = $cd_table->most_common_codons();
+ Purpose : To obtain the most common codon for a given amino acid
+ Returns : A reference to a hash where keys are 1 letter amino acid codes
+ and the values are the single most common codons for those amino acids
+ Arguments: None
+
+=cut
+
+sub most_common_codons {
+ my $self = shift;
+
+ my %return_hash;
+
+ for my $a ( keys %STRICTAA ) {
+
+ my $common_codon = '';
+ my $rel_freq = 0;
+ my $aa = $Bio::SeqUtils::THREECODE{$a};
+
+ if ( ! defined $self->{'_table'}{$aa} ) {
+ $self->warn("Amino acid $aa ($a) does not have any codons!");
+ next;
+ }
+
+ for my $codon ( keys %{ $self->{'_table'}{$aa}} ) {
+ if ($self->{'_table'}{$aa}{$codon}{'rel_freq'} > $rel_freq ){
+ $common_codon = $codon;
+ $rel_freq = $self->{'_table'}{$aa}{$codon}{'rel_freq'};
+ }
+ }
+ $return_hash{$a} = $common_codon;
+ }
+
+ return \%return_hash;
+}
+
=head2 codon_count
Title : codon_count
Modified: bioperl-live/trunk/Bio/Tools/CodonTable.pm
===================================================================
--- bioperl-live/trunk/Bio/Tools/CodonTable.pm 2008-12-03 03:40:20 UTC (rev 15073)
+++ bioperl-live/trunk/Bio/Tools/CodonTable.pm 2008-12-03 04:57:49 UTC (rev 15074)
@@ -20,7 +20,6 @@
# the ones used by nucleotide sequence databases. All common IUPAC
# ambiguity codes for DNA, RNA and amino acids are recognized.
- # to use
use Bio::Tools::CodonTable;
# defaults to ID 1 "Standard"
@@ -57,9 +56,9 @@
my $seqobj = Bio::PrimarySeq->new(-seq => 'FHGERHEL');
my $iupac_str = $myCodonTable->reverse_translate_all($seqobj);
- #boolean tests
+ # boolean tests
print "Is a start\n" if $myCodonTable->is_start_codon('ATG');
- print "Is a termianator\n" if $myCodonTable->is_ter_codon('tar');
+ print "Is a terminator\n" if $myCodonTable->is_ter_codon('tar');
print "Is a unknown\n" if $myCodonTable->is_unknown_codon('JTG');
=head1 DESCRIPTION
@@ -172,8 +171,8 @@
# Let the code begin...
package Bio::Tools::CodonTable;
-use vars qw(@NAMES @TABLES @STARTS $TRCOL $CODONS %IUPAC_DNA $CODONGAP $GAP
- %IUPAC_AA %THREELETTERSYMBOLS $VALID_PROTEIN $TERMINATOR);
+use vars qw(@NAMES @TABLES @STARTS $TRCOL $CODONS %IUPAC_DNA $CODONGAP $GAP
+ %IUPAC_AA %THREELETTERSYMBOLS $VALID_PROTEIN $TERMINATOR);
use strict;
# Object preamble - inherits from Bio::Root::Root
@@ -298,14 +297,10 @@
Title : id
Usage : $obj->id(3); $id_integer = $obj->id();
- Function:
-
- Sets or returns the id of the translation table. IDs are
+ Function: Sets or returns the id of the translation table. IDs are
integers from 1 to 15, excluding 7 and 8 which have been
removed as redundant. If an invalid ID is given the method
returns 0, false.
-
-
Example :
Returns : value of id, a scalar, 0 if not a valid
Args : newvalue (optional)
@@ -537,38 +532,39 @@
=cut
sub revtranslate {
- my ($self, $value, $coding) = @_;
- my ($id) = $self->{'id'};
- my (@aas, $p);
- my (@codons) = ();
+ my ($self, $value, $coding) = @_;
+ my ($id) = $self->{'id'};
+ my (@aas, $p);
+ my (@codons) = ();
- if (length($value) == 3 ) {
- $value = lc $value;
- $value = ucfirst $value;
- $value = $THREELETTERSYMBOLS{$value};
- }
- if ( defined $value and $value =~ /$VALID_PROTEIN/
- and length($value) == 1 ) {
- $value = uc $value;
- @aas = @{$IUPAC_AA{$value}};
- foreach my $aa (@aas) {
- #print $aa, " -2\n";
- $aa = '\*' if $aa eq '*';
- while ($TABLES[$id-1] =~ m/$aa/g) {
- $p = pos $TABLES[$id-1];
- push (@codons, $TRCOL->{--$p});
+ if (length($value) == 3 ) {
+ $value = lc $value;
+ $value = ucfirst $value;
+ $value = $THREELETTERSYMBOLS{$value};
+ }
+ if ( defined $value and $value =~ /$VALID_PROTEIN/
+ and length($value) == 1 ) {
+ $value = uc $value;
+ @aas = @{$IUPAC_AA{$value}};
+ foreach my $aa (@aas) {
+ #print $aa, " -2\n";
+ $aa = '\*' if $aa eq '*';
+ while ($TABLES[$id-1] =~ m/$aa/g) {
+ $p = pos $TABLES[$id-1];
+ push (@codons, $TRCOL->{--$p});
+ }
}
- }
}
- if ($coding and uc ($coding) eq 'RNA') {
- for my $i (0..$#codons) {
- $codons[$i] =~ tr/t/u/;
- }
- }
+ if ($coding and uc ($coding) eq 'RNA') {
+ for my $i (0..$#codons) {
+ $codons[$i] =~ tr/t/u/;
+ }
+ }
+
+ return @codons;
+}
- return @codons;
-}
=head2 reverse_translate_all
Title : reverse_translate_all
@@ -583,16 +579,14 @@
Args : a Bio::PrimarySeqI compatible object (mandatory)
a Bio::CodonUsage::Table object and a threshold if only
codons with a relative frequency above the threshold are
- to be considered.
-
-
+ to be considered.
=cut
sub reverse_translate_all {
my ($self, $obj, $cut, $threshold) = @_;
- ## check args are OK
+ ## check args are OK
if (!$obj || !$obj->isa('Bio::PrimarySeqI')){
$self->throw(" I need a Bio::PrimarySeqI object, not a [".
@@ -637,6 +631,53 @@
}
+=head2 reverse_translate_best
+
+ Title : reverse_translate_best
+ Usage : my $str = $cttable->reverse_translate_best($seq_object,$cutable);
+ Function: Reverse translates a protein sequence into plain nucleotide
+ sequence (GATC), uses the most common codon for each amino acid
+ Returns : A string
+ Args : A Bio::PrimarySeqI compatible object and a Bio::CodonUsage::Table object
+
+=cut
+
+sub reverse_translate_best {
+
+ my ($self, $obj, $cut) = @_;
+
+ if (!$obj || !$obj->isa('Bio::PrimarySeqI')){
+ $self->throw(" I need a Bio::PrimarySeqI object, not a [".
+ ref($obj) . "]");
+ }
+ if ($obj->alphabet ne 'protein') {
+ $self->throw("Cannot reverse translate, need an amino acid sequence .".
+ "This sequence is of type [" . $obj->alphabet ."]");
+ }
+ if ( !$cut | !$cut->isa('Bio::CodonUsage::Table')) {
+ $self->throw("I need a Bio::CodonUsage::Table object, not a [".
+ ref($cut). "].");
+ }
+
+ my $str = '';
+ my @seq = split '', $obj->seq;
+
+ my $cod_ref = $cut->most_common_codons();
+
+ for my $aa ( @seq ) {
+ if ($aa =~ /x/i) {
+ $str .= 'NNN';
+ next;
+ }
+ if ( defined $cod_ref->{$aa} ) {
+ $str .= $cod_ref->{$aa};
+ } else {
+ $self->throw("Input sequence contains invalid character: $aa");
+ }
+ }
+ $str;
+}
+
=head2 is_start_codon
Title : is_start_codon
@@ -647,7 +688,6 @@
Returns : boolean
Args : codon
-
=cut
sub is_start_codon{
@@ -682,7 +722,6 @@
Returns : boolean
Args : codon
-
=cut
sub is_ter_codon{
Modified: bioperl-live/trunk/t/CodonTable.t
===================================================================
--- bioperl-live/trunk/t/CodonTable.t 2008-12-03 03:40:20 UTC (rev 15073)
+++ bioperl-live/trunk/t/CodonTable.t 2008-12-03 04:57:49 UTC (rev 15074)
@@ -7,9 +7,10 @@
use lib 't/lib';
use BioperlTest;
- test_begin(-tests => 51);
+ test_begin(-tests => 56);
use_ok('Bio::Tools::CodonTable');
+ use_ok('Bio::CodonUsage::IO');
}
# create a table object by giving an ID
@@ -179,7 +180,16 @@
-alphabet => 'dna');
is $seq->translate->seq, 'M-K--N';
-ok $seq = Bio::PrimarySeq->new(-seq=>'ASDFGHKL');
+ok $seq = Bio::PrimarySeq->new(-seq =>'ASDFGHKL');
is $myCodonTable->reverse_translate_all($seq), 'GCBWSNGAYTTYGGVCAYAARYTN';
-ok $seq = Bio::PrimarySeq->new(-seq=>'ASXFHKL');
+ok $seq = Bio::PrimarySeq->new(-seq => 'ASXFHKL');
is $myCodonTable->reverse_translate_all($seq), 'GCBWSNNNNTTYCAYAARYTN';
+
+#
+# test reverse_translate_best(), requires a Bio::CodonUsage::Table object
+#
+
+ok $seq = Bio::PrimarySeq->new(-seq =>'ACDEFGHIKLMNPQRSTVWY');
+ok my $io = Bio::CodonUsage::IO->new(-file => test_input_file('MmCT'));
+ok my $cut = $io->next_data();
+is $myCodonTable->reverse_translate_best($seq,$cut), 'GCCTGCGACGAGTTCGGCCACATCAAGCTGATGAACCCCCAGCGCTCCACCGTGTGGTAC';
More information about the Bioperl-guts-l
mailing list