[Bioperl-guts-l] [14993] bioperl-live/trunk: [bug 2635]
Christopher John Fields
cjfields at dev.open-bio.org
Sun Nov 16 01:14:42 EST 2008
Revision: 14993
Author: cjfields
Date: 2008-11-16 01:14:41 -0500 (Sun, 16 Nov 2008)
Log Message:
-----------
[bug 2635]
* new method cat()
* needs further evaluation using meta data, but code is iin place which should take care of it.
* patch courtesy of Roy Chaudhuri
Modified Paths:
--------------
bioperl-live/trunk/Bio/Align/Utilities.pm
bioperl-live/trunk/t/AlignUtil.t
Modified: bioperl-live/trunk/Bio/Align/Utilities.pm
===================================================================
--- bioperl-live/trunk/Bio/Align/Utilities.pm 2008-11-14 03:48:54 UTC (rev 14992)
+++ bioperl-live/trunk/Bio/Align/Utilities.pm 2008-11-16 06:14:41 UTC (rev 14993)
@@ -89,7 +89,7 @@
use base qw(Exporter);
@EXPORT = qw();
- at EXPORT_OK = qw(aa_to_dna_aln bootstrap_replicates);
+ at EXPORT_OK = qw(aa_to_dna_aln bootstrap_replicates cat);
%EXPORT_TAGS = (all =>[@EXPORT, @EXPORT_OK]);
BEGIN {
use constant CODONSIZE => 3;
@@ -214,5 +214,142 @@
return \@alns;
}
+=head2 cat
+ Title : cat
+ Usage : $aln123 = cat($aln1, $aln2, $aln3)
+ Function : Concatenates alignment objects. Sequences are identified by id.
+ An error will be thrown if the sequence ids are not unique in the
+ first alignment. If any ids are not present or not unique in any
+ of the additional alignments then those sequences are omitted from
+ the concatenated alignment, and a warning is issued. An error will
+ be thrown if any of the alignments are not flush, since
+ concatenating such alignments is unlikely to make biological
+ sense.
+ Returns : A new Bio::SimpleAlign object
+ Args : A list of Bio::SimpleAlign objects
+
+=cut
+
+sub cat {
+ my ($self, @aln) = @_;
+ $self->throw("cat method called with no arguments") unless $self;
+ for ($self, at aln) {
+ $self->throw($_->id. " not a Bio::Align::AlignI object") unless $_->isa('Bio::Align::AlignI');
+ $self->throw($_->id. " is not flush") unless $_->is_flush;
+ }
+ my $aln = $self->new;
+ $aln->id($self->id);
+ $aln->annotation($self->annotation);
+ my %unique;
+ SEQ: foreach my $seq ( $self->each_seq() ) {
+ throw("ID: ", $seq->id, " is not unique in initial alignment.") if exists $unique{$seq->id};
+ $unique{$seq->id}=1;
+
+ # Can be Bio::LocatableSeq, Bio::Seq::Meta or Bio::Seq::Meta::Array
+ my $new_seq = $seq->new(-id=> $seq->id,
+ -strand => $seq->strand,
+ -verbose => $self->verbose);
+ $new_seq->seq($seq->seq);
+ $new_seq->start($seq->start);
+ $new_seq->end($seq->end);
+ if ($new_seq->isa('Bio::Seq::MetaI')) {
+ for my $meta_name ($seq->meta_names) {
+ $new_seq->named_submeta($meta_name, $new_seq->start, $new_seq->end, $seq->named_meta($meta_name));
+ }
+ }
+ for my $cat_aln (@aln) {
+ my @cat_seq=$cat_aln->each_seq_with_id($seq->id);
+ if (@cat_seq==0) {
+ $self->warn($seq->id. " not found in alignment ". $cat_aln->id. ", skipping this sequence.");
+ next SEQ;
+ }
+ if (@cat_seq>1) {
+ $self->warn($seq->id. " found multiple times in alignment ". $cat_aln->id. ", skipping this sequence.");
+ next SEQ;
+ }
+ my $cat_seq=$cat_seq[0];
+ my $old_end=$new_seq->end;
+ $new_seq->seq($new_seq->seq.$cat_seq->seq);
+
+ # Not sure if this is a sensible way to deal with end coordinates
+ $new_seq->end($new_seq->end+$cat_seq->end+1-$cat_seq->start);
+
+ if ($cat_seq->isa('Bio::Seq::Meta::Array')) {
+ unless ($new_seq->isa('Bio::Seq::Meta::Array')) {
+ my $meta_seq=Bio::Seq::Meta::Array->new;
+ $meta_seq->seq($new_seq->seq);
+ $meta_seq->start($new_seq->start);
+ $meta_seq->end($new_seq->end);
+ if ($new_seq->isa('Bio::Seq::Meta')) {
+ for my $meta_name ($new_seq->meta_names) {
+ $meta_seq->named_submeta($meta_name,
+ $new_seq->start,
+ $old_end,
+ [split(//, $new_seq->named_meta($meta_name))]
+ );
+ }
+ }
+ $new_seq=$meta_seq;
+ }
+ for my $meta_name ($cat_seq->meta_names) {
+ $new_seq->named_submeta($meta_name,
+ $old_end+1,
+ $new_seq->end,
+ $cat_seq->named_meta($meta_name)
+ );
+ }
+ } elsif ($cat_seq->isa('Bio::Seq::Meta')) {
+ if ($new_seq->isa('Bio::Seq::Meta::Array')) {
+ for my $meta_name ($cat_seq->meta_names) {
+ $new_seq->named_submeta($meta_name,
+ $old_end+1,
+ $new_seq->end,
+ [split(//,$cat_seq->named_meta($meta_name))]
+ );
+ }
+ } else {
+ unless ($new_seq->isa('Bio::Seq::Meta')) {
+ my $meta_seq=Bio::Seq::Meta::Array->new;
+ $meta_seq->seq($new_seq->seq);
+ $meta_seq->start($new_seq->start);
+ $meta_seq->end($new_seq->end);
+ $new_seq=$meta_seq;
+ }
+ for my $meta_name ($cat_seq->meta_names) {
+ $new_seq->named_submeta($meta_name,
+ $old_end+1,
+ $new_seq->end,
+ $cat_seq->named_meta($meta_name)
+ );
+ }
+ }
+ }
+ }
+ $aln->add_seq($new_seq);
+ }
+ my $cons_meta = $self->consensus_meta;
+ my $new_cons_meta;
+ if ($cons_meta) {
+ $new_cons_meta = Bio::Seq::Meta->new();
+ for my $meta_name ($cons_meta->meta_names) {
+ $new_cons_meta->named_submeta($meta_name, 1, $self->length, $cons_meta->$meta_name);
+ }
+ }
+ my $end=$self->length;
+ for my $cat_aln (@aln) {
+ my $cat_cons_meta=$cat_aln->consensus_meta;
+ if ($cat_cons_meta) {
+ $new_cons_meta = Bio::Seq::Meta->new() if !$new_cons_meta;
+ for my $meta_name ($cat_cons_meta->meta_names) {
+ $new_cons_meta->named_submeta($meta_name, $end+1, $end+$cat_aln->length, $cat_cons_meta->$meta_name);
+ }
+ }
+ $end+=$cat_aln->length;
+ }
+ $aln->consensus_meta($new_cons_meta) if $new_cons_meta;
+ return $aln;
+}
+
+
1;
Modified: bioperl-live/trunk/t/AlignUtil.t
===================================================================
--- bioperl-live/trunk/t/AlignUtil.t 2008-11-14 03:48:54 UTC (rev 14992)
+++ bioperl-live/trunk/t/AlignUtil.t 2008-11-16 06:14:41 UTC (rev 14993)
@@ -7,9 +7,9 @@
use lib 't/lib';
use BioperlTest;
- test_begin(-tests => 19);
+ test_begin(-tests => 33);
- use_ok('Bio::Align::Utilities',qw(aa_to_dna_aln bootstrap_replicates) );
+ use_ok('Bio::Align::Utilities',qw(aa_to_dna_aln bootstrap_replicates cat) );
use_ok('Bio::AlignIO');
use_ok('Bio::SeqIO');
}
@@ -41,3 +41,13 @@
my $bootstraps = &bootstrap_replicates($aln,10);
is(scalar @$bootstraps, 10);
+
+my $sub_aln1=$aln->slice(1,100);
+my $sub_aln2=$aln->slice(101,200);
+my $sub_aln3=$aln->slice(1,200);
+my $cat_aln=cat($sub_aln1, $sub_aln2);
+my @seq=$sub_aln3->each_seq;
+for my $seq ($cat_aln->each_seq) {
+ my $refseq=shift @seq;
+ is($seq->seq, $refseq->seq);
+}
More information about the Bioperl-guts-l
mailing list