[Bioperl-guts-l] [14993] bioperl-live/trunk: [bug 2635]

Christopher John Fields cjfields at dev.open-bio.org
Sun Nov 16 01:14:42 EST 2008


Revision: 14993
Author:   cjfields
Date:     2008-11-16 01:14:41 -0500 (Sun, 16 Nov 2008)

Log Message:
-----------
[bug 2635]
* new method cat()
* needs further evaluation using meta data, but code is iin place which should take care of it.
* patch courtesy of Roy Chaudhuri

Modified Paths:
--------------
    bioperl-live/trunk/Bio/Align/Utilities.pm
    bioperl-live/trunk/t/AlignUtil.t

Modified: bioperl-live/trunk/Bio/Align/Utilities.pm
===================================================================
--- bioperl-live/trunk/Bio/Align/Utilities.pm	2008-11-14 03:48:54 UTC (rev 14992)
+++ bioperl-live/trunk/Bio/Align/Utilities.pm	2008-11-16 06:14:41 UTC (rev 14993)
@@ -89,7 +89,7 @@
 use base qw(Exporter);
 
 @EXPORT = qw();
- at EXPORT_OK = qw(aa_to_dna_aln bootstrap_replicates);
+ at EXPORT_OK = qw(aa_to_dna_aln bootstrap_replicates cat);
 %EXPORT_TAGS = (all =>[@EXPORT, @EXPORT_OK]);
 BEGIN {
     use constant CODONSIZE => 3;
@@ -214,5 +214,142 @@
    return \@alns;
 }
 
+=head2 cat
 
+ Title     : cat
+ Usage     : $aln123 = cat($aln1, $aln2, $aln3)
+ Function  : Concatenates alignment objects. Sequences are identified by id.
+             An error will be thrown if the sequence ids are not unique in the
+             first alignment. If any ids are not present or not unique in any
+             of the additional alignments then those sequences are omitted from
+             the concatenated alignment, and a warning is issued. An error will
+             be thrown if any of the alignments are not flush, since
+             concatenating such alignments is unlikely to make biological
+             sense.
+ Returns   : A new Bio::SimpleAlign object
+ Args      : A list of Bio::SimpleAlign objects
+
+=cut
+
+sub cat {
+    my ($self, @aln) = @_;
+    $self->throw("cat method called with no arguments") unless $self;
+    for ($self, at aln) {
+	$self->throw($_->id. " not a Bio::Align::AlignI object") unless $_->isa('Bio::Align::AlignI');
+	$self->throw($_->id. " is not flush") unless $_->is_flush;
+    }
+    my $aln = $self->new;
+    $aln->id($self->id);
+    $aln->annotation($self->annotation);
+    my %unique;
+  SEQ: foreach my $seq ( $self->each_seq() ) {
+        throw("ID: ", $seq->id, " is not unique in initial alignment.") if exists $unique{$seq->id};
+        $unique{$seq->id}=1;
+
+        # Can be Bio::LocatableSeq, Bio::Seq::Meta or Bio::Seq::Meta::Array
+ 	my $new_seq = $seq->new(-id=> $seq->id,
+                                -strand  => $seq->strand,
+                                -verbose => $self->verbose);
+	$new_seq->seq($seq->seq);
+	$new_seq->start($seq->start);
+	$new_seq->end($seq->end);
+        if ($new_seq->isa('Bio::Seq::MetaI')) {
+            for my $meta_name ($seq->meta_names) {
+                $new_seq->named_submeta($meta_name, $new_seq->start, $new_seq->end, $seq->named_meta($meta_name));
+            }
+        }
+ 	for my $cat_aln (@aln) {
+            my @cat_seq=$cat_aln->each_seq_with_id($seq->id);
+ 	    if (@cat_seq==0) {
+                $self->warn($seq->id. " not found in alignment ". $cat_aln->id. ", skipping this sequence.");
+                next SEQ;
+            }
+ 	    if (@cat_seq>1) {
+                $self->warn($seq->id. " found multiple times in alignment ".  $cat_aln->id. ", skipping this sequence.");
+                next SEQ;
+            }
+            my $cat_seq=$cat_seq[0];
+            my $old_end=$new_seq->end;
+            $new_seq->seq($new_seq->seq.$cat_seq->seq);
+            
+            # Not sure if this is a sensible way to deal with end coordinates
+            $new_seq->end($new_seq->end+$cat_seq->end+1-$cat_seq->start);
+
+            if ($cat_seq->isa('Bio::Seq::Meta::Array')) {
+                unless ($new_seq->isa('Bio::Seq::Meta::Array')) {
+                    my $meta_seq=Bio::Seq::Meta::Array->new;
+                    $meta_seq->seq($new_seq->seq);
+                    $meta_seq->start($new_seq->start);
+                    $meta_seq->end($new_seq->end);
+                    if ($new_seq->isa('Bio::Seq::Meta')) {
+                        for my $meta_name ($new_seq->meta_names) {
+                            $meta_seq->named_submeta($meta_name,
+                                                     $new_seq->start,
+                                                     $old_end,
+                                                     [split(//, $new_seq->named_meta($meta_name))]
+                                                    );
+                        }
+                    }
+                    $new_seq=$meta_seq;
+                }
+                for my $meta_name ($cat_seq->meta_names) {
+                    $new_seq->named_submeta($meta_name,
+                                            $old_end+1,
+                                            $new_seq->end,
+                                            $cat_seq->named_meta($meta_name)
+                                           );
+                }
+            } elsif ($cat_seq->isa('Bio::Seq::Meta')) {
+                if ($new_seq->isa('Bio::Seq::Meta::Array')) {
+                    for my $meta_name ($cat_seq->meta_names) {
+                        $new_seq->named_submeta($meta_name,
+                                                $old_end+1,
+                                                $new_seq->end,
+                                                [split(//,$cat_seq->named_meta($meta_name))]
+                                               );
+                    }
+                } else {
+                    unless ($new_seq->isa('Bio::Seq::Meta')) {
+                        my $meta_seq=Bio::Seq::Meta::Array->new;
+                        $meta_seq->seq($new_seq->seq);
+                        $meta_seq->start($new_seq->start);
+                        $meta_seq->end($new_seq->end);
+                        $new_seq=$meta_seq;
+                    }
+                    for my $meta_name ($cat_seq->meta_names) {
+                        $new_seq->named_submeta($meta_name,
+                                                $old_end+1,
+                                                $new_seq->end,
+                                                $cat_seq->named_meta($meta_name)
+                                               );
+                    }
+                }
+            }
+        }
+        $aln->add_seq($new_seq);
+    }
+    my $cons_meta = $self->consensus_meta;
+    my $new_cons_meta;
+    if ($cons_meta) {
+        $new_cons_meta = Bio::Seq::Meta->new();
+        for my $meta_name ($cons_meta->meta_names) {
+            $new_cons_meta->named_submeta($meta_name, 1, $self->length, $cons_meta->$meta_name);
+        }
+    }
+    my $end=$self->length;
+    for my $cat_aln (@aln) {
+        my $cat_cons_meta=$cat_aln->consensus_meta;
+        if ($cat_cons_meta) {
+            $new_cons_meta = Bio::Seq::Meta->new() if !$new_cons_meta;
+            for my $meta_name ($cat_cons_meta->meta_names) {
+                $new_cons_meta->named_submeta($meta_name, $end+1, $end+$cat_aln->length, $cat_cons_meta->$meta_name);
+            }
+        }
+        $end+=$cat_aln->length;
+    } 
+    $aln->consensus_meta($new_cons_meta) if $new_cons_meta;
+    return $aln;
+}
+
+
 1;

Modified: bioperl-live/trunk/t/AlignUtil.t
===================================================================
--- bioperl-live/trunk/t/AlignUtil.t	2008-11-14 03:48:54 UTC (rev 14992)
+++ bioperl-live/trunk/t/AlignUtil.t	2008-11-16 06:14:41 UTC (rev 14993)
@@ -7,9 +7,9 @@
     use lib 't/lib';
     use BioperlTest;
     
-    test_begin(-tests => 19);
+    test_begin(-tests => 33);
 	
-	use_ok('Bio::Align::Utilities',qw(aa_to_dna_aln bootstrap_replicates) );
+	use_ok('Bio::Align::Utilities',qw(aa_to_dna_aln bootstrap_replicates cat) );
 	use_ok('Bio::AlignIO');
 	use_ok('Bio::SeqIO');
 }
@@ -41,3 +41,13 @@
 my $bootstraps = &bootstrap_replicates($aln,10);
 
 is(scalar @$bootstraps, 10);
+
+my $sub_aln1=$aln->slice(1,100);
+my $sub_aln2=$aln->slice(101,200);
+my $sub_aln3=$aln->slice(1,200);
+my $cat_aln=cat($sub_aln1, $sub_aln2);
+my @seq=$sub_aln3->each_seq;
+for my $seq ($cat_aln->each_seq) {
+    my $refseq=shift @seq;
+    is($seq->seq, $refseq->seq);
+}




More information about the Bioperl-guts-l mailing list