[Bioperl-guts-l] [16371] bioperl-live/trunk: Implemented the $io->next_contig() method for the Bio::Assembly::IO::maq parser

Florent E Angly fangly at dev.open-bio.org
Sat Nov 14 21:26:33 EST 2009


Revision: 16371
Author:   fangly
Date:     2009-11-14 21:26:33 -0500 (Sat, 14 Nov 2009)
Log Message:
-----------
Implemented the $io->next_contig() method for the Bio::Assembly::IO::maq parser

Modified Paths:
--------------
    bioperl-live/trunk/Bio/Assembly/IO/maq.pm
    bioperl-live/trunk/t/Assembly/Assembly.t

Modified: bioperl-live/trunk/Bio/Assembly/IO/maq.pm
===================================================================
--- bioperl-live/trunk/Bio/Assembly/IO/maq.pm	2009-11-15 01:16:21 UTC (rev 16370)
+++ bioperl-live/trunk/Bio/Assembly/IO/maq.pm	2009-11-15 02:26:33 UTC (rev 16371)
@@ -159,10 +159,10 @@
 
 my $progname = 'maq';
 
-=head2 next_assembly()
+=head2 next_assembly
 
  Title   : next_assembly
- Usage   : my $scaffold = $asmio->next_assembly()
+ Usage   : $scaffold = $stream->next_assembly()
  Function: return the assembly defined by the map and cns files
  Returns : Bio::Assembly::Scaffold object
  Args    : none
@@ -170,70 +170,99 @@
 =cut
 
 sub next_assembly {
+    my $self = shift;
+
+    my $assembly = Bio::Assembly::Scaffold->new( -progname => $progname );
+
+    # Load contigs and singlets in the scaffold
+    while ( my $obj = $self->next_contig()) {
+        # Add contig /singlet to assembly
+        if ($obj->isa('Bio::Assembly::Singlet')) { # a singlet
+            $assembly->add_singlet($obj);
+        } else { # a contig
+            $assembly->add_contig($obj);
+        }
+    }
+
+    return $assembly;
+}
+
+
+=head2 next_contig
+
+ Title   : next_contig
+ Usage   : $scaffold = $stream->next_contig()
+ Function: Returns the next contig or singlet in the ACE stream.
+ Returns : a Bio::Assembly::Contig or Bio::Assembly::Single object
+ Args    : none
+
+=cut
+
+sub next_contig {
     my $self = shift; # object reference
-    
-    # Create a new scaffold to hold the contigs
-    my $scaffoldobj = Bio::Assembly::Scaffold->new(-source => $progname);
-    
+
+
+    # Read the file of consensus sequences if it has not already been done for
+    # this Bio:::Assembly::IO stream already
+    if (not defined $self->_cons) {
+        $self->_parse_cns_file or
+            $self->throw("Associated maq consensus file is not available");
+    }
+
     # Contig and read related
-    my ($contigobj, %contiginfo, %readinfo);
-    
-    $self->_parse_cns_file or
-        $self->throw("Associated maq consensus file is not available");
+    my $contigobj;
+    my %contiginfo;
 
     # Loop over all assembly file lines
     while ($_ = $self->_readline) {
         chomp;
+        next if /^$/;
+
         # mapview format parsing ; every line is a read...
-        undef %readinfo;
+        my %readinfo;
         @readinfo{ qw(read_name chr posn strand insert_size
             paired_flag map_qual se_map_qual alt_map_qual
             num_mm_best_hit sum_qual_mm_best_hit zero_mm_hits
             one_mm_hits read_len seqstr qualstr) } = split(/\s+/);
+
         # sanger conversion
         my @qual = map { ord($_)-33 } split('', $readinfo{qualstr});
         $readinfo{seq} = Bio::Seq::Quality->new(
-            -id => $readinfo{read_name},
-            -seq => $readinfo{seqstr},
+            -id   => $readinfo{read_name},
+            -seq  => $readinfo{seqstr},
             -qual => \@qual
             );
 
-        if (!defined $contiginfo{start} || 
-            $readinfo{'posn'} > $contiginfo{end} ) {
-            # new contig
-            # close old contigobj, if nec.
-            if (defined $contiginfo{start}) {
-                # store old contig object
+        if ( not defined $contiginfo{start} ) {
+            # First read in the shiny new contig
+            $contiginfo{'seqnum'}  = 1;
+            $contiginfo{'qualobj'} = $self->_next_cons;
+            $contiginfo{start} = $contiginfo{'qualobj'}->start;
+            $contiginfo{end}   = $contiginfo{'qualobj'}->end;
+            $contiginfo{'asmbl_id'} = 'maq_assy['.$self->_basename.']/'.$contiginfo{start}.'-'.$contiginfo{end};
+            $contigobj = $self->_init_contig(\%contiginfo);
+            $self->_store_read(\%readinfo, $contigobj);
+        } else {
+            if ( $readinfo{'posn'} <= $contiginfo{end} ) {
+                # Add read to existing contig
+                $contiginfo{'seqnum'}++;
+                $self->_store_read(\%readinfo, $contigobj);
+            } else {
+                # Read belongs in a new contig
                 if ($contiginfo{'seqnum'} > 1) {
-                    $self->_store_contig(\%contiginfo, $contigobj, $scaffoldobj);
+                    $self->_store_contig(\%contiginfo, $contigobj);
                 }
                 else { #singlet
-                    $self->_store_singlet(\%contiginfo, $contigobj, $scaffoldobj);
-                    1;
+                    $self->_store_singlet(\%contiginfo, $contigobj);
                 }
+                # do a pushback
+                $self->_pushback($_);
+                last;
             }
-            # create new contigobj
-
-            $contiginfo{'seqnum'} = 1;
-            $contiginfo{'qualobj'} = $self->_next_cons;
-            $contiginfo{start} = $contiginfo{'qualobj'}->start;
-            $contiginfo{end} = $contiginfo{'qualobj'}->end;
-            $contiginfo{'asmbl_id'} = 'maq_assy['.$self->_basename.']/'.$contiginfo{start}.'-'.$contiginfo{end};
-            $contigobj = $self->_init_contig(\%contiginfo, $scaffoldobj);
-
-            $self->_store_read(\%readinfo, $contigobj);
         }
-        else {
-            # update this contig's info...
-            $contiginfo{'seqnum'}++;
-            $self->_store_read(\%readinfo, $contigobj);
-        }
-
     }
-
-    $scaffoldobj->update_seq_list();
     
-    return $scaffoldobj;
+    return $contigobj;
 }
 
 =head2 _init_contig()
@@ -249,14 +278,13 @@
 =cut
 
 sub _init_contig {
-    my ($self, $contiginfo, $scaffoldobj) = @_;
+    my ($self, $contiginfo) = @_;
     # Create a contig and attach it to scaffold
     my $contigobj = Bio::Assembly::Contig->new(
         -id     => $$contiginfo{'asmbl_id'},
         -source => $progname,
         -strand => 1
     );
-    $scaffoldobj->add_contig($contigobj);
     return $contigobj;
 }
 
@@ -268,12 +296,12 @@
     Function: store information of a contig belonging to a scaffold
               in the appropriate object
     Returns : Bio::Assembly::Contig object
-    Args    : hash, Bio::Assembly::Contig, Bio::Assembly::Scaffold
+    Args    : hash, Bio::Assembly::Contig
 
 =cut
 
 sub _store_contig {
-    my ($self, $contiginfo, $contigobj, $scaffoldobj) = @_;
+    my ($self, $contiginfo, $contigobj) = @_;
 
     $self->throw("Contig object must be defined") unless $contigobj;
 
@@ -286,10 +314,10 @@
     $contigobj->set_consensus_quality($$contiginfo{qualobj});
 
     # Add other misc contig information as features of the contig
-   # Add other misc read information as subsequence feature
-   my @other = grep !/asmbl_id|end|qualobj|start/, keys %$contiginfo;
-   my %other;
-   @other{@other} = @$contiginfo{@other};
+    # Add other misc read information as subsequence feature
+    my @other = grep !/asmbl_id|end|qualobj|start/, keys %$contiginfo;
+    my %other;
+    @other{@other} = @$contiginfo{@other};
     my $contigtags = Bio::SeqFeature::Generic->new(
         -primary_tag => "_main_contig_feature:$$contiginfo{'asmbl_id'}",
         -start       => 1,
@@ -353,6 +381,7 @@
     return 1;
 }
 
+
 =head2 _cons()
 
  Title   : _cons
@@ -363,15 +392,23 @@
 
 =cut
 
-sub _cons { @{shift->{'_cons'}} };
+sub _cons {
+    my $self = shift;
+    my $cons = undef;
+    if (defined $self->{'_cons'}) {
+      $cons = $self->{'_cons'};
+    }
+    return $cons;
+}
 
 
 =head2 _next_cons()
 
 =cut
 
-sub _next_cons() { shift(@{shift->{'_cons'}}) }
+sub _next_cons { shift(@{shift->{'_cons'}}) }
 
+
 =head2 _store_read()
 
     Title   : _store_read
@@ -442,19 +479,18 @@
 =head2 _store_singlet()
 
     Title   : _store_singlet
-    Usage   : my $singletobj = $self->_store_read(\%readinfo, \%contiginfo,
-                  $scaffoldobj);
-    Function: store information of a singlet belonging to a scaffold in the appropriate object
+    Usage   : my $singletobj = $self->_store_read(\%readinfo, \%contiginfo);
+    Function: store information of a singlet belonging to a scaffold in a singlet object
     Returns : Bio::Assembly::Singlet
-    Args    : hash, hash, Bio::Assembly::Scaffold
+    Args    : hash, hash
 
 =cut
 
 sub _store_singlet {
-    my ($self, $readinfo, $contiginfo, $scaffoldobj) = @_;
+    my ($self, $readinfo, $contiginfo) = @_;
 
-    my $singletobj = Bio::Assembly::Singlet->new( -seqref => $$readinfo{qualobj} );
-    $scaffoldobj->add_singlet($singletobj);
+    my $singletobj = Bio::Assembly::Singlet->new( -id     => $$readinfo{'read_name'},
+                                                  -seqref => $$readinfo{'qualobj'}   );
 
     # Add other misc contig information as features of the contig
    # Add other misc read information as subsequence feature
@@ -530,7 +566,7 @@
 
 =cut
 
-    sub _basename {
+sub _basename {
     my $self = shift;
     return (fileparse($self->file, ".maq"))[0];
 }

Modified: bioperl-live/trunk/t/Assembly/Assembly.t
===================================================================
--- bioperl-live/trunk/t/Assembly/Assembly.t	2009-11-15 01:16:21 UTC (rev 16370)
+++ bioperl-live/trunk/t/Assembly/Assembly.t	2009-11-15 02:26:33 UTC (rev 16371)
@@ -7,7 +7,7 @@
     use lib '.';
     use Bio::Root::Test;
     
-    test_begin( -tests => 144,
+    test_begin( -tests => 217,
                 -requires_module => 'DB_File' );
 
     use_ok('Bio::Assembly::IO');
@@ -271,7 +271,7 @@
 #
 
 ok $aio = Bio::Assembly::IO->new( -file => test_input_file('test.maq'),
-				  -format => 'maq' ), "init maq IO object";
+                                  -format => 'maq' );
 ok $assembly = $aio->next_assembly, "get maq assy";
 is( $assembly->get_nof_contigs, 11, "got all contigs");
 ok open(my $tf, test_input_file('test.maq')), "read test file as text";
@@ -279,3 +279,9 @@
 is( $assembly->get_nof_contig_seqs, scalar @lines, "recorded all maq reads");
 ok !$assembly->get_nof_singlets, "no singlets";
 
+ok $aio = Bio::Assembly::IO->new( -file => test_input_file('test.maq'),
+                                  -format => 'maq' ), "init maq IO object";
+isa_ok($aio, 'Bio::Assembly::IO');
+while (my $contig = $aio->next_contig) {
+    isa_ok($contig, 'Bio::Assembly::Contig');
+}



More information about the Bioperl-guts-l mailing list