[Bioperl-guts-l] [16955] bioperl-live/trunk: Partial redesign to simplify/clarify the internal code of B::A::T::ContigSpectrum
Florent E Angly
fangly at dev.open-bio.org
Tue Apr 27 00:28:55 EDT 2010
Revision: 16955
Author: fangly
Date: 2010-04-27 00:28:55 -0400 (Tue, 27 Apr 2010)
Log Message:
-----------
Partial redesign to simplify/clarify the internal code of B::A::T::ContigSpectrum
Modified Paths:
--------------
bioperl-live/trunk/Bio/Assembly/Tools/ContigSpectrum.pm
bioperl-live/trunk/t/Assembly/ContigSpectrum.t
Modified: bioperl-live/trunk/Bio/Assembly/Tools/ContigSpectrum.pm
===================================================================
--- bioperl-live/trunk/Bio/Assembly/Tools/ContigSpectrum.pm 2010-04-26 16:44:21 UTC (rev 16954)
+++ bioperl-live/trunk/Bio/Assembly/Tools/ContigSpectrum.pm 2010-04-27 04:28:55 UTC (rev 16955)
@@ -141,8 +141,7 @@
to_string create a string representation of the spectrum
spectrum import a hash contig spectrum
- contig determine a contig spectrum from a contig
- assembly determine a contig spectrum from an assembly
+ assembly determine a contig spectrum from an assembly, contig or singlet
dissolve calculate a dissolved contig spectrum (depends on assembly)
cross produce a cross contig spectrum (depends on assembly)
add add a contig spectrum to an existing one
@@ -553,39 +552,17 @@
return $spectrum;
}
-=head2 contig
- Title : contig
- Usage : my @obj_list = $csp->contig();
- Function: Update the contig spectrum object by adding a contig or singlet
- object / get a reference to the list of assembly, contig and singlet
- objects used in the contig spectrum.
- Returns : array reference of Bio::Assembly::Scaffold, Bio::Assembly::Contig and
- Bio::Assembly::Singlet objects
- Args : Bio::Assembly::Contig or Bio::Assembly::Singlet object
-
-=cut
-
-sub contig {
- my ($self, $contig) = @_;
- if (defined $contig) {
- $self->_import_contig($contig);
- }
- my @obj_list = @{$self->{'_assembly'}} if defined $self->{'_assembly'};
- return \@obj_list;
-}
-
-
=head2 assembly
Title : assembly
Usage : my @obj_list = $csp->assembly();
- Function: Update the contig spectrum object by adding an assembly object / get
- a reference to the list of assembly, contig and singlet objects used
- in the contig spectrum object.
- Returns : array reference of Bio::Assembly::Scaffold, Bio::Assembly::Contig and
- Bio::Assembly::Singlet objects
- Args : Bio::Assembly::Scaffold object
+ Function: Update the contig spectrum object by adding an assembly, contig or
+ singlet object to it
+ Returns : arrayref of assembly, contig and singlet objects used in the contig
+ spectrum object (Bio::Assembly::Scaffold, Bio::Assembly::Contig and
+ Bio::Assembly::Singlet objects)
+ Args : Bio::Assembly::Scaffold, Contig or Singlet object
=cut
@@ -594,8 +571,24 @@
if (defined $assembly) {
$self->_import_assembly($assembly);
}
+ return $self->get_assembly();
+}
+
+
+=head2 get_assembly
+
+ Title : get_assembly
+ Usage : $csp->get_assembly();
+ Function: Get all assembly objects associated with a contig spectrum.
+ Returns : array reference of Bio::Assembly::Scaffold, Contig and Singlet objects
+ Args : none
+
+=cut
+
+sub get_assembly {
+ my ($self) = @_;
my @obj_list = @{$self->{'_assembly'}} if defined $self->{'_assembly'};
- return \@obj_list;
+ return @obj_list;
}
@@ -959,9 +952,9 @@
Title : _new_from_assembly
Usage :
Function: Creates a new contig spectrum object based solely on the result of
- an assembly
- Returns : Bio::Assembly::Tools::ContigSpectrum
- Args : Bio::Assembly::Scaffold
+ an assembly, contig or singlet
+ Returns : Bio::Assembly::Tools::ContigSpectrum object
+ Args : Bio::Assembly::Scaffold, Contig or Singlet object
=cut
@@ -985,7 +978,7 @@
# 3: Set sequence statistics: nof_seq and avg_seq_len
($csp->{'_avg_seq_len'}, $csp->{'_nof_seq'}) = $self->_get_assembly_seq_stats($assemblyobj);
# 4: Set the spectrum: spectrum and max_size
- for my $contigobj ($assemblyobj->all_contigs) {
+ for my $contigobj ( $self->_get_contig_like($assemblyobj) ) {
my $size = $contigobj->num_sequences;
if (defined $csp->{'_spectrum'}{$size}) {
$csp->{'_spectrum'}{$size}++;
@@ -994,11 +987,6 @@
}
$csp->{'_max_size'} = $size if $size > $csp->{'_max_size'};
}
- my $nof_singlets = $assemblyobj->get_nof_singlets();
- if (defined $nof_singlets) {
- $csp->{'_spectrum'}{1} += $nof_singlets;
- $csp->{'_max_size'} = 1 if $nof_singlets >= 1 && $csp->{'_max_size'} < 1;
- }
# 5: Set list of assembly objects used
push @{$csp->{'_assembly'}}, $assemblyobj;
# 6: Set number of repetitions
@@ -1007,48 +995,6 @@
}
-=head2 _new_from_contig
-
- Title : _new_from_contig
- Usage :
- Function: Creates a new contig spectrum object based solely on a contig or
- singlet
- Returns : Bio::Assembly::Tools::ContigSpectrum
- Args : Bio::Assembly::Contig or Bio::Assembly::Singlet
-
-=cut
-
-sub _new_from_contig {
- # Create new contig spectrum object based purely on what we can get from a
- # contig object
- my ($self, $contigobj) = @_;
- my $csp = Bio::Assembly::Tools::ContigSpectrum->new();
- # 1: Set id
- $csp->{'_id'} = $contigobj->id;
- # 2: Set overlap statistics: nof_overlaps, min_overlap, avg_overlap,
- # min_identity and avg_identity
- $csp->{'_eff_asm_params'} = $self->{'_eff_asm_params'};
- $csp->{'_min_overlap'} = $self->{'_min_overlap'};
- $csp->{'_min_identity'} = $self->{'_min_identity'};
- if ($csp->{'_eff_asm_params'} > 0) {
- ( $csp->{'_avg_overlap'}, $csp->{'_avg_identity'}, $csp->{'_min_overlap'},
- $csp->{'_min_identity'}, $csp->{'_nof_overlaps'} )
- = $csp->_get_contig_overlap_stats($contigobj);
- }
- # 3: Set sequence statistics: nof_seq and avg_seq_len
- ($csp->{'_avg_seq_len'}, $csp->{'_nof_seq'}) = $csp->_get_contig_seq_stats($contigobj);
- # 4: Set the spectrum: spectrum and max_size
- my $size = $contigobj->num_sequences;
- $csp->{'_spectrum'}{$size} = 1;
- $csp->{'_max_size'} = $size;
- # 5: Set list of assembly objects used
- push @{$csp->{'_assembly'}}, $contigobj;
- # 6: Set number of repetitions
- $csp->{'_nof_rep'} = 1;
- return $csp;
-}
-
-
=head2 _new_dissolved_csp
Title :
@@ -1109,42 +1055,28 @@
my $asm_spectrum = { 1 => 0 };
my $good_seqs = {};
for my $obj (@{$mixed_csp->{'_assembly'}}) {
+
# Dissolve this assembly/contig/singlet for the given sequences
- if ($obj->isa('Bio::Assembly::Scaffold')) {
- my $assembly = $obj;
- # For each contig/singlet
- for my $contig ($assembly->all_contigs, $assembly->all_singlets) {
- ($asm_spectrum, $good_seqs) = $self->_dissolve_contig($dissolved, $contig, $seq_header, $asm_spectrum, $good_seqs);
- }
- } elsif ($obj->isa('Bio::Assembly::Contig')) {
- # a contig or singlet
- my $contig = $obj;
- ($asm_spectrum, $good_seqs) = $self->_dissolve_contig($dissolved, $contig, $seq_header, $asm_spectrum, $good_seqs);
+ for my $contig ( $self->_get_contig_like($obj) ) {
+ ($asm_spectrum, $good_seqs) = $self->_dissolve_contig($dissolved, $contig,
+ $seq_header, $asm_spectrum, $good_seqs);
}
# Update spectrum
$dissolved->_import_spectrum($asm_spectrum);
+
# Update nof_rep
$dissolved->{'_nof_rep'}--;
$dissolved->{'_nof_rep'} += $mixed_csp->{'_nof_rep'};
# Get sequence and overlap stats
- if ($obj->isa('Bio::Assembly::Scaffold')) {
- ($dissolved->{'_avg_seq_len'}, $dissolved->{'_nof_seq'}) =
- $dissolved->_get_assembly_seq_stats($obj, $good_seqs);
- if ($dissolved->{'_eff_asm_params'} > 0) {
- ( $dissolved->{'_avg_overlap'}, $dissolved->{'_avg_identity'}, $dissolved->{'_min_overlap'},
- $dissolved->{'_min_identity'}, $dissolved->{'_nof_overlaps'} )
- = $dissolved->_get_assembly_overlap_stats($obj, $good_seqs);
- }
- } elsif ($obj->isa('Bio::Assembly::Contig')) {
- ($dissolved->{'_avg_seq_len'}, $dissolved->{'_nof_seq'}) =
- $dissolved->_get_contig_seq_stats($obj, $good_seqs);
- if ($dissolved->{'_eff_asm_params'} > 0) {
- ( $dissolved->{'_avg_overlap'}, $dissolved->{'_avg_identity'}, $dissolved->{'_min_overlap'},
- $dissolved->{'_min_identity'}, $dissolved->{'_nof_overlaps'} )
- = $dissolved->_get_contig_overlap_stats($obj, $good_seqs);
- }
+ ($dissolved->{'_avg_seq_len'}, $dissolved->{'_nof_seq'}) =
+ $dissolved->_get_assembly_seq_stats($obj, $good_seqs);
+ if ($dissolved->{'_eff_asm_params'} > 0) {
+ ( $dissolved->{'_avg_overlap'}, $dissolved->{'_avg_identity'},
+ $dissolved->{'_min_overlap'}, $dissolved->{'_min_identity'},
+ $dissolved->{'_nof_overlaps'} )
+ = $dissolved->_get_assembly_overlap_stats($obj, $good_seqs);
}
}
@@ -1175,7 +1107,9 @@
# Update spectrum
my $size = scalar @contig_seqs;
- if ($size == 1) {
+ if ($size == 0) {
+ # do nothing
+ } elsif ($size == 1) {
$$asm_spectrum{1}++;
} elsif ($size > 1) {
# Reassemble good sequences
@@ -1186,7 +1120,9 @@
for my $qsize (keys %$contig_spectrum) {
$$asm_spectrum{$qsize} += $$contig_spectrum{$qsize};
}
- }
+ } else {
+ $self->throw("The size is not valid... how could that happen?");
+ }
return $asm_spectrum, $good_seqs;
}
@@ -1237,35 +1173,23 @@
my $spectrum = {1 => 0};
my $good_seqs = {};
for my $obj (@{$mixed_csp->{'_assembly'}}) {
- if ($obj->isa('Bio::Assembly::Scaffold')) {
- # Go through contigs and skip the pure ones
- my $assembly = $obj;
- for my $contig ($assembly->all_contigs) {
- ($spectrum, $good_seqs) = $self->_cross_contig($cross, $contig, $spectrum, $good_seqs);
- }
- # Get sequence stats
- ($cross->{'_avg_seq_len'}, $cross->{'_nof_seq'}) = $cross->_get_assembly_seq_stats($assembly, $good_seqs);
- # Get eff_asm_param for these sequences
- if ($cross->{'_eff_asm_params'} > 0) {
- ( $cross->{'_avg_overlap'}, $cross->{'_avg_identity'}, $cross->{'_min_overlap'},
- $cross->{'_min_identity'}, $cross->{'_nof_overlaps'} )
- = $cross->_get_assembly_overlap_stats($assembly, $good_seqs);
- }
- } elsif ($obj->isa('Bio::Assembly::Contig')) {
- my $contig = $obj;
- ($spectrum, $good_seqs) = $self->_cross_contig($cross, $contig, $spectrum, $good_seqs);
- # Get sequence stats
@@ Diff output truncated at 10000 characters. @@
More information about the Bioperl-guts-l mailing list