[Bioperl-guts-l] [16335] bioperl-run/trunk: Created wrapper for new assembly software: Bio::Tools::Run::Minimo

Florent E Angly fangly at dev.open-bio.org
Thu Nov 5 19:16:01 EST 2009


Revision: 16335
Author:   fangly
Date:     2009-11-05 19:16:01 -0500 (Thu, 05 Nov 2009)
Log Message:
-----------
Created wrapper for new assembly software: Bio::Tools::Run::Minimo
Minor changes in existing assembly wrappers

Modified Paths:
--------------
    bioperl-run/trunk/lib/Bio/Tools/Run/AssemblerBase.pm
    bioperl-run/trunk/lib/Bio/Tools/Run/TigrAssembler.pm
    bioperl-run/trunk/t/Cap3.t
    bioperl-run/trunk/t/data/sample_dataset_1.fa
    bioperl-run/trunk/t/data/sample_dataset_1.qual

Added Paths:
-----------
    bioperl-run/trunk/lib/Bio/Tools/Run/Minimo.pm
    bioperl-run/trunk/t/Minimo.t

Modified: bioperl-run/trunk/lib/Bio/Tools/Run/AssemblerBase.pm
===================================================================
--- bioperl-run/trunk/lib/Bio/Tools/Run/AssemblerBase.pm	2009-11-05 09:33:21 UTC (rev 16334)
+++ bioperl-run/trunk/lib/Bio/Tools/Run/AssemblerBase.pm	2009-11-06 00:16:01 UTC (rev 16335)
@@ -382,8 +382,16 @@
   $self->{'_options'}->{'_switches'}    = $switches;
   $self->{'_options'}->{'_translation'} = $translation;
   $self->{'_options'}->{'_qual_param'}  = $qual_param;
-  $self->{'_options'}->{'_dash'}        = $use_dash || 1;
-  $self->{'_options'}->{'_join'}        = $join || ' ';
+  if (not defined $use_dash) {
+    $self->{'_options'}->{'_dash'}      = 1;         # default: dash enabled
+  } else {
+    $self->{'_options'}->{'_dash'}      = $use_dash; # honour an explicit value, including 0
+  }
+  if (not defined $join) {                           # test $join itself, not $use_dash
+    $self->{'_options'}->{'_join'}      = ' ';       # default separator: a single space
+  } else {
+    $self->{'_options'}->{'_join'}      = $join;     # caller-supplied separator, e.g. '='
+  }
   $self->_set_from_args(
     $args,
     -methods => [ @$params, @$switches ],
@@ -406,6 +414,8 @@
 
 sub _translate_params {
   my ($self)   = @_;
+
+  # Get option string
   my $params   = $self->{'_options'}->{'_params'};
   my $switches = $self->{'_options'}->{'_switches'};
   my $join     = $self->{'_options'}->{'_join'};
@@ -417,7 +427,9 @@
     -join      => $join,
     -dash      => $dash
   );
-  my @options  = split(/$join/, $options);
+
+  # Translate options
+  my @options  = split(/(\s|$join)/, $options);
   for (my $i = 0; $i < scalar @options; $i++) {
     my ($prefix, $name) = ( $options[$i] =~ m/^(-?)(.+)$/ );
     if (defined $name) {
@@ -429,6 +441,11 @@
       $i--;
     }
   }
+  $options = join('', @options);
+
+  # Now arrayify the options
+  @options = split(' ', $options);
+
   return \@options;
 }
 
@@ -456,9 +473,17 @@
 
   # Prepare input files
   my ($fasta_file, $qual_file) = $self->_prepare_input_files($seqs,$quals);
+
+  # If needed, set the program argument for a QUAL file
   my $qual_param = $self->{'_options'}->{'_qual_param'};
-  if ($qual_file && defined $qual_param) {
-    $quals = $self->$qual_param;
+  if (defined $qual_param) {
+    if ($qual_file) {
+      # Set the quality input parameter
+      $quals = $self->$qual_param($qual_file);
+    } else {
+      # Remove the quality input parameter
+      $quals = $self->$qual_param(undef);
+    }
   }
 
   # Assemble

Added: bioperl-run/trunk/lib/Bio/Tools/Run/Minimo.pm
===================================================================
--- bioperl-run/trunk/lib/Bio/Tools/Run/Minimo.pm	                        (rev 0)
+++ bioperl-run/trunk/lib/Bio/Tools/Run/Minimo.pm	2009-11-06 00:16:01 UTC (rev 16335)
@@ -0,0 +1,267 @@
+# BioPerl module for Bio::Tools::Run::Minimo
+#
+# Copyright Florent E Angly <florent-dot-angly-at-gmail-dot-com>
+#
+# You may distribute this module under the same terms as perl itself
+#
+# POD documentation - main docs before the code
+
+=head1 NAME
+
+  Bio::Tools::Run::Minimo - Wrapper for local execution of the Minimo assembler
+
+=head1 SYNOPSIS
+
+  use Bio::Tools::Run::Minimo;
+  # Run Minimo using an input FASTA file
+  my $factory = Bio::Tools::Run::Minimo->new( -min_len => 35 );
+  my $asm_obj = $factory->run($fasta_file, $qual_file);
+  # An assembly object is returned by default
+  for my $contig ($asm_obj->all_contigs) {
+    ... do something ...
+  }
+
+  # Read some sequences
+  use Bio::SeqIO;
+  my $sio = Bio::SeqIO->new(-file => $fasta_file, -format => 'fasta');
+  my @seqs;
+  while (my $seq = $sio->next_seq()) {
+    push @seqs,$seq;
+  }
+
+  # Run Minimo using input sequence objects and returning an assembly file
+  my $asm_file = 'results.ace';
+  $factory->out_type($asm_file);
+  $factory->run(\@seqs);
+
+=head1 DESCRIPTION
+
+  Wrapper module for the local execution of the DNA assembly program Minimo.
+  Minimo is based on AMOS (http://sourceforge.net/apps/mediawiki/amos/) and
+  implements the same conservative assembly algorithm as Minimus
+  (http://sourceforge.net/apps/mediawiki/amos/index.php?title=Minimus).
+
+=head1 FEEDBACK
+
+=head2 Mailing Lists
+
+User feedback is an integral part of the evolution of this and other Bioperl
+modules. Send your comments and suggestions preferably to one of the Bioperl
+mailing lists.  Your participation is much appreciated.
+
+  bioperl-l at bioperl.org                  - General discussion
+  http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
+
+=head2 Support 
+
+Please direct usage questions or support issues to the mailing list:
+
+I<bioperl-l at bioperl.org>
+
+rather than to the module maintainer directly. Many experienced and 
+responsive experts will be able to look at the problem and quickly
+address it. Please include a thorough description of the problem 
+with code and data examples if at all possible.
+
+=head2 Reporting Bugs
+
+Report bugs to the Bioperl bug tracking system to help us keep track of the bugs
+and their resolution.  Bug reports can be submitted via the web:
+
+  http://bugzilla.open-bio.org/
+
+=head1 AUTHOR - Florent E Angly
+
+ Email: florent-dot-angly-at-gmail-dot-com
+
+=head1 APPENDIX
+
+The rest of the documentation details each of the object methods. Internal
+methods are usually preceded with a _
+
+=cut
+
+
+package Bio::Tools::Run::Minimo;
+
+use strict;
+use IPC::Run;
+use File::Copy;
+use File::Spec;
+use File::Basename;
+
+use base qw( Bio::Root::Root Bio::Tools::Run::AssemblerBase );
+
+our $program_name = 'Minimo'; # name of the executable
+our @program_params = (qw( qual_in good_qual bad_qual min_len min_ident out_prefix ace_exp )); # option names handed to _set_program_options() by new()
+our @program_switches; # left empty: no switches are declared for this wrapper
+our %param_translation = ( # maps each option name to the text used for it on the command line
+  'qual_in'    => 'D QUAL_IN',
+  'good_qual'  => 'D GOOD_QUAL',
+  'bad_qual'   => 'D BAD_QUAL',
+  'min_len'    => 'D MIN_LEN',
+  'min_ident'  => 'D MIN_IDENT',
+  'out_prefix' => 'D OUT_PREFIX',
+  'ace_exp'    => 'D ACE_EXP'
+);
+
+our $qual_param = 'qual_in'; # name of the option that carries the QUAL file
+our $use_dash = 1; # options are dash-prefixed (style: -D PARAM=VAL)
+our $join = '='; # separator between an option name and its value
+our $asm_format = 'ace'; # passed to _assembly_format() in new()
+
+=head2 new
+
+ Title   : new
+ Usage   : $assembler->new( -min_len   => 50,
+                            -min_ident => 95 );
+ Function: Creates a Minimo factory
+ Returns : A Bio::Tools::Run::Minimo object
+ Args    : Minimo options available in this module:
+     qual_in      Input quality score file
+     good_qual    Quality score to set for bases within the clear
+                    range if no quality file was given (default: 30)
+     bad_qual     Quality score to set for bases outside clear range
+                    if no quality file was given (default: 10). If your
+                    sequences are trimmed, try the same value as GOOD_QUAL.
+     min_len      Minimum contig overlap length (between 20 and 100 bp,
+                    default: 35)
+     min_ident    Minimum contig overlap identity percentage (between 0
+                    and 100 %, default: 98)
+     out_prefix   Prefix to use for the output file path and name
+
+=cut
+
+sub new {
+  my ($class, @args) = @_;  # everything after the class name is the option list
+  my $self = $class->SUPER::new(@args);
+  $self->_set_program_options(\@args, \@program_params, \@program_switches,
+    \%param_translation, $qual_param, $use_dash, $join);
+  $self->program_name($program_name) if not defined $self->program_name();  # keep a name that is already set
+  $self->_assembly_format($asm_format);
+  return $self;
+}
+
+
+=head2 out_type
+
+ Title   : out_type
+ Usage   : $factory->out_type('Bio::Assembly::ScaffoldI')
+ Function: Get/set the desired type of output
+ Returns : The type of results to return
+ Args    : Desired type of results to return (optional):
+                 'Bio::Assembly::IO' object
+                 'Bio::Assembly::ScaffoldI' object (default)
+                 The name of a file to save the results in
+
+=cut
+
+
+=head2 run
+
+ Title   :   run
+ Usage   :   $factory->run($fasta_file);
+ Function:   Run Minimo
+ Returns :   - a Bio::Assembly::ScaffoldI object, a Bio::Assembly::IO
+               object, a filename, or undef if all sequences were too small to
+               be usable
+ Returns :   Assembly results (file, IO object or assembly object)
+ Args    :   - sequence input (FASTA file or sequence object arrayref)
+             - optional quality score input (QUAL file or quality score object
+               arrayref)
+=cut
+
+
+=head2 _run
+
+ Title   :   _run
+ Usage   :   $factory->_run()
+ Function:   Make a system call and run Minimo
+ Returns :   An assembly file
+ Args    :   - FASTA file
+             - optional QUAL file
+
+=cut
+
+
+sub _run {
+  my ($self, $fasta_file, $qual_file) = @_;
+
+  #   qual_in      Input quality score file
+  #   fasta_exp    Export results in FASTA format (0:no 1:yes, default: 1)
+  #   ace_exp      Export results in ACE format (0:no 1:yes, default: 1)
+
+  # Specify that we want an ACE output file
+  $self->ace_exp(1);
+
+  # Setup needed files and filehandles first
+  my ($output_fh, $output_file) = $self->_prepare_output_file( );
+  my ($stdout_fh, $stdout_file) = $self->io->tempfile( -dir => $self->tempdir() );
+
+  # Get program executable
+  my $exe = $self->executable;
+
+  # Get command-line options
+  my $options = $self->_translate_params();
+
+  # Usage: Minimo FASTA_IN [options]
+  # Options are of the style: -D PARAM=VAL
+  my @program_args = ( $exe, $fasta_file, @$options);
+  my @ipc_args = ( \@program_args, '>', $stdout_file);
+
+  # Print command for debugging
+  if ($self->verbose() >= 0) {
+    my $cmd = '';
+    $cmd .= join ( $join, @program_args );
+    for ( my $i = 1 ; $i < scalar @ipc_args ; $i++ ) {
+      my $element = $ipc_args[$i];
+      my $ref = ref $element;
+      my $value;
+      if ( $ref && $ref eq 'SCALAR') {
+        $value = $$element;
+      } else {
+        $value = $element;
+      }
+      $cmd .= " $value";
+    }
+    $self->debug( "$exe command = $cmd\n" );
+  }
+
+  # Execute command
+  my $log_file = "$fasta_file.runAmos.log";
+  eval {
+    IPC::Run::run(@ipc_args) || die("There was a problem running $exe. The ".
+      "error message is: $!. Check the log file $log_file for possible causes.");
+  };
+  if ($@) {
+    $self->throw("$exe call crashed: $@");
+  }
+
+  # Close filehandles
+  close($output_fh);
+  close($stdout_fh);
+
+  # Result files
+  my $base = $self->out_prefix();
+  if (not defined $base) {
+    my $dirname  = dirname($fasta_file);

@@ Diff output truncated at 10000 characters. @@


More information about the Bioperl-guts-l mailing list