[Bioperl-guts-l] [16731] bioperl-live/trunk/Bio/Assembly/IO/bowtie.pm: Revert to non-handling of CM comment - discussion with Ben Langmead indicates it is not reliably determinable from bowtie format .

Dan Kortschak kortsch at dev.open-bio.org
Thu Jan 21 15:26:33 EST 2010


Revision: 16731
Author:   kortsch
Date:     2010-01-21 15:26:32 -0500 (Thu, 21 Jan 2010)
Log Message:
-----------
Revert to non-handling of CM comment - discussion with Ben Langmead indicates it is not reliably determinable from bowtie format. If you want it, use SAM output option.

Modified Paths:
--------------
    bioperl-live/trunk/Bio/Assembly/IO/bowtie.pm

Modified: bioperl-live/trunk/Bio/Assembly/IO/bowtie.pm
===================================================================
--- bioperl-live/trunk/Bio/Assembly/IO/bowtie.pm	2010-01-20 04:36:39 UTC (rev 16730)
+++ bioperl-live/trunk/Bio/Assembly/IO/bowtie.pm	2010-01-21 20:26:32 UTC (rev 16731)
@@ -111,7 +111,6 @@
 use Bio::SeqIO;
 use Bio::Tools::Run::Samtools;
 use Bio::Assembly::IO;
-use POSIX;
 
 use base qw( Bio::Assembly::IO );
 
@@ -152,12 +151,10 @@
 	my $self = $class->SUPER::new(@args);
 	$self->_initialize(@args);
 	$self->{'_tempdir'} = $self->tempdir(CLEANUP=>1);
-	my ($file, $index, $no_head, $no_sq, $color) =
-		$self->_rearrange([qw(FILE INDEX NO_HEAD NO_SQ COLOR_SPACE)], @args);
+	my ($file, $index, $no_head, $no_sq) = $self->_rearrange([qw(FILE INDEX NO_HEAD NO_SQ)], @args);
 	$file =~ s/^<//;
 	$self->{'_no_head'} = $no_head;
 	$self->{'_no_sq'} = $no_sq;
-	$self->{'_color_space'} = $color;
 
 	# get the sequence so Bio::DB::Sam can work with it
         my $refdb;
@@ -217,7 +214,7 @@
 		my $first_f =  ($qname =~ m#/1#) ? 0x40 : 0;
 		my $second_f = ($qname =~ m#/2#) ? 0x80 : 0;
 		my $flag = $paired_f | $strand_f | $op_strand_f | $first_f | $second_f;
-		
+
 		$pos++;
 		my $len = length $seq;
 		die unless $len == length $qual;
@@ -235,35 +232,28 @@
 		}
 		push @mismatch, $len-$last_pos;
 		@mismatch = reverse @mismatch if $strand eq '-';
-		my $mismatch = 'XA:i:'.scalar @detail; # we can't know seed length so assume v-mode
-		$mismatch .= "\t".join('',('MD:Z:', at mismatch));
-		
-		my @line = ($qname, $flag, $rname, $pos, $mapq, $cigar, undef, undef, undef, $seq, $qual, $mismatch, $dist);
-		
-		# someone who actually uses CS should confirm that this is always valid
-		push @line, 'CM:i:'.ceil(scalar (grep /!/,split('',$qual))/2) if $self->{'_color_space'};
-		
+		my $mismatch = join('',('MD:Z:', at mismatch));
+
 		if ($paired_f) {
-			$line[6] = '='; #NRNM
+			my $mrnm = '=';
 			if ($in_pair) {
 				my $mpos = $mate_line[3];
 				$mate_line[7] = $pos;
-				$line[8] = $mpos-$pos-$len; #ISIZE
-				$mate_line[8] = -$line[8]; #mate's ISIZE
+				my $isize = $mpos-$pos-$len;
+				$mate_line[8] = -$isize;
 				print $sam_tmp_h join("\t", at mate_line),"\n";
-				print $sam_tmp_h join("\t", at line),"\n";
+				print $sam_tmp_h join("\t",$qname, $flag, $rname, $pos, $mapq, $cigar, $mrnm, $mpos, $isize, $seq, $qual, $mismatch, $dist),"\n";
 				$in_pair = 0;
 			} else {
 				$mlen = $len;
-				@mate_line = ($qname, $flag, $rname, $pos, $mapq, $cigar, '=', undef, undef, $seq, $qual, $mismatch, $dist);
-				push @mate_line, 'CM:i:'.ceil(scalar (grep /!/,split('',$qual))/2) if $self->{'_color_space'};
+				@mate_line = ($qname, $flag, $rname, $pos, $mapq, $cigar, $mrnm, undef, undef, $seq, $qual, $mismatch, $dist);
 				$in_pair = 1;
 			}
 		} else {
-			$line[6] = '*'; #NRNM
-			$line[7] = 0; #MPOS
-			$line[8] = 0; #ISIZE
-			print $sam_tmp_h join("\t", at line),"\n";
+			my $mrnm = '*';
+			my $mpos = 0;
+			my $isize = 0;
+			print $sam_tmp_h join("\t",$qname, $flag, $rname, $pos, $mapq, $cigar, $mrnm, $mpos, $isize, $seq, $qual, $mismatch, $dist),"\n";
 		}
 	}
 



More information about the Bioperl-guts-l mailing list