[Bioperl-guts-l] bioperl-live/Bio/DB NCBIHelper.pm,1.41,1.42

Brian Osborne bosborne at pub.open-bio.org
Sat Feb 18 00:25:34 EST 2006


Update of /home/repository/bioperl/bioperl-live/Bio/DB
In directory pub.open-bio.org:/tmp/cvs-serv467

Modified Files:
	NCBIHelper.pm 
Log Message:
Minor edits


Index: NCBIHelper.pm
===================================================================
RCS file: /home/repository/bioperl/bioperl-live/Bio/DB/NCBIHelper.pm,v
retrieving revision 1.41
retrieving revision 1.42
diff -C2 -d -r1.41 -r1.42
*** NCBIHelper.pm	4 Jan 2006 04:05:48 -0000	1.41
--- NCBIHelper.pm	18 Feb 2006 05:25:32 -0000	1.42
***************
*** 20,24 ****
  =head1 SYNOPSIS
  
!  #Do not use this module directly.
  
   # get a Bio::DB::NCBIHelper object somehow
--- 20,24 ----
  =head1 SYNOPSIS
  
!  # Do not use this module directly.
  
   # get a Bio::DB::NCBIHelper object somehow
***************
*** 159,164 ****
  sub get_request {
      my ($self, @qualifiers) = @_;
!     my ($mode, $uids, $format, $query, $seq_start, $seq_stop) = $self->_rearrange([qw(MODE UIDS FORMAT QUERY SEQ_START SEQ_STOP)],
!                                                          @qualifiers);
  
      $mode = lc $mode;
--- 159,165 ----
  sub get_request {
      my ($self, @qualifiers) = @_;
!     my ($mode, $uids, $format, $query, $seq_start, $seq_stop) = 
! 		$self->_rearrange([qw(MODE UIDS FORMAT QUERY SEQ_START SEQ_STOP)],
! 								@qualifiers);
  
      $mode = lc $mode;
***************
*** 254,258 ****
   Title   : postprocess_data
   Usage   : $self->postprocess_data ( 'type' => 'string',
! 				     'location' => \$datastr);
   Function: process downloaded data before loading into a Bio::SeqIO
   Returns : void
--- 255,259 ----
   Title   : postprocess_data
   Usage   : $self->postprocess_data ( 'type' => 'string',
! 				                         'location' => \$datastr);
   Function: process downloaded data before loading into a Bio::SeqIO
   Returns : void
***************
*** 267,363 ****
  
  sub postprocess_data {
!     my ($self, %args) = @_;
!     my $data;
!     my $type = uc $args{'type'};
!     my $location = $args{'location'};
!     if( !defined $type || $type eq '' || !defined $location) {
! 	return;
!     } elsif( $type eq 'STRING' ) {
! 	$data = $$location; 
!     } elsif ( $type eq 'FILE' ) {
! 	open(TMP, $location) or $self->throw("could not open file $location");
! 	my @in = <TMP>;
! 	close TMP;
! 	$data = join("", @in);
!     }
  
!     # transform links to appropriate descriptions
!     if ($data =~ /\nCONTIG\s+/) {	
! 	$self->warn("CONTIG found. GenBank get_Stream_by_acc about to run."); 
      	my(@batch, at accession,%accessions, at location,$id,
! 	   $contig,$stream,$aCount,$cCount,$gCount,$tCount);
  
      	# process GenBank CONTIG join(...) into two arrays
      	$data =~ /(?:CONTIG\s+join\()((?:.+\n)+)(?:\/\/)/;
! 	$contig = $1;
      	$contig =~ s/\n|\)//g;
! 	foreach (split /\s*,\s*/,$contig){	    
! 	    if (/>(.+)<.+>:(.+)/) {
! 		($id) = split /\./, $1;		
! 		push @accession, $id;
! 		push @location, $2;
! 		$accessions{$id}->{'count'}++;
! 	    } elsif( /([\w\.]+):(.+)/ ) { 
! 		($id) = split /\./, $1;
! 		$accessions{$id}->{'count'}++;
! 		push @accession, $id;
! 		push @location, $2;
! 	    }
! 	}
  
! 	# grab multiple sequences by batch and join based location variable
! 	my @unique_accessions = keys %accessions;
! 	$stream = $self->get_Stream_by_acc(\@unique_accessions);
! 	$contig = "";
! 	my $ct = 0;
! 	while( my $seq = $stream->next_seq() ) {	    
! 	    if( $seq->accession_number !~ /$unique_accessions[$ct]/ ) {
! 		printf STDERR "warning, %s does not match %s\n",
! 		$seq->accession_number, $unique_accessions[$ct];
! 	    }
! 	    $accessions{$unique_accessions[$ct]}->{'seq'} = $seq;
! 	    $ct++;
! 	}
! 	for (my $i = 0; $i < @accession; $i++) {
! 	    my $seq = $accessions{$accession[$i]}->{'seq'};
! 	    unless( defined $seq ) {
! 		# seq not cached, get next sequence
! 		$self->warn("unable to find sequence $accession[$i]\n");
! 		return undef;
! 	    }
! 	    my($start,$end) = split(/\.\./, $location[$i]);
! 	    $contig .= $seq->subseq($start,$end-$start);
! 	}
  
! 	# count number of each letter in sequence
! 	$aCount = () = $contig =~ /a/ig;
! 	$cCount = () = $contig =~ /c/ig;
! 	$gCount = () = $contig =~ /g/ig;
! 	$tCount = () = $contig =~ /t/ig;
  
! 	# remove everything after and including CONTIG
! 	$data =~ s/(CONTIG[\s\S]+)$//i;
  
! 	# build ORIGIN part of data file using sequence and counts
! 	$data .= "BASE COUNT     $aCount a   $cCount c   $gCount g   $tCount t\n";
! 	$data .= "ORIGIN      \n";
! 	$data .= "$contig\n//";
!     }
!     else {
! 	$data =~ s/<a\s+href\s*=.+>\s*(\S+)\s*<\s*\/a\s*\>/$1/ig;
!     }
      
!     # fix gt and lt
!     $data =~ s/&gt;/>/ig;
!     $data =~ s/&lt;/</ig;
!     if( $type eq 'FILE'  ) {
! 	open(TMP, ">$location") or $self->throw("couldn't overwrite file $location");
! 	print TMP $data;
! 	close TMP;
!     } elsif ( $type eq 'STRING' ) {
! 	${$args{'location'}} = $data;
!     }
!     $self->debug("format is ". join(',',$self->request_format()). 
! 		 " data is\n$data\n");
  }
  
--- 268,364 ----
  
  sub postprocess_data {
! 	my ($self, %args) = @_;
! 	my $data;
! 	my $type = uc $args{'type'};
! 	my $location = $args{'location'};
! 	if( !defined $type || $type eq '' || !defined $location) {
! 		return;
! 	} elsif( $type eq 'STRING' ) {
! 		$data = $$location; 
! 	} elsif ( $type eq 'FILE' ) {
! 		open(TMP, $location) or $self->throw("could not open file $location");
! 		my @in = <TMP>;
! 		close TMP;
! 		$data = join("", @in);
! 	}
  
! 	# transform links to appropriate descriptions
! 	if ($data =~ /\nCONTIG\s+/) {	
! 		$self->warn("CONTIG found. GenBank get_Stream_by_acc about to run."); 
      	my(@batch, at accession,%accessions, at location,$id,
! 			$contig,$stream,$aCount,$cCount,$gCount,$tCount);
  
      	# process GenBank CONTIG join(...) into two arrays
      	$data =~ /(?:CONTIG\s+join\()((?:.+\n)+)(?:\/\/)/;
! 		$contig = $1;
      	$contig =~ s/\n|\)//g;
! 		foreach (split /\s*,\s*/,$contig){	    
! 			if (/>(.+)<.+>:(.+)/) {
! 				($id) = split /\./, $1;		
! 				push @accession, $id;
! 				push @location, $2;
! 				$accessions{$id}->{'count'}++;
! 			} elsif( /([\w\.]+):(.+)/ ) { 
! 				($id) = split /\./, $1;
! 				$accessions{$id}->{'count'}++;
! 				push @accession, $id;
! 				push @location, $2;
! 			}
! 		}
  
! 		# grab multiple sequences by batch and join based location variable
! 		my @unique_accessions = keys %accessions;
! 		$stream = $self->get_Stream_by_acc(\@unique_accessions);
! 		$contig = "";
! 		my $ct = 0;
! 		while( my $seq = $stream->next_seq() ) {	    
! 			if( $seq->accession_number !~ /$unique_accessions[$ct]/ ) {
! 				printf STDERR "warning, %s does not match %s\n",
! 				  $seq->accession_number, $unique_accessions[$ct];
! 			}
! 			$accessions{$unique_accessions[$ct]}->{'seq'} = $seq;
! 			$ct++;
! 		}
! 		for (my $i = 0; $i < @accession; $i++) {
! 			my $seq = $accessions{$accession[$i]}->{'seq'};
! 			unless( defined $seq ) {
! 				# seq not cached, get next sequence
! 				$self->warn("unable to find sequence $accession[$i]\n");
! 				return undef;
! 			}
! 			my($start,$end) = split(/\.\./, $location[$i]);
! 			$contig .= $seq->subseq($start,$end-$start);
! 		}
  
! 		# count number of each letter in sequence
! 		$aCount = () = $contig =~ /a/ig;
! 		$cCount = () = $contig =~ /c/ig;
! 		$gCount = () = $contig =~ /g/ig;
! 		$tCount = () = $contig =~ /t/ig;
  
! 		# remove everything after and including CONTIG
! 		$data =~ s/(CONTIG[\s\S]+)$//i;
  
! 		# build ORIGIN part of data file using sequence and counts
! 		$data .= "BASE COUNT     $aCount a   $cCount c   $gCount g   $tCount t\n";
! 		$data .= "ORIGIN      \n";
! 		$data .= "$contig\n//";
! 	}
! 	else {
! 		$data =~ s/<a\s+href\s*=.+>\s*(\S+)\s*<\s*\/a\s*\>/$1/ig;
! 	}
      
! 	# fix gt and lt
! 	$data =~ s/&gt;/>/ig;
! 	$data =~ s/&lt;/</ig;
! 	if( $type eq 'FILE'  ) {
! 		open(TMP, ">$location") or $self->throw("couldn't overwrite file $location");
! 		print TMP $data;
! 		close TMP;
! 	} elsif ( $type eq 'STRING' ) {
! 		${$args{'location'}} = $data;
! 	}
! 	$self->debug("format is ". join(',',$self->request_format()). 
! 					 " data is\n$data\n");
  }
  
***************
*** 430,450 ****
  
  sub _check_id {
!     my ($self, $ids) = @_;
  
!     # NT contigs can not be retrieved
!     $self->throw("NT_ contigs are whole chromosome files which are not part of regular".
! 		 "database distributions. Go to ftp://ftp.ncbi.nih.gov/genomes/.") 
! 	if $ids =~ /NT_/;
  
!     # Asking for a RefSeq from EMBL/GenBank
     
!     unless ($self->no_redirect) {
!      if ($ids =~ /N._/) {
! 	 $self->warn("[$ids] is not a normal sequence database but a RefSeq entry.".
! 	 	   " Redirecting the request.\n")
! 	     if $self->verbose >= 0;
! 	 return  new Bio::DB::RefSeq;
!      }
!     }
  }
  
--- 431,451 ----
  
  sub _check_id {
! 	my ($self, $ids) = @_;
  
! 	# NT contigs can not be retrieved
! 	$self->throw("NT_ contigs are whole chromosome files which are not part of regular".
! 					 "database distributions. Go to ftp://ftp.ncbi.nih.gov/genomes/.") 
! 	  if $ids =~ /NT_/;
  
! 	# Asking for a RefSeq from EMBL/GenBank
     
! 	unless ($self->no_redirect) {
! 		if ($ids =~ /N._/) {
! 			$self->warn("[$ids] is not a normal sequence database but a RefSeq entry.".
! 							" Redirecting the request.\n")
! 			  if $self->verbose >= 0;
! 			return  new Bio::DB::RefSeq;
! 		}
! 	}
  }
  
***************
*** 468,470 ****
--- 469,472 ----
  
  1;
+ 
  __END__



More information about the Bioperl-guts-l mailing list