[Bioperl-l] retrieving top_SeqFeatures for RefSeq proteins fails

e.rapsomaniki at mail.cryst.bbk.ac.uk e.rapsomaniki at mail.cryst.bbk.ac.uk
Sat Apr 8 09:08:00 EDT 2006


Hi

I am trying to retrieve the coding sequences associated with RefSeq proteins.
My code (below) works for non-RefSeq proteins (e.g. BAB26271) but not for
RefSeq proteins: no sequence features are retrieved, although I checked the
web page and a coded_by qualifier should be there. Any suggestions? I am using
BioPerl 1.4.

Here's my code:
use strict;
use warnings;

use Bio::Seq;
use Bio::DB::GenPept;
use Bio::DB::GenBank;
use Bio::DB::RefSeq;
use Bio::Factory::FTLocationFactory;    # instantiated below; was never loaded
use Bio::SeqFeature::Generic;           # instantiated below; was never loaded

# Fetch a protein record, look for a CDS feature carrying a 'coded_by'
# qualifier ("<nucleotide accession>:<location>"), fetch that nucleotide
# record, and print the spliced coding sequence the location describes.

my $gb = Bio::DB::GenBank->new;
my $gp = Bio::DB::RefSeq->new;

# NOTE(review): Bio::DB::GenPept is loaded but never used. If the record
# returned by Bio::DB::RefSeq carries no features, fetching the NP_ accession
# through Bio::DB::GenPept may be worth trying -- unconfirmed, verify.
my $prot_obj = $gp->get_Seq_by_acc("NP_001008293");

# 'return' is illegal at file scope ("Can't return outside a subroutine"),
# so abort explicitly instead.
die "Could not retrieve protein record NP_001008293\n"
    unless defined $prot_obj;

# factory to turn strings into Bio::Location objects
my $loc_factory = Bio::Factory::FTLocationFactory->new;
my $orf;

# Fetch the feature list once and reuse it for both the diagnostic print
# and the loop.
my @features = $prot_obj->top_SeqFeatures;
print "@features\n";    # prints an empty line when no features came back

for my $feat (@features) {
    print $feat->primary_tag, "\n";
    next unless $feat->primary_tag eq 'CDS';

    # Asking for a tag that is absent is an error in BioPerl, so check first.
    next unless $feat->has_tag('coded_by');

    my @coded_by = $feat->get_tag_values('coded_by');
    print @coded_by, "\n";

    # Split on the first colon only, so the location part stays intact.
    my ($nuc_acc, $loc_str) = split /:/, $coded_by[0], 2;
    unless (defined $nuc_acc && length $nuc_acc && defined $loc_str) {
        warn "Cannot parse coded_by value '$coded_by[0]'\n";
        next;
    }
    #$nuc_acc=~ s/\..*//;

    my $nuc_obj = $gb->get_Seq_by_acc($nuc_acc);
    die "Could not retrieve nucleotide record $nuc_acc\n"
        unless defined $nuc_obj;

    my $loc_object = $loc_factory->from_string($loc_str);

    # create a Feature object by using a Location
    my $feat_obj = Bio::SeqFeature::Generic->new(-location => $loc_object);

    # associate the Feature object with the nucleotide Seq object, so that
    # spliced_seq has a sequence to cut from
    $nuc_obj->add_SeqFeature($feat_obj);

    my $cds_obj = $feat_obj->spliced_seq;
    $orf = $cds_obj->seq;
}

# $orf stays undefined when no CDS/coded_by feature was found; report that
# instead of interpolating an undefined value.
if (defined $orf) {
    print "$orf\n";
}
else {
    warn "No CDS feature with a coded_by qualifier was found\n";
}


----------------------------------------------------------------
This message was sent using IMP, the Internet Messaging Program.


More information about the Bioperl-l mailing list