[Bioperl-l] FileCache.pm error

Marcelino Suzuki suzuki at cbl.umces.edu
Sun Jun 20 19:02:22 EDT 2004

	I am trying to run the script below, written by Jason Stajich, for
getting CDS sequences out of GenBank. I saved it as test2.pl, and when I
run it I get the following error message, which I believe is caused by my
BioPerl configuration (I just installed BioPerl on Mac OS X):

	------------- EXCEPTION  -------------
MSG: Could not open primary index file
STACK Bio::DB::FileCache::_open_database  
STACK Bio::DB::FileCache::new  
STACK toplevel test2.pl:14

	Does anyone have any idea why I get this error?



#!/usr/bin/perl -w
#
# test2.pl - print the spliced nucleotide CDS sequence behind each protein GI.
#
# For every protein GI in @protgis the script fetches the GenPept record,
# reads the 'coded_by' tag of each CDS feature (split below on ':' into an
# accession and a location string), fetches the corresponding nucleotide
# record from GenBank and prints the spliced sequence of that location.
# Both remote databases are wrapped in a Bio::DB::FileCache.
use strict;
use Bio::DB::GenBank;
use Bio::DB::GenPept;
use Bio::DB::FileCache;
use Bio::Factory::FTLocationFactory;
use Bio::SeqFeature::Generic;

my $ntdb  = Bio::DB::GenBank->new;
my $pepdb = Bio::DB::GenPept->new;

# The cache index files live in this directory.  It is created here if it
# is missing, because the script otherwise never creates it.
# NOTE(review): a missing directory is a likely cause of the
# "Could not open primary index file" exception - confirm on the failing
# machine (a broken DB_File install would be another candidate).
my $cache_dir = '/tmp/cache';
unless ( -d $cache_dir ) {
    mkdir( $cache_dir, 0755 )
        or die "could not create cache directory $cache_dir: $!\n";
}

# do some caching in the event you're pulling up the same
# chromosome and/or you are debugging
my $cachent = Bio::DB::FileCache->new(-kept  => 1,
                                      -file  => "$cache_dir/nt.idx",
                                      -seqdb => $ntdb);

my $cachepep = Bio::DB::FileCache->new(-kept  => 1,
                                       -file  => "$cache_dir/pep.idx",
                                       -seqdb => $pepdb);

# obj to turn strings into Bio::Location object
my $locfactory = Bio::Factory::FTLocationFactory->new;

# you might get these from a file (and they can be accessions too)
my @protgis = (10956263);

foreach my $gi ( @protgis ) {
    my $protseq = $cachepep->get_Seq_by_id($gi);
    if( ! $protseq ) {
        print STDERR "could not find a seq for gi:$gi\n";
        next;
    }

    foreach my $cds ( grep { $_->primary_tag eq 'CDS' }
                      $protseq->get_SeqFeatures() ) {
        # skip CDS features that carry no 'coded_by' tag
        next unless( $cds->has_tag('coded_by') );

        my ($codedby) = $cds->each_tag_value('coded_by');
        my ($ntacc,$loc) = split(/\:/, $codedby);

        # a value without an "accession:location" shape cannot be resolved
        next unless defined $ntacc && defined $loc;

        # genbank wants an accession not a versioned one
        $ntacc =~ s/\.\d+//;

        my $cdslocation = $locfactory->from_string($loc);
        my $cdsfeature  = Bio::SeqFeature::Generic->new(-location => $cdslocation);

        my $ntseq = $cachent->get_Seq_by_acc($ntacc);
        next unless $ntseq;

        $ntseq->add_SeqFeature($cdsfeature); # locate the feature on a seq
        my $cdsseq = $cdsfeature->spliced_seq();
        print "cds seq is ", $cdsseq->seq(), "\n";
    }
}

