[Bioperl-l] BLAST and parsing question

Robert Murphy robertemurphy at hotmail.com
Sun Apr 23 23:13:21 EDT 2006


#!/usr/bin/perl; 

use strict;
Hi, I have a sequence and db I made myself. I have to run blast on
subsequences of length 20 and identify the ones that are unique to my
sequence. Any suggestion on how to go about doing this? I can get blast
to "blast" my sequence, at length 20, but I'm having lots of trouble
with parsing. There are so many parameters to set up. Any suggstions on
which ones I need to accomplish my goal. The current code I'm using
gives me 1000's of OUT files. There has to be a better way than having
to read get of the OUT files. Below is the code I'm using. Thank you in
advance.

# installed bioperl locally, use local bioperl lib
#use lib="/home/murphyr/lib/";

use Bio::Seq; 
use Bio::SeqIO::fasta; 
use Bio::Tools::Blast; 
use Bio::Tools::Run::StandAloneBlast; 
use Bio::Tools::SeqStats; 
use Data::Dumper; 
print "Program to blast subsequences \n"; 

print "Enter File name of Sequence in FASTA Format \n";
my $inputfile = <STDIN>;
print "$inputfile \n";
my $inseq1 = Bio::SeqIO ->new('-file' => "$inputfile", '-format'=>
'fasta');
my $outseq1 = new Bio::SeqIO (-fh  => \*STDOUT, -format => 'fasta');
my $seq1 = $inseq1 -> next_seq();
my $seq_stats1 = Bio::Tools::SeqStats->new(-seq=>$seq1);
my $monomer_ref = $seq_stats1 -> count_monomers();
my @k = keys %$monomer_ref;
my @v = values %$monomer_ref;
print "@k\n";
print "@v\n";
my $mytotal = 0;
foreach my $value (@v) {
    $mytotal = $mytotal + $value;
}

# print "$seq1 -> seq()\n";
my $did = $seq1 -> display_id();

print "$did\n";

print "Here \n";
# print $outseq1 -> write_seq($seq1);

my $start_pos = 0;
my $end_pos = 24;  

while ($end_pos != $mytotal) { 

$start_pos = $start_pos + 1;
$end_pos = $end_pos + 1; 

my $subseq = $seq1 -> subseq($start_pos, $end_pos);

print "$subseq \n"; 

print "Now doing blast on $subseq \n"; 

my @params = ('program'  => 'blastn',
             'database' => 'virus-db',
             'outfile' => "$subseq.out"); 

my $factory = Bio::Tools::Run::StandAloneBlast->new(@params);
my $subinput = Bio::Seq->new('-id'=>"sub test query",
                         '-seq'=>"$subseq");
my $blast_report = $factory->blastall($subinput);

# Get the report
my $searchio = new Bio::SearchIO ('-format' => 'blast',
                                 '-file'   => "$subseq.out");
my $result = $searchio->next_result;
# Get info about the entire report
$result->database_name;
my $algorithm_type =  $result->algorithm;
# get info about the first hit
my $hit = $result->next_hit;
my $hit_name = $hit->name ;
# get info about the first hsp of the first hit
my $hsp = $hit->next_hsp;
my $hsp_start = $hsp->query->start;

print "Now parsing the blast reports using SearchIO \n"; 
#
# Not being selective here ... print the whole she-bang! ...
#
print "Algorithm Type \n";  
print "$algorithm_type \n";

while ( (my $khit, my $vhit) = each %{$hit}) { 
     print "$khit => $vhit \n"; 
      } 

print Dumper(\%$hit); 

print "Hit Name \n"; 
print "$hit_name\n";

while ( (my $khit,my $vhit) = each %{$hsp}) {
      if ($khit eq "_evalue") {   
      print "$khit => $vhit \n";
      }  
      }


More information about the Bioperl-l mailing list