BioPerl

developer release

# Print the raw value of $Bio::Root::Version::VERSION
perl -MBio::Root::Version -e 'print $Bio::Root::Version::VERSION'

# Print the same variable through the %vd (version-string) format.
# The trailing backslash continues the command onto the next line; without it
# the shell would run "perl ... -e" with no program text.
perl -MBio::Root::Version -e \
    'printf "%vd\n", $Bio::Root::Version::VERSION'

# Print the version of a single module (here Bio::SeqIO) through %vd
perl -MBio::SeqIO -e 'printf "%vd ", $Bio::SeqIO::VERSION'

% perldoc MODULE

#!/usr/bin/perl
# methods - print the public methods of a Perl class, sorted, on one line.
# Usage: methods perl_class_name

use strict;
use warnings;

use Class::Inspector;

my $class = shift
    or die "Usage: methods perl_class_name";

# The name is interpolated into a string eval below, so only accept
# something that looks like a package name.
$class =~ /\A\w+(?:::\w+)*\z/
    or die "Invalid class name '$class'\n";

# Load the class and stop with the real error if that fails.
eval "require $class; 1"
    or die "Could not load $class: $@";

# methods() yields a false value instead of an array reference when the
# class cannot be inspected; check before dereferencing.
my $methods = Class::Inspector->methods( $class, 'full', 'public' )
    or die "No public methods found for $class\n";

print join( " ", sort @{$methods} ), "\n";

use IO::String;
use Bio::SeqIO;

# Wrap the sequence data held in $string in a filehandle, then let
# Bio::SeqIO read FASTA records from that handle.
# Direct Class->new calls are used instead of the indirect "new Class(...)"
# form, which Perl can mis-parse.
my $stringfh = IO::String->new($string);
my $seqio    = Bio::SeqIO->new(
    -fh     => $stringfh,
    -format => 'fasta',
);

while ( my $seq = $seqio->next_seq ) {
    # process each seq
}

use IO::String;
use Bio::SeqIO;

# $s is the buffer behind the IO::String handle: whatever is written to
# $io ends up in $s.
my $s;
my $io = IO::String->new($s);

# Direct Class->new call instead of the indirect "new Class(...)" form,
# which Perl can mis-parse.
my $seqOut = Bio::SeqIO->new( -format => 'swiss', -fh => $io );
$seqOut->write_seq($seq1);
print $s;
# $s contains the record in swissprot format and is stored in the string

# Build a FASTA index keyed on the ID that get_id() extracts from each header.
# -write_flag opens the index file for writing so make_index can populate it.
my $inx = Bio::Index::Fasta->new(
    -filename   => $indexname,
    -write_flag => 1,
);

# Hand over a code reference. A bare &get_id would call the sub right here
# and pass its return value to id_parser instead of the sub itself.
$inx->id_parser( \&get_id );
$inx->make_index($fastaname);

# Extract the GI number from a FASTA header line such as ">gi|12345|gb|...".
# Takes the header string; returns the digits that follow "gi|", or undef
# when the header does not start with ">gi|<digits>".
sub get_id {
    my $header = shift;

    # The pipe is escaped so it matches literally instead of acting as
    # alternation, and \d+ matches digits (a bare "d+" matches the letter d).
    # Capturing into a lexical avoids returning a stale $1 on a failed match.
    my ($gi) = $header =~ /^>gi\|(\d+)/;
    return $gi;
}

# -makeid takes a code reference; \&get_id passes the sub itself, whereas a
# bare &get_id would call it immediately and pass its return value.
$inx = Bio::DB::Fasta->new($fastaname, -makeid => \&get_id);

# Split a pipe-delimited display_id and keep fields 2, 4 and 5 as the GI,
# accession and locus; fields 1 and 3 are discarded.
my ( $gi, $acc, $locus );

# The pipe must be escaped: an unescaped /|/ is an empty alternation that
# matches between every character, so split would return single characters.
( undef, $gi, undef, $acc, $locus ) = split( /\|/, $seq->display_id );
$seq->accession_number($acc);


use Bio::SeqIO;

# Reader for the GenBank input file and writer for FASTA output on $out
my $seqin  = Bio::SeqIO->new( -format => 'genbank', -file => $file );
my $seqout = Bio::SeqIO->new( -format => 'fasta',   -fh   => $out );

# From Bio::SeqIO::fasta
$seqout->preferred_id_type('display');

# Records are numbered from 1; each one is written out with "foo<N>" as
# its display_id in place of the regular one.
my $count = 1;
while ( my $seq = $seqin->next_seq ) {
    $seq->display_id( 'foo' . $count++ );
    $seqout->write_seq($seq);
}

# Pass $some_string to desc(), the sequence object's description accessor
$seq->desc($some_string);

# Strand and frame of the hit side of the HSP
$hsp->hit->strand; $hsp->hit->frame;

# Strand and frame of the query side of the HSP
$hsp->query->strand; $hsp->query->frame;

# Combine the query frame and strand into one signed value: (frame + 1) * strand
# NOTE(review): presumably frame is 0-based and strand is +1/-1, so this
# yields a BLAST-style frame — confirm against the HSP documentation
my $blastframe = ($hsp->query->frame + 1) * $hsp->query->strand;

# Number the domains of a hit in order of their start coordinate.
# The domain list is fetched once and reused for both the total and the
# loop, instead of calling $hit->domains a second time.
my @domains   = sort { $a->start <=> $b->start } $hit->domains;
my $total     = scalar @domains;
my $domainnum = 1;
for my $domain (@domains) {
    print "domain $domainnum of $total,\n";
    $domainnum++;
}

# Everything returned by all_SeqFeatures(); fetched once and reused below.
my @features = $seq->all_SeqFeatures();

# Only the features whose primary tag is 'exon'.
my @exons = grep { $_->primary_tag eq 'exon' } @features;

# Features that carry at least one 'note' value matching the pattern in
# $noteval. The inner grep tests each note value against $noteval; testing
# $noteval alone would accept every feature that merely has a note.
my @f_with_note = grep {
    my @notes = $_->has_tag('note') ? $_->get_tag_values('note') : ();
    grep { /$noteval/ } @notes;
} @features;

# Print "start..end" for every sub-location of each CDS feature whose
# location is a split location.
for my $feature ( $seqobj->top_SeqFeatures ) {
    next unless $feature->location->isa('Bio::Location::SplitLocationI');
    next unless $feature->primary_tag eq 'CDS';

    for my $part ( $feature->location->sub_Location ) {
        print $part->start, "..", $part->end, "\n";
    }
}

use Bio::Factory::FTLocationFactory;
use Bio::SeqFeature::Generic;    # used below; must be loaded explicitly
use Bio::DB::GenPept;
use Bio::DB::GenBank;

my $gp = Bio::DB::GenPept->new;
my $gb = Bio::DB::GenBank->new;

# factory to turn strings into Bio::Location objects
my $loc_factory = Bio::Factory::FTLocationFactory->new;

# Fetch the protein record; stop with a clear message if nothing came back.
my $prot_obj = $gp->get_Seq_by_id($protein_gi)
    or die "Could not retrieve a protein record for '$protein_gi'\n";

for my $feat ( $prot_obj->top_SeqFeatures ) {
    next unless $feat->primary_tag eq 'CDS';

    # Skip CDS features without a coded_by tag instead of failing on them.
    next unless $feat->has_tag('coded_by');

    # example: 'coded_by="U05729.1:1..122"'
    my @coded_by = $feat->get_tag_values('coded_by');
    my ( $nuc_acc, $loc_str ) = split /:/, $coded_by[0];

    my $nuc_obj = $gb->get_Seq_by_acc($nuc_acc);
    if ( !$nuc_obj ) {
        warn "Could not retrieve nucleotide record '$nuc_acc'\n";
        next;
    }

    # create Bio::Location object from a string
    my $loc_object = $loc_factory->from_string($loc_str);

    # create a Feature object by using a Location
    my $feat_obj = Bio::SeqFeature::Generic->new( -location => $loc_object );

    # associate the Feature object with the nucleotide Seq object
    $nuc_obj->add_SeqFeature($feat_obj);

    my $cds_obj = $feat_obj->spliced_seq;
    print "CDS sequence is ", $cds_obj->seq, "\n";
}


# Fetch the record for $gi, then print the spliced sequence of every CDS
# feature returned by top_SeqFeatures.
my $seq_obj = $db->get_Seq_by_id($gi);
for my $feat ( $seq_obj->top_SeqFeatures ) {
    next unless $feat->primary_tag eq 'CDS';

    my $cds_obj = $feat->spliced_seq;
    print "CDS sequence is ", $cds_obj->seq, "\n";
}

# Translate every CDS in a GenBank file, passing $db to spliced_seq via -db.
my $db = Bio::DB::GenBank->new();

# The stream is named $seq_in so that it matches the variable the loop
# reads from; previously it was created as $io and the loop used an
# undeclared $seq_in.
my $seq_in = Bio::SeqIO->new( -file => 'funnyfile.gb', -format => 'genbank' );

while ( my $seq = $seq_in->next_seq ) {
    for my $feat ( $seq->get_SeqFeatures ) {
        if ( $feat->primary_tag eq 'CDS' ) {
            my $cds = $feat->spliced_seq( -db => $db, -nosort => 0 );
            print $cds->translate->seq, "\n";
        }
    }
}

use Bio::Location::Simple; 
# Describe the region from $start to $end with strand set to "-1"
my $location = Bio::Location::Simple->new(-start => $start,
                                          -end   => $end,
                                          -strand => "-1");

# assume we already have a sequence object
# The location object is passed to subseq() in place of start/end numbers.
# NOTE(review): presumably the "-1" strand is what makes the result the
# reverse complement — confirm against the subseq documentation
my $rev_comp_substr = $seq_obj->subseq($location);

# Pass the sequence objects in @seqs to Bio::SeqUtils->cat and keep its
# return value. The module is loaded here, the variable is declared, and
# the statement is terminated so the line compiles on its own.
use Bio::SeqUtils;
my $merged_seq = Bio::SeqUtils->cat(@seqs);

About this FAQ

What is this FAQ?

What if my question isn’t answered here?

How can I tell what version of BioPerl is installed?

BioPerl in General

What is BioPerl?

Where do I go to get the latest release?

What do you mean by developer release?

How can I learn how to use a module?

I’m interested in the bleeding edge version of the code, where can I get it?

How should I cite BioPerl?

What are the license terms for BioPerl?

I want to help, where do I start?

I’ve got an idea for a module; how do I contribute it?

How do I submit a patch or enhancement to BioPerl?

Why can’t I easily get a list of all the methods an object can call?

Can you explain the Object Model design and rationale?

Sequences

How do I parse a sequence file?

I can’t get sequences with Bio::DB::GenBank any more, why not?

How can I get NT_ or NM_ or NP_ accessions from NCBI (Reference sequences)?

How can I use Bio::SeqIO to parse sequence data to or from a string?

How do I use Bio::Index::Fasta and index on different ids?

Accession numbers are not present for FASTA sequence files

How do I get genomic sequences when all I have is a gene identifier or name?

I would like to make my own custom fasta header - how do I do this?

Report Parsing

I want to parse BLAST output, how do I do this?

What was wrong with Bio::Tools::Blast?

I want to parse FASTA or NCBI -m7 (XML) format, how do I do this?

How can I generate a pairwise alignment of two sequences?

How do I get the frame for a translated search?

Can I get domain number from hmmpfam or hmmsearch output?

Annotations and Features

How do I retrieve all the features from a sequence?

How do I parse the CDS join or complement statements in GenBank or EMBL files to get the sub-locations?

How do I retrieve a nucleotide coding sequence when I have a protein gi number?

How do I get the complete spliced nucleotide sequence from the CDS section?

How do I get the complete spliced sequence when the coordinates refer to Genbank identifiers?

How do I get the reverse-complement of a sequence using the subseq method?

I get the warning (old style Annotation) on new style Annotation::Collection. What is wrong?

Utilities

How do I find all the ORFs in a nucleotide sequence? Antigenic sites in a protein? Calculate nucleotide melting temperature? Find repeats?

How do I do motif searches with BioPerl? Can I do “find all sequences that are 75% identical” to a given motif?

How do I merge a set of sequences along with their features and annotations?

Running external programs

How do I run BLAST from within BioPerl?

How do I run applications within BioPerl?

I’m trying to run a Bio::Tools::Run module and I’m seeing error messages like Can't locate Bio/Tools/Run/WrapperBase.pm.

Other BioPerl packages

What is bioperl-ext?

bioperl-ext won’t compile the staden IO lib part - what do I do?

What is bioperl-db?

What is bioperl-network?

What is bioperl-microarray?

What is bioperl-gui?

What is bioperl-pedigree?

BioPerl-related questions

I am using Ensembl. How do I do XYZ?

Why is the version of BioPerl (v.1.2.3) used in Ensembl so old? Haven’t there been bug fixes?

How do I parse the CDS `join` or `complement` statements in GenBank or EMBL files to get the sub-locations?

How do I get the reverse-complement of a sequence using the `subseq` method?

I’m trying to run a Bio::Tools::Run module and I’m seeing error messages like `Can't locate Bio/Tools/Run/WrapperBase.pm`.