[Bioperl-guts-l] bioperl-live/Bio/Tools Glimmer.pm,1.6,1.7
Christopher John Fields
cjfields at dev.open-bio.org
Thu Feb 15 14:36:13 EST 2007
Update of /home/repository/bioperl/bioperl-live/Bio/Tools
In directory dev.open-bio.org:/tmp/cvs-serv390/Bio/Tools
Modified Files:
Glimmer.pm
Log Message:
Bug 2206 - updates to allow all Glimmer formats to be parsed
Index: Glimmer.pm
===================================================================
RCS file: /home/repository/bioperl/bioperl-live/Bio/Tools/Glimmer.pm,v
retrieving revision 1.6
retrieving revision 1.7
diff -C2 -d -r1.6 -r1.7
*** Glimmer.pm 26 Sep 2006 22:03:17 -0000 1.6
--- Glimmer.pm 15 Feb 2007 19:36:11 -0000 1.7
***************
*** 13,17 ****
=head1 NAME
! Bio::Tools::Glimmer - parser for GlimmerM/GlimmerHMM eukaryotic gene predictions
=head1 SYNOPSIS
--- 13,18 ----
=head1 NAME
! Bio::Tools::Glimmer - parser for Glimmer 2.X/3.X prokaryotic and
! GlimmerM/GlimmerHMM eukaryotic gene predictions
=head1 SYNOPSIS
***************
*** 19,25 ****
--- 20,31 ----
use Bio::Tools::Glimmer;
+ # file
my $parser = new Bio::Tools::Glimmer(-file => $file);
# filehandle:
$parser = Bio::Tools::Glimmer->new( -fh => \*INPUT );
+ # provide a sequence identifier (Glimmer 2.X)
+ my $parser = Bio::Tools::Glimmer->new(-file => $file, -seqname => seqname);
+ # force format (override automatic detection)
+ my $parser = Bio::Tools::Glimmer->new(-file => $file, -format => 'GlimmerM');
# parse the results
***************
*** 46,54 ****
=head1 DESCRIPTION
! This is a module for parsing GlimmerM and GlimmerHMM predictions
It will create gene objects from the prediction report which can
be attached to a sequence using Bioperl objects, or output as GFF
suitable for loading into Bio::DB::GFF for use with Gbrowse.
GlimmerM is open source and available at
L<http://www.tigr.org/software/glimmerm/>.
--- 52,63 ----
=head1 DESCRIPTION
! This is a module for parsing Glimmer, GlimmerM and GlimmerHMM predictions.
It will create gene objects from the prediction report which can
be attached to a sequence using Bioperl objects, or output as GFF
suitable for loading into Bio::DB::GFF for use with Gbrowse.
+ Glimmer is open source and available at
+ L<http://www.cbcb.umd.edu/software/glimmer/>.
+
GlimmerM is open source and available at
L<http://www.tigr.org/software/glimmerm/>.
***************
*** 57,65 ****
L<http://www.cbcb.umd.edu/software/GlimmerHMM/>.
! =head1 BUGS
- This module does B<not> parse Glimmer2 or Glimmer3 bacterial gene
- prediction files. Details on their output formats can be found at
- L<http://www.cbcb.umd.edu/software/glimmer/>.
=head1 FEEDBACK
--- 66,76 ----
L<http://www.cbcb.umd.edu/software/GlimmerHMM/>.
! Note that Glimmer 2.X will only process the first
! sequence in a fasta file, and the prediction report does not contain any
! sort of sequence identifier
!
! Note that Glimmer 3.X produces two output files. This module only parses
! the .predict file.
=head1 FEEDBACK
***************
*** 128,132 ****
Function: Builds a new Bio::Tools::Glimmer object
Returns : an instance of Bio::Tools::Glimmer
! Args :
--- 139,143 ----
Function: Builds a new Bio::Tools::Glimmer object
Returns : an instance of Bio::Tools::Glimmer
! Args : format ('Glimmer', 'GlimmerM', 'GlimmerHMM'), seqname
***************
*** 137,140 ****
--- 148,166 ----
my $self = $class->SUPER::new(@args);
+
+ my ($format, $seqname) = $self->_rearrange([qw(FORMAT SEQNAME)], @args);
+
+ # override automagic format detection
+ if (defined($format) &&
+ (($format eq 'Glimmer') ||
+ ($format eq 'GlimmerM') ||
+ ($format eq 'GlimmerHMM'))
+ ) {
+ $self->_format($format);
+ }
+
+ # hardwire seq_id when creating gene and exon objects (Glimmer 2.X)
+ $self->_seqname($seqname) if defined($seqname);
+
return $self;
}
***************
*** 209,213 ****
# if the prediction section hasn't been parsed yet, we do this now
$self->_parse_predictions() unless $self->_predictions_parsed();
!
# get next gene structure
$gene = $self->_prediction();
--- 235,239 ----
# if the prediction section hasn't been parsed yet, we do this now
$self->_parse_predictions() unless $self->_predictions_parsed();
!
# get next gene structure
$gene = $self->_prediction();
***************
*** 227,230 ****
--- 253,318 ----
sub _parse_predictions {
+
+ my ($self) = @_;
+
+
+ my %method = (
+ 'Glimmer' => '_parse_prokaryotic',
+ 'GlimmerM' => '_parse_eukaryotic',
+ 'GlimmerHMM' => '_parse_eukaryotic',
+ '_DEFAULT_' => '_parse_eukaryotic',
+ );
+
+ my $format = $self->_format();
+
+ if (!$format) {
+
+ while (my $line = $self->_readline()) {
+
+ if ( $line =~ /^Glimmer\S*\s+\(Version\s*\S+\)/ ) {
+ $format = 'GlimmerM';
+ $self->_pushback($line);
+ last;
+ }
+ elsif ( $line =~ /^Glimmer\S*$/ ) {
+ $format = 'GlimmerHMM';
+ $self->_pushback($line);
+ last;
+ }
+ elsif ($line =~ /^Putative Genes:$/) {
+ $format = 'Glimmer';
+ $self->_pushback($line);
+ last;
+ }
+ elsif ($line =~ /^>(\S+)/) {
+ $format = 'Glimmer';
+ $self->_pushback($line);
+ last;
+ }
+
+ }
+
+ }
+
+ my $method =
+ (exists($method{$format})) ? $method{$format} : $method{'_DEFAULT_'};
+
+ return $self->$method();
+
+ }
+
+
+ =head2 _parse_eukaryotic
+
+ Title : _parse_eukaryotic()
+ Usage : $obj->_parse_eukaryotic()
+ Function: Parses the prediction section. Automatically called by
+ next_prediction() if not yet done.
+ Example :
+ Returns :
+
+ =cut
+
+ sub _parse_eukaryotic {
my ($self) = @_;
***************
*** 235,239 ****
$source = "$1_$2";
next;
! } elsif( /^(Glimmer\S*)$/ ) { # GlimmerHMM has no version
$source = $1;
next;
--- 323,327 ----
$source = "$1_$2";
next;
! } elsif( /^(GlimmerHMM\S*)$/ ) { # GlimmerHMM has no version
$source = $1;
next;
***************
*** 284,287 ****
--- 372,462 ----
}
+ =head2 _parse_prokaryotic
+
+ Title : _parse_prokaryotic()
+ Usage : $obj->_parse_prokaryotic()
+ Function: Parses the prediction section. Automatically called by
+ next_prediction() if not yet done.
+ Example :
+ Returns :
+
+ =cut
+
+ sub _parse_prokaryotic {
+ my ($self) = @_;
+
+ # default value, possibly overriden later
+ my $source = 'Glimmer';
+
+ # Glimmer 2.X does not provide a sequence identifer
+ # in the prediction report
+ my $seqname = $self->_seqname();
+
+ while(defined($_ = $self->_readline())) {
+ # Glimmer 3.X does provide a sequence identifier -
+ # beware whitespace at the end (comes through from
+ # (the fasta file)
+ if ($_ =~ /^Putative Genes:$/) {
+ $source = 'Glimmer_2.X';
+ next;
+ }
+ # Glimmer 3.X sequence identifier
+ elsif ($_ =~ /^>(\S+)/) {
+ $seqname = $1;
+ $source = 'Glimmer_3.X';
+ next;
+ }
+ elsif (
+ # Glimmer 2.X prediction
+ (/^\s+(\d+)\s+ # gene num
+ (\d+)\s+(\d+)\s+ # start, end
+ \[([\+\-])\d{1}\s+ # strand
+ /ox ) ||
+ # Glimmer 3.X prediction
+ (/\w+(\d+)\s+ # orf (numeric portion)
+ (\d+)\s+(\d+)\s+ # start, end
+ ([\+\-])\d{1}\s+ # strand
+ /ox)) {
+ my ($genenum,$start,$end,$strand) =
+ ( $1,$2,$3,$4 );
+
+ # Glimmer 2.X predictions do not include
+ # the stop codon - this might extend the
+ # prediction off either end of the sequence
+ if ($source eq 'Glimmer_2.X') {
+ if ($strand eq '-') {
+ $end -= 3;
+ }
+ else {
+ $end += 3;
+ }
+ }
+
+ my $exon = new Bio::Tools::Prediction::Exon
+ ('-seq_id' => $seqname,
+ '-start' => $start,
+ '-end' => $end,
+ '-strand' => $strand eq '-' ? '-1' : '1',
+ '-source_tag' => $source,
+ '-primary_tag'=> 'exon',
+ '-tag' => { 'Group' => "GenePrediction_$genenum"},
+ );
+
+ my $gene = Bio::Tools::Prediction::Gene->new
+ (
+ '-seq_id' => $seqname,
+ '-primary_tag' => "gene",
+ '-source_tag' => $source,
+ '-tag' => { 'Group' => "GenePrediction_$genenum"},
+ );
+
+ $gene->add_exon($exon);
+ $self->_add_prediction($gene)
+ }
+ }
+
+ $self->_predictions_parsed(1);
+ }
+
=head2 _prediction
***************
*** 340,343 ****
--- 515,555 ----
}
+ =head2 _seqname
+
+ Title : _seqname
+ Usage : $obj->_seqname($seqname)
+ Function: internal (for Glimmer 2.X)
+ Example :
+ Returns : String
+
+ =cut
+
+ sub _seqname {
+ my ($self, $val) = @_;
+
+ $self->{'_seqname'} = $val if $val;
+ if(! exists($self->{'_seqname'})) {
+ $self->{'_seqname'} = 'unknown';
+ }
+ return $self->{'_seqname'};
+ }
+
+ =head2 _format
+
+ Title : _format
+ Usage : $obj->_format($format)
+ Function: internal
+ Example :
+ Returns : String
+
+ =cut
+
+ sub _format {
+ my ($self, $val) = @_;
+
+ $self->{'_format'} = $val if $val;
+
+ return $self->{'_format'};
+ }
1;
More information about the Bioperl-guts-l
mailing list