[Bioperl-guts-l] [15176] bioperl-live/trunk: parse DBlinks now supported
Jason Stajich
jason at dev.open-bio.org
Tue Dec 16 00:51:46 EST 2008
Revision: 15176
Author: jason
Date: 2008-12-16 00:51:46 -0500 (Tue, 16 Dec 2008)
Log Message:
-----------
parse DBlinks now supported
Modified Paths:
--------------
bioperl-live/trunk/Bio/SeqIO/bsml.pm
bioperl-live/trunk/t/SeqIO/bsml.t
Modified: bioperl-live/trunk/Bio/SeqIO/bsml.pm
===================================================================
--- bioperl-live/trunk/Bio/SeqIO/bsml.pm 2008-12-16 05:11:04 UTC (rev 15175)
+++ bioperl-live/trunk/Bio/SeqIO/bsml.pm 2008-12-16 05:51:46 UTC (rev 15176)
@@ -190,8 +190,8 @@
# Assume that title attribute contains the best display id
if (my $val = $xmlSeq->getAttribute( "title")) {
- $bioSeq->display_id($val);
- }
+ $bioSeq->display_id($val);
+ }
# Set the molecule type
if (my $val = $xmlSeq->getAttribute( "molecule" )) {
@@ -232,20 +232,22 @@
# Sticky wicket here - data not controlled by schema, could be anything
my @seqDesc = ();
my %specs = ('common_name' => 'y',
- 'genus' => 'y',
- 'species' => 'y',
- 'sub_species' => 'y', );
+ 'genus' => 'y',
+ 'species' => 'y',
+ 'sub_species' => 'y',
+ );
my %seqMap = (
- 'add_date' => [ 'date' ],
- 'keywords' => [ 'keyword', ],
- 'seq_version' => [ 'version' ],
- 'division' => [ 'division' ],
+ 'add_date' => [ qw(date date-created date-last-updated)],
+ 'keywords' => [ 'keyword', ],
+ 'seq_version' => [ 'version' ],
+ 'division' => [ 'division' ],
'add_secondary_accession' => ['accession'],
- 'pid' => ['pid'],
- 'primary_id' => [ 'primary.id', 'primary_id' ],
+ 'pid' => ['pid'],
+ 'primary_id' => [ 'primary.id', 'primary_id' ],
);
+ my @links;
my $floppies = &GETFLOPPIES($xmlSeq);
- foreach my $attr (@{$floppies}) {
+ for my $attr (@{$floppies}) {
# Don't want to get attributes from <Feature> or <Table> elements yet
my $parent = $attr->getParentNode->getNodeName;
next unless($parent eq "Sequence" || $parent eq "Feature-tables");
@@ -258,21 +260,32 @@
}
my $value = "";
# Cycle through the Seq methods:
- foreach my $method (keys %seqMap) {
+ for my $method (keys %seqMap) {
# Cycle through potential matching attributes:
- foreach my $match (@{$seqMap{$method}}) {
+ for my $match (@{$seqMap{$method}}) {
# If the <Attribute> name matches one of the keys,
# set $value, unless it has already been set
$value ||= $content if ($name =~ /$match/i);
}
if ($value ne "") {
+
+ if( $method eq 'seq_version'&& $value =~ /\S+\.(\d+)/ ) {
+ # hack for the fact that data in version is actually
+ # ACCESSION.VERSION
+ ($value) = $1;
+ }
$bioSeq->$method($value);
last;
}
}
+ if( $name eq 'database-xref' ) {
+ my ($link_id,$link_db) = split(/:/,$value);
+ push @links, Bio::Annotation::DBLink->new(-primary_id => $link_id,
+ -database => $link_db);
+ }
next if ($value ne "");
- if ($name =~ /^species$/i) { # Uh, it's the species designation?
+ if ($name =~ /^species$/i) { # Uh, it's the species designation?
if ($content =~ / /) {
# Assume that a full species name has been provided
# This will screw up if the last word is the subspecies...
@@ -283,11 +296,11 @@
}
next;
}
- if ($name =~ /sub[_ ]?species/i) { # Should be the subspecies...
+ if ($name =~ /sub[_ ]?species/i) { # Should be the subspecies...
$species->sub_species( $content );
next;
}
- if ($name =~ /classification/i) { # Should be species classification
+ if ($name =~ /classification/i) { # Should be species classification
# We will assume that there are spaces separating the terms:
my @bits = split " ", $content;
# Now make sure there is not other cruft as well (eg semi-colons)
@@ -300,7 +313,7 @@
}
if ($name =~ /comment/) {
my $com = Bio::Annotation::Comment->new('-text' => $content);
- # $bioSeq->annotation->add_Comment($com);
+ # $bioSeq->annotation->add_Comment($com);
$bioSeq->annotation->add_Annotation('comment', $com);
next;
}
@@ -324,43 +337,40 @@
# Extract out <Reference>s associated with the sequence
my @refs;
my %tags = (
- -title => "RefTitle",
- -authors => "RefAuthors",
+ -title => "RefTitle",
+ -authors => "RefAuthors",
-location => "RefJournal",
);
- foreach my $ref ( $xmlSeq->getElementsByTagName ("Reference") ) {
+ for my $ref ( $xmlSeq->getElementsByTagName ("Reference") ) {
my %refVals;
- foreach my $tag (keys %tags) {
+ for my $tag (keys %tags) {
my $rt = &FIRSTDATA($ref->getElementsByTagName($tags{$tag})
->item(0));
- unless ($rt) {
- $self->warn("No data returned for $tag");
- next;
- }
- $rt =~ s/^[\s\r\n]+//; # Kill leading space
- $rt =~ s/[\s\r\n]+$//; # Kill trailing space
- $rt =~ s/[\s\r\n]+/ /; # Collapse internal space runs
+ next unless ($rt);
+ $rt =~ s/^[\s\r\n]+//; # Kill leading space
+ $rt =~ s/[\s\r\n]+$//; # Kill trailing space
+ $rt =~ s/[\s\r\n]+/ /; # Collapse internal space runs
$refVals{$tag} = $rt;
}
my $reference = Bio::Annotation::Reference->new( %refVals );
-
+
# Pull out any <Reference> information hidden in <Attributes>
my %refMap = (
- comment => [ 'comment', 'remark' ],
- medline => [ 'medline', ],
- pubmed => [ 'pubmed' ],
- start => [ 'start', 'begin' ],
- end => [ 'stop', 'end' ],
+ comment => [ 'comment', 'remark' ],
+ medline => [ 'medline', ],
+ pubmed => [ 'pubmed' ],
+ start => [ 'start', 'begin' ],
+ end => [ 'stop', 'end' ],
);
my @refCom = ();
my $floppies = &GETFLOPPIES($ref);
- foreach my $attr (@{$floppies}) {
- my ($name, $content) = &FLOPPYVALS($attr);
+ for my $attr (@{$floppies}) {
+ my ($name, $content) = &FLOPPYVALS($attr);
my $value = "";
# Cycle through the Seq methods:
- foreach my $method (keys %refMap) {
+ for my $method (keys %refMap) {
# Cycle through potential matching attributes:
- foreach my $match (@{$refMap{$method}}) {
+ for my $match (@{$refMap{$method}}) {
# If the <Attribute> name matches one of the keys,
# set $value, unless it has already been set
$value ||= $content if ($name =~ /$match/i);
@@ -383,18 +393,18 @@
}
push @refs, $reference;
}
- $bioSeq->annotation->add_Annotation('reference'=>$_) foreach @refs;
-
+ $bioSeq->annotation->add_Annotation('reference' => $_) for @refs;
+ my $ann_col = $bioSeq->annotation;
# Extract the <Feature>s for this <Sequence>
- foreach my $feat ( $xmlSeq->getElementsByTagName("Feature") ) {
+ for my $feat ( $xmlSeq->getElementsByTagName("Feature") ) {
$bioSeq->add_SeqFeature( $self->_parse_bsml_feature($feat) );
}
-
+
$species->classification( @classification );
$bioSeq->species( $species );
-
-# $seq->annotation->add_DBLink(@links); ->
-
+
+ $bioSeq->annotation->add_Annotation('dblink' => $_) for @links;
+
$self->{'current_node'}++;
return $bioSeq;
}
@@ -580,7 +590,7 @@
my %mol = ('dna' => 'DNA', 'rna' => 'RNA', 'protein' => 'AA');
my @xmlSequences;
- foreach my $bioSeq (@{$seqref}) {
+ for my $bioSeq (@{$seqref}) {
my $xmlSeq = $xml->createElement("Sequence");
my $FTs = $xml->createElement("Feature-tables");
@@ -590,19 +600,20 @@
my $seqDesc = [];
push @{$seqDesc}, ["comment" , "This file generated to BSML 2.2 standards - joins will be collapsed to a single feature enclosing all members of the join"];
push @{$seqDesc}, ["description" , eval{$bioSeq->desc}];
- foreach my $kwd ( eval{$bioSeq->get_keywords} ) {
+ for my $kwd ( eval{$bioSeq->get_keywords} ) {
push @{$seqDesc}, ["keyword" , $kwd];
}
push @{$seqDesc}, ["keyword" , eval{$bioSeq->keywords}];
- push @{$seqDesc}, ["version" , eval{$bioSeq->seq_version}];
+ push @{$seqDesc}, ["version" , eval{
+ join(".", $bioSeq->accession_number, $bioSeq->seq_version); }];
push @{$seqDesc}, ["division" , eval{$bioSeq->division}];
push @{$seqDesc}, ["pid" , eval{$bioSeq->pid}];
# push @{$seqDesc}, ["bio_object" , ref($bioSeq)];
push @{$seqDesc}, ["primary_id" , eval{$bioSeq->primary_id}];
- foreach my $dt (eval{$bioSeq->get_dates()} ) {
+ for my $dt (eval{$bioSeq->get_dates()} ) {
push @{$seqDesc}, ["date" , $dt];
}
- foreach my $ac (eval{$bioSeq->get_secondary_accessions()} ) {
+ for my $ac (eval{$bioSeq->get_secondary_accessions()} ) {
push @{$seqDesc}, ["secondary_accession" , $ac];
}
@@ -632,7 +643,7 @@
$attr{molecule} = $mol{ lc($bioSeq->molecule) } if $bioSeq->can('molecule');
- foreach my $a (keys %attr) {
+ for my $a (keys %attr) {
$xmlSeq->setAttribute($a, $attr{$a}) if (defined $attr{$a} &&
$attr{$a} ne "");
}
@@ -654,7 +665,7 @@
if (ref($bioSeq->species) eq 'Bio::Species') {
# Need to peer into Bio::Species ...
my @specs = ('common_name', 'genus', 'species', 'sub_species');
- foreach my $sp (@specs) {
+ for my $sp (@specs) {
next unless (my $val = $bioSeq->species()->$sp());
push @{$seqDesc}, [$sp , $val];
}
@@ -667,7 +678,7 @@
}
# Add the description <Attribute>s for the <Sequence>
- foreach my $seqD (@{$seqDesc}) {
+ for my $seqD (@{$seqDesc}) {
$self->_addel($xmlSeq, "Attribute", {
name => $seqD->[0], content => $seqD->[1]}) if ($seqD->[1]);
}
@@ -676,7 +687,7 @@
unless ($#{$seqRefs} < 0) {
my $seqFT = $self->_addel($FTs, "Feature-table", {
title => "Sequence References", });
- foreach my $feat (@{$seqRefs}) {
+ for my $feat (@{$seqRefs}) {
$seqFT->appendChild($feat);
}
}
@@ -695,7 +706,7 @@
$args->{SKIPFEAT} eq 'all') {
$args->{SKIPFEAT} = { all => 1};
} else { $args->{SKIPFEAT} ||= {} }
- foreach my $class (keys %{$args->{SKIPFEAT}}) {
+ for my $class (keys %{$args->{SKIPFEAT}}) {
$args->{SKIPFEAT}{lc($class)} = $args->{SKIPFEAT}{$class};
}
# Loop through all the features
@@ -703,7 +714,7 @@
if (@features && !$args->{SKIPFEAT}{all}) {
my $ft = $self->_addel($FTs, "Feature-table", {
title => "Features", });
- foreach my $bioFeat (@features ) {
+ for my $bioFeat (@features ) {
my $featDesc = [];
my $class = lc($bioFeat->primary_tag);
# The user may have specified to ignore this type of feature
@@ -718,7 +729,7 @@
@@ Diff output truncated at 10000 characters. @@
More information about the Bioperl-guts-l
mailing list