[Bioperl-guts-l] [15176] bioperl-live/trunk: parse DBlinks now supported

Jason Stajich jason at dev.open-bio.org
Tue Dec 16 00:51:46 EST 2008


Revision: 15176
Author:   jason
Date:     2008-12-16 00:51:46 -0500 (Tue, 16 Dec 2008)

Log Message:
-----------
parse DBlinks now supported

Modified Paths:
--------------
    bioperl-live/trunk/Bio/SeqIO/bsml.pm
    bioperl-live/trunk/t/SeqIO/bsml.t

Modified: bioperl-live/trunk/Bio/SeqIO/bsml.pm
===================================================================
--- bioperl-live/trunk/Bio/SeqIO/bsml.pm	2008-12-16 05:11:04 UTC (rev 15175)
+++ bioperl-live/trunk/Bio/SeqIO/bsml.pm	2008-12-16 05:51:46 UTC (rev 15176)
@@ -190,8 +190,8 @@
 
     # Assume that title attribute contains the best display id
     if (my $val = $xmlSeq->getAttribute( "title")) {
-       $bioSeq->display_id($val);
-   }
+	$bioSeq->display_id($val);
+    }
 
     # Set the molecule type
     if (my $val = $xmlSeq->getAttribute( "molecule" )) {
@@ -232,20 +232,22 @@
     # Sticky wicket here - data not controlled by schema, could be anything
     my @seqDesc = ();
     my %specs = ('common_name' => 'y',
-		 'genus' => 'y',
-		 'species' => 'y',
-		 'sub_species' => 'y', );
+		 'genus'       => 'y',
+		 'species'     => 'y',
+		 'sub_species' => 'y', 
+		 );
     my %seqMap = (
-		  'add_date' => [ 'date' ],
-		  'keywords' => [ 'keyword', ],
-		  'seq_version' => [ 'version' ],
-		  'division' => [ 'division' ],
+		  'add_date'     => [ qw(date date-created date-last-updated)],
+		  'keywords'     => [ 'keyword', ],
+		  'seq_version'  => [ 'version' ],
+		  'division'     => [ 'division' ],
 		  'add_secondary_accession' => ['accession'],
-		  'pid' => ['pid'],
-		  'primary_id' => [ 'primary.id', 'primary_id' ],
+		  'pid'          => ['pid'],
+		  'primary_id'   => [ 'primary.id', 'primary_id' ],
 		  );
+    my @links;
     my $floppies = &GETFLOPPIES($xmlSeq);
-    foreach my $attr (@{$floppies}) {
+    for my $attr (@{$floppies}) {
 	# Don't want to get attributes from <Feature> or <Table> elements yet
 	my $parent = $attr->getParentNode->getNodeName;
 	next unless($parent eq "Sequence" || $parent eq "Feature-tables");
@@ -258,21 +260,32 @@
 	}
 	my $value = "";
 	# Cycle through the Seq methods:
-	foreach my $method (keys %seqMap) {
+	for my $method (keys %seqMap) {
 	    # Cycle through potential matching attributes:
-	    foreach my $match (@{$seqMap{$method}}) {
+	    for my $match (@{$seqMap{$method}}) {
 		# If the <Attribute> name matches one of the keys,
 		# set $value, unless it has already been set
 		$value ||= $content if ($name =~ /$match/i);
 	    }
 	    if ($value ne "") {
+		
+		if( $method eq 'seq_version'&& $value =~ /\S+\.(\d+)/ ) {
+		    # hack for the fact that data in version is actually
+		    # ACCESSION.VERSION
+		    ($value) = $1;
+		}
 		$bioSeq->$method($value);
 		last;
 	    }
 	}
+	if( $name eq 'database-xref' ) {
+	    my ($link_id,$link_db) = split(/:/,$value);
+	    push @links, Bio::Annotation::DBLink->new(-primary_id => $link_id,
+						      -database   => $link_db);
+	}
 	next if ($value ne "");
 
-	if ($name =~ /^species$/i) {   # Uh, it's the species designation?
+	if ($name =~ /^species$/i) { # Uh, it's the species designation?
 	    if ($content =~ / /) {
 		# Assume that a full species name has been provided
 		# This will screw up if the last word is the subspecies...
@@ -283,11 +296,11 @@
 	    }
 	    next;
 	}
-	if ($name =~ /sub[_ ]?species/i) {  # Should be the subspecies...
+	if ($name =~ /sub[_ ]?species/i) { # Should be the subspecies...
 	    $species->sub_species( $content );
 	    next;
 	}
-	if ($name =~ /classification/i) {  # Should be species classification
+	if ($name =~ /classification/i) { # Should be species classification
 	    # We will assume that there are spaces separating the terms:
 	    my @bits = split " ", $content;
 	    # Now make sure there is not other cruft as well (eg semi-colons)
@@ -300,7 +313,7 @@
 	}
 	if ($name =~ /comment/) {
 	    my $com = Bio::Annotation::Comment->new('-text' => $content);
-	  #  $bioSeq->annotation->add_Comment($com);
+	    #  $bioSeq->annotation->add_Comment($com);
 	    $bioSeq->annotation->add_Annotation('comment', $com);
 	    next;
 	}
@@ -324,43 +337,40 @@
     # Extract out <Reference>s associated with the sequence
     my @refs;
     my %tags = (
-		-title => "RefTitle",
-		-authors => "RefAuthors",
+		-title    => "RefTitle",
+		-authors  => "RefAuthors",
 		-location => "RefJournal",
 		);
-    foreach my $ref ( $xmlSeq->getElementsByTagName ("Reference") ) {
+    for my $ref ( $xmlSeq->getElementsByTagName ("Reference") ) {
 	my %refVals;
-	foreach my $tag (keys %tags) {
+	for my $tag (keys %tags) {
 	    my $rt = &FIRSTDATA($ref->getElementsByTagName($tags{$tag})
 				->item(0));
-        unless ($rt) {
-            $self->warn("No data returned for $tag");
-            next;
-        }
-	    $rt =~ s/^[\s\r\n]+//;  # Kill leading space
-	    $rt =~ s/[\s\r\n]+$//;  # Kill trailing space
-	    $rt =~ s/[\s\r\n]+/ /;  # Collapse internal space runs
+	    next unless ($rt);
+	    $rt =~ s/^[\s\r\n]+//; # Kill leading space
+	    $rt =~ s/[\s\r\n]+$//; # Kill trailing space
+	    $rt =~ s/[\s\r\n]+/ /; # Collapse internal space runs
 	    $refVals{$tag} = $rt;
 	}
 	my $reference = Bio::Annotation::Reference->new( %refVals );
-
+	
 	# Pull out any <Reference> information hidden in <Attributes>
 	my %refMap = (
-		      comment => [ 'comment', 'remark' ],
-		      medline => [ 'medline', ],
-		      pubmed => [ 'pubmed' ],
-		      start => [ 'start', 'begin' ],
-		      end => [ 'stop', 'end' ],
+		      comment         => [ 'comment', 'remark' ],
+		      medline         => [ 'medline', ],
+		      pubmed          => [ 'pubmed' ],
+		      start           => [ 'start', 'begin' ],
+		      end             => [ 'stop', 'end' ],		      
 		      );
 	my @refCom = ();
 	my $floppies = &GETFLOPPIES($ref);
-	foreach my $attr (@{$floppies}) {
-	    my ($name, $content) = &FLOPPYVALS($attr);
+	for my $attr (@{$floppies}) {
+	    my ($name, $content) = &FLOPPYVALS($attr);	    
 	    my $value = "";
 	    # Cycle through the Seq methods:
-	    foreach my $method (keys %refMap) {
+	    for my $method (keys %refMap) {
 		# Cycle through potential matching attributes:
-		foreach my $match (@{$refMap{$method}}) {
+		for my $match (@{$refMap{$method}}) {
 		    # If the <Attribute> name matches one of the keys,
 		    # set $value, unless it has already been set
 		    $value ||= $content if ($name =~ /$match/i);
@@ -383,18 +393,18 @@
 	}
 	push @refs, $reference;
     }
-    $bioSeq->annotation->add_Annotation('reference'=>$_) foreach @refs;
-
+    $bioSeq->annotation->add_Annotation('reference' => $_) for @refs;
+    my $ann_col = $bioSeq->annotation;
     # Extract the <Feature>s for this <Sequence>
-    foreach my $feat ( $xmlSeq->getElementsByTagName("Feature") ) {
+    for my $feat ( $xmlSeq->getElementsByTagName("Feature") ) {
 	$bioSeq->add_SeqFeature( $self->_parse_bsml_feature($feat) );
     }
-
+    
     $species->classification( @classification );
     $bioSeq->species( $species );
-
-# $seq->annotation->add_DBLink(@links);    ->
-
+    
+    $bioSeq->annotation->add_Annotation('dblink' => $_) for @links;
+    
     $self->{'current_node'}++;
     return $bioSeq;
 }
@@ -580,7 +590,7 @@
     my %mol = ('dna' => 'DNA', 'rna' => 'RNA', 'protein' => 'AA');
     my @xmlSequences;
 
-    foreach my $bioSeq (@{$seqref}) {
+    for my $bioSeq (@{$seqref}) {
 	my $xmlSeq = $xml->createElement("Sequence");
 	my $FTs    = $xml->createElement("Feature-tables");
 
@@ -590,19 +600,20 @@
 	my $seqDesc = [];
 	push @{$seqDesc}, ["comment" , "This file generated to BSML 2.2 standards - joins will be collapsed to a single feature enclosing all members of the join"];
 	push @{$seqDesc}, ["description" , eval{$bioSeq->desc}];
-	foreach my $kwd ( eval{$bioSeq->get_keywords} ) {
+	for my $kwd ( eval{$bioSeq->get_keywords} ) {
 	    push @{$seqDesc}, ["keyword" , $kwd];
 	}
 	push @{$seqDesc}, ["keyword" , eval{$bioSeq->keywords}];
-	push @{$seqDesc}, ["version" , eval{$bioSeq->seq_version}];
+	push @{$seqDesc}, ["version" , eval{
+	    join(".", $bioSeq->accession_number, $bioSeq->seq_version); }];
 	push @{$seqDesc}, ["division" , eval{$bioSeq->division}];
 	push @{$seqDesc}, ["pid" , eval{$bioSeq->pid}];
 #	push @{$seqDesc}, ["bio_object" , ref($bioSeq)];
 	push @{$seqDesc}, ["primary_id" , eval{$bioSeq->primary_id}];
-	foreach my $dt (eval{$bioSeq->get_dates()} ) {
+	for my $dt (eval{$bioSeq->get_dates()} ) {
 	    push @{$seqDesc}, ["date" , $dt];
 	}
-	foreach my $ac (eval{$bioSeq->get_secondary_accessions()} ) {
+	for my $ac (eval{$bioSeq->get_secondary_accessions()} ) {
 	    push @{$seqDesc}, ["secondary_accession" , $ac];
 	}
 
@@ -632,7 +643,7 @@
 	$attr{molecule} = $mol{ lc($bioSeq->molecule) } if $bioSeq->can('molecule');
 
 
-	foreach my $a (keys %attr) {
+	for my $a (keys %attr) {
 	    $xmlSeq->setAttribute($a, $attr{$a}) if (defined $attr{$a} &&
 						     $attr{$a} ne "");
 	}
@@ -654,7 +665,7 @@
 	if (ref($bioSeq->species) eq 'Bio::Species') {
 	    # Need to peer into Bio::Species ...
 	    my @specs = ('common_name', 'genus', 'species', 'sub_species');
-	    foreach my $sp (@specs) {
+	    for my $sp (@specs) {
 		next unless (my $val = $bioSeq->species()->$sp());
 		push @{$seqDesc}, [$sp , $val];
 	    }
@@ -667,7 +678,7 @@
 	}
 
 	# Add the description <Attribute>s for the <Sequence>
-	foreach my $seqD (@{$seqDesc}) {
+	for my $seqD (@{$seqDesc}) {
 	    $self->_addel($xmlSeq, "Attribute", {
 		name => $seqD->[0], content => $seqD->[1]}) if ($seqD->[1]);
 	}
@@ -676,7 +687,7 @@
 	unless ($#{$seqRefs} < 0) {
 	    my $seqFT = $self->_addel($FTs, "Feature-table", {
 		title => "Sequence References", });
-	    foreach my $feat (@{$seqRefs}) {
+	    for my $feat (@{$seqRefs}) {
 		$seqFT->appendChild($feat);
 	    }
 	}
@@ -695,7 +706,7 @@
 	    $args->{SKIPFEAT} eq 'all') {
 	    $args->{SKIPFEAT} = { all => 1};
 	} else { $args->{SKIPFEAT} ||= {} }
-	foreach my $class (keys %{$args->{SKIPFEAT}}) {
+	for my $class (keys %{$args->{SKIPFEAT}}) {
 	    $args->{SKIPFEAT}{lc($class)} = $args->{SKIPFEAT}{$class};
 	}
 	# Loop through all the features
@@ -703,7 +714,7 @@
 	if (@features && !$args->{SKIPFEAT}{all}) {
 	    my $ft = $self->_addel($FTs, "Feature-table", {
 		title => "Features", });
-	    foreach my $bioFeat (@features ) {
+	    for my $bioFeat (@features ) {
 		my $featDesc = [];
 		my $class = lc($bioFeat->primary_tag);
 		# The user may have specified to ignore this type of feature
@@ -718,7 +729,7 @@

@@ Diff output truncated at 10000 characters. @@



More information about the Bioperl-guts-l mailing list