[Bioperl-guts-l] bioperl-live/t genbank.t,1.18,1.19

Christopher John Fields cjfields at dev.open-bio.org
Tue Jan 30 10:12:49 EST 2007


Update of /home/repository/bioperl/bioperl-live/t
In directory dev.open-bio.org:/tmp/cvs-serv2321

Modified Files:
	genbank.t 
Log Message:
tests for bug 2195

Index: genbank.t
===================================================================
RCS file: /home/repository/bioperl/bioperl-live/t/genbank.t,v
retrieving revision 1.18
retrieving revision 1.19
diff -C2 -d -r1.18 -r1.19
*** genbank.t	25 Jan 2007 17:25:11 -0000	1.18
--- genbank.t	30 Jan 2007 15:12:47 -0000	1.19
***************
*** 1,3 ****
!     # -*-Perl-*- mode (to keep my emacs happy)
  # $Id$
  
--- 1,3 ----
! # -*-Perl-*- mode (to keep my emacs happy)
  # $Id$
  
***************
*** 10,14 ****
      }
      use Test::More;
!     plan tests => 243;
      use_ok('Bio::SeqIO');
      use_ok('Bio::Root::IO');
--- 10,14 ----
      }
      use Test::More;
!     plan tests => 246;
      use_ok('Bio::SeqIO');
      use_ok('Bio::Root::IO');
***************
*** 24,28 ****
  }
  
! my $ast = Bio::SeqIO->new(-format => 'GenBank' ,
                                    -verbose => $verbose,
                                    -file => Bio::Root::IO->catfile
--- 24,28 ----
  }
  
! my $ast = Bio::SeqIO->new(-format => 'genbank' ,
                                    -verbose => $verbose,
                                    -file => Bio::Root::IO->catfile
***************
*** 30,34 ****
  $ast->verbose($verbose);
  my $as = $ast->next_seq();
! is $as->molecule, 'mRNA';
  is $as->alphabet, 'dna';
  is($as->primary_id, 3598416);
--- 30,34 ----
  $ast->verbose($verbose);
  my $as = $ast->next_seq();
! is $as->molecule, 'mRNA',$as->accession_number;
  is $as->alphabet, 'dna';
  is($as->primary_id, 3598416);
***************
*** 42,46 ****
  $ast->verbose($verbose);
  $as = $ast->next_seq();
! is $as->molecule, 'DNA';
  is $as->alphabet, 'dna';
  is($as->primary_id, 37539616);
--- 42,46 ----
  $ast->verbose($verbose);
  $as = $ast->next_seq();
! is $as->molecule, 'DNA',$as->accession_number;
  is $as->alphabet, 'dna';
  is($as->primary_id, 37539616);
***************
*** 57,61 ****
  $ast->verbose($verbose);
  $as = $ast->next_seq();
! is $as->molecule, 'linear';
  is $as->alphabet, 'protein';
  # Though older GenBank releases indicate SOURCE contains only the common name,
--- 57,61 ----
  $ast->verbose($verbose);
  $as = $ast->next_seq();
! is $as->molecule, 'linear',$as->accession_number;;
  is $as->alphabet, 'protein';
  # Though older GenBank releases indicate SOURCE contains only the common name,
***************
*** 82,86 ****
                                                         "NC_006346.gb"));
  $as = $ast->next_seq;
! is $as->species->binomial('FULL'), 'Bolitoglossa n. sp. RLM-2004';
  @class = $as->species->classification;
  is($class[$#class],'Eukaryota');
--- 82,86 ----
                                                         "NC_006346.gb"));
  $as = $ast->next_seq;
! is $as->species->binomial('FULL'), 'Bolitoglossa n. sp. RLM-2004',$as->accession_number;;
  @class = $as->species->classification;
  is($class[$#class],'Eukaryota');
***************
*** 93,97 ****
  $as = $ast->next_seq;
  @class = $as->species->classification;
! is($class[$#class],'Eukaryota');
  is $as->species->common_name,'black-bellied salamander';
  
--- 93,97 ----
  $as = $ast->next_seq;
  @class = $as->species->classification;
! is($class[$#class],'Eukaryota',$as->accession_number);
  is $as->species->common_name,'black-bellied salamander';
  
***************
*** 103,107 ****
  $as = $ast->next_seq;
  # again, this is not a common name but is in name('abbreviated')
! ok defined($as->species->name('abbreviated')->[0]);
  is $as->species->name('abbreviated')->[0],'Homo sapiens cDNA to mRNA';
  
--- 103,107 ----
  $as = $ast->next_seq;
  # again, this is not a common name but is in name('abbreviated')
! ok defined($as->species->name('abbreviated')->[0]),$as->accession_number;
  is $as->species->name('abbreviated')->[0],'Homo sapiens cDNA to mRNA';
  
***************
*** 112,116 ****
                                                         "DQ018368.gb"));
  $as = $ast->next_seq;
! is $as->species->scientific_name,'(Populus tomentosa x P. bolleana) x P. tomentosa var. truncata';
  
  # test secondary accessions
--- 112,117 ----
                                                         "DQ018368.gb"));
  $as = $ast->next_seq;
! is $as->species->scientific_name,'(Populus tomentosa x P. bolleana) x P. tomentosa var. truncata',
! $as->accession_number;;
  
  # test secondary accessions
***************
*** 121,125 ****
  my $seq = $seqio->next_seq;
  my @kw =  $seq->get_keywords;
! is(scalar @kw, 118);
  is($kw[-1], 'yabO');
  my @sec_acc = $seq->get_secondary_accessions();
--- 122,126 ----
  my $seq = $seqio->next_seq;
  my @kw =  $seq->get_keywords;
! is(scalar @kw, 118, $seq->accession_number);
  is($kw[-1], 'yabO');
  my @sec_acc = $seq->get_secondary_accessions();
***************
*** 135,139 ****
  };
  
! ok(! $@ );
  
  # bug 1647 rpt_unit sub-feature with multiple parens
--- 136,140 ----
  };
  
! ok(! $@, 'bug 1487');
  
  # bug 1647 rpt_unit sub-feature with multiple parens
***************
*** 145,149 ****
  my @rpts = grep { $_->primary_tag eq 'repeat_region' }
    $seq->get_SeqFeatures;
! is $#rpts, 2;
  my @rpt_units = map {$_->get_tag_values('rpt_unit')} @rpts;
  is $#rpt_units, 0;
--- 146,150 ----
  my @rpts = grep { $_->primary_tag eq 'repeat_region' }
    $seq->get_SeqFeatures;
! is $#rpts, 2, 'bug 1647';
  my @rpt_units = map {$_->get_tag_values('rpt_unit')} @rpts;
  is $#rpt_units, 0;
***************
*** 156,160 ****
                                (qw(t data Mcjanrna_rdbII.gbk) )
                );
! ok($seq = $str->next_seq);
  my @refs = $seq->annotation->get_Annotations('reference');
  is(@refs, 1);
--- 157,161 ----
                                (qw(t data Mcjanrna_rdbII.gbk) )
                );
! ok($seq = $str->next_seq, 'bug 1673');
  my @refs = $seq->annotation->get_Annotations('reference');
  is(@refs, 1);
***************
*** 168,172 ****
  $seq = $str->next_seq;
  my @features = $seq->all_SeqFeatures();
! is(@features, 5);
  is($features[0]->start, 1);
  is($features[0]->end, 226);
--- 169,173 ----
  $seq = $str->next_seq;
  my @features = $seq->all_SeqFeatures();
! is(@features, 5, $seq->accession_number);
  is($features[0]->start, 1);
  is($features[0]->end, 226);
***************
*** 185,189 ****
                                        ("t","data","test.genbank"),
                                        -verbose => $verbose,
!                              -format => 'GenBank');
  $stream->verbose($verbose);
  my $seqnum = 0;
--- 186,190 ----
                                        ("t","data","test.genbank"),
                                        -verbose => $verbose,
!                              -format => 'genbank');
  $stream->verbose($verbose);
  my $seqnum = 0;
***************
*** 209,213 ****
      $lasts = $seq;
  }
! is($seqnum, 5);
  is $lasts->display_id(), "HUMBETGLOA";
  my ($ref) = $lasts->annotation->get_Annotations('reference');
--- 210,214 ----
      $lasts = $seq;
  }
! is($seqnum, 5,'streaming');
  is $lasts->display_id(), "HUMBETGLOA";
  my ($ref) = $lasts->annotation->get_Annotations('reference');
***************
*** 218,222 ****
                                    ("t","data","test.genbank.noseq"),
                                    -verbose => $verbose,
!                                   -format => 'GenBank' );
  $seqnum = 0;
  while($seq = $stream->next_seq()) {
--- 219,223 ----
                                    ("t","data","test.genbank.noseq"),
                                    -verbose => $verbose,
!                                   -format => 'genbank' );
  $seqnum = 0;
  while($seq = $stream->next_seq()) {
***************
*** 231,235 ****
  
  # fuzzy
! $seq = Bio::SeqIO->new( -format => 'GenBank',
                                  -verbose => $verbose,
                          -file =>Bio::Root::IO->catfile
--- 232,236 ----
  
  # fuzzy
! $seq = Bio::SeqIO->new( -format => 'genbank',
                                  -verbose => $verbose,
                          -file =>Bio::Root::IO->catfile
***************
*** 239,248 ****
  
  @features = $as->all_SeqFeatures();
! is(@features,21);
  my $lastfeature = pop @features;
  # this is a split location; the root doesn't have strand
  is($lastfeature->strand, undef);
  $location = $lastfeature->location;
! $location->verbose(-1); # silence the warning of undef seq_id()
  # see above; splitlocs roots do not have a strand really
  is($location->strand, undef);
--- 240,249 ----
  
  @features = $as->all_SeqFeatures();
! is(@features,21,'Fuzzy in');
  my $lastfeature = pop @features;
  # this is a split location; the root doesn't have strand
  is($lastfeature->strand, undef);
  $location = $lastfeature->location;
! #$location->verbose(-1); # silence the warning of undef seq_id()
  # see above; splitlocs roots do not have a strand really
  is($location->strand, undef);
***************
*** 263,272 ****
  is($loc->strand,1);
  
! $seq = Bio::SeqIO->new(-format => 'GenBank',
                                -verbose => $verbose,
                         -file=> ">" . Bio::Root::IO->catfile
                                ("t","data","genbank.fuzzyout"));
  $seq->verbose($verbose);
! ok($seq->write_seq($as));
  unlink(Bio::Root::IO->catfile("t","data","genbank.fuzzyout"));
  
--- 264,273 ----
  is($loc->strand,1);
  
! $seq = Bio::SeqIO->new(-format => 'genbank',
                                -verbose => $verbose,
                         -file=> ">" . Bio::Root::IO->catfile
                                ("t","data","genbank.fuzzyout"));
  $seq->verbose($verbose);
! ok($seq->write_seq($as),'Fuzzy out');
  unlink(Bio::Root::IO->catfile("t","data","genbank.fuzzyout"));
  
***************
*** 277,283 ****
                               ( qw(t data BK000016-tpa.gbk)));
  $seq = $str->next_seq;
! ok(defined $seq);
  ok(defined $seq->seq);
! is($seq->accession_number, 'BK000016');
  is($seq->alphabet, 'dna');
  is($seq->display_id, 'BK000016');
--- 278,284 ----
                               ( qw(t data BK000016-tpa.gbk)));
  $seq = $str->next_seq;
! ok(defined $seq, $seq->accession_number);
  ok(defined $seq->seq);
! is($seq->accession_number, 'BK000016',$seq->accession_number);
  is($seq->alphabet, 'dna');
  is($seq->display_id, 'BK000016');
***************
*** 297,313 ****
  my $reference =  ($ac->get_Annotations('reference') )[0];
  is ($reference->pubmed, '11479594');
! is ($reference->medline, '21372465');
  
  # validate that what is written is what is read
  my $testfile = "testtpa.gbk";
! my $out = new Bio::SeqIO(-file => ">$testfile",
                               -format => 'genbank');
  $out->write_seq($seq);
  $out->close();
  
! $str = new Bio::SeqIO(-format =>'genbank',
                               -file => $testfile);
  $seq = $str->next_seq;
! ok(defined $seq);
  ok(defined $seq->seq);
  is($seq->accession_number, 'BK000016');
--- 298,314 ----
  my $reference =  ($ac->get_Annotations('reference') )[0];
  is ($reference->pubmed, '11479594');
! is ($reference->medline, '21372465',$seq->accession_number);
  
  # validate that what is written is what is read
  my $testfile = "testtpa.gbk";
! my $out = Bio::SeqIO->new(-file => ">$testfile",
                               -format => 'genbank');
  $out->write_seq($seq);
  $out->close();
  
! $str = Bio::SeqIO->new(-format =>'genbank',
                               -file => $testfile);
  $seq = $str->next_seq;
! ok(defined $seq,'roundtrip');
  ok(defined $seq->seq);
  is($seq->accession_number, 'BK000016');
***************
*** 335,338 ****
--- 336,340 ----
  # write revcomp split location
  my $gb = new Bio::SeqIO(-format => 'genbank',
+                         -verbose => $verbose,
                          -file   => Bio::Root::IO->catfile
                          (qw(t data revcomp_mrna.gb)));
***************
*** 344,348 ****
  $gb->write_seq($seq);
  undef $gb;
! ok(! -z "tmp_revcomp_mrna.gb");
  # INSERT DIFFING CODE HERE
  
--- 346,350 ----
  $gb->write_seq($seq);
  undef $gb;
! ok(! -z "tmp_revcomp_mrna.gb", 'revcomp split location');
  # INSERT DIFFING CODE HERE
  
***************
*** 353,360 ****
  #           Enterobacteriaceae; Salmonella.
  $gb = new Bio::SeqIO(-format => 'genbank',
!                             -file   => Bio::Root::IO->catfile
                              (qw(t data NC_006511-short.gbk)));
  $seq = $gb->next_seq;
! is $seq->species->common_name, undef;
  is $seq->species->scientific_name, "Salmonella enterica subsp. enterica serovar Paratyphi A str. ATCC 9150";
  @class = $seq->species->classification;
--- 355,363 ----
  #           Enterobacteriaceae; Salmonella.
  $gb = new Bio::SeqIO(-format => 'genbank',
!                      -verbose => $verbose,
!                         -file   => Bio::Root::IO->catfile
                              (qw(t data NC_006511-short.gbk)));
  $seq = $gb->next_seq;
! is $seq->species->common_name, undef, "Bug 1925";
  is $seq->species->scientific_name, "Salmonella enterica subsp. enterica serovar Paratyphi A str. ATCC 9150";
  @class = $seq->species->classification;
***************
*** 363,368 ****
  # WGS tests
  $gb = Bio::SeqIO->new(-format => 'genbank',
!                             -file   => Bio::Root::IO->catfile
!                             (qw(t data O_sat.wgs)));
  $seq = $gb->next_seq;
  
--- 366,372 ----
  # WGS tests
  $gb = Bio::SeqIO->new(-format => 'genbank',
!                       -verbose => $verbose,
!                     -file   => Bio::Root::IO->catfile
!                     (qw(t data O_sat.wgs)));
  $seq = $gb->next_seq;
  
***************
*** 377,381 ****
  for my $wgs (@wgs) {
      my ($tagname, $value) = (shift @tests, shift @tests);
!     is($wgs->tagname, $tagname);
      is($wgs->value, $value);
      $ct++;
--- 381,385 ----
  for my $wgs (@wgs) {
      my ($tagname, $value) = (shift @tests, shift @tests);
!     is($wgs->tagname, $tagname, $tagname);
      is($wgs->value, $value);
      $ct++;
***************
*** 386,395 ****
  # make sure we can retrieve a feature with a primary tag of 'misc_difference'
  $gb = new Bio::SeqIO(-format => 'genbank',
!                             -file   => Bio::Root::IO->catfile
                              (qw(t data BC000007.gbk)));
  $seq = $gb->next_seq;
  ($cds) = grep { $_->primary_tag eq 'misc_difference' } $seq->get_SeqFeatures;
  my @vals = $cds->get_tag_values('gene');
! is $vals[0], 'PX19';
  
  # Check that the source,organism section is identical between input and output.
--- 390,400 ----
  # make sure we can retrieve a feature with a primary tag of 'misc_difference'
  $gb = new Bio::SeqIO(-format => 'genbank',
!                      -verbose => $verbose,
!                     -file   => Bio::Root::IO->catfile
                              (qw(t data BC000007.gbk)));
  $seq = $gb->next_seq;
  ($cds) = grep { $_->primary_tag eq 'misc_difference' } $seq->get_SeqFeatures;
  my @vals = $cds->get_tag_values('gene');
! is $vals[0], 'PX19', $seq->accession_number;
  
  # Check that the source,organism section is identical between input and output.
***************
*** 452,456 ****
      close(RESULT);
      
!     ok $is;
      
      unlink($outfile);
--- 457,461 ----
      close(RESULT);
      
!     ok $is, $in;
      
      unlink($outfile);
***************
*** 461,506 ****
  
  # 20061117: problem with *double* colon in some annotation-dblink values
  
  foreach my $in ('P35527.gb') {
!         my $infile =  Bio::Root::IO->catfile("t","data",$in);
!        $str = new Bio::SeqIO(-format =>'genbank',
!                              -verbose => $verbose,
!                              -file => $infile);
!        $seq = $str->next_seq;
!         my $ac      = $seq->annotation();      # Bio::AnnotationCollection
!         foreach my $key ($ac->get_all_annotation_keys() ) {
!                 my @values = $ac->get_Annotations( $key);
!                 foreach my $value (@values) {
!                         if ($key eq 'dblink') {
! 
!                                 ok (index($value,'::') < 0);   # this should never be true
! 
!                                 ok ($value );   # check value is not empty
  
!                              #  print "  ann/", sprintf('%12s  ',$key), '>>>', $value , '<<<', "\n";
!                              #  print "        index double colon: ",index($value   ,'::'), "\n";
  
!                                 #  check db name:
!                                 my @parts = split(/:/,$value);
!                                 if ( $parts[0] =~ /^(?:
!                                         #  not an exhaustive list of databases;
!                                         #  just the db's referenced in P35527.gb:
!                                         swissprot | GenBank | GenPept  | HSSP| IntAct | Ensembl | KEGG | HGNC | MIM | ArrayExpress
!                                                   | GO      | InterPro | Pfam| PRINTS | PROSITE
!                                                      )$/x )
!                                 {
!                                       ok 1;
!                                 }
!                                 else {
!                                       ok 0;
!                                 }
  
!                                 ok ( $parts[1] );
  
!                         }
!                         # elsif ($key eq 'reference') { }
                  }
          }
  
  
! }
--- 466,526 ----
  
  # 20061117: problem with *double* colon in some annotation-dblink values
+ $ct = 0;
  
  foreach my $in ('P35527.gb') {
!     my $infile =  Bio::Root::IO->catfile("t","data",$in);
!     $str = new Bio::SeqIO(-format =>'genbank',
!                          -verbose => $verbose,
!                          -file => $infile);
!     $seq = $str->next_seq;
!     my $ac      = $seq->annotation();      # Bio::AnnotationCollection
!     foreach my $key ($ac->get_all_annotation_keys() ) {
!         my @values = $ac->get_Annotations($key);
!         foreach my $value (@values) {
!             $ct++;
!             if ($key eq 'dblink') {
  
!                 ok (index($value,'::') < 0);   # this should never be true
  
!                 ok ($value, $value);   # check value is not empty
  
!                 #  print "  ann/", sprintf('%12s  ',$key), '>>>', $value , '<<<', "\n";
!                 #  print "        index double colon: ",index($value   ,'::'), "\n";
  
!                 #  check db name:
!                 my @parts = split(/:/,$value);
!                 if ( $parts[0] =~ /^(?:
!                         #  not an exhaustive list of databases;
!                         #  just the db's referenced in P35527.gb:
!                         swissprot | GenBank | GenPept  | HSSP| IntAct | Ensembl | KEGG | HGNC | MIM | ArrayExpress
!                                   | GO      | InterPro | Pfam| PRINTS | PROSITE
!                                      )$/x )
!                 {
!                     ok 1;
                  }
+                 else {
+                     ok 0;
+                 }
+                     ok ( $parts[1], "$parts[0]" );
+             }
+                 # elsif ($key eq 'reference') { }
          }
+     }
+ }
  
+ is($ct, 45);
  
! # bug 2195
!     
! $str = new Bio::SeqIO(-format =>'genbank',
!                       -verbose => $verbose,
!                       -file => Bio::Root::IO->catfile(qw(t data AF305198.gb))
!                      );
! 
! $species = $str->next_seq->species;
! 
! is($species->scientific_name, 'Virginia creeper phytoplasma', 'Bug 2195');
! is(join(', ',$species->classification), 'Virginia creeper phytoplasma, '.
!    '16SrV (Elm yellows group), Candidatus Phytoplasma, '.
!    'Acholeplasmataceae, Acholeplasmatales, Mollicutes, '.
!    'Firmicutes, Bacteria', 'Bug 2195');



More information about the Bioperl-guts-l mailing list