[Bioperl-guts-l] bioperl-live/scripts/Bio-DB-GFF genbank2gff3.PLS, 1.10, 1.11

Brian Osborne bosborne at dev.open-bio.org
Mon Mar 19 12:42:07 EDT 2007


Update of /home/repository/bioperl/bioperl-live/scripts/Bio-DB-GFF
In directory dev.open-bio.org:/tmp/cvs-serv9928/scripts/Bio-DB-GFF

Modified Files:
	genbank2gff3.PLS 
Log Message:
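Don's patch for GFF3: adds a --typesource/-t option to set the SO type of the source feature, emits a ##FASTA line before the sequence, and gives the POD notes descriptive headings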

Index: genbank2gff3.PLS
===================================================================
RCS file: /home/repository/bioperl/bioperl-live/scripts/Bio-DB-GFF/genbank2gff3.PLS,v
retrieving revision 1.10
retrieving revision 1.11
diff -C2 -d -r1.10 -r1.11
*** genbank2gff3.PLS	11 Mar 2007 15:13:44 -0000	1.10
--- genbank2gff3.PLS	19 Mar 2007 16:42:05 -0000	1.11
***************
*** 35,38 ****
--- 35,39 ----
          --ethresh -e  error threshold for unflattener
                        set this high (>2) to ignore all unflattener errors
+         --typesource -t  SO sequence type for source (e.g. chromosome; region)
          --help    -h  display this message
  
***************
*** 59,63 ****
  =head2 Notes
  
! =head3 Note1:
  
  In cases where the input files contain many GenBank records (for
--- 60,64 ----
  =head2 Notes
  
! =head3 'split' and 'nolump' produce many files
  
  In cases where the input files contain many GenBank records (for
***************
*** 68,74 ****
  bp_fast_load_gff.pl to load the gff and/ or fasta files.
  
! =head3 Note2:
  
! This script is designed for refseq genomic sequence entries.  It may
  work for third party annotations but this has not been tested.
  
--- 69,75 ----
  bp_fast_load_gff.pl to load the gff and/ or fasta files.
  
! =head3 Designed for RefSeq
  
! This script is designed for RefSeq genomic sequence entries.  It may
  work for third party annotations but this has not been tested.
  
***************
*** 97,101 ****
  
  use vars qw/$split @filter $zip $outdir $help $ethresh
!             $file @files $dir $summary $nolump
              $gene_id $rna_id $tnum %method %id %seen/;
  
--- 98,102 ----
  
  use vars qw/$split @filter $zip $outdir $help $ethresh
!             $file @files $dir $summary $nolump $source_type
              $gene_id $rna_id $tnum %method %id %seen/;
  
***************
*** 110,113 ****
--- 111,115 ----
  	    'y|split'   => \$split,
              "ethresh|e=s"=>\$ethresh,
+             't|typesource=s' => \$source_type,
              'n|nolump'  => \$nolump);
  
***************
*** 223,227 ****
  
          # construct a GFF header
!         print $out &gff_header($seq_name, $end);
  
  	# Note that we use our own get_all_SeqFeatures function 
--- 225,230 ----
  
          # construct a GFF header
!         # TODO: derive source_type from the source feature's attributes (e.g. a chromosome=X tag)?
!         print $out &gff_header($seq_name, $end, $source_type);
  
  	# Note that we use our own get_all_SeqFeatures function 
***************
*** 268,272 ****
  	}
  	else {
! 	    print $out ">$seq_name\n$dna";
  	}
  
--- 271,276 ----
  	}
  	else {
! 		print $out "##FASTA\n"; # GFF3 spec
! 		print $out ">$seq_name\n$dna";
  	}
  
***************
*** 351,361 ****
  
  sub gff_header {
!     my ($name, $end) = @_;
      
      return <<END;
  ##gff-version 3
! ##sequence-region $name 1 $end
! ##source bp_genbank2gff3.pl
! $name\tGenBank\tregion\t1\t$end\t.\t.\t.\tID=$name
  END
  }
--- 355,366 ----
  
  sub gff_header {
!     my ($name, $end, $source_type) = @_;
!     $source_type ||= "region";
      
      return <<END;
  ##gff-version 3
! # sequence-region $name 1 $end
! # source bp_genbank2gff3.pl
! $name\tGenBank\t$source_type\t1\t$end\t.\t.\t.\tID=$name
  END
  }



More information about the Bioperl-guts-l mailing list