[Bioperl-guts-l] [16932] bioperl-live/trunk: Changed phase in Bio::Tools::GFF:: _gff3_string to default to 1.

Chris Fields cjfields at illinois.edu
Fri Mar 26 10:26:09 EDT 2010


Actually, a default of 1 is very bad even for CDS. Phase can be 0, 1, or 2 (the number of nucleotides to remove from the start of the feature before translation), so a default of 1 would indicate that every CDS should start translation only after its first nucleotide is removed.

chris

On Mar 26, 2010, at 8:38 AM, Aaron Mackey wrote:

> I don't think phase/frame should default to 1; why would you think so?
> Recall that this is not the same as strand, and it is invalid for anything
> but CDS (or CDS-like) features.
> 
> -Aaron
> 
> 
> On Thu, Mar 25, 2010 at 8:34 PM, Nathan Liles <nml5566 at dev.open-bio.org> wrote:
> 
>> Revision: 16932
>> Author:   nml5566
>> Date:     2010-03-25 20:34:28 -0400 (Thu, 25 Mar 2010)
>> Log Message:
>> -----------
>> Changed phase in Bio::Tools::GFF::_gff3_string to default to 1. Re-added
>> fixed map_types() to genbank2gff3 converter script.
>> 
>> Modified Paths:
>> --------------
>>   bioperl-live/trunk/Bio/SeqFeature/Tools/TypeMapper.pm
>>   bioperl-live/trunk/Bio/Tools/GFF.pm
>>   bioperl-live/trunk/scripts/Bio-DB-GFF/genbank2gff3.PLS
>> 
>> Modified: bioperl-live/trunk/Bio/SeqFeature/Tools/TypeMapper.pm
>> ===================================================================
>> --- bioperl-live/trunk/Bio/SeqFeature/Tools/TypeMapper.pm       2010-03-25
>> 00:42:41 UTC (rev 16931)
>> +++ bioperl-live/trunk/Bio/SeqFeature/Tools/TypeMapper.pm       2010-03-26
>> 00:34:28 UTC (rev 16932)
>> @@ -273,7 +273,7 @@
>>       #"misc_RNA" => ["transcript", "so:0000673"],
>>       "misc_binding" => ["binding_site", "so:0000409"],
>>       "misc_difference" => ["sequence_difference", "so:0000413"],
>> -       "misc_feature" => ["region", "so:0000001"],
>> +       "misc_feature" => ["region", undef],
>>       "misc_recomb" => ["recombination_feature", "so:0000298"],
>>       "misc_signal" => ["regulatory_region", "so:0005836"],
>>       "misc_structure" => ["sequence_secondary_structure", "so:0000002"],
>> 
>> Modified: bioperl-live/trunk/Bio/Tools/GFF.pm
>> ===================================================================
>> --- bioperl-live/trunk/Bio/Tools/GFF.pm 2010-03-25 00:42:41 UTC (rev 16931)
>> +++ bioperl-live/trunk/Bio/Tools/GFF.pm 2010-03-26 00:34:28 UTC (rev 16932)
>> @@ -1006,7 +1006,7 @@
>>    if( $feat->can('frame') ) {
>>       $frame = $feat->frame();
>>    }
>> -    $frame = '.' unless defined $frame;
>> +    $frame = '1' unless defined $frame;
>> 
>>    $strand = $feat->strand();
>> 
>> 
>> Modified: bioperl-live/trunk/scripts/Bio-DB-GFF/genbank2gff3.PLS
>> ===================================================================
>> --- bioperl-live/trunk/scripts/Bio-DB-GFF/genbank2gff3.PLS      2010-03-25
>> 00:42:41 UTC (rev 16931)
>> +++ bioperl-live/trunk/scripts/Bio-DB-GFF/genbank2gff3.PLS      2010-03-26
>> 00:34:28 UTC (rev 16932)
>> @@ -32,7 +32,7 @@
>>                      for Genbank entries (must be YAML format)
>>                      (if --manual is passed without --ini, user will be
>> prompted to
>>                       create the file if any manual input is saved)
>> -       --sofile  -l  path to to the so.obo file to use for primary id
>> mapping
>> +       --sofile  -l  path to to the so.obo file to use for feature type
>> mapping
>>                      (--sofile live will download the latest online
>> revision)
>>       --manual   -m  when trying to guess the proper SO term, if more than
>>                      one option matches the primary tag, the converter
>> will
>> @@ -1033,15 +1033,15 @@
>> 
>>    # map feature types to the sequence ontology
>>    ## $tm->map_types_to_SO( -seq => $seq );
>> -    $tm->map_types( -seq => $seq, -type_map => $FTSOmap, -undefined =>
>> "region" ); #dgg
>> +    #$tm->map_types( -seq => $seq, -type_map => $FTSOmap, -undefined =>
>> "region" ); #dgg
>> 
>> -    #map_types(
>> -           #$tm,
>> -           #-seq => $seq,
>> -           #-type_map  => $FTSOmap,
>> -           #-syn_map  => $FTSOsynonyms,
>> -           #-undefined => "region"
>> -    #); #nml
>> +    map_types(
>> +           $tm,
>> +           -seq => $seq,
>> +           -type_map  => $FTSOmap,
>> +           -syn_map  => $FTSOsynonyms,
>> +           -undefined => "region"
>> +    ); #nml
>> 
>> }
>> 
>> @@ -1194,8 +1194,7 @@
>>       $seq->isa("Bio::SeqI") || $self->throw("$seq NOT A SeqI");
>>       @sfs = $seq->get_all_SeqFeatures;
>>    }
>> -
>> -
>> +    $type_map = $type_map || $self->typemap; # dgg: was type_map;
>>    foreach my $feat (@sfs) {
>> 
>>       $feat->isa("Bio::SeqFeatureI") || $self->throw("$feat NOT A
>> SeqFeatureI");
>> @@ -1203,24 +1202,45 @@
>> 
>>       my $primary_tag = $feat->primary_tag;
>> 
>> -       if ($primary_tag =~ /^pseudo(.*)$/) {
>> -           $primary_tag = $1;
>> -           $feat->primary_tag($primary_tag);
>> -       }
>> +       #if ($primary_tag =~ /^pseudo(.*)$/) {
>> +           #$primary_tag = $1;
>> +           #$feat->primary_tag($primary_tag);
>> +       #}
>> 
>>       my $mtype = $type_map->{$primary_tag};
>> +       if ($mtype) {
>> +           if (ref($mtype)) {
>> +               if (ref($mtype) eq 'ARRAY') {
>> +                   my $soID;
>> +                   ($mtype, $soID) = @$mtype;
>> 
>> -       if (ref $mtype) {
>> -           my $soID = pop @$mtype;
>> -           $mtype = shift @$mtype;
>> +                   if ($soID && ref($ONTOLOGY)) {
>> +                       my ($term) = $ONTOLOGY->find_terms(-identifier =>
>> $soID);
>> +                       $mtype = $term->name if $term;
>> +                   }
>> +# if SO ID is undefined AND we have an ontology to search, we want to
>> delete
>> +# the feature type hash entry in order to force a fuzzy search
>> +                   elsif (! defined $soID && ref($ONTOLOGY)) {
>> +                       undef $mtype;
>> +                       delete $type_map->{$primary_tag};
>> +                   }
>> +                   elsif ($undefmap && $mtype eq 'undefined') { # dgg
>> +                       $mtype= $undefmap;
>> +                   }
>> 
>> -
>> -           if ($soID) {
>> -               my ($term) = $ONTOLOGY->find_terms(-identifier => $soID);
>> -               $mtype = $term->name if $term;
>> +                   $type_map->{$primary_tag} = $mtype if $mtype;
>> +               }
>> +               elsif (ref($mtype) eq 'CODE') {
>> +                   $mtype = $mtype->($feat);
>> +               }
>> +               else {
>> +                   $self->throw('must be scalar or CODE ref');
>> +               }
>>           }
>> -
>> -           $type_map->{$primary_tag} = $mtype if $mtype;
>> +           elsif ($undefmap && $mtype eq 'undefined') { # dgg
>> +               $mtype= $undefmap;
>> +           }
>> +           $feat->primary_tag($mtype);
>>       }
>> 
>>       if ($CONF) {
>> @@ -1347,9 +1367,9 @@
>> 
>>               }
>>           }
>> +           $mtype ||= $undefmap;
>> +           $feat->primary_tag($mtype);
>>       }
>> -       $mtype ||= $undefmap;
>> -       $feat->primary_tag($mtype);
>>    }
>> 
>> 
>> @@ -1547,7 +1567,6 @@
>> 
>>       my ($nR, $nmR, $descR, $termR, @synR) = ref($right) ? @$right :
>> (undef, undef, undef);
>> 
>> -#die Dumper $termL;
>> 
>>       my $format = "format STDOUT = \n";
>> 
>> 
>> _______________________________________________
>> Bioperl-guts-l mailing list
>> Bioperl-guts-l at lists.open-bio.org
>> http://lists.open-bio.org/mailman/listinfo/bioperl-guts-l
>> 
> _______________________________________________
> Bioperl-guts-l mailing list
> Bioperl-guts-l at lists.open-bio.org
> http://lists.open-bio.org/mailman/listinfo/bioperl-guts-l




More information about the Bioperl-guts-l mailing list