diff --git a/bin/agat b/bin/agat index b87924fd..8151ac26 100755 --- a/bin/agat +++ b/bin/agat @@ -193,6 +193,10 @@ force_gff_input_version, gtf_output_version, gff_output_version and output_forma getopt => 'gtf_output_version=s', help => 'Set the GTF output vesion. Choice 1, 2, 2.1, 2.2, 2.5, 3 or relax. [Default relax]', }, + { + getopt => 'deflate_attribute!', + help => 'deflate multi-values attributes: attribute_tag=att_value1,att_value2,att_value3 will will become attribute_tag=att_value1;attribute_tag2=att_value2;attribute_tag3=att_value3;', + }, { getopt => 'create_l3_for_l2_orphan!', help => 'To create l3 feature for l2 feature without any. [Default activated]', diff --git a/lib/AGAT/AGAT.pm b/lib/AGAT/AGAT.pm index a9885f37..e5859e79 100644 --- a/lib/AGAT/AGAT.pm +++ b/lib/AGAT/AGAT.pm @@ -17,8 +17,9 @@ use AGAT::PlotR; use Bio::Tools::GFF; our $VERSION = "v1.4.1"; -our @ISA = qw(Exporter); -our @EXPORT = qw(get_agat_header print_agat_version get_agat_config handle_levels); +our $CONFIG; # This variable will be used to store the config and will be available from everywhere. +our @ISA = qw( Exporter ); +our @EXPORT = qw( get_agat_header print_agat_version get_agat_config handle_levels ); sub import { AGAT::AGAT->export_to_level(1, @_); # to be able to load the EXPORT functions when direct call; (normal case) AGAT::OmniscientI->export_to_level(1, @_); @@ -159,6 +160,10 @@ sub get_agat_config{ # Load the config my $config = load_config({ config_file => $config_file_checked}); check_config({ config => $config}); + + # Store the config in a Global variable accessible from everywhere. + $CONFIG = $config; + return $config; } @@ -243,6 +248,7 @@ sub handle_config { my $output_format = $general->{configs}[-1]{output_format}; my $gff_output_version = $general->{configs}[-1]{gff_output_version}; my $gtf_output_version = $general->{configs}[-1]{gtf_output_version}; + my $deflate_attribute = $general->{configs}[-1]{deflate_attribute}; my $create_l3_for_l2_orphan = $general->{configs}[-1]{create_l3_for_l2_orphan}; my $clean_attributes_from_template = $general->{configs}[-1]{clean_attributes_from_template}; my $locus_tag = $general->{configs}[-1]{locus_tag}; @@ -325,6 +331,11 @@ sub handle_config { $modified_on_the_fly = 1; } # bolean + if( defined($deflate_attribute) ){ + $config->{ deflate_attribute } = _make_bolean($deflate_attribute); + $modified_on_the_fly = 1; + } + # bolean if( defined($create_l3_for_l2_orphan) ){ $config->{ create_l3_for_l2_orphan } = _make_bolean($create_l3_for_l2_orphan); $modified_on_the_fly = 1; diff --git a/lib/AGAT/BioperlGFF.pm b/lib/AGAT/BioperlGFF.pm index 1dda601e..bc5d6a2c 100644 --- a/lib/AGAT/BioperlGFF.pm +++ b/lib/AGAT/BioperlGFF.pm @@ -149,6 +149,8 @@ package AGAT::BioperlGFF; use vars qw($HAS_HTML_ENTITIES); use strict; +use AGAT::OmniscientTool; +use AGAT::AGAT; use Bio::Seq::SeqFactory; use Bio::LocatableSeq; use Bio::SeqFeature::Generic; @@ -699,6 +701,28 @@ sub write_feature { $self->_print($line); } $self->{'_first'} = 0; + + # deflate multi-values attribute if asked by config + if ($AGAT::AGAT::CONFIG->{'deflate_attribute'}){ + foreach my $feature ( @features ) { + my @list_tags= $feature->get_all_tags(); + foreach my $tag (@list_tags){ + my @tag_values = $feature->get_tag_values($tag); + if ($#tag_values >= 1){ + my $tag_counter=-1; + foreach my $tag_value (@tag_values){ + $tag_counter++; + if ($tag_counter == 0){ + create_or_replace_tag($feature, $tag , $tag_value); + } else { + create_or_replace_tag($feature, $tag."_".$tag_counter , $tag_value); + } + } + } + } + } + } + foreach my $feature ( @features ) { $self->_print($self->gxf_string($feature)."\n"); } diff --git a/lib/AGAT/Config.pm b/lib/AGAT/Config.pm index dc422ae4..d1ca76e8 100644 --- a/lib/AGAT/Config.pm +++ b/lib/AGAT/Config.pm @@ -175,7 +175,7 @@ sub check_config{ $error = 1; } if( ! exists_keys($config,("log") ) ){ - print "progress_bar parameter missing in the configuration file.\n"; + print "log parameter missing in the configuration file.\n"; $error = 1; } if( ! exists_keys($config, ("debug") ) ){ @@ -241,6 +241,10 @@ sub check_config{ print "gtf_output_version parameter missing in the configuration file.\n"; $error = 1; } + if( ! exists_keys($config, ("deflate_attribute") ) ) { + print "deflate_attribute parameter missing in the configuration file.\n"; + $error = 1; + } if( ! exists_keys($config, ("create_l3_for_l2_orphan") ) ) { print "create_level3_for_level2_orphan parameter missing in the configuration file.\n"; $error = 1; diff --git a/lib/AGAT/OmniscientToGTF.pm b/lib/AGAT/OmniscientToGTF.pm index 45470c0a..ef1e60cd 100644 --- a/lib/AGAT/OmniscientToGTF.pm +++ b/lib/AGAT/OmniscientToGTF.pm @@ -66,169 +66,166 @@ sub print_omniscient_as_gtf{ print "\n\n\nlevel1: ".$feature_level1->gff_string."\n" if $debug; my $id_tag_key_level1 = lc($feature_level1->_tag_value('ID')); - # Gene ID level1 + # Gene ID level1 my $gene_id=undef; - my $gene_id_att=undef; - if($feature_level1->has_tag('gene_id')){ - $gene_id_att=$feature_level1->_tag_value('gene_id'); - } - - my $transcript_id=undef; - my $level3_gene_id=undef; - ################# - # == LEVEL 2 == # - ################# - foreach my $primary_tag_key_level2 ( sort {$a cmp $b} keys %{$hash_omniscient->{'level2'}}){ # primary_tag_key_level2 = mrna or mirna or ncrna or trna etc... - - if ( exists_keys ($hash_omniscient, ('level2', $primary_tag_key_level2, $id_tag_key_level1) ) ){ - foreach my $feature_level2 ( sort {$a->start <=> $b->start} @{$hash_omniscient->{'level2'}{$primary_tag_key_level2}{$id_tag_key_level1}}) { + my $gene_id_att=undef; + if($feature_level1->has_tag('gene_id')){ + $gene_id_att=$feature_level1->_tag_value('gene_id'); + } + + my $transcript_id=undef; + my $level3_gene_id=undef; + ################# + # == LEVEL 2 == # + ################# + foreach my $primary_tag_key_level2 ( sort {$a cmp $b} keys %{$hash_omniscient->{'level2'}}){ # primary_tag_key_level2 = mrna or mirna or ncrna or trna etc... + + if ( exists_keys ($hash_omniscient, ('level2', $primary_tag_key_level2, $id_tag_key_level1) ) ){ + foreach my $feature_level2 ( sort {$a->start <=> $b->start} @{$hash_omniscient->{'level2'}{$primary_tag_key_level2}{$id_tag_key_level1}}) { print "\nlevel2: ".$feature_level2->gff_string."\n" if $debug; + # Gene ID level2 + my $gene_id_mrna_att=undef; + if($feature_level2->has_tag('gene_id')){ + $gene_id_mrna_att=$feature_level2->_tag_value('gene_id'); + } - # Gene ID level2 - my $gene_id_mrna_att=undef; - if($feature_level2->has_tag('gene_id')){ - $gene_id_mrna_att=$feature_level2->_tag_value('gene_id'); - } + my $transcript_id_mrna_att=undef; + if($feature_level2->has_tag('transcript_id')){ + $transcript_id_mrna_att=$feature_level2->_tag_value('transcript_id'); + } + + # get gff3 feature (ID) + my $level2_ID = lc($feature_level2->_tag_value('ID')); + my $level3_transcript_id=undef; + ################# + # == LEVEL 3 == # + ################# + + ############ + # Go through one time to check if gene_id and transcript_id are present and save them + foreach my $primary_tag_key_level3 ( sort {$a cmp $b} keys %{$hash_omniscient->{'level3'}}){ # primary_tag_key_level3 = cds or exon or start_codon or utr etc... + if ( exists_keys ($hash_omniscient, ('level3', $primary_tag_key_level3, $level2_ID) ) ){ + foreach my $feature_level3 ( sort { $a->start <=> $b->start } @{$hash_omniscient->{'level3'}{$primary_tag_key_level3}{$level2_ID}}) { + print "level3: ".$feature_level3->gff_string."\n" if $debug; + #Get level3 gene_id + if(! $level3_gene_id){ + if($feature_level3->has_tag('gene_id')){ + $level3_gene_id=$feature_level3->_tag_value('gene_id'); + } + } + + #Get level3 transcript_id + if(! $level3_transcript_id){ + if($feature_level3->has_tag('transcript_id')){ + $level3_transcript_id=$feature_level3->_tag_value('transcript_id'); + } + } + if($level3_gene_id and $level3_transcript_id){last;} + } + } + if($level3_gene_id and $level3_transcript_id){last;} + } - my $transcript_id_mrna_att=undef; - if($feature_level2->has_tag('transcript_id')){ - $transcript_id_mrna_att=$feature_level2->_tag_value('transcript_id'); - } + ################# + # CHOOSE the gene_id. We take the first from level1 to level3. + if($gene_id_att and ! _does_id_exist("gene_id", $gene_id_att, \%keep_track_gene_id) ){ + $gene_id=$gene_id_att; + } + elsif($gene_id_mrna_att and ! _does_id_exist("gene_id", $gene_id_mrna_att, \%keep_track_gene_id) ){ + $gene_id=$gene_id_mrna_att + } + elsif($level3_gene_id and ! _does_id_exist("gene_id", $level3_gene_id, \%keep_track_gene_id) ){ + $gene_id=$level3_gene_id; + } + else{ # We didn't find any gene_id we will the ID of level1 as gene_id. + $gene_id=$feature_level1->_tag_value('ID'); + } - # get gff3 feature (ID) - my $level2_ID = lc($feature_level2->_tag_value('ID')); + ################# + # CHOOSE the transcript_id. We take the first from level2 to level3. + if($transcript_id_mrna_att and ! _does_id_exist("transcript_id", $transcript_id_mrna_att, \%keep_track_transcript_id) ){ + $transcript_id=$transcript_id_mrna_att; + } + elsif($level3_transcript_id and ! _does_id_exist("transcript_id", $level3_transcript_id, \%keep_track_transcript_id) ){ + $transcript_id=$level3_transcript_id; + } + else{ # We didn't find any gene_id we will the ID of level2 as transcript_id. + $transcript_id=$feature_level2->_tag_value('ID'); + } - my $level3_transcript_id=undef; - ################# - # == LEVEL 3 == # - ################# + ############## + # Second pass of level3 features + # Add gene_id and transcript_id to level3 feature that don't have this information + foreach my $primary_tag_key_level3 (keys %{$hash_omniscient->{'level3'}}){ # primary_tag_key_level3 = cds or exon or start_codon or utr etc... + if ( exists_keys ($hash_omniscient, ('level3', $primary_tag_key_level3, $level2_ID) ) ){ + foreach my $feature_level3 ( @{$hash_omniscient->{'level3'}{$primary_tag_key_level3}{$level2_ID}}) { - ############ - # Go through one time to check if gene_id and transcript_id are present and save them - foreach my $primary_tag_key_level3 ( sort {$a cmp $b} keys %{$hash_omniscient->{'level3'}}){ # primary_tag_key_level3 = cds or exon or start_codon or utr etc... - if ( exists_keys ($hash_omniscient, ('level3', $primary_tag_key_level3, $level2_ID) ) ){ - foreach my $feature_level3 ( sort { $a->start <=> $b->start } @{$hash_omniscient->{'level3'}{$primary_tag_key_level3}{$level2_ID}}) { - print "level3: ".$feature_level3->gff_string."\n" if $debug; - #Get level3 gene_id - if(! $level3_gene_id){ - if($feature_level3->has_tag('gene_id')){ - $level3_gene_id=$feature_level3->_tag_value('gene_id'); - } - } - - #Get level3 transcript_id - if(! $level3_transcript_id){ - if($feature_level3->has_tag('transcript_id')){ - $level3_transcript_id=$feature_level3->_tag_value('transcript_id'); - } - } - if($level3_gene_id and $level3_transcript_id){last;} - } - } - if($level3_gene_id and $level3_transcript_id){last;} - } - - ################# - # CHOOSE the gene_id. We take the first from level1 to level3. - if($gene_id_att and ! _does_id_exist("gene_id", $gene_id_att, \%keep_track_gene_id) ){ - $gene_id=$gene_id_att; - } - elsif($gene_id_mrna_att and ! _does_id_exist("gene_id", $gene_id_mrna_att, \%keep_track_gene_id) ){ - $gene_id=$gene_id_mrna_att - } - elsif($level3_gene_id and ! _does_id_exist("gene_id", $level3_gene_id, \%keep_track_gene_id) ){ - $gene_id=$level3_gene_id; - } - else{ # We didn't find any gene_id we will the ID of level1 as gene_id. - $gene_id=$feature_level1->_tag_value('ID'); - } - - ################# - # CHOOSE the transcript_id. We take the first from level2 to level3. - if($transcript_id_mrna_att and ! _does_id_exist("transcript_id", $transcript_id_mrna_att, \%keep_track_transcript_id) ){ - $transcript_id=$transcript_id_mrna_att; - } - elsif($level3_transcript_id and ! _does_id_exist("transcript_id", $level3_transcript_id, \%keep_track_transcript_id) ){ - $transcript_id=$level3_transcript_id; - } - else{ # We didn't find any gene_id we will the ID of level2 as transcript_id. - $transcript_id=$feature_level2->_tag_value('ID'); - } - - ############## - # Second pass of level3 features - # Add gene_id and transcript_id to level3 feature that don't have this information - foreach my $primary_tag_key_level3 (keys %{$hash_omniscient->{'level3'}}){ # primary_tag_key_level3 = cds or exon or start_codon or utr etc... - if ( exists_keys ($hash_omniscient, ('level3', $primary_tag_key_level3, $level2_ID) ) ){ - foreach my $feature_level3 ( @{$hash_omniscient->{'level3'}{$primary_tag_key_level3}{$level2_ID}}) { - - #Check add gene_id - if(! $feature_level3->has_tag('gene_id')) { - $feature_level3->add_tag_value('gene_id', $gene_id); - } - elsif($feature_level3->_tag_value('gene_id') ne $gene_id) { #gene_id different, we replace it. - warn("Level3 ".$feature_level3->_tag_value('ID').": We replace the gene_id ".$feature_level3->_tag_value('gene_id')." by ".$gene_id.". We save original gene_id into $previous_tag_l1 attribute.\n"); - create_or_replace_tag($feature_level3, $previous_tag_l1, $feature_level3->_tag_value('gene_id')); - create_or_replace_tag($feature_level3, 'gene_id', $gene_id); - } - #Check add transcript_id - if(! $feature_level3->has_tag('transcript_id')){ - $feature_level3->add_tag_value('transcript_id', $transcript_id); - } - elsif($feature_level3->_tag_value('transcript_id') ne $transcript_id){ #transcript_id different, we replace it. - warn("Level3 ".$feature_level3->_tag_value('ID').": We replace the transcript_id ".$feature_level3->_tag_value('transcript_id')." by ".$transcript_id.". We save original transcript_id into $previous_tag_l2 attribute.\n"); - create_or_replace_tag($feature_level3, $previous_tag_l2, $feature_level3->_tag_value('transcript_id')); + #Check add gene_id + if(! $feature_level3->has_tag('gene_id')) { + $feature_level3->add_tag_value('gene_id', $gene_id); + } + elsif($feature_level3->_tag_value('gene_id') ne $gene_id) { #gene_id different, we replace it. + warn("Level3 ".$feature_level3->_tag_value('ID').": We replace the gene_id ".$feature_level3->_tag_value('gene_id')." by ".$gene_id.". We save original gene_id into $previous_tag_l1 attribute.\n"); + create_or_replace_tag($feature_level3, $previous_tag_l1, $feature_level3->_tag_value('gene_id')); + create_or_replace_tag($feature_level3, 'gene_id', $gene_id); + } + #Check add transcript_id + if(! $feature_level3->has_tag('transcript_id')){ + $feature_level3->add_tag_value('transcript_id', $transcript_id); + } + elsif($feature_level3->_tag_value('transcript_id') ne $transcript_id){ #transcript_id different, we replace it. + warn("Level3 ".$feature_level3->_tag_value('ID').": We replace the transcript_id ".$feature_level3->_tag_value('transcript_id')." by ".$transcript_id.". We save original transcript_id into $previous_tag_l2 attribute.\n"); + create_or_replace_tag($feature_level3, $previous_tag_l2, $feature_level3->_tag_value('transcript_id')); create_or_replace_tag($feature_level3, 'transcript_id', $transcript_id); - } - } - } - } - - ## add level2 missing information gene_id - if(! $feature_level2->has_tag('gene_id')) { - $feature_level2->add_tag_value('gene_id', $gene_id); - } - elsif($feature_level2->_tag_value('gene_id') ne $gene_id) { #gene_id different, we replace it. - warn("Level2 ".$feature_level2->_tag_value('ID').": We replace the gene_id ".$feature_level2->_tag_value('gene_id')." by ".$gene_id.". We save original gene_id into $previous_tag_l1 attribute.\n"); + } + } + } + } + + ## add level2 missing information gene_id + if(! $feature_level2->has_tag('gene_id')) { + $feature_level2->add_tag_value('gene_id', $gene_id); + } + elsif($feature_level2->_tag_value('gene_id') ne $gene_id) { #gene_id different, we replace it. + warn("Level2 ".$feature_level2->_tag_value('ID').": We replace the gene_id ".$feature_level2->_tag_value('gene_id')." by ".$gene_id.". We save original gene_id into $previous_tag_l1 attribute.\n"); create_or_replace_tag($feature_level2, $previous_tag_l1, $feature_level2->_tag_value('gene_id')); create_or_replace_tag($feature_level2, 'gene_id', $gene_id); - } - # add level2 missing information transcript_id - if(! $feature_level2->has_tag('transcript_id')){ - $feature_level2->add_tag_value('transcript_id', $transcript_id); - } - elsif($feature_level2->_tag_value('transcript_id') ne $transcript_id){ #gene_id transcript_id, we replace it. - warn("Level2 ".$feature_level2->_tag_value('ID').": We replace the transcript_id ".$feature_level2->_tag_value('transcript_id')." by ".$transcript_id.". We save original transcript_id into $previous_tag_l2 attribute.\n"); + } + # add level2 missing information transcript_id + if(! $feature_level2->has_tag('transcript_id')){ + $feature_level2->add_tag_value('transcript_id', $transcript_id); + } + elsif($feature_level2->_tag_value('transcript_id') ne $transcript_id){ #gene_id transcript_id, we replace it. + warn("Level2 ".$feature_level2->_tag_value('ID').": We replace the transcript_id ".$feature_level2->_tag_value('transcript_id')." by ".$transcript_id.". We save original transcript_id into $previous_tag_l2 attribute.\n"); create_or_replace_tag($feature_level2, $previous_tag_l2, $feature_level2->_tag_value('transcript_id')); - create_or_replace_tag($feature_level2, 'transcript_id', $transcript_id); - } - } - } - } - - ## add level1 missing information gene_id - if(! $feature_level1->has_tag('gene_id')) { - $gene_id = $feature_level1->_tag_value('ID') if (! $gene_id); - $feature_level1->add_tag_value('gene_id', $gene_id); - } - elsif($feature_level1->_tag_value('gene_id') ne $gene_id) { #gene_id different, we replace it. - warn("Level1 ".$feature_level1->_tag_value('ID').": We replace the gene_id ".$feature_level1->_tag_value('gene_id')." by ".$gene_id.". We save original gene_id into $previous_tag_l1 attribute.\n"); + create_or_replace_tag($feature_level2, 'transcript_id', $transcript_id); + } + } + } + } + + ## add level1 missing information gene_id + if(! $feature_level1->has_tag('gene_id')) { + $gene_id = $feature_level1->_tag_value('ID') if (! $gene_id); + $feature_level1->add_tag_value('gene_id', $gene_id); + } + elsif($feature_level1->_tag_value('gene_id') ne $gene_id) { #gene_id different, we replace it. + warn("Level1 ".$feature_level1->_tag_value('ID').": We replace the gene_id ".$feature_level1->_tag_value('gene_id')." by ".$gene_id.". We save original gene_id into $previous_tag_l1 attribute.\n"); create_or_replace_tag($feature_level1, $previous_tag_l1, $feature_level1->_tag_value('gene_id')); create_or_replace_tag($feature_level1,'gene_id', $gene_id); - } + } # Save used ID $keep_track_gene_id{$gene_id}++; $keep_track_transcript_id{$transcript_id}++ if ($transcript_id); - } - } + } + } } if (! $relax){ # convert correct _convert_feature_type($hash_omniscient, $gtf_version, $verbose); - } # print results @@ -432,7 +429,7 @@ sub _remove_cds{ @{$hash_omniscient->{'level3'}{'cds'}{$id_l2}} = @new_cds_list; } -# Make a uniq string id for comprison. +# Make a uniq string id for comparison. # Needed because ID is not enough because CDS can share identifiers sub _uniq_comparison{ my ($feature)=@_; @@ -462,6 +459,7 @@ sub _print_omniscient_filter{ my $feature_l1 = $hash_omniscient->{'level1'}{$primary_tag_l1}{$id_tag_key_level1}; my $primary_tag_l1_gtf = lc($feature_l1->primary_tag()); + _deal_with_double_attributes($feature_l1); $gffout->write_feature($feature_l1); # print feature # ----- LEVEL 2 ----- @@ -470,6 +468,7 @@ sub _print_omniscient_filter{ foreach my $feature_level2 ( sort { ncmp ($a->start.$a->end.$a->_tag_value('ID'), $b->start.$b->end.$b->_tag_value('ID') ) } @{$hash_omniscient->{'level2'}{$primary_tag_l2}{$id_tag_key_level1}}) { my $primary_tag_l2_gtf = lc($feature_level2->primary_tag()); + _deal_with_double_attributes($feature_level2); $gffout->write_feature($feature_level2); # print feature # ----- LEVEL 3 ----- @@ -480,6 +479,7 @@ sub _print_omniscient_filter{ # FIRST EXON if ( exists_keys( $hash_omniscient, ('level3', 'exon', $level2_ID) ) ){ foreach my $feature_level3 ( sort {$a->start <=> $b->start} @{$hash_omniscient->{'level3'}{'exon'}{$level2_ID}}) { + _deal_with_double_attributes($feature_level3); $gffout->write_feature($feature_level3); push @l3_done, 'exon'; } @@ -488,6 +488,7 @@ sub _print_omniscient_filter{ # SECOND CDS if ( exists_keys( $hash_omniscient, ('level3', 'cds', $level2_ID) ) ){ foreach my $feature_level3 ( sort {$a->start <=> $b->start} @{$hash_omniscient->{'level3'}{'cds'}{$level2_ID}}) { + _deal_with_double_attributes($feature_level3); $gffout->write_feature($feature_level3); push @l3_done, 'cds'; } @@ -501,6 +502,7 @@ sub _print_omniscient_filter{ my $primary_tag_l3_gtf = lc($hash_omniscient->{'level3'}{$primary_tag_l3}{$level2_ID}->[0]->primary_tag() ); foreach my $feature_level3 ( sort {$a->start <=> $b->start} @{$hash_omniscient->{'level3'}{$primary_tag_l3}{$level2_ID}}) { + _deal_with_double_attributes($feature_level3); $gffout->write_feature($feature_level3); } } @@ -513,6 +515,34 @@ sub _print_omniscient_filter{ } } +# @Purpose: Modify the gene_id and transcript_id attributes to avoid double values +# @input: 1 => feature +# @output none => none +sub _deal_with_double_attributes{ + my ($feature) = @_ ; + + if ($feature->has_tag('gene_id')){ + my @gene_id_att = $feature->get_tag_values('gene_id'); + if (scalar(@gene_id_att) > 1){ + warn("Gene_id have several values, we will keep only the first one.\n"); + my $gene_id = shift @gene_id_att; # get first value + $feature->remove_tag('gene_id'); + $feature->add_tag_value('gene_id', $gene_id); + $feature->add_tag_value('agat_other_gene_id', @gene_id_att); + } + } + if ($feature->has_tag('transcript_id')){ + my @transcript_id_att = $feature->get_tag_values('transcript_id'); + if (scalar(@transcript_id_att) > 1){ + my $transcript_id = shift @transcript_id_att; # get first value + warn("Transcript_id have several values, we will keep only the first one.\n"); + $feature->remove_tag('transcript_id'); + $feature->add_tag_value('transcript_id', $transcript_id); + $feature->add_tag_value('agat_other_transcript_id', @transcript_id_att); + } + } +} + # @Purpose: Print the headers when first time we access the fh # @input: 2 => ref omniscient, gff fh # @output none => none diff --git a/lib/AGAT/OmniscientTool.pm b/lib/AGAT/OmniscientTool.pm index 52c22b89..28b01cd5 100644 --- a/lib/AGAT/OmniscientTool.pm +++ b/lib/AGAT/OmniscientTool.pm @@ -382,7 +382,7 @@ sub merge_omniscients { else{ #print "INFO level1: Parent $id_l1 already exist. We generate a new one to avoid collision !\n"; my $feature = $hash_omniscient2->{'level1'}{$tag_l1}{$id_l1}; - $uID = replace_by_uniq_ID( $feature, $hash_whole_IDs, $hash2_whole_IDs, $miscCount); + $uID = replace_by_uniq_ID( $feature, $hash_whole_IDs, $hash2_whole_IDs, $miscCount, "merge_omniscients"); $hash_omniscient1->{'level1'}{$tag_l1}{lc($uID)} = $hash_omniscient2->{'level1'}{$tag_l1}{$id_l1}; # save feature level1 $new_parent=1; } @@ -407,7 +407,7 @@ sub merge_omniscients { if ( exists_keys ( $hash_whole_IDs,($id_l2) ) ){ #print "INFO level2: Parent $id_l2 already exist. We generate a new one to avoid collision !\n"; - $uID_l2 = replace_by_uniq_ID($feature_l2, $hash_whole_IDs, $hash2_whole_IDs, $miscCount); + $uID_l2 = replace_by_uniq_ID($feature_l2, $hash_whole_IDs, $hash2_whole_IDs, $miscCount, "merge_omniscients"); $new_parent_l2=1; } else{$hash_whole_IDs->{$id_l2}++;} @@ -430,7 +430,7 @@ sub merge_omniscients { if ( exists_keys ( $hash_whole_IDs,($id_l3) ) ){ # print "INFO level3: Parent $id_l3 already exist. We generate a new one to avoid collision !\n"; - $uID_l3 = replace_by_uniq_ID($feature_l3, $hash_whole_IDs, $hash2_whole_IDs, $miscCount); + $uID_l3 = replace_by_uniq_ID($feature_l3, $hash_whole_IDs, $hash2_whole_IDs, $miscCount, "merge_omniscients"); } else{$hash_whole_IDs->{$id_l3}++;} } @@ -563,9 +563,17 @@ sub merge_overlap_loci{ # update atttribute except ID and Parent for L1: my @list_tag_l2 = $omniscient->{'level1'}{$tag_l1}{$id2_l1}->get_all_tags(); foreach my $tag (@list_tag_l2){ - if(lc($tag) ne "parent" and lc($tag) ne "id"){ + if(lc($tag) ne "parent" and lc($tag) ne "id" and lc($tag) ne "gene_id"){ my @tag_values = $omniscient->{'level1'}{$tag_l1}{$id2_l1}->get_tag_values($tag); create_or_append_tag($omniscient->{'level1'}{$tag_l1}{$id_l1}, $tag , \@tag_values); + } else { + my @tag_values = $omniscient->{'level1'}{$tag_l1}{$id2_l1}->get_tag_values($tag); + foreach my $tag_value (@tag_values){ + # the suffix merge_omniscients is added by merge_omniscients when ID had same name. No need to keep the fake ID provided + if ( $tag_value !~ /^merge_omniscients/ ) { + create_or_append_tag($omniscient->{'level1'}{$tag_l1}{$id_l1}, "merged_".$tag , $tag_value); + } + } } } # remove the level1 of the ovelaping one @@ -606,7 +614,12 @@ sub merge_overlap_loci{ my @list_tag_l2 = $common->get_all_tags(); foreach my $tag (@list_tag_l2){ my @tag_values = $common->get_tag_values($tag); - create_or_append_tag($kept_l2, "merged_".$tag , \@tag_values); + foreach my $tag_value (@tag_values){ + # the suffix merge_overlap_loci is added by merge_omniscients when ID had same name. No need to keep the fake ID provided + if ( $tag_value !~ /^merge_omniscients/ ) { + create_or_append_tag($kept_l2, "merged_".$tag , $tag_value); + } + } } } } @@ -1875,13 +1888,15 @@ sub get_all_IDs{ } # @Purpose: Replace ID by Uniq ID and modify all parent attribute of child feature to stay in line with the modification -# @input: 4 => feature objetc, hash of ids, hash of ids, hash of feature counted to give more rapidly a name +# @input: 5 => feature objetc, hash of ids, hash of ids, hash of feature counted to give more rapidly a name, prefix to choose a specific prefix (may be usefull for downstream process to recognize who/why asked for a new ID) # @output: uniq ID sub replace_by_uniq_ID{ - my ($feature, $hash_whole_IDs, $hash2_whole_IDs, $miscCount) = @_; + my ($feature, $hash_whole_IDs, $hash2_whole_IDs, $miscCount, $prefix) = @_; my $id = $feature->_tag_value('ID'); - my $prefix = "IDmodified"; + if (! $prefix){ + $prefix = "IDmodified"; + } my $key; if($prefix){ diff --git a/share/agat_config.yaml b/share/agat_config.yaml index 3b9b87af..299053c4 100644 --- a/share/agat_config.yaml +++ b/share/agat_config.yaml @@ -17,6 +17,11 @@ gff_output_version: 3 # relax = all feature types will be accepted gtf_output_version: relax +# deflate multi-values attribute +# GTF: attribute_tag "att_value1" "att_value2" "att_value3" => attribute_tag "att_value1"; attribute_tag2 "att_value2"; attribute_tag3 "att_value3" +# GFF: attribute_tag=att_value1,att_value2,att_value3 => attribute_tag=att_value1;attribute_tag2=att_value2;attribute_tag3=att_value3; +deflate_attribute: false + # Integer - verbosity 0,1,2,3,4. 0 is quiet verbose: 1 diff --git a/t/config.t b/t/config.t index 79ecaf6f..5dfbc7d3 100644 --- a/t/config.t +++ b/t/config.t @@ -40,6 +40,7 @@ system("$script config -e \\ --gff_output_version 2 \\ --gtf_output_version 2 \\ --debug \\ + --deflate_attribute \\ --no-check_all_level1_locations \\ --no-check_identical_isoforms \\ --no-check_utrs \\ diff --git a/t/config/out/agat_config.yaml b/t/config/out/agat_config.yaml index 48715279..1537fadb 100644 --- a/t/config/out/agat_config.yaml +++ b/t/config/out/agat_config.yaml @@ -12,6 +12,7 @@ check_utrs: false clean_attributes_from_template: true create_l3_for_l2_orphan: false debug: true +deflate_attribute: true force_gff_input_version: 3 gff_output_version: 2 gtf_output_version: 2 diff --git a/t/gff_other.t b/t/gff_other.t index ae38a000..cca172a0 100644 --- a/t/gff_other.t +++ b/t/gff_other.t @@ -2,7 +2,7 @@ use strict; use warnings; -use Test::More tests => 8; +use Test::More tests => 9; =head1 DESCRIPTION @@ -101,7 +101,7 @@ $correct_output = "$output_folder/issue448.gtf"; system("$script_agat config --expose --output_format gtf 2>&1 1>/dev/null"); system("$script --g $input_folder/issue448.gtf -o $pathtmp 2>&1 1>/dev/null"); -ok( system("diff $pathtmp $correct_output") == 0, "issue441 check"); +ok( system("diff $pathtmp $correct_output") == 0, "issue448 check"); unlink $pathtmp; unlink $config; @@ -109,5 +109,17 @@ unlink $config; $correct_output = "$output_folder/issue448.gff"; system("$script --g $input_folder/issue448.gtf -o $pathtmp 2>&1 1>/dev/null"); -ok( system("diff $pathtmp $correct_output") == 0, "issue441 check"); -unlink $pathtmp; \ No newline at end of file +ok( system("diff $pathtmp $correct_output") == 0, "issue448 check"); +unlink $pathtmp; + +# --------- Issue 457 multi-values attributes (gene_name "26266" "MT-TL1";) can be deflated to be compliant with GTF and CellRanger + +$script = $script_prefix."bin/agat_convert_sp_gff2gtf.pl"; +$correct_output = "$output_folder/issue457.gtf"; + +system("$script_agat config --expose --deflate_attribute 2>&1 1>/dev/null"); +system("$script --gff $input_folder/issue457.gff -o $pathtmp 2>&1 1>/dev/null"); + +ok( system("diff $pathtmp $correct_output") == 0, "issue457 check"); +unlink $pathtmp; +unlink $config; \ No newline at end of file diff --git a/t/gff_other/in/issue457.gff b/t/gff_other/in/issue457.gff new file mode 100644 index 00000000..48ff345d --- /dev/null +++ b/t/gff_other/in/issue457.gff @@ -0,0 +1,4 @@ +##gff-version 3 +chrM AGAT gene 3230 3304 . + . ID=agat-gene-1;gene_id=26266;gene_name=26266,MT-TL1;gene_type=misc_RNA;hgnc_id=HGNC:7490;level=3;merged_ID=ENSG00000209082.1;merged_gene_id=ENSG00000209082.1,26266;tag=basic,Ensembl_canonical;transcript_name=26266,MT-TL1-201;transcript_support_level=NA;transcript_type=misc_RNA +chrM ENSEMBL transcript 3230 3304 . + . ID=26266;Parent=agat-gene-1;gene_id=26266;gene_name=26266;gene_type=misc_RNA;level=3;merged_ID=ENST00000386347.1;merged_Parent=ENSG00000209082.1;merged_gene_id=ENSG00000209082.1,26266;merged_gene_name=MT-TL1,26266;merged_gene_type=misc_RNA;merged_hgnc_id=HGNC:7490;merged_level=3;merged_tag=basic,Ensembl_canonical;merged_transcript_id=ENST00000386347.1,26266;merged_transcript_name=MT-TL1-201,26266;merged_transcript_support_level=NA;merged_transcript_type=misc_RNA;transcript_id=26266;transcript_name=26266;transcript_type=misc_RNA +chrM ENSEMBL exon 3230 3304 . + . ID=agat-exon-1;Parent=26266;gene_id=26266;gene_name=26266;gene_type=misc_RNA;level=3;transcript_id=26266;transcript_name=26266;transcript_type=misc_RNA diff --git a/t/gff_other/out/issue457.gtf b/t/gff_other/out/issue457.gtf new file mode 100644 index 00000000..93ba5861 --- /dev/null +++ b/t/gff_other/out/issue457.gtf @@ -0,0 +1,5 @@ +##gtf-version X +# GFF-like GTF i.e. not checked against any GTF specification. Conversion based on GFF input, standardised by AGAT. +chrM AGAT gene 3230 3304 . + . gene_id "26266"; ID "agat-gene-1"; gene_name "26266"; gene_name_1 "MT-TL1"; gene_type "misc_RNA"; hgnc_id "HGNC:7490"; level "3"; merged_ID "ENSG00000209082.1"; merged_gene_id "ENSG00000209082.1"; merged_gene_id_1 "26266"; tag "basic"; tag_1 "Ensembl_canonical"; transcript_name "26266"; transcript_name_1 "MT-TL1-201"; transcript_support_level "NA"; transcript_type "misc_RNA"; +chrM ENSEMBL transcript 3230 3304 . + . gene_id "26266"; transcript_id "26266"; ID "26266"; Parent "agat-gene-1"; gene_name "26266"; gene_type "misc_RNA"; level "3"; merged_ID "ENST00000386347.1"; merged_Parent "ENSG00000209082.1"; merged_gene_id "ENSG00000209082.1"; merged_gene_id_1 "26266"; merged_gene_name "MT-TL1"; merged_gene_name_1 "26266"; merged_gene_type "misc_RNA"; merged_hgnc_id "HGNC:7490"; merged_level "3"; merged_tag "basic"; merged_tag_1 "Ensembl_canonical"; merged_transcript_id "ENST00000386347.1"; merged_transcript_id_1 "26266"; merged_transcript_name "MT-TL1-201"; merged_transcript_name_1 "26266"; merged_transcript_support_level "NA"; merged_transcript_type "misc_RNA"; transcript_name "26266"; transcript_type "misc_RNA"; +chrM ENSEMBL exon 3230 3304 . + . gene_id "26266"; transcript_id "26266"; ID "agat-exon-1"; Parent "26266"; gene_name "26266"; gene_type "misc_RNA"; level "3"; transcript_name "26266"; transcript_type "misc_RNA"; diff --git a/t/gff_syntax/out/15_correct_output.gff b/t/gff_syntax/out/15_correct_output.gff index 2bf3a620..29e7635d 100644 --- a/t/gff_syntax/out/15_correct_output.gff +++ b/t/gff_syntax/out/15_correct_output.gff @@ -4,7 +4,7 @@ scaffold625 maker match_part 337818 337971 . + . ID=CLUHART00000008717:exon:1404 scaffold625 maker match_part 340733 340841 . + . ID=CLUHART00000008717:exon:1405;Parent=CLUHART00000008717;Target=CLUHART00000008717 155 263 scaffold625 maker match_part 341518 341628 . + . ID=CLUHART00000008717:exon:1406;Parent=CLUHART00000008717;Target=CLUHART00000008717 264 374 scaffold625 maker match_part 341964 343277 . + . ID=CLUHART00000008717:exon:1407;Parent=CLUHART00000008717;Target=CLUHART00000008717 375 1688 -scaffold789 maker match 558184 564780 . + . ID=CLUHART00000006146;Parent=CLUHARG00000003852 +scaffold789 maker match 558184 564780 . + . ID=CLUHART00000006146;Parent=CLUHARG00000003852;merged_ID=CLUHART00000006147;merged_Parent=CLUHARG00000003852 scaffold789 maker match_part 558184 560123 . + . ID=agat-exon-4;Parent=CLUHART00000006146;Target=CLUHART00000006146 1 1940;merged_ID=CLUHART00000006146:exon:995;merged_Parent=CLUHART00000006147;merged_Target=CLUHART00000006147 1 1940 scaffold789 maker match_part 561401 561519 . + . ID=agat-exon-5;Parent=CLUHART00000006146;Target=CLUHART00000006146 1941 2059;merged_ID=CLUHART00000006146:exon:996;merged_Parent=CLUHART00000006147;merged_Target=CLUHART00000006147 1941 2059 scaffold789 maker match_part 562057 562121 . + . ID=CLUHART00000006147:exon:999;Parent=CLUHART00000006146;Target=CLUHART00000006147 2060 2124 diff --git a/t/gff_syntax/out/25_correct_output.gff b/t/gff_syntax/out/25_correct_output.gff index 170b576f..da45d281 100644 --- a/t/gff_syntax/out/25_correct_output.gff +++ b/t/gff_syntax/out/25_correct_output.gff @@ -21,7 +21,7 @@ scaffold1 StringTie exon 12106 13353 1000.00 - . ID=agat-exon-13;Parent=MSTRG.6. scaffold1 AGAT gene 21499 23178 . . . ID=agat-gene-3;geneID=MSTRG.7 scaffold1 StringTie transcript 21499 23178 1000.00 . . ID=MSTRG.7.1;Parent=agat-gene-3;geneID=MSTRG.7 scaffold1 StringTie exon 21499 23178 1000.00 . . ID=agat-exon-14;Parent=MSTRG.7.1;cov=207.398804 -scaffold1 AGAT gene 44218 47964 . - . ID=agat-gene-4;geneID=MSTRG.11 +scaffold1 AGAT gene 44218 47964 . - . ID=agat-gene-4;geneID=MSTRG.11;merged_ID=agat-gene-5,agat-gene-6 scaffold1 StringTie transcript 44218 47964 1000.00 - . ID=MSTRG.11.1;Parent=agat-gene-4;geneID=MSTRG.11 scaffold1 StringTie exon 44218 45365 1000.00 - . ID=agat-exon-15;Parent=MSTRG.11.1;cov=3001.629883 scaffold1 StringTie exon 47660 47706 1000.00 - . ID=agat-exon-16;Parent=MSTRG.11.1;cov=4399.870117 diff --git a/t/scripts_output.t b/t/scripts_output.t index 487bb26b..d5da47ec 100644 --- a/t/scripts_output.t +++ b/t/scripts_output.t @@ -622,6 +622,12 @@ system(" $script --gff $input_folder/agat_sp_merge_annotations/fileA.gff --gff ok( system("diff $result $outtmp") == 0, "output $script"); unlink $outtmp; +$result = "$output_folder/agat_sp_merge_annotations_3.gff"; +system(" $script --gff $input_folder/agat_sp_merge_annotations/test457_A.gff --gff $input_folder/agat_sp_merge_annotations/test457_B.gff -o $outtmp 2>&1 1>/dev/null"); +#run test +ok( system("diff $result $outtmp") == 0, "output $script"); +unlink $outtmp; + # ------------------- check agat_sp_move_attributes_within_records script------------------- $script = $script_prefix."bin/agat_sp_move_attributes_within_records.pl"; diff --git a/t/scripts_output/in/agat_sp_merge_annotations/test457_A.gff b/t/scripts_output/in/agat_sp_merge_annotations/test457_A.gff new file mode 100644 index 00000000..faff9ce3 --- /dev/null +++ b/t/scripts_output/in/agat_sp_merge_annotations/test457_A.gff @@ -0,0 +1,4 @@ +chrM ENSEMBL transcript 3230 3304 . + . gene_id "26266"; transcript_id "26266"; gene_type "misc_RNA"; gene_name "26266"; transcript_type "misc_RNA"; transcript_name "26266"; level 3; +chrM ENSEMBL exon 3230 3304 . + . gene_id "26266"; transcript_id "26266"; gene_type "misc_RNA"; gene_name "26266"; transcript_type "misc_RNA"; transcript_name "26266"; level 3; +chrM ENSEMBL transcript 3230 3304 . + . gene_id "ENSG00000209082.1"; transcript_id "ENST00000386347.1"; gene_type "misc_RNA"; gene_name "MT-TL1"; transcript_type "misc_RNA"; transcript_name "MT-TL1-201"; level 3; transcript_support_level "NA"; hgnc_id "HGNC:7490"; tag "basic"; tag "Ensembl_canonical"; +chrM ENSEMBL exon 3230 3304 . + . gene_id "ENSG00000209082.1"; transcript_id "ENST00000386347.1"; gene_type "misc_RNA"; gene_name "MT-TL1"; transcript_type "misc_RNA"; transcript_name "MT-TL1-201"; exon_number 1; exon_id "ENSE00002006242.1"; level 3; transcript_support_level "NA"; hgnc_id "HGNC:7490"; tag "basic"; tag "Ensembl_canonical"; diff --git a/t/scripts_output/in/agat_sp_merge_annotations/test457_B.gff b/t/scripts_output/in/agat_sp_merge_annotations/test457_B.gff new file mode 100644 index 00000000..b158b5e3 --- /dev/null +++ b/t/scripts_output/in/agat_sp_merge_annotations/test457_B.gff @@ -0,0 +1,2 @@ +chrM ENSEMBL transcript 3230 3304 . + . gene_id "26266"; transcript_id "26266"; gene_type "misc_RNA"; gene_name "26266"; transcript_type "misc_RNA"; transcript_name "26266"; level 3; +chrM ENSEMBL exon 3230 3304 . + . gene_id "26266"; transcript_id "26266"; gene_type "misc_RNA"; gene_name "26266"; transcript_type "misc_RNA"; transcript_name "26266"; level 3; \ No newline at end of file diff --git a/t/scripts_output/out/agat_sp_complement_annotations_1.gff b/t/scripts_output/out/agat_sp_complement_annotations_1.gff index 486495da..4c549de0 100644 --- a/t/scripts_output/out/agat_sp_complement_annotations_1.gff +++ b/t/scripts_output/out/agat_sp_complement_annotations_1.gff @@ -4,17 +4,17 @@ NC_003070.9 RefSeq gene 3631 5899 . + . ID=IDmodified-gene-1;locus_tag=AT1G01010 NC_003070.9 AGAT mRNA 3631 5899 . + . ID=NC_003070.9:NAC001;Parent=IDmodified-gene-1;locus_tag=AT1G01010 NC_003070.9 RefSeq exon 3631 3913 . + . ID=NM_099983.2;Parent=NC_003070.9:NAC001;gbkey=mRNA;locus_tag=AT1G01010 -NC_003070.9 RefSeq exon 3996 4276 . + . ID=IDmodified-exon-1;Parent=NC_003070.9:NAC001;gbkey=mRNA;locus_tag=AT1G01010 -NC_003070.9 RefSeq exon 4486 4605 . + . ID=IDmodified-exon-2;Parent=NC_003070.9:NAC001;gbkey=mRNA;locus_tag=AT1G01010 -NC_003070.9 RefSeq exon 4706 5095 . + . ID=IDmodified-exon-3;Parent=NC_003070.9:NAC001;gbkey=mRNA;locus_tag=AT1G01010 -NC_003070.9 RefSeq exon 5174 5326 . + . ID=IDmodified-exon-4;Parent=NC_003070.9:NAC001;gbkey=mRNA;locus_tag=AT1G01010 -NC_003070.9 RefSeq exon 5439 5899 . + . ID=IDmodified-exon-5;Parent=NC_003070.9:NAC001;gbkey=mRNA;locus_tag=AT1G01010 +NC_003070.9 RefSeq exon 3996 4276 . + . ID=merge_omniscients-exon-1;Parent=NC_003070.9:NAC001;gbkey=mRNA;locus_tag=AT1G01010 +NC_003070.9 RefSeq exon 4486 4605 . + . ID=merge_omniscients-exon-2;Parent=NC_003070.9:NAC001;gbkey=mRNA;locus_tag=AT1G01010 +NC_003070.9 RefSeq exon 4706 5095 . + . ID=merge_omniscients-exon-3;Parent=NC_003070.9:NAC001;gbkey=mRNA;locus_tag=AT1G01010 +NC_003070.9 RefSeq exon 5174 5326 . + . ID=merge_omniscients-exon-4;Parent=NC_003070.9:NAC001;gbkey=mRNA;locus_tag=AT1G01010 +NC_003070.9 RefSeq exon 5439 5899 . + . ID=merge_omniscients-exon-5;Parent=NC_003070.9:NAC001;gbkey=mRNA;locus_tag=AT1G01010 NC_003070.9 RefSeq CDS 3760 3913 . + 0 ID=agat-cds-1;Parent=NC_003070.9:NAC001;locus_tag=AT1G01010 -NC_003070.9 RefSeq CDS 3996 4276 . + 2 ID=IDmodified-cds-1;Parent=NC_003070.9:NAC001;locus_tag=AT1G01010 -NC_003070.9 RefSeq CDS 4486 4605 . + 0 ID=IDmodified-cds-2;Parent=NC_003070.9:NAC001;locus_tag=AT1G01010 -NC_003070.9 RefSeq CDS 4706 5095 . + 0 ID=IDmodified-cds-3;Parent=NC_003070.9:NAC001;locus_tag=AT1G01010 -NC_003070.9 RefSeq CDS 5174 5326 . + 0 ID=IDmodified-cds-4;Parent=NC_003070.9:NAC001;locus_tag=AT1G01010 -NC_003070.9 RefSeq CDS 5439 5630 . + 0 ID=IDmodified-cds-5;Parent=NC_003070.9:NAC001;locus_tag=AT1G01010 +NC_003070.9 RefSeq CDS 3996 4276 . + 2 ID=merge_omniscients-cds-1;Parent=NC_003070.9:NAC001;locus_tag=AT1G01010 +NC_003070.9 RefSeq CDS 4486 4605 . + 0 ID=merge_omniscients-cds-2;Parent=NC_003070.9:NAC001;locus_tag=AT1G01010 +NC_003070.9 RefSeq CDS 4706 5095 . + 0 ID=merge_omniscients-cds-3;Parent=NC_003070.9:NAC001;locus_tag=AT1G01010 +NC_003070.9 RefSeq CDS 5174 5326 . + 0 ID=merge_omniscients-cds-4;Parent=NC_003070.9:NAC001;locus_tag=AT1G01010 +NC_003070.9 RefSeq CDS 5439 5630 . + 0 ID=merge_omniscients-cds-5;Parent=NC_003070.9:NAC001;locus_tag=AT1G01010 NC_003070.9 AGAT five_prime_UTR 3631 3759 . + . ID=agat-five_prime_utr-1;Parent=NC_003070.9:NAC001;gbkey=mRNA;locus_tag=AT1G01010 NC_003070.9 RefSeq start_codon 3760 3762 . + 0 ID=agat-start_codon-1;Parent=NC_003070.9:NAC001;locus_tag=AT1G01010 NC_003070.9 RefSeq stop_codon 5628 5630 . + 0 ID=agat-stop_codon-1;Parent=NC_003070.9:NAC001;locus_tag=AT1G01010 diff --git a/t/scripts_output/out/agat_sp_merge_annotations_1.gff b/t/scripts_output/out/agat_sp_merge_annotations_1.gff index 5f68f1f3..bb16cd3f 100644 --- a/t/scripts_output/out/agat_sp_merge_annotations_1.gff +++ b/t/scripts_output/out/agat_sp_merge_annotations_1.gff @@ -1,6 +1,6 @@ ##gff-version 3 chr10 BestRefSeq gene 123237824 123357992 . - . ID=gene-FGFR2;ontology=G0222 -chr10 BestRefSeq mRNA 123237824 123357992 . - . ID=rna-NM_022970.3;Parent=gene-FGFR2;ontology=G0222;merged_ID=IDmodified-mrna-1;merged_Ontology=G0333;merged_Parent=IDmodified-gene-1 +chr10 BestRefSeq mRNA 123237824 123357992 . - . ID=rna-NM_022970.3;Parent=gene-FGFR2;ontology=G0222;merged_Ontology=G0333 chr10 BestRefSeq exon 123237824 123239535 . - . ID=exon-NM_022970.3-18;Parent=rna-NM_022970.3 chr10 BestRefSeq exon 123243212 123243317 . - . ID=exon-NM_022970.3-17;Parent=rna-NM_022970.3 chr10 BestRefSeq exon 123353223 123353481 . - . ID=exon-NM_022970.3-2;Parent=rna-NM_022970.3 diff --git a/t/scripts_output/out/agat_sp_merge_annotations_2.gff b/t/scripts_output/out/agat_sp_merge_annotations_2.gff index 1c3846b2..0441a6c6 100644 --- a/t/scripts_output/out/agat_sp_merge_annotations_2.gff +++ b/t/scripts_output/out/agat_sp_merge_annotations_2.gff @@ -1,3 +1,3 @@ ##gff-version 3 -chr1 AUGUSTUS gene 1000424 1039237 . + . ID=A +chr1 AUGUSTUS gene 1000424 1039237 . + . ID=A;merged_ID=B chr1 AUGUSTUS mRNA 1000424 1039237 . + . ID=A.t1;Parent=A;merged_ID=B.t1;merged_Parent=B diff --git a/t/scripts_output/out/agat_sp_merge_annotations_3.gff b/t/scripts_output/out/agat_sp_merge_annotations_3.gff new file mode 100644 index 00000000..48ff345d --- /dev/null +++ b/t/scripts_output/out/agat_sp_merge_annotations_3.gff @@ -0,0 +1,4 @@ +##gff-version 3 +chrM AGAT gene 3230 3304 . + . ID=agat-gene-1;gene_id=26266;gene_name=26266,MT-TL1;gene_type=misc_RNA;hgnc_id=HGNC:7490;level=3;merged_ID=ENSG00000209082.1;merged_gene_id=ENSG00000209082.1,26266;tag=basic,Ensembl_canonical;transcript_name=26266,MT-TL1-201;transcript_support_level=NA;transcript_type=misc_RNA +chrM ENSEMBL transcript 3230 3304 . + . ID=26266;Parent=agat-gene-1;gene_id=26266;gene_name=26266;gene_type=misc_RNA;level=3;merged_ID=ENST00000386347.1;merged_Parent=ENSG00000209082.1;merged_gene_id=ENSG00000209082.1,26266;merged_gene_name=MT-TL1,26266;merged_gene_type=misc_RNA;merged_hgnc_id=HGNC:7490;merged_level=3;merged_tag=basic,Ensembl_canonical;merged_transcript_id=ENST00000386347.1,26266;merged_transcript_name=MT-TL1-201,26266;merged_transcript_support_level=NA;merged_transcript_type=misc_RNA;transcript_id=26266;transcript_name=26266;transcript_type=misc_RNA +chrM ENSEMBL exon 3230 3304 . + . ID=agat-exon-1;Parent=26266;gene_id=26266;gene_name=26266;gene_type=misc_RNA;level=3;transcript_id=26266;transcript_name=26266;transcript_type=misc_RNA