Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion q2_annotate/kraken2/select.py
Original file line number Diff line number Diff line change
Expand Up @@ -262,7 +262,8 @@ def _kraken_to_ncbi_tree(df):
while stack and parent_node.length == 0:
_, parent_node = stack.pop()

if parent_node.children:
# Make sure we are not labeling infra-clades as actual tips
if parent_node.length == 1:
parent_node.children[0].is_actual_tip = True

return tree
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
100.00 180 0 R 1 root
100.00 180 0 R1 131567 cellular organisms
100.00 180 0 R2 2 Bacteria
100.00 180 0 K 1783272 Bacillati
100.00 180 0 P 201174 Actinomycetota
100.00 180 0 C 84998 Coriobacteriia
99.44 179 0 O 84999 Coriobacteriales
99.44 179 0 F 84107 Coriobacteriaceae
99.44 179 0 G 102106 Collinsella
97.78 176 10 S 74426 Collinsella aerofaciens
92.22 166 166 S1 411903 Collinsella aerofaciens ATCC 25986
1.67 3 0 G1 2637548 unclassified Collinsella
1.67 3 3 S 3132705 Collinsella sp. i05-0019-G5
0.56 1 0 O 1643822 Eggerthellales
0.56 1 0 F 1643826 Eggerthellaceae
0.56 1 0 G 644652 Gordonibacter
0.56 1 0 S 471189 Gordonibacter pamelaeae
0.56 1 1 S1 657308 Gordonibacter pamelaeae 7-10-1-b
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
50.00 1 1 U 0 unclassified
50.00 1 0 R 1 root
50.00 1 0 R1 131567 cellular organisms
50.00 1 1 R2 2 Bacteria
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
100.00 1 0 R 1 root
100.00 1 1 R1 131567 cellular organisms
36 changes: 22 additions & 14 deletions q2_annotate/kraken2/tests/test_selection.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,24 +7,23 @@
# ----------------------------------------------------------------------------
import shutil
import tempfile
import unittest

import pandas as pd
import pandas.testing
from pandas._testing import assert_frame_equal
import skbio
from pandas._testing import assert_frame_equal
from q2_types.kraken2 import (
Kraken2OutputDirectoryFormat,
Kraken2ReportDirectoryFormat,
)
from qiime2.plugin.testing import TestPluginBase

from q2_annotate.kraken2 import kraken2_to_features
from q2_annotate.kraken2.select import (
_kraken_to_ncbi_tree,
_find_lcas,
_kraken_to_ncbi_tree,
kraken2_to_mag_features,
)
from qiime2.plugin.testing import TestPluginBase

from q2_types.kraken2 import (
Kraken2ReportDirectoryFormat,
Kraken2OutputDirectoryFormat,
)


class MockTempDir(tempfile.TemporaryDirectory):
Expand Down Expand Up @@ -159,8 +158,7 @@ def test_kraken2_to_mag_features_incorrect_fraction_unclassified(self):
hits = Kraken2OutputDirectoryFormat(self.get_data_path("outputs-mags"), "r")
with self.assertRaisesRegex(
ValueError,
"fraction for MAG '8894435a-c836-4c18-b475-8b38a9ab6c6b' "
"is not .* 99.01%",
"fraction for MAG '8894435a-c836-4c18-b475-8b38a9ab6c6b' is not .* 99.01%",
):
kraken2_to_mag_features(reports, hits, 0.0)

Expand Down Expand Up @@ -245,7 +243,7 @@ def test_kraken2_to_mag_features_unclassified_no_add_up(self):

with self.assertRaisesRegex(
ValueError,
"fraction for MAG '8894435a-c836-4c18-b475-8b38a9ab6c6b' " "is not 100.0",
"fraction for MAG '8894435a-c836-4c18-b475-8b38a9ab6c6b' is not 100.0",
):
kraken2_to_mag_features(reports, hits, 0.1)

Expand Down Expand Up @@ -380,7 +378,8 @@ def test_find_lcas_mode_lca_all_unclassified(self):
# pandas.testing.assert_frame_equal(obs, exp)


class TestKrakenSelectEdgeCases(unittest.TestCase):
class TestKrakenSelectEdgeCases(TestPluginBase):
package = "q2_annotate.kraken2.tests"

def make_dirfmt(self, string, coverage=False):
"""
Expand Down Expand Up @@ -464,7 +463,6 @@ def test_kraken_to_ncbi_tree_no_tricks(self):
)

table, taxonomy = kraken2_to_features(dirfmt)

pandas.testing.assert_frame_equal(exp_table, table)
pandas.testing.assert_frame_equal(exp_tax, taxonomy)

Expand Down Expand Up @@ -608,3 +606,13 @@ def test_kraken_to_ncbi_tree_rankless_domain_inference(self):

pandas.testing.assert_frame_equal(exp_table, table)
pandas.testing.assert_frame_equal(exp_tax, taxonomy)

def test_kraken2_to_features_root_infraclades(self):
    """
    Check that root infra-clades are not treated as actual tips, which
    would lead to a mismatch between the feature table and the taxonomy.

    No assertions are made on the output: the test passes as long as the
    conversion of the report completes without raising.
    """
    report_path = self.get_data_path("root-infraclade-report")
    report_dirfmt = Kraken2ReportDirectoryFormat(report_path, "r")
    kraken2_to_features(report_dirfmt)
Loading