Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 1 addition & 11 deletions q2_annotate/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,13 +20,7 @@
)
from .metabat2 import metabat2
from .semibin2 import semibin2
from ._utils import (
get_feature_lengths,
multiply_tables,
_multiply_tables,
_multiply_tables_pa,
_multiply_tables_relative,
)
from ._utils import get_feature_lengths

try:
from ._version import __version__
Expand All @@ -48,10 +42,6 @@
"filter_derep_mags",
"filter_mags",
"get_feature_lengths",
"multiply_tables",
"_multiply_tables",
"_multiply_tables_pa",
"_multiply_tables_relative",
"abundance",
"filter_reads_pangenome",
"semibin2",
Expand Down
95 changes: 0 additions & 95 deletions q2_annotate/_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,17 +8,13 @@
import os
import subprocess
import hashlib
import warnings
from typing import List, Union

import pandas as pd
import skbio
import biom
import numpy as np
from q2_types.feature_data import DNASequencesDirectoryFormat

from q2_types.feature_data_mag import MAGSequencesDirFmt
from q2_types.feature_table import FeatureTable, PresenceAbsence, RelativeFrequency
from q2_types.per_sample_sequences import MultiMAGSequencesDirFmt, ContigSequencesDirFmt

EXTERNAL_CMD_WARNING = (
Expand Down Expand Up @@ -141,94 +137,3 @@ def get_feature_lengths(
df = pd.DataFrame({"id": ids, "length": lengths})
df.set_index("id", inplace=True)
return df


def _multiply(table1: biom.Table, table2: biom.Table) -> biom.Table:
    """Calculate the dot product of two biom tables.

    Observation IDs of ``table1`` are matched against sample IDs of
    ``table2``. Observations of ``table1`` that have no matching sample in
    ``table2`` are dropped (a warning reports how many) before multiplying.

    Parameters
    ----------
    table1 : biom.Table
        Table whose observation IDs are matched against the sample IDs of
        ``table2``. The caller's table is left unmodified.
    table2 : biom.Table
        Table whose sample IDs are matched against the observation IDs of
        ``table1``.

    Returns
    -------
    biom.Table
        Table with the observations of ``table2`` and the samples of
        ``table1``.

    Raises
    ------
    ValueError
        If no observation ID of ``table1`` is also a sample ID of ``table2``.
    """
    table1_obs_ids = table1.ids(axis="observation")

    # Subset table1 to only include observations present in table2's samples
    table2_sample_ids = set(table2.ids(axis="sample"))
    table1_obs_to_keep = [
        obs_id for obs_id in table1_obs_ids if obs_id in table2_sample_ids
    ]

    if not table1_obs_to_keep:
        raise ValueError(
            "No overlapping features found between table1 observations and "
            "table2 samples."
        )

    n_removed = len(table1_obs_ids) - len(table1_obs_to_keep)
    if n_removed > 0:
        warnings.warn(
            f"Removed {n_removed} "
            f"feature(s) from table1 that had no matching samples in table2."
        )

    # biom's Table.filter operates in place by default, which would silently
    # drop observations from the table object owned by the caller. Request a
    # new table instead so the input stays untouched.
    table1 = table1.filter(table1_obs_to_keep, axis="observation", inplace=False)

    # Reorder table2 samples to match table1 observations
    table2 = table2.sort_order(table1.ids(axis="observation"), axis="sample")

    # Perform sparse matrix multiplication.
    # In biom.Table, matrix_data is stored as observations x samples, so
    # table1.matrix_data.T is (table1 samples x shared IDs) and
    # table2.matrix_data.T is (shared IDs x table2 observations). The product
    # is transposed back when constructing the result so that the final table
    # is observations (from table2) x samples (from table1).
    result_matrix = table1.matrix_data.T.dot(table2.matrix_data.T)

    return biom.Table(
        result_matrix.T,
        observation_ids=table2.ids(axis="observation"),
        sample_ids=table1.ids(axis="sample"),
    )


def _multiply_tables(table1: biom.Table, table2: biom.Table) -> biom.Table:
    """Return the dot product of two feature tables, as computed by
    ``_multiply``, without any further transformation."""
    return _multiply(table1, table2)


def _multiply_tables_relative(table1: biom.Table, table2: biom.Table) -> biom.Table:
    """Return the dot product of two feature tables, normalized per sample.

    The product is computed by ``_multiply`` and then normalized in place
    along the sample axis before being returned.
    """
    product = _multiply(table1, table2)
    # Normalization happens in place on the freshly built product table.
    product.norm(axis="sample", inplace=True)
    return product


def _multiply_tables_pa(table1: biom.Table, table2: biom.Table) -> biom.Table:
    """Calculate dot product of two feature tables and convert to
    a presence-absence table.

    The product is computed by ``_multiply``; every non-zero entry of the
    product is then replaced by 1 while zero entries stay absent.

    Returns
    -------
    biom.Table
        New table with the same observation and sample IDs as the product.
    """
    result = _multiply(table1, table2)

    # Work on a copy so the product table's own matrix is not modified.
    result_data = result.matrix_data.copy()

    # Only the *stored* entries of the sparse matrix are overwritten below.
    # Drop any explicitly stored zeros first so they are not turned into 1
    # (presence) by mistake.
    result_data.eliminate_zeros()

    # Convert to presence-absence (1 if non-zero, 0 otherwise)
    result_data.data = np.ones_like(result_data.data)

    return biom.Table(
        result_data,
        observation_ids=result.ids(axis="observation"),
        sample_ids=result.ids(axis="sample"),
    )


def multiply_tables(ctx, table1, table2):
    """Dispatch the multiplication of two feature tables by their types.

    If either table is a presence-absence table the presence-absence action
    is used; otherwise, if either is a relative-frequency table the relative
    action is used; otherwise the plain multiplication action is used.
    """
    input_types = (table1.type, table2.type)

    # Presence-absence takes precedence over relative frequency.
    if any(t <= FeatureTable[PresenceAbsence] for t in input_types):
        action_name = "_multiply_tables_pa"
    elif any(t <= FeatureTable[RelativeFrequency] for t in input_types):
        action_name = "_multiply_tables_relative"
    else:
        action_name = "_multiply_tables"

    multiply = ctx.get_action("annotate", action_name)
    (result,) = multiply(table1, table2)
    return result
170 changes: 0 additions & 170 deletions q2_annotate/plugin_setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,6 @@
FeatureTable,
Frequency,
PresenceAbsence,
RelativeFrequency,
)
from q2_types.per_sample_sequences import (
SequencesWithQuality,
Expand Down Expand Up @@ -1921,175 +1920,6 @@
citations=[],
)

# Input/output descriptions shared by the registrations of the
# _multiply_tables, _multiply_tables_pa and _multiply_tables_relative methods.
multiply_input_descriptions = {
"table1": "First feature table.",
"table2": "Second feature table with matching dimension.",
}
multiply_output_descriptions = {
"result_table": (
"Feature table with the dot product of the two original tables. "
"The table will have a shape of (M x N) where M is the number of "
"rows from table1 and N is number of columns from table2."
),
}

# Register the plain multiplication method: both inputs and the output are
# FeatureTable[Frequency].
plugin.methods.register_function(
function=q2_annotate._utils._multiply_tables,
inputs={"table1": FeatureTable[Frequency], "table2": FeatureTable[Frequency]},
parameters={},
outputs=[
("result_table", FeatureTable[Frequency]),
],
input_descriptions=multiply_input_descriptions,
parameter_descriptions={},
output_descriptions=multiply_output_descriptions,
name="Multiply two feature tables.",
description=(
"Calculates the dot product of two feature tables with matching dimensions. "
"If table 1 has shape (M x N) and table 2 has shape (N x P), the resulting "
"table will have shape (M x P). Note that the tables must be identical "
"in the N dimension."
),
citations=[],
)

# Type map for the presence-absence multiplication method: every listed input
# pair contains at least one FeatureTable[PresenceAbsence] and maps to a
# FeatureTable[PresenceAbsence] output.
I_multiply_pa_table1, I_multiply_pa_table2, O_multiply_pa = TypeMap(
{
(FeatureTable[PresenceAbsence], FeatureTable[Frequency]): FeatureTable[
PresenceAbsence
],
(FeatureTable[PresenceAbsence], FeatureTable[RelativeFrequency]): FeatureTable[
PresenceAbsence
],
(FeatureTable[PresenceAbsence], FeatureTable[PresenceAbsence]): FeatureTable[
PresenceAbsence
],
(FeatureTable[Frequency], FeatureTable[PresenceAbsence]): FeatureTable[
PresenceAbsence
],
(FeatureTable[RelativeFrequency], FeatureTable[PresenceAbsence]): FeatureTable[
PresenceAbsence
],
}
)

# Register the presence-absence multiplication method using the type map
# variables defined above for its inputs and output.
plugin.methods.register_function(
function=q2_annotate._utils._multiply_tables_pa,
inputs={"table1": I_multiply_pa_table1, "table2": I_multiply_pa_table2},
parameters={},
outputs=[
("result_table", O_multiply_pa),
],
input_descriptions=multiply_input_descriptions,
parameter_descriptions={},
output_descriptions=multiply_output_descriptions,
name="Multiply two feature tables.",
description=(
"Calculates the dot product of two feature tables with matching dimensions. "
"If table 1 has shape (M x N) and table 2 has shape (N x P), the resulting "
"table will have shape (M x P). Note that the tables must be identical "
"in the N dimension."
),
citations=[],
)

# Type map for the relative-frequency multiplication method. Every listed
# input pair contains at least one FeatureTable[RelativeFrequency] (and no
# presence-absence table) and maps to FeatureTable[RelativeFrequency]:
# _multiply_tables_relative normalizes its result per sample, and the
# multiply_tables pipeline declares RelativeFrequency for these same pairs.
I_multiply_rel_table1, I_multiply_rel_table2, O_multiply_rel = TypeMap(
    {
        (FeatureTable[RelativeFrequency], FeatureTable[Frequency]): FeatureTable[
            RelativeFrequency
        ],
        (FeatureTable[Frequency], FeatureTable[RelativeFrequency]): FeatureTable[
            RelativeFrequency
        ],
        (
            FeatureTable[RelativeFrequency],
            FeatureTable[RelativeFrequency],
        ): FeatureTable[RelativeFrequency],
    }
)

# Register the relative-frequency multiplication method; its input and output
# types come from the I_multiply_rel_* / O_multiply_rel type map variables.
plugin.methods.register_function(
function=q2_annotate._utils._multiply_tables_relative,
inputs={"table1": I_multiply_rel_table1, "table2": I_multiply_rel_table2},
parameters={},
outputs=[
("result_table", O_multiply_rel),
],
input_descriptions=multiply_input_descriptions,
parameter_descriptions={},
output_descriptions=multiply_output_descriptions,
name="Multiply two feature tables.",
description=(
"Calculates the dot product of two feature tables with matching dimensions. "
"If table 1 has shape (M x N) and table 2 has shape (N x P), the resulting "
"table will have shape (M x P). Note that the tables must be identical "
"in the N dimension."
),
citations=[],
)

# Type map for the multiply_tables pipeline. It lists every supported input
# pair: Frequency x Frequency yields Frequency, any pair containing a
# PresenceAbsence table yields PresenceAbsence, and the remaining pairs that
# contain a RelativeFrequency table yield RelativeFrequency.
I_multiply_table1, I_multiply_table2, O_multiply = TypeMap(
{
(FeatureTable[Frequency], FeatureTable[Frequency]): FeatureTable[Frequency],
(FeatureTable[PresenceAbsence], FeatureTable[Frequency]): FeatureTable[
PresenceAbsence
],
(FeatureTable[PresenceAbsence], FeatureTable[RelativeFrequency]): FeatureTable[
PresenceAbsence
],
(FeatureTable[PresenceAbsence], FeatureTable[PresenceAbsence]): FeatureTable[
PresenceAbsence
],
(FeatureTable[Frequency], FeatureTable[PresenceAbsence]): FeatureTable[
PresenceAbsence
],
(FeatureTable[RelativeFrequency], FeatureTable[PresenceAbsence]): FeatureTable[
PresenceAbsence
],
(FeatureTable[Frequency], FeatureTable[RelativeFrequency]): FeatureTable[
RelativeFrequency
],
(FeatureTable[RelativeFrequency], FeatureTable[Frequency]): FeatureTable[
RelativeFrequency
],
(
FeatureTable[RelativeFrequency],
FeatureTable[RelativeFrequency],
): FeatureTable[RelativeFrequency],
}
)

# Register the multiply_tables pipeline, which selects one of the
# _multiply_tables* actions based on the types of its two inputs. The
# descriptions are spelled out inline here rather than reusing the shared
# multiply_*_descriptions dictionaries.
plugin.pipelines.register_function(
function=q2_annotate._utils.multiply_tables,
inputs={
"table1": I_multiply_table1,
"table2": I_multiply_table2,
},
parameters={},
outputs=[("result_table", O_multiply)],
input_descriptions={
"table1": "First feature table.",
"table2": "Second feature table with matching dimension.",
},
parameter_descriptions={},
output_descriptions={
"result_table": (
"Feature table with the dot product of the two original tables. "
"The table will have the shape of (M x N) where M is the number "
"of rows from table1 and N is number of columns from table2."
),
},
name="Multiply two feature tables.",
description=(
"Calculates the dot product of two feature tables with "
"matching dimensions. If table 1 has shape (M x N) and table "
"2 has shape (N x P), the resulting table will have shape "
"(M x P). Note that the tables must be identical in the N dimension."
),
citations=[],
)


TMR = TypeMatch(
[
SampleData[Kraken2Reports % Properties("reads")],
Expand Down
Loading
Loading