Skip to content
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -52,16 +52,19 @@ Once ModelArrayIO is installed, these commands are available in your terminal:

* ``.mif`` → ``.h5``: ``modelarrayio mif-to-h5``
* ``.h5`` → ``.mif``: ``modelarrayio h5-to-mif``
* ``.h5`` scalar row → ``.mif``: ``modelarrayio h5-export-mif-file``

* **Voxel-wise** data (NIfTI):

* NIfTI → ``.h5``: ``modelarrayio nifti-to-h5``
* ``.h5`` → NIfTI: ``modelarrayio h5-to-nifti``
* ``.h5`` scalar row → NIfTI: ``modelarrayio h5-export-nifti-file``

* **Greyordinate-wise** data (CIFTI-2):

* CIFTI-2 → ``.h5``: ``modelarrayio cifti-to-h5``
* ``.h5`` → CIFTI-2: ``modelarrayio h5-to-cifti``
* ``.h5`` scalar row → CIFTI-2: ``modelarrayio h5-export-cifti-file``


Storage backends: HDF5 and TileDB
Expand Down
46 changes: 38 additions & 8 deletions docs/usage.rst
Original file line number Diff line number Diff line change
Expand Up @@ -3,39 +3,39 @@ Usage
#####


**********
*********
mif-to-h5
**********
*********

.. argparse::
:ref: modelarrayio.cli.mif_to_h5._parse_mif_to_h5
:prog: modelarrayio mif-to-h5
:func: _parse_mif_to_h5


**********
***********
nifti-to-h5
**********
************
Comment thread
tsalo marked this conversation as resolved.
Outdated

.. argparse::
:ref: modelarrayio.cli.nifti_to_h5._parse_nifti_to_h5
:prog: modelarrayio nifti-to-h5
:func: _parse_nifti_to_h5


**********
***********
cifti-to-h5
**********
***********

.. argparse::
:ref: modelarrayio.cli.cifti_to_h5._parse_cifti_to_h5
:prog: modelarrayio cifti-to-h5
:func: _parse_cifti_to_h5


**********
*********
h5-to-mif
**********
*********

.. argparse::
:ref: modelarrayio.cli.h5_to_mif._parse_h5_to_mif
Expand All @@ -60,3 +60,33 @@ h5-to-cifti
:ref: modelarrayio.cli.h5_to_cifti._parse_h5_to_cifti
:prog: modelarrayio h5-to-cifti
:func: _parse_h5_to_cifti


******************
h5-export-mif-file
******************

.. argparse::
:ref: modelarrayio.cli.h5_export_mif_file._parse_h5_export_mif_file
:prog: modelarrayio h5-export-mif-file
:func: _parse_h5_export_mif_file


********************
h5-export-nifti-file
********************

.. argparse::
:ref: modelarrayio.cli.h5_export_nifti_file._parse_h5_export_nifti_file
:prog: modelarrayio h5-export-nifti-file
:func: _parse_h5_export_nifti_file


********************
h5-export-cifti-file
********************

.. argparse::
:ref: modelarrayio.cli.h5_export_cifti_file._parse_h5_export_cifti_file
:prog: modelarrayio h5-export-cifti-file
:func: _parse_h5_export_cifti_file
34 changes: 34 additions & 0 deletions modelarrayio/__about__.py
Copy link
Copy Markdown
Member

@tsalo tsalo Mar 27, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The __about__.py shouldn't be committed. It needs to be added to the gitignore.

Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
# file generated by setuptools-scm
# don't change, don't track in version control

# Public names re-exported by this generated version module.
__all__ = [
    "__version__",
    "__version_tuple__",
    "version",
    "version_tuple",
    "__commit_id__",
    "commit_id",
]

# ``typing`` is only imported for static analysis; at runtime the aliases
# fall back to plain ``object`` so the module has no import-time dependency.
TYPE_CHECKING = False
if TYPE_CHECKING:
    from typing import Tuple
    from typing import Union

    VERSION_TUPLE = Tuple[Union[int, str], ...]
    COMMIT_ID = Union[str, None]
else:
    VERSION_TUPLE = object
    COMMIT_ID = object

# Annotation-only declarations of the values assigned below (for type checkers).
version: str
__version__: str
__version_tuple__: VERSION_TUPLE
version_tuple: VERSION_TUPLE
commit_id: COMMIT_ID
__commit_id__: COMMIT_ID

__version__ = version = '0.1.dev115+g7f4c6030e'
__version_tuple__ = version_tuple = (0, 1, 'dev115', 'g7f4c6030e')

__commit_id__ = commit_id = None
56 changes: 51 additions & 5 deletions src/modelarrayio/cli/cifti_to_h5.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,15 +12,21 @@
import pandas as pd
from tqdm import tqdm

from modelarrayio.cli import diagnostics as cli_diagnostics
from modelarrayio.cli import utils as cli_utils
from modelarrayio.cli.parser_utils import add_scalar_columns_arg, add_to_modelarray_args
from modelarrayio.cli.parser_utils import (
add_diagnostics_args,
add_scalar_columns_arg,
add_to_modelarray_args,
)
from modelarrayio.utils.cifti import (
_build_scalar_sources,
_cohort_to_long_dataframe,
_load_cohort_cifti,
brain_names_to_dataframe,
extract_cifti_scalar_data,
)
from modelarrayio.utils.s3_utils import load_nibabel

logger = logging.getLogger(__name__)

Expand All @@ -38,6 +44,9 @@ def cifti_to_h5(
workers=None,
s3_workers=1,
scalar_columns=None,
no_diagnostics=False,
diagnostics_dir=None,
diagnostic_maps=None,
):
"""Load all CIFTI data and write to an HDF5 or TileDB file.

Expand Down Expand Up @@ -70,6 +79,12 @@ def cifti_to_h5(
Number of workers for parallel S3 downloads
scalar_columns : :obj:`list`
List of scalar columns to use
no_diagnostics : :obj:`bool`
Disable diagnostic outputs in native format.
diagnostics_dir : :obj:`str` or :obj:`None`
Output directory for diagnostics. Defaults to ``<output_stem>_diagnostics``.
diagnostic_maps : :obj:`list` or :obj:`None`
Diagnostic maps to write. Supported: ``mean``, ``element_id``, ``n_non_nan``.

Returns
-------
Expand All @@ -84,10 +99,35 @@ def cifti_to_h5(
scalar_sources = _build_scalar_sources(cohort_long)
if not scalar_sources:
raise ValueError('Unable to derive scalar sources from cohort file.')
maps_to_write = cli_utils.normalize_diagnostic_maps(diagnostic_maps)

_first_scalar, first_sources = next(iter(scalar_sources.items()))
first_path = first_sources[0]
template_cifti = load_nibabel(first_path, cifti=True)
_first_data, reference_brain_names = extract_cifti_scalar_data(template_cifti)

if not no_diagnostics:
output_diag_dir = (
Path(diagnostics_dir)
if diagnostics_dir is not None
else cli_utils.default_diagnostics_dir(output_path)
)
output_diag_dir.mkdir(parents=True, exist_ok=True)
cli_diagnostics.verify_cifti_element_mapping(template_cifti, reference_brain_names)

if backend == 'hdf5':
scalars, last_brain_names = _load_cohort_cifti(cohort_long, s3_workers)
greyordinate_table, structure_names = brain_names_to_dataframe(last_brain_names)
if not no_diagnostics:
for scalar_name, rows in scalars.items():
diagnostics = cli_diagnostics.summarize_rows(rows)
cli_diagnostics.write_cifti_diagnostics(
maps=maps_to_write,
scalar_name=scalar_name,
diagnostics=diagnostics,
template_cifti=template_cifti,
output_dir=output_diag_dir,
)
output_path = cli_utils.prepare_output_parent(output_path)
with h5py.File(output_path, 'w') as h5_file:
cli_utils.write_table_dataset(
Expand All @@ -113,10 +153,6 @@ def cifti_to_h5(
if not scalar_sources:
return 0

_first_scalar, first_sources = next(iter(scalar_sources.items()))
first_path = first_sources[0]
_, reference_brain_names = extract_cifti_scalar_data(first_path)

def _process_scalar_job(scalar_name, source_files):
rows = []
for source_file in source_files:
Expand All @@ -126,6 +162,15 @@ def _process_scalar_job(scalar_name, source_files):
rows.append(cifti_data)

if rows:
if not no_diagnostics:
diagnostics = cli_diagnostics.summarize_rows(rows)
cli_diagnostics.write_cifti_diagnostics(
maps=maps_to_write,
scalar_name=scalar_name,
diagnostics=diagnostics,
template_cifti=template_cifti,
output_dir=output_diag_dir,
)
cli_utils.write_tiledb_scalar_matrices(
output_path,
{scalar_name: rows},
Expand Down Expand Up @@ -179,4 +224,5 @@ def _parse_cifti_to_h5():
)
add_to_modelarray_args(parser, default_output='greyordinatearray.h5')
add_scalar_columns_arg(parser)
add_diagnostics_args(parser)
return parser
116 changes: 116 additions & 0 deletions src/modelarrayio/cli/diagnostics.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
"""Diagnostic image helpers for conversion commands."""

from __future__ import annotations

from pathlib import Path

import nibabel as nb
import numpy as np

from modelarrayio.utils.cifti import extract_cifti_scalar_data
from modelarrayio.utils.fixels import nifti2_to_mif
from modelarrayio.utils.voxels import flattened_image


def summarize_rows(rows) -> dict[str, np.ndarray]:
    """Compute common diagnostics from a sequence of 1-D subject arrays.

    Parameters
    ----------
    rows : sequence of 1-D :obj:`numpy.ndarray`
        One array per subject, all of equal length.

    Returns
    -------
    :obj:`dict`
        Maps ``'mean'`` (NaN-aware per-element mean), ``'n_non_nan'``
        (per-element count of non-NaN subjects), and ``'element_id'``
        (0-based element index) to float32 arrays.
    """
    matrix = np.vstack(rows)
    n_elements = matrix.shape[1]
    # float32 so the maps can be written directly into float32 diagnostic images.
    mean_map = np.nanmean(matrix, axis=0).astype(np.float32)
    non_nan_counts = np.sum(~np.isnan(matrix), axis=0).astype(np.float32)
    element_ids = np.arange(n_elements, dtype=np.float32)
    return {
        'mean': mean_map,
        'n_non_nan': non_nan_counts,
        'element_id': element_ids,
    }


def verify_nifti_element_mapping(group_mask_img, group_mask_matrix):
    """Verify NIfTI group-mask flattening order matches element indices.

    Paints each in-mask voxel with its flattened element index, runs the
    volume back through ``flattened_image``, and confirms the identity
    mapping survives the round trip.

    Raises
    ------
    ValueError
        If the recovered element order differs from the expected order.
    """
    n_elements = int(group_mask_matrix.sum())
    index_values = np.arange(n_elements, dtype=np.float32)
    index_volume = np.zeros(group_mask_matrix.shape, dtype=np.float32)
    index_volume[group_mask_matrix] = index_values
    index_img = nb.Nifti1Image(
        index_volume,
        affine=group_mask_img.affine,
        header=group_mask_img.header,
    )
    recovered = flattened_image(index_img, group_mask_img, group_mask_matrix)
    # Compare as integers to sidestep float equality concerns.
    if not np.array_equal(recovered.astype(np.int64), index_values.astype(np.int64)):
        raise ValueError('Element ID mapping check failed for NIfTI group-mask flattening.')


def write_nifti_diagnostics(
    *,
    maps: list[str],
    scalar_name: str,
    diagnostics: dict[str, np.ndarray],
    group_mask_img,
    group_mask_matrix,
    output_dir: Path,
):
    """Write each requested diagnostic map as a float32 NIfTI volume.

    Each map is scattered back into the group-mask volume (out-of-mask
    voxels are zero) and saved as ``<scalar_name>_<map>.nii.gz`` under
    *output_dir*.
    """
    out_header = group_mask_img.header.copy()
    out_header.set_data_dtype(np.float32)
    for map_name in maps:
        volume = np.zeros(group_mask_matrix.shape, dtype=np.float32)
        volume[group_mask_matrix] = diagnostics[map_name]
        diag_img = nb.Nifti1Image(volume, affine=group_mask_img.affine, header=out_header)
        diag_img.to_filename(output_dir / f'{scalar_name}_{map_name}.nii.gz')


def verify_cifti_element_mapping(template_cifti, reference_brain_names):
    """Verify CIFTI extraction order matches element indices.

    Builds a single-row CIFTI whose values are the greyordinate indices,
    extracts it with ``extract_cifti_scalar_data``, and confirms the
    values come back in the same order.

    Raises
    ------
    ValueError
        If the recovered greyordinate order differs from the expected order.
    """
    n_elements = reference_brain_names.shape[0]
    index_row = np.arange(n_elements, dtype=np.float32)
    probe_img = nb.Cifti2Image(
        index_row.reshape(1, -1),
        header=template_cifti.header,
        nifti_header=template_cifti.nifti_header,
    )
    recovered, _ = extract_cifti_scalar_data(probe_img, reference_brain_names=reference_brain_names)
    # Compare as integers to sidestep float equality concerns.
    if not np.array_equal(recovered.astype(np.int64), index_row.astype(np.int64)):
        raise ValueError('Element ID mapping check failed for CIFTI greyordinate ordering.')


def write_cifti_diagnostics(
    *,
    maps: list[str],
    scalar_name: str,
    diagnostics: dict[str, np.ndarray],
    template_cifti,
    output_dir: Path,
):
    """Write each requested diagnostic map as a single-row CIFTI dscalar.

    Files are named ``<scalar_name>_<map>.dscalar.nii`` and reuse the
    template image's CIFTI and NIfTI headers.
    """
    for map_name in maps:
        diag_img = nb.Cifti2Image(
            diagnostics[map_name].reshape(1, -1),
            header=template_cifti.header,
            nifti_header=template_cifti.nifti_header,
        )
        diag_img.to_filename(output_dir / f'{scalar_name}_{map_name}.dscalar.nii')


def verify_mif_element_mapping(template_nifti2, num_elements: int):
    """Verify fixel vector reshape/squeeze mapping remains identity.

    Builds an (N, 1, 1) NIfTI-2 image whose values are the element
    indices, reads it back via ``get_fdata().squeeze()`` (the same
    mapping used for fixel data), and confirms element order survives.

    Parameters
    ----------
    template_nifti2 : nibabel NIfTI-2 image
        Template providing the affine and header for the probe image.
    num_elements : :obj:`int`
        Number of fixel elements along the first axis.

    Raises
    ------
    ValueError
        If the round trip does not preserve element order.
    """
    expected = np.arange(num_elements, dtype=np.float32)
    test_img = nb.Nifti2Image(
        expected.reshape(-1, 1, 1),
        affine=template_nifti2.affine,
        header=template_nifti2.header,
    )
    # atleast_1d guards num_elements == 1: squeeze() collapses a (1, 1, 1)
    # array to 0-d, and np.array_equal would then fail on shape () vs (1,)
    # even though the single value is correct.
    recovered = np.atleast_1d(test_img.get_fdata(dtype=np.float32).squeeze())
    if not np.array_equal(recovered.astype(np.int64), expected.astype(np.int64)):
        raise ValueError('Element ID mapping check failed for MIF fixel vector ordering.')


def write_mif_diagnostics(
    *,
    maps: list[str],
    scalar_name: str,
    diagnostics: dict[str, np.ndarray],
    template_nifti2,
    output_dir: Path,
):
    """Write each requested diagnostic map as a MIF fixel file.

    Each map is reshaped to the (N, 1, 1) fixel-vector layout, wrapped in
    a NIfTI-2 image using the template's affine/header, and converted via
    ``nifti2_to_mif`` to ``<scalar_name>_<map>.mif`` under *output_dir*.
    """
    for map_name in maps:
        diag_nifti2 = nb.Nifti2Image(
            diagnostics[map_name].reshape(-1, 1, 1),
            affine=template_nifti2.affine,
            header=template_nifti2.header,
        )
        nifti2_to_mif(diag_nifti2, output_dir / f'{scalar_name}_{map_name}.mif')
Loading
Loading