Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -52,16 +52,19 @@ Once ModelArrayIO is installed, these commands are available in your terminal:

* ``.mif`` → ``.h5``: ``modelarrayio mif-to-h5``
* ``.h5`` → ``.mif``: ``modelarrayio h5-to-mif``
* ``.h5`` scalar row → ``.mif``: ``modelarrayio h5-export-mif-file``

* **Voxel-wise** data (NIfTI):

* NIfTI → ``.h5``: ``modelarrayio nifti-to-h5``
* ``.h5`` → NIfTI: ``modelarrayio h5-to-nifti``
* ``.h5`` scalar row → NIfTI: ``modelarrayio h5-export-nifti-file``

* **Greyordinate-wise** data (CIFTI-2):

* CIFTI-2 → ``.h5``: ``modelarrayio cifti-to-h5``
* ``.h5`` → CIFTI-2: ``modelarrayio h5-to-cifti``
* ``.h5`` scalar row → CIFTI-2: ``modelarrayio h5-export-cifti-file``


Storage backends: HDF5 and TileDB
Expand Down
46 changes: 38 additions & 8 deletions docs/usage.rst
Original file line number Diff line number Diff line change
Expand Up @@ -3,39 +3,39 @@ Usage
#####


**********
*********
mif-to-h5
**********
*********

.. argparse::
:ref: modelarrayio.cli.mif_to_h5._parse_mif_to_h5
:prog: modelarrayio mif-to-h5
:func: _parse_mif_to_h5


**********
***********
nifti-to-h5
**********
************
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
************
***********


.. argparse::
:ref: modelarrayio.cli.nifti_to_h5._parse_nifti_to_h5
:prog: modelarrayio nifti-to-h5
:func: _parse_nifti_to_h5


**********
***********
cifti-to-h5
**********
***********

.. argparse::
:ref: modelarrayio.cli.cifti_to_h5._parse_cifti_to_h5
:prog: modelarrayio cifti-to-h5
:func: _parse_cifti_to_h5


**********
*********
h5-to-mif
**********
*********

.. argparse::
:ref: modelarrayio.cli.h5_to_mif._parse_h5_to_mif
Expand All @@ -60,3 +60,33 @@ h5-to-cifti
:ref: modelarrayio.cli.h5_to_cifti._parse_h5_to_cifti
:prog: modelarrayio h5-to-cifti
:func: _parse_h5_to_cifti


******************
h5-export-mif-file
******************

.. argparse::
:ref: modelarrayio.cli.h5_export_mif_file._parse_h5_export_mif_file
:prog: modelarrayio h5-export-mif-file
:func: _parse_h5_export_mif_file


********************
h5-export-nifti-file
********************

.. argparse::
:ref: modelarrayio.cli.h5_export_nifti_file._parse_h5_export_nifti_file
:prog: modelarrayio h5-export-nifti-file
:func: _parse_h5_export_nifti_file


********************
h5-export-cifti-file
********************

.. argparse::
:ref: modelarrayio.cli.h5_export_cifti_file._parse_h5_export_cifti_file
:prog: modelarrayio h5-export-cifti-file
:func: _parse_h5_export_cifti_file
34 changes: 34 additions & 0 deletions modelarrayio/__about__.py
Copy link
Copy Markdown
Member

@tsalo tsalo Mar 27, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The __about__.py shouldn't be committed. It needs to be added to the gitignore.

Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
# file generated by setuptools-scm
# don't change, don't track in version control

__all__ = [
"__version__",
"__version_tuple__",
"version",
"version_tuple",
"__commit_id__",
"commit_id",
]

TYPE_CHECKING = False
if TYPE_CHECKING:
from typing import Tuple
from typing import Union

VERSION_TUPLE = Tuple[Union[int, str], ...]
COMMIT_ID = Union[str, None]
else:
VERSION_TUPLE = object
COMMIT_ID = object

version: str
__version__: str
__version_tuple__: VERSION_TUPLE
version_tuple: VERSION_TUPLE
commit_id: COMMIT_ID
__commit_id__: COMMIT_ID

__version__ = version = '0.1.dev115+g7f4c6030e'
__version_tuple__ = version_tuple = (0, 1, 'dev115', 'g7f4c6030e')

__commit_id__ = commit_id = None
56 changes: 51 additions & 5 deletions src/modelarrayio/cli/cifti_to_h5.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,15 +12,21 @@
import pandas as pd
from tqdm import tqdm

from modelarrayio.cli import diagnostics as cli_diagnostics
from modelarrayio.cli import utils as cli_utils
from modelarrayio.cli.parser_utils import add_scalar_columns_arg, add_to_modelarray_args
from modelarrayio.cli.parser_utils import (
add_diagnostics_args,
add_scalar_columns_arg,
add_to_modelarray_args,
)
from modelarrayio.utils.cifti import (
_build_scalar_sources,
_cohort_to_long_dataframe,
_load_cohort_cifti,
brain_names_to_dataframe,
extract_cifti_scalar_data,
)
from modelarrayio.utils.s3_utils import load_nibabel

logger = logging.getLogger(__name__)

Expand All @@ -38,6 +44,9 @@ def cifti_to_h5(
workers=None,
s3_workers=1,
scalar_columns=None,
no_diagnostics=False,
diagnostics_dir=None,
diagnostic_maps=None,
):
"""Load all CIFTI data and write to an HDF5 or TileDB file.

Expand Down Expand Up @@ -70,6 +79,12 @@ def cifti_to_h5(
Number of workers for parallel S3 downloads
scalar_columns : :obj:`list`
List of scalar columns to use
no_diagnostics : :obj:`bool`
Disable diagnostic outputs in native format.
diagnostics_dir : :obj:`str` or :obj:`None`
Output directory for diagnostics. Defaults to ``<output_stem>_diagnostics``.
diagnostic_maps : :obj:`list` or :obj:`None`
Diagnostic maps to write. Supported: ``mean``, ``element_id``, ``n_non_nan``.

Returns
-------
Expand All @@ -84,10 +99,35 @@ def cifti_to_h5(
scalar_sources = _build_scalar_sources(cohort_long)
if not scalar_sources:
raise ValueError('Unable to derive scalar sources from cohort file.')
maps_to_write = cli_utils.normalize_diagnostic_maps(diagnostic_maps)

_first_scalar, first_sources = next(iter(scalar_sources.items()))
first_path = first_sources[0]
template_cifti = load_nibabel(first_path, cifti=True)
_first_data, reference_brain_names = extract_cifti_scalar_data(template_cifti)

if not no_diagnostics:
output_diag_dir = (
Path(diagnostics_dir)
if diagnostics_dir is not None
else cli_utils.default_diagnostics_dir(output_path)
)
output_diag_dir.mkdir(parents=True, exist_ok=True)
cli_diagnostics.verify_cifti_element_mapping(template_cifti, reference_brain_names)

if backend == 'hdf5':
scalars, last_brain_names = _load_cohort_cifti(cohort_long, s3_workers)
greyordinate_table, structure_names = brain_names_to_dataframe(last_brain_names)
if not no_diagnostics:
for scalar_name, rows in scalars.items():
diagnostics = cli_diagnostics.summarize_rows(rows)
cli_diagnostics.write_cifti_diagnostics(
maps=maps_to_write,
scalar_name=scalar_name,
diagnostics=diagnostics,
template_cifti=template_cifti,
output_dir=output_diag_dir,
)
output_path = cli_utils.prepare_output_parent(output_path)
with h5py.File(output_path, 'w') as h5_file:
cli_utils.write_table_dataset(
Expand All @@ -113,10 +153,6 @@ def cifti_to_h5(
if not scalar_sources:
return 0

_first_scalar, first_sources = next(iter(scalar_sources.items()))
first_path = first_sources[0]
_, reference_brain_names = extract_cifti_scalar_data(first_path)

def _process_scalar_job(scalar_name, source_files):
rows = []
for source_file in source_files:
Expand All @@ -126,6 +162,15 @@ def _process_scalar_job(scalar_name, source_files):
rows.append(cifti_data)

if rows:
if not no_diagnostics:
diagnostics = cli_diagnostics.summarize_rows(rows)
cli_diagnostics.write_cifti_diagnostics(
maps=maps_to_write,
scalar_name=scalar_name,
diagnostics=diagnostics,
template_cifti=template_cifti,
output_dir=output_diag_dir,
)
cli_utils.write_tiledb_scalar_matrices(
output_path,
{scalar_name: rows},
Expand Down Expand Up @@ -179,4 +224,5 @@ def _parse_cifti_to_h5():
)
add_to_modelarray_args(parser, default_output='greyordinatearray.h5')
add_scalar_columns_arg(parser)
add_diagnostics_args(parser)
return parser
116 changes: 116 additions & 0 deletions src/modelarrayio/cli/diagnostics.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
"""Diagnostic image helpers for conversion commands."""

from __future__ import annotations

from pathlib import Path

import nibabel as nb
import numpy as np

from modelarrayio.utils.cifti import extract_cifti_scalar_data
from modelarrayio.utils.fixels import nifti2_to_mif
from modelarrayio.utils.voxels import flattened_image


def summarize_rows(rows) -> dict[str, np.ndarray]:
"""Compute common diagnostics from a sequence of 1-D subject arrays."""
stacked = np.vstack(rows)
return {
'mean': np.nanmean(stacked, axis=0).astype(np.float32),
'n_non_nan': np.sum(~np.isnan(stacked), axis=0).astype(np.float32),
'element_id': np.arange(stacked.shape[1], dtype=np.float32),
}


def verify_nifti_element_mapping(group_mask_img, group_mask_matrix):
"""Verify NIfTI group-mask flattening order matches element indices."""
expected = np.arange(int(group_mask_matrix.sum()), dtype=np.float32)
element_volume = np.zeros(group_mask_matrix.shape, dtype=np.float32)
element_volume[group_mask_matrix] = expected
element_img = nb.Nifti1Image(
element_volume,
affine=group_mask_img.affine,
header=group_mask_img.header,
)
extracted = flattened_image(element_img, group_mask_img, group_mask_matrix)
if not np.array_equal(extracted.astype(np.int64), expected.astype(np.int64)):
raise ValueError('Element ID mapping check failed for NIfTI group-mask flattening.')


def write_nifti_diagnostics(
*,
maps: list[str],
scalar_name: str,
diagnostics: dict[str, np.ndarray],
group_mask_img,
group_mask_matrix,
output_dir: Path,
):
header = group_mask_img.header.copy()
header.set_data_dtype(np.float32)
for name in maps:
out_file = output_dir / f'{scalar_name}_{name}.nii.gz'
data = np.zeros(group_mask_matrix.shape, dtype=np.float32)
data[group_mask_matrix] = diagnostics[name]
nb.Nifti1Image(data, affine=group_mask_img.affine, header=header).to_filename(out_file)


def verify_cifti_element_mapping(template_cifti, reference_brain_names):
"""Verify CIFTI extraction order matches element indices."""
expected = np.arange(reference_brain_names.shape[0], dtype=np.float32)
test_img = nb.Cifti2Image(
expected.reshape(1, -1),
header=template_cifti.header,
nifti_header=template_cifti.nifti_header,
)
recovered, _ = extract_cifti_scalar_data(test_img, reference_brain_names=reference_brain_names)
if not np.array_equal(recovered.astype(np.int64), expected.astype(np.int64)):
raise ValueError('Element ID mapping check failed for CIFTI greyordinate ordering.')


def write_cifti_diagnostics(
*,
maps: list[str],
scalar_name: str,
diagnostics: dict[str, np.ndarray],
template_cifti,
output_dir: Path,
):
for name in maps:
out_file = output_dir / f'{scalar_name}_{name}.dscalar.nii'
nb.Cifti2Image(
diagnostics[name].reshape(1, -1),
header=template_cifti.header,
nifti_header=template_cifti.nifti_header,
).to_filename(out_file)


def verify_mif_element_mapping(template_nifti2, num_elements: int):
"""Verify fixel vector reshape/squeeze mapping remains identity."""
expected = np.arange(num_elements, dtype=np.float32)
test_img = nb.Nifti2Image(
expected.reshape(-1, 1, 1),
affine=template_nifti2.affine,
header=template_nifti2.header,
)
recovered = test_img.get_fdata(dtype=np.float32).squeeze()
if not np.array_equal(recovered.astype(np.int64), expected.astype(np.int64)):
raise ValueError('Element ID mapping check failed for MIF fixel vector ordering.')


def write_mif_diagnostics(
*,
maps: list[str],
scalar_name: str,
diagnostics: dict[str, np.ndarray],
template_nifti2,
output_dir: Path,
):
for name in maps:
out_file = output_dir / f'{scalar_name}_{name}.mif'
temp_nifti2 = nb.Nifti2Image(
diagnostics[name].reshape(-1, 1, 1),
affine=template_nifti2.affine,
header=template_nifti2.header,
)
nifti2_to_mif(temp_nifti2, out_file)
Loading
Loading