diff --git a/README.rst b/README.rst
index e6f01a1..0db98e4 100644
--- a/README.rst
+++ b/README.rst
@@ -30,7 +30,7 @@ ModelArrayIO
:target: https://github.com/astral-sh/ruff
:alt: Code style: ruff
-**ModelArrayIO** is a Python package that converts between neuroimaging formats (fixel ``.mif``, voxel NIfTI, CIFTI-2 dscalar) and the HDF5 (``.h5``) layout used by the R package `ModelArray `_. It can also write ModelArray statistical results back to imaging formats.
+**ModelArrayIO** is a Python package that converts between neuroimaging formats (fixel ``.mif``, voxel NIfTI, CIFTI-2 dscalar/pscalar/pconn) and the HDF5 (``.h5``) layout used by the R package `ModelArray <https://github.com/PennLINC/ModelArray>`_. It can also write ModelArray statistical results back to imaging formats.
**Relationship to ConFixel:** The earlier project `ConFixel `_ is superseded by ModelArrayIO. The ConFixel repository is retained for history (including links from publications) and will be archived; new work should use this repository.
@@ -48,20 +48,10 @@ ModelArrayIO provides three converter areas, each with import and export command
Once ModelArrayIO is installed, these commands are available in your terminal:
-* **Fixel-wise** data (MRtrix ``.mif``):
+* Neuroimaging data (CIFTI, NIfTI, or MRtrix ``.mif``):
- * ``.mif`` → ``.h5``: ``modelarrayio mif-to-h5``
- * ``.h5`` → ``.mif``: ``modelarrayio h5-to-mif``
-
-* **Voxel-wise** data (NIfTI):
-
- * NIfTI → ``.h5``: ``modelarrayio nifti-to-h5``
- * ``.h5`` → NIfTI: ``modelarrayio h5-to-nifti``
-
-* **Greyordinate-wise** data (CIFTI-2):
-
- * CIFTI-2 → ``.h5``: ``modelarrayio cifti-to-h5``
- * ``.h5`` → CIFTI-2: ``modelarrayio h5-to-cifti``
+ * Neuroimaging → ``.h5``: ``modelarrayio to-modelarray``
+ * ``.h5`` → Neuroimaging: ``modelarrayio export-results``
Storage backends: HDF5 and TileDB
diff --git a/docs/examples/plot_cifti_workflow.py b/docs/examples/plot_cifti_workflow.py
index 1bdc221..f718e06 100644
--- a/docs/examples/plot_cifti_workflow.py
+++ b/docs/examples/plot_cifti_workflow.py
@@ -2,9 +2,9 @@
CIFTI (Greyordinate-wise) Data Conversion
=========================================
-For imaging data in CIFTI format, use the ``modelarrayio cifti-to-h5`` command to convert
+For imaging data in CIFTI format, use the ``modelarrayio to-modelarray`` command to convert
the CIFTI files to the HDF5 format (``.h5``) used by **ModelArray**,
-and ``modelarrayio h5-to-cifti`` to export results back to CIFTI.
+and ``modelarrayio export-results`` to export results back to CIFTI.
The CIFTI workflow is very similar to the MIF workflow
(:ref:`sphx_glr_auto_examples_plot_mif_workflow.py`).
"""
@@ -89,7 +89,7 @@
# # activate your conda environment first
# conda activate
#
-# modelarrayio cifti-to-h5 \
+# modelarrayio to-modelarray \
# --cohort-file /home/username/myProject/data/cohort_FA.csv \
# --output /home/username/myProject/data/FA.h5
#
@@ -101,13 +101,13 @@
# --------------------------------
#
# After running **ModelArray** and obtaining statistical results inside ``FA.h5`` (suppose the
-# analysis name is ``"mylm"``), use ``modelarrayio h5-to-cifti`` to export them as CIFTI files.
+# analysis name is ``"mylm"``), use ``modelarrayio export-results`` to export them as CIFTI files.
#
# You must also provide an example CIFTI file to use as a template for the output.
#
# .. code-block:: console
#
-# modelarrayio h5-to-cifti \
+# modelarrayio export-results \
# --cohort-file /home/username/myProject/data/cohort_FA.csv \
# --analysis-name mylm \
# --input-hdf5 /home/username/myProject/data/FA.h5 \
@@ -119,11 +119,11 @@
#
# .. warning::
#
-# If ``--output-dir`` already exists, ``modelarrayio h5-to-cifti`` will not delete it — you will
+# If ``--output-dir`` already exists, ``modelarrayio export-results`` will not delete it — you will
# see ``WARNING: Output directory exists``. Existing files that are **not** part of the
# current output list are left unchanged. Existing files that **are** part of the current
# output list will be overwritten. To avoid confusion, consider manually deleting the output
-# directory before re-running ``modelarrayio h5-to-cifti``.
+# directory before re-running ``modelarrayio export-results``.
# %%
# Number-of-observations image
@@ -161,7 +161,7 @@
#
# .. code-block:: console
#
-# modelarrayio cifti-to-h5 --help
-# modelarrayio h5-to-cifti --help
+# modelarrayio to-modelarray --help
+# modelarrayio export-results --help
#
# or in the :doc:`/usage` page of this documentation.
diff --git a/docs/examples/plot_fixel_workflow.py b/docs/examples/plot_fixel_workflow.py
index 3ee7fcb..f06a6de 100644
--- a/docs/examples/plot_fixel_workflow.py
+++ b/docs/examples/plot_fixel_workflow.py
@@ -3,9 +3,9 @@
================================
To convert fixel-wise data in MIF format to HDF5 format,
-use the ``modelarrayio mif-to-h5`` command to convert the MIF files to the HDF5 format
+use the ``modelarrayio to-modelarray`` command to convert the MIF files to the HDF5 format
(``.h5``) used by **ModelArray**,
-and ``modelarrayio h5-to-mif`` to export results back to MIF.
+and ``modelarrayio export-results`` to export results back to MIF.
This guide assumes **ModelArrayIO** and **MRtrix** are already installed.
"""
@@ -100,7 +100,7 @@
# # activate your conda environment first
# conda activate
#
-# modelarrayio mif-to-h5 \
+# modelarrayio to-modelarray \
# --index-file /home/username/myProject/data/FD/index.mif \
# --directions-file /home/username/myProject/data/FD/directions.mif \
# --cohort-file /home/username/myProject/data/cohort_FD.csv \
@@ -114,13 +114,13 @@
# --------------------------------------
#
# After running ModelArray and obtaining statistical results inside ``FD.h5`` (suppose the
-# analysis name is ``"mylm"``), use ``modelarrayio h5-to-mif`` to export them as ``.mif`` files.
+# analysis name is ``"mylm"``), use ``modelarrayio export-results`` to export them as ``.mif`` files.
# The command also copies the original ``index.mif`` and ``directions.mif`` into the output
# folder.
#
# .. code-block:: console
#
-# modelarrayio h5-to-mif \
+# modelarrayio export-results \
# --index-file /home/username/myProject/data/FD/index.mif \
# --directions-file /home/username/myProject/data/FD/directions.mif \
# --cohort-file /home/username/myProject/data/cohort_FD.csv \
@@ -132,11 +132,11 @@
#
# .. warning::
#
-# **Existing files are not overwritten.** ``modelarrayio h5-to-mif`` calls ``mrconvert`` without
+# **Existing files are not overwritten.** ``modelarrayio export-results`` calls ``mrconvert`` without
# ``-force``, so any ``.mif`` file already present in ``--output-dir`` with the same name
# will be left unchanged. If ``--output-dir`` itself already exists you will see a
# ``WARNING: Output directory exists`` message, but no files will be deleted. To start
-# fresh, manually remove the output directory before re-running ``modelarrayio h5-to-mif``.
+# fresh, manually remove the output directory before re-running ``modelarrayio export-results``.
# %%
# Additional help
@@ -146,7 +146,7 @@
#
# .. code-block:: console
#
-# modelarrayio mif-to-h5 --help
-# modelarrayio h5-to-mif --help
+# modelarrayio to-modelarray --help
+# modelarrayio export-results --help
#
# or in the :doc:`/usage` page of this documentation.
diff --git a/docs/examples/plot_voxel_workflow.py b/docs/examples/plot_voxel_workflow.py
index d9bfd8b..b572c60 100644
--- a/docs/examples/plot_voxel_workflow.py
+++ b/docs/examples/plot_voxel_workflow.py
@@ -2,9 +2,9 @@
NIfTI (Voxel-wise) Data Conversion
==================================
-For imaging data in NIfTI format, use the ``modelarrayio nifti-to-h5`` command to convert
+For imaging data in NIfTI format, use the ``modelarrayio to-modelarray`` command to convert
the NIfTI files to the HDF5 format (``.h5``) used by **ModelArray**,
-and ``modelarrayio h5-to-nifti`` to export results back to NIfTI.
+and ``modelarrayio export-results`` to export results back to NIfTI.
The voxel workflow is very similar to the fixel workflow
(:ref:`sphx_glr_auto_examples_plot_mif_workflow.py`).
"""
@@ -105,8 +105,8 @@
# # activate your conda environment first
# conda activate
#
-# modelarrayio nifti-to-h5 \
-# --group-mask-file /home/username/myProject/data/group_mask.nii.gz \
+# modelarrayio to-modelarray \
+# --mask /home/username/myProject/data/group_mask.nii.gz \
# --cohort-file /home/username/myProject/data/cohort_FA.csv \
# --output /home/username/myProject/data/FA.h5
#
@@ -118,12 +118,12 @@
# --------------------------------
#
# After running **ModelArray** and obtaining statistical results inside ``FA.h5`` (suppose the
-# analysis name is ``"mylm"``), use ``modelarrayio h5-to-nifti`` to export them as NIfTI files.
+# analysis name is ``"mylm"``), use ``modelarrayio export-results`` to export them as NIfTI files.
#
# .. code-block:: console
#
-# modelarrayio h5-to-nifti \
-# --group-mask-file /home/username/myProject/data/group_mask.nii.gz \
+# modelarrayio export-results \
+# --mask /home/username/myProject/data/group_mask.nii.gz \
# --cohort-file /home/username/myProject/data/cohort_FA.csv \
# --analysis-name mylm \
# --input-hdf5 /home/username/myProject/data/FA.h5 \
@@ -135,11 +135,11 @@
#
# .. warning::
#
-# If ``--output-dir`` already exists, ``modelarrayio h5-to-nifti`` will not delete it — you will
+# If ``--output-dir`` already exists, ``modelarrayio export-results`` will not delete it — you will
# see ``WARNING: Output directory exists``. Existing files that are **not** part of the
# current output list are left unchanged. Existing files that **are** part of the current
# output list will be overwritten. To avoid confusion, consider manually deleting the output
-# directory before re-running ``modelarrayio h5-to-nifti``.
+# directory before re-running ``modelarrayio export-results``.
# %%
# Number-of-observations image
@@ -177,7 +177,7 @@
#
# .. code-block:: console
#
-# modelarrayio nifti-to-h5 --help
-# modelarrayio h5-to-nifti --help
+# modelarrayio to-modelarray --help
+# modelarrayio export-results --help
#
# or in the :doc:`/usage` page of this documentation.
diff --git a/docs/installation.rst b/docs/installation.rst
index ad861f4..1d01c75 100644
--- a/docs/installation.rst
+++ b/docs/installation.rst
@@ -17,7 +17,7 @@ If you want to use the most up-to-date version, you can install from the ``main`
MRtrix (required for fixel ``.mif`` only)
-----------------------------------------
-For fixel-wise ``.mif`` conversion, the ``modelarrayio mif-to-h5`` / ``modelarrayio h5-to-mif`` tools use MRtrix ``mrconvert``.
+Fixel-wise ``.mif`` conversion tools use MRtrix ``mrconvert``.
Install MRtrix from `MRtrix's webpage `_ if needed.
Run ``mrview`` in the terminal to verify the installation.
diff --git a/docs/outputs.rst b/docs/outputs.rst
index 092bdd8..eba845f 100644
--- a/docs/outputs.rst
+++ b/docs/outputs.rst
@@ -12,17 +12,23 @@ Commands Overview
The commands fall into two groups:
-- ``*-to-h5`` commands: convert input neuroimaging data into either:
+- ``to-modelarray``: convert input neuroimaging data into either:
- one or more HDF5 files (``--backend hdf5``), or
- one or more TileDB directories (``--backend tiledb``).
-- ``h5-to-*`` commands: convert analysis results stored in an HDF5 file into image files.
+ The modality (NIfTI, CIFTI, or MIF/fixel) is autodetected from the source file
+ extensions in the cohort file.
+- ``export-results``: convert analysis results stored in an HDF5 file into image files.
+ The modality is inferred from which arguments are provided (``--mask`` for NIfTI,
+ ``--index-file``/``--directions-file`` for MIF, ``--cohort-file``/``--example-file``
+ for CIFTI).
-*********************
-nifti-to-h5 (volumes)
-*********************
+***********************
+to-modelarray (volumes)
+***********************
-Default output name (HDF5 backend): ``voxelarray.h5``.
+Triggered when source files in the cohort have ``.nii`` or ``.nii.gz`` extensions.
+Requires ``--mask``.
HDF5 output contents:
@@ -42,24 +48,27 @@ TileDB output contents:
When ``--scalar-columns`` is provided:
- Output is split by scalar column name.
-- Example: ``--scalar-columns alpha beta --output voxelarray.h5`` writes:
- - ``alpha_voxelarray.h5``
- - ``beta_voxelarray.h5``
+- Example: ``--scalar-columns alpha beta --output modelarray.h5`` writes:
+ - ``alpha_modelarray.h5``
+ - ``beta_modelarray.h5``
- The same prefix rule also applies to TileDB output paths.
-*******************
-cifti-to-h5 (CIFTI)
-*******************
+*********************
+to-modelarray (CIFTI)
+*********************
-Default output name (HDF5 backend): ``greyordinatearray.h5``.
+Triggered when source files in the cohort have a CIFTI compound extension
+(e.g. ``.dscalar.nii``, ``.pscalar.nii``, ``.pconn.nii``).
HDF5 output contents:
-- ``greyordinates`` dataset:
+- ``greyordinates`` dataset (dscalar):
- transposed table with rows for ``vertex_id`` and ``structure_id``.
- attribute ``column_names = ['vertex_id', 'structure_id']``.
- attribute ``structure_names`` listing CIFTI brain structures.
+- ``parcels/parcel_id`` string dataset (pscalar), or
+ ``parcels/parcel_id_from`` and ``parcels/parcel_id_to`` (pconn).
- Per scalar:
- ``scalars/<scalar_name>/values`` with shape ``(n_subjects, n_greyordinates)``.
- ``scalars/<scalar_name>/column_names`` listing source file names.
@@ -74,17 +83,18 @@ TileDB output contents:
When ``--scalar-columns`` is provided:
- Output is split by scalar column name.
-- Example: ``--scalar-columns alpha beta --output greyordinatearray.h5`` writes:
- - ``alpha_greyordinatearray.h5``
- - ``beta_greyordinatearray.h5``
+- Example: ``--scalar-columns alpha beta --output modelarray.h5`` writes:
+ - ``alpha_modelarray.h5``
+ - ``beta_modelarray.h5``
- The same prefix rule also applies to TileDB output paths.
-******************
-mif-to-h5 (fixels)
-******************
+**************************
+to-modelarray (MIF/fixels)
+**************************
-Default output name (HDF5 backend): ``fixelarray.h5``.
+Triggered when source files in the cohort have a ``.mif`` extension.
+Requires ``--index-file`` and ``--directions-file``.
HDF5 output contents:
@@ -107,17 +117,17 @@ TileDB output contents:
When ``--scalar-columns`` is provided:
- Output is split by scalar column name.
-- Example: ``--scalar-columns alpha beta --output fixelarray.h5`` writes:
- - ``alpha_fixelarray.h5``
- - ``beta_fixelarray.h5``
+- Example: ``--scalar-columns alpha beta --output modelarray.h5`` writes:
+ - ``alpha_modelarray.h5``
+ - ``beta_modelarray.h5``
- The same prefix rule also applies to TileDB output paths.
-***********************************
-h5-to-* commands (result exporters)
-***********************************
+*********************************
+export-results (result exporters)
+*********************************
-These commands read statistical results from:
+This command reads statistical results from:
- ``results/<analysis_name>/results_matrix`` (shape: ``(n_results, n_elements)``).
@@ -131,34 +141,43 @@ Result names are read in this order:
Any spaces or ``/`` in result names are replaced with ``_`` in filenames.
-h5-to-nifti
-===========
+export-results (NIfTI)
+======================
+
+Triggered by providing ``--mask``.
Writes one file per result to ``--output-dir``:
-- ``_`` (default extension ``.nii.gz``).
+- ``<analysis_name>_<result_name>.nii.gz``.
- If a result name contains ``p.value``, an additional file is written:
- ``_``,
+ ``<analysis_name>_<1m.p.value result name>.nii.gz``,
containing ``1 - p.value``.
-Each output volume uses ``--group-mask-file`` to map vectorized results back into 3D space.
+Each output volume uses ``--mask`` to map vectorized results back into 3D space.
+Pass ``--no-compress`` to write uncompressed ``.nii`` files instead.
+
+export-results (CIFTI)
+======================
-h5-to-cifti
-===========
+Triggered by providing ``--cohort-file`` or ``--example-file`` (without
+``--mask`` or ``--index-file``/``--directions-file``).
-Writes one CIFTI dscalar file per result to ``--output-dir``:
+Writes one CIFTI file per result to ``--output-dir``, using the extension that
+matches the example file (e.g. ``.dscalar.nii``, ``.pscalar.nii``, ``.pconn.nii``):
-- ``_.dscalar.nii``.
+- ``<analysis_name>_<result_name>.<ext>``.
- If a result name contains ``p.value``, also writes the ``1 - p.value`` companion file
with ``1m.p.value`` in its name.
-The header is taken from ``--example-cifti`` (or from the first cohort ``source_file`` if
+The header is taken from ``--example-file`` (or from the first cohort ``source_file`` if
``--cohort-file`` is used instead).
-h5-to-mif
-=========
+export-results (MIF/fixels)
+===========================
+
+Triggered by providing ``--index-file`` and ``--directions-file``.
Writes one MIF file per result to ``--output-dir``:
@@ -171,5 +190,6 @@ Also copies these files into ``--output-dir``:
- ``--index-file``
- ``--directions-file``
-The output MIF geometry/header template is taken from ``--example-mif`` (or from the first
+The output MIF geometry/header template is taken from ``--example-file`` (or from the first
cohort ``source_file`` if ``--cohort-file`` is used instead).
+Pass ``--no-compress`` to write uncompressed output where applicable.
diff --git a/docs/usage.rst b/docs/usage.rst
index 9a8e942..c29ca8f 100644
--- a/docs/usage.rst
+++ b/docs/usage.rst
@@ -3,60 +3,21 @@ Usage
#####
-**********
-mif-to-h5
-**********
+*************
+to-modelarray
+*************
.. argparse::
- :ref: modelarrayio.cli.mif_to_h5._parse_mif_to_h5
- :prog: modelarrayio mif-to-h5
- :func: _parse_mif_to_h5
+ :ref: modelarrayio.cli.to_modelarray._parse_to_modelarray
+ :prog: modelarrayio to-modelarray
+ :func: _parse_to_modelarray
-**********
-nifti-to-h5
-**********
+**************
+export-results
+**************
.. argparse::
- :ref: modelarrayio.cli.nifti_to_h5._parse_nifti_to_h5
- :prog: modelarrayio nifti-to-h5
- :func: _parse_nifti_to_h5
-
-
-**********
-cifti-to-h5
-**********
-
-.. argparse::
- :ref: modelarrayio.cli.cifti_to_h5._parse_cifti_to_h5
- :prog: modelarrayio cifti-to-h5
- :func: _parse_cifti_to_h5
-
-
-**********
-h5-to-mif
-**********
-
-.. argparse::
- :ref: modelarrayio.cli.h5_to_mif._parse_h5_to_mif
- :prog: modelarrayio h5-to-mif
- :func: _parse_h5_to_mif
-
-***********
-h5-to-nifti
-***********
-
-.. argparse::
- :ref: modelarrayio.cli.h5_to_nifti._parse_h5_to_nifti
- :prog: modelarrayio h5-to-nifti
- :func: _parse_h5_to_nifti
-
-
-***********
-h5-to-cifti
-***********
-
-.. argparse::
- :ref: modelarrayio.cli.h5_to_cifti._parse_h5_to_cifti
- :prog: modelarrayio h5-to-cifti
- :func: _parse_h5_to_cifti
+ :ref: modelarrayio.cli.export_results._parse_export_results
+ :prog: modelarrayio export-results
+ :func: _parse_export_results
diff --git a/src/modelarrayio/cli/cifti_to_h5.py b/src/modelarrayio/cli/cifti_to_h5.py
index b6df079..b336cd0 100644
--- a/src/modelarrayio/cli/cifti_to_h5.py
+++ b/src/modelarrayio/cli/cifti_to_h5.py
@@ -2,7 +2,6 @@
from __future__ import annotations
-import argparse
import logging
from concurrent.futures import ThreadPoolExecutor, as_completed
from pathlib import Path
@@ -12,7 +11,6 @@
from tqdm import tqdm
from modelarrayio.cli import utils as cli_utils
-from modelarrayio.cli.parser_utils import add_to_modelarray_args
from modelarrayio.utils.cifti import (
_get_cifti_parcel_info,
brain_names_to_dataframe,
@@ -206,19 +204,3 @@ def _process_scalar_job(scalar_name, source_files):
else:
cli_utils.write_tiledb_parcel_arrays(output, parcel_arrays)
return 0
-
-
-def cifti_to_h5_main(**kwargs):
- """Entry point for the ``modelarrayio cifti-to-h5`` command."""
- log_level = kwargs.pop('log_level', 'INFO')
- cli_utils.configure_logging(log_level)
- return cifti_to_h5(**kwargs)
-
-
-def _parse_cifti_to_h5():
- parser = argparse.ArgumentParser(
- description='Create a hdf5 file of CIFTI2 dscalar data',
- formatter_class=argparse.ArgumentDefaultsHelpFormatter,
- )
- add_to_modelarray_args(parser, default_output='greyordinatearray.h5')
- return parser
diff --git a/src/modelarrayio/cli/export_results.py b/src/modelarrayio/cli/export_results.py
new file mode 100644
index 0000000..0be642e
--- /dev/null
+++ b/src/modelarrayio/cli/export_results.py
@@ -0,0 +1,223 @@
+"""Export statistical results from an HDF5 modelarray file to neuroimaging formats."""
+
+from __future__ import annotations
+
+import argparse
+import logging
+import shutil
+from functools import partial
+from pathlib import Path
+
+import pandas as pd
+
+from modelarrayio.cli import utils as cli_utils
+from modelarrayio.cli.h5_to_cifti import h5_to_cifti
+from modelarrayio.cli.h5_to_mif import h5_to_mif
+from modelarrayio.cli.h5_to_nifti import h5_to_nifti
+from modelarrayio.cli.parser_utils import _is_file, add_log_level_arg
+
+logger = logging.getLogger(__name__)
+
+
+def export_results(
+ in_file,
+ analysis_name,
+ output_dir,
+ group_mask_file=None,
+ compress=True,
+ index_file=None,
+ directions_file=None,
+ cohort_file=None,
+ example_file=None,
+):
+ """Export statistical results from an HDF5 modelarray file.
+
+ The modality is inferred from the arguments provided:
+
+ * **NIfTI**: ``group_mask_file`` is given
+ * **MIF/fixel**: ``index_file`` and ``directions_file`` are given
+ * **CIFTI**: only ``cohort_file`` or ``example_file`` is given
+
+ Parameters
+ ----------
+ in_file : path-like
+ HDF5 file containing statistical results.
+ analysis_name : str
+ Name of the statistical analysis results group inside the HDF5 file.
+ output_dir : path-like
+ Directory where output files will be written.
+ group_mask_file : path-like, optional
+ NIfTI binary group mask. Required for NIfTI results.
+ compress : bool, optional
+ Whether to compress output NIfTI or MIF files. Default True.
+ index_file : path-like, optional
+ Nifti2 index file. Required for MIF/fixel results.
+ directions_file : path-like, optional
+ Nifti2 directions file. Required for MIF/fixel results.
+ cohort_file : path-like, optional
+ CSV cohort file used to locate an example source file.
+ Required for CIFTI or MIF if ``example_file`` is not given.
+ example_file : path-like, optional
+ Path to an example source file whose header serves as a template.
+ Required for CIFTI or MIF if ``cohort_file`` is not given.
+ """
+ if group_mask_file is not None:
+ modality = 'nifti'
+ elif index_file is not None or directions_file is not None:
+ modality = 'mif'
+ elif cohort_file is not None or example_file is not None:
+ modality = 'cifti'
+ else:
+ raise ValueError(
+ 'Cannot determine modality. Provide --mask (NIfTI), '
+ '--index-file/--directions-file (MIF), or --cohort-file/--example-file (CIFTI).'
+ )
+ logger.info('Detected modality: %s', modality)
+
+ output_path = cli_utils.prepare_output_directory(output_dir, logger)
+
+ if modality == 'nifti':
+ h5_to_nifti(
+ in_file=in_file,
+ analysis_name=analysis_name,
+ group_mask_file=group_mask_file,
+ compress=compress,
+ output_dir=output_path,
+ )
+ return 0
+
+ if modality == 'mif':
+ if index_file is None or directions_file is None:
+ raise ValueError(
+ 'Both --index-file and --directions-file are required for MIF results.'
+ )
+ if cohort_file is None and example_file is None:
+ raise ValueError('One of --cohort-file or --example-file is required for MIF results.')
+ shutil.copyfile(index_file, output_path / Path(index_file).name)
+ shutil.copyfile(directions_file, output_path / Path(directions_file).name)
+ if example_file is None:
+ logger.warning('No example MIF file provided; using first source_file from cohort.')
+ example_file = pd.read_csv(cohort_file)['source_file'].iloc[0]
+ h5_to_mif(
+ example_mif=example_file,
+ in_file=in_file,
+ analysis_name=analysis_name,
+ compress=compress,
+ output_dir=output_path,
+ )
+ return 0
+
+ # cifti
+ if cohort_file is None and example_file is None:
+ raise ValueError('One of --cohort-file or --example-file is required for CIFTI results.')
+ if example_file is None:
+ logger.warning('No example CIFTI file provided; using first source_file from cohort.')
+ example_file = pd.read_csv(cohort_file)['source_file'].iloc[0]
+ h5_to_cifti(
+ example_cifti=example_file,
+ in_file=in_file,
+ analysis_name=analysis_name,
+ output_dir=output_path,
+ )
+ return 0
+
+
+def export_results_main(**kwargs):
+ """Entry point for the ``modelarrayio export-results`` command."""
+ log_level = kwargs.pop('log_level', 'INFO')
+ cli_utils.configure_logging(log_level)
+ return export_results(**kwargs)
+
+
+def _parse_export_results():
+ parser = argparse.ArgumentParser(
+ description=(
+ 'Export statistical results from an HDF5 modelarray file to '
+ 'neuroimaging format files. The modality (NIfTI, CIFTI, or MIF/fixel) '
+ 'is inferred from the arguments provided.'
+ ),
+ formatter_class=argparse.ArgumentDefaultsHelpFormatter,
+ )
+ IsFile = partial(_is_file, parser=parser)
+
+ parser.add_argument(
+ '--analysis-name',
+ '--analysis_name',
+ help='Name for the statistical analysis results to be saved.',
+ required=True,
+ )
+ parser.add_argument(
+ '--input-hdf5',
+ '--input_hdf5',
+ help='Name of HDF5 (.h5) file where results outputs are saved.',
+ type=partial(_is_file, parser=parser),
+ dest='in_file',
+ required=True,
+ )
+ parser.add_argument(
+ '--output-dir',
+ '--output_dir',
+ help=(
+ 'Directory where outputs will be saved. '
+ 'If the directory does not exist, it will be automatically created.'
+ ),
+ required=True,
+ )
+
+ nifti_group = parser.add_argument_group('NIfTI arguments (required for NIfTI results)')
+ nifti_group.add_argument(
+ '--mask',
+ help='Path to the NIfTI binary group mask file used during data preparation.',
+ type=IsFile,
+ default=None,
+ dest='group_mask_file',
+ )
+
+ mif_group = parser.add_argument_group('MIF/fixel arguments (required for MIF/fixel results)')
+ mif_group.add_argument(
+ '--index-file',
+ '--index_file',
+ help='Nifti2 index file used to reconstruct MIF files.',
+ type=IsFile,
+ default=None,
+ )
+ mif_group.add_argument(
+ '--directions-file',
+ '--directions_file',
+ help='Nifti2 directions file used to reconstruct MIF files.',
+ type=IsFile,
+ default=None,
+ )
+
+ template_group = parser.add_argument_group(
+ 'Template arguments (required for CIFTI and MIF/fixel results)'
+ )
+ template_source = template_group.add_mutually_exclusive_group()
+ template_source.add_argument(
+ '--cohort-file',
+ '--cohort_file',
+ help=(
+ 'Path to a CSV cohort file. The first source file entry is used as a header template.'
+ ),
+ type=IsFile,
+ default=None,
+ )
+ template_source.add_argument(
+ '--example-file',
+ '--example_file',
+ help='Path to an example source file whose header is used as a template.',
+ type=IsFile,
+ default=None,
+ )
+
+ output_group = parser.add_argument_group('Output arguments')
+ output_group.add_argument(
+ '--no-compress',
+ action='store_false',
+ dest='compress',
+ help='Disable compression for output NIfTI or MIF files. Does not affect CIFTI files.',
+ default=True,
+ )
+
+ add_log_level_arg(parser)
+ return parser
diff --git a/src/modelarrayio/cli/h5_to_cifti.py b/src/modelarrayio/cli/h5_to_cifti.py
index 5a2ccb7..2201070 100644
--- a/src/modelarrayio/cli/h5_to_cifti.py
+++ b/src/modelarrayio/cli/h5_to_cifti.py
@@ -2,17 +2,13 @@
from __future__ import annotations
-import argparse
import logging
-from functools import partial
from pathlib import Path
import h5py
import nibabel as nb
-import pandas as pd
from modelarrayio.cli import utils as cli_utils
-from modelarrayio.cli.parser_utils import _is_file, add_from_modelarray_args, add_log_level_arg
logger = logging.getLogger(__name__)
@@ -92,7 +88,7 @@ def h5_to_cifti(example_cifti, in_file, analysis_name, output_dir):
item in ``results/has_names``.
Parameters
- ==========
+ ----------
example_cifti: pathlike
abspath to a scalar cifti file. Its header is used as a template
in_file: str
@@ -103,7 +99,7 @@ def h5_to_cifti(example_cifti, in_file, analysis_name, output_dir):
abspath to where the output cifti files will go.
Outputs
- =======
+ -------
None
"""
# Get a template nifti image.
@@ -152,63 +148,3 @@ def h5_to_cifti(example_cifti, in_file, analysis_name, output_dir):
nifti_header=cifti.nifti_header,
)
temp_nifti2_1mpvalue.to_filename(out_cifti_1mpvalue)
-
-
-def h5_to_cifti_main(
- analysis_name,
- in_file,
- output_dir,
- cohort_file=None,
- example_cifti=None,
- log_level='INFO',
-):
- """Entry point for the ``modelarrayio h5-to-cifti`` command."""
- cli_utils.configure_logging(log_level)
- output_path = cli_utils.prepare_output_directory(output_dir, logger)
-
- if example_cifti is None:
- logger.warning(
- 'No example cifti file provided, using the first cifti file from the cohort file'
- )
- cohort_df = pd.read_csv(cohort_file)
- example_cifti = cohort_df['source_file'].iloc[0]
-
- h5_to_cifti(
- example_cifti=example_cifti,
- in_file=in_file,
- analysis_name=analysis_name,
- output_dir=output_path,
- )
- return 0
-
-
-def _parse_h5_to_cifti():
- parser = argparse.ArgumentParser(
- description='Create a directory with cifti results from an hdf5 file',
- formatter_class=argparse.ArgumentDefaultsHelpFormatter,
- )
- IsFile = partial(_is_file, parser=parser)
-
- add_from_modelarray_args(parser)
-
- example_cifti_group = parser.add_mutually_exclusive_group(required=True)
- example_cifti_group.add_argument(
- '--cohort-file',
- '--cohort_file',
- help=(
- 'Path to a csv with demographic info and paths to data. '
- 'Used to select an example CIFTI file if no example CIFTI file is provided.'
- ),
- type=IsFile,
- default=None,
- )
- example_cifti_group.add_argument(
- '--example-cifti',
- '--example_cifti',
- help='Path to an example cifti file.',
- type=IsFile,
- default=None,
- )
-
- add_log_level_arg(parser)
- return parser
diff --git a/src/modelarrayio/cli/h5_to_mif.py b/src/modelarrayio/cli/h5_to_mif.py
index 99e0d3b..603a827 100644
--- a/src/modelarrayio/cli/h5_to_mif.py
+++ b/src/modelarrayio/cli/h5_to_mif.py
@@ -2,24 +2,19 @@
from __future__ import annotations
-import argparse
import logging
-import shutil
-from functools import partial
from pathlib import Path
import h5py
import nibabel as nb
-import pandas as pd
from modelarrayio.cli import utils as cli_utils
-from modelarrayio.cli.parser_utils import _is_file, add_from_modelarray_args, add_log_level_arg
from modelarrayio.utils.mif import mif_to_nifti2, nifti2_to_mif
logger = logging.getLogger(__name__)
-def h5_to_mif(example_mif, in_file, analysis_name, output_dir):
+def h5_to_mif(example_mif, in_file, analysis_name, compress, output_dir):
"""Writes the contents of an hdf5 file to a fixels directory.
The ``in_file`` parameter should point to an HDF5 file that contains at least two
@@ -35,24 +30,27 @@ def h5_to_mif(example_mif, in_file, analysis_name, output_dir):
item in ``results/has_names``.
Parameters
- ==========
+ ----------
example_mif: str
abspath to a scalar mif file. Its header is used as a template
in_file: str
abspath to an h5 file that contains statistical results and their metadata.
analysis_name: str
the name for the analysis results to be saved
+ compress: bool
+ whether to compress output MIF files
output_dir: str
abspath to where the output fixel data will go. the index and directions mif files
should already be copied here.
Outputs
- =======
+ -------
None
"""
# Get a template nifti image.
nifti2_img, _ = mif_to_nifti2(example_mif)
output_path = Path(output_dir)
+ ext = '.mif.gz' if compress else '.mif'
with h5py.File(in_file, 'r') as h5_data:
results_matrix = h5_data[f'results/{analysis_name}/results_matrix']
results_names = cli_utils.read_result_names(
@@ -61,7 +59,7 @@ def h5_to_mif(example_mif, in_file, analysis_name, output_dir):
for result_col, result_name in enumerate(results_names):
valid_result_name = cli_utils.sanitize_result_name(result_name)
- out_mif = output_path / f'{analysis_name}_{valid_result_name}.mif'
+ out_mif = output_path / f'{analysis_name}_{valid_result_name}{ext}'
temp_nifti2 = nb.Nifti2Image(
results_matrix[result_col, :].reshape(-1, 1, 1),
nifti2_img.affine,
@@ -73,7 +71,7 @@ def h5_to_mif(example_mif, in_file, analysis_name, output_dir):
continue
valid_result_name_1mpvalue = valid_result_name.replace('p.value', '1m.p.value')
- out_mif_1mpvalue = output_path / f'{analysis_name}_{valid_result_name_1mpvalue}.mif'
+ out_mif_1mpvalue = output_path / f'{analysis_name}_{valid_result_name_1mpvalue}{ext}'
output_mifvalues_1mpvalue = 1 - results_matrix[result_col, :]
temp_nifti2_1mpvalue = nb.Nifti2Image(
output_mifvalues_1mpvalue.reshape(-1, 1, 1),
@@ -81,88 +79,3 @@ def h5_to_mif(example_mif, in_file, analysis_name, output_dir):
header=nifti2_img.header,
)
nifti2_to_mif(temp_nifti2_1mpvalue, out_mif_1mpvalue)
-
-
-def h5_to_mif_main(
- index_file,
- directions_file,
- analysis_name,
- in_file,
- output_dir,
- cohort_file=None,
- example_mif=None,
- log_level='INFO',
-):
- """Entry point for the ``modelarrayio h5-to-mif`` command."""
- cli_utils.configure_logging(log_level)
- output_path = cli_utils.prepare_output_directory(output_dir, logger)
-
- shutil.copyfile(
- directions_file,
- output_path / Path(directions_file).name,
- )
- shutil.copyfile(
- index_file,
- output_path / Path(index_file).name,
- )
-
- if example_mif is None:
- logger.warning(
- 'No example MIF file provided, using the first MIF file from the cohort file'
- )
- cohort_df = pd.read_csv(cohort_file)
- example_mif = cohort_df['source_file'].iloc[0]
-
- h5_to_mif(
- example_mif=example_mif,
- in_file=in_file,
- analysis_name=analysis_name,
- output_dir=output_path,
- )
- return 0
-
-
-def _parse_h5_to_mif():
- parser = argparse.ArgumentParser(
- description='Create a fixel directory from an hdf5 file',
- formatter_class=argparse.ArgumentDefaultsHelpFormatter,
- )
- IsFile = partial(_is_file, parser=parser)
-
- parser.add_argument(
- '--index-file',
- '--index_file',
- help='Index file used to reconstruct MIF files.',
- required=True,
- type=IsFile,
- )
- parser.add_argument(
- '--directions-file',
- '--directions_file',
- help='Directions file used to reconstruct MIF files.',
- required=True,
- type=IsFile,
- )
-
- add_from_modelarray_args(parser)
-
- example_mif_group = parser.add_mutually_exclusive_group(required=True)
- example_mif_group.add_argument(
- '--cohort-file',
- '--cohort_file',
- help=(
- 'Path to a csv with demographic info and paths to data. '
- 'Used to select an example MIF file if no example MIF file is provided.'
- ),
- type=IsFile,
- default=None,
- )
- example_mif_group.add_argument(
- '--example-mif',
- '--example_mif',
- help='Path to an example MIF file.',
- type=IsFile,
- default=None,
- )
- add_log_level_arg(parser)
- return parser
diff --git a/src/modelarrayio/cli/h5_to_nifti.py b/src/modelarrayio/cli/h5_to_nifti.py
index 4ff6ca9..f57bd29 100644
--- a/src/modelarrayio/cli/h5_to_nifti.py
+++ b/src/modelarrayio/cli/h5_to_nifti.py
@@ -2,9 +2,7 @@
from __future__ import annotations
-import argparse
import logging
-from functools import partial
from pathlib import Path
import h5py
@@ -12,14 +10,30 @@
import numpy as np
from modelarrayio.cli import utils as cli_utils
-from modelarrayio.cli.parser_utils import _is_file, add_from_modelarray_args, add_log_level_arg
logger = logging.getLogger(__name__)
-def h5_to_nifti(in_file, analysis_name, group_mask_file, output_extension, output_dir):
- """Convert stat results in .h5 file to a list of volume (.nii or .nii.gz) files."""
-
+def h5_to_nifti(in_file, analysis_name, group_mask_file, compress, output_dir):
+ """Convert stat results in .h5 file to a list of volume (.nii or .nii.gz) files.
+
+ Parameters
+ ----------
+ in_file: str
+ abspath to an h5 file that contains statistical results and their metadata.
+ analysis_name: str
+ the name for the analysis results to be saved
+ group_mask_file: str
+ abspath to a NIfTI-1 binary group mask file.
+ compress: bool
+ whether to compress output NIfTI files
+ output_dir: str
+ abspath to where the output NIfTI files will go.
+
+ Outputs
+ -------
+ None
+ """
data_type_tosave = np.float32
# group-level mask:
@@ -31,6 +45,8 @@ def h5_to_nifti(in_file, analysis_name, group_mask_file, output_extension, outpu
# modify the data type (mask's data type could be uint8...)
header_tosave.set_data_dtype(data_type_tosave)
+ ext = '.nii.gz' if compress else '.nii'
+
output_path = Path(output_dir)
with h5py.File(in_file, 'r') as h5_data:
results_matrix = h5_data[f'results/{analysis_name}/results_matrix']
@@ -40,7 +56,7 @@ def h5_to_nifti(in_file, analysis_name, group_mask_file, output_extension, outpu
for result_col, result_name in enumerate(results_names):
valid_result_name = cli_utils.sanitize_result_name(result_name)
- out_file = output_path / f'{analysis_name}_{valid_result_name}{output_extension}'
+ out_file = output_path / f'{analysis_name}_{valid_result_name}{ext}'
output = np.zeros(group_mask_matrix.shape)
data_tosave = results_matrix[result_col, :].astype(data_type_tosave)
output[group_mask_matrix] = data_tosave
@@ -51,9 +67,7 @@ def h5_to_nifti(in_file, analysis_name, group_mask_file, output_extension, outpu
continue
valid_result_name_1mpvalue = valid_result_name.replace('p.value', '1m.p.value')
- out_file_1mpvalue = (
- output_path / f'{analysis_name}_{valid_result_name_1mpvalue}{output_extension}'
- )
+ out_file_1mpvalue = output_path / f'{analysis_name}_{valid_result_name_1mpvalue}{ext}'
output_1mpvalue = np.zeros(group_mask_matrix.shape)
output_1mpvalue[group_mask_matrix] = (1 - results_matrix[result_col, :]).astype(
data_type_tosave
@@ -62,56 +76,3 @@ def h5_to_nifti(in_file, analysis_name, group_mask_file, output_extension, outpu
output_1mpvalue, affine=group_mask_img.affine, header=header_tosave
)
output_img_1mpvalue.to_filename(out_file_1mpvalue)
-
-
-def h5_to_nifti_main(
- group_mask_file,
- analysis_name,
- in_file,
- output_dir,
- output_extension='.nii.gz',
- log_level='INFO',
-):
- """Entry point for the ``modelarrayio h5-to-nifti`` command."""
- cli_utils.configure_logging(log_level)
- output_path = cli_utils.prepare_output_directory(output_dir, logger)
-
- h5_to_nifti(
- in_file=in_file,
- analysis_name=analysis_name,
- group_mask_file=group_mask_file,
- output_extension=output_extension,
- output_dir=output_path,
- )
- return 0
-
-
-def _parse_h5_to_nifti():
- parser = argparse.ArgumentParser(
- description='Convert statistical results from an hdf5 file to a volume data (NIfTI file)',
- formatter_class=argparse.ArgumentDefaultsHelpFormatter,
- )
- IsFile = partial(_is_file, parser=parser)
- parser.add_argument(
- '--group-mask-file',
- '--group_mask_file',
- help='Path to a group mask file',
- required=True,
- type=IsFile,
- )
-
- add_from_modelarray_args(parser)
-
- parser.add_argument(
- '--output-ext',
- '--output_ext',
- dest='output_extension',
- help=(
- 'The extension for output volume data. '
- 'Options are .nii.gz (default) and .nii. Please provide the prefix dot.'
- ),
- choices=['.nii.gz', '.nii'],
- default='.nii.gz',
- )
- add_log_level_arg(parser)
- return parser
diff --git a/src/modelarrayio/cli/main.py b/src/modelarrayio/cli/main.py
index 9cd26a9..f472ca1 100644
--- a/src/modelarrayio/cli/main.py
+++ b/src/modelarrayio/cli/main.py
@@ -5,20 +5,12 @@
import argparse
from importlib.metadata import PackageNotFoundError, version
-from modelarrayio.cli.cifti_to_h5 import _parse_cifti_to_h5, cifti_to_h5_main
-from modelarrayio.cli.h5_to_cifti import _parse_h5_to_cifti, h5_to_cifti_main
-from modelarrayio.cli.h5_to_mif import _parse_h5_to_mif, h5_to_mif_main
-from modelarrayio.cli.h5_to_nifti import _parse_h5_to_nifti, h5_to_nifti_main
-from modelarrayio.cli.mif_to_h5 import _parse_mif_to_h5, mif_to_h5_main
-from modelarrayio.cli.nifti_to_h5 import _parse_nifti_to_h5, nifti_to_h5_main
+from modelarrayio.cli.export_results import _parse_export_results, export_results_main
+from modelarrayio.cli.to_modelarray import _parse_to_modelarray, to_modelarray_main
COMMANDS = [
- ('mif-to-h5', _parse_mif_to_h5, mif_to_h5_main),
- ('nifti-to-h5', _parse_nifti_to_h5, nifti_to_h5_main),
- ('cifti-to-h5', _parse_cifti_to_h5, cifti_to_h5_main),
- ('h5-to-mif', _parse_h5_to_mif, h5_to_mif_main),
- ('h5-to-nifti', _parse_h5_to_nifti, h5_to_nifti_main),
- ('h5-to-cifti', _parse_h5_to_cifti, h5_to_cifti_main),
+ ('to-modelarray', _parse_to_modelarray, to_modelarray_main),
+ ('export-results', _parse_export_results, export_results_main),
]
diff --git a/src/modelarrayio/cli/mif_to_h5.py b/src/modelarrayio/cli/mif_to_h5.py
index ea753ff..0d7a9a4 100644
--- a/src/modelarrayio/cli/mif_to_h5.py
+++ b/src/modelarrayio/cli/mif_to_h5.py
@@ -2,10 +2,8 @@
from __future__ import annotations
-import argparse
import logging
from concurrent.futures import ThreadPoolExecutor, as_completed
-from functools import partial
from pathlib import Path
import h5py
@@ -13,7 +11,6 @@
from tqdm import tqdm
from modelarrayio.cli import utils as cli_utils
-from modelarrayio.cli.parser_utils import _is_file, add_to_modelarray_args
from modelarrayio.utils.mif import gather_fixels, load_cohort_mif
from modelarrayio.utils.misc import cohort_to_long_dataframe
@@ -162,38 +159,3 @@ def _write_scalar_job(scalar_name):
for future in tqdm(as_completed(futures), total=len(futures), desc='TileDB scalars'):
future.result()
return 0
-
-
-def mif_to_h5_main(**kwargs):
- """Entry point for the ``modelarrayio mif-to-h5`` command."""
- log_level = kwargs.pop('log_level', 'INFO')
- cli_utils.configure_logging(log_level)
- return mif_to_h5(**kwargs)
-
-
-def _parse_mif_to_h5():
- parser = argparse.ArgumentParser(
- description='Create a hdf5 file of fixel data',
- formatter_class=argparse.ArgumentDefaultsHelpFormatter,
- )
- IsFile = partial(_is_file, parser=parser)
-
- # MIF-specific arguments
- parser.add_argument(
- '--index-file',
- '--index_file',
- help='Index File',
- required=True,
- type=IsFile,
- )
- parser.add_argument(
- '--directions-file',
- '--directions_file',
- help='Directions File',
- required=True,
- type=IsFile,
- )
-
- # Common arguments
- add_to_modelarray_args(parser, default_output='fixelarray.h5')
- return parser
diff --git a/src/modelarrayio/cli/nifti_to_h5.py b/src/modelarrayio/cli/nifti_to_h5.py
index 12952ad..a81fb64 100644
--- a/src/modelarrayio/cli/nifti_to_h5.py
+++ b/src/modelarrayio/cli/nifti_to_h5.py
@@ -2,10 +2,8 @@
from __future__ import annotations
-import argparse
import logging
from concurrent.futures import ThreadPoolExecutor, as_completed
-from functools import partial
from pathlib import Path
import h5py
@@ -15,7 +13,6 @@
from tqdm import tqdm
from modelarrayio.cli import utils as cli_utils
-from modelarrayio.cli.parser_utils import _is_file, add_to_modelarray_args
from modelarrayio.utils.misc import cohort_to_long_dataframe
from modelarrayio.utils.nifti import load_cohort_voxels
@@ -163,31 +160,3 @@ def _write_scalar_job(scalar_name):
for future in tqdm(as_completed(futures), total=len(futures), desc='TileDB scalars'):
future.result()
return 0
-
-
-def nifti_to_h5_main(**kwargs):
- """Entry point for the ``modelarrayio nifti-to-h5`` command."""
- log_level = kwargs.pop('log_level', 'INFO')
- cli_utils.configure_logging(log_level)
- return nifti_to_h5(**kwargs)
-
-
-def _parse_nifti_to_h5():
- parser = argparse.ArgumentParser(
- description='Create a hdf5 file of volume data',
- formatter_class=argparse.ArgumentDefaultsHelpFormatter,
- )
- IsFile = partial(_is_file, parser=parser)
-
- # NIfTI-specific arguments
- parser.add_argument(
- '--group-mask-file',
- '--group_mask_file',
- help='Path to a group mask file',
- required=True,
- type=IsFile,
- )
-
- # Common arguments
- add_to_modelarray_args(parser, default_output='voxelarray.h5')
- return parser
diff --git a/src/modelarrayio/cli/parser_utils.py b/src/modelarrayio/cli/parser_utils.py
index 5a47836..21cfeb9 100644
--- a/src/modelarrayio/cli/parser_utils.py
+++ b/src/modelarrayio/cli/parser_utils.py
@@ -1,124 +1,8 @@
from __future__ import annotations
-from functools import partial
from pathlib import Path
-def add_to_modelarray_args(parser, default_output='output.h5'):
- """Add arguments common to all commands that prepare data for ModelArray."""
- parser.add_argument(
- '--cohort-file',
- '--cohort_file',
- help='Path to a csv with demographic info and paths to data.',
- required=True,
- type=partial(_is_file, parser=parser),
- )
- parser.add_argument(
- '--output',
- help=(
- 'Output path. For the hdf5 backend, path to an .h5 file; '
- 'for the tiledb backend, path to a .tdb directory.'
- ),
- default=default_output,
- type=Path,
- )
- parser.add_argument(
- '--scalar-columns',
- '--scalar_columns',
- nargs='+',
- help=(
- 'Column names containing scalar file paths when the cohort table is in wide format. '
- 'If omitted, the cohort file must include "scalar_name" and "source_file" columns.'
- ),
- )
- parser.add_argument(
- '--backend',
- help='Storage backend for subject-by-element matrix',
- choices=['hdf5', 'tiledb'],
- default='hdf5',
- )
- parser.add_argument(
- '--dtype',
- help='Floating dtype for storing values: float32 (default) or float64',
- choices=['float32', 'float64'],
- default='float32',
- dest='storage_dtype',
- )
- parser.add_argument(
- '--compression',
- help=(
- 'Compression filter (default gzip). '
- 'gzip works for both backends; '
- 'lzf is HDF5-only; '
- 'zstd is TileDB-only.'
- ),
- choices=['gzip', 'zstd', 'lzf', 'none'],
- default='gzip',
- )
- parser.add_argument(
- '--compression-level',
- '--compression_level',
- type=int,
- help='Compression level (codec-dependent). Default 4.',
- default=4,
- )
- parser.add_argument(
- '--no-shuffle',
- dest='shuffle',
- action='store_false',
- help='Disable shuffle filter (enabled by default when compression is used).',
- default=True,
- )
-
- chunk_allocation_group = parser.add_mutually_exclusive_group()
- chunk_allocation_group.add_argument(
- '--chunk-voxels',
- '--chunk_voxels',
- type=int,
- help=(
- 'Chunk/tile size along voxel/greyordinate/fixel axis. '
- 'If 0, auto-compute based on --target-chunk-mb and number of subjects.'
- ),
- default=0,
- )
- chunk_allocation_group.add_argument(
- '--target-chunk-mb',
- '--target_chunk_mb',
- type=float,
- help='Target chunk/tile size in MiB when auto-computing the spatial axis length. Default 2.0.',
- default=2.0,
- )
-
- tiledb_group = parser.add_argument_group('TileDB arguments')
- tiledb_group.add_argument(
- '--workers',
- type=int,
- help=(
- 'Maximum number of parallel TileDB write workers. '
- 'Default 1. '
- 'Has no effect when --backend=hdf5.'
- ),
- default=1,
- )
-
- s3_group = parser.add_argument_group('S3 arguments')
- s3_group.add_argument(
- '--s3-workers',
- '--s3_workers',
- type=int,
- default=1,
- help=(
- 'Number of parallel worker processes for loading image files. '
- 'Set > 1 to enable parallel downloads when cohort paths begin with s3://. '
- 'Default 1 (serial).'
- ),
- )
-
- add_log_level_arg(parser)
-
- return parser
-
-
def add_log_level_arg(parser):
parser.add_argument(
'--log-level',
@@ -131,34 +15,6 @@ def add_log_level_arg(parser):
return parser
-def add_from_modelarray_args(parser):
- parser.add_argument(
- '--analysis-name',
- '--analysis_name',
- help='Name for the statistical analysis results to be saved.',
- required=True,
- )
- parser.add_argument(
- '--input-hdf5',
- '--input_hdf5',
- help='Name of HDF5 (.h5) file where results outputs are saved.',
- type=partial(_is_file, parser=parser),
- dest='in_file',
- required=True,
- )
- parser.add_argument(
- '--output-dir',
- '--output_dir',
- help=(
- 'Directory where outputs will be saved. '
- 'If the directory does not exist, it will be automatically created.'
- ),
- required=True,
- )
-
- return parser
-
-
def _path_exists(path: str | Path | None, parser) -> Path:
"""Ensure a given path exists."""
if path is None or not Path(path).exists():
diff --git a/src/modelarrayio/cli/to_modelarray.py b/src/modelarrayio/cli/to_modelarray.py
new file mode 100644
index 0000000..e113505
--- /dev/null
+++ b/src/modelarrayio/cli/to_modelarray.py
@@ -0,0 +1,277 @@
+"""Convert neuroimaging data to an HDF5 modelarray file."""
+
+from __future__ import annotations
+
+import argparse
+import logging
+from functools import partial
+from pathlib import Path
+
+import pandas as pd
+
+from modelarrayio.cli import utils as cli_utils
+from modelarrayio.cli.cifti_to_h5 import cifti_to_h5
+from modelarrayio.cli.mif_to_h5 import mif_to_h5
+from modelarrayio.cli.nifti_to_h5 import nifti_to_h5
+from modelarrayio.cli.parser_utils import _is_file, add_log_level_arg
+from modelarrayio.utils.misc import cohort_to_long_dataframe
+
+logger = logging.getLogger(__name__)
+
+_CIFTI_EXTENSIONS = (
+ '.dscalar.nii',
+ '.pscalar.nii',
+ '.pconn.nii',
+ '.dtseries.nii',
+ '.ptseries.nii',
+ '.dlabel.nii',
+)
+
+
+def _detect_modality_from_path(path: str) -> str:
+ """Return ``'cifti'``, ``'mif'``, or ``'nifti'`` based on file extension."""
+ path = str(path)
+ if any(path.endswith(ext) for ext in _CIFTI_EXTENSIONS):
+ return 'cifti'
+ if path.endswith('.mif'):
+ return 'mif'
+ if path.endswith(('.nii.gz', '.nii')):
+ return 'nifti'
+ raise ValueError(
+ f'Cannot detect modality from file extension: {path!r}. '
+ 'Expected .mif, .nii, .nii.gz, or a CIFTI compound extension '
+ '(e.g. .dscalar.nii, .pscalar.nii).'
+ )
+
+
+def to_modelarray(
+ cohort_file,
+ backend='hdf5',
+ output=Path('modelarray.h5'),
+ storage_dtype='float32',
+ compression='gzip',
+ compression_level=4,
+ shuffle=True,
+ chunk_voxels=0,
+ target_chunk_mb=2.0,
+ workers=1,
+ s3_workers=1,
+ scalar_columns=None,
+ group_mask_file=None,
+ index_file=None,
+ directions_file=None,
+):
+ """Load neuroimaging data and write to an HDF5 or TileDB modelarray file.
+
+ The modality (NIfTI, CIFTI, or MIF/fixel) is autodetected from the file
+ extension of the first source file listed in the cohort file.
+
+ Parameters
+ ----------
+ cohort_file : path-like
+ Path to a CSV with demographic info and paths to data.
+ group_mask_file : path-like, optional
+ Path to a NIfTI binary group mask file. Required for NIfTI data.
+ index_file : path-like, optional
+ Nifti2 index file. Required for MIF/fixel data.
+ directions_file : path-like, optional
+ Nifti2 directions file. Required for MIF/fixel data.
+ """
+ cohort_df = pd.read_csv(cohort_file)
+ cohort_long = cohort_to_long_dataframe(cohort_df, scalar_columns=scalar_columns)
+ if cohort_long.empty:
+ raise ValueError('Cohort file does not contain any scalar entries after normalization.')
+
+ first_path = cohort_long['source_file'].iloc[0]
+ modality = _detect_modality_from_path(str(first_path))
+ logger.info('Detected modality: %s', modality)
+
+ common_kwargs = {
+ 'cohort_file': cohort_file,
+ 'backend': backend,
+ 'output': output,
+ 'storage_dtype': storage_dtype,
+ 'compression': compression,
+ 'compression_level': compression_level,
+ 'shuffle': shuffle,
+ 'chunk_voxels': chunk_voxels,
+ 'target_chunk_mb': target_chunk_mb,
+ 'workers': workers,
+ 's3_workers': s3_workers,
+ 'scalar_columns': scalar_columns,
+ }
+
+ if modality == 'nifti':
+ if group_mask_file is None:
+ raise ValueError(
+ 'Detected NIfTI data but --mask was not provided. '
+ 'Please supply the path to a binary group mask NIfTI file.'
+ )
+ return nifti_to_h5(group_mask_file=group_mask_file, **common_kwargs)
+
+ if modality == 'mif':
+ if index_file is None or directions_file is None:
+ raise ValueError(
+ 'Detected MIF/fixel data but --index-file and/or --directions-file '
+ 'were not provided. Both are required for MIF data.'
+ )
+ return mif_to_h5(index_file=index_file, directions_file=directions_file, **common_kwargs)
+
+ # cifti
+ return cifti_to_h5(**common_kwargs)
+
+
+def to_modelarray_main(**kwargs):
+ """Entry point for the ``modelarrayio to-modelarray`` command."""
+ log_level = kwargs.pop('log_level', 'INFO')
+ cli_utils.configure_logging(log_level)
+ return to_modelarray(**kwargs)
+
+
+def _parse_to_modelarray():
+ parser = argparse.ArgumentParser(
+ description=(
+ 'Convert neuroimaging data to a modelarray HDF5 file. '
+ 'The modality (NIfTI, CIFTI, or MIF/fixel) is autodetected from '
+ 'the source file extensions in the cohort file.'
+ ),
+ formatter_class=argparse.ArgumentDefaultsHelpFormatter,
+ )
+ IsFile = partial(_is_file, parser=parser)
+
+ parser.add_argument(
+ '--cohort-file',
+ '--cohort_file',
+ help='Path to a csv with demographic info and paths to data.',
+ required=True,
+ type=partial(_is_file, parser=parser),
+ )
+ parser.add_argument(
+ '--output',
+ help=(
+ 'Output path. For the hdf5 backend, path to an .h5 file; '
+ 'for the tiledb backend, path to a .tdb directory.'
+ ),
+ default=Path('modelarray.h5'),
+ type=Path,
+ )
+ parser.add_argument(
+ '--scalar-columns',
+ '--scalar_columns',
+ nargs='+',
+ help=(
+ 'Column names containing scalar file paths when the cohort table is in wide format. '
+ 'If omitted, the cohort file must include "scalar_name" and "source_file" columns.'
+ ),
+ )
+ parser.add_argument(
+ '--backend',
+ help='Storage backend for subject-by-element matrix',
+ choices=['hdf5', 'tiledb'],
+ default='hdf5',
+ )
+ parser.add_argument(
+ '--dtype',
+ help='Floating dtype for storing values: float32 (default) or float64',
+ choices=['float32', 'float64'],
+ default='float32',
+ dest='storage_dtype',
+ )
+ parser.add_argument(
+ '--compression',
+ help=(
+ 'Compression filter (default gzip). '
+ 'gzip works for both backends; '
+ 'lzf is HDF5-only; '
+ 'zstd is TileDB-only.'
+ ),
+ choices=['gzip', 'zstd', 'lzf', 'none'],
+ default='gzip',
+ )
+ parser.add_argument(
+ '--compression-level',
+ '--compression_level',
+ type=int,
+ help='Compression level (codec-dependent). Default 4.',
+ default=4,
+ )
+ parser.add_argument(
+ '--no-shuffle',
+ dest='shuffle',
+ action='store_false',
+ help='Disable shuffle filter (enabled by default when compression is used).',
+ default=True,
+ )
+
+ chunk_allocation_group = parser.add_mutually_exclusive_group()
+ chunk_allocation_group.add_argument(
+ '--chunk-voxels',
+ '--chunk_voxels',
+ type=int,
+ help=(
+ 'Chunk/tile size along voxel/greyordinate/fixel axis. '
+ 'If 0, auto-compute based on --target-chunk-mb and number of subjects.'
+ ),
+ default=0,
+ )
+ chunk_allocation_group.add_argument(
+ '--target-chunk-mb',
+ '--target_chunk_mb',
+ type=float,
+ help='Target chunk/tile size in MiB when auto-computing the spatial axis length. Default 2.0.',
+ default=2.0,
+ )
+
+ tiledb_group = parser.add_argument_group('TileDB arguments')
+ tiledb_group.add_argument(
+ '--workers',
+ type=int,
+ help=(
+ 'Maximum number of parallel TileDB write workers. '
+ 'Default 1. '
+ 'Has no effect when --backend=hdf5.'
+ ),
+ default=1,
+ )
+
+ s3_group = parser.add_argument_group('S3 arguments')
+ s3_group.add_argument(
+ '--s3-workers',
+ '--s3_workers',
+ type=int,
+ default=1,
+ help=(
+ 'Number of parallel worker processes for loading image files. '
+ 'Set > 1 to enable parallel downloads when cohort paths begin with s3://. '
+ 'Default 1 (serial).'
+ ),
+ )
+
+ nifti_group = parser.add_argument_group('NIfTI arguments (required for NIfTI data)')
+ nifti_group.add_argument(
+ '--mask',
+ help='Path to a NIfTI binary group mask file.',
+ type=IsFile,
+ default=None,
+ dest='group_mask_file',
+ )
+
+ mif_group = parser.add_argument_group('MIF/fixel arguments (required for MIF/fixel data)')
+ mif_group.add_argument(
+ '--index-file',
+ '--index_file',
+ help='Nifti2 index file.',
+ type=IsFile,
+ default=None,
+ )
+ mif_group.add_argument(
+ '--directions-file',
+ '--directions_file',
+ help='Nifti2 directions file.',
+ type=IsFile,
+ default=None,
+ )
+
+ add_log_level_arg(parser)
+
+ return parser
diff --git a/test/data_voxel_toy/README.md b/test/data_voxel_toy/README.md
index bc505ca..4f8bb65 100644
--- a/test/data_voxel_toy/README.md
+++ b/test/data_voxel_toy/README.md
@@ -1,4 +1,4 @@
-This is a toy voxel-wise dataset for testing ModelArrayIO’s voxel pipeline (`modelarrayio nifti-to-h5` / `modelarrayio h5-to-nifti`).
+This is a toy voxel-wise dataset for testing ModelArrayIO’s voxel pipeline (`modelarrayio to-modelarray` / `modelarrayio export-results`).
* It was generated using scripts in [ModelArray_tests](https://github.com/PennLINC/ModelArray_tests) GitHub repository.
* Using scripts in folder `testdata_ConFixel` there (historical folder name from the ConFixel era).
diff --git a/test/test_cifti_cli.py b/test/test_cifti_cli.py
index 8a6d581..0a745fa 100644
--- a/test/test_cifti_cli.py
+++ b/test/test_cifti_cli.py
@@ -1,4 +1,4 @@
-"""Tests for the cifti-to-h5 and h5-to-cifti CLI commands.
+"""Tests for CIFTI conversion functions and the to-modelarray CLI command.
Covers dscalar, pscalar, and pconn CIFTI types for both conversion directions,
and includes end-to-end tests via the top-level modelarrayio CLI entry point.
@@ -16,8 +16,8 @@
import pytest
from utils import make_dscalar, make_parcels_axis, make_pconn, make_pscalar # noqa: F401
-from modelarrayio.cli.cifti_to_h5 import cifti_to_h5, cifti_to_h5_main
-from modelarrayio.cli.h5_to_cifti import _cifti_output_ext, h5_to_cifti, h5_to_cifti_main
+from modelarrayio.cli.cifti_to_h5 import cifti_to_h5
+from modelarrayio.cli.h5_to_cifti import _cifti_output_ext, h5_to_cifti
from modelarrayio.cli.main import main as modelarrayio_main
from modelarrayio.utils.cifti import _get_cifti_parcel_info
@@ -478,27 +478,26 @@ def test_missing_required_columns_raises(self, tmp_path):
# ===========================================================================
-# cifti_to_h5_main entry point
+# cifti_to_h5 additional entry-point smoke tests
# ===========================================================================
-class TestCiftiToH5Main:
+class TestCiftiToH5EntryPoint:
def test_returns_zero_on_success(self, tmp_path):
mask = _dscalar_mask()
paths = _write_dscalar_subjects(tmp_path, mask, n_subjects=1)
cohort = tmp_path / 'cohort.csv'
_write_cohort_csv(cohort, [{'scalar_name': 'THICK', 'source_file': str(p)} for p in paths])
out_h5 = tmp_path / 'out.h5'
- result = cifti_to_h5_main(cohort_file=str(cohort), output=out_h5)
- assert result == 0
+ assert cifti_to_h5(cohort, output=out_h5) == 0
- def test_output_file_exists_after_main(self, tmp_path):
+ def test_output_file_exists_after_call(self, tmp_path):
mask = _dscalar_mask()
paths = _write_dscalar_subjects(tmp_path, mask)
cohort = tmp_path / 'cohort.csv'
_write_cohort_csv(cohort, [{'scalar_name': 'THICK', 'source_file': str(p)} for p in paths])
out_h5 = tmp_path / 'out.h5'
- cifti_to_h5_main(cohort_file=str(cohort), output=out_h5)
+ cifti_to_h5(cohort, output=out_h5)
assert out_h5.exists()
@@ -810,98 +809,40 @@ def test_pconn_from_toy_data(self, tmp_path):
# ===========================================================================
-# h5_to_cifti_main entry point
+# h5_to_cifti additional smoke tests
# ===========================================================================
-class TestH5ToCiftiMain:
- def test_main_with_example_cifti_returns_zero(self, tmp_path):
- example = _make_dscalar_example(tmp_path)
- h5_path = tmp_path / 'results.h5'
- _make_h5_results(h5_path, 'analysis', np.ones((2, 5), np.float32), ['beta', 'tstat'])
- out_dir = tmp_path / 'out'
- result = h5_to_cifti_main(
- analysis_name='analysis',
- in_file=str(h5_path),
- output_dir=str(out_dir),
- example_cifti=str(example),
- )
- assert result == 0
-
- def test_main_with_example_cifti_creates_files(self, tmp_path):
+class TestH5ToCiftiDirect:
+ def test_dscalar_returns_zero(self, tmp_path):
example = _make_dscalar_example(tmp_path)
h5_path = tmp_path / 'results.h5'
_make_h5_results(h5_path, 'analysis', np.ones((2, 5), np.float32), ['beta', 'tstat'])
out_dir = tmp_path / 'out'
- h5_to_cifti_main(
- analysis_name='analysis',
- in_file=str(h5_path),
- output_dir=str(out_dir),
- example_cifti=str(example),
- )
+ out_dir.mkdir()
+ h5_to_cifti(str(example), str(h5_path), 'analysis', str(out_dir))
assert (out_dir / 'analysis_beta.dscalar.nii').exists()
assert (out_dir / 'analysis_tstat.dscalar.nii').exists()
- def test_main_with_cohort_file_returns_zero(self, tmp_path):
- example = _make_dscalar_example(tmp_path)
- cohort_csv = tmp_path / 'cohort.csv'
- pd.DataFrame({'source_file': [str(example)]}).to_csv(cohort_csv, index=False)
- h5_path = tmp_path / 'results.h5'
- _make_h5_results(h5_path, 'analysis', np.ones((1, 5), np.float32), ['beta'])
- out_dir = tmp_path / 'out'
- result = h5_to_cifti_main(
- analysis_name='analysis',
- in_file=str(h5_path),
- output_dir=str(out_dir),
- cohort_file=str(cohort_csv),
- )
- assert result == 0
-
- def test_main_with_cohort_file_uses_first_source(self, tmp_path):
- """cohort_file mode picks the first source_file row as the example CIFTI."""
- example = _make_dscalar_example(tmp_path)
- cohort_csv = tmp_path / 'cohort.csv'
- pd.DataFrame({'source_file': [str(example)]}).to_csv(cohort_csv, index=False)
- h5_path = tmp_path / 'results.h5'
- _make_h5_results(h5_path, 'analysis', np.ones((1, 5), np.float32), ['beta'])
- out_dir = tmp_path / 'out'
- h5_to_cifti_main(
- analysis_name='analysis',
- in_file=str(h5_path),
- output_dir=str(out_dir),
- cohort_file=str(cohort_csv),
- )
- assert (out_dir / 'analysis_beta.dscalar.nii').exists()
-
- def test_main_pscalar_with_example_cifti(self, tmp_path):
+ def test_pscalar_with_example(self, tmp_path):
parcels = ['A', 'B', 'C']
example = _make_pscalar_example(tmp_path, parcels)
h5_path = tmp_path / 'results.h5'
_make_h5_results(h5_path, 'analysis', np.ones((1, len(parcels)), np.float32), ['beta'])
out_dir = tmp_path / 'out'
- result = h5_to_cifti_main(
- analysis_name='analysis',
- in_file=str(h5_path),
- output_dir=str(out_dir),
- example_cifti=str(example),
- )
- assert result == 0
+ out_dir.mkdir()
+ h5_to_cifti(str(example), str(h5_path), 'analysis', str(out_dir))
assert (out_dir / 'analysis_beta.pscalar.nii').exists()
- def test_main_pconn_with_example_cifti(self, tmp_path):
+ def test_pconn_with_example(self, tmp_path):
parcels = ['X', 'Y']
n = len(parcels)
example = _make_pconn_example(tmp_path, parcels)
h5_path = tmp_path / 'results.h5'
_make_h5_results(h5_path, 'analysis', np.ones((1, n * n), np.float32), ['beta'])
out_dir = tmp_path / 'out'
- result = h5_to_cifti_main(
- analysis_name='analysis',
- in_file=str(h5_path),
- output_dir=str(out_dir),
- example_cifti=str(example),
- )
- assert result == 0
+ out_dir.mkdir()
+ h5_to_cifti(str(example), str(h5_path), 'analysis', str(out_dir))
assert (out_dir / 'analysis_beta.pconn.nii').exists()
@@ -910,8 +851,8 @@ def test_main_pconn_with_example_cifti(self, tmp_path):
# ===========================================================================
-class TestCiftiToH5ViaCLI:
- """End-to-end tests that exercise the cifti-to-h5 subcommand through modelarrayio_main."""
+class TestCiftiToModelarrayViaCLI:
+ """End-to-end tests that exercise CIFTI conversion through the to-modelarray subcommand."""
def test_cifti_to_h5_creates_expected_hdf5(self, tmp_path, monkeypatch):
vol_shape = (3, 3, 3)
@@ -940,7 +881,7 @@ def test_cifti_to_h5_creates_expected_hdf5(self, tmp_path, monkeypatch):
assert (
modelarrayio_main(
[
- 'cifti-to-h5',
+ 'to-modelarray',
'--cohort-file',
str(cohort_csv),
'--output',
@@ -1025,7 +966,7 @@ def test_cifti_to_h5_scalar_columns_writes_prefixed_outputs(self, tmp_path, monk
assert (
modelarrayio_main(
[
- 'cifti-to-h5',
+ 'to-modelarray',
'--cohort-file',
str(cohort_csv),
'--scalar-columns',
diff --git a/test/test_mif_cli.py b/test/test_mif_cli.py
index f67212b..d2b3735 100644
--- a/test/test_mif_cli.py
+++ b/test/test_mif_cli.py
@@ -1,4 +1,4 @@
-"""Tests for the mif-to-h5 CLI command."""
+"""Tests for the to-modelarray CLI command (MIF/fixel modality)."""
from __future__ import annotations
@@ -51,7 +51,7 @@ def fake_load_cohort_mif(cohort_long, _s3_workers):
assert (
modelarrayio_main(
[
- 'mif-to-h5',
+ 'to-modelarray',
'--index-file',
str(index_file),
'--directions-file',
diff --git a/test/test_nifti_cli.py b/test/test_nifti_cli.py
index e54fed5..6cc4c4d 100644
--- a/test/test_nifti_cli.py
+++ b/test/test_nifti_cli.py
@@ -1,4 +1,4 @@
-"""Tests for the nifti-to-h5 and h5-to-nifti CLI commands."""
+"""Tests for the to-modelarray and export-results CLI commands (NIfTI modality)."""
from __future__ import annotations
@@ -76,8 +76,8 @@ def test_nifti_to_h5_creates_expected_hdf5(tmp_path, monkeypatch):
assert (
modelarrayio_main(
[
- 'nifti-to-h5',
- '--group-mask-file',
+ 'to-modelarray',
+ '--mask',
str(group_mask_file),
'--cohort-file',
str(cohort_csv),
@@ -176,8 +176,8 @@ def test_h5_to_nifti_writes_results_with_dataset_column_names(tmp_path):
assert (
modelarrayio_main(
[
- 'h5-to-nifti',
- '--group-mask-file',
+ 'export-results',
+ '--mask',
str(group_mask_file),
'--analysis-name',
'lm',
@@ -256,8 +256,8 @@ def test_nifti_to_h5_scalar_columns_writes_prefixed_outputs(tmp_path, monkeypatc
assert (
modelarrayio_main(
[
- 'nifti-to-h5',
- '--group-mask-file',
+ 'to-modelarray',
+ '--mask',
str(group_mask_file),
'--cohort-file',
str(cohort_csv),
@@ -333,8 +333,8 @@ def test_nifti_tiledb_removes_existing_arrays_on_rerun(tmp_path, monkeypatch, ca
monkeypatch.chdir(tmp_path)
cli_args = [
- 'nifti-to-h5',
- '--group-mask-file',
+ 'to-modelarray',
+ '--mask',
str(group_mask_file),
'--cohort-file',
str(cohort_csv),
diff --git a/test/test_nifti_s3.py b/test/test_nifti_s3.py
index 454217a..08a2e78 100644
--- a/test/test_nifti_s3.py
+++ b/test/test_nifti_s3.py
@@ -82,8 +82,8 @@ def test_nifti_s3_parallel(tmp_path, group_mask_path, monkeypatch):
assert (
modelarrayio_main(
[
- 'nifti-to-h5',
- '--group-mask-file',
+ 'to-modelarray',
+ '--mask',
'group_mask.nii.gz',
'--cohort-file',
str(cohort_csv),
@@ -143,8 +143,8 @@ def test_nifti_s3_serial_matches_parallel(tmp_path, group_mask_path, monkeypatch
)
base_argv = [
- 'nifti-to-h5',
- '--group-mask-file',
+ 'to-modelarray',
+ '--mask',
str(group_mask_path),
'--cohort-file',
str(cohort_csv),
@@ -160,7 +160,7 @@ def test_nifti_s3_serial_matches_parallel(tmp_path, group_mask_path, monkeypatch
monkeypatch.setenv('MODELARRAYIO_S3_ANON', '1')
for workers, name in [('1', 'serial.h5'), ('4', 'parallel.h5')]:
assert modelarrayio_main(base_argv + ['--output', name, '--s3-workers', workers]) == 0, (
- f'modelarrayio nifti-to-h5 failed (workers={workers})'
+ f'modelarrayio to-modelarray failed (workers={workers})'
)
with (
diff --git a/test/test_parser_utils.py b/test/test_parser_utils.py
index 0e2f75f..4b9e936 100644
--- a/test/test_parser_utils.py
+++ b/test/test_parser_utils.py
@@ -1,84 +1,320 @@
-"""Smoke tests for shared argparse helpers."""
+"""Tests for shared argparse helpers and CLI argument parsers."""
from __future__ import annotations
import argparse
from pathlib import Path
-from modelarrayio.cli import parser_utils
+import pytest
+from modelarrayio.cli import parser_utils
+from modelarrayio.cli.export_results import _parse_export_results
+from modelarrayio.cli.to_modelarray import _parse_to_modelarray
-def _parser_with_cohort() -> argparse.ArgumentParser:
- p = argparse.ArgumentParser()
- parser_utils.add_to_modelarray_args(p)
- return p
+def test_add_log_level_arg_registers_choices():
+ parser = argparse.ArgumentParser()
+ parser_utils.add_log_level_arg(parser)
+ args = parser.parse_args(['--log-level', 'DEBUG'])
+ assert args.log_level == 'DEBUG'
+ with pytest.raises(SystemExit):
+ parser.parse_args(['--log-level', 'INVALID'])
-def test_minimal_invocation_defaults(tmp_path_factory) -> None:
- tmp_path = tmp_path_factory.mktemp('test_minimal_invocation_defaults')
- cohort_file = tmp_path / 'cohort.csv'
- cohort_file.write_text('subject_id,path\n1,path1\n2,path2')
- p = _parser_with_cohort()
- args = p.parse_args(['--cohort-file', str(cohort_file)])
- assert args.cohort_file == cohort_file
+def test_parse_to_modelarray_minimal_defaults(tmp_path):
+ cohort = tmp_path / 'cohort.csv'
+ cohort.write_text('scalar_name,source_file\nx,file.nii.gz\n')
+ parser = _parse_to_modelarray()
+ args = parser.parse_args(['--cohort-file', str(cohort)])
+ assert args.cohort_file == cohort.resolve()
+ assert args.output == Path('modelarray.h5')
+ assert args.backend == 'hdf5'
assert args.storage_dtype == 'float32'
assert args.compression == 'gzip'
assert args.compression_level == 4
assert args.shuffle is True
assert args.chunk_voxels == 0
- assert args.backend == 'hdf5'
+ assert args.target_chunk_mb == 2.0
+ assert args.workers == 1
assert args.s3_workers == 1
+ assert args.scalar_columns is None
+ assert args.group_mask_file is None
+ assert args.index_file is None
+ assert args.directions_file is None
assert args.log_level == 'INFO'
-def test_storage_aliases_and_no_shuffle(tmp_path_factory) -> None:
- tmp_path = tmp_path_factory.mktemp('test_storage_aliases_and_no_shuffle')
- cohort_file = tmp_path / 'cohort.csv'
- cohort_file.write_text('subject_id,path\n1,path1\n2,path2')
+def test_parse_to_modelarray_cohort_file_underscore_alias(tmp_path):
+ cohort = tmp_path / 'cohort.csv'
+ cohort.touch()
+ parser = _parse_to_modelarray()
+ args = parser.parse_args(['--cohort_file', str(cohort)])
+ assert args.cohort_file == cohort.resolve()
+
- p = _parser_with_cohort()
- args = p.parse_args(
+def test_parse_to_modelarray_optional_paths_and_flags(tmp_path):
+ cohort = tmp_path / 'cohort.csv'
+ cohort.touch()
+ mask = tmp_path / 'mask.nii.gz'
+ mask.touch()
+ idx = tmp_path / 'index.nii.gz'
+ idx.touch()
+ dirs = tmp_path / 'dirs.nii.gz'
+ dirs.touch()
+ parser = _parse_to_modelarray()
+ args = parser.parse_args(
[
'--cohort-file',
- str(cohort_file),
+ str(cohort),
+ '--output',
+ str(tmp_path / 'out.h5'),
+ '--backend',
+ 'tiledb',
'--dtype',
'float64',
'--compression',
- 'lzf',
+ 'zstd',
+ '--compression-level',
+ '7',
'--no-shuffle',
'--chunk-voxels',
- '128',
+ '64',
+ '--workers',
+ '4',
+ '--s3-workers',
+ '2',
+ '--scalar-columns',
+ 'col_a',
+ 'col_b',
+ '--mask',
+ str(mask),
+ '--index-file',
+ str(idx),
+ '--directions-file',
+ str(dirs),
'--log-level',
'WARNING',
]
)
+ assert args.output == tmp_path / 'out.h5'
+ assert args.backend == 'tiledb'
assert args.storage_dtype == 'float64'
- assert args.compression == 'lzf'
+ assert args.compression == 'zstd'
+ assert args.compression_level == 7
assert args.shuffle is False
- assert args.chunk_voxels == 128
+ assert args.chunk_voxels == 64
+ assert args.workers == 4
+ assert args.s3_workers == 2
+ assert args.scalar_columns == ['col_a', 'col_b']
+ assert args.group_mask_file == mask.resolve()
+ assert args.index_file == idx.resolve()
+ assert args.directions_file == dirs.resolve()
assert args.log_level == 'WARNING'
-def test_output_hdf5_default_name_override(tmp_path_factory) -> None:
- tmp_path = tmp_path_factory.mktemp('test_output_hdf5_default_name_override')
- cohort_file = tmp_path / 'cohort.csv'
- cohort_file.write_text('subject_id,path\n1,path1\n2,path2')
- p = argparse.ArgumentParser()
- parser_utils.add_to_modelarray_args(p, default_output='custom.h5')
- args = p.parse_args(['--cohort-file', str(cohort_file)])
- assert args.output == Path('custom.h5')
-
-
-def test_tiledb_args_group(tmp_path_factory) -> None:
- tmp_path = tmp_path_factory.mktemp('test_tiledb_args_group')
- cohort_file = tmp_path / 'cohort.csv'
- cohort_file.write_text('subject_id,path\n1,path1\n2,path2')
- p = argparse.ArgumentParser()
- parser_utils.add_to_modelarray_args(p, default_output='arrays.tdb')
- args = p.parse_args(['--cohort-file', str(cohort_file), '--backend', 'tiledb'])
- assert args.output == Path('arrays.tdb')
- assert args.backend == 'tiledb'
- assert args.workers == 1
- assert args.s3_workers == 1
+def test_parse_to_modelarray_target_chunk_mb_branch(tmp_path):
+ cohort = tmp_path / 'cohort.csv'
+ cohort.touch()
+ parser = _parse_to_modelarray()
+ args = parser.parse_args(['--cohort-file', str(cohort), '--target-chunk-mb', '8.5'])
+ assert args.target_chunk_mb == 8.5
+ assert args.chunk_voxels == 0
+
+
+def test_parse_to_modelarray_requires_cohort_file(tmp_path):
+ parser = _parse_to_modelarray()
+ with pytest.raises(SystemExit):
+ parser.parse_args([])
+
+
+def test_parse_to_modelarray_rejects_missing_cohort_path(tmp_path):
+ parser = _parse_to_modelarray()
+ missing = tmp_path / 'nope.csv'
+ with pytest.raises(SystemExit):
+ parser.parse_args(['--cohort-file', str(missing)])
+
+
+def test_parse_to_modelarray_rejects_invalid_backend(tmp_path):
+ cohort = tmp_path / 'cohort.csv'
+ cohort.touch()
+ parser = _parse_to_modelarray()
+ with pytest.raises(SystemExit):
+ parser.parse_args(['--cohort-file', str(cohort), '--backend', 'sqlite'])
+
+
+def test_parse_export_results_minimal_defaults(tmp_path):
+ h5 = tmp_path / 'results.h5'
+ h5.touch()
+ out = tmp_path / 'exports'
+ parser = _parse_export_results()
+ args = parser.parse_args(
+ [
+ '--analysis-name',
+ 'lm',
+ '--input-hdf5',
+ str(h5),
+ '--output-dir',
+ str(out),
+ ]
+ )
+ assert args.analysis_name == 'lm'
+ assert args.in_file == h5.resolve()
+ assert args.output_dir == str(out)
+ assert args.group_mask_file is None
+ assert args.compress is True
+ assert args.index_file is None
+ assert args.directions_file is None
+ assert args.cohort_file is None
+ assert args.example_file is None
assert args.log_level == 'INFO'
+
+
+def test_parse_export_results_input_hdf5_underscore_alias(tmp_path):
+ h5 = tmp_path / 'results.h5'
+ h5.touch()
+ parser = _parse_export_results()
+ args = parser.parse_args(
+ [
+ '--analysis_name',
+ 'a',
+ '--input_hdf5',
+ str(h5),
+ '--output_dir',
+ str(tmp_path / 'o'),
+ ]
+ )
+ assert args.in_file == h5.resolve()
+
+
+def test_parse_export_results_nifti_options(tmp_path):
+ h5 = tmp_path / 'results.h5'
+ h5.touch()
+ mask = tmp_path / 'mask.nii.gz'
+ mask.touch()
+ parser = _parse_export_results()
+ args = parser.parse_args(
+ [
+ '--analysis-name',
+ 'x',
+ '--input-hdf5',
+ str(h5),
+ '--output-dir',
+ str(tmp_path / 'out'),
+ '--mask',
+ str(mask),
+ '--no-compress',
+ ]
+ )
+ assert args.group_mask_file == mask.resolve()
+ assert args.compress is False
+
+
+def test_parse_export_results_mif_files(tmp_path):
+ h5 = tmp_path / 'results.h5'
+ h5.touch()
+ idx = tmp_path / 'index.nii.gz'
+ idx.touch()
+ dirs = tmp_path / 'dirs.nii.gz'
+ dirs.touch()
+ cohort = tmp_path / 'cohort.csv'
+ cohort.touch()
+ parser = _parse_export_results()
+ args = parser.parse_args(
+ [
+ '--analysis-name',
+ 'm',
+ '--input-hdf5',
+ str(h5),
+ '--output-dir',
+ str(tmp_path / 'out'),
+ '--index-file',
+ str(idx),
+ '--directions-file',
+ str(dirs),
+ '--cohort-file',
+ str(cohort),
+ ]
+ )
+ assert args.index_file == idx.resolve()
+ assert args.directions_file == dirs.resolve()
+ assert args.cohort_file == cohort.resolve()
+
+
+def test_parse_export_results_example_file_instead_of_cohort(tmp_path):
+ h5 = tmp_path / 'results.h5'
+ h5.touch()
+ example = tmp_path / 'example.dscalar.nii'
+ example.touch()
+ parser = _parse_export_results()
+ args = parser.parse_args(
+ [
+ '--analysis-name',
+ 'c',
+ '--input-hdf5',
+ str(h5),
+ '--output-dir',
+ str(tmp_path / 'out'),
+ '--example-file',
+ str(example),
+ ]
+ )
+ assert args.example_file == example.resolve()
+ assert args.cohort_file is None
+
+
+def test_parse_export_results_cohort_and_example_mutually_exclusive(tmp_path):
+ h5 = tmp_path / 'results.h5'
+ h5.touch()
+ cohort = tmp_path / 'cohort.csv'
+ cohort.touch()
+ example = tmp_path / 'ex.nii'
+ example.touch()
+ parser = _parse_export_results()
+ with pytest.raises(SystemExit):
+ parser.parse_args(
+ [
+ '--analysis-name',
+ 'x',
+ '--input-hdf5',
+ str(h5),
+ '--output-dir',
+ str(tmp_path / 'out'),
+ '--cohort-file',
+ str(cohort),
+ '--example-file',
+ str(example),
+ ]
+ )
+
+
+def test_parse_export_results_requires_analysis_input_output(tmp_path):
+ h5 = tmp_path / 'results.h5'
+ h5.touch()
+ parser = _parse_export_results()
+ with pytest.raises(SystemExit):
+ parser.parse_args(['--input-hdf5', str(h5), '--output-dir', str(tmp_path / 'o')])
+ with pytest.raises(SystemExit):
+ parser.parse_args(
+ [
+ '--analysis-name',
+ 'a',
+ '--output-dir',
+ str(tmp_path / 'o'),
+ ]
+ )
+
+
+def test_parse_export_results_rejects_missing_hdf5(tmp_path):
+ parser = _parse_export_results()
+ missing = tmp_path / 'gone.h5'
+ with pytest.raises(SystemExit):
+ parser.parse_args(
+ [
+ '--analysis-name',
+ 'a',
+ '--input-hdf5',
+ str(missing),
+ '--output-dir',
+ str(tmp_path / 'out'),
+ ]
+ )