From 828136284f1348db4899857a8d26a04307b0e643 Mon Sep 17 00:00:00 2001
From: araikes <adam.raikes@gmail.com>
Date: Thu, 26 Mar 2026 12:08:00 -0700
Subject: [PATCH 01/23] Update ModelArray roxygen

This should fix the duplicate documentation of "usage" in ModelArray by separating the class and function roxygen.
---
 R/ModelArray_Constructor.R | 37 +++++++++++++++++++------------------
 1 file changed, 19 insertions(+), 18 deletions(-)

diff --git a/R/ModelArray_Constructor.R b/R/ModelArray_Constructor.R
index f16d709..e0e2203 100644
--- a/R/ModelArray_Constructor.R
+++ b/R/ModelArray_Constructor.R
@@ -1,16 +1,22 @@
 # Exported Functions
 
-### setClass of "ModelArray" #####
+#' ModelArray class
+#'
 #' An S4 class to represent element-wise scalar data and statistics.
 #'
 #' @slot sources A list of source filenames
-#' @slot scalars A list of element-wise scalar matrix
-#' @slot results A list of statistical result matrix
+#' @slot scalars A list of element-wise scalar matrices
+#' @slot results A list of statistical result matrices
 #' @slot path Path to the h5 file on disk
+#'
+#' @name ModelArray-class
+#' @rdname ModelArray-class
+#' @keywords internal
 #' @importClassesFrom DelayedArray DelayedArray
-ModelArray <- setClass(
+NULL
+
+setClass(
   "ModelArray",
-  # contains="DelayedArray",
   slots = c(
     results = "list",
     sources = "list",
@@ -43,21 +49,16 @@ ModelArraySeed <- function(filepath, name, type = NA) {
 
 
 
-#' Load element-wise data from .h5 file as an ModelArray object
+#' Construct a ModelArray object
+#'
+#' Load element-wise data from an .h5 file as a `ModelArray` object.
 #'
-#' @details:
-#' Tips for debugging:
-#' if you run into this error: "Error in h(simpleError(msg, call)) :
-#' error in evaluating the argument 'seed' in selecting a method for
-#' function 'DelayedArray': HDF5. Symbol table. Can't open object."
-#' Then please check if you give correct "scalar_types" - check via
-#' rhdf5::h5ls(filename_for_h5)
+#' @param filepath Path to an .h5 file
+#' @param scalar_types Expected scalars
+#' @param analysis_names The subfolder names for results in the .h5 file.
+#'   If empty (default), results are not read.
 #'
-#' @param filepath file
-#' @param scalar_types expected scalars
-#' @param analysis_names the subfolder names for results in .h5 file. If empty
-#' (default), results are not read.
-#' @return ModelArray object
+#' @return A `ModelArray` object
 #' @export
 #' @import methods
 #' @importFrom dplyr %>%

From b17ec967a8a50dd644163891072cea7f5d8575c9 Mon Sep 17 00:00:00 2001
From: araikes <adam.raikes@gmail.com>
Date: Thu, 26 Mar 2026 12:48:26 -0700
Subject: [PATCH 02/23] Update Rd files

---
 DESCRIPTION             |  2 +-
 man/ModelArray-class.Rd | 21 +++++++++++++++++++++
 man/ModelArray.Rd       | 39 +++++++--------------------------------
 3 files changed, 29 insertions(+), 33 deletions(-)
 create mode 100644 man/ModelArray-class.Rd

diff --git a/DESCRIPTION b/DESCRIPTION
index 3d74ee8..77882dd 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -40,7 +40,7 @@ Imports:
     rlang,
     tibble,
     tidyr
-RoxygenNote: 7.3.2
+RoxygenNote: 7.3.3
 Suggests:
     rmarkdown,
     knitr,
diff --git a/man/ModelArray-class.Rd b/man/ModelArray-class.Rd
new file mode 100644
index 0000000..2f89cd0
--- /dev/null
+++ b/man/ModelArray-class.Rd
@@ -0,0 +1,21 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/ModelArray_Constructor.R
+\name{ModelArray-class}
+\alias{ModelArray-class}
+\title{ModelArray class}
+\description{
+An S4 class to represent element-wise scalar data and statistics.
+}
+\section{Slots}{
+
+\describe{
+\item{\code{sources}}{A list of source filenames}
+
+\item{\code{scalars}}{A list of element-wise scalar matrices}
+
+\item{\code{results}}{A list of statistical result matrices}
+
+\item{\code{path}}{Path to the h5 file on disk}
+}}
+
+\keyword{internal}
diff --git a/man/ModelArray.Rd b/man/ModelArray.Rd
index defb99a..6d13363 100644
--- a/man/ModelArray.Rd
+++ b/man/ModelArray.Rd
@@ -2,46 +2,21 @@
 % Please edit documentation in R/ModelArray_Constructor.R
 \name{ModelArray}
 \alias{ModelArray}
-\title{An S4 class to represent element-wise scalar data and statistics.}
+\title{Construct a ModelArray object}
 \usage{
-ModelArray(filepath, scalar_types = c("FD"), analysis_names = character(0))
-
 ModelArray(filepath, scalar_types = c("FD"), analysis_names = character(0))
 }
 \arguments{
-\item{filepath}{file}
+\item{filepath}{Path to an .h5 file}
 
-\item{scalar_types}{expected scalars}
+\item{scalar_types}{Expected scalars}
 
-\item{analysis_names}{the subfolder names for results in .h5 file. If empty
-(default), results are not read.}
+\item{analysis_names}{The subfolder names for results in the .h5 file.
+If empty (default), results are not read.}
 }
 \value{
-ModelArray object
+A `ModelArray` object
 }
 \description{
-An S4 class to represent element-wise scalar data and statistics.
-
-Load element-wise data from .h5 file as an ModelArray object
+Load element-wise data from an .h5 file as a `ModelArray` object.
 }
-\details{
-:
-Tips for debugging:
-if you run into this error: "Error in h(simpleError(msg, call)) :
-error in evaluating the argument 'seed' in selecting a method for
-function 'DelayedArray': HDF5. Symbol table. Can't open object."
-Then please check if you give correct "scalar_types" - check via
-rhdf5::h5ls(filename_for_h5)
-}
-\section{Slots}{
-
-\describe{
-\item{\code{sources}}{A list of source filenames}
-
-\item{\code{scalars}}{A list of element-wise scalar matrix}
-
-\item{\code{results}}{A list of statistical result matrix}
-
-\item{\code{path}}{Path to the h5 file on disk}
-}}
-

From cd9f7cb2b4eaaf8141419f9eacd1b8a554139c85 Mon Sep 17 00:00:00 2001
From: araikes <adam.raikes@gmail.com>
Date: Thu, 26 Mar 2026 13:05:26 -0700
Subject: [PATCH 03/23] Update DESCRIPTION

Add markdown rendering for roxygen
---
 DESCRIPTION | 1 +
 1 file changed, 1 insertion(+)

diff --git a/DESCRIPTION b/DESCRIPTION
index 77882dd..fa3650f 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -41,6 +41,7 @@ Imports:
     tibble,
     tidyr
 RoxygenNote: 7.3.3
+Roxygen: list(markdown = TRUE)
 Suggests:
     rmarkdown,
     knitr,

From 69107eee1569c89b74d54c504abc4c38f4bda984 Mon Sep 17 00:00:00 2001
From: araikes <adam.raikes@gmail.com>
Date: Fri, 27 Mar 2026 15:36:10 -0700
Subject: [PATCH 04/23] Testing new S4 and constructor docs

---
 NAMESPACE                  |  1 +
 R/ModelArray_Constructor.R | 96 ++++++++++++++++++++++++++++++--------
 man/ModelArray-class.Rd    | 21 ---------
 3 files changed, 77 insertions(+), 41 deletions(-)
 delete mode 100644 man/ModelArray-class.Rd

diff --git a/NAMESPACE b/NAMESPACE
index 18c0fe2..201c081 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -24,6 +24,7 @@ export(scalarNames)
 export(scalars)
 export(sources)
 export(writeResults)
+exportClasses(ModelArray)
 exportMethods(analysisNames)
 exportMethods(elementMetadata)
 exportMethods(exampleElementData)
diff --git a/R/ModelArray_Constructor.R b/R/ModelArray_Constructor.R
index e0e2203..1f50a94 100644
--- a/R/ModelArray_Constructor.R
+++ b/R/ModelArray_Constructor.R
@@ -1,21 +1,45 @@
-# Exported Functions
-
 #' ModelArray class
 #'
-#' An S4 class to represent element-wise scalar data and statistics.
+#' ModelArray is an S4 class that represents element-wise scalar data and
+#' associated statistical results backed by an HDF5 file on disk.
+#'
+#' @description
+#' A ModelArray wraps one or more element-wise scalar matrices (e.g., FD, FC,
+#' log_FC for fixel data) read lazily via \pkg{DelayedArray}, along with any
+#' previously saved analysis results. The object holds references to the
+#' underlying HDF5 file and reads data on demand, making it suitable for
+#' large-scale neuroimaging datasets.
+#'
+#' @details
+#' Each scalar in the HDF5 file is stored at \code{/scalars/<name>/values}
+#' as a matrix of elements (rows) by source files (columns). Source filenames
+#' are read from HDF5 attributes or companion datasets. Analysis results, if
+#' present, live under \code{/results/<analysis_name>/results_matrix}.
+#'
+#' ModelArray objects are typically created with the \code{\link{ModelArray}}
+#' constructor function. Element-wise models are fit with
+#' \code{\link{ModelArray.lm}}, \code{\link{ModelArray.gam}}, or
+#' \code{\link{ModelArray.wrap}}.
 #'
-#' @slot sources A list of source filenames
-#' @slot scalars A list of element-wise scalar matrices
-#' @slot results A list of statistical result matrices
-#' @slot path Path to the h5 file on disk
+#' @slot sources A named list of character vectors. Each element corresponds
+#'   to a scalar and contains the source filenames (one per input file/subject).
+#' @slot scalars A named list of \linkS4class{DelayedArray} matrices.
+#'   Each matrix has elements as rows and source files as columns.
+#' @slot results A named list of analysis results. Each element is itself a
+#'   list containing at minimum \code{results_matrix} (a
+#'   \linkS4class{DelayedArray}).
+#' @slot path Character. Path(s) to the HDF5 file(s) on disk.
+#'
+#' @seealso \code{\link{ModelArray}} for the constructor,
+#'   \code{\link{ModelArray.lm}}, \code{\link{ModelArray.gam}},
+#'   \code{\link{ModelArray.wrap}} for analysis,
+#'   \code{\link{scalars}}, \code{\link{sources}}, \code{\link{results}} for
+#'   accessors.
 #'
-#' @name ModelArray-class
-#' @rdname ModelArray-class
-#' @keywords internal
 #' @importClassesFrom DelayedArray DelayedArray
-NULL
-
-setClass(
+#' @exportClass ModelArray
+#' @aliases ModelArray-class
+ModelArray <- setClass(
   "ModelArray",
   slots = c(
     results = "list",
@@ -49,16 +73,48 @@ ModelArraySeed <- function(filepath, name, type = NA) {
 
 
 
-#' Construct a ModelArray object
+#' Load element-wise data from an HDF5 file as a ModelArray object
+#'
+#' @description
+#' Reads scalar matrices and (optionally) saved analysis results from an HDF5
+#' file and returns a \linkS4class{ModelArray} object.
+#'
+#' @details
+#' The constructor reads each scalar listed in \code{scalar_types} from
+#' \code{/scalars/<scalar_type>/values} in the HDF5 file, wrapping them as
+#' \linkS4class{DelayedArray} objects for lazy access. Source filenames are
+#' extracted from HDF5 attributes or companion datasets.
+#'
+#' If \code{analysis_names} is non-empty, saved results are read from
+#' \code{/results/<name>/results_matrix} along with column name metadata.
+#'
+#' \strong{Debugging tip:} If you encounter
+#' \code{"Error in h(simpleError(msg, call)) : error in evaluating the
+#' argument 'seed'..."}, check that \code{scalar_types} matches the groups
+#' present in the file. Inspect with \code{rhdf5::h5ls(filepath)}.
+#'
+#' @param filepath Character. Path to an existing HDF5 (\code{.h5}) file
+#'   containing element-wise scalar data.
+#' @param scalar_types Character vector. Names of scalar groups to read from
+#'   \code{/scalars/} in the HDF5 file. Default is \code{c("FD")}. Must match
+#'   group names in the file; verify with \code{rhdf5::h5ls(filepath)}.
+#' @param analysis_names Character vector. Subfolder names under
+#'   \code{/results/} to load. Default is \code{character(0)} (no results
+#'   loaded).
+#'
+#' @return A \linkS4class{ModelArray} object.
 #'
-#' Load element-wise data from an .h5 file as a `ModelArray` object.
+#' @seealso \linkS4class{ModelArray} for the class definition,
+#'   \code{\link{h5summary}} for inspecting an HDF5 file without loading,
+#'   \code{\link{scalars}}, \code{\link{sources}} for accessing loaded data.
 #'
-#' @param filepath Path to an .h5 file
-#' @param scalar_types Expected scalars
-#' @param analysis_names The subfolder names for results in the .h5 file.
-#'   If empty (default), results are not read.
+#' @examples
+#' \dontrun{
+#' ma <- ModelArray("path/to/data.h5", scalar_types = c("FD", "FC"))
+#' ma
+#' scalars(ma)
+#' }
 #'
-#' @return A `ModelArray` object
 #' @export
 #' @import methods
 #' @importFrom dplyr %>%
diff --git a/man/ModelArray-class.Rd b/man/ModelArray-class.Rd
deleted file mode 100644
index 2f89cd0..0000000
--- a/man/ModelArray-class.Rd
+++ /dev/null
@@ -1,21 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/ModelArray_Constructor.R
-\name{ModelArray-class}
-\alias{ModelArray-class}
-\title{ModelArray class}
-\description{
-An S4 class to represent element-wise scalar data and statistics.
-}
-\section{Slots}{
-
-\describe{
-\item{\code{sources}}{A list of source filenames}
-
-\item{\code{scalars}}{A list of element-wise scalar matrices}
-
-\item{\code{results}}{A list of statistical result matrices}
-
-\item{\code{path}}{Path to the h5 file on disk}
-}}
-
-\keyword{internal}

From 27c24c0625c6000e99b7da87352209bd47cc44ee Mon Sep 17 00:00:00 2001
From: araikes <adam.raikes@gmail.com>
Date: Fri, 27 Mar 2026 17:33:15 -0700
Subject: [PATCH 05/23] Include updated ModelArray.Rd

---
 man/ModelArray.Rd | 90 +++++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 83 insertions(+), 7 deletions(-)

diff --git a/man/ModelArray.Rd b/man/ModelArray.Rd
index 6d13363..96de23c 100644
--- a/man/ModelArray.Rd
+++ b/man/ModelArray.Rd
@@ -2,21 +2,97 @@
 % Please edit documentation in R/ModelArray_Constructor.R
 \name{ModelArray}
 \alias{ModelArray}
-\title{Construct a ModelArray object}
+\alias{ModelArray-class}
+\title{ModelArray class}
 \usage{
+ModelArray(filepath, scalar_types = c("FD"), analysis_names = character(0))
+
 ModelArray(filepath, scalar_types = c("FD"), analysis_names = character(0))
 }
 \arguments{
-\item{filepath}{Path to an .h5 file}
+\item{filepath}{Character. Path to an existing HDF5 (\code{.h5}) file
+containing element-wise scalar data.}
 
-\item{scalar_types}{Expected scalars}
+\item{scalar_types}{Character vector. Names of scalar groups to read from
+\code{/scalars/} in the HDF5 file. Default is \code{c("FD")}. Must match
+group names in the file; verify with \code{rhdf5::h5ls(filepath)}.}
 
-\item{analysis_names}{The subfolder names for results in the .h5 file.
-If empty (default), results are not read.}
+\item{analysis_names}{Character vector. Subfolder names under
+\code{/results/} to load. Default is \code{character(0)} (no results
+loaded).}
 }
 \value{
-A `ModelArray` object
+A \linkS4class{ModelArray} object.
 }
 \description{
-Load element-wise data from an .h5 file as a `ModelArray` object.
+A ModelArray wraps one or more element-wise scalar matrices (e.g., FD, FC,
+log_FC for fixel data) read lazily via \pkg{DelayedArray}, along with any
+previously saved analysis results. The object holds references to the
+underlying HDF5 file and reads data on demand, making it suitable for
+large-scale neuroimaging datasets.
+
+Reads scalar matrices and (optionally) saved analysis results from an HDF5
+file and returns a \linkS4class{ModelArray} object.
+}
+\details{
+ModelArray is an S4 class that represents element-wise scalar data and
+associated statistical results backed by an HDF5 file on disk.
+
+Each scalar in the HDF5 file is stored at \code{/scalars/<name>/values}
+as a matrix of elements (rows) by source files (columns). Source filenames
+are read from HDF5 attributes or companion datasets. Analysis results, if
+present, live under \code{/results/<analysis_name>/results_matrix}.
+
+ModelArray objects are typically created with the \code{\link{ModelArray}}
+constructor function. Element-wise models are fit with
+\code{\link{ModelArray.lm}}, \code{\link{ModelArray.gam}}, or
+\code{\link{ModelArray.wrap}}.
+
+The constructor reads each scalar listed in \code{scalar_types} from
+\code{/scalars/<scalar_type>/values} in the HDF5 file, wrapping them as
+\linkS4class{DelayedArray} objects for lazy access. Source filenames are
+extracted from HDF5 attributes or companion datasets.
+
+If \code{analysis_names} is non-empty, saved results are read from
+\code{/results/<name>/results_matrix} along with column name metadata.
+
+\strong{Debugging tip:} If you encounter
+\code{"Error in h(simpleError(msg, call)) : error in evaluating the
+argument 'seed'..."}, check that \code{scalar_types} matches the groups
+present in the file. Inspect with \code{rhdf5::h5ls(filepath)}.
+}
+\section{Slots}{
+
+\describe{
+\item{\code{sources}}{A named list of character vectors. Each element corresponds
+to a scalar and contains the source filenames (one per input file/subject).}
+
+\item{\code{scalars}}{A named list of \linkS4class{DelayedArray} matrices.
+Each matrix has elements as rows and source files as columns.}
+
+\item{\code{results}}{A named list of analysis results. Each element is itself a
+list containing at minimum \code{results_matrix} (a
+\linkS4class{DelayedArray}).}
+
+\item{\code{path}}{Character. Path(s) to the HDF5 file(s) on disk.}
+}}
+
+\examples{
+\dontrun{
+ma <- ModelArray("path/to/data.h5", scalar_types = c("FD", "FC"))
+ma
+scalars(ma)
+}
+
+}
+\seealso{
+\code{\link{ModelArray}} for the constructor,
+\code{\link{ModelArray.lm}}, \code{\link{ModelArray.gam}},
+\code{\link{ModelArray.wrap}} for analysis,
+\code{\link{scalars}}, \code{\link{sources}}, \code{\link{results}} for
+accessors.
+
+\linkS4class{ModelArray} for the class definition,
+\code{\link{h5summary}} for inspecting an HDF5 file without loading,
+\code{\link{scalars}}, \code{\link{sources}} for accessing loaded data.
 }

From 421b9ed91fdec463db06b87be9960a964590d318 Mon Sep 17 00:00:00 2001
From: araikes <adam.raikes@gmail.com>
Date: Mon, 30 Mar 2026 16:30:11 -0700
Subject: [PATCH 06/23] Test disambiguating class and function

---
 NAMESPACE                  |  2 +-
 R/ModelArray_Constructor.R |  5 +---
 man/ModelArray-class.Rd    | 51 +++++++++++++++++++++++++++++++++++++
 man/ModelArray.Rd          | 52 +++-----------------------------------
 4 files changed, 57 insertions(+), 53 deletions(-)
 create mode 100644 man/ModelArray-class.Rd

diff --git a/NAMESPACE b/NAMESPACE
index 201c081..bf243de 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -2,7 +2,6 @@
 
 S3method(print,h5summary)
 export("%>%")
-export(ModelArray)
 export(ModelArray.gam)
 export(ModelArray.lm)
 export(ModelArray.wrap)
@@ -16,6 +15,7 @@ export(gen_gamFormula_contIx)
 export(gen_gamFormula_fxSmooth)
 export(h5summary)
 export(mergeModelArrays)
+export(modelarray)
 export(nElements)
 export(nInputFiles)
 export(numElementsTotal)
diff --git a/R/ModelArray_Constructor.R b/R/ModelArray_Constructor.R
index 1f50a94..b690186 100644
--- a/R/ModelArray_Constructor.R
+++ b/R/ModelArray_Constructor.R
@@ -38,7 +38,6 @@
 #'
 #' @importClassesFrom DelayedArray DelayedArray
 #' @exportClass ModelArray
-#' @aliases ModelArray-class
 ModelArray <- setClass(
   "ModelArray",
   slots = c(
@@ -49,8 +48,6 @@ ModelArray <- setClass(
   )
 )
 
-
-
 #' ModelArraySeed
 #'
 #' Generates a "seed" for the h5 file format. A wrapper around HDF5ArraySeed
@@ -120,7 +117,7 @@ ModelArraySeed <- function(filepath, name, type = NA) {
 #' @importFrom dplyr %>%
 #' @importFrom DelayedArray DelayedArray realize
 #' @importFrom rhdf5 h5readAttributes
-ModelArray <- function(filepath,
+modelarray <- function(filepath,
                        scalar_types = c("FD"),
                        analysis_names = character(0)) {
   # TODO: try and use hdf5r instead of rhdf5 and delayedarray here
diff --git a/man/ModelArray-class.Rd b/man/ModelArray-class.Rd
new file mode 100644
index 0000000..205b468
--- /dev/null
+++ b/man/ModelArray-class.Rd
@@ -0,0 +1,51 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/ModelArray_Constructor.R
+\docType{class}
+\name{ModelArray-class}
+\alias{ModelArray-class}
+\alias{ModelArray}
+\title{ModelArray class}
+\description{
+A ModelArray wraps one or more element-wise scalar matrices (e.g., FD, FC,
+log_FC for fixel data) read lazily via \pkg{DelayedArray}, along with any
+previously saved analysis results. The object holds references to the
+underlying HDF5 file and reads data on demand, making it suitable for
+large-scale neuroimaging datasets.
+}
+\details{
+ModelArray is an S4 class that represents element-wise scalar data and
+associated statistical results backed by an HDF5 file on disk.
+
+Each scalar in the HDF5 file is stored at \code{/scalars/<name>/values}
+as a matrix of elements (rows) by source files (columns). Source filenames
+are read from HDF5 attributes or companion datasets. Analysis results, if
+present, live under \code{/results/<analysis_name>/results_matrix}.
+
+ModelArray objects are typically created with the \code{\link{ModelArray}}
+constructor function. Element-wise models are fit with
+\code{\link{ModelArray.lm}}, \code{\link{ModelArray.gam}}, or
+\code{\link{ModelArray.wrap}}.
+}
+\section{Slots}{
+
+\describe{
+\item{\code{sources}}{A named list of character vectors. Each element corresponds
+to a scalar and contains the source filenames (one per input file/subject).}
+
+\item{\code{scalars}}{A named list of \linkS4class{DelayedArray} matrices.
+Each matrix has elements as rows and source files as columns.}
+
+\item{\code{results}}{A named list of analysis results. Each element is itself a
+list containing at minimum \code{results_matrix} (a
+\linkS4class{DelayedArray}).}
+
+\item{\code{path}}{Character. Path(s) to the HDF5 file(s) on disk.}
+}}
+
+\seealso{
+\code{\link{ModelArray}} for the constructor,
+\code{\link{ModelArray.lm}}, \code{\link{ModelArray.gam}},
+\code{\link{ModelArray.wrap}} for analysis,
+\code{\link{scalars}}, \code{\link{sources}}, \code{\link{results}} for
+accessors.
+}
diff --git a/man/ModelArray.Rd b/man/ModelArray.Rd
index 96de23c..f2bdb38 100644
--- a/man/ModelArray.Rd
+++ b/man/ModelArray.Rd
@@ -1,13 +1,10 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/ModelArray_Constructor.R
-\name{ModelArray}
-\alias{ModelArray}
-\alias{ModelArray-class}
-\title{ModelArray class}
+\name{modelarray}
+\alias{modelarray}
+\title{Load element-wise data from an HDF5 file as a ModelArray object}
 \usage{
-ModelArray(filepath, scalar_types = c("FD"), analysis_names = character(0))
-
-ModelArray(filepath, scalar_types = c("FD"), analysis_names = character(0))
+modelarray(filepath, scalar_types = c("FD"), analysis_names = character(0))
 }
 \arguments{
 \item{filepath}{Character. Path to an existing HDF5 (\code{.h5}) file
@@ -25,29 +22,10 @@ loaded).}
 A \linkS4class{ModelArray} object.
 }
 \description{
-A ModelArray wraps one or more element-wise scalar matrices (e.g., FD, FC,
-log_FC for fixel data) read lazily via \pkg{DelayedArray}, along with any
-previously saved analysis results. The object holds references to the
-underlying HDF5 file and reads data on demand, making it suitable for
-large-scale neuroimaging datasets.
-
 Reads scalar matrices and (optionally) saved analysis results from an HDF5
 file and returns a \linkS4class{ModelArray} object.
 }
 \details{
-ModelArray is an S4 class that represents element-wise scalar data and
-associated statistical results backed by an HDF5 file on disk.
-
-Each scalar in the HDF5 file is stored at \code{/scalars/<name>/values}
-as a matrix of elements (rows) by source files (columns). Source filenames
-are read from HDF5 attributes or companion datasets. Analysis results, if
-present, live under \code{/results/<analysis_name>/results_matrix}.
-
-ModelArray objects are typically created with the \code{\link{ModelArray}}
-constructor function. Element-wise models are fit with
-\code{\link{ModelArray.lm}}, \code{\link{ModelArray.gam}}, or
-\code{\link{ModelArray.wrap}}.
-
 The constructor reads each scalar listed in \code{scalar_types} from
 \code{/scalars/<scalar_type>/values} in the HDF5 file, wrapping them as
 \linkS4class{DelayedArray} objects for lazy access. Source filenames are
@@ -61,22 +39,6 @@ If \code{analysis_names} is non-empty, saved results are read from
 argument 'seed'..."}, check that \code{scalar_types} matches the groups
 present in the file. Inspect with \code{rhdf5::h5ls(filepath)}.
 }
-\section{Slots}{
-
-\describe{
-\item{\code{sources}}{A named list of character vectors. Each element corresponds
-to a scalar and contains the source filenames (one per input file/subject).}
-
-\item{\code{scalars}}{A named list of \linkS4class{DelayedArray} matrices.
-Each matrix has elements as rows and source files as columns.}
-
-\item{\code{results}}{A named list of analysis results. Each element is itself a
-list containing at minimum \code{results_matrix} (a
-\linkS4class{DelayedArray}).}
-
-\item{\code{path}}{Character. Path(s) to the HDF5 file(s) on disk.}
-}}
-
 \examples{
 \dontrun{
 ma <- ModelArray("path/to/data.h5", scalar_types = c("FD", "FC"))
@@ -86,12 +48,6 @@ scalars(ma)
 
 }
 \seealso{
-\code{\link{ModelArray}} for the constructor,
-\code{\link{ModelArray.lm}}, \code{\link{ModelArray.gam}},
-\code{\link{ModelArray.wrap}} for analysis,
-\code{\link{scalars}}, \code{\link{sources}}, \code{\link{results}} for
-accessors.
-
 \linkS4class{ModelArray} for the class definition,
 \code{\link{h5summary}} for inspecting an HDF5 file without loading,
 \code{\link{scalars}}, \code{\link{sources}} for accessing loaded data.

From 9f26ac620bbb0ce428bd4472879135ad965eb82d Mon Sep 17 00:00:00 2001
From: araikes <adam.raikes@gmail.com>
Date: Mon, 30 Mar 2026 16:56:19 -0700
Subject: [PATCH 07/23] Disambiguate with aliases instead

---
 NAMESPACE                  |  2 +-
 R/ModelArray_Constructor.R | 45 +++++++++++++++++++-------------------
 man/ModelArray-class.Rd    |  8 ++++---
 man/ModelArray.Rd          | 45 ++++++++++++++++++--------------------
 4 files changed, 50 insertions(+), 50 deletions(-)

diff --git a/NAMESPACE b/NAMESPACE
index bf243de..201c081 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -2,6 +2,7 @@
 
 S3method(print,h5summary)
 export("%>%")
+export(ModelArray)
 export(ModelArray.gam)
 export(ModelArray.lm)
 export(ModelArray.wrap)
@@ -15,7 +16,6 @@ export(gen_gamFormula_contIx)
 export(gen_gamFormula_fxSmooth)
 export(h5summary)
 export(mergeModelArrays)
-export(modelarray)
 export(nElements)
 export(nInputFiles)
 export(numElementsTotal)
diff --git a/R/ModelArray_Constructor.R b/R/ModelArray_Constructor.R
index b690186..c1c8075 100644
--- a/R/ModelArray_Constructor.R
+++ b/R/ModelArray_Constructor.R
@@ -36,6 +36,8 @@
 #'   \code{\link{scalars}}, \code{\link{sources}}, \code{\link{results}} for
 #'   accessors.
 #'
+#' @aliases ModelArray-class
+#' @rdname ModelArray-class
 #' @importClassesFrom DelayedArray DelayedArray
 #' @exportClass ModelArray
 ModelArray <- setClass(
@@ -70,54 +72,53 @@ ModelArraySeed <- function(filepath, name, type = NA) {
 
 
 
-#' Load element-wise data from an HDF5 file as a ModelArray object
+#' Load element-wise data from an HDF5 file
 #'
 #' @description
-#' Reads scalar matrices and (optionally) saved analysis results from an HDF5
-#' file and returns a \linkS4class{ModelArray} object.
+#' Reads scalar matrices and (optionally) saved analysis results from
+#' an HDF5 file and returns a \linkS4class{ModelArray} object.
 #'
 #' @details
 #' The constructor reads each scalar listed in \code{scalar_types} from
-#' \code{/scalars/<scalar_type>/values} in the HDF5 file, wrapping them as
-#' \linkS4class{DelayedArray} objects for lazy access. Source filenames are
-#' extracted from HDF5 attributes or companion datasets.
+#' \code{/scalars/<scalar_type>/values}, wrapping them as
+#' \linkS4class{DelayedArray} objects. Source filenames are extracted
+#' from HDF5 attributes or companion datasets.
 #'
-#' If \code{analysis_names} is non-empty, saved results are read from
-#' \code{/results/<name>/results_matrix} along with column name metadata.
+#' If \code{analysis_names} is non-empty, saved results are loaded from
+#' \code{/results/<name>/results_matrix}.
 #'
 #' \strong{Debugging tip:} If you encounter
-#' \code{"Error in h(simpleError(msg, call)) : error in evaluating the
-#' argument 'seed'..."}, check that \code{scalar_types} matches the groups
-#' present in the file. Inspect with \code{rhdf5::h5ls(filepath)}.
+#' \code{"error in evaluating the argument 'seed'..."}, check that
+#' \code{scalar_types} matches groups in the file. Inspect with
+#' \code{rhdf5::h5ls(filepath)}.
 #'
-#' @param filepath Character. Path to an existing HDF5 (\code{.h5}) file
-#'   containing element-wise scalar data.
-#' @param scalar_types Character vector. Names of scalar groups to read from
-#'   \code{/scalars/} in the HDF5 file. Default is \code{c("FD")}. Must match
-#'   group names in the file; verify with \code{rhdf5::h5ls(filepath)}.
+#' @param filepath Character. Path to an existing HDF5 (\code{.h5})
+#'   file containing element-wise scalar data.
+#' @param scalar_types Character vector. Names of scalar groups to read
+#'   from \code{/scalars/} in the HDF5 file. Default is \code{c("FD")}.
+#'   Must match group names in the file.
 #' @param analysis_names Character vector. Subfolder names under
-#'   \code{/results/} to load. Default is \code{character(0)} (no results
-#'   loaded).
+#'   \code{/results/} to load. Default is \code{character(0)} (none).
 #'
 #' @return A \linkS4class{ModelArray} object.
 #'
 #' @seealso \linkS4class{ModelArray} for the class definition,
-#'   \code{\link{h5summary}} for inspecting an HDF5 file without loading,
-#'   \code{\link{scalars}}, \code{\link{sources}} for accessing loaded data.
+#'   \code{\link{h5summary}} for inspecting an HDF5 file.
 #'
 #' @examples
 #' \dontrun{
 #' ma <- ModelArray("path/to/data.h5", scalar_types = c("FD", "FC"))
 #' ma
-#' scalars(ma)
 #' }
 #'
+#' @rdname ModelArray
+#' @aliases ModelArray
 #' @export
 #' @import methods
 #' @importFrom dplyr %>%
 #' @importFrom DelayedArray DelayedArray realize
 #' @importFrom rhdf5 h5readAttributes
-modelarray <- function(filepath,
+ModelArray <- function(filepath,
                        scalar_types = c("FD"),
                        analysis_names = character(0)) {
   # TODO: try and use hdf5r instead of rhdf5 and delayedarray here
diff --git a/man/ModelArray-class.Rd b/man/ModelArray-class.Rd
index 205b468..95abad0 100644
--- a/man/ModelArray-class.Rd
+++ b/man/ModelArray-class.Rd
@@ -1,10 +1,12 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/ModelArray_Constructor.R
-\docType{class}
-\name{ModelArray-class}
-\alias{ModelArray-class}
+\name{ModelArray}
 \alias{ModelArray}
+\alias{ModelArray-class}
 \title{ModelArray class}
+\usage{
+ModelArray(filepath, scalar_types = c("FD"), analysis_names = character(0))
+}
 \description{
 A ModelArray wraps one or more element-wise scalar matrices (e.g., FD, FC,
 log_FC for fixel data) read lazily via \pkg{DelayedArray}, along with any
diff --git a/man/ModelArray.Rd b/man/ModelArray.Rd
index f2bdb38..d23c8d2 100644
--- a/man/ModelArray.Rd
+++ b/man/ModelArray.Rd
@@ -1,54 +1,51 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/ModelArray_Constructor.R
-\name{modelarray}
-\alias{modelarray}
-\title{Load element-wise data from an HDF5 file as a ModelArray object}
+\name{ModelArray}
+\alias{ModelArray}
+\title{Load element-wise data from an HDF5 file}
 \usage{
-modelarray(filepath, scalar_types = c("FD"), analysis_names = character(0))
+ModelArray(filepath, scalar_types = c("FD"), analysis_names = character(0))
 }
 \arguments{
-\item{filepath}{Character. Path to an existing HDF5 (\code{.h5}) file
-containing element-wise scalar data.}
+\item{filepath}{Character. Path to an existing HDF5 (\code{.h5})
+file containing element-wise scalar data.}
 
-\item{scalar_types}{Character vector. Names of scalar groups to read from
-\code{/scalars/} in the HDF5 file. Default is \code{c("FD")}. Must match
-group names in the file; verify with \code{rhdf5::h5ls(filepath)}.}
+\item{scalar_types}{Character vector. Names of scalar groups to read
+from \code{/scalars/} in the HDF5 file. Default is \code{c("FD")}.
+Must match group names in the file.}
 
 \item{analysis_names}{Character vector. Subfolder names under
-\code{/results/} to load. Default is \code{character(0)} (no results
-loaded).}
+\code{/results/} to load. Default is \code{character(0)} (none).}
 }
 \value{
 A \linkS4class{ModelArray} object.
 }
 \description{
-Reads scalar matrices and (optionally) saved analysis results from an HDF5
-file and returns a \linkS4class{ModelArray} object.
+Reads scalar matrices and (optionally) saved analysis results from
+an HDF5 file and returns a \linkS4class{ModelArray} object.
 }
 \details{
 The constructor reads each scalar listed in \code{scalar_types} from
-\code{/scalars/<scalar_type>/values} in the HDF5 file, wrapping them as
-\linkS4class{DelayedArray} objects for lazy access. Source filenames are
-extracted from HDF5 attributes or companion datasets.
+\code{/scalars/<scalar_type>/values}, wrapping them as
+\linkS4class{DelayedArray} objects. Source filenames are extracted
+from HDF5 attributes or companion datasets.
 
-If \code{analysis_names} is non-empty, saved results are read from
-\code{/results/<name>/results_matrix} along with column name metadata.
+If \code{analysis_names} is non-empty, saved results are loaded from
+\code{/results/<name>/results_matrix}.
 
 \strong{Debugging tip:} If you encounter
-\code{"Error in h(simpleError(msg, call)) : error in evaluating the
-argument 'seed'..."}, check that \code{scalar_types} matches the groups
-present in the file. Inspect with \code{rhdf5::h5ls(filepath)}.
+\code{"error in evaluating the argument 'seed'..."}, check that
+\code{scalar_types} matches groups in the file. Inspect with
+\code{rhdf5::h5ls(filepath)}.
 }
 \examples{
 \dontrun{
 ma <- ModelArray("path/to/data.h5", scalar_types = c("FD", "FC"))
 ma
-scalars(ma)
 }
 
 }
 \seealso{
 \linkS4class{ModelArray} for the class definition,
-\code{\link{h5summary}} for inspecting an HDF5 file without loading,
-\code{\link{scalars}}, \code{\link{sources}} for accessing loaded data.
+\code{\link{h5summary}} for inspecting an HDF5 file.
 }

From dc32033f25568f3b7acb65b41c66272dd6918e89 Mon Sep 17 00:00:00 2001
From: araikes <adam.raikes@gmail.com>
Date: Mon, 30 Mar 2026 17:10:49 -0700
Subject: [PATCH 08/23] Update S4 methods documentation

---
 man/ModelArray-class.Rd       | 16 +++++++++++++-
 man/analysisNames.Rd          | 21 ++++++++++++++++---
 man/elementMetadata.Rd        | 22 ++++++++++++++++----
 man/nElements.Rd              | 22 ++++++++++++++++----
 man/nInputFiles.Rd            | 22 ++++++++++++++++----
 man/results.Rd                | 39 +++++++++++++++++++++++++++++++----
 man/scalarNames.Rd            | 18 +++++++++++++---
 man/scalars.Rd                | 26 +++++++++++++++++++----
 man/show-ModelArray-method.Rd | 15 --------------
 9 files changed, 159 insertions(+), 42 deletions(-)
 delete mode 100644 man/show-ModelArray-method.Rd

diff --git a/man/ModelArray-class.Rd b/man/ModelArray-class.Rd
index 95abad0..2deb8af 100644
--- a/man/ModelArray-class.Rd
+++ b/man/ModelArray-class.Rd
@@ -1,11 +1,22 @@
 % Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/ModelArray_Constructor.R
+% Please edit documentation in R/ModelArray_Constructor.R,
+%   R/ModelArray_S4Methods.R
 \name{ModelArray}
 \alias{ModelArray}
 \alias{ModelArray-class}
+\alias{show,ModelArray-method}
 \title{ModelArray class}
 \usage{
 ModelArray(filepath, scalar_types = c("FD"), analysis_names = character(0))
+
+\S4method{show}{ModelArray}(object)
+}
+\arguments{
+\item{object}{A \linkS4class{ModelArray} object.}
+}
+\value{
+Invisible \code{NULL}. Called for its side effect of printing
+to the console.
 }
 \description{
 A ModelArray wraps one or more element-wise scalar matrices (e.g., FD, FC,
@@ -13,6 +24,9 @@ log_FC for fixel data) read lazily via \pkg{DelayedArray}, along with any
 previously saved analysis results. The object holds references to the
 underlying HDF5 file and reads data on demand, making it suitable for
 large-scale neuroimaging datasets.
+
+Prints a summary of the ModelArray including file path, scalar dimensions,
+and any saved analysis names.
 }
 \details{
 ModelArray is an S4 class that represents element-wise scalar data and
diff --git a/man/analysisNames.Rd b/man/analysisNames.Rd
index 2279fd7..88470c6 100644
--- a/man/analysisNames.Rd
+++ b/man/analysisNames.Rd
@@ -10,11 +10,26 @@ analysisNames(x)
 \S4method{analysisNames}{ModelArray}(x)
 }
 \arguments{
-\item{x}{A ModelArray object}
+\item{x}{A \linkS4class{ModelArray} object.}
 }
 \value{
-Character vector of analysis names
+Character vector of analysis names. Returns \code{character(0)}
+if no analyses have been loaded or saved.
 }
 \description{
-Names of analyses in a ModelArray
+Returns the names of all analysis result sets currently loaded in a
+\linkS4class{ModelArray}. These correspond to subfolder names under
+\code{/results/} in the HDF5 file.
+}
+\examples{
+\dontrun{
+ma <- ModelArray("data.h5", scalar_types = c("FD"),
+                 analysis_names = c("lm_age"))
+analysisNames(ma)   # "lm_age"
+}
+
+}
+\seealso{
+\code{\link{results}}, \code{\link{scalarNames}},
+\code{\link{writeResults}}
 }
diff --git a/man/elementMetadata.Rd b/man/elementMetadata.Rd
index fb35fc1..7590f4e 100644
--- a/man/elementMetadata.Rd
+++ b/man/elementMetadata.Rd
@@ -10,12 +10,26 @@ elementMetadata(x)
 \S4method{elementMetadata}{ModelArray}(x)
 }
 \arguments{
-\item{x}{A ModelArray object}
+\item{x}{A \linkS4class{ModelArray} object.}
 }
 \value{
-A matrix or data.frame of element metadata, or NULL
+A matrix or data.frame of element metadata if found, or \code{NULL}
+if no known metadata dataset exists in the HDF5 file.
 }
 \description{
-Reads element metadata (e.g., greyordinates for cifti data) from the h5 file
-if present. Returns NULL if no element metadata is found.
+Reads element metadata (e.g., greyordinates for CIFTI data, or fixel/voxel
+coordinate information) from the HDF5 file if present. The function searches
+for known metadata dataset names (\code{"greyordinates"}, \code{"fixels"},
+\code{"voxels"}) at the top level of the HDF5 file.
+}
+\examples{
+\dontrun{
+ma <- ModelArray("data.h5", scalar_types = c("FD"))
+meta <- elementMetadata(ma)
+if (!is.null(meta)) head(meta)
+}
+
+}
+\seealso{
+\code{\link{nElements}}, \code{\link{scalars}}
 }
diff --git a/man/nElements.Rd b/man/nElements.Rd
index 0c0b0da..e0b264c 100644
--- a/man/nElements.Rd
+++ b/man/nElements.Rd
@@ -10,13 +10,27 @@ nElements(x, scalar = NULL)
 \S4method{nElements}{ModelArray}(x, scalar = NULL)
 }
 \arguments{
-\item{x}{A ModelArray object}
+\item{x}{A \linkS4class{ModelArray} object.}
 
-\item{scalar}{Optional scalar name. Defaults to the first scalar.}
+\item{scalar}{Optional character string. Name of the scalar to query.
+Defaults to the first scalar in \code{names(scalars(x))}.}
 }
 \value{
-Integer, number of elements (rows)
+Integer. The number of elements (rows) in the scalar matrix.
 }
 \description{
-Number of elements in a ModelArray
+Returns the number of elements (e.g., fixels or voxels) for a given scalar
+in a \linkS4class{ModelArray}. This is the row count of the scalar matrix.
+}
+\examples{
+\dontrun{
+ma <- ModelArray("data.h5", scalar_types = c("FD"))
+nElements(ma)
+nElements(ma, "FD")
+}
+
+}
+\seealso{
+\code{\link{nInputFiles}}, \code{\link{numElementsTotal}},
+\code{\link{scalars}}, \code{\link{scalarNames}}
 }
diff --git a/man/nInputFiles.Rd b/man/nInputFiles.Rd
index 8ce751d..4667fb2 100644
--- a/man/nInputFiles.Rd
+++ b/man/nInputFiles.Rd
@@ -10,13 +10,27 @@ nInputFiles(x, scalar = NULL)
 \S4method{nInputFiles}{ModelArray}(x, scalar = NULL)
 }
 \arguments{
-\item{x}{A ModelArray object}
+\item{x}{A \linkS4class{ModelArray} object.}
 
-\item{scalar}{Optional scalar name. Defaults to the first scalar.}
+\item{scalar}{Optional character string. Name of the scalar to query.
+Defaults to the first scalar in \code{names(scalars(x))}.}
 }
 \value{
-Integer, number of input files (columns)
+Integer. The number of input files (columns) in the scalar matrix.
 }
 \description{
-Number of input files in a ModelArray
+Returns the number of input files (i.e., subjects or source files) for a
+given scalar in a \linkS4class{ModelArray}. This is the column count of the
+scalar matrix.
+}
+\examples{
+\dontrun{
+ma <- ModelArray("data.h5", scalar_types = c("FD"))
+nInputFiles(ma)
+}
+
+}
+\seealso{
+\code{\link{nElements}}, \code{\link{sources}},
+\code{\link{scalarNames}}
 }
diff --git a/man/results.Rd b/man/results.Rd
index 72f516d..862db7f 100644
--- a/man/results.Rd
+++ b/man/results.Rd
@@ -10,13 +10,44 @@ results(x, ...)
 \S4method{results}{ModelArray}(x, ...)
 }
 \arguments{
-\item{x}{A ModelArray object}
+\item{x}{A \linkS4class{ModelArray} object.}
 
-\item{...}{Additional arguments. Currently accepts an analysis name (character).}
+\item{...}{Optional: a single character string giving the analysis name to
+extract. If omitted, the entire named list is returned.}
 }
 \value{
-Statistical results in this ModelArray object
+If called with no extra arguments, a named list of result lists.
+If called with an analysis name, the corresponding result list (containing
+at minimum \code{results_matrix}).
 }
 \description{
-Statistical results of a ModelArray object
+Retrieve previously saved analysis results from a \linkS4class{ModelArray}.
+When called with no additional arguments, returns the full named list of all
+result sets. When called with a single analysis name, returns that one
+result set.
+}
+\details{
+Each result set is itself a list containing at minimum
+\code{results_matrix} (a \linkS4class{DelayedArray} with elements as rows
+and statistics as columns). Column names are stored alongside the matrix
+in the HDF5 file.
+
+Results are only available if \code{analysis_names} was supplied when the
+\linkS4class{ModelArray} was constructed, or if results have been written
+back with \code{\link{writeResults}}.
+}
+\examples{
+\dontrun{
+ma <- ModelArray("data.h5", scalar_types = c("FD"),
+                 analysis_names = c("lm_age"))
+results(ma)                # named list of all results
+results(ma, "lm_age")     # single result set
+results(ma, "lm_age")$results_matrix
+}
+
+}
+\seealso{
+\code{\link{sources}}, \code{\link{scalars}},
+\code{\link{analysisNames}}, \code{\link{writeResults}},
+\code{\link{ModelArray.lm}}, \code{\link{ModelArray.gam}}
 }
diff --git a/man/scalarNames.Rd b/man/scalarNames.Rd
index 95c918f..f2449be 100644
--- a/man/scalarNames.Rd
+++ b/man/scalarNames.Rd
@@ -10,11 +10,23 @@ scalarNames(x)
 \S4method{scalarNames}{ModelArray}(x)
 }
 \arguments{
-\item{x}{A ModelArray object}
+\item{x}{A \linkS4class{ModelArray} object.}
 }
 \value{
-Character vector of scalar names
+Character vector of scalar names.
 }
 \description{
-Names of scalars in a ModelArray
+Returns the names of all scalar datasets loaded in a
+\linkS4class{ModelArray} (e.g. \code{"FD"}, \code{"FC"}, \code{"log_FC"}).
+}
+\examples{
+\dontrun{
+ma <- ModelArray("data.h5", scalar_types = c("FD", "FC"))
+scalarNames(ma)   # c("FD", "FC")
+}
+
+}
+\seealso{
+\code{\link{scalars}}, \code{\link{analysisNames}},
+\code{\link{nElements}}
 }
diff --git a/man/scalars.Rd b/man/scalars.Rd
index 57e9513..7b6d504 100644
--- a/man/scalars.Rd
+++ b/man/scalars.Rd
@@ -10,13 +10,31 @@ scalars(x, ...)
 \S4method{scalars}{ModelArray}(x, ...)
 }
 \arguments{
-\item{x}{A ModelArray object}
+\item{x}{A \linkS4class{ModelArray} object.}
 
-\item{...}{Additional arguments. Currently accepts a scalar name (character).}
+\item{...}{Optional: a single character string giving the scalar name to
+extract. If omitted, the entire named list is returned.}
 }
 \value{
-A matrix of element-wise scalar data: elements (row) by source files (column).
+If called with no extra arguments, a named list of
+\linkS4class{DelayedArray} matrices (elements as rows, source files as
+columns). If called with a scalar name, the corresponding single
+\linkS4class{DelayedArray} matrix.
 }
 \description{
-Element-wise scalar data of a ModelArray object
+Retrieve scalar matrices from a \linkS4class{ModelArray}. When called
+with no additional arguments, returns the full named list of all scalar
+matrices. When called with a single scalar name, returns that one matrix.
+}
+\examples{
+\dontrun{
+ma <- ModelArray("data.h5", scalar_types = c("FD", "FC"))
+scalars(ma)         # named list of all scalars
+scalars(ma, "FD")   # single DelayedArray matrix
+}
+
+}
+\seealso{
+\code{\link{sources}}, \code{\link{results}},
+\code{\link{scalarNames}}, \code{\link{nElements}}
 }
diff --git a/man/show-ModelArray-method.Rd b/man/show-ModelArray-method.Rd
deleted file mode 100644
index ecd0e43..0000000
--- a/man/show-ModelArray-method.Rd
+++ /dev/null
@@ -1,15 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/ModelArray_S4Methods.R
-\name{show,ModelArray-method}
-\alias{show,ModelArray-method}
-\title{Show ModelArray object}
-\usage{
-\S4method{show}{ModelArray}(object)
-}
-\arguments{
-\item{object}{An ModelArray object}
-}
-\description{
-Print the basic information for an ModelArray object, including number of source files,
-scalar names, and any analysis names.
-}

From f177e7f0a9b533a689084d187321633f9dbdfc41 Mon Sep 17 00:00:00 2001
From: araikes <adam.raikes@gmail.com>
Date: Mon, 30 Mar 2026 17:26:05 -0700
Subject: [PATCH 09/23] Update ModelArray.lm roxygen

---
 R/ModelArray_S4Methods.R  | 279 ++++++++++++++++++++++++++++++++------
 R/analyse.R               | 202 ++++++++++++++++-----------
 man/ModelArray.lm.Rd      | 197 +++++++++++++++++----------
 man/exampleElementData.Rd |  50 +++++--
 man/sources.Rd            |  22 ++-
 5 files changed, 539 insertions(+), 211 deletions(-)

diff --git a/R/ModelArray_S4Methods.R b/R/ModelArray_S4Methods.R
index 0cfe44c..274bc72 100644
--- a/R/ModelArray_S4Methods.R
+++ b/R/ModelArray_S4Methods.R
@@ -1,13 +1,17 @@
 ### Methods of "ModelArray" ###
 
 ### Show ModelArray #####
-#' Show ModelArray object
+#' @rdname ModelArray-class
 #'
 #' @description
-#' Print the basic information for an ModelArray object, including number of source files,
-#' scalar names, and any analysis names.
+#' Prints a summary of the ModelArray including file path, scalar dimensions,
+#' and any saved analysis names.
+#'
+#' @param object A \linkS4class{ModelArray} object.
+#'
+#' @return Invisible \code{NULL}. Called for its side effect of printing
+#'   to the console.
 #'
-#' @param object An ModelArray object
 #' @export
 setMethod("show", "ModelArray", function(object) { # , group_name_results="results"
 
@@ -50,8 +54,27 @@ setMethod("show", "ModelArray", function(object) { # , group_name_results="resul
 
 #' Source filenames of a ModelArray object
 #'
-#' @param x A ModelArray object
-#' @return A list of source filenames
+#' @description
+#' Retrieve the named list of source filename vectors from a
+#' \linkS4class{ModelArray}. Each element of the list corresponds to one
+#' scalar and contains a character vector of filenames, one per input
+#' file/subject.
+#'
+#' @param x A \linkS4class{ModelArray} object.
+#'
+#' @return A named list of character vectors. Names correspond to scalar names
+#'   (e.g. \code{"FD"}, \code{"FC"}).
+#'
+#' @seealso \code{\link{scalars}}, \code{\link{results}},
+#'   \code{\link{scalarNames}}, \code{\link{nInputFiles}}
+#'
+#' @examples
+#' \dontrun{
+#' ma <- ModelArray("data.h5", scalar_types = c("FD"))
+#' sources(ma)          # named list
+#' sources(ma)[["FD"]]  # character vector of filenames
+#' }
+#'
 #' @name sources
 #' @export
 setGeneric("sources", function(x) standardGeneric("sources"))
@@ -63,9 +86,30 @@ setMethod("sources", "ModelArray", function(x) x@sources)
 
 #' Element-wise scalar data of a ModelArray object
 #'
-#' @param x A ModelArray object
-#' @param ... Additional arguments. Currently accepts a scalar name (character).
-#' @return A matrix of element-wise scalar data: elements (row) by source files (column).
+#' @description
+#' Retrieve scalar matrices from a \linkS4class{ModelArray}. When called
+#' with no additional arguments, returns the full named list of all scalar
+#' matrices. When called with a single scalar name, returns that one matrix.
+#'
+#' @param x A \linkS4class{ModelArray} object.
+#' @param ... Optional: a single character string giving the scalar name to
+#'   extract. If omitted, the entire named list is returned.
+#'
+#' @return If called with no extra arguments, a named list of
+#'   \linkS4class{DelayedArray} matrices (elements as rows, source files as
+#'   columns). If called with a scalar name, the corresponding single
+#'   \linkS4class{DelayedArray} matrix.
+#'
+#' @seealso \code{\link{sources}}, \code{\link{results}},
+#'   \code{\link{scalarNames}}, \code{\link{nElements}}
+#'
+#' @examples
+#' \dontrun{
+#' ma <- ModelArray("data.h5", scalar_types = c("FD", "FC"))
+#' scalars(ma)         # named list of all scalars
+#' scalars(ma, "FD")   # single DelayedArray matrix
+#' }
+#'
 #' @name scalars
 #' @export
 setGeneric("scalars", function(x, ...) standardGeneric("scalars"))
@@ -89,9 +133,43 @@ setMethod(
 
 #' Statistical results of a ModelArray object
 #'
-#' @param x A ModelArray object
-#' @param ... Additional arguments. Currently accepts an analysis name (character).
-#' @return Statistical results in this ModelArray object
+#' @description
+#' Retrieve previously saved analysis results from a \linkS4class{ModelArray}.
+#' When called with no additional arguments, returns the full named list of all
+#' result sets. When called with a single analysis name, returns that one
+#' result set.
+#'
+#' @details
+#' Each result set is itself a list containing at minimum
+#' \code{results_matrix} (a \linkS4class{DelayedArray} with elements as rows
+#' and statistics as columns). Column names are stored alongside the matrix
+#' in the HDF5 file.
+#'
+#' Results are only available if \code{analysis_names} was supplied when the
+#' \linkS4class{ModelArray} was constructed, or if results have been written
+#' back with \code{\link{writeResults}}.
+#'
+#' @param x A \linkS4class{ModelArray} object.
+#' @param ... Optional: a single character string giving the analysis name to
+#'   extract. If omitted, the entire named list is returned.
+#'
+#' @return If called with no extra arguments, a named list of result lists.
+#'   If called with an analysis name, the corresponding result list (containing
+#'   at minimum \code{results_matrix}).
+#'
+#' @seealso \code{\link{sources}}, \code{\link{scalars}},
+#'   \code{\link{analysisNames}}, \code{\link{writeResults}},
+#'   \code{\link{ModelArray.lm}}, \code{\link{ModelArray.gam}}
+#'
+#' @examples
+#' \dontrun{
+#' ma <- ModelArray("data.h5", scalar_types = c("FD"),
+#'                  analysis_names = c("lm_age"))
+#' results(ma)                # named list of all results
+#' results(ma, "lm_age")     # single result set
+#' results(ma, "lm_age")$results_matrix
+#' }
+#'
 #' @name results
 #' @export
 setGeneric("results", function(x, ...) standardGeneric("results"))
@@ -138,31 +216,48 @@ setMethod(
 
 #' Example per-element data.frame for user functions
 #'
-#' @title Example per-element data.frame for user functions
-#' @name exampleElementData
-#' @rdname exampleElementData
 #' @description
-#' Generic for constructing a per-element data.frame from a `ModelArray`.
-#' See the `ModelArray` method for details.
+#' Constructs a per-element data.frame from a \linkS4class{ModelArray} that
+#' mirrors the \code{data} argument passed to user functions by
+#' \code{\link{ModelArray.wrap}}. This is useful for testing and debugging
+#' user-supplied functions outside of the full element-wise analysis loop.
+#'
+#' @details
+#' Returns a copy of \code{phenotypes} with an extra column named by
+#' \code{scalar} populated with the selected element's values from the
+#' \linkS4class{ModelArray}. This mirrors the per-element data that
+#' \code{\link{ModelArray.wrap}} passes to user functions (as \code{data = dat}).
+#'
+#' Use this to verify that your custom function works correctly on a single
+#' element before committing to a full \code{\link{ModelArray.wrap}} run.
 #'
-#' @param x A `ModelArray` object (or compatible type)
-#' @param ... Additional arguments (ignored)
+#' @param x A \linkS4class{ModelArray} object.
+#' @param ... Additional arguments passed to the method (currently unused
+#'   at the generic level).
+#'
+#' @seealso \code{\link{ModelArray.wrap}}, \code{\link{scalars}}
+#'
+#' @rdname exampleElementData
 #' @export
 setGeneric("exampleElementData", function(x, ...) standardGeneric("exampleElementData"))
 
-#' Example per-element data.frame for user functions
 #' @rdname exampleElementData
 #'
-#' @description
-#' Returns a copy of `phenotypes` with an extra column named by `scalar` populated
-#' with the selected element's values from the `ModelArray`. This mirrors the
-#' per-element data that `ModelArray.wrap` passes to user functions (`data = dat`).
-#'
-#' @param x An ModelArray object
-#' @param scalar A character. The name of the element-wise scalar to append
-#' @param i_element An integer, the i_th element (1-based)
-#' @param phenotypes A data.frame of the cohort with independent variables/covariates
-#' @return A data.frame with the additional response column named by `scalar`
+#' @param x A \linkS4class{ModelArray} object.
+#' @param scalar Character. The name of the element-wise scalar to append
+#'   as a column. Must be one of \code{names(scalars(x))}. Default is
+#'   \code{"FD"}.
+#' @param i_element Integer. The 1-based index of the element whose values
+#'   should be extracted. Must be between 1 and the number of elements for
+#'   the given scalar.
+#' @param phenotypes A data.frame of the cohort with columns of independent
+#'   variables and covariates. Must have the same number of rows as the
+#'   number of source files in the \linkS4class{ModelArray}.
+#'
+#' @return A data.frame: the input \code{phenotypes} with one additional
+#'   column named by \code{scalar} containing that element's values across
+#'   all subjects.
+#'
 #' @examples
 #' \dontrun{
 #' h5_path <- system.file("extdata", "n50_fixels.h5", package = "ModelArray")
@@ -170,7 +265,15 @@ setGeneric("exampleElementData", function(x, ...) standardGeneric("exampleElemen
 #' ma <- ModelArray(h5_path, scalar_types = c("FD"))
 #' phen <- read.csv(csv_path)
 #' df <- exampleElementData(ma, scalar = "FD", i_element = 1, phenotypes = phen)
+#'
+#' # Now test your custom function on this single element:
+#' my_fun <- function(data, ...) {
+#'   mod <- lm(FD ~ age + sex, data = data)
+#'   broom::tidy(mod)
+#' }
+#' my_fun(data = df)
 #' }
+#'
 #' @export
 setMethod(
   "exampleElementData",
@@ -197,9 +300,27 @@ setMethod(
 
 #' Number of elements in a ModelArray
 #'
-#' @param x A ModelArray object
-#' @param scalar Optional scalar name. Defaults to the first scalar.
-#' @return Integer, number of elements (rows)
+#' @description
+#' Returns the number of elements (e.g., fixels or voxels) for a given scalar
+#' in a \linkS4class{ModelArray}. This is the row count of the scalar matrix.
+#'
+#' @param x A \linkS4class{ModelArray} object.
+#' @param scalar Optional character string. Name of the scalar to query.
+#'   Defaults to the first scalar in \code{names(scalars(x))}.
+#'
+#' @return Integer. The number of elements (rows) in the scalar matrix.
+#'
+#' @seealso \code{\link{nInputFiles}}, \code{\link{numElementsTotal}},
+#'   \code{\link{scalars}}, \code{\link{scalarNames}}
+#'
+#' @examples
+#' \dontrun{
+#' ma <- ModelArray("data.h5", scalar_types = c("FD"))
+#' nElements(ma)
+#' nElements(ma, "FD")
+#' }
+#'
+#' @name nElements
 #' @export
 setGeneric("nElements", function(x, scalar = NULL) standardGeneric("nElements"))
 
@@ -212,9 +333,27 @@ setMethod("nElements", "ModelArray", function(x, scalar = NULL) {
 
 #' Number of input files in a ModelArray
 #'
-#' @param x A ModelArray object
-#' @param scalar Optional scalar name. Defaults to the first scalar.
-#' @return Integer, number of input files (columns)
+#' @description
+#' Returns the number of input files (i.e., subjects or source files) for a
+#' given scalar in a \linkS4class{ModelArray}. This is the column count of the
+#' scalar matrix.
+#'
+#' @param x A \linkS4class{ModelArray} object.
+#' @param scalar Optional character string. Name of the scalar to query.
+#'   Defaults to the first scalar in \code{names(scalars(x))}.
+#'
+#' @return Integer. The number of input files (columns) in the scalar matrix.
+#'
+#' @seealso \code{\link{nElements}}, \code{\link{sources}},
+#'   \code{\link{scalarNames}}
+#'
+#' @examples
+#' \dontrun{
+#' ma <- ModelArray("data.h5", scalar_types = c("FD"))
+#' nInputFiles(ma)
+#' }
+#'
+#' @name nInputFiles
 #' @export
 setGeneric("nInputFiles", function(x, scalar = NULL) standardGeneric("nInputFiles"))
 
@@ -227,8 +366,24 @@ setMethod("nInputFiles", "ModelArray", function(x, scalar = NULL) {
 
 #' Names of scalars in a ModelArray
 #'
-#' @param x A ModelArray object
-#' @return Character vector of scalar names
+#' @description
+#' Returns the names of all scalar datasets loaded in a
+#' \linkS4class{ModelArray} (e.g. \code{"FD"}, \code{"FC"}, \code{"log_FC"}).
+#'
+#' @param x A \linkS4class{ModelArray} object.
+#'
+#' @return Character vector of scalar names.
+#'
+#' @seealso \code{\link{scalars}}, \code{\link{analysisNames}},
+#'   \code{\link{nElements}}
+#'
+#' @examples
+#' \dontrun{
+#' ma <- ModelArray("data.h5", scalar_types = c("FD", "FC"))
+#' scalarNames(ma)   # c("FD", "FC")
+#' }
+#'
+#' @name scalarNames
 #' @export
 setGeneric("scalarNames", function(x) standardGeneric("scalarNames"))
 
@@ -238,10 +393,30 @@ setMethod("scalarNames", "ModelArray", function(x) {
   names(x@scalars)
 })
 
+
 #' Names of analyses in a ModelArray
 #'
-#' @param x A ModelArray object
-#' @return Character vector of analysis names
+#' @description
+#' Returns the names of all analysis result sets currently loaded in a
+#' \linkS4class{ModelArray}. These correspond to subfolder names under
+#' \code{/results/} in the HDF5 file.
+#'
+#' @param x A \linkS4class{ModelArray} object.
+#'
+#' @return Character vector of analysis names. Returns \code{character(0)}
+#'   if no analyses have been loaded or saved.
+#'
+#' @seealso \code{\link{results}}, \code{\link{scalarNames}},
+#'   \code{\link{writeResults}}
+#'
+#' @examples
+#' \dontrun{
+#' ma <- ModelArray("data.h5", scalar_types = c("FD"),
+#'                  analysis_names = c("lm_age"))
+#' analysisNames(ma)   # "lm_age"
+#' }
+#'
+#' @name analysisNames
 #' @export
 setGeneric("analysisNames", function(x) standardGeneric("analysisNames"))
 
@@ -254,11 +429,26 @@ setMethod("analysisNames", "ModelArray", function(x) {
 #' Element metadata from a ModelArray
 #'
 #' @description
-#' Reads element metadata (e.g., greyordinates for cifti data) from the h5 file
-#' if present. Returns NULL if no element metadata is found.
+#' Reads element metadata (e.g., greyordinates for CIFTI data, or fixel/voxel
+#' coordinate information) from the HDF5 file if present. The function searches
+#' for known metadata dataset names (\code{"greyordinates"}, \code{"fixels"},
+#' \code{"voxels"}) at the top level of the HDF5 file.
+#'
+#' @param x A \linkS4class{ModelArray} object.
+#'
+#' @return A matrix or data.frame of element metadata if found, or \code{NULL}
+#'   if no known metadata dataset exists in the HDF5 file.
+#'
+#' @seealso \code{\link{nElements}}, \code{\link{scalars}}
 #'
-#' @param x A ModelArray object
-#' @return A matrix or data.frame of element metadata, or NULL
+#' @examples
+#' \dontrun{
+#' ma <- ModelArray("data.h5", scalar_types = c("FD"))
+#' meta <- elementMetadata(ma)
+#' if (!is.null(meta)) head(meta)
+#' }
+#'
+#' @name elementMetadata
 #' @export
 setGeneric("elementMetadata", function(x) standardGeneric("elementMetadata"))
 
@@ -279,5 +469,6 @@ setMethod("elementMetadata", "ModelArray", function(x) {
   }
   NULL
 })
+
 # setMethod("lm",
 #           signature = c("formula", "ModelArray", "data.frame", "character", "integer"))
diff --git a/R/analyse.R b/R/analyse.R
index cb0ec6c..5f3c2c0 100644
--- a/R/analyse.R
+++ b/R/analyse.R
@@ -1,93 +1,139 @@
-# This file provides the functions for model fitting of (all) elements
-
-
-#' Fit linear model for element-wise data
+#' Fit element-wise linear models
 #'
 #' @description
-#' `ModelArray.lm` fits linear model (`stats::lm()`) for each of elements requested, and returns a tibble
-#' dataframe of requested model statistics.
+#' `ModelArray.lm` fits a linear model at each requested element in a
+#' \linkS4class{ModelArray} and returns a tibble of requested model statistics.
 #'
 #' @details
-#' You may request returning specific statistical variables by setting \code{var.*},
-#' or you can get all by setting \code{full.outputs=TRUE}.
-#' Note that statistics covered by \code{full.outputs} or \code{var.*} are the ones from broom::tidy()
-#' and broom::glance() only, and do not include corrected p-values.
-#' However FDR-corrected p-values ("fdr") are generated by default.
+#' You may request returning specific statistical variables by setting
+#' \code{var.*}, or you can get all by setting \code{full.outputs = TRUE}.
+#' Note that statistics covered by \code{full.outputs} or \code{var.*} are
+#' the ones from \code{broom::tidy()}, \code{broom::glance()} only, and do
+#' not include corrected p-values. However FDR-corrected p-values
+#' (\code{"fdr"}) are generated by default.
+#'
 #' List of acceptable statistic names for each of \code{var.*}:
 #' \itemize{
-#'   \item \code{var.terms}: c("estimate","std.error","statistic","p.value");
-#'      For interpretation please see \link[broom]{tidy.lm}.
-#'   \item \code{var.model}: c("r.squared", "adj.r.squared", "sigma", "statistic",
-#'      "p.value", "df", "logLik", "AIC", "BIC", "deviance", "df.residual", "nobs");
-#'      For interpretation please see \link[broom]{glance.lm}.
+#'   \item \code{var.terms}: \code{c("estimate", "std.error", "statistic",
+#'     "p.value")}; For interpretation please see \link[broom]{tidy.lm}.
+#'   \item \code{var.model}: \code{c("r.squared", "adj.r.squared", "sigma",
+#'     "statistic", "p.value", "df", "logLik", "AIC", "BIC", "deviance",
+#'     "df.residual", "nobs")}; For interpretation please see
+#'     \link[broom]{glance.lm}.
 #' }
-#' For p-value corrections (arguments \code{correct.p.value.*}),
-#' supported methods include all methods in `p.adjust.methods` except "none".
-#' Can be more than one method. FDR-corrected p-values ("fdr") are calculated by default.
-#' Turn it off by setting to "none". \cr
-#' Arguments \code{num.subj.lthr.abs} and \code{num.subj.lthr.rel}
-#' are mainly for input data with subject-specific masks,
-#'  i.e. currently only for volume data. For fixel-wise data, you may ignore these arguments.
 #'
-#' @param formula Formula (passed to `stats::lm()`)
-#' @param data ModelArray class
-#' @param phenotypes A data.frame of the cohort with columns of independent variables
-#' and covariates to be added to the model. It should contains a column called "source_file",
-#' and this column should match to that in \code{data}.
-#' @param scalar A character. The name of the element-wise scalar to be analysed
-#' @param element.subset A list of positive integers (min = 1, max = number of elements).
-#' The subset of elements you want to run. Default is `NULL`, i.e. requesting all elements in `data`.
-#' @param full.outputs TRUE or FALSE, Whether to return full set of outputs.
-#' If FALSE, it will only return those requested in arguments \code{var.*} and \code{correct.p.value.*};
-#' if TRUE, arguments \code{var.*} will be ignored, and will return all possible statistics for
-#' \code{var.*} and any options requested in arguments \code{correct.p.value.*}.
-#' @param var.terms A list of characters.
-#' The list of variables to save for terms (got from `broom::tidy()`).
-#' See "Details" section for more.
-#' @param var.model A list of characters.
-#' The list of variables to save for the model (got from `broom::glance()`).
-#' See "Details" section for more.
-#' @param correct.p.value.terms A list of characters.
-#' To perform and add a column for p.value correction for each term. Default: "fdr".
-#' See "Details" section for more.
-#' @param correct.p.value.model A list of characters.
-#' To perform and add a column for p.value correction for the model.
-#' Default: "fdr". See "Details" section for more.
-#' @param num.subj.lthr.abs
-#' An integer, lower threshold of absolute number of subjects.
-#' For an element, if number of subjects who have finite values (defined by `is.finite()`,
-#' i.e. not NaN or NA or Inf) in h5 file > \code{num.subj.lthr.abs},
-#' then this element will be run normally; otherwise,
-#' this element will be skipped and statistical outputs will be set as NaN.
-#' Default is 10.
-#' @param num.subj.lthr.rel A value between 0-1, lower threshold of relative number of subjects.
-#' Similar to \code{num.subj.lthr.abs},
-#' if proportion of subjects who have valid value > \code{num.subj.lthr.rel},
-#' then this element will be run normally; otherwise,
-#' this element will be skipped and statistical outputs will be set as NaN.
-#' Default is 0.2.
-#' @param verbose TRUE or FALSE, to print verbose message or not
-#' @param pbar TRUE or FALSE, to print progress bar or not
-#' @param n_cores Positive integer, The number of CPU cores to run with
-#' @param on_error Character: one of "stop", "skip", or "debug". When an error occurs
-#' while fitting an element, choose whether to stop, skip returning all-NaN values for
-#' that element, or drop into `browser()` (if interactive) then skip. Default: "stop".
-#' @param write_results_name Optional analysis name for incremental writes to
-#' `results/<write_results_name>/results_matrix`.
-#' @param write_results_file Optional HDF5 file path used when `write_results_name` is provided.
-#' @param write_results_flush_every Positive integer number of elements per write block.
-#' @param write_results_storage_mode Storage mode for results writes (e.g., `"double"`).
-#' @param write_results_compression_level Gzip compression level (0-9) for results writes.
-#' @param return_output If TRUE (default), return the combined data.frame. If FALSE,
-#' returns `invisible(NULL)`; useful for streaming large runs to HDF5.
-#' @param ... Additional arguments for `stats::lm()`
-#' @return Tibble with the summarized model statistics for all elements requested
+#' For p-value corrections (arguments \code{correct.p.value.*}), supported
+#' methods include all methods in \code{p.adjust.methods} except
+#' \code{"none"}. You can request more than one method. FDR-corrected
+#' p-values (\code{"fdr"}) are calculated by default. Turn it off by
+#' setting to \code{"none"}.
+#'
+#' Arguments \code{num.subj.lthr.abs} and \code{num.subj.lthr.rel} are
+#' mainly for input data with subject-specific masks, i.e. currently only
+#' for volume data. For fixel-wise data, you may ignore these arguments.
+#'
+#' @param formula Formula (passed to \code{\link[stats]{lm}}).
+#' @param data A \linkS4class{ModelArray} object.
+#' @param phenotypes A data.frame of the cohort with columns of independent
+#'   variables and covariates to be added to the model. It must contain a
+#'   column called \code{"source_file"} whose entries match those in
+#'   \code{sources(data)[[scalar]]}.
+#' @param scalar Character. The name of the element-wise scalar to analyse.
+#'   Must be one of \code{names(scalars(data))}.
+#' @param element.subset Integer vector of element indices (1-based) to run.
+#'   Default is \code{NULL}, i.e. all elements in \code{data}.
+#' @param full.outputs Logical. If \code{TRUE}, return the full set of
+#'   statistics (ignoring \code{var.*} arguments). If \code{FALSE} (default),
+#'   only return those requested in \code{var.*} and
+#'   \code{correct.p.value.*}.
+#' @param var.terms Character vector. Statistics to save per term, from
+#'   \code{broom::tidy()}. See Details.
+#' @param var.model Character vector. Statistics to save for the overall
+#'   model, from \code{broom::glance()}. See Details.
+#' @param correct.p.value.terms Character vector. P-value correction
+#'   method(s) for each term. Default: \code{"fdr"}. See Details.
+#' @param correct.p.value.model Character vector. P-value correction
+#'   method(s) for the model-level p-value. Default: \code{"fdr"}.
+#'   See Details.
+#' @param num.subj.lthr.abs Integer. Lower threshold for the absolute
+#'   number of subjects with finite scalar values (not \code{NaN},
+#'   \code{NA}, or \code{Inf}) required per element. Elements below this
+#'   threshold are skipped (outputs set to \code{NaN}). Default is 10.
+#' @param num.subj.lthr.rel Numeric between 0 and 1. Lower threshold for
+#'   the proportion of subjects with finite values. Used together with
+#'   \code{num.subj.lthr.abs} (the effective threshold is the maximum of
+#'   the two). Default is 0.2.
+#' @param verbose Logical. Print progress messages. Default \code{TRUE}.
+#' @param pbar Logical. Show progress bar. Default \code{TRUE}.
+#' @param n_cores Positive integer. Number of CPU cores for parallel
+#'   processing via \code{\link[parallel]{mclapply}}. Default is 1
+#'   (serial).
+#' @param on_error Character: one of \code{"stop"}, \code{"skip"}, or
+#'   \code{"debug"}. When an error occurs fitting one element: \code{"stop"}
+#'   halts execution; \code{"skip"} returns all-\code{NaN} for that element;
+#'   \code{"debug"} drops into \code{\link{browser}} (if interactive) then
+#'   skips. Default: \code{"stop"}.
+#' @param write_results_name Optional character. If provided, results are
+#'   incrementally written to
+#'   \code{results/<write_results_name>/results_matrix} in the HDF5 file
+#'   specified by \code{write_results_file}.
+#' @param write_results_file Optional character. HDF5 file path for
+#'   incremental result writes. Required when \code{write_results_name}
+#'   is provided.
+#' @param write_results_flush_every Positive integer. Number of elements
+#'   per write block. Default 1000.
+#' @param write_results_storage_mode Character. Storage mode for HDF5
+#'   writes (e.g. \code{"double"}). Default \code{"double"}.
+#' @param write_results_compression_level Integer 0--9. Gzip compression
+#'   level for HDF5 writes. Default 4.
+#' @param return_output Logical. If \code{TRUE} (default), return the
+#'   combined data.frame. If \code{FALSE}, return \code{invisible(NULL)};
+#'   useful when writing large outputs directly to HDF5.
+#' @param ... Additional arguments passed to \code{\link[stats]{lm}}.
+#'
+#' @return A tibble with one row per element. The first column is
+#'   \code{element_id} (0-based). Remaining columns contain the requested
+#'   statistics, named as \code{<term>.<statistic>} for per-term statistics
+#'   and \code{model.<statistic>} for model-level statistics. If p-value
+#'   corrections were requested, additional columns are appended with the
+#'   correction method as suffix (e.g. \code{<term>.p.value.fdr}).
+#'
+#' @seealso \code{\link{ModelArray.gam}} for generalized additive models,
+#'   \code{\link{ModelArray.wrap}} for user-supplied functions,
+#'   \linkS4class{ModelArray} for the input class,
+#'   \code{\link{ModelArray}} for the constructor,
+#'   \code{\link{exampleElementData}} for testing formulas on a single
+#'   element.
+#'
+#' @examplesIf interactive()
+#' ma <- ModelArray("path/to/data.h5", scalar_types = c("FD"))
+#' phenotypes <- read.csv("cohort.csv")
+#'
+#' # Fit linear model with default outputs
+#' results <- ModelArray.lm(
+#'   FD ~ age + sex,
+#'   data = ma,
+#'   phenotypes = phenotypes,
+#'   scalar = "FD"
+#' )
+#' head(results)
+#'
+#' # Full outputs, no p-value correction
+#' results_full <- ModelArray.lm(
+#'   FD ~ age + sex,
+#'   data = ma,
+#'   phenotypes = phenotypes,
+#'   scalar = "FD",
+#'   full.outputs = TRUE,
+#'   correct.p.value.terms = "none",
+#'   correct.p.value.model = "none"
+#' )
+#'
+#' @rdname ModelArray.lm
 #' @importFrom dplyr %>%
 #' @import doParallel
 #' @import tibble
-#' @importFrom stats p.adjust lm
 #' @importFrom glue glue
-#' @importFrom rlang :=
 #' @export
 
 ModelArray.lm <- function(formula, data, phenotypes, scalar, element.subset = NULL, full.outputs = FALSE,
diff --git a/man/ModelArray.lm.Rd b/man/ModelArray.lm.Rd
index 9188bbf..20c55e9 100644
--- a/man/ModelArray.lm.Rd
+++ b/man/ModelArray.lm.Rd
@@ -2,7 +2,7 @@
 % Please edit documentation in R/analyse.R
 \name{ModelArray.lm}
 \alias{ModelArray.lm}
-\title{Fit linear model for element-wise data}
+\title{Fit element-wise linear models}
 \usage{
 ModelArray.lm(
   formula,
@@ -31,107 +31,158 @@ ModelArray.lm(
 )
 }
 \arguments{
-\item{formula}{Formula (passed to `stats::lm()`)}
+\item{formula}{Formula (passed to \code{\link[stats]{lm}}).}
 
-\item{data}{ModelArray class}
+\item{data}{A \linkS4class{ModelArray} object.}
 
-\item{phenotypes}{A data.frame of the cohort with columns of independent variables
-and covariates to be added to the model. It should contains a column called "source_file",
-and this column should match to that in \code{data}.}
+\item{phenotypes}{A data.frame of the cohort with columns of independent
+variables and covariates to be added to the model. It must contain a
+column called \code{"source_file"} whose entries match those in
+\code{sources(data)[[scalar]]}.}
 
-\item{scalar}{A character. The name of the element-wise scalar to be analysed}
+\item{scalar}{Character. The name of the element-wise scalar to analyse.
+Must be one of \code{names(scalars(data))}.}
 
-\item{element.subset}{A list of positive integers (min = 1, max = number of elements).
-The subset of elements you want to run. Default is `NULL`, i.e. requesting all elements in `data`.}
+\item{element.subset}{Integer vector of element indices (1-based) to run.
+Default is \code{NULL}, i.e. all elements in \code{data}.}
 
-\item{full.outputs}{TRUE or FALSE, Whether to return full set of outputs.
-If FALSE, it will only return those requested in arguments \code{var.*} and \code{correct.p.value.*};
-if TRUE, arguments \code{var.*} will be ignored, and will return all possible statistics for
-\code{var.*} and any options requested in arguments \code{correct.p.value.*}.}
+\item{full.outputs}{Logical. If \code{TRUE}, return the full set of
+statistics (ignoring \code{var.*} arguments). If \code{FALSE} (default),
+only return those requested in \code{var.*} and
+\code{correct.p.value.*}.}
 
-\item{var.terms}{A list of characters.
-The list of variables to save for terms (got from `broom::tidy()`).
-See "Details" section for more.}
+\item{var.terms}{Character vector. Statistics to save per term, from
+\code{broom::tidy()}. See Details.}
 
-\item{var.model}{A list of characters.
-The list of variables to save for the model (got from `broom::glance()`).
-See "Details" section for more.}
+\item{var.model}{Character vector. Statistics to save for the overall
+model, from \code{broom::glance()}. See Details.}
 
-\item{correct.p.value.terms}{A list of characters.
-To perform and add a column for p.value correction for each term. Default: "fdr".
-See "Details" section for more.}
+\item{correct.p.value.terms}{Character vector. P-value correction
+method(s) for each term. Default: \code{"fdr"}. See Details.}
 
-\item{correct.p.value.model}{A list of characters.
-To perform and add a column for p.value correction for the model.
-Default: "fdr". See "Details" section for more.}
+\item{correct.p.value.model}{Character vector. P-value correction
+method(s) for the model-level p-value. Default: \code{"fdr"}.
+See Details.}
 
-\item{num.subj.lthr.abs}{An integer, lower threshold of absolute number of subjects.
-For an element, if number of subjects who have finite values (defined by `is.finite()`,
-i.e. not NaN or NA or Inf) in h5 file > \code{num.subj.lthr.abs},
-then this element will be run normally; otherwise,
-this element will be skipped and statistical outputs will be set as NaN.
-Default is 10.}
+\item{num.subj.lthr.abs}{Integer. Lower threshold for the absolute
+number of subjects with finite scalar values (not \code{NaN},
+\code{NA}, or \code{Inf}) required per element. Elements below this
+threshold are skipped (outputs set to \code{NaN}). Default is 10.}
 
-\item{num.subj.lthr.rel}{A value between 0-1, lower threshold of relative number of subjects.
-Similar to \code{num.subj.lthr.abs},
-if proportion of subjects who have valid value > \code{num.subj.lthr.rel},
-then this element will be run normally; otherwise,
-this element will be skipped and statistical outputs will be set as NaN.
-Default is 0.2.}
+\item{num.subj.lthr.rel}{Numeric between 0 and 1. Lower threshold for
+the proportion of subjects with finite values. Used together with
+\code{num.subj.lthr.abs} (the effective threshold is the maximum of
+the two). Default is 0.2.}
 
-\item{verbose}{TRUE or FALSE, to print verbose message or not}
+\item{verbose}{Logical. Print progress messages. Default \code{TRUE}.}
 
-\item{pbar}{TRUE or FALSE, to print progress bar or not}
+\item{pbar}{Logical. Show progress bar. Default \code{TRUE}.}
 
-\item{n_cores}{Positive integer, The number of CPU cores to run with}
+\item{n_cores}{Positive integer. Number of CPU cores for parallel
+processing via \code{\link[parallel]{mclapply}}. Default is 1
+(serial).}
 
-\item{on_error}{Character: one of "stop", "skip", or "debug". When an error occurs
-while fitting an element, choose whether to stop, skip returning all-NaN values for
-that element, or drop into `browser()` (if interactive) then skip. Default: "stop".}
+\item{on_error}{Character: one of \code{"stop"}, \code{"skip"}, or
+\code{"debug"}. When an error occurs fitting one element: \code{"stop"}
+halts execution; \code{"skip"} returns all-\code{NaN} for that element;
+\code{"debug"} drops into \code{\link{browser}} (if interactive) then
+skips. Default: \code{"stop"}.}
 
-\item{write_results_name}{Optional analysis name for incremental writes to
-`results/<write_results_name>/results_matrix`.}
+\item{write_results_name}{Optional character. If provided, results are
+incrementally written to
+\code{results/<write_results_name>/results_matrix} in the HDF5 file
+specified by \code{write_results_file}.}
 
-\item{write_results_file}{Optional HDF5 file path used when `write_results_name` is provided.}
+\item{write_results_file}{Optional character. HDF5 file path for
+incremental result writes. Required when \code{write_results_name}
+is provided.}
 
-\item{write_results_flush_every}{Positive integer number of elements per write block.}
+\item{write_results_flush_every}{Positive integer. Number of elements
+per write block. Default 1000.}
 
-\item{write_results_storage_mode}{Storage mode for results writes (e.g., `"double"`).}
+\item{write_results_storage_mode}{Character. Storage mode for HDF5
+writes (e.g. \code{"double"}). Default \code{"double"}.}
 
-\item{write_results_compression_level}{Gzip compression level (0-9) for results writes.}
+\item{write_results_compression_level}{Integer 0--9. Gzip compression
+level for HDF5 writes. Default 4.}
 
-\item{return_output}{If TRUE (default), return the combined data.frame. If FALSE,
-returns `invisible(NULL)`; useful for streaming large runs to HDF5.}
+\item{return_output}{Logical. If \code{TRUE} (default), return the
+combined data.frame. If \code{FALSE}, return \code{invisible(NULL)};
+useful when writing large outputs directly to HDF5.}
 
-\item{...}{Additional arguments for `stats::lm()`}
+\item{...}{Additional arguments passed to \code{\link[stats]{lm}}.}
 }
 \value{
-Tibble with the summarized model statistics for all elements requested when
-`return_output = TRUE`; otherwise `invisible(NULL)`.
+A tibble with one row per element. The first column is
+\code{element_id} (0-based). Remaining columns contain the requested
+statistics, named as \code{<term>.<statistic>} for per-term statistics
+and \code{model.<statistic>} for model-level statistics. If p-value
+corrections were requested, additional columns are appended with the
+correction method as suffix (e.g. \code{<term>.p.value.fdr}).
 }
 \description{
-`ModelArray.lm` fits linear model (`stats::lm()`) for each of elements requested, and returns a tibble
-dataframe of requested model statistics.
+\code{ModelArray.lm} fits a linear model at each requested element in a
+\linkS4class{ModelArray} and returns a tibble of requested model statistics.
 }
 \details{
-You may request returning specific statistical variables by setting \code{var.*},
-or you can get all by setting \code{full.outputs=TRUE}.
-Note that statistics covered by \code{full.outputs} or \code{var.*} are the ones from broom::tidy()
-and broom::glance() only, and do not include corrected p-values.
-However FDR-corrected p-values ("fdr") are generated by default.
+You may request returning specific statistical variables by setting
+\code{var.*}, or you can get all by setting \code{full.outputs = TRUE}.
+Note that statistics covered by \code{full.outputs} or \code{var.*} are
+the ones from \code{broom::tidy()}, \code{broom::glance()} only, and do
+not include corrected p-values. However FDR-corrected p-values
+(\code{"fdr"}) are generated by default.
+
 List of acceptable statistic names for each of \code{var.*}:
 \itemize{
-  \item \code{var.terms}: c("estimate","std.error","statistic","p.value");
-     For interpretation please see \link[broom]{tidy.lm}.
-  \item \code{var.model}: c("r.squared", "adj.r.squared", "sigma", "statistic",
-     "p.value", "df", "logLik", "AIC", "BIC", "deviance", "df.residual", "nobs");
-     For interpretation please see \link[broom]{glance.lm}.
+\item \code{var.terms}: \code{c("estimate", "std.error", "statistic",
+    "p.value")}; For interpretation please see \link[broom]{tidy.lm}.
+\item \code{var.model}: \code{c("r.squared", "adj.r.squared", "sigma",
+    "statistic", "p.value", "df", "logLik", "AIC", "BIC", "deviance",
+    "df.residual", "nobs")}; For interpretation please see
+\link[broom]{glance.lm}.
+}
+
+For p-value corrections (arguments \code{correct.p.value.*}), supported
+methods include all methods in \code{p.adjust.methods} except
+\code{"none"}. You can request more than one method. FDR-corrected
+p-values (\code{"fdr"}) are calculated by default. Turn it off by
+setting to \code{"none"}.
+
+Arguments \code{num.subj.lthr.abs} and \code{num.subj.lthr.rel} are
+mainly for input data with subject-specific masks, i.e. currently only
+for volume data. For fixel-wise data, you may ignore these arguments.
+}
+\examples{
+\dontshow{if (interactive()) withAutoprint(\{ # examplesIf}
+ma <- ModelArray("path/to/data.h5", scalar_types = c("FD"))
+phenotypes <- read.csv("cohort.csv")
+
+# Fit linear model with default outputs
+results <- ModelArray.lm(
+  FD ~ age + sex,
+  data = ma,
+  phenotypes = phenotypes,
+  scalar = "FD"
+)
+head(results)
+
+# Full outputs, no p-value correction
+results_full <- ModelArray.lm(
+  FD ~ age + sex,
+  data = ma,
+  phenotypes = phenotypes,
+  scalar = "FD",
+  full.outputs = TRUE,
+  correct.p.value.terms = "none",
+  correct.p.value.model = "none"
+)
+\dontshow{\}) # examplesIf}
 }
-For p-value corrections (arguments \code{correct.p.value.*}),
-supported methods include all methods in `p.adjust.methods` except "none".
-Can be more than one method. FDR-corrected p-values ("fdr") are calculated by default.
-Turn it off by setting to "none". \cr
-Arguments \code{num.subj.lthr.abs} and \code{num.subj.lthr.rel}
-are mainly for input data with subject-specific masks,
- i.e. currently only for volume data. For fixel-wise data, you may ignore these arguments.
+\seealso{
+\code{\link{ModelArray.gam}} for generalized additive models,
+\code{\link{ModelArray.wrap}} for user-supplied functions,
+\linkS4class{ModelArray} for the input class,
+\code{\link{ModelArray}} for the constructor,
+\code{\link{exampleElementData}} for testing formulas on a single
+element.
 }
diff --git a/man/exampleElementData.Rd b/man/exampleElementData.Rd
index 1699b25..f6e1383 100644
--- a/man/exampleElementData.Rd
+++ b/man/exampleElementData.Rd
@@ -10,29 +10,42 @@ exampleElementData(x, ...)
 \S4method{exampleElementData}{ModelArray}(x, scalar = "FD", i_element = 1L, phenotypes)
 }
 \arguments{
-\item{x}{An ModelArray object}
+\item{x}{A \linkS4class{ModelArray} object.}
 
-\item{...}{Additional arguments (ignored)}
+\item{...}{Additional arguments passed to the method (currently unused
+at the generic level).}
 
-\item{scalar}{A character. The name of the element-wise scalar to append}
+\item{scalar}{Character. The name of the element-wise scalar to append
+as a column. Must be one of \code{names(scalars(x))}. Default is
+\code{"FD"}.}
 
-\item{i_element}{An integer, the i_th element (1-based)}
+\item{i_element}{Integer. The 1-based index of the element whose values
+should be extracted. Must be between 1 and the number of elements for
+the given scalar.}
 
-\item{phenotypes}{A data.frame of the cohort with independent variables/covariates}
+\item{phenotypes}{A data.frame of the cohort with columns of independent
+variables and covariates. Must have the same number of rows as the
+number of source files in the \linkS4class{ModelArray}.}
 }
 \value{
-A data.frame with the additional response column named by `scalar`
+A data.frame: the input \code{phenotypes} with one additional
+column named by \code{scalar} containing that element's values across
+all subjects.
 }
 \description{
-Generic for constructing a per-element data.frame from a `ModelArray`.
-See the `ModelArray` method for details.
-
-Returns a copy of `phenotypes` with an extra column named by `scalar` populated
-with the selected element's values from the `ModelArray`. This mirrors the
-per-element data that `ModelArray.wrap` passes to user functions (`data = dat`).
+Constructs a per-element data.frame from a \linkS4class{ModelArray} that
+mirrors the \code{data} argument passed to user functions by
+\code{\link{ModelArray.wrap}}. This is useful for testing and debugging
+user-supplied functions outside of the full element-wise analysis loop.
 }
 \details{
-Example per-element data.frame for user functions
+Returns a copy of \code{phenotypes} with an extra column named by
+\code{scalar} populated with the selected element's values from the
+\linkS4class{ModelArray}. This mirrors the per-element data that
+\code{\link{ModelArray.wrap}} passes to user functions (as \code{data = dat}).
+
+Use this to verify that your custom function works correctly on a single
+element before committing to a full \code{\link{ModelArray.wrap}} run.
 }
 \examples{
 \dontrun{
@@ -41,5 +54,16 @@ csv_path <- system.file("extdata", "n50_cohort.csv", package = "ModelArray")
 ma <- ModelArray(h5_path, scalar_types = c("FD"))
 phen <- read.csv(csv_path)
 df <- exampleElementData(ma, scalar = "FD", i_element = 1, phenotypes = phen)
+
+# Now test your custom function on this single element:
+my_fun <- function(data, ...) {
+  mod <- lm(FD ~ age + sex, data = data)
+  broom::tidy(mod)
+}
+my_fun(data = df)
+}
+
 }
+\seealso{
+\code{\link{ModelArray.wrap}}, \code{\link{scalars}}
 }
diff --git a/man/sources.Rd b/man/sources.Rd
index 67af997..f55f6e9 100644
--- a/man/sources.Rd
+++ b/man/sources.Rd
@@ -10,11 +10,27 @@ sources(x)
 \S4method{sources}{ModelArray}(x)
 }
 \arguments{
-\item{x}{A ModelArray object}
+\item{x}{A \linkS4class{ModelArray} object.}
 }
 \value{
-A list of source filenames
+A named list of character vectors. Names correspond to scalar names
+(e.g. \code{"FD"}, \code{"FC"}).
 }
 \description{
-Source filenames of a ModelArray object
+Retrieve the named list of source filename vectors from a
+\linkS4class{ModelArray}. Each element of the list corresponds to one
+scalar and contains a character vector of filenames, one per input
+file/subject.
+}
+\examples{
+\dontrun{
+ma <- ModelArray("data.h5", scalar_types = c("FD"))
+sources(ma)          # named list
+sources(ma)[["FD"]]  # character vector of filenames
+}
+
+}
+\seealso{
+\code{\link{scalars}}, \code{\link{results}},
+\code{\link{scalarNames}}, \code{\link{nInputFiles}}
 }

From 2bdf5b75ab09808233703619d53bcb26c7ee52de Mon Sep 17 00:00:00 2001
From: araikes <adam.raikes@gmail.com>
Date: Mon, 30 Mar 2026 17:47:58 -0700
Subject: [PATCH 10/23] Update gam and wrap functions

---
 NAMESPACE              |   5 -
 R/analyse.R            | 342 ++++++++++++++++++-----------------------
 man/ModelArray.gam.Rd  | 250 +++++++++++++-----------------
 man/ModelArray.wrap.Rd | 182 ++++++++++++++++------
 4 files changed, 387 insertions(+), 392 deletions(-)

diff --git a/NAMESPACE b/NAMESPACE
index 201c081..525532f 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -50,7 +50,6 @@ importFrom(crayon,black)
 importFrom(dplyr,"%>%")
 importFrom(dplyr,bind_cols)
 importFrom(dplyr,filter)
-importFrom(dplyr,mutate)
 importFrom(dplyr,select)
 importFrom(glue,glue)
 importFrom(magrittr,"%>%")
@@ -61,12 +60,8 @@ importFrom(mgcv,ti)
 importFrom(rhdf5,h5closeAll)
 importFrom(rhdf5,h5ls)
 importFrom(rhdf5,h5readAttributes)
-importFrom(rlang,":=")
 importFrom(rlang,.data)
 importFrom(stats,as.formula)
-importFrom(stats,drop.terms)
 importFrom(stats,lm)
-importFrom(stats,p.adjust)
 importFrom(stats,p.adjust.methods)
-importFrom(stats,terms)
 importFrom(utils,head)
diff --git a/R/analyse.R b/R/analyse.R
index 5f3c2c0..b7547f2 100644
--- a/R/analyse.R
+++ b/R/analyse.R
@@ -353,163 +353,73 @@ ModelArray.lm <- function(formula, data, phenotypes, scalar, element.subset = NU
   invisible(NULL)
 }
 
-#' Run GAM for element-wise data
+#' Fit element-wise generalized additive models
+#' no model-level p-value for GAMs, so there is no
+#' \code{correct.p.value.model} argument.
 #'
-#' @description
-#' `ModelArray.gam` fits gam model for each of elements requested,
-#' and returns a tibble data.frame of requested model statistics.
+#' @inheritParams ModelArray.lm
 #'
-#' @details
-#' You may request returning specific statistical variables by setting \code{var.*},
-#' or you can get all by setting \code{full.outputs=TRUE}.
-#' Note that statistics covered by \code{full.outputs} or
-#' \code{var.*} are the ones from broom::tidy(), broom::glance(), and summary() only,
-#' and do not include delta adjusted R-squared or partial R-squared or corrected p-values.
-#' However FDR-corrected p-values ("fdr") are generated by default.
+#' @param formula Formula (passed to \code{\link[mgcv]{gam}}).
+#' @param var.smoothTerms Character vector. Statistics to save for smooth
+#'   terms, from \code{broom::tidy(parametric = FALSE)}. See Details.
+#' @param var.parametricTerms Character vector. Statistics to save for
+#'   parametric terms, from \code{broom::tidy(parametric = TRUE)}.
+#'   See Details.
+#' @param var.model Character vector. Statistics to save for the overall
+#'   model, from \code{broom::glance()} and \code{summary()}. See Details.
+#' @param changed.rsq.term.index A list of positive integers. Each value
+#'   is the index of a term on the right-hand side of \code{formula} for
+#'   which delta adjusted R-squared and partial R-squared should be
+#'   computed. Usually the term of interest is a smooth term or interaction
+#'   term. Default \code{NULL} (not computed). See Details for warnings.
+#' @param correct.p.value.smoothTerms Character vector. P-value
+#'   correction method(s) for each smooth term. Default: \code{"fdr"}.
+#' @param correct.p.value.parametricTerms Character vector. P-value
+#'   correction method(s) for each parametric term. Default: \code{"fdr"}.
+#' @param ... Additional arguments passed to \code{\link[mgcv]{gam}}.
 #'
-#' List of acceptable statistic names for each of \code{var.*}:
-#' \itemize{
-#'  \item \code{var.smoothTerms}: c("edf","ref.df","statistic","p.value");
-#'    For interpretation please see \link[broom]{tidy.gam} with `parametric=FALSE`.
-#'  \item \code{var.parametricTerms}: c("estimate", "std.error","statistic","p.value");
-#'    For interpretation please see \link[broom]{tidy.gam} with `parametric=TRUE`.
-#'  \item \code{var.model}: c("adj.r.squared","dev.expl", "sp.criterion", "scale", "df",
-#'    "logLik","AIC", "BIC", "deviance", "df.residual", "nobs"); "adj.r.squared" is \code{r.sq} from
-#'    \link[mgcv]{summary.gam}; "sp.criterion" is \code{sp.criterion} from \link[mgcv]{summary.gam};
-#'    For interpretation please see \link[broom]{glance.gam} and \link[mgcv]{summary.gam}.
-#' }
+#' @return A tibble with one row per element. The first column is
+#'   \code{element_id} (0-based). Remaining columns contain the requested
+#'   statistics, named as \code{<term>.<statistic>}. If
+#'   \code{changed.rsq.term.index} was requested, additional columns
+#'   \code{<term>.delta.adj.rsq} and \code{<term>.partial.rsq} are
+#'   appended.
 #'
-#' Regarding formula: So far these kinds of formula are tested:
-#' \itemize{
-#'   \item formula with smooth term, but without any interactions.
-#'         Examples like \code{y ~ s(x) + orderedFactor}; \code{y ~ s(x) + s(z)}
-#'   \item formula with interaction, but limited to only one interaction term, and in the formats of:
-#'   \itemize{
-#'       \item Formula #1: \code{y ~ orderedFactor + s(x) + s(x, by=orderedFactor) + other_covariate},
-#'          where \code{orderedFactor} should be discrete variables and generated by `ordered`.
-#'          The interaction term will be displayed as "s_x_BYorderedFactor"
-#'          in the column name in returned data.frame.
-#'          You may use function `gen_gamFormula_fxSmooth()` to generate one.
-#'       \item Formula #2: \code{y ~ ti(x) + ti(z) + ti(x,z) + other_covariate},
-#'          where \code{x} and \code{z} should be continuous variables.
-#'          The interaction term will be displayed as "ti_x_z" in the column name in the returned data.frame.
-#'          You may use function `gen_gamFormula_contIx()` to generate one.
-#'   }
-#' }
-#' You may be interested in how important a term is in a model.
-#' We provide two ways of quantification (see below).
-#' Both of them require running the reduced model without this term of interest,
-#' thus it will take longer time to run.
-#' You can make such request via argument \code{changed.rsq.term.index}, and you'll get both quantifications.
-#' \itemize{
-#'   \item Delta adjusted R-squared (\code{delta.adj.rsq}) is defined as the difference between
-#'      adjusted R-squared of full model (full formula in \code{formula}) and that of reduced model
-#'      (formula without the term of interest). Notice that adjusted R-squared includes the penalty
-#'      from the model complexity.
-#'   \item Partial R-squared (\code{partial.rsq}) is defined as:
-#'      \code{(sse.reduced.model - sse.full.model) / sse.reduced.model},
-#'      where \code{sse} is the error sum of squares (or, residual sum of squares).
-#'      It quantifies the amount of variance in the response variable that cannot be explained by the reduced model
-#'      (model without term of interest), but can be explained by the term of interest in the full model.
+#' @seealso \code{\link{ModelArray.lm}} for linear models,
+#'   \code{\link{ModelArray.wrap}} for user-supplied functions,
+#'   \code{\link{gen_gamFormula_fxSmooth}} and
+#'   \code{\link{gen_gamFormula_contIx}} for formula helpers,
+#'   \linkS4class{ModelArray} for the input class.
+#'
+#' @examples{
+#' \dontrun{
+#' ma <- ModelArray("path/to/data.h5", scalar_types = c("FD"))
+#' phenotypes <- read.csv("cohort.csv")
+#'
+#' results <- ModelArray.gam(
+#'   FD ~ s(age, fx = TRUE) + sex,
+#'   data = ma,
+#'   phenotypes = phenotypes,
+#'   scalar = "FD"
+#' )
+#' head(results)
+#'
+#' # With changed R-squared for the smooth term (term index 1)
+#' results_rsq <- ModelArray.gam(
+#'   FD ~ s(age, fx = TRUE) + sex,
+#'   data = ma,
+#'   phenotypes = phenotypes,
+#'   scalar = "FD",
+#'   changed.rsq.term.index = list(1)
+#' )
 #' }
-#' \strong{___!!! WARNING !!!___}:
-#' If you want to request \code{changed.rsq.term.index} for a term that has missing values,
-#' before feeding \code{phenotypes} into \code{ModelArray.gam()},
-#' you must exclude those observations (i.e., those rows in \code{phenotypes})
-#' who have missing values in this term of interest from \code{phenotypes}.
-#' You should do the same for each term you'd like to request in \code{changed.rsq.term.index},
-#' if that term has missing values.
-#' Without such exclusion, the full and reduced models would include different number of subjects,
-#' causing inaccuracy of calculation of \code{delta.adj.rsq} and \code{partial.rsq}. \cr \cr
-#' Other notes on \code{changed.rsq.term.index}:
-#' \itemize{
-#'   \item When requesting \code{changed.rsq.term.index},
-#'      \code{fx} should be set as \code{TRUE}, so that degree of freedom is fixed.
-#'   \item For formula with interactions, only formula in above formats are tested,
-#'      and only the values of interaction term are valid.
-#'      The delta.adj.rsq and partial.rsq for main effect (such as s(x) in Formula #1)
-#'      may not "functionally" be these metrics, as their definitions should be changed to
-#'      reduced formula without both main effect and interaction term.
 #' }
-#' For p-value corrections (arguments \code{correct.p.value.*}),
-#' supported methods include all methods in `p.adjust.methods` except "none".
-#' You can request more than one method. FDR-corrected p-values ("fdr") are calculated by default.
-#' Turn it off by setting to "none".
-#' Please notice that different from `ModelArray.lm`,
-#' there is no p.value for the GAM model, so no "correct.p.value.model" for GAM model. \cr
-#' Arguments \code{num.subj.lthr.abs} and \code{num.subj.lthr.rel}
-#' are mainly for input data with subject-specific masks, i.e. currently only for volume data.
-#' For fixel-wise data, you may ignore these arguments.
 #'
-#' @param formula Formula (passed to `mgcv::gam()`)
-#' @param data ModelArray class
-#' @param phenotypes A data.frame of the cohort with columns of independent variables and covariates
-#' to be added to the model. It should contains a column called "source_file",
-#' and this column should match to that in \code{data}.
-#' @param scalar A character. The name of the element-wise scalar to be analysed
-#' @param element.subset A list of positive integers (min = 1, max = number of elements).
-#' The subset of elements you want to run. Default is `NULL`, i.e. requesting all elements in `data`.
-#' @param full.outputs TRUE or FALSE, Whether to return full set of outputs.
-#' If FALSE, it will only return those requested in arguments \code{var.*} and \code{correct.p.value.*};
-#' if TRUE, arguments \code{var.*} will be ignored, and will return all possible statistics for \code{var.*}
-#' and any options requested in arguments \code{correct.p.value.*}.
-#' @param var.smoothTerms A list of characters.
-#' The list of variables to save for smooth terms (got from `broom::tidy(parametric = FALSE)`).
-#' Example smooth term: age in formula "outcome ~ s(age)". See "Details" section for more.
-#' @param var.parametricTerms A list of characters.
-#' The list of variables to save for parametric terms (got from `broom::tidy(parametric = TRUE)`).
-#' Example parametric term: sex in formula "outcome ~ s(age) + sex".
-#' See "Details" section for more.
-#' @param var.model A list of characters.
-#' The list of variables to save for the model (got from `broom::glance()` and `summary()`).
-#' See "Details" section for more.
-#' @param changed.rsq.term.index A list of (one or several) positive integers.
-#' Each element in the list means the i-th term of the formula's right hand side
-#' as the term of interest for changed R-squared between with and without it.
-#' Both delta adjusted R-squared and partial R-squared will be calculated for each of term requested.
-#' Usually term of interest is smooth term, or interaction term in models with interactions.
-#'  See "Details" section for more, especially the "WARNING" in Details section for cases with caution!!
-#' @param correct.p.value.smoothTerms A list of characters.
-#' To perform and add a column for p.value correction for each smooth term.
-#' Default: "fdr". See "Details" section for more.
-#' @param correct.p.value.parametricTerms A list of characters.
-#' To perform and add a column for p.value correction for each parametric term. Default: "fdr".
-#' See "Details" section for more.
-#' @param num.subj.lthr.abs An integer, lower threshold of absolute number of subjects.
-#' For an element, if number of subjects who have finite values (defined by `is.finite()`,
-#' i.e. not NaN or NA or Inf) in h5 file > \code{num.subj.lthr.abs},
-#' then this element will be run normally;
-#' otherwise, this element will be skipped and statistical outputs will be set as NaN.
-#' Default is 10.
-#' @param num.subj.lthr.rel A value between 0-1, lower threshold of relative number of subjects.
-#' Similar to \code{num.subj.lthr.abs},
-#' if proportion of subjects who have valid value > \code{num.subj.lthr.rel},
-#' then this element will be run normally; otherwise,
-#' this element will be skipped and statistical outputs will be set as NaN.
-#' Default is 0.2.
-#' @param verbose TRUE or FALSE, to print verbose messages or not
-#' @param pbar TRUE or FALSE, to print progress bar or not
-#' @param n_cores Positive integer, The number of CPU cores to run with
-#' @param on_error Character: one of "stop", "skip", or "debug". When an error occurs
-#' while fitting an element, choose whether to stop, skip returning all-NaN values for
-#' that element, or drop into `browser()` (if interactive) then skip. Default: "stop".
-#' @param write_results_name Optional analysis name for incremental writes to
-#' `results/<write_results_name>/results_matrix`.
-#' @param write_results_file Optional HDF5 file path used when `write_results_name` is provided.
-#' @param write_results_flush_every Positive integer number of elements per write block.
-#' @param write_results_storage_mode Storage mode for results writes (e.g., `"double"`).
-#' @param write_results_compression_level Gzip compression level (0-9) for results writes.
-#' @param return_output If TRUE (default), return the combined data.frame. If FALSE,
-#' returns `invisible(NULL)`; useful for streaming large runs to HDF5.
-#' @param ... Additional arguments for `mgcv::gam()`
-#' @return Tibble with the summarized model statistics for all elements requested
-#' @importFrom dplyr %>% mutate
+#' @rdname ModelArray.gam
+#' @importFrom dplyr %>%
 #' @import doParallel
 #' @import tibble
-#' @import mgcv
-#' @importFrom stats terms as.formula drop.terms p.adjust
 #' @importFrom glue glue
-#' @importFrom rlang :=
 #' @export
 
 ModelArray.gam <- function(formula, data, phenotypes, scalar, element.subset = NULL, full.outputs = FALSE,
@@ -905,57 +815,101 @@ ModelArray.gam <- function(formula, data, phenotypes, scalar, element.subset = N
 #' Run a user-supplied function for element-wise data
 #'
 #' @description
-#' `ModelArray.wrap` runs a user-supplied function `FUN` for each requested element and
-#' returns a tibble/data.frame of results combined across elements.
+#' `ModelArray.wrap` runs a user-supplied function \code{FUN} at each
+#' requested element and returns a tibble of results combined across
+#' elements.
 #'
 #' @details
-#' This provides a generic framework reusing ModelArray's per-element looping, alignment,
-#' subject-thresholding, and parallelization. The user function will be called as
-#' `FUN(data = dat, ...)` where `dat` is `phenotypes` with the response column named `scalar`
-#' appended for the current element. The return value from `FUN` for a single element must be
-#' either a one-row data.frame/tibble, a named list, or a named atomic vector. The column
-#' names from the first successful element determine the final schema.
+#' This provides a generic framework reusing ModelArray's per-element
+#' looping, alignment, subject-thresholding, and parallelization. The user
+#' function is called as \code{FUN(data = dat, ...)} where \code{dat} is
+#' \code{phenotypes} with all scalar columns appended for the current
+#' element. The return value from \code{FUN} for a single element must be
+#' one of:
+#' \itemize{
+#'   \item a one-row \code{data.frame} or \code{tibble}
+#'   \item a named list
+#'   \item a named atomic vector
+#' }
+#' The column names from the first successful element determine the final
+#' schema.
+#'
+#' Note: \code{ModelArray.wrap} never performs any p-value corrections or
+#' modifications. If you need adjusted p-values (e.g. FDR), implement
+#' them inside \code{FUN}.
+#'
+#' Use \code{\link{exampleElementData}} to construct a sample per-element
+#' data.frame for testing your function before committing to a full run.
 #'
-#' Note: `ModelArray.wrap` never performs any p-value corrections or modifications.
-#' If you need adjusted p-values (e.g., FDR), implement them inside your provided `FUN`.
+#' @inheritParams ModelArray.lm
+#'
+#' @param FUN A function that accepts at least \code{data} (a data.frame)
+#'   and returns a one-row data.frame/tibble, a named list, or a named
+#'   vector of results for that element.
+#' @param write_scalar_name Optional character. If provided, selected
+#'   output columns are written into
+#'   \code{scalars/<write_scalar_name>/values} in the HDF5 file specified
+#'   by \code{write_scalar_file}.
+#' @param write_scalar_file Optional character. HDF5 output file path.
+#'   Required when \code{write_scalar_name} is provided.
+#' @param write_scalar_columns Optional character or integer vector
+#'   selecting which output columns to save as scalar values. If
+#'   \code{NULL} (default), uses all output columns except
+#'   \code{element_id}.
+#' @param write_scalar_column_names Optional character vector of source
+#'   names saved as scalar \code{column_names}. If \code{NULL}, uses
+#'   \code{phenotypes$source_file}.
+#' @param write_scalar_flush_every Positive integer. Elements per write
+#'   block for scalar writes. Default 1000.
+#' @param write_scalar_storage_mode Character. Storage mode for scalar
+#'   writes (e.g. \code{"double"}). Default \code{"double"}.
+#' @param write_scalar_compression_level Integer 0--9. Gzip compression
+#'   level for scalar writes. Default 4.
+#' @param ... Additional arguments forwarded to \code{FUN}.
+#'
+#' @return A tibble with one row per element and first column
+#'   \code{element_id} (0-based). Remaining columns are determined by
+#'   the return value of \code{FUN}.
+#'
+#' @seealso \code{\link{ModelArray.lm}} for linear models,
+#'   \code{\link{ModelArray.gam}} for GAMs,
+#'   \code{\link{exampleElementData}} for building test data,
+#'   \linkS4class{ModelArray} for the input class.
+#'
+#' @examples{
+#' \dontrun{
+#' ma <- ModelArray("path/to/data.h5", scalar_types = c("FD"))
+#' phenotypes <- read.csv("cohort.csv")
+#'
+#' # Simple custom function
+#' my_fun <- function(data, ...) {
+#'   mod <- lm(FD ~ age + sex, data = data)
+#'   tidy_out <- broom::tidy(mod)
+#'   # Return a one-row tibble
+#'   tibble::tibble(
+#'     age_estimate = tidy_out$estimate[tidy_out$term == "age"],
+#'     age_pvalue   = tidy_out$p.value[tidy_out$term == "age"]
+#'   )
+#' }
+#'
+#'
+#' # Test on one element first
+#' test_df <- exampleElementData(ma, scalar = "FD",
+#'                                i_element = 1,
+#'                                phenotypes = phenotypes)
+#' my_fun(data = test_df)
+#'
+#' # Run across all elements
+#' results <- ModelArray.wrap(
+#'   FUN = my_fun,
+#'   data = ma,
+#'   phenotypes = phenotypes,
+#'   scalar = "FD"
+#' )
+#' }
+#' }
 #'
-#' @param FUN A function that accepts at least `data` and returns a one-row data.frame/tibble,
-#' a named list, or a named vector of results for that element.
-#' @param data ModelArray class
-#' @param phenotypes A data.frame with cohort covariates; must contain column `source_file`.
-#' It must match `sources(data)[[scalar]]` order and contents (reordered if needed).
-#' @param scalar Character, the scalar name to analyze
-#' @param element.subset Optional integer vector of element indices (1-based); defaults to all
-#' @param num.subj.lthr.abs Integer lower threshold for subjects with finite values (default 10)
-#' @param num.subj.lthr.rel Relative lower threshold (0-1) (default 0.2)
-#' @param verbose TRUE/FALSE to print messages
-#' @param pbar TRUE/FALSE to show progress bar
-#' @param n_cores Positive integer number of CPU cores
-#' @param on_error Character: one of "stop", "skip", or "debug". When an error occurs in
-#' the user function for an element, choose whether to stop, skip returning all-NaN values
-#' for that element, or drop into `browser()` (if interactive) then skip. Default: "stop".
-#' @param write_scalar_name Optional character scalar name. If provided, `ModelArray.wrap`
-#' writes selected output columns into `scalars/<write_scalar_name>/values` while processing.
-#' @param write_scalar_file Optional character HDF5 output filename used when
-#' `write_scalar_name` is provided.
-#' @param write_scalar_columns Optional character/integer selector for output columns to save
-#' as scalar values. If NULL, uses all wrap output columns except `element_id`.
-#' @param write_scalar_column_names Optional character vector of source names saved as
-#' scalar `column_names`. If NULL, uses `phenotypes$source_file`.
-#' @param write_scalar_flush_every Positive integer number of elements per write block.
-#' @param write_scalar_storage_mode Storage mode for scalar writes (e.g., `"double"`).
-#' @param write_scalar_compression_level Gzip compression level (0-9) for scalar writes.
-#' @param write_results_name Optional analysis name for incremental writes to
-#' `results/<write_results_name>/results_matrix`.
-#' @param write_results_file Optional HDF5 file path used when `write_results_name` is provided.
-#' @param write_results_flush_every Positive integer number of elements per write block for
-#' results writes.
-#' @param write_results_storage_mode Storage mode for results writes (e.g., `"double"`).
-#' @param write_results_compression_level Gzip compression level (0-9) for results writes.
-#' @param return_output If TRUE (default), return the combined data.frame. If FALSE,
-#' returns `invisible(NULL)`; useful when writing large outputs directly to HDF5.
-#' @param ... Additional arguments forwarded to `FUN`
-#' @return Tibble/data.frame with one row per element and first column `element_id`
+#' @rdname ModelArray.wrap
 #' @importFrom dplyr %>%
 #' @import doParallel
 #' @import tibble
diff --git a/man/ModelArray.gam.Rd b/man/ModelArray.gam.Rd
index ffa2990..74fbe72 100644
--- a/man/ModelArray.gam.Rd
+++ b/man/ModelArray.gam.Rd
@@ -2,7 +2,9 @@
 % Please edit documentation in R/analyse.R
 \name{ModelArray.gam}
 \alias{ModelArray.gam}
-\title{Run GAM for element-wise data}
+\title{Fit element-wise generalized additive models
+no model-level p-value for GAMs, so there is no
+\code{correct.p.value.model} argument.}
 \usage{
 ModelArray.gam(
   formula,
@@ -33,179 +35,139 @@ ModelArray.gam(
 )
 }
 \arguments{
-\item{formula}{Formula (passed to `mgcv::gam()`)}
+\item{formula}{Formula (passed to \code{\link[mgcv]{gam}}).}
 
-\item{data}{ModelArray class}
+\item{data}{A \linkS4class{ModelArray} object.}
 
-\item{phenotypes}{A data.frame of the cohort with columns of independent variables and covariates
-to be added to the model. It should contains a column called "source_file",
-and this column should match to that in \code{data}.}
+\item{phenotypes}{A data.frame of the cohort with columns of independent
+variables and covariates to be added to the model. It must contain a
+column called \code{"source_file"} whose entries match those in
+\code{sources(data)[[scalar]]}.}
 
-\item{scalar}{A character. The name of the element-wise scalar to be analysed}
+\item{scalar}{Character. The name of the element-wise scalar to analyse.
+Must be one of \code{names(scalars(data))}.}
 
-\item{element.subset}{A list of positive integers (min = 1, max = number of elements).
-The subset of elements you want to run. Default is `NULL`, i.e. requesting all elements in `data`.}
+\item{element.subset}{Integer vector of element indices (1-based) to run.
+Default is \code{NULL}, i.e. all elements in \code{data}.}
 
-\item{full.outputs}{TRUE or FALSE, Whether to return full set of outputs.
-If FALSE, it will only return those requested in arguments \code{var.*} and \code{correct.p.value.*};
-if TRUE, arguments \code{var.*} will be ignored, and will return all possible statistics for \code{var.*}
-and any options requested in arguments \code{correct.p.value.*}.}
+\item{full.outputs}{Logical. If \code{TRUE}, return the full set of
+statistics (ignoring \code{var.*} arguments). If \code{FALSE} (default),
+only return those requested in \code{var.*} and
+\code{correct.p.value.*}.}
 
-\item{var.smoothTerms}{A list of characters.
-The list of variables to save for smooth terms (got from `broom::tidy(parametric = FALSE)`).
-Example smooth term: age in formula "outcome ~ s(age)". See "Details" section for more.}
+\item{var.smoothTerms}{Character vector. Statistics to save for smooth
+terms, from \code{broom::tidy(parametric = FALSE)}. See Details.}
 
-\item{var.parametricTerms}{A list of characters.
-The list of variables to save for parametric terms (got from `broom::tidy(parametric = TRUE)`).
-Example parametric term: sex in formula "outcome ~ s(age) + sex".
-See "Details" section for more.}
+\item{var.parametricTerms}{Character vector. Statistics to save for
+parametric terms, from \code{broom::tidy(parametric = TRUE)}.
+See Details.}
 
-\item{var.model}{A list of characters.
-The list of variables to save for the model (got from `broom::glance()` and `summary()`).
-See "Details" section for more.}
+\item{var.model}{Character vector. Statistics to save for the overall
+model, from \code{broom::glance()} and \code{summary()}. See Details.}
 
-\item{changed.rsq.term.index}{A list of (one or several) positive integers.
-Each element in the list means the i-th term of the formula's right hand side
-as the term of interest for changed R-squared between with and without it.
-Both delta adjusted R-squared and partial R-squared will be calculated for each of term requested.
-Usually term of interest is smooth term, or interaction term in models with interactions.
- See "Details" section for more, especially the "WARNING" in Details section for cases with caution!!}
+\item{changed.rsq.term.index}{A list of positive integers. Each value
+is the index of a term on the right-hand side of \code{formula} for
+which delta adjusted R-squared and partial R-squared should be
+computed. Usually the term of interest is a smooth term or interaction
+term. Default \code{NULL} (not computed). See Details for warnings.}
 
-\item{correct.p.value.smoothTerms}{A list of characters.
-To perform and add a column for p.value correction for each smooth term.
-Default: "fdr". See "Details" section for more.}
+\item{correct.p.value.smoothTerms}{Character vector. P-value
+correction method(s) for each smooth term. Default: \code{"fdr"}.}
 
-\item{correct.p.value.parametricTerms}{A list of characters.
-To perform and add a column for p.value correction for each parametric term. Default: "fdr".
-See "Details" section for more.}
+\item{correct.p.value.parametricTerms}{Character vector. P-value
+correction method(s) for each parametric term. Default: \code{"fdr"}.}
 
-\item{num.subj.lthr.abs}{An integer, lower threshold of absolute number of subjects.
-For an element, if number of subjects who have finite values (defined by `is.finite()`,
-i.e. not NaN or NA or Inf) in h5 file > \code{num.subj.lthr.abs},
-then this element will be run normally;
-otherwise, this element will be skipped and statistical outputs will be set as NaN.
-Default is 10.}
+\item{num.subj.lthr.abs}{Integer. Lower threshold for the absolute
+number of subjects with finite scalar values (not \code{NaN},
+\code{NA}, or \code{Inf}) required per element. Elements below this
+threshold are skipped (outputs set to \code{NaN}). Default is 10.}
 
-\item{num.subj.lthr.rel}{A value between 0-1, lower threshold of relative number of subjects.
-Similar to \code{num.subj.lthr.abs},
-if proportion of subjects who have valid value > \code{num.subj.lthr.rel},
-then this element will be run normally; otherwise,
-this element will be skipped and statistical outputs will be set as NaN.
-Default is 0.2.}
+\item{num.subj.lthr.rel}{Numeric between 0 and 1. Lower threshold for
+the proportion of subjects with finite values. Used together with
+\code{num.subj.lthr.abs} (the effective threshold is the maximum of
+the two). Default is 0.2.}
 
-\item{verbose}{TRUE or FALSE, to print verbose messages or not}
+\item{verbose}{Logical. Print progress messages. Default \code{TRUE}.}
 
-\item{pbar}{TRUE or FALSE, to print progress bar or not}
+\item{pbar}{Logical. Show progress bar. Default \code{TRUE}.}
 
-\item{n_cores}{Positive integer, The number of CPU cores to run with}
+\item{n_cores}{Positive integer. Number of CPU cores for parallel
+processing via \code{\link[parallel]{mclapply}}. Default is 1
+(serial).}
 
-\item{on_error}{Character: one of "stop", "skip", or "debug". When an error occurs
-while fitting an element, choose whether to stop, skip returning all-NaN values for
-that element, or drop into `browser()` (if interactive) then skip. Default: "stop".}
+\item{on_error}{Character: one of \code{"stop"}, \code{"skip"}, or
+\code{"debug"}. When an error occurs fitting one element: \code{"stop"}
+halts execution; \code{"skip"} returns all-\code{NaN} for that element;
+\code{"debug"} drops into \code{\link{browser}} (if interactive) then
+skips. Default: \code{"stop"}.}
 
-\item{write_results_name}{Optional analysis name for incremental writes to
-`results/<write_results_name>/results_matrix`.}
+\item{write_results_name}{Optional character. If provided, results are
+incrementally written to
+\code{results/<write_results_name>/results_matrix} in the HDF5 file
+specified by \code{write_results_file}.}
 
-\item{write_results_file}{Optional HDF5 file path used when `write_results_name` is provided.}
+\item{write_results_file}{Optional character. HDF5 file path for
+incremental result writes. Required when \code{write_results_name}
+is provided.}
 
-\item{write_results_flush_every}{Positive integer number of elements per write block.}
+\item{write_results_flush_every}{Positive integer. Number of elements
+per write block. Default 1000.}
 
-\item{write_results_storage_mode}{Storage mode for results writes (e.g., `"double"`).}
+\item{write_results_storage_mode}{Character. Storage mode for HDF5
+writes (e.g. \code{"double"}). Default \code{"double"}.}
 
-\item{write_results_compression_level}{Gzip compression level (0-9) for results writes.}
+\item{write_results_compression_level}{Integer 0--9. Gzip compression
+level for HDF5 writes. Default 4.}
 
-\item{return_output}{If TRUE (default), return the combined data.frame. If FALSE,
-returns `invisible(NULL)`; useful for streaming large runs to HDF5.}
+\item{return_output}{Logical. If \code{TRUE} (default), return the
+combined data.frame. If \code{FALSE}, return \code{invisible(NULL)};
+useful when writing large outputs directly to HDF5.}
 
-\item{...}{Additional arguments for `mgcv::gam()`}
+\item{...}{Additional arguments passed to \code{\link[mgcv]{gam}}.}
 }
 \value{
-Tibble with the summarized model statistics for all elements requested when
-`return_output = TRUE`; otherwise `invisible(NULL)`.
+A tibble with one row per element. The first column is
+\code{element_id} (0-based). Remaining columns contain the requested
+statistics, named as \code{<term>.<statistic>}. If
+\code{changed.rsq.term.index} was requested, additional columns
+\code{<term>.delta.adj.rsq} and \code{<term>.partial.rsq} are
+appended.
 }
 \description{
-`ModelArray.gam` fits gam model for each of elements requested,
-and returns a tibble data.frame of requested model statistics.
+Fit element-wise generalized additive models
+no model-level p-value for GAMs, so there is no
+\code{correct.p.value.model} argument.
 }
-\details{
-You may request returning specific statistical variables by setting \code{var.*},
-or you can get all by setting \code{full.outputs=TRUE}.
-Note that statistics covered by \code{full.outputs} or
-\code{var.*} are the ones from broom::tidy(), broom::glance(), and summary() only,
-and do not include delta adjusted R-squared or partial R-squared or corrected p-values.
-However FDR-corrected p-values ("fdr") are generated by default.
-
-List of acceptable statistic names for each of \code{var.*}:
-\itemize{
- \item \code{var.smoothTerms}: c("edf","ref.df","statistic","p.value");
-   For interpretation please see \link[broom]{tidy.gam} with `parametric=FALSE`.
- \item \code{var.parametricTerms}: c("estimate", "std.error","statistic","p.value");
-   For interpretation please see \link[broom]{tidy.gam} with `parametric=TRUE`.
- \item \code{var.model}: c("adj.r.squared","dev.expl", "sp.criterion", "scale", "df",
-   "logLik","AIC", "BIC", "deviance", "df.residual", "nobs"); "adj.r.squared" is \code{r.sq} from
-   \link[mgcv]{summary.gam}; "sp.criterion" is \code{sp.criterion} from \link[mgcv]{summary.gam};
-   For interpretation please see \link[broom]{glance.gam} and \link[mgcv]{summary.gam}.
-}
-
-Regarding formula: So far these kinds of formula are tested:
-\itemize{
-  \item formula with smooth term, but without any interactions.
-        Examples like \code{y ~ s(x) + orderedFactor}; \code{y ~ s(x) + s(z)}
-  \item formula with interaction, but limited to only one interaction term, and in the formats of:
-  \itemize{
-      \item Formula #1: \code{y ~ orderedFactor + s(x) + s(x, by=orderedFactor) + other_covariate},
-         where \code{orderedFactor} should be discrete variables and generated by `ordered`.
-         The interaction term will be displayed as "s_x_BYorderedFactor"
-         in the column name in returned data.frame.
-         You may use function `gen_gamFormula_fxSmooth()` to generate one.
-      \item Formula #2: \code{y ~ ti(x) + ti(z) + ti(x,z) + other_covariate},
-         where \code{x} and \code{z} should be continuous variables.
-         The interaction term will be displayed as "ti_x_z" in the column name in the returned data.frame.
-         You may use function `gen_gamFormula_contIx()` to generate one.
-  }
+\examples{
+{
+\dontrun{
+ma <- ModelArray("path/to/data.h5", scalar_types = c("FD"))
+phenotypes <- read.csv("cohort.csv")
+
+results <- ModelArray.gam(
+  FD ~ s(age, fx = TRUE) + sex,
+  data = ma,
+  phenotypes = phenotypes,
+  scalar = "FD"
+)
+head(results)
+
+# With changed R-squared for the smooth term (term index 1)
+results_rsq <- ModelArray.gam(
+  FD ~ s(age, fx = TRUE) + sex,
+  data = ma,
+  phenotypes = phenotypes,
+  scalar = "FD",
+  changed.rsq.term.index = list(1)
+)
 }
-You may be interested in how important a term is in a model.
-We provide two ways of quantification (see below).
-Both of them require running the reduced model without this term of interest,
-thus it will take longer time to run.
-You can make such request via argument \code{changed.rsq.term.index}, and you'll get both quantifications.
-\itemize{
-  \item Delta adjusted R-squared (\code{delta.adj.rsq}) is defined as the difference between
-     adjusted R-squared of full model (full formula in \code{formula}) and that of reduced model
-     (formula without the term of interest). Notice that adjusted R-squared includes the penalty
-     from the model complexity.
-  \item Partial R-squared (\code{partial.rsq}) is defined as:
-     \code{(sse.reduced.model - sse.full.model) / sse.reduced.model},
-     where \code{sse} is the error sum of squares (or, residual sum of squares).
-     It quantifies the amount of variance in the response variable that cannot be explained by the reduced model
-     (model without term of interest), but can be explained by the term of interest in the full model.
 }
-\strong{___!!! WARNING !!!___}:
-If you want to request \code{changed.rsq.term.index} for a term that has missing values,
-before feeding \code{phenotypes} into \code{ModelArray.gam()},
-you must exclude those observations (i.e., those rows in \code{phenotypes})
-who have missing values in this term of interest from \code{phenotypes}.
-You should do the same for each term you'd like to request in \code{changed.rsq.term.index},
-if that term has missing values.
-Without such exclusion, the full and reduced models would include different number of subjects,
-causing inaccuracy of calculation of \code{delta.adj.rsq} and \code{partial.rsq}. \cr \cr
-Other notes on \code{changed.rsq.term.index}:
-\itemize{
-  \item When requesting \code{changed.rsq.term.index},
-     \code{fx} should be set as \code{TRUE}, so that degree of freedom is fixed.
-  \item For formula with interactions, only formula in above formats are tested,
-     and only the values of interaction term are valid.
-     The delta.adj.rsq and partial.rsq for main effect (such as s(x) in Formula #1)
-     may not "functionally" be these metrics, as their definitions should be changed to
-     reduced formula without both main effect and interaction term.
+
 }
-For p-value corrections (arguments \code{correct.p.value.*}),
-supported methods include all methods in `p.adjust.methods` except "none".
-You can request more than one method. FDR-corrected p-values ("fdr") are calculated by default.
-Turn it off by setting to "none".
-Please notice that different from `ModelArray.lm`,
-there is no p.value for the GAM model, so no "correct.p.value.model" for GAM model. \cr
-Arguments \code{num.subj.lthr.abs} and \code{num.subj.lthr.rel}
-are mainly for input data with subject-specific masks, i.e. currently only for volume data.
-For fixel-wise data, you may ignore these arguments.
+\seealso{
+\code{\link{ModelArray.lm}} for linear models,
+\code{\link{ModelArray.wrap}} for user-supplied functions,
+\code{\link{gen_gamFormula_fxSmooth}} and
+\code{\link{gen_gamFormula_contIx}} for formula helpers,
+\linkS4class{ModelArray} for the input class.
 }
diff --git a/man/ModelArray.wrap.Rd b/man/ModelArray.wrap.Rd
index 19b5835..10d0686 100644
--- a/man/ModelArray.wrap.Rd
+++ b/man/ModelArray.wrap.Rd
@@ -33,83 +33,167 @@ ModelArray.wrap(
 )
 }
 \arguments{
-\item{FUN}{A function that accepts at least `data` and returns a one-row data.frame/tibble,
-a named list, or a named vector of results for that element.}
+\item{FUN}{A function that accepts at least \code{data} (a data.frame)
+and returns a one-row data.frame/tibble, a named list, or a named
+vector of results for that element.}
 
-\item{data}{ModelArray class}
+\item{data}{A \linkS4class{ModelArray} object.}
 
-\item{phenotypes}{A data.frame with cohort covariates; must contain column `source_file`.
-It must match `sources(data)[[scalar]]` order and contents (reordered if needed).}
+\item{phenotypes}{A data.frame of the cohort with columns of independent
+variables and covariates to be added to the model. It must contain a
+column called \code{"source_file"} whose entries match those in
+\code{sources(data)[[scalar]]}.}
 
-\item{scalar}{Character, the scalar name to analyze}
+\item{scalar}{Character. The name of the element-wise scalar to analyse.
+Must be one of \code{names(scalars(data))}.}
 
-\item{element.subset}{Optional integer vector of element indices (1-based); defaults to all}
+\item{element.subset}{Integer vector of element indices (1-based) to run.
+Default is \code{NULL}, i.e. all elements in \code{data}.}
 
-\item{num.subj.lthr.abs}{Integer lower threshold for subjects with finite values (default 10)}
+\item{num.subj.lthr.abs}{Integer. Lower threshold for the absolute
+number of subjects with finite scalar values (not \code{NaN},
+\code{NA}, or \code{Inf}) required per element. Elements below this
+threshold are skipped (outputs set to \code{NaN}). Default is 10.}
 
-\item{num.subj.lthr.rel}{Relative lower threshold (0-1) (default 0.2)}
+\item{num.subj.lthr.rel}{Numeric between 0 and 1. Lower threshold for
+the proportion of subjects with finite values. Used together with
+\code{num.subj.lthr.abs} (the effective threshold is the maximum of
+the two). Default is 0.2.}
 
-\item{verbose}{TRUE/FALSE to print messages}
+\item{verbose}{Logical. Print progress messages. Default \code{TRUE}.}
 
-\item{pbar}{TRUE/FALSE to show progress bar}
+\item{pbar}{Logical. Show progress bar. Default \code{TRUE}.}
 
-\item{n_cores}{Positive integer number of CPU cores}
+\item{n_cores}{Positive integer. Number of CPU cores for parallel
+processing via \code{\link[parallel]{mclapply}}. Default is 1
+(serial).}
 
-\item{on_error}{Character: one of "stop", "skip", or "debug". When an error occurs in
-the user function for an element, choose whether to stop, skip returning all-NaN values
-for that element, or drop into `browser()` (if interactive) then skip. Default: "stop".}
+\item{on_error}{Character: one of \code{"stop"}, \code{"skip"}, or
+\code{"debug"}. When an error occurs fitting one element: \code{"stop"}
+halts execution; \code{"skip"} returns all-\code{NaN} for that element;
+\code{"debug"} drops into \code{\link{browser}} (if interactive) then
+skips. Default: \code{"stop"}.}
 
-\item{write_scalar_name}{Optional character scalar name. If provided, `ModelArray.wrap`
-writes selected output columns into `scalars/<write_scalar_name>/values` while processing.}
+\item{write_scalar_name}{Optional character. If provided, selected
+output columns are written into
+\code{scalars/<write_scalar_name>/values} in the HDF5 file specified
+by \code{write_scalar_file}.}
 
-\item{write_scalar_file}{Optional character HDF5 output filename used when
-`write_scalar_name` is provided.}
+\item{write_scalar_file}{Optional character. HDF5 output file path.
+Required when \code{write_scalar_name} is provided.}
 
-\item{write_scalar_columns}{Optional character/integer selector for output columns to save
-as scalar values. If NULL, uses all wrap output columns except `element_id`.}
+\item{write_scalar_columns}{Optional character or integer vector
+selecting which output columns to save as scalar values. If
+\code{NULL} (default), uses all output columns except
+\code{element_id}.}
 
-\item{write_scalar_column_names}{Optional character vector of source names saved as
-scalar `column_names`. If NULL, uses `phenotypes$source_file`.}
+\item{write_scalar_column_names}{Optional character vector of source
+names saved as scalar \code{column_names}. If \code{NULL}, uses
+\code{phenotypes$source_file}.}
 
-\item{write_scalar_flush_every}{Positive integer number of elements per write block.}
+\item{write_scalar_flush_every}{Positive integer. Elements per write
+block for scalar writes. Default 1000.}
 
-\item{write_scalar_storage_mode}{Storage mode for scalar writes (e.g., `"double"`).}
+\item{write_scalar_storage_mode}{Character. Storage mode for scalar
+writes (e.g. \code{"double"}). Default \code{"double"}.}
 
-\item{write_scalar_compression_level}{Gzip compression level (0-9) for scalar writes.}
+\item{write_scalar_compression_level}{Integer 0--9. Gzip compression
+level for scalar writes. Default 4.}
 
-\item{write_results_name}{Optional analysis name for incremental writes to
-`results/<write_results_name>/results_matrix`.}
+\item{write_results_name}{Optional character. If provided, results are
+incrementally written to
+\code{results/<write_results_name>/results_matrix} in the HDF5 file
+specified by \code{write_results_file}.}
 
-\item{write_results_file}{Optional HDF5 file path used when `write_results_name` is provided.}
+\item{write_results_file}{Optional character. HDF5 file path for
+incremental result writes. Required when \code{write_results_name}
+is provided.}
 
-\item{write_results_flush_every}{Positive integer number of elements per write block for
-results writes.}
+\item{write_results_flush_every}{Positive integer. Number of elements
+per write block. Default 1000.}
 
-\item{write_results_storage_mode}{Storage mode for results writes (e.g., `"double"`).}
+\item{write_results_storage_mode}{Character. Storage mode for HDF5
+writes (e.g. \code{"double"}). Default \code{"double"}.}
 
-\item{write_results_compression_level}{Gzip compression level (0-9) for results writes.}
+\item{write_results_compression_level}{Integer 0--9. Gzip compression
+level for HDF5 writes. Default 4.}
 
-\item{return_output}{If TRUE (default), return the combined data.frame. If FALSE,
-returns `invisible(NULL)`; useful when writing large outputs directly to HDF5.}
+\item{return_output}{Logical. If \code{TRUE} (default), return the
+combined data.frame. If \code{FALSE}, return \code{invisible(NULL)};
+useful when writing large outputs directly to HDF5.}
 
-\item{...}{Additional arguments forwarded to `FUN`}
+\item{...}{Additional arguments forwarded to \code{FUN}.}
 }
 \value{
-Tibble/data.frame with one row per element and first column `element_id`
-when `return_output = TRUE`; otherwise `invisible(NULL)`.
+A tibble with one row per element and first column
+\code{element_id} (0-based). Remaining columns are determined by
+the return value of \code{FUN}.
 }
 \description{
-`ModelArray.wrap` runs a user-supplied function `FUN` for each requested element and
-returns a tibble/data.frame of results combined across elements.
+\code{ModelArray.wrap} runs a user-supplied function \code{FUN} at each
+requested element and returns a tibble of results combined across
+elements.
 }
 \details{
-This provides a generic framework reusing ModelArray's per-element looping, alignment,
-subject-thresholding, and parallelization. The user function will be called as
-`FUN(data = dat, ...)` where `dat` is `phenotypes` with the response column named `scalar`
-appended for the current element. The return value from `FUN` for a single element must be
-either a one-row data.frame/tibble, a named list, or a named atomic vector. The column
-names from the first successful element determine the final schema.
-
-Note: `ModelArray.wrap` never performs any p-value corrections or modifications.
-If you need adjusted p-values (e.g., FDR), implement them inside your provided `FUN`.
+This provides a generic framework reusing ModelArray's per-element
+looping, alignment, subject-thresholding, and parallelization. The user
+function is called as \code{FUN(data = dat, ...)} where \code{dat} is
+\code{phenotypes} with all scalar columns appended for the current
+element. The return value from \code{FUN} for a single element must be
+one of:
+\itemize{
+\item a one-row \code{data.frame} or \code{tibble}
+\item a named list
+\item a named atomic vector
+}
+The column names from the first successful element determine the final
+schema.
+
+Note: \code{ModelArray.wrap} never performs any p-value corrections or
+modifications. If you need adjusted p-values (e.g. FDR), implement
+them inside \code{FUN}.
+
+Use \code{\link{exampleElementData}} to construct a sample per-element
+data.frame for testing your function before committing to a full run.
+}
+\examples{
+{
+\dontrun{
+ma <- ModelArray("path/to/data.h5", scalar_types = c("FD"))
+phenotypes <- read.csv("cohort.csv")
+
+# Simple custom function
+my_fun <- function(data, ...) {
+  mod <- lm(FD ~ age + sex, data = data)
+  tidy_out <- broom::tidy(mod)
+  # Return a one-row tibble
+  tibble::tibble(
+    age_estimate = tidy_out$estimate[tidy_out$term == "age"],
+    age_pvalue   = tidy_out$p.value[tidy_out$term == "age"]
+  )
+}
+
+
+# Test on one element first
+test_df <- exampleElementData(ma, scalar = "FD",
+                               i_element = 1,
+                               phenotypes = phenotypes)
+my_fun(data = test_df)
+
+# Run across all elements
+results <- ModelArray.wrap(
+  FUN = my_fun,
+  data = ma,
+  phenotypes = phenotypes,
+  scalar = "FD"
+)
+}
+}
+
+}
+\seealso{
+\code{\link{ModelArray.lm}} for linear models,
+\code{\link{ModelArray.gam}} for GAMs,
+\code{\link{exampleElementData}} for building test data,
+\linkS4class{ModelArray} for the input class.
 }

From 37eeb60a3a4aee7e35879a69a42b031565eb9d2a Mon Sep 17 00:00:00 2001
From: araikes <adam.raikes@gmail.com>
Date: Tue, 31 Mar 2026 10:33:54 -0700
Subject: [PATCH 11/23] Update analyzeOneElement function documentation

---
 R/ModelArray_Constructor.R | 256 +++++++++++++++++++++++--------------
 1 file changed, 160 insertions(+), 96 deletions(-)

diff --git a/R/ModelArray_Constructor.R b/R/ModelArray_Constructor.R
index c1c8075..456fd8c 100644
--- a/R/ModelArray_Constructor.R
+++ b/R/ModelArray_Constructor.R
@@ -71,7 +71,6 @@ ModelArraySeed <- function(filepath, name, type = NA) {
 }
 
 
-
 #' Load element-wise data from an HDF5 file
 #'
 #' @description
@@ -107,7 +106,7 @@ ModelArraySeed <- function(filepath, name, type = NA) {
 #'
 #' @examples
 #' \dontrun{
-#' ma <- ModelArray("path/to/data.h5", scalar_types = c("FD", "FC"))
+#' ma <- ModelArray("path/to/data.h5", scalar_types = c("FD"))
 #' ma
 #' }
 #'
@@ -399,42 +398,65 @@ numElementsTotal <- function(modelarray, scalar_name = "FD") {
   numElementsTotal
 }
 
-#' Fit linear model for one element.
+#' Fit a linear model for a single element
+#' If the number of subjects with finite scalar values (not \code{NaN},
+#' \code{NA}, or \code{Inf}) does not exceed \code{num.subj.lthr}, the
+#' element is skipped and all statistics are set to \code{NaN}.
 #'
-#' @description
-#' `analyseOneElement.lm` fits a linear model for one element data, and returns requested model statistics.
+#' @param i_element Integer. The 1-based index of the element to analyse.
+#' @param formula A \code{\link[stats]{formula}} passed to
+#'   \code{\link[stats]{lm}}.
+#' @param modelarray A \linkS4class{ModelArray} object.
+#' @param phenotypes A data.frame of the cohort with columns of independent
+#'   variables and covariates. Must contain a \code{"source_file"} column
+#'   matching \code{sources(modelarray)[[scalar]]}.
+#' @param scalar Character. The name of the element-wise scalar to analyse.
+#'   Must be one of \code{names(scalars(modelarray))}.
+#' @param var.terms Character vector. Statistics to extract per term from
+#'   \code{\link[broom]{tidy.lm}} (e.g. \code{"estimate"}, \code{"statistic"},
+#'   \code{"p.value"}).
+#' @param var.model Character vector. Statistics to extract for the overall
+#'   model from \code{\link[broom]{glance.lm}} (e.g. \code{"adj.r.squared"},
+#'   \code{"p.value"}).
+#' @param num.subj.lthr Numeric. The pre-computed minimum number of subjects
+#'   with finite values required for this element to be analysed. Elements
+#'   below this threshold are skipped. This value is typically computed by
+#'   the parent function from \code{num.subj.lthr.abs} and
+#'   \code{num.subj.lthr.rel}.
+#' @param num.stat.output Integer or \code{NULL}. The total number of output
+#'   columns (including \code{element_id}). Used when
+#'   \code{flag_initiate = FALSE} to generate an all-\code{NaN} row for
+#'   skipped elements. Must be \code{NULL} when \code{flag_initiate = TRUE}.
+#' @param flag_initiate Logical. If \code{TRUE}, fit the model once and return
+#'   metadata for initialising the output data.frame (column names and term
+#'   names). If \code{FALSE}, return a numeric vector of results for this
+#'   element.
+#' @param on_error Character. One of \code{"stop"}, \code{"skip"}, or
+#'   \code{"debug"}. When an error occurs fitting the model: \code{"stop"}
+#'   halts execution; \code{"skip"} returns all-\code{NaN} for this element;
+#'   \code{"debug"} drops into \code{\link{browser}} (if interactive) then
+#'   skips. Default: \code{"stop"}.
+#' @param ... Additional arguments passed to \code{\link[stats]{lm}}.
 #'
-#' @details
-#' `ModelArray.lm` iteratively calls this function to get statistics for all requested elements.
+#' @return If \code{flag_initiate = TRUE}, a list with components:
+#'   \describe{
+#'     \item{column_names}{Character vector. The column names for the output
+#'       data.frame, with \code{"element_id"} first.}
+#'     \item{list.terms}{Character vector. The names of the model terms
+#'       (from \code{\link[broom]{tidy.lm}}).}
+#'   }
+#'   If \code{flag_initiate = FALSE}, a numeric vector of length
+#'   \code{num.stat.output} with \code{element_id} (0-based) as the first
+#'   value and the requested statistics in subsequent positions. All-\code{NaN}
+#'   (except \code{element_id}) if the element had insufficient valid subjects
+#'   or if an error occurred with \code{on_error = "skip"}.
 #'
-#' @param i_element An integer, the i_th element, starting from 1.
-#' For initiating (flag_initiate = TRUE), use i_element=1
-#' @param formula Formula (passed to `stats::lm()`)
-#' @param modelarray ModelArray class
-#' @param phenotypes A data.frame of the cohort with columns of independent variables
-#' and covariates to be added to the model.
-#' @param scalar A character. The name of the element-wise scalar to be analysed
-#' @param var.terms A list of characters. The list of variables to save for terms (got from `broom::tidy()`).
-#' @param var.model A list of characters. The list of variables to save for the model (got from `broom::glance()`).
-#' @param num.subj.lthr The minimal number of subjects with valid value in input h5 file,
-#' i.e. number of subjects with finite values (defined by `is.finite()`,
-#' i.e. not NaN or NA or Inf) in h5 file > \code{num.subj.lthr},
-#' then this element will be run normally;
-#' otherwise, this element will be skipped and statistical outputs will be set as NaN.
-#' @param num.stat.output The number of output stat metrics
-#' (for generating all NaN stat when # subjects does not meet criteria).
-#' This includes column `element_id`.
-#' This is required when flag_initiate = TRUE.
-#' @param flag_initiate TRUE or FALSE, Whether this is to initiate the new analysis.
-#' If TRUE, it will return column names etc to be used for initiating data.frame;
-#' if FALSE, it will return the list of requested statistic values.
-#' @param ... Additional arguments for `stats::lm()`
-#' @param on_error Character: one of "stop", "skip", or "debug". When an error occurs while
-#' fitting one element, choose whether to stop, skip returning all-NaN values for that element,
-#' or drop into `browser()` (if interactive) then skip. Default: "stop".
+#' @seealso \code{\link{ModelArray.lm}} which calls this function iteratively,
+#'   \code{\link{analyseOneElement.gam}} for the GAM equivalent,
+#'   \code{\link{analyseOneElement.wrap}} for user-supplied functions.
 #'
-#' @return If flag_initiate==TRUE, returns column names, and list of term names of final results;
-#' if flag_initiate==FALSE, it will return the list of requested statistic values for a element.
+#' @keywords internal
+#' @rdname analyseOneElement.lm
 #' @export
 #' @importFrom stats lm
 #' @import broom
@@ -707,47 +729,60 @@ analyseOneElement.lm <- function(i_element,
     }
   }
 }
-#' Fit GAM for one element
+
+#' Fit a GAM for a single element
+#' returns metadata (column names, smooth term names, parametric term names,
+#' and the smoothing parameter criterion attribute name) used by
+#' \code{\link{ModelArray.gam}} to initialise the output data.frame. When
+#' \code{flag_initiate = FALSE}, it returns a numeric vector representing one
+#' row of the final results matrix.
 #'
-#' @description
-#' `analyseOneElement.gam` fits a GAM model for one element data, and returns requested model statistics.
+#' If the number of subjects with finite scalar values does not exceed
+#' \code{num.subj.lthr}, the element is skipped and all statistics are set
+#' to \code{NaN}.
 #'
-#' @details
-#' `ModelArray.gam` iteratively calls this function to get statistics for all requested elements.
+#' @inheritParams analyseOneElement.lm
 #'
-#' @param i_element An integer, the i_th element, starting from 1.
-#' For initiating (flag_initiate = TRUE), use i_element=1
-#' @param formula A formula (passed to `mgcv::gam()`)
-#' @param modelarray ModelArray class
-#' @param phenotypes A data.frame of the cohort with columns of independent variables
-#' and covariates to be added to the model
-#' @param scalar A character. The name of the element-wise scalar to be analysed
-#' @param var.smoothTerms The list of variables to save for smooth terms
-#' (got from broom::tidy(parametric = FALSE)). Example smooth term: age in formula "outcome ~ s(age)".
-#' @param var.parametricTerms The list of variables to save for parametric terms
-#' (got from broom::tidy(parametric = TRUE)). Example parametric term: sex in formula "outcome ~ s(age) + sex".
-#' @param var.model The list of variables to save for the model (got from broom::glance() and summary()).
-#' @param num.subj.lthr The minimal number of subjects with valid value in input h5 file,
-#' i.e. number of subjects with finite values (defined by `is.finite()`,
-#' i.e. not NaN or NA or Inf) in h5 file > \code{num.subj.lthr},
-#' then this element will be run normally;
-#' otherwise, this element will be skipped and statistical outputs will be set as NaN.
-#' @param num.stat.output The number of output stat metrics
-#' (for generating all NaN stat when # subjects does not meet criteria).
-#' This includes column `element_id`.
-#' This is required when flag_initiate = TRUE.
-#' @param flag_initiate TRUE or FALSE, Whether this is to initiate the new analysis.
-#' If TRUE, it will return column names etc to be used for initiating data.frame;
-#' if FALSE, it will return the list of requested statistic values.
-#' @param flag_sse TRUE or FALSE, Whether to calculate SSE (sum of squared error) for the model (`model.sse`).
-#' SSE is needed for calculating partial R-squared.
-#' @param ... Additional arguments for `mgcv::gam()`
-#' @param on_error Character: one of "stop", "skip", or "debug". When an error occurs while
-#' fitting one element, choose whether to stop, skip returning all-NaN values for that element,
-#' or drop into `browser()` (if interactive) then skip. Default: "stop".
-#' @return If flag_initiate==TRUE, returns column names,
-#' list of term names of final results, and attr.name of sp.criterion;
-#' if flag_initiate==FALSE, it will return the list of requested statistic values for a element.
+#' @param formula A \code{\link[stats]{formula}} passed to
+#'   \code{\link[mgcv]{gam}}.
+#' @param var.smoothTerms Character vector. Statistics to extract for smooth
+#'   terms from \code{\link[broom]{tidy.gam}} with \code{parametric = FALSE}
+#'   (e.g. \code{"edf"}, \code{"ref.df"}, \code{"statistic"},
+#'   \code{"p.value"}).
+#' @param var.parametricTerms Character vector. Statistics to extract for
+#'   parametric terms from \code{\link[broom]{tidy.gam}} with
+#'   \code{parametric = TRUE} (e.g. \code{"estimate"}, \code{"std.error"},
+#'   \code{"statistic"}, \code{"p.value"}).
+#' @param var.model Character vector. Statistics to extract for the overall
+#'   model from \code{\link[broom]{glance.gam}} and
+#'   \code{\link[mgcv]{summary.gam}} (e.g. \code{"adj.r.squared"},
+#'   \code{"dev.expl"}, \code{"sp.criterion"}).
+#' @param flag_sse Logical. If \code{TRUE}, also compute the error sum of
+#'   squares (\code{model.sse}) for the model, which is needed for
+#'   partial R-squared calculations in \code{\link{ModelArray.gam}}.
+#'   Default: \code{FALSE}.
+#' @param ... Additional arguments passed to \code{\link[mgcv]{gam}}.
+#'
+#' @return If \code{flag_initiate = TRUE}, a list with components:
+#'   \describe{
+#'     \item{column_names}{Character vector of output column names.}
+#'     \item{list.smoothTerms}{Character vector of smooth term names.}
+#'     \item{list.parametricTerms}{Character vector of parametric term names.}
+#'     \item{sp.criterion.attr.name}{Character. The name attribute of the
+#'       smoothing parameter selection criterion (e.g. \code{"REML"} or
+#'       \code{"GCV.Cp"}).}
+#'   }
+#'   If \code{flag_initiate = FALSE}, a numeric vector of length
+#'   \code{num.stat.output} with \code{element_id} (0-based) first and
+#'   requested statistics in subsequent positions. All-\code{NaN} (except
+#'   \code{element_id}) if the element was skipped.
+#'
+#' @seealso \code{\link{ModelArray.gam}} which calls this function iteratively,
+#'   \code{\link{analyseOneElement.lm}} for the linear model equivalent,
+#'   \code{\link{analyseOneElement.wrap}} for user-supplied functions.
+#'
+#' @keywords internal
+#' @rdname analyseOneElement.gam
 #' @export
 #' @import mgcv
 #' @import broom
@@ -1138,36 +1173,65 @@ analyseOneElement.gam <- function(i_element,
 }
 
 
-#' Run a user-supplied function for one element
+#' Run a user-supplied function for a single element
 #'
 #' @description
-#' `analyseOneElement.wrap` runs a user-supplied function `FUN` on a single element's data.
-#' It prepares the per-element data by attaching the element values as a new column named by `scalar`
-#' to the provided `phenotypes` data.frame, then calls `FUN(data = dat, ...)`.
+#' Runs a user-supplied function on one element's data, preparing the
+#' per-element data.frame by attaching all scalar values as new columns to
+#' the provided \code{phenotypes}. This is the per-element workhorse called
+#' iteratively by \code{\link{ModelArray.wrap}}.
 #'
 #' @details
-#' The user-supplied `FUN` should return either a one-row data.frame/tibble, a named list, or a named vector.
-#' The result will be coerced to a one-row tibble and combined into the final results matrix across elements.
+#' Most users should call \code{\link{ModelArray.wrap}} directly, which handles
+#' looping, parallelisation, and result assembly.
+#' \code{analyseOneElement.wrap} is exported for advanced use cases such as
+#' debugging a single element or building custom analysis loops.
 #'
-#' @param i_element An integer, the i_th element, starting from 1.
-#' @param user_fun A function that accepts at least an argument named `data` (the per-element
-#' `phenotypes` with the response column appended) and returns a one-row data.frame/tibble,
-#' named list, or named vector.
-#' @param modelarray ModelArray class
-#' @param phenotypes A data.frame of the cohort with columns of independent variables and covariates
-#' @param scalar A character. The name of the element-wise scalar to be analysed
-#' @param num.subj.lthr The minimal number of subjects with valid value in input h5 file
-#' @param num.stat.output The number of output stat metrics (including `element_id`).
-#' Required when `flag_initiate = TRUE`.
-#' @param flag_initiate TRUE or FALSE, whether this is to initiate the new analysis to get column names
-#' @param on_error Character: one of "stop", "skip", or "debug". When an error occurs while
-#' executing the user function, choose whether to stop, skip returning all-NaN values for this
-#' element, or drop into `browser()` (if interactive) then skip. Default: "stop".
-#' @param ... Additional arguments forwarded to `FUN`
+#' The user-supplied \code{user_fun} is called as
+#' \code{user_fun(data = dat, ...)} where \code{dat} is \code{phenotypes} with
+#' columns appended for \strong{all} scalars in the \linkS4class{ModelArray}
+#' (not just the one named by \code{scalar}). The function should return a
+#' one-row data.frame/tibble, a named list, or a named atomic vector. The
+#' result is coerced to a numeric vector for assembly into the final results
+#' matrix.
+#'
+#' Unlike \code{\link{analyseOneElement.lm}} and
+#' \code{\link{analyseOneElement.gam}}, this function appends \strong{all}
+#' scalar columns (not just the response) and checks for column name
+#' collisions between scalar names and existing \code{phenotypes} columns.
+#'
+#' If the number of subjects with finite values across all scalars does not
+#' exceed \code{num.subj.lthr}, the element is skipped and all statistics
+#' are set to \code{NaN}.
+#'
+#' @inheritParams analyseOneElement.lm
+#'
+#' @param user_fun A function that accepts at least an argument named
+#'   \code{data} (a data.frame: \code{phenotypes} with scalar columns
+#'   appended for the current element) and returns a one-row
+#'   data.frame/tibble, a named list, or a named atomic vector.
+#' @param ... Additional arguments forwarded to \code{user_fun}.
+#'
+#' @return If \code{flag_initiate = TRUE}, a list with one component:
+#'   \describe{
+#'     \item{column_names}{Character vector. The column names derived from
+#'       the return value of \code{user_fun}, with \code{"element_id"}
+#'       prepended.}
+#'   }
+#'   If \code{flag_initiate = FALSE}, a numeric vector of length
+#'   \code{num.stat.output} with \code{element_id} (0-based) first and
+#'   the coerced output of \code{user_fun} in subsequent positions.
+#'   All-\code{NaN} (except \code{element_id}) if the element was skipped
+#'   or if an error occurred with \code{on_error = "skip"}.
+#'
+#' @seealso \code{\link{ModelArray.wrap}} which calls this function
+#'   iteratively, \code{\link{exampleElementData}} for building a test
+#'   data.frame matching the format passed to \code{user_fun},
+#'   \code{\link{analyseOneElement.lm}} for the linear model equivalent,
+#'   \code{\link{analyseOneElement.gam}} for the GAM equivalent.
 #'
-#' @return If `flag_initiate==TRUE`, returns a list with `column_names`.
-#' If `flag_initiate==FALSE`, returns a numeric vector representing the one-row result for this element
-#' with `element_id` as the first value.
+#' @keywords internal
+#' @rdname analyseOneElement.wrap
 #' @export
 #' @importFrom dplyr %>%
 #' @import tibble

From 9985f6d58f3fd02f5d371b34129427f6bb6191bb Mon Sep 17 00:00:00 2001
From: araikes <adam.raikes@gmail.com>
Date: Tue, 31 Mar 2026 10:40:07 -0700
Subject: [PATCH 12/23] Update documentation for exported utility functions

---
 R/utils.R | 214 +++++++++++++++++++++++++++++++++++++++++++-----------
 1 file changed, 173 insertions(+), 41 deletions(-)

diff --git a/R/utils.R b/R/utils.R
index d16d9d5..b31b7ae 100644
--- a/R/utils.R
+++ b/R/utils.R
@@ -346,27 +346,83 @@ checker_gam_formula <- function(formula, gam.formula.breakdown, onemodel = NULL)
 #' Generate GAM formula with factor-smooth interaction
 #'
 #' @description
-#' This function will generate a formula in the following format:
-#' \code{y ~ orderedFactor + s(x) + s(x, by=orderedFactor)},
+#' Generates a formula in the format
+#' \code{y ~ orderedFactor + s(x) + s(x, by = orderedFactor)},
 #' where \code{y} is \code{response.var}, \code{x} is \code{smooth.var},
-#' and \code{orderedFactor} is \code{factor.var} - see \code{factor.var} for more.
-#' The formula generated could be further modified, e.g. adding covariates.
-#'
-#' @param response.var character class, the variable name for response
-#' @param factor.var character class, the variable name for factor. It should be an ordered factor.
-#' If not, it will generate it as a new column in `phenotypes`, which requires `reference.group`.
-#' @param smooth.var character class, the variable name in smooth term as main effect
-#' @param phenotypes data.frame class, the cohort matrix with columns of independent variables (including
-#' \code{factor.var} and \code{smooth.var}) to be added to the model
-#' @param reference.group character class, the reference group for ordered factor of `factor.var`; required when
-#' `factor.var` in `phenotypes` is not an ordered factor.
-#' @param prefix.ordered.factor character class, the prefix for ordered factor; required when `factor.var` in
-#' `phenotypes` is not an ordered factor.
-#' @param fx TRUE or FALSE, to be used in smooth term s(). Recommend TRUE.
-#' @param k integer, to be used in smooth term including the interaction term. If NULL (no entry), will use default
-#' value as in mgcv::s()
-#' @return a list, including: 1) formula generated; 2) data.frame phenotypes - updated if argument factor.var
-#' is not an ordered factor
+#' and \code{orderedFactor} is \code{factor.var}.
+#' The formula generated can be further modified, e.g. by adding covariates.
+#'
+#' @details
+#' This helper exists because setting up factor-smooth interactions in
+#' \code{\link[mgcv]{gam}} requires an ordered factor and a specific formula
+#' structure. If \code{factor.var} in \code{phenotypes} is not already an
+#' ordered factor, this function creates one using \code{reference.group} as
+#' the baseline level and adds it as a new column (named with
+#' \code{prefix.ordered.factor} prepended to \code{factor.var}) [2].
+#'
+#' The returned \code{phenotypes} data.frame must be used in the subsequent
+#' \code{\link{ModelArray.gam}} call so that the ordered factor column is
+#' available to the model.
+#'
+#' @param response.var Character. The variable name for the response
+#'   (dependent variable), typically a scalar name like \code{"FD"}.
+#' @param factor.var Character. The variable name for the factor. It should
+#'   be an ordered factor in \code{phenotypes}. If not, an ordered factor
+#'   will be generated as a new column, which requires \code{reference.group} [2].
+#' @param smooth.var Character. The variable name for the smooth term main
+#'   effect (e.g. \code{"age"}).
+#' @param phenotypes A data.frame of the cohort with columns of independent
+#'   variables, including \code{factor.var} and \code{smooth.var}.
+#' @param reference.group Character. The reference (baseline) group for the
+#'   ordered factor of \code{factor.var}. Required when \code{factor.var}
+#'   in \code{phenotypes} is not already an ordered factor [2].
+#' @param prefix.ordered.factor Character. Prefix for the ordered factor
+#'   column name. Required when \code{factor.var} in \code{phenotypes} is
+#'   not already an ordered factor. Default is \code{"o"} [2].
+#' @param fx Logical. Passed to \code{\link[mgcv]{s}}. If \code{TRUE}
+#'   (recommended), the smooth is treated as fixed degrees of freedom.
+#'   Default is \code{TRUE}.
+#' @param k Integer or \code{NULL}. Basis dimension passed to
+#'   \code{\link[mgcv]{s}} for both the main smooth and interaction terms.
+#'   If \code{NULL} (default), uses the default from \code{mgcv::s()} [2].
+#'
+#' @return A list with two components:
+#'   \describe{
+#'     \item{formula}{The generated \code{\link[stats]{formula}} object.}
+#'     \item{phenotypes}{The (possibly updated) data.frame. If
+#'       \code{factor.var} was not already an ordered factor, a new column
+#'       named \code{paste0(prefix.ordered.factor, factor.var)} is added.
+#'       Otherwise identical to the input [2].}
+#'   }
+#'
+#' @seealso \code{\link{gen_gamFormula_contIx}} for continuous-by-continuous
+#'   interactions, \code{\link{ModelArray.gam}} which accepts the generated
+#'   formula.
+#'
+#' @examples
+#' \dontrun{
+#' phenotypes <- read.csv("cohort.csv")
+#'
+#' # factor.var is not yet ordered - function creates it
+#' result <- gen_gamFormula_fxSmooth(
+#'   response.var = "FD",
+#'   factor.var = "sex",
+#'   smooth.var = "age",
+#'   phenotypes = phenotypes,
+#'   reference.group = "female"
+#' )
+#' result$formula
+#'
+#' # Use the updated phenotypes (contains the ordered factor column)
+#' results <- ModelArray.gam(
+#'   result$formula,
+#'   data = ma,
+#'   phenotypes = result$phenotypes,
+#'   scalar = "FD"
+#' )
+#' }
+#'
+#' @rdname gen_gamFormula_fxSmooth
 #' @importFrom mgcv s
 #' @importFrom stats as.formula
 #' @export
@@ -436,24 +492,64 @@ gen_gamFormula_fxSmooth <- function(response.var, factor.var, smooth.var, phenot
 
 
 
-#' Generate GAM formula with continuous*continuous interaction
+#' Generate GAM formula with continuous-by-continuous interaction
 #'
 #' @description
-#' This function will generate a formula in the following format: \code{y ~ ti(x) + ti(z) + ti(x,z)},
-#' where \code{y} is \code{response.var}, \code{x} is \code{cont1.var}, and \code{z} is \code{cont2.var}.
-#' The formula generated could be further modified, e.g. adding covariates.
-#'
-#' @param response.var character class, the variable name for response
-#' @param cont1.var character class, the name of the first continuous variable
-#' @param cont2.var character class, the name of the second continuous variable
-#' @param fx TRUE or FALSE, to be used in smooth term s(). Recommend TRUE.
-#' @param k integer, to be used in smooth term including the interaction term.
-#' If NULL (no entry), will use default value as in mgcv::s()
-#' @return The formula generated
+#' Generates a formula in the format
+#' \code{y ~ ti(x) + ti(z) + ti(x, z)}, where \code{y} is
+#' \code{response.var}, \code{x} is \code{cont1.var}, and \code{z} is
+#' \code{cont2.var}. The formula generated can be further modified, e.g.
+#' by adding covariates.
+#'
+#' @details
+#' This helper uses \code{\link[mgcv]{ti}} (tensor product interaction)
+#' terms so that the interaction \code{ti(x, z)} captures only the
+#' interaction effect, separate from the main effects \code{ti(x)} and
+#' \code{ti(z)}. This decomposition is important for interpretability and
+#' for requesting \code{changed.rsq.term.index} in
+#' \code{\link{ModelArray.gam}} [4].
+#'
+#' @param response.var Character. The variable name for the response
+#'   (dependent variable), typically a scalar name like \code{"FD"}.
+#' @param cont1.var Character. The name of the first continuous variable.
+#' @param cont2.var Character. The name of the second continuous variable.
+#' @param fx Logical. Passed to \code{\link[mgcv]{ti}}. If \code{TRUE}
+#'   (recommended), the smooth is treated as fixed degrees of freedom.
+#'   Default is \code{TRUE} [2].
+#' @param k Integer or \code{NULL}. Basis dimension passed to
+#'   \code{\link[mgcv]{ti}} for all three terms (both main effects and the
+#'   interaction). If \code{NULL} (default), uses the default from
+#'   \code{mgcv::ti()} [2].
+#'
+#' @return A \code{\link[stats]{formula}} object.
+#'
+#' @seealso \code{\link{gen_gamFormula_fxSmooth}} for factor-smooth
+#'   interactions, \code{\link{ModelArray.gam}} which accepts the generated
+#'   formula.
+#'
+#' @examples
+#' \dontrun{
+#' formula <- gen_gamFormula_contIx(
+#'   response.var = "FD",
+#'   cont1.var = "age",
+#'   cont2.var = "cognition"
+#' )
+#' formula
+#'
+#' # Use in ModelArray.gam with changed R-squared for the interaction
+#' results <- ModelArray.gam(
+#'   formula,
+#'   data = ma,
+#'   phenotypes = phenotypes,
+#'   scalar = "FD",
+#'   changed.rsq.term.index = list(3)
+#' )
+#' }
+#'
+#' @rdname gen_gamFormula_contIx
 #' @importFrom mgcv ti
 #' @importFrom stats as.formula
 #' @export
-#'
 gen_gamFormula_contIx <- function(response.var, cont1.var, cont2.var,
                                   fx = TRUE, k = NULL) {
   if (is.null(k)) {
@@ -491,18 +587,45 @@ bind_cols_check_emptyTibble <- function(a, b) {
 }
 
 
-#' Summarize an h5 file without loading the full ModelArray
+#' Summarize an HDF5 file without loading a full ModelArray
 #'
 #' @description
-#' Reads the h5 file structure and returns a summary of available scalars,
+#' Reads the HDF5 file structure and returns a summary of available scalars,
 #' their dimensions, and any saved analyses. Useful for inspecting large files
-#' without constructing a full ModelArray object.
+#' without constructing a full \linkS4class{ModelArray} object.
+#'
+#' @details
+#' This function opens the HDF5 file read-only via \code{\link[rhdf5]{h5ls}},
+#' inspects the group structure under \code{/scalars/} and \code{/results/},
+#' and closes the file. It does not load any data into memory. The returned
+#' object has a \code{print} method that displays a formatted summary [2].
+#'
+#' @param filepath Character. Path to an HDF5 (\code{.h5}) file.
 #'
-#' @param filepath Path to an h5 file
-#' @return A list with components:
-#'   \item{scalars}{A data.frame with columns: name, nElements, nInputFiles}
-#'   \item{analyses}{Character vector of analysis names}
-#'   \item{filepath}{The input filepath}
+#' @return An object of class \code{"h5summary"}, which is a list with
+#'   components:
+#'   \describe{
+#'     \item{scalars}{A data.frame with columns \code{name},
+#'       \code{nElements}, and \code{nInputFiles} [2].}
+#'     \item{analyses}{Character vector of analysis names found under
+#'       \code{/results/} [2].}
+#'     \item{filepath}{The input filepath.}
+#'   }
+#'
+#' @seealso \code{\link{ModelArray}} for loading the full object,
+#'   \linkS4class{ModelArray} for the class definition.
+#'
+#' @examples
+#' \dontrun{
+#' h5summary("path/to/data.h5")
+#'
+#' # Inspect before deciding which scalars to load
+#' info <- h5summary("path/to/data.h5")
+#' info$scalars$name
+#' ma <- ModelArray("path/to/data.h5", scalar_types = info$scalars$name)
+#' }
+#'
+#' @rdname h5summary
 #' @importFrom rhdf5 h5ls h5closeAll
 #' @export
 h5summary <- function(filepath) {
@@ -549,6 +672,15 @@ h5summary <- function(filepath) {
   )
 }
 
+#' @rdname h5summary
+#'
+#' @param x An \code{h5summary} object as returned by
+#'   \code{\link{h5summary}}.
+#' @param ... Additional arguments (currently ignored).
+#'
+#' @return Invisible \code{x}. Called for its side effect of printing a
+#'   human-readable summary to the console.
+#'
 #' @method print h5summary
 #' @export
 print.h5summary <- function(x, ...) {

From 564757c5a96a2c5daeb0ab45daf827f65c888466 Mon Sep 17 00:00:00 2001
From: araikes <adam.raikes@gmail.com>
Date: Tue, 31 Mar 2026 10:40:43 -0700
Subject: [PATCH 13/23] Update documentation for writeResults

---
 R/ModelArray_Constructor.R | 75 +++++++++++++++++++++++++++++++++-----
 1 file changed, 65 insertions(+), 10 deletions(-)

diff --git a/R/ModelArray_Constructor.R b/R/ModelArray_Constructor.R
index 456fd8c..13173db 100644
--- a/R/ModelArray_Constructor.R
+++ b/R/ModelArray_Constructor.R
@@ -1387,21 +1387,76 @@ analyseOneElement.wrap <- function(i_element,
   }
 }
 
-#' Write outputs from element-wise statistical analysis to the HDF5 file.
+#' Write outputs from element-wise statistical analysis to an HDF5 file
 #'
 #' @description
-#' Create a group named `analysis_name` in HDF5 file,
-#' then write the statistical results data.frame (i.e. for one analysis) in it.
+#' Creates a group named \code{analysis_name} under \code{/results/} in the
+#' HDF5 file, then writes the statistical results data.frame (i.e. for one
+#' analysis) into it as \code{results_matrix} along with column names [7].
 #'
 #' @details
-#' debug tip: For "Error in H5File.open(filename, mode, file_create_pl, file_access_pl)",
-#' check if there is message 'No such file or directory'. Try absolute .h5 filename.
+#' The results are stored at
+#' \code{/results/<analysis_name>/results_matrix} with column names saved
+#' as a separate dataset at
+#' \code{/results/<analysis_name>/column_names} [7].
 #'
-#' @param fn.output A character, The HDF5 (.h5) filename for the output
-#' @param df.output A data.frame object with element-wise statistical results, returned from `ModelArray.lm()` etc
-#' @param analysis_name A character, the name of the results
-#' @param overwrite If a group with the same analysis_name exists in HDF5 file,
-#' whether overwrite it (TRUE) or not (FALSE)
+#' If any column of \code{df.output} is not numeric or integer, it is
+#' coerced to numeric via \code{factor()} and the factor levels are saved
+#' as a look-up table at
+#' \code{/results/<analysis_name>/lut_forcol<i>} [7].
+#'
+#' \strong{Debugging tip:} If you encounter
+#' \code{"Error in H5File.open(filename, mode, file_create_pl, file_access_pl)"},
+#' check if the message mentions "No such file or directory". Try using an
+#' absolute path for the \code{fn.output} argument [7].
+#'
+#' @param fn.output Character. The HDF5 (\code{.h5}) filename for the output.
+#'   The file must already exist; use an absolute path if you encounter
+#'   file-not-found errors.
+#' @param df.output A data.frame of element-wise statistical results, as
+#'   returned by \code{\link{ModelArray.lm}},
+#'   \code{\link{ModelArray.gam}}, or \code{\link{ModelArray.wrap}}.
+#'   Must inherit from \code{data.frame} [7].
+#' @param analysis_name Character. The name for this set of results. Used
+#'   as the group name under \code{/results/} in the HDF5 file.
+#'   Default is \code{"myAnalysis"} [7].
+#' @param overwrite Logical. If a group with the same \code{analysis_name}
+#'   already exists in the HDF5 file, whether to overwrite it (\code{TRUE})
+#'   or skip with a warning (\code{FALSE}). Default is \code{TRUE} [7].
+#'
+#' @return Invisible \code{NULL}. Called for its side effect of writing
+#'   results to the HDF5 file.
+#'
+#' @seealso \code{\link{ModelArray.lm}}, \code{\link{ModelArray.gam}},
+#'   \code{\link{ModelArray.wrap}} which produce the \code{df.output},
+#'   \code{\link{results}} for reading results back from a
+#'   \linkS4class{ModelArray}, \code{\link{h5summary}} for inspecting what
+#'   has been written.
+#'
+#' @examples
+#' \dontrun{
+#' ma <- ModelArray("data.h5", scalar_types = c("FD"))
+#' phenotypes <- read.csv("cohort.csv")
+#'
+#' results <- ModelArray.lm(
+#'   FD ~ age + sex,
+#'   data = ma,
+#'   phenotypes = phenotypes,
+#'   scalar = "FD"
+#' )
+#'
+#' writeResults(
+#'   fn.output = "data.h5",
+#'   df.output = results,
+#'   analysis_name = "lm_age_sex",
+#'   overwrite = TRUE
+#' )
+#'
+#' # Verify
+#' h5summary("data.h5")
+#' }
+#'
+#' @rdname writeResults
 #' @import hdf5r
 #' @export
 writeResults <- function(fn.output,

From 1b68ef0a3e3312ba03551b9db97172a7982a8c70 Mon Sep 17 00:00:00 2001
From: araikes <adam.raikes@gmail.com>
Date: Tue, 31 Mar 2026 10:41:41 -0700
Subject: [PATCH 14/23] Fix invalid syntax in printAdditionalArgu docs

---
 R/utils.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/utils.R b/R/utils.R
index b31b7ae..6f0a2fe 100644
--- a/R/utils.R
+++ b/R/utils.R
@@ -77,7 +77,7 @@ flagAnalysisExistInh5 <- function(fn_h5, analysis_name) {
 #' print the additional arguments settings
 #' @param FUN The function, e.g. mgcv::gam, without "()"
 #' @param argu_name The argument name of the function
-#' @param dots: list of additional arguments
+#' @param dots list of additional arguments
 #' @param message_default The message for default
 #' @param message_usr_input The message describing user's input
 #' @importFrom crayon black

From 93ebb2b0b7efa937219409cdf410755e824bd74d Mon Sep 17 00:00:00 2001
From: araikes <adam.raikes@gmail.com>
Date: Tue, 31 Mar 2026 10:47:18 -0700
Subject: [PATCH 15/23] Update documentation for mergeModelArrays

---
 R/merge.R | 102 +++++++++++++++++++++++++++++++++++++++++++++++-------
 1 file changed, 89 insertions(+), 13 deletions(-)

diff --git a/R/merge.R b/R/merge.R
index 1e82a4e..60fc3c1 100644
--- a/R/merge.R
+++ b/R/merge.R
@@ -1,19 +1,95 @@
-#' Merge multiple ModelArrays from different h5 files
+#' Merge multiple ModelArrays from different HDF5 files
 #'
 #' @description
-#' Combines scalars from multiple ModelArray objects into a single ModelArray,
-#' aligning subjects via phenotype columns. Uses `DelayedArray::acbind()` for
-#' virtual column-binding — no h5 rewriting is needed.
+#' Combines scalars from multiple \linkS4class{ModelArray} objects into a
+#' single \linkS4class{ModelArray}, aligning subjects via shared phenotype
+#' columns. Uses \code{\link[DelayedArray]{acbind}} for virtual
+#' column-binding — no HDF5 rewriting is needed.
 #'
-#' @param modelarrays A list of ModelArray objects
-#' @param phenotypes_list A list of data.frames, one per ModelArray. Each must
-#'   contain a `source_file` column matching its corresponding ModelArray's sources.
-#' @param merge_on Character vector of column names to join phenotypes on
-#'   (e.g., `c("subject_id")`). These columns must exist in all phenotypes data.frames.
-#' @return A list with:
-#'   \item{data}{A combined ModelArray with scalars from all inputs}
-#'   \item{phenotypes}{The inner-joined phenotypes data.frame. The original
-#'     `source_file` columns are renamed to `source_file.<scalar_name>`.}
+#' @details
+#' The merge performs an inner join of the phenotype data.frames on the
+#' columns specified by \code{merge_on}. Only subjects present in all
+#' phenotype data.frames are retained. Scalar matrices from each input
+#' \linkS4class{ModelArray} are column-subsetted and reordered to match
+#' the joined subject list [6].
+#'
+#' A unified \code{source_file} column is created from the \code{merge_on}
+#' columns so that downstream analysis functions
+#' (\code{\link{ModelArray.lm}}, \code{\link{ModelArray.gam}},
+#' \code{\link{ModelArray.wrap}}) can align phenotypes to scalars. The
+#' original \code{source_file} columns are renamed to
+#' \code{source_file.<first_scalar_name>} for each input
+#' \linkS4class{ModelArray} [6].
+#'
+#' Scalar names must be unique across all input ModelArrays. If two
+#' ModelArrays share a scalar name (e.g. both have \code{"FD"}), the
+#' function will error. Element counts (number of rows) must match
+#' across all scalars [6].
+#'
+#' If element metadata is available (see \code{\link{elementMetadata}}),
+#' the function checks that it is consistent across inputs and warns if
+#' it differs or is only partially available [6].
+#'
+#' @param modelarrays A list of at least two \linkS4class{ModelArray}
+#'   objects, each constructed from a different HDF5 file.
+#' @param phenotypes_list A list of data.frames, one per
+#'   \linkS4class{ModelArray} in \code{modelarrays}. Each must contain a
+#'   \code{source_file} column whose entries match the corresponding
+#'   ModelArray's sources (i.e. \code{sources(modelarrays[[i]])}).
+#'   Each must also contain all columns named in \code{merge_on} [6].
+#' @param merge_on Character vector of column names present in all
+#'   data.frames in \code{phenotypes_list}, used to inner-join subjects
+#'   across sessions/modalities (e.g. \code{c("subject_id")}). The
+#'   combination of these columns must uniquely identify each subject
+#'   within each data.frame [6].
+#'
+#' @return A list with two components:
+#'   \describe{
+#'     \item{data}{A combined \linkS4class{ModelArray} containing scalars
+#'       from all inputs. Each scalar's columns are subsetted and reordered
+#'       to match the inner-joined subject list.}
+#'     \item{phenotypes}{The inner-joined data.frame. Original
+#'       \code{source_file} columns are renamed to
+#'       \code{source_file.<scalar_name>} and a new unified
+#'       \code{source_file} column is added for use with analysis
+#'       functions [6].}
+#'   }
+#'
+#' @seealso \code{\link{ModelArray}} for constructing individual
+#'   ModelArray objects, \code{\link{ModelArray.lm}},
+#'   \code{\link{ModelArray.gam}}, \code{\link{ModelArray.wrap}} for
+#'   fitting models on the merged object,
+#'   \code{\link{elementMetadata}} for element correspondence checks.
+#'
+#' @examples
+#' \dontrun{
+#' # Load two sessions from different h5 files
+#' ma1 <- ModelArray("session1.h5", scalar_types = c("FD"))
+#' ma2 <- ModelArray("session2.h5", scalar_types = c("FC"))
+#' phen1 <- read.csv("session1_cohort.csv")
+#' phen2 <- read.csv("session2_cohort.csv")
+#'
+#' # Merge on subject ID
+#' merged <- mergeModelArrays(
+#'   modelarrays = list(ma1, ma2),
+#'   phenotypes_list = list(phen1, phen2),
+#'   merge_on = "subject_id"
+#' )
+#'
+#' # Use the merged object for cross-scalar analysis
+#' merged$data
+#' scalarNames(merged$data)   # c("FD", "FC")
+#' head(merged$phenotypes)
+#'
+#' results <- ModelArray.lm(
+#'   FD ~ age + sex,
+#'   data = merged$data,
+#'   phenotypes = merged$phenotypes,
+#'   scalar = "FD"
+#' )
+#' }
+#'
+#' @rdname mergeModelArrays
 #' @importFrom DelayedArray acbind
 #' @importFrom utils head
 #' @export

From ab243e5d1aa4416f1d4689c1fed5069b06eec79d Mon Sep 17 00:00:00 2001
From: araikes <adam.raikes@gmail.com>
Date: Tue, 31 Mar 2026 10:50:16 -0700
Subject: [PATCH 16/23] Fix style issues

---
 R/ModelArray_S4Methods.R | 32 +++++++++++++++++++-------------
 R/analyse-helpers.R      |  6 ++++--
 R/merge.R                |  2 +-
 R/utils.R                |  2 --
 4 files changed, 24 insertions(+), 18 deletions(-)

diff --git a/R/ModelArray_S4Methods.R b/R/ModelArray_S4Methods.R
index 274bc72..7c67cc1 100644
--- a/R/ModelArray_S4Methods.R
+++ b/R/ModelArray_S4Methods.R
@@ -71,8 +71,8 @@ setMethod("show", "ModelArray", function(object) { # , group_name_results="resul
 #' @examples
 #' \dontrun{
 #' ma <- ModelArray("data.h5", scalar_types = c("FD"))
-#' sources(ma)          # named list
-#' sources(ma)[["FD"]]  # character vector of filenames
+#' sources(ma) # named list
+#' sources(ma)[["FD"]] # character vector of filenames
 #' }
 #'
 #' @name sources
@@ -106,8 +106,8 @@ setMethod("sources", "ModelArray", function(x) x@sources)
 #' @examples
 #' \dontrun{
 #' ma <- ModelArray("data.h5", scalar_types = c("FD", "FC"))
-#' scalars(ma)         # named list of all scalars
-#' scalars(ma, "FD")   # single DelayedArray matrix
+#' scalars(ma) # named list of all scalars
+#' scalars(ma, "FD") # single DelayedArray matrix
 #' }
 #'
 #' @name scalars
@@ -163,10 +163,12 @@ setMethod(
 #'
 #' @examples
 #' \dontrun{
-#' ma <- ModelArray("data.h5", scalar_types = c("FD"),
-#'                  analysis_names = c("lm_age"))
-#' results(ma)                # named list of all results
-#' results(ma, "lm_age")     # single result set
+#' ma <- ModelArray("data.h5",
+#'   scalar_types = c("FD"),
+#'   analysis_names = c("lm_age")
+#' )
+#' results(ma) # named list of all results
+#' results(ma, "lm_age") # single result set
 #' results(ma, "lm_age")$results_matrix
 #' }
 #'
@@ -380,7 +382,7 @@ setMethod("nInputFiles", "ModelArray", function(x, scalar = NULL) {
 #' @examples
 #' \dontrun{
 #' ma <- ModelArray("data.h5", scalar_types = c("FD", "FC"))
-#' scalarNames(ma)   # c("FD", "FC")
+#' scalarNames(ma) # c("FD", "FC")
 #' }
 #'
 #' @name scalarNames
@@ -411,9 +413,11 @@ setMethod("scalarNames", "ModelArray", function(x) {
 #'
 #' @examples
 #' \dontrun{
-#' ma <- ModelArray("data.h5", scalar_types = c("FD"),
-#'                  analysis_names = c("lm_age"))
-#' analysisNames(ma)   # "lm_age"
+#' ma <- ModelArray("data.h5",
+#'   scalar_types = c("FD"),
+#'   analysis_names = c("lm_age")
+#' )
+#' analysisNames(ma) # "lm_age"
 #' }
 #'
 #' @name analysisNames
@@ -465,7 +469,9 @@ setMethod("elementMetadata", "ModelArray", function(x) {
       rhdf5::h5read(filepath, p),
       error = function(e) NULL
     )
-    if (!is.null(result)) return(result)
+    if (!is.null(result)) {
+      return(result)
+    }
   }
   NULL
 })
diff --git a/R/analyse-helpers.R b/R/analyse-helpers.R
index d3bce7a..73771e2 100644
--- a/R/analyse-helpers.R
+++ b/R/analyse-helpers.R
@@ -219,12 +219,14 @@
     }
   } else {
     if (pbar) {
-      fits <- pbapply::pblapply(element.subset,
+      fits <- pbapply::pblapply(
+        element.subset,
         FUN,
         ...
       )
     } else {
-      fits <- lapply(element.subset,
+      fits <- lapply(
+        element.subset,
         FUN,
         ...
       )
diff --git a/R/merge.R b/R/merge.R
index 60fc3c1..f96a31b 100644
--- a/R/merge.R
+++ b/R/merge.R
@@ -78,7 +78,7 @@
 #'
 #' # Use the merged object for cross-scalar analysis
 #' merged$data
-#' scalarNames(merged$data)   # c("FD", "FC")
+#' scalarNames(merged$data) # c("FD", "FC")
 #' head(merged$phenotypes)
 #'
 #' results <- ModelArray.lm(
diff --git a/R/utils.R b/R/utils.R
index 6f0a2fe..2b6d0ea 100644
--- a/R/utils.R
+++ b/R/utils.R
@@ -25,7 +25,6 @@ flagObjectExistInh5 <- function(fn_h5, group_name = "/results", object_name = "m
 }
 
 
-
 #' check if h5 group "results" exist in current .h5 file
 #' @param fn_h5 filename of the .h5 file
 #' @noRd
@@ -491,7 +490,6 @@ gen_gamFormula_fxSmooth <- function(response.var, factor.var, smooth.var, phenot
 }
 
 
-
 #' Generate GAM formula with continuous-by-continuous interaction
 #'
 #' @description

From e73bc8954767546397b19bd4b410bec1df34f063 Mon Sep 17 00:00:00 2001
From: araikes <adam.raikes@gmail.com>
Date: Tue, 31 Mar 2026 10:58:11 -0700
Subject: [PATCH 17/23] Introduce headings into reference page.

---
 _pkgdown.yml | 65 +++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 64 insertions(+), 1 deletion(-)

diff --git a/_pkgdown.yml b/_pkgdown.yml
index 95f2ddd..cf5b3e0 100644
--- a/_pkgdown.yml
+++ b/_pkgdown.yml
@@ -1,7 +1,11 @@
 url: https://pennlinc.github.io/ModelArray/
+
 template:
   bootstrap: 5
 
+development:
+  mode: auto
+
 navbar:
   structure:
     left: [intro, introductions, case-studies, reference, news]
@@ -37,10 +41,69 @@ navbar:
         - text: "Developer Documentation"
           href: articles/doc_for_developer.html
 
+reference:
+- title: "ModelArray Class"
+  desc: >
+    Create, load, and combine ModelArray objects backed by HDF5 files.
+  contents:
+  - ModelArray-class
+  - ModelArray
+  - h5summary
+  - mergeModelArrays
+
+- title: "Accessors"
+  desc: >
+    Extract scalar data, source filenames, results, and metadata from a
+    ModelArray object.
+  contents:
+  - scalars
+  - sources
+  - results
+  - scalarNames
+  - analysisNames
+  - nElements
+  - nInputFiles
+  - numElementsTotal
+  - elementMetadata
+
+- title: "Element-wise Analysis"
+  desc: >
+    Fit statistical models at every element (fixel, voxel, or vertex)
+    in a ModelArray and write results back to HDF5.
+  contents:
+  - ModelArray.lm
+  - ModelArray.gam
+  - ModelArray.wrap
+  - writeResults
+
+- title: "Testing and Debugging"
+  desc: >
+    Build per-element test data and inspect your setup before committing
+    to a full analysis run.
+  contents:
+  - exampleElementData
+
+- title: "Formula Helpers"
+  desc: >
+    Generate GAM formulas for common interaction structures used with
+    ModelArray.gam.
+  contents:
+  - gen_gamFormula_fxSmooth
+  - gen_gamFormula_contIx
+
+- title: "Internal"
+  desc: >
+    Per-element workhorse functions called by the analysis functions.
+    Most users should not need to call these directly.
+  contents:
+  - analyseOneElement.lm
+  - analyseOneElement.gam
+  - analyseOneElement.wrap
+
 redirects:
   - ["articles/wrap.html", "articles/modelling.html"]
   - ["articles/wrap_function.html", "articles/modelling.html"]
   - ["articles/voxel-wise_data.html", "articles/modelling.html"]
   - ["articles/basic_r_intro.html", "articles/installations.html"]
   - ["articles/faq.html", "articles/exploring-h5.html"]
-  - ["articles/debugging.html", "articles/exploring-h5.html"]
+  - ["articles/debugging.html", "articles/exploring-h5.html"]
\ No newline at end of file

From 7e4da429828586fc16604dc4e73e16f9186e62d0 Mon Sep 17 00:00:00 2001
From: araikes <adam.raikes@gmail.com>
Date: Tue, 31 Mar 2026 11:06:11 -0700
Subject: [PATCH 18/23] Regenerate Rd files

---
 man/ModelArray.Rd              |   2 +-
 man/analyseOneElement.gam.Rd   | 108 ++++++++++++++++++++++-----------
 man/analyseOneElement.lm.Rd    |  90 +++++++++++++++++----------
 man/analyseOneElement.wrap.Rd  |  99 ++++++++++++++++++++++--------
 man/analysisNames.Rd           |   8 ++-
 man/gen_gamFormula_contIx.Rd   |  62 +++++++++++++++----
 man/gen_gamFormula_fxSmooth.Rd |  93 ++++++++++++++++++++++------
 man/h5summary.Rd               |  53 +++++++++++++---
 man/mergeModelArrays.Rd        | 101 ++++++++++++++++++++++++++----
 man/pipe.Rd                    |   2 +-
 man/results.Rd                 |  10 +--
 man/scalarNames.Rd             |   2 +-
 man/scalars.Rd                 |   4 +-
 man/sources.Rd                 |   4 +-
 man/writeResults.Rd            |  76 ++++++++++++++++++++---
 15 files changed, 547 insertions(+), 167 deletions(-)

diff --git a/man/ModelArray.Rd b/man/ModelArray.Rd
index d23c8d2..0a82c63 100644
--- a/man/ModelArray.Rd
+++ b/man/ModelArray.Rd
@@ -40,7 +40,7 @@ If \code{analysis_names} is non-empty, saved results are loaded from
 }
 \examples{
 \dontrun{
-ma <- ModelArray("path/to/data.h5", scalar_types = c("FD", "FC"))
+ma <- ModelArray("path/to/data.h5", scalar_types = c("FD"))
 ma
 }
 
diff --git a/man/analyseOneElement.gam.Rd b/man/analyseOneElement.gam.Rd
index 7dd5329..58733da 100644
--- a/man/analyseOneElement.gam.Rd
+++ b/man/analyseOneElement.gam.Rd
@@ -2,7 +2,12 @@
 % Please edit documentation in R/ModelArray_Constructor.R
 \name{analyseOneElement.gam}
 \alias{analyseOneElement.gam}
-\title{Fit GAM for one element}
+\title{Fit a GAM for a single element
+returns metadata (column names, smooth term names, parametric term names,
+and the smoothing parameter criterion attribute name) used by
+\code{\link{ModelArray.gam}} to initialise the output data.frame. When
+\code{flag_initiate = FALSE}, it returns a numeric vector representing one
+row of the final results matrix.}
 \usage{
 analyseOneElement.gam(
   i_element,
@@ -22,58 +27,87 @@ analyseOneElement.gam(
 )
 }
 \arguments{
-\item{i_element}{An integer, the i_th element, starting from 1.
-For initiating (flag_initiate = TRUE), use i_element=1}
+\item{i_element}{Integer. The 1-based index of the element to analyse.}
 
-\item{formula}{A formula (passed to `mgcv::gam()`)}
+\item{formula}{A \code{\link[stats]{formula}} passed to
+\code{\link[mgcv]{gam}}.}
 
-\item{modelarray}{ModelArray class}
+\item{modelarray}{A \linkS4class{ModelArray} object.}
 
-\item{phenotypes}{A data.frame of the cohort with columns of independent variables
-and covariates to be added to the model}
+\item{phenotypes}{A data.frame of the cohort with columns of independent
+variables and covariates. Must contain a \code{"source_file"} column
+matching \code{sources(modelarray)[[scalar]]}.}
 
-\item{scalar}{A character. The name of the element-wise scalar to be analysed}
+\item{scalar}{Character. The name of the element-wise scalar to analyse.
+Must be one of \code{names(scalars(modelarray))}.}
 
-\item{var.smoothTerms}{The list of variables to save for smooth terms
-(got from broom::tidy(parametric = FALSE)). Example smooth term: age in formula "outcome ~ s(age)".}
+\item{var.smoothTerms}{Character vector. Statistics to extract for smooth
+terms from \code{\link[broom]{tidy.gam}} with \code{parametric = FALSE}
+(e.g. \code{"edf"}, \code{"ref.df"}, \code{"statistic"},
+\code{"p.value"}).}
 
-\item{var.parametricTerms}{The list of variables to save for parametric terms
-(got from broom::tidy(parametric = TRUE)). Example parametric term: sex in formula "outcome ~ s(age) + sex".}
+\item{var.parametricTerms}{Character vector. Statistics to extract for
+parametric terms from \code{\link[broom]{tidy.gam}} with
+\code{parametric = TRUE} (e.g. \code{"estimate"}, \code{"std.error"},
+\code{"statistic"}, \code{"p.value"}).}
 
-\item{var.model}{The list of variables to save for the model (got from broom::glance() and summary()).}
+\item{var.model}{Character vector. Statistics to extract for the overall
+model from \code{\link[broom]{glance.gam}} and
+\code{\link[mgcv]{summary.gam}} (e.g. \code{"adj.r.squared"},
+\code{"dev.expl"}, \code{"sp.criterion"}).}
 
-\item{num.subj.lthr}{The minimal number of subjects with valid value in input h5 file,
-i.e. number of subjects with finite values (defined by `is.finite()`,
-i.e. not NaN or NA or Inf) in h5 file > \code{num.subj.lthr},
-then this element will be run normally;
-otherwise, this element will be skipped and statistical outputs will be set as NaN.}
+\item{num.subj.lthr}{Numeric. The pre-computed minimum number of subjects
+with finite values required for this element to be analysed. Elements
+below this threshold are skipped. This value is typically computed by
+the parent function from \code{num.subj.lthr.abs} and
+\code{num.subj.lthr.rel}.}
 
-\item{num.stat.output}{The number of output stat metrics
-(for generating all NaN stat when # subjects does not meet criteria).
-This includes column `element_id`.
-This is required when flag_initiate = TRUE.}
+\item{num.stat.output}{Integer or \code{NULL}. The total number of output
+columns (including \code{element_id}). Used when
+\code{flag_initiate = FALSE} to generate an all-\code{NaN} row for
+skipped elements. Must be \code{NULL} when \code{flag_initiate = TRUE}.}
 
-\item{flag_initiate}{TRUE or FALSE, Whether this is to initiate the new analysis.
-If TRUE, it will return column names etc to be used for initiating data.frame;
-if FALSE, it will return the list of requested statistic values.}
+\item{flag_initiate}{Logical. If \code{TRUE}, fit the model once and return
+metadata for initialising the output data.frame (column names and term
+names). If \code{FALSE}, return a numeric vector of results for this
+element.}
 
-\item{flag_sse}{TRUE or FALSE, Whether to calculate SSE (sum of squared error) for the model (`model.sse`).
-SSE is needed for calculating partial R-squared.}
+\item{flag_sse}{Logical. If \code{TRUE}, also compute the error sum of
+squares (\code{model.sse}) for the model, which is needed for
+partial R-squared calculations in \code{\link{ModelArray.gam}}.
+Default: \code{FALSE}.}
 
-\item{on_error}{Character: one of "stop", "skip", or "debug". When an error occurs while
-fitting one element, choose whether to stop, skip returning all-NaN values for that element,
-or drop into `browser()` (if interactive) then skip. Default: "stop".}
+\item{on_error}{Character. One of \code{"stop"}, \code{"skip"}, or
+\code{"debug"}. When an error occurs fitting the model: \code{"stop"}
+halts execution; \code{"skip"} returns all-\code{NaN} for this element;
+\code{"debug"} drops into \code{\link{browser}} (if interactive) then
+skips. Default: \code{"stop"}.}
 
-\item{...}{Additional arguments for `mgcv::gam()`}
+\item{...}{Additional arguments passed to \code{\link[mgcv]{gam}}.}
 }
 \value{
-If flag_initiate==TRUE, returns column names,
-list of term names of final results, and attr.name of sp.criterion;
-if flag_initiate==FALSE, it will return the list of requested statistic values for a element.
+If \code{flag_initiate = TRUE}, a list with components:
+\describe{
+\item{column_names}{Character vector of output column names.}
+\item{list.smoothTerms}{Character vector of smooth term names.}
+\item{list.parametricTerms}{Character vector of parametric term names.}
+\item{sp.criterion.attr.name}{Character. The name attribute of the
+smoothing parameter selection criterion (e.g. \code{"REML"} or
+\code{"GCV.Cp"}).}
+}
+If \code{flag_initiate = FALSE}, a numeric vector of length
+\code{num.stat.output} with \code{element_id} (0-based) first and
+requested statistics in subsequent positions. All-\code{NaN} (except
+\code{element_id}) if the element was skipped.
 }
 \description{
-`analyseOneElement.gam` fits a GAM model for one element data, and returns requested model statistics.
+If the number of subjects with finite scalar values does not exceed
+\code{num.subj.lthr}, the element is skipped and all statistics are set
+to \code{NaN}.
 }
-\details{
-`ModelArray.gam` iteratively calls this function to get statistics for all requested elements.
+\seealso{
+\code{\link{ModelArray.gam}} which calls this function iteratively,
+\code{\link{analyseOneElement.lm}} for the linear model equivalent,
+\code{\link{analyseOneElement.wrap}} for user-supplied functions.
 }
+\keyword{internal}
diff --git a/man/analyseOneElement.lm.Rd b/man/analyseOneElement.lm.Rd
index 52987a4..1069a1d 100644
--- a/man/analyseOneElement.lm.Rd
+++ b/man/analyseOneElement.lm.Rd
@@ -2,7 +2,10 @@
 % Please edit documentation in R/ModelArray_Constructor.R
 \name{analyseOneElement.lm}
 \alias{analyseOneElement.lm}
-\title{Fit linear model for one element.}
+\title{Fit a linear model for a single element
+If the number of subjects with finite scalar values (not \code{NaN},
+\code{NA}, or \code{Inf}) does not exceed \code{num.subj.lthr}, the
+element is skipped and all statistics are set to \code{NaN}.}
 \usage{
 analyseOneElement.lm(
   i_element,
@@ -20,50 +23,75 @@ analyseOneElement.lm(
 )
 }
 \arguments{
-\item{i_element}{An integer, the i_th element, starting from 1.
-For initiating (flag_initiate = TRUE), use i_element=1}
+\item{i_element}{Integer. The 1-based index of the element to analyse.}
 
-\item{formula}{Formula (passed to `stats::lm()`)}
+\item{formula}{A \code{\link[stats]{formula}} passed to
+\code{\link[stats]{lm}}.}
 
-\item{modelarray}{ModelArray class}
+\item{modelarray}{A \linkS4class{ModelArray} object.}
 
-\item{phenotypes}{A data.frame of the cohort with columns of independent variables
-and covariates to be added to the model.}
+\item{phenotypes}{A data.frame of the cohort with columns of independent
+variables and covariates. Must contain a \code{"source_file"} column
+matching \code{sources(modelarray)[[scalar]]}.}
 
-\item{scalar}{A character. The name of the element-wise scalar to be analysed}
+\item{scalar}{Character. The name of the element-wise scalar to analyse.
+Must be one of \code{names(scalars(modelarray))}.}
 
-\item{var.terms}{A list of characters. The list of variables to save for terms (got from `broom::tidy()`).}
+\item{var.terms}{Character vector. Statistics to extract per term from
+\code{\link[broom]{tidy.lm}} (e.g. \code{"estimate"}, \code{"statistic"},
+\code{"p.value"}).}
 
-\item{var.model}{A list of characters. The list of variables to save for the model (got from `broom::glance()`).}
+\item{var.model}{Character vector. Statistics to extract for the overall
+model from \code{\link[broom]{glance.lm}} (e.g. \code{"adj.r.squared"},
+\code{"p.value"}).}
 
-\item{num.subj.lthr}{The minimal number of subjects with valid value in input h5 file,
-i.e. number of subjects with finite values (defined by `is.finite()`,
-i.e. not NaN or NA or Inf) in h5 file > \code{num.subj.lthr},
-then this element will be run normally;
-otherwise, this element will be skipped and statistical outputs will be set as NaN.}
+\item{num.subj.lthr}{Numeric. The pre-computed minimum number of subjects
+with finite values required for this element to be analysed. Elements
+below this threshold are skipped. This value is typically computed by
+the parent function from \code{num.subj.lthr.abs} and
+\code{num.subj.lthr.rel}.}
 
-\item{num.stat.output}{The number of output stat metrics
-(for generating all NaN stat when # subjects does not meet criteria).
-This includes column `element_id`.
-This is required when flag_initiate = TRUE.}
+\item{num.stat.output}{Integer or \code{NULL}. The total number of output
+columns (including \code{element_id}). Used when
+\code{flag_initiate = FALSE} to generate an all-\code{NaN} row for
+skipped elements. Must be \code{NULL} when \code{flag_initiate = TRUE}.}
 
-\item{flag_initiate}{TRUE or FALSE, Whether this is to initiate the new analysis.
-If TRUE, it will return column names etc to be used for initiating data.frame;
-if FALSE, it will return the list of requested statistic values.}
+\item{flag_initiate}{Logical. If \code{TRUE}, fit the model once and return
+metadata for initialising the output data.frame (column names and term
+names). If \code{FALSE}, return a numeric vector of results for this
+element.}
 
-\item{on_error}{Character: one of "stop", "skip", or "debug". When an error occurs while
-fitting one element, choose whether to stop, skip returning all-NaN values for that element,
-or drop into `browser()` (if interactive) then skip. Default: "stop".}
+\item{on_error}{Character. One of \code{"stop"}, \code{"skip"}, or
+\code{"debug"}. When an error occurs fitting the model: \code{"stop"}
+halts execution; \code{"skip"} returns all-\code{NaN} for this element;
+\code{"debug"} drops into \code{\link{browser}} (if interactive) then
+skips. Default: \code{"stop"}.}
 
-\item{...}{Additional arguments for `stats::lm()`}
+\item{...}{Additional arguments passed to \code{\link[stats]{lm}}.}
 }
 \value{
-If flag_initiate==TRUE, returns column names, and list of term names of final results;
-if flag_initiate==FALSE, it will return the list of requested statistic values for a element.
+If \code{flag_initiate = TRUE}, a list with components:
+\describe{
+\item{column_names}{Character vector. The column names for the output
+data.frame, with \code{"element_id"} first.}
+\item{list.terms}{Character vector. The names of the model terms
+(from \code{\link[broom]{tidy.lm}}).}
+}
+If \code{flag_initiate = FALSE}, a numeric vector of length
+\code{num.stat.output} with \code{element_id} (0-based) as the first
+value and the requested statistics in subsequent positions. All-\code{NaN}
+(except \code{element_id}) if the element had insufficient valid subjects
+or if an error occurred with \code{on_error = "skip"}.
 }
 \description{
-`analyseOneElement.lm` fits a linear model for one element data, and returns requested model statistics.
+Fit a linear model for a single element
+If the number of subjects with finite scalar values (not \code{NaN},
+\code{NA}, or \code{Inf}) does not exceed \code{num.subj.lthr}, the
+element is skipped and all statistics are set to \code{NaN}.
 }
-\details{
-`ModelArray.lm` iteratively calls this function to get statistics for all requested elements.
+\seealso{
+\code{\link{ModelArray.lm}} which calls this function iteratively,
+\code{\link{analyseOneElement.gam}} for the GAM equivalent,
+\code{\link{analyseOneElement.wrap}} for user-supplied functions.
 }
+\keyword{internal}
diff --git a/man/analyseOneElement.wrap.Rd b/man/analyseOneElement.wrap.Rd
index 762e4db..1780548 100644
--- a/man/analyseOneElement.wrap.Rd
+++ b/man/analyseOneElement.wrap.Rd
@@ -2,7 +2,7 @@
 % Please edit documentation in R/ModelArray_Constructor.R
 \name{analyseOneElement.wrap}
 \alias{analyseOneElement.wrap}
-\title{Run a user-supplied function for one element}
+\title{Run a user-supplied function for a single element}
 \usage{
 analyseOneElement.wrap(
   i_element,
@@ -18,42 +18,93 @@ analyseOneElement.wrap(
 )
 }
 \arguments{
-\item{i_element}{An integer, the i_th element, starting from 1.}
+\item{i_element}{Integer. The 1-based index of the element to analyse.}
 
-\item{user_fun}{A function that accepts at least an argument named `data` (the per-element
-`phenotypes` with the response column appended) and returns a one-row data.frame/tibble,
-named list, or named vector.}
+\item{user_fun}{A function that accepts at least an argument named
+\code{data} (a data.frame: \code{phenotypes} with scalar columns
+appended for the current element) and returns a one-row
+data.frame/tibble, a named list, or a named atomic vector.}
 
-\item{modelarray}{ModelArray class}
+\item{modelarray}{A \linkS4class{ModelArray} object.}
 
-\item{phenotypes}{A data.frame of the cohort with columns of independent variables and covariates}
+\item{phenotypes}{A data.frame of the cohort with columns of independent
+variables and covariates. Must contain a \code{"source_file"} column
+matching \code{sources(modelarray)[[scalar]]}.}
 
-\item{scalar}{A character. The name of the element-wise scalar to be analysed}
+\item{scalar}{Character. The name of the element-wise scalar to analyse.
+Must be one of \code{names(scalars(modelarray))}.}
 
-\item{num.subj.lthr}{The minimal number of subjects with valid value in input h5 file}
+\item{num.subj.lthr}{Numeric. The pre-computed minimum number of subjects
+with finite values required for this element to be analysed. Elements
+below this threshold are skipped. This value is typically computed by
+the parent function from \code{num.subj.lthr.abs} and
+\code{num.subj.lthr.rel}.}
 
-\item{num.stat.output}{The number of output stat metrics (including `element_id`).
-Required when `flag_initiate = TRUE`.}
+\item{num.stat.output}{Integer or \code{NULL}. The total number of output
+columns (including \code{element_id}). Used when
+\code{flag_initiate = FALSE} to generate an all-\code{NaN} row for
+skipped elements. Must be \code{NULL} when \code{flag_initiate = TRUE}.}
 
-\item{flag_initiate}{TRUE or FALSE, whether this is to initiate the new analysis to get column names}
+\item{flag_initiate}{Logical. If \code{TRUE}, fit the model once and return
+metadata for initialising the output data.frame (column names and term
+names). If \code{FALSE}, return a numeric vector of results for this
+element.}
 
-\item{on_error}{Character: one of "stop", "skip", or "debug". When an error occurs while
-executing the user function, choose whether to stop, skip returning all-NaN values for this
-element, or drop into `browser()` (if interactive) then skip. Default: "stop".}
+\item{on_error}{Character. One of \code{"stop"}, \code{"skip"}, or
+\code{"debug"}. When an error occurs fitting the model: \code{"stop"}
+halts execution; \code{"skip"} returns all-\code{NaN} for this element;
+\code{"debug"} drops into \code{\link{browser}} (if interactive) then
+skips. Default: \code{"stop"}.}
 
-\item{...}{Additional arguments forwarded to `FUN`}
+\item{...}{Additional arguments forwarded to \code{user_fun}.}
 }
 \value{
-If `flag_initiate==TRUE`, returns a list with `column_names`.
-If `flag_initiate==FALSE`, returns a numeric vector representing the one-row result for this element
-with `element_id` as the first value.
+If \code{flag_initiate = TRUE}, a list with one component:
+\describe{
+\item{column_names}{Character vector. The column names derived from
+the return value of \code{user_fun}, with \code{"element_id"}
+prepended.}
+}
+If \code{flag_initiate = FALSE}, a numeric vector of length
+\code{num.stat.output} with \code{element_id} (0-based) first and
+the coerced output of \code{user_fun} in subsequent positions.
+All-\code{NaN} (except \code{element_id}) if the element was skipped
+or if an error occurred with \code{on_error = "skip"}.
 }
 \description{
-`analyseOneElement.wrap` runs a user-supplied function `FUN` on a single element's data.
-It prepares the per-element data by attaching the element values as a new column named by `scalar`
-to the provided `phenotypes` data.frame, then calls `FUN(data = dat, ...)`.
+Runs a user-supplied function on one element's data, preparing the
+per-element data.frame by attaching all scalar values as new columns to
+the provided \code{phenotypes}. This is the per-element workhorse called
+iteratively by \code{\link{ModelArray.wrap}}.
 }
 \details{
-The user-supplied `FUN` should return either a one-row data.frame/tibble, a named list, or a named vector.
-The result will be coerced to a one-row tibble and combined into the final results matrix across elements.
+Most users should call \code{\link{ModelArray.wrap}} directly, which handles
+looping, parallelisation, and result assembly.
+\code{analyseOneElement.wrap} is exported for advanced use cases such as
+debugging a single element or building custom analysis loops.
+
+The user-supplied \code{user_fun} is called as
+\code{user_fun(data = dat, ...)} where \code{dat} is \code{phenotypes} with
+columns appended for \strong{all} scalars in the \linkS4class{ModelArray}
+(not just the one named by \code{scalar}). The function should return a
+one-row data.frame/tibble, a named list, or a named atomic vector. The
+result is coerced to a numeric vector for assembly into the final results
+matrix.
+
+Unlike \code{\link{analyseOneElement.lm}} and
+\code{\link{analyseOneElement.gam}}, this function appends \strong{all}
+scalar columns (not just the response) and checks for column name
+collisions between scalar names and existing \code{phenotypes} columns.
+
+If the number of subjects with finite values across all scalars does not
+exceed \code{num.subj.lthr}, the element is skipped and all statistics
+are set to \code{NaN}.
+}
+\seealso{
+\code{\link{ModelArray.wrap}} which calls this function
+iteratively, \code{\link{exampleElementData}} for building a test
+data.frame matching the format passed to \code{user_fun},
+\code{\link{analyseOneElement.lm}} for the linear model equivalent,
+\code{\link{analyseOneElement.gam}} for the GAM equivalent.
 }
+\keyword{internal}
diff --git a/man/analysisNames.Rd b/man/analysisNames.Rd
index 88470c6..2b27f7e 100644
--- a/man/analysisNames.Rd
+++ b/man/analysisNames.Rd
@@ -23,9 +23,11 @@ Returns the names of all analysis result sets currently loaded in a
 }
 \examples{
 \dontrun{
-ma <- ModelArray("data.h5", scalar_types = c("FD"),
-                 analysis_names = c("lm_age"))
-analysisNames(ma)   # "lm_age"
+ma <- ModelArray("data.h5",
+  scalar_types = c("FD"),
+  analysis_names = c("lm_age")
+)
+analysisNames(ma) # "lm_age"
 }
 
 }
diff --git a/man/gen_gamFormula_contIx.Rd b/man/gen_gamFormula_contIx.Rd
index 9f778b1..eadf41d 100644
--- a/man/gen_gamFormula_contIx.Rd
+++ b/man/gen_gamFormula_contIx.Rd
@@ -2,27 +2,67 @@
 % Please edit documentation in R/utils.R
 \name{gen_gamFormula_contIx}
 \alias{gen_gamFormula_contIx}
-\title{Generate GAM formula with continuous*continuous interaction}
+\title{Generate GAM formula with continuous-by-continuous interaction}
 \usage{
 gen_gamFormula_contIx(response.var, cont1.var, cont2.var, fx = TRUE, k = NULL)
 }
 \arguments{
-\item{response.var}{character class, the variable name for response}
+\item{response.var}{Character. The variable name for the response
+(dependent variable), typically a scalar name like \code{"FD"}.}
 
-\item{cont1.var}{character class, the name of the first continuous variable}
+\item{cont1.var}{Character. The name of the first continuous variable.}
 
-\item{cont2.var}{character class, the name of the second continuous variable}
+\item{cont2.var}{Character. The name of the second continuous variable.}
 
-\item{fx}{TRUE or FALSE, to be used in smooth term s(). Recommend TRUE.}
+\item{fx}{Logical. Passed to \code{\link[mgcv]{ti}}. If \code{TRUE}
+(recommended), the smooth is treated as fixed degrees of freedom.
+Default is \code{TRUE} \link{2}.}
 
-\item{k}{integer, to be used in smooth term including the interaction term.
-If NULL (no entry), will use default value as in mgcv::s()}
+\item{k}{Integer or \code{NULL}. Basis dimension passed to
+\code{\link[mgcv]{ti}} for all three terms (both main effects and the
+interaction). If \code{NULL} (default), uses the default from
+\code{mgcv::ti()} \link{2}.}
 }
 \value{
-The formula generated
+A \code{\link[stats]{formula}} object.
 }
 \description{
-This function will generate a formula in the following format: \code{y ~ ti(x) + ti(z) + ti(x,z)},
-where \code{y} is \code{response.var}, \code{x} is \code{cont1.var}, and \code{z} is \code{cont2.var}.
-The formula generated could be further modified, e.g. adding covariates.
+Generates a formula in the format
+\code{y ~ ti(x) + ti(z) + ti(x, z)}, where \code{y} is
+\code{response.var}, \code{x} is \code{cont1.var}, and \code{z} is
+\code{cont2.var}. The formula generated can be further modified, e.g.
+by adding covariates.
+}
+\details{
+This helper uses \code{\link[mgcv]{ti}} (tensor product interaction)
+terms so that the interaction \code{ti(x, z)} captures only the
+interaction effect, separate from the main effects \code{ti(x)} and
+\code{ti(z)}. This decomposition is important for interpretability and
+for requesting \code{changed.rsq.term.index} in
+\code{\link{ModelArray.gam}} \link{4}.
+}
+\examples{
+\dontrun{
+formula <- gen_gamFormula_contIx(
+  response.var = "FD",
+  cont1.var = "age",
+  cont2.var = "cognition"
+)
+formula
+
+# Use in ModelArray.gam with changed R-squared for the interaction
+results <- ModelArray.gam(
+  formula,
+  data = ma,
+  phenotypes = phenotypes,
+  scalar = "FD",
+  changed.rsq.term.index = list(3)
+)
+}
+
+}
+\seealso{
+\code{\link{gen_gamFormula_fxSmooth}} for factor-smooth
+interactions, \code{\link{ModelArray.gam}} which accepts the generated
+formula.
 }
diff --git a/man/gen_gamFormula_fxSmooth.Rd b/man/gen_gamFormula_fxSmooth.Rd
index 87f9a74..b8d92c7 100644
--- a/man/gen_gamFormula_fxSmooth.Rd
+++ b/man/gen_gamFormula_fxSmooth.Rd
@@ -16,35 +16,90 @@ gen_gamFormula_fxSmooth(
 )
 }
 \arguments{
-\item{response.var}{character class, the variable name for response}
+\item{response.var}{Character. The variable name for the response
+(dependent variable), typically a scalar name like \code{"FD"}.}
 
-\item{factor.var}{character class, the variable name for factor. It should be an ordered factor.
-If not, it will generate it as a new column in `phenotypes`, which requires `reference.group`.}
+\item{factor.var}{Character. The variable name for the factor. It should
+be an ordered factor in \code{phenotypes}. If not, an ordered factor
+will be generated as a new column, which requires \code{reference.group} \link{2}.}
 
-\item{smooth.var}{character class, the variable name in smooth term as main effect}
+\item{smooth.var}{Character. The variable name for the smooth term main
+effect (e.g. \code{"age"}).}
 
-\item{phenotypes}{data.frame class, the cohort matrix with columns of independent variables (including
-\code{factor.var} and \code{smooth.var}) to be added to the model}
+\item{phenotypes}{A data.frame of the cohort with columns of independent
+variables, including \code{factor.var} and \code{smooth.var}.}
 
-\item{reference.group}{character class, the reference group for ordered factor of `factor.var`; required when
-`factor.var` in `phenotypes` is not an ordered factor.}
+\item{reference.group}{Character. The reference (baseline) group for the
+ordered factor of \code{factor.var}. Required when \code{factor.var}
+in \code{phenotypes} is not already an ordered factor \link{2}.}
 
-\item{prefix.ordered.factor}{character class, the prefix for ordered factor; required when `factor.var` in
-`phenotypes` is not an ordered factor.}
+\item{prefix.ordered.factor}{Character. Prefix for the ordered factor
+column name. Required when \code{factor.var} in \code{phenotypes} is
+not already an ordered factor. Default is \code{"o"} \link{2}.}
 
-\item{fx}{TRUE or FALSE, to be used in smooth term s(). Recommend TRUE.}
+\item{fx}{Logical. Passed to \code{\link[mgcv]{s}}. If \code{TRUE}
+(recommended), the smooth is treated as fixed degrees of freedom.
+Default is \code{TRUE}.}
 
-\item{k}{integer, to be used in smooth term including the interaction term. If NULL (no entry), will use default
-value as in mgcv::s()}
+\item{k}{Integer or \code{NULL}. Basis dimension passed to
+\code{\link[mgcv]{s}} for both the main smooth and interaction terms.
+If \code{NULL} (default), uses the default from \code{mgcv::s()} \link{2}.}
 }
 \value{
-a list, including: 1) formula generated; 2) data.frame phenotypes - updated if argument factor.var
-is not an ordered factor
+A list with two components:
+\describe{
+\item{formula}{The generated \code{\link[stats]{formula}} object.}
+\item{phenotypes}{The (possibly updated) data.frame. If
+\code{factor.var} was not already an ordered factor, a new column
+named \code{paste0(prefix.ordered.factor, factor.var)} is added.
+Otherwise identical to the input \link{2}.}
+}
 }
 \description{
-This function will generate a formula in the following format:
-\code{y ~ orderedFactor + s(x) + s(x, by=orderedFactor)},
+Generates a formula in the format
+\code{y ~ orderedFactor + s(x) + s(x, by = orderedFactor)},
 where \code{y} is \code{response.var}, \code{x} is \code{smooth.var},
-and \code{orderedFactor} is \code{factor.var} - see \code{factor.var} for more.
-The formula generated could be further modified, e.g. adding covariates.
+and \code{orderedFactor} is \code{factor.var}.
+The formula generated can be further modified, e.g. by adding covariates.
+}
+\details{
+This helper exists because setting up factor-smooth interactions in
+\code{\link[mgcv]{gam}} requires an ordered factor and a specific formula
+structure. If \code{factor.var} in \code{phenotypes} is not already an
+ordered factor, this function creates one using \code{reference.group} as
+the baseline level and adds it as a new column (named with
+\code{prefix.ordered.factor} prepended to \code{factor.var}) \link{2}.
+
+The returned \code{phenotypes} data.frame must be used in the subsequent
+\code{\link{ModelArray.gam}} call so that the ordered factor column is
+available to the model.
+}
+\examples{
+\dontrun{
+phenotypes <- read.csv("cohort.csv")
+
+# factor.var is not yet ordered - function creates it
+result <- gen_gamFormula_fxSmooth(
+  response.var = "FD",
+  factor.var = "sex",
+  smooth.var = "age",
+  phenotypes = phenotypes,
+  reference.group = "female"
+)
+result$formula
+
+# Use the updated phenotypes (contains the ordered factor column)
+results <- ModelArray.gam(
+  result$formula,
+  data = ma,
+  phenotypes = result$phenotypes,
+  scalar = "FD"
+)
+}
+
+}
+\seealso{
+\code{\link{gen_gamFormula_contIx}} for continuous-by-continuous
+interactions, \code{\link{ModelArray.gam}} which accepts the generated
+formula.
 }
diff --git a/man/h5summary.Rd b/man/h5summary.Rd
index d3f2836..3f76000 100644
--- a/man/h5summary.Rd
+++ b/man/h5summary.Rd
@@ -2,21 +2,58 @@
 % Please edit documentation in R/utils.R
 \name{h5summary}
 \alias{h5summary}
-\title{Summarize an h5 file without loading the full ModelArray}
+\alias{print.h5summary}
+\title{Summarize an HDF5 file without loading a full ModelArray}
 \usage{
 h5summary(filepath)
+
+\method{print}{h5summary}(x, ...)
 }
 \arguments{
-\item{filepath}{Path to an h5 file}
+\item{filepath}{Character. Path to an HDF5 (\code{.h5}) file.}
+
+\item{x}{An \code{h5summary} object as returned by
+\code{\link{h5summary}}.}
+
+\item{...}{Additional arguments (currently ignored).}
 }
 \value{
-A list with components:
-  \item{scalars}{A data.frame with columns: name, nElements, nInputFiles}
-  \item{analyses}{Character vector of analysis names}
-  \item{filepath}{The input filepath}
+An object of class \code{"h5summary"}, which is a list with
+components:
+\describe{
+\item{scalars}{A data.frame with columns \code{name},
+\code{nElements}, and \code{nInputFiles} \link{2}.}
+\item{analyses}{Character vector of analysis names found under
+\code{/results/} \link{2}.}
+\item{filepath}{The input filepath.}
+}
+
+Invisible \code{x}. Called for its side effect of printing a
+human-readable summary to the console.
 }
 \description{
-Reads the h5 file structure and returns a summary of available scalars,
+Reads the HDF5 file structure and returns a summary of available scalars,
 their dimensions, and any saved analyses. Useful for inspecting large files
-without constructing a full ModelArray object.
+without constructing a full \linkS4class{ModelArray} object.
+}
+\details{
+This function opens the HDF5 file read-only via \code{\link[rhdf5]{h5ls}},
+inspects the group structure under \code{/scalars/} and \code{/results/},
+and closes the file. It does not load any data into memory. The returned
+object has a \code{print} method that displays a formatted summary \link{2}.
+}
+\examples{
+\dontrun{
+h5summary("path/to/data.h5")
+
+# Inspect before deciding which scalars to load
+info <- h5summary("path/to/data.h5")
+info$scalars$name
+ma <- ModelArray("path/to/data.h5", scalar_types = info$scalars$name)
+}
+
+}
+\seealso{
+\code{\link{ModelArray}} for loading the full object,
+\linkS4class{ModelArray} for the class definition.
 }
diff --git a/man/mergeModelArrays.Rd b/man/mergeModelArrays.Rd
index bfdbb0f..a7f279f 100644
--- a/man/mergeModelArrays.Rd
+++ b/man/mergeModelArrays.Rd
@@ -2,27 +2,102 @@
 % Please edit documentation in R/merge.R
 \name{mergeModelArrays}
 \alias{mergeModelArrays}
-\title{Merge multiple ModelArrays from different h5 files}
+\title{Merge multiple ModelArrays from different HDF5 files}
 \usage{
 mergeModelArrays(modelarrays, phenotypes_list, merge_on)
 }
 \arguments{
-\item{modelarrays}{A list of ModelArray objects}
+\item{modelarrays}{A list of at least two \linkS4class{ModelArray}
+objects, each constructed from a different HDF5 file.}
 
-\item{phenotypes_list}{A list of data.frames, one per ModelArray. Each must
-contain a `source_file` column matching its corresponding ModelArray's sources.}
+\item{phenotypes_list}{A list of data.frames, one per
+\linkS4class{ModelArray} in \code{modelarrays}. Each must contain a
+\code{source_file} column whose entries match the corresponding
+ModelArray's sources (i.e. \code{sources(modelarrays[[i]])}).
+Each must also contain all columns named in \code{merge_on} \link{6}.}
 
-\item{merge_on}{Character vector of column names to join phenotypes on
-(e.g., `c("subject_id")`). These columns must exist in all phenotypes data.frames.}
+\item{merge_on}{Character vector of column names present in all
+data.frames in \code{phenotypes_list}, used to inner-join subjects
+across sessions/modalities (e.g. \code{c("subject_id")}). The
+combination of these columns must uniquely identify each subject
+within each data.frame \link{6}.}
 }
 \value{
-A list with:
-  \item{data}{A combined ModelArray with scalars from all inputs}
-  \item{phenotypes}{The inner-joined phenotypes data.frame. The original
-    `source_file` columns are renamed to `source_file.<scalar_name>`.}
+A list with two components:
+\describe{
+\item{data}{A combined \linkS4class{ModelArray} containing scalars
+from all inputs. Each scalar's columns are subsetted and reordered
+to match the inner-joined subject list.}
+\item{phenotypes}{The inner-joined data.frame. Original
+\code{source_file} columns are renamed to
+\code{source_file.<scalar_name>} and a new unified
+\code{source_file} column is added for use with analysis
+functions \link{6}.}
+}
 }
 \description{
-Combines scalars from multiple ModelArray objects into a single ModelArray,
-aligning subjects via phenotype columns. Uses `DelayedArray::acbind()` for
-virtual column-binding — no h5 rewriting is needed.
+Combines scalars from multiple \linkS4class{ModelArray} objects into a
+single \linkS4class{ModelArray}, aligning subjects via shared phenotype
+columns. Uses \code{\link[DelayedArray]{acbind}} for virtual
+column-binding — no HDF5 rewriting is needed.
+}
+\details{
+The merge performs an inner join of the phenotype data.frames on the
+columns specified by \code{merge_on}. Only subjects present in all
+phenotype data.frames are retained. Scalar matrices from each input
+\linkS4class{ModelArray} are column-subsetted and reordered to match
+the joined subject list \link{6}.
+
+A unified \code{source_file} column is created from the \code{merge_on}
+columns so that downstream analysis functions
+(\code{\link{ModelArray.lm}}, \code{\link{ModelArray.gam}},
+\code{\link{ModelArray.wrap}}) can align phenotypes to scalars. The
+original \code{source_file} columns are renamed to
+\code{source_file.<first_scalar_name>} for each input
+\linkS4class{ModelArray} \link{6}.
+
+Scalar names must be unique across all input ModelArrays. If two
+ModelArrays share a scalar name (e.g. both have \code{"FD"}), the
+function will error. Element counts (number of rows) must match
+across all scalars \link{6}.
+
+If element metadata is available (see \code{\link{elementMetadata}}),
+the function checks that it is consistent across inputs and warns if
+it differs or is only partially available \link{6}.
+}
+\examples{
+\dontrun{
+# Load two sessions from different h5 files
+ma1 <- ModelArray("session1.h5", scalar_types = c("FD"))
+ma2 <- ModelArray("session2.h5", scalar_types = c("FC"))
+phen1 <- read.csv("session1_cohort.csv")
+phen2 <- read.csv("session2_cohort.csv")
+
+# Merge on subject ID
+merged <- mergeModelArrays(
+  modelarrays = list(ma1, ma2),
+  phenotypes_list = list(phen1, phen2),
+  merge_on = "subject_id"
+)
+
+# Use the merged object for cross-scalar analysis
+merged$data
+scalarNames(merged$data) # c("FD", "FC")
+head(merged$phenotypes)
+
+results <- ModelArray.lm(
+  FD ~ age + sex,
+  data = merged$data,
+  phenotypes = merged$phenotypes,
+  scalar = "FD"
+)
+}
+
+}
+\seealso{
+\code{\link{ModelArray}} for constructing individual
+ModelArray objects, \code{\link{ModelArray.lm}},
+\code{\link{ModelArray.gam}}, \code{\link{ModelArray.wrap}} for
+fitting models on the merged object,
+\code{\link{elementMetadata}} for element correspondence checks.
 }
diff --git a/man/pipe.Rd b/man/pipe.Rd
index 1f8f237..a648c29 100644
--- a/man/pipe.Rd
+++ b/man/pipe.Rd
@@ -12,7 +12,7 @@ lhs \%>\% rhs
 \item{rhs}{A function call using the magrittr semantics.}
 }
 \value{
-The result of calling `rhs(lhs)`.
+The result of calling \code{rhs(lhs)}.
 }
 \description{
 See \code{magrittr::\link[magrittr:pipe]{\%>\%}} for details.
diff --git a/man/results.Rd b/man/results.Rd
index 862db7f..bc346fc 100644
--- a/man/results.Rd
+++ b/man/results.Rd
@@ -38,10 +38,12 @@ back with \code{\link{writeResults}}.
 }
 \examples{
 \dontrun{
-ma <- ModelArray("data.h5", scalar_types = c("FD"),
-                 analysis_names = c("lm_age"))
-results(ma)                # named list of all results
-results(ma, "lm_age")     # single result set
+ma <- ModelArray("data.h5",
+  scalar_types = c("FD"),
+  analysis_names = c("lm_age")
+)
+results(ma) # named list of all results
+results(ma, "lm_age") # single result set
 results(ma, "lm_age")$results_matrix
 }
 
diff --git a/man/scalarNames.Rd b/man/scalarNames.Rd
index f2449be..84f846a 100644
--- a/man/scalarNames.Rd
+++ b/man/scalarNames.Rd
@@ -22,7 +22,7 @@ Returns the names of all scalar datasets loaded in a
 \examples{
 \dontrun{
 ma <- ModelArray("data.h5", scalar_types = c("FD", "FC"))
-scalarNames(ma)   # c("FD", "FC")
+scalarNames(ma) # c("FD", "FC")
 }
 
 }
diff --git a/man/scalars.Rd b/man/scalars.Rd
index 7b6d504..13459a2 100644
--- a/man/scalars.Rd
+++ b/man/scalars.Rd
@@ -29,8 +29,8 @@ matrices. When called with a single scalar name, returns that one matrix.
 \examples{
 \dontrun{
 ma <- ModelArray("data.h5", scalar_types = c("FD", "FC"))
-scalars(ma)         # named list of all scalars
-scalars(ma, "FD")   # single DelayedArray matrix
+scalars(ma) # named list of all scalars
+scalars(ma, "FD") # single DelayedArray matrix
 }
 
 }
diff --git a/man/sources.Rd b/man/sources.Rd
index f55f6e9..b879979 100644
--- a/man/sources.Rd
+++ b/man/sources.Rd
@@ -25,8 +25,8 @@ file/subject.
 \examples{
 \dontrun{
 ma <- ModelArray("data.h5", scalar_types = c("FD"))
-sources(ma)          # named list
-sources(ma)[["FD"]]  # character vector of filenames
+sources(ma) # named list
+sources(ma)[["FD"]] # character vector of filenames
 }
 
 }
diff --git a/man/writeResults.Rd b/man/writeResults.Rd
index 927de8e..6743d90 100644
--- a/man/writeResults.Rd
+++ b/man/writeResults.Rd
@@ -2,7 +2,7 @@
 % Please edit documentation in R/ModelArray_Constructor.R
 \name{writeResults}
 \alias{writeResults}
-\title{Write outputs from element-wise statistical analysis to the HDF5 file.}
+\title{Write outputs from element-wise statistical analysis to an HDF5 file}
 \usage{
 writeResults(
   fn.output,
@@ -12,20 +12,76 @@ writeResults(
 )
 }
 \arguments{
-\item{fn.output}{A character, The HDF5 (.h5) filename for the output}
+\item{fn.output}{Character. The HDF5 (\code{.h5}) filename for the output.
+The file must already exist; use an absolute path if you encounter
+file-not-found errors.}
 
-\item{df.output}{A data.frame object with element-wise statistical results, returned from `ModelArray.lm()` etc}
+\item{df.output}{A data.frame of element-wise statistical results, as
+returned by \code{\link{ModelArray.lm}},
+\code{\link{ModelArray.gam}}, or \code{\link{ModelArray.wrap}}.
+Must inherit from \code{data.frame} \link{7}.}
 
-\item{analysis_name}{A character, the name of the results}
+\item{analysis_name}{Character. The name for this set of results. Used
+as the group name under \code{/results/} in the HDF5 file.
+Default is \code{"myAnalysis"} \link{7}.}
 
-\item{overwrite}{If a group with the same analysis_name exists in HDF5 file,
-whether overwrite it (TRUE) or not (FALSE)}
+\item{overwrite}{Logical. If a group with the same \code{analysis_name}
+already exists in the HDF5 file, whether to overwrite it (\code{TRUE})
+or skip with a warning (\code{FALSE}). Default is \code{TRUE} \link{7}.}
+}
+\value{
+Invisible \code{NULL}. Called for its side effect of writing
+results to the HDF5 file.
 }
 \description{
-Create a group named `analysis_name` in HDF5 file,
-then write the statistical results data.frame (i.e. for one analysis) in it.
+Creates a group named \code{analysis_name} under \code{/results/} in the
+HDF5 file, then writes the statistical results data.frame (i.e. for one
+analysis) into it as \code{results_matrix} along with column names \link{7}.
 }
 \details{
-debug tip: For "Error in H5File.open(filename, mode, file_create_pl, file_access_pl)",
-check if there is message 'No such file or directory'. Try absolute .h5 filename.
+The results are stored at
+\code{/results/<analysis_name>/results_matrix} with column names saved
+as a separate dataset at
+\code{/results/<analysis_name>/column_names} \link{7}.
+
+If any column of \code{df.output} is not numeric or integer, it is
+coerced to numeric via \code{factor()} and the factor levels are saved
+as a look-up table at
+\code{/results/<analysis_name>/lut_forcol<i>} \link{7}.
+
+\strong{Debugging tip:} If you encounter
+\code{"Error in H5File.open(filename, mode, file_create_pl, file_access_pl)"},
+check if the message mentions "No such file or directory". Try using an
+absolute path for the \code{fn.output} argument \link{7}.
+}
+\examples{
+\dontrun{
+ma <- ModelArray("data.h5", scalar_types = c("FD"))
+phenotypes <- read.csv("cohort.csv")
+
+results <- ModelArray.lm(
+  FD ~ age + sex,
+  data = ma,
+  phenotypes = phenotypes,
+  scalar = "FD"
+)
+
+writeResults(
+  fn.output = "data.h5",
+  df.output = results,
+  analysis_name = "lm_age_sex",
+  overwrite = TRUE
+)
+
+# Verify
+h5summary("data.h5")
+}
+
+}
+\seealso{
+\code{\link{ModelArray.lm}}, \code{\link{ModelArray.gam}},
+\code{\link{ModelArray.wrap}} which produce the \code{df.output},
+\code{\link{results}} for reading results back from a
+\linkS4class{ModelArray}, \code{\link{h5summary}} for inspecting what
+has been written.
 }

From 1d327f4567f6cea8f87bc509c3652db4feea5482 Mon Sep 17 00:00:00 2001
From: araikes <adam.raikes@gmail.com>
Date: Tue, 31 Mar 2026 11:08:49 -0700
Subject: [PATCH 19/23] Fix aberrent linking objects introduced.

---
 R/ModelArray_Constructor.R | 14 +++++++-------
 R/merge.R                  | 14 +++++++-------
 R/utils.R                  | 24 ++++++++++++------------
 3 files changed, 26 insertions(+), 26 deletions(-)

diff --git a/R/ModelArray_Constructor.R b/R/ModelArray_Constructor.R
index 13173db..c7c1d7c 100644
--- a/R/ModelArray_Constructor.R
+++ b/R/ModelArray_Constructor.R
@@ -1392,23 +1392,23 @@ analyseOneElement.wrap <- function(i_element,
 #' @description
 #' Creates a group named \code{analysis_name} under \code{/results/} in the
 #' HDF5 file, then writes the statistical results data.frame (i.e. for one
-#' analysis) into it as \code{results_matrix} along with column names [7].
+#' analysis) into it as \code{results_matrix} along with column names.
 #'
 #' @details
 #' The results are stored at
 #' \code{/results/<analysis_name>/results_matrix} with column names saved
 #' as a separate dataset at
-#' \code{/results/<analysis_name>/column_names} [7].
+#' \code{/results/<analysis_name>/column_names}.
 #'
 #' If any column of \code{df.output} is not numeric or integer, it is
 #' coerced to numeric via \code{factor()} and the factor levels are saved
 #' as a look-up table at
-#' \code{/results/<analysis_name>/lut_forcol<i>} [7].
+#' \code{/results/<analysis_name>/lut_forcol<i>}.
 #'
 #' \strong{Debugging tip:} If you encounter
 #' \code{"Error in H5File.open(filename, mode, file_create_pl, file_access_pl)"},
 #' check if the message mentions "No such file or directory". Try using an
-#' absolute path for the \code{fn.output} argument [7].
+#' absolute path for the \code{fn.output} argument.
 #'
 #' @param fn.output Character. The HDF5 (\code{.h5}) filename for the output.
 #'   The file must already exist; use an absolute path if you encounter
@@ -1416,13 +1416,13 @@ analyseOneElement.wrap <- function(i_element,
 #' @param df.output A data.frame of element-wise statistical results, as
 #'   returned by \code{\link{ModelArray.lm}},
 #'   \code{\link{ModelArray.gam}}, or \code{\link{ModelArray.wrap}}.
-#'   Must inherit from \code{data.frame} [7].
+#'   Must inherit from \code{data.frame}.
 #' @param analysis_name Character. The name for this set of results. Used
 #'   as the group name under \code{/results/} in the HDF5 file.
-#'   Default is \code{"myAnalysis"} [7].
+#'   Default is \code{"myAnalysis"}.
 #' @param overwrite Logical. If a group with the same \code{analysis_name}
 #'   already exists in the HDF5 file, whether to overwrite it (\code{TRUE})
-#'   or skip with a warning (\code{FALSE}). Default is \code{TRUE} [7].
+#'   or skip with a warning (\code{FALSE}). Default is \code{TRUE}.
 #'
 #' @return Invisible \code{NULL}. Called for its side effect of writing
 #'   results to the HDF5 file.
diff --git a/R/merge.R b/R/merge.R
index f96a31b..f8db780 100644
--- a/R/merge.R
+++ b/R/merge.R
@@ -11,7 +11,7 @@
 #' columns specified by \code{merge_on}. Only subjects present in all
 #' phenotype data.frames are retained. Scalar matrices from each input
 #' \linkS4class{ModelArray} are column-subsetted and reordered to match
-#' the joined subject list [6].
+#' the joined subject list.
 #'
 #' A unified \code{source_file} column is created from the \code{merge_on}
 #' columns so that downstream analysis functions
@@ -19,16 +19,16 @@
 #' \code{\link{ModelArray.wrap}}) can align phenotypes to scalars. The
 #' original \code{source_file} columns are renamed to
 #' \code{source_file.<first_scalar_name>} for each input
-#' \linkS4class{ModelArray} [6].
+#' \linkS4class{ModelArray}.
 #'
 #' Scalar names must be unique across all input ModelArrays. If two
 #' ModelArrays share a scalar name (e.g. both have \code{"FD"}), the
 #' function will error. Element counts (number of rows) must match
-#' across all scalars [6].
+#' across all scalars.
 #'
 #' If element metadata is available (see \code{\link{elementMetadata}}),
 #' the function checks that it is consistent across inputs and warns if
-#' it differs or is only partially available [6].
+#' it differs or is only partially available.
 #'
 #' @param modelarrays A list of at least two \linkS4class{ModelArray}
 #'   objects, each constructed from a different HDF5 file.
@@ -36,12 +36,12 @@
 #'   \linkS4class{ModelArray} in \code{modelarrays}. Each must contain a
 #'   \code{source_file} column whose entries match the corresponding
 #'   ModelArray's sources (i.e. \code{sources(modelarrays[[i]])}).
-#'   Each must also contain all columns named in \code{merge_on} [6].
+#'   Each must also contain all columns named in \code{merge_on}.
 #' @param merge_on Character vector of column names present in all
 #'   data.frames in \code{phenotypes_list}, used to inner-join subjects
 #'   across sessions/modalities (e.g. \code{c("subject_id")}). The
 #'   combination of these columns must uniquely identify each subject
-#'   within each data.frame [6].
+#'   within each data.frame.
 #'
 #' @return A list with two components:
 #'   \describe{
@@ -52,7 +52,7 @@
 #'       \code{source_file} columns are renamed to
 #'       \code{source_file.<scalar_name>} and a new unified
 #'       \code{source_file} column is added for use with analysis
-#'       functions [6].}
+#'       functions.}
 #'   }
 #'
 #' @seealso \code{\link{ModelArray}} for constructing individual
diff --git a/R/utils.R b/R/utils.R
index 2b6d0ea..69ae303 100644
--- a/R/utils.R
+++ b/R/utils.R
@@ -357,7 +357,7 @@ checker_gam_formula <- function(formula, gam.formula.breakdown, onemodel = NULL)
 #' structure. If \code{factor.var} in \code{phenotypes} is not already an
 #' ordered factor, this function creates one using \code{reference.group} as
 #' the baseline level and adds it as a new column (named with
-#' \code{prefix.ordered.factor} prepended to \code{factor.var}) [2].
+#' \code{prefix.ordered.factor} prepended to \code{factor.var}).
 #'
 #' The returned \code{phenotypes} data.frame must be used in the subsequent
 #' \code{\link{ModelArray.gam}} call so that the ordered factor column is
@@ -367,23 +367,23 @@ checker_gam_formula <- function(formula, gam.formula.breakdown, onemodel = NULL)
 #'   (dependent variable), typically a scalar name like \code{"FD"}.
 #' @param factor.var Character. The variable name for the factor. It should
 #'   be an ordered factor in \code{phenotypes}. If not, an ordered factor
-#'   will be generated as a new column, which requires \code{reference.group} [2].
+#'   will be generated as a new column, which requires \code{reference.group}.
 #' @param smooth.var Character. The variable name for the smooth term main
 #'   effect (e.g. \code{"age"}).
 #' @param phenotypes A data.frame of the cohort with columns of independent
 #'   variables, including \code{factor.var} and \code{smooth.var}.
 #' @param reference.group Character. The reference (baseline) group for the
 #'   ordered factor of \code{factor.var}. Required when \code{factor.var}
-#'   in \code{phenotypes} is not already an ordered factor [2].
+#'   in \code{phenotypes} is not already an ordered factor.
 #' @param prefix.ordered.factor Character. Prefix for the ordered factor
 #'   column name. Required when \code{factor.var} in \code{phenotypes} is
-#'   not already an ordered factor. Default is \code{"o"} [2].
+#'   not already an ordered factor. Default is \code{"o"}.
 #' @param fx Logical. Passed to \code{\link[mgcv]{s}}. If \code{TRUE}
 #'   (recommended), the smooth is treated as fixed degrees of freedom.
 #'   Default is \code{TRUE}.
 #' @param k Integer or \code{NULL}. Basis dimension passed to
 #'   \code{\link[mgcv]{s}} for both the main smooth and interaction terms.
-#'   If \code{NULL} (default), uses the default from \code{mgcv::s()} [2].
+#'   If \code{NULL} (default), uses the default from \code{mgcv::s()}.
 #'
 #' @return A list with two components:
 #'   \describe{
@@ -391,7 +391,7 @@ checker_gam_formula <- function(formula, gam.formula.breakdown, onemodel = NULL)
 #'     \item{phenotypes}{The (possibly updated) data.frame. If
 #'       \code{factor.var} was not already an ordered factor, a new column
 #'       named \code{paste0(prefix.ordered.factor, factor.var)} is added.
-#'       Otherwise identical to the input [2].}
+#'       Otherwise identical to the input.}
 #'   }
 #'
 #' @seealso \code{\link{gen_gamFormula_contIx}} for continuous-by-continuous
@@ -505,7 +505,7 @@ gen_gamFormula_fxSmooth <- function(response.var, factor.var, smooth.var, phenot
 #' interaction effect, separate from the main effects \code{ti(x)} and
 #' \code{ti(z)}. This decomposition is important for interpretability and
 #' for requesting \code{changed.rsq.term.index} in
-#' \code{\link{ModelArray.gam}} [4].
+#' \code{\link{ModelArray.gam}}.
 #'
 #' @param response.var Character. The variable name for the response
 #'   (dependent variable), typically a scalar name like \code{"FD"}.
@@ -513,11 +513,11 @@ gen_gamFormula_fxSmooth <- function(response.var, factor.var, smooth.var, phenot
 #' @param cont2.var Character. The name of the second continuous variable.
 #' @param fx Logical. Passed to \code{\link[mgcv]{ti}}. If \code{TRUE}
 #'   (recommended), the smooth is treated as fixed degrees of freedom.
-#'   Default is \code{TRUE} [2].
+#'   Default is \code{TRUE}.
 #' @param k Integer or \code{NULL}. Basis dimension passed to
 #'   \code{\link[mgcv]{ti}} for all three terms (both main effects and the
 #'   interaction). If \code{NULL} (default), uses the default from
-#'   \code{mgcv::ti()} [2].
+#'   \code{mgcv::ti()}.
 #'
 #' @return A \code{\link[stats]{formula}} object.
 #'
@@ -596,7 +596,7 @@ bind_cols_check_emptyTibble <- function(a, b) {
 #' This function opens the HDF5 file read-only via \code{\link[rhdf5]{h5ls}},
 #' inspects the group structure under \code{/scalars/} and \code{/results/},
 #' and closes the file. It does not load any data into memory. The returned
-#' object has a \code{print} method that displays a formatted summary [2].
+#' object has a \code{print} method that displays a formatted summary.
 #'
 #' @param filepath Character. Path to an HDF5 (\code{.h5}) file.
 #'
@@ -604,9 +604,9 @@ bind_cols_check_emptyTibble <- function(a, b) {
 #'   components:
 #'   \describe{
 #'     \item{scalars}{A data.frame with columns \code{name},
-#'       \code{nElements}, and \code{nInputFiles} [2].}
+#'       \code{nElements}, and \code{nInputFiles}.}
 #'     \item{analyses}{Character vector of analysis names found under
-#'       \code{/results/} [2].}
+#'       \code{/results/}.}
 #'     \item{filepath}{The input filepath.}
 #'   }
 #'

From 9fe7ba578268b9c20e5d2068de20123a19697653 Mon Sep 17 00:00:00 2001
From: araikes <adam.raikes@gmail.com>
Date: Tue, 31 Mar 2026 11:09:32 -0700
Subject: [PATCH 20/23] Update Rd files again

---
 man/gen_gamFormula_contIx.Rd   |  6 +++---
 man/gen_gamFormula_fxSmooth.Rd | 12 ++++++------
 man/h5summary.Rd               |  6 +++---
 man/mergeModelArrays.Rd        | 14 +++++++-------
 man/writeResults.Rd            | 14 +++++++-------
 5 files changed, 26 insertions(+), 26 deletions(-)

diff --git a/man/gen_gamFormula_contIx.Rd b/man/gen_gamFormula_contIx.Rd
index eadf41d..7dda277 100644
--- a/man/gen_gamFormula_contIx.Rd
+++ b/man/gen_gamFormula_contIx.Rd
@@ -16,12 +16,12 @@ gen_gamFormula_contIx(response.var, cont1.var, cont2.var, fx = TRUE, k = NULL)
 
 \item{fx}{Logical. Passed to \code{\link[mgcv]{ti}}. If \code{TRUE}
 (recommended), the smooth is treated as fixed degrees of freedom.
-Default is \code{TRUE} \link{2}.}
+Default is \code{TRUE}.}
 
 \item{k}{Integer or \code{NULL}. Basis dimension passed to
 \code{\link[mgcv]{ti}} for all three terms (both main effects and the
 interaction). If \code{NULL} (default), uses the default from
-\code{mgcv::ti()} \link{2}.}
+\code{mgcv::ti()}.}
 }
 \value{
 A \code{\link[stats]{formula}} object.
@@ -39,7 +39,7 @@ terms so that the interaction \code{ti(x, z)} captures only the
 interaction effect, separate from the main effects \code{ti(x)} and
 \code{ti(z)}. This decomposition is important for interpretability and
 for requesting \code{changed.rsq.term.index} in
-\code{\link{ModelArray.gam}} \link{4}.
+\code{\link{ModelArray.gam}}.
 }
 \examples{
 \dontrun{
diff --git a/man/gen_gamFormula_fxSmooth.Rd b/man/gen_gamFormula_fxSmooth.Rd
index b8d92c7..63b9a33 100644
--- a/man/gen_gamFormula_fxSmooth.Rd
+++ b/man/gen_gamFormula_fxSmooth.Rd
@@ -21,7 +21,7 @@ gen_gamFormula_fxSmooth(
 
 \item{factor.var}{Character. The variable name for the factor. It should
 be an ordered factor in \code{phenotypes}. If not, an ordered factor
-will be generated as a new column, which requires \code{reference.group} \link{2}.}
+will be generated as a new column, which requires \code{reference.group}.}
 
 \item{smooth.var}{Character. The variable name for the smooth term main
 effect (e.g. \code{"age"}).}
@@ -31,11 +31,11 @@ variables, including \code{factor.var} and \code{smooth.var}.}
 
 \item{reference.group}{Character. The reference (baseline) group for the
 ordered factor of \code{factor.var}. Required when \code{factor.var}
-in \code{phenotypes} is not already an ordered factor \link{2}.}
+in \code{phenotypes} is not already an ordered factor.}
 
 \item{prefix.ordered.factor}{Character. Prefix for the ordered factor
 column name. Required when \code{factor.var} in \code{phenotypes} is
-not already an ordered factor. Default is \code{"o"} \link{2}.}
+not already an ordered factor. Default is \code{"o"}.}
 
 \item{fx}{Logical. Passed to \code{\link[mgcv]{s}}. If \code{TRUE}
 (recommended), the smooth is treated as fixed degrees of freedom.
@@ -43,7 +43,7 @@ Default is \code{TRUE}.}
 
 \item{k}{Integer or \code{NULL}. Basis dimension passed to
 \code{\link[mgcv]{s}} for both the main smooth and interaction terms.
-If \code{NULL} (default), uses the default from \code{mgcv::s()} \link{2}.}
+If \code{NULL} (default), uses the default from \code{mgcv::s()}.}
 }
 \value{
 A list with two components:
@@ -52,7 +52,7 @@ A list with two components:
 \item{phenotypes}{The (possibly updated) data.frame. If
 \code{factor.var} was not already an ordered factor, a new column
 named \code{paste0(prefix.ordered.factor, factor.var)} is added.
-Otherwise identical to the input \link{2}.}
+Otherwise identical to the input.}
 }
 }
 \description{
@@ -68,7 +68,7 @@ This helper exists because setting up factor-smooth interactions in
 structure. If \code{factor.var} in \code{phenotypes} is not already an
 ordered factor, this function creates one using \code{reference.group} as
 the baseline level and adds it as a new column (named with
-\code{prefix.ordered.factor} prepended to \code{factor.var}) \link{2}.
+\code{prefix.ordered.factor} prepended to \code{factor.var}).
 
 The returned \code{phenotypes} data.frame must be used in the subsequent
 \code{\link{ModelArray.gam}} call so that the ordered factor column is
diff --git a/man/h5summary.Rd b/man/h5summary.Rd
index 3f76000..1788988 100644
--- a/man/h5summary.Rd
+++ b/man/h5summary.Rd
@@ -22,9 +22,9 @@ An object of class \code{"h5summary"}, which is a list with
 components:
 \describe{
 \item{scalars}{A data.frame with columns \code{name},
-\code{nElements}, and \code{nInputFiles} \link{2}.}
+\code{nElements}, and \code{nInputFiles}.}
 \item{analyses}{Character vector of analysis names found under
-\code{/results/} \link{2}.}
+\code{/results/}.}
 \item{filepath}{The input filepath.}
 }
 
@@ -40,7 +40,7 @@ without constructing a full \linkS4class{ModelArray} object.
 This function opens the HDF5 file read-only via \code{\link[rhdf5]{h5ls}},
 inspects the group structure under \code{/scalars/} and \code{/results/},
 and closes the file. It does not load any data into memory. The returned
-object has a \code{print} method that displays a formatted summary \link{2}.
+object has a \code{print} method that displays a formatted summary.
 }
 \examples{
 \dontrun{
diff --git a/man/mergeModelArrays.Rd b/man/mergeModelArrays.Rd
index a7f279f..77dd090 100644
--- a/man/mergeModelArrays.Rd
+++ b/man/mergeModelArrays.Rd
@@ -14,13 +14,13 @@ objects, each constructed from a different HDF5 file.}
 \linkS4class{ModelArray} in \code{modelarrays}. Each must contain a
 \code{source_file} column whose entries match the corresponding
 ModelArray's sources (i.e. \code{sources(modelarrays[[i]])}).
-Each must also contain all columns named in \code{merge_on} \link{6}.}
+Each must also contain all columns named in \code{merge_on}.}
 
 \item{merge_on}{Character vector of column names present in all
 data.frames in \code{phenotypes_list}, used to inner-join subjects
 across sessions/modalities (e.g. \code{c("subject_id")}). The
 combination of these columns must uniquely identify each subject
-within each data.frame \link{6}.}
+within each data.frame.}
 }
 \value{
 A list with two components:
@@ -32,7 +32,7 @@ to match the inner-joined subject list.}
 \code{source_file} columns are renamed to
 \code{source_file.<scalar_name>} and a new unified
 \code{source_file} column is added for use with analysis
-functions \link{6}.}
+functions.}
 }
 }
 \description{
@@ -46,7 +46,7 @@ The merge performs an inner join of the phenotype data.frames on the
 columns specified by \code{merge_on}. Only subjects present in all
 phenotype data.frames are retained. Scalar matrices from each input
 \linkS4class{ModelArray} are column-subsetted and reordered to match
-the joined subject list \link{6}.
+the joined subject list.
 
 A unified \code{source_file} column is created from the \code{merge_on}
 columns so that downstream analysis functions
@@ -54,16 +54,16 @@ columns so that downstream analysis functions
 \code{\link{ModelArray.wrap}}) can align phenotypes to scalars. The
 original \code{source_file} columns are renamed to
 \code{source_file.<first_scalar_name>} for each input
-\linkS4class{ModelArray} \link{6}.
+\linkS4class{ModelArray}.
 
 Scalar names must be unique across all input ModelArrays. If two
 ModelArrays share a scalar name (e.g. both have \code{"FD"}), the
 function will error. Element counts (number of rows) must match
-across all scalars \link{6}.
+across all scalars.
 
 If element metadata is available (see \code{\link{elementMetadata}}),
 the function checks that it is consistent across inputs and warns if
-it differs or is only partially available \link{6}.
+it differs or is only partially available.
 }
 \examples{
 \dontrun{
diff --git a/man/writeResults.Rd b/man/writeResults.Rd
index 6743d90..9e0653e 100644
--- a/man/writeResults.Rd
+++ b/man/writeResults.Rd
@@ -19,15 +19,15 @@ file-not-found errors.}
 \item{df.output}{A data.frame of element-wise statistical results, as
 returned by \code{\link{ModelArray.lm}},
 \code{\link{ModelArray.gam}}, or \code{\link{ModelArray.wrap}}.
-Must inherit from \code{data.frame} \link{7}.}
+Must inherit from \code{data.frame}.}
 
 \item{analysis_name}{Character. The name for this set of results. Used
 as the group name under \code{/results/} in the HDF5 file.
-Default is \code{"myAnalysis"} \link{7}.}
+Default is \code{"myAnalysis"}.}
 
 \item{overwrite}{Logical. If a group with the same \code{analysis_name}
 already exists in the HDF5 file, whether to overwrite it (\code{TRUE})
-or skip with a warning (\code{FALSE}). Default is \code{TRUE} \link{7}.}
+or skip with a warning (\code{FALSE}). Default is \code{TRUE}.}
 }
 \value{
 Invisible \code{NULL}. Called for its side effect of writing
@@ -36,23 +36,23 @@ results to the HDF5 file.
 \description{
 Creates a group named \code{analysis_name} under \code{/results/} in the
 HDF5 file, then writes the statistical results data.frame (i.e. for one
-analysis) into it as \code{results_matrix} along with column names \link{7}.
+analysis) into it as \code{results_matrix} along with column names.
 }
 \details{
 The results are stored at
 \code{/results/<analysis_name>/results_matrix} with column names saved
 as a separate dataset at
-\code{/results/<analysis_name>/column_names} \link{7}.
+\code{/results/<analysis_name>/column_names}.
 
 If any column of \code{df.output} is not numeric or integer, it is
 coerced to numeric via \code{factor()} and the factor levels are saved
 as a look-up table at
-\code{/results/<analysis_name>/lut_forcol<i>} \link{7}.
+\code{/results/<analysis_name>/lut_forcol<i>}.
 
 \strong{Debugging tip:} If you encounter
 \code{"Error in H5File.open(filename, mode, file_create_pl, file_access_pl)"},
 check if the message mentions "No such file or directory". Try using an
-absolute path for the \code{fn.output} argument \link{7}.
+absolute path for the \code{fn.output} argument.
 }
 \examples{
 \dontrun{

From b3c7a3c2ecfa387c0a1107b1979d9cb38c2d4522 Mon Sep 17 00:00:00 2001
From: araikes <adam.raikes@gmail.com>
Date: Tue, 31 Mar 2026 11:12:55 -0700
Subject: [PATCH 21/23] Fix unresolved linking in utils

---
 R/utils.R | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/R/utils.R b/R/utils.R
index 69ae303..8d96ccf 100644
--- a/R/utils.R
+++ b/R/utils.R
@@ -154,8 +154,8 @@ check_validity_correctPValue <- function(correct.list, name.correct.list,
 #' @details
 #' ref: https://www.rdocumentation.org/packages/mgcv/versions/1.8-38/topics/s
 #'
-#' @param ofInterest got via: gam.formula.breakdown <- mgcv::interpret.gam(formula);
-#' ofInterest <- gam.formula.breakdown$smooth.spec[[i]]
+#' @param ofInterest got via: `gam.formula.breakdown <- mgcv::interpret.gam(formula)`;
+#' `ofInterest <- gam.formula.breakdown$smooth.spec[[i]]`
 #' @importFrom mgcv s
 #' @importFrom dplyr %>%
 #' @importFrom crayon black
@@ -233,8 +233,8 @@ checker_gam_s <- function(ofInterest) {
 #' ref: https://www.rdocumentation.org/packages/mgcv/versions/1.8-38/topics/te or /t2()
 #'
 #' @param FUN could be mgcv::te(), ti() or t2()
-#' @param ofInterest got via: gam.formula.breakdown <- mgcv::interpret.gam(formula);
-#' ofInterest <- gam.formula.breakdown$smooth.spec[[i]]
+#' @param ofInterest got via: `gam.formula.breakdown <- mgcv::interpret.gam(formula)`;
+#' `ofInterest <- gam.formula.breakdown$smooth.spec[[i]]`
 #' @importFrom mgcv te ti t2
 #' @importFrom dplyr %>%
 #' @importFrom crayon black

From 445e681d8bd81d5457710f466cf1e7c05242a4d3 Mon Sep 17 00:00:00 2001
From: araikes <adam.raikes@gmail.com>
Date: Wed, 1 Apr 2026 10:45:57 -0700
Subject: [PATCH 22/23] Update merged ModelArray example.

---
 R/merge.R | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/R/merge.R b/R/merge.R
index f8db780..55745c2 100644
--- a/R/merge.R
+++ b/R/merge.R
@@ -82,10 +82,10 @@
 #' head(merged$phenotypes)
 #'
 #' results <- ModelArray.lm(
-#'   FD ~ age + sex,
+#'   FD ~ age + sex + FC,
 #'   data = merged$data,
 #'   phenotypes = merged$phenotypes,
-#'   scalar = "FD"
+#'   scalar = c("FD", "FC")
 #' )
 #' }
 #'

From 1a44faa4d1dd38a44fa56313ade22112269c6c53 Mon Sep 17 00:00:00 2001
From: araikes <adam.raikes@gmail.com>
Date: Wed, 1 Apr 2026 10:48:33 -0700
Subject: [PATCH 23/23] Update mergeModelArrays.Rd

---
 man/mergeModelArrays.Rd | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/man/mergeModelArrays.Rd b/man/mergeModelArrays.Rd
index 77dd090..328b773 100644
--- a/man/mergeModelArrays.Rd
+++ b/man/mergeModelArrays.Rd
@@ -86,10 +86,10 @@ scalarNames(merged$data) # c("FD", "FC")
 head(merged$phenotypes)
 
 results <- ModelArray.lm(
-  FD ~ age + sex,
+  FD ~ age + sex + FC,
   data = merged$data,
   phenotypes = merged$phenotypes,
-  scalar = "FD"
+  scalar = c("FD", "FC")
 )
 }