From 828136284f1348db4899857a8d26a04307b0e643 Mon Sep 17 00:00:00 2001 From: araikes Date: Thu, 26 Mar 2026 12:08:00 -0700 Subject: [PATCH 01/23] Update ModelArray roxygen This should fix the duplicate documentation of "usage" in ModelArray by separating the class and function roxygen. --- R/ModelArray_Constructor.R | 37 +++++++++++++++++++------------------ 1 file changed, 19 insertions(+), 18 deletions(-) diff --git a/R/ModelArray_Constructor.R b/R/ModelArray_Constructor.R index f16d709..e0e2203 100644 --- a/R/ModelArray_Constructor.R +++ b/R/ModelArray_Constructor.R @@ -1,16 +1,22 @@ # Exported Functions -### setClass of "ModelArray" ##### +#' ModelArray class +#' #' An S4 class to represent element-wise scalar data and statistics. #' #' @slot sources A list of source filenames -#' @slot scalars A list of element-wise scalar matrix -#' @slot results A list of statistical result matrix +#' @slot scalars A list of element-wise scalar matrices +#' @slot results A list of statistical result matrices #' @slot path Path to the h5 file on disk +#' +#' @name ModelArray-class +#' @rdname ModelArray-class +#' @keywords internal #' @importClassesFrom DelayedArray DelayedArray -ModelArray <- setClass( +NULL + +setClass( "ModelArray", - # contains="DelayedArray", slots = c( results = "list", sources = "list", @@ -43,21 +49,16 @@ ModelArraySeed <- function(filepath, name, type = NA) { -#' Load element-wise data from .h5 file as an ModelArray object +#' Construct a ModelArray object +#' +#' Load element-wise data from an .h5 file as a `ModelArray` object. #' -#' @details: -#' Tips for debugging: -#' if you run into this error: "Error in h(simpleError(msg, call)) : -#' error in evaluating the argument 'seed' in selecting a method for -#' function 'DelayedArray': HDF5. Symbol table. Can't open object." -#' Then please check if you give correct "scalar_types" - check via -#' rhdf5::h5ls(filename_for_h5) +#' @param filepath Path to an .h5 file +#' @param scalar_types Expected scalars +#' @param analysis_names The subfolder names for results in the .h5 file. +#' If empty (default), results are not read. #' -#' @param filepath file -#' @param scalar_types expected scalars -#' @param analysis_names the subfolder names for results in .h5 file. If empty -#' (default), results are not read. -#' @return ModelArray object +#' @return A `ModelArray` object #' @export #' @import methods #' @importFrom dplyr %>% From b17ec967a8a50dd644163891072cea7f5d8575c9 Mon Sep 17 00:00:00 2001 From: araikes Date: Thu, 26 Mar 2026 12:48:26 -0700 Subject: [PATCH 02/23] Update Rd files --- DESCRIPTION | 2 +- man/ModelArray-class.Rd | 21 +++++++++++++++++++++ man/ModelArray.Rd | 39 +++++++-------------------------------- 3 files changed, 29 insertions(+), 33 deletions(-) create mode 100644 man/ModelArray-class.Rd diff --git a/DESCRIPTION b/DESCRIPTION index 3d74ee8..77882dd 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -40,7 +40,7 @@ Imports: rlang, tibble, tidyr -RoxygenNote: 7.3.2 +RoxygenNote: 7.3.3 Suggests: rmarkdown, knitr, diff --git a/man/ModelArray-class.Rd b/man/ModelArray-class.Rd new file mode 100644 index 0000000..2f89cd0 --- /dev/null +++ b/man/ModelArray-class.Rd @@ -0,0 +1,21 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/ModelArray_Constructor.R +\name{ModelArray-class} +\alias{ModelArray-class} +\title{ModelArray class} +\description{ +An S4 class to represent element-wise scalar data and statistics. +} +\section{Slots}{ + +\describe{ +\item{\code{sources}}{A list of source filenames} + +\item{\code{scalars}}{A list of element-wise scalar matrices} + +\item{\code{results}}{A list of statistical result matrices} + +\item{\code{path}}{Path to the h5 file on disk} +}} + +\keyword{internal} diff --git a/man/ModelArray.Rd b/man/ModelArray.Rd index defb99a..6d13363 100644 --- a/man/ModelArray.Rd +++ b/man/ModelArray.Rd @@ -2,46 +2,21 @@ % Please edit documentation in R/ModelArray_Constructor.R \name{ModelArray} \alias{ModelArray} -\title{An S4 class to represent element-wise scalar data and statistics.} +\title{Construct a ModelArray object} \usage{ -ModelArray(filepath, scalar_types = c("FD"), analysis_names = character(0)) - ModelArray(filepath, scalar_types = c("FD"), analysis_names = character(0)) } \arguments{ -\item{filepath}{file} +\item{filepath}{Path to an .h5 file} -\item{scalar_types}{expected scalars} +\item{scalar_types}{Expected scalars} -\item{analysis_names}{the subfolder names for results in .h5 file. If empty -(default), results are not read.} +\item{analysis_names}{The subfolder names for results in the .h5 file. +If empty (default), results are not read.} } \value{ -ModelArray object +A `ModelArray` object } \description{ -An S4 class to represent element-wise scalar data and statistics. - -Load element-wise data from .h5 file as an ModelArray object +Load element-wise data from an .h5 file as a `ModelArray` object. } -\details{ -: -Tips for debugging: -if you run into this error: "Error in h(simpleError(msg, call)) : -error in evaluating the argument 'seed' in selecting a method for -function 'DelayedArray': HDF5. Symbol table. Can't open object." -Then please check if you give correct "scalar_types" - check via -rhdf5::h5ls(filename_for_h5) -} -\section{Slots}{ - -\describe{ -\item{\code{sources}}{A list of source filenames} - -\item{\code{scalars}}{A list of element-wise scalar matrix} - -\item{\code{results}}{A list of statistical result matrix} - -\item{\code{path}}{Path to the h5 file on disk} -}} - From cd9f7cb2b4eaaf8141419f9eacd1b8a554139c85 Mon Sep 17 00:00:00 2001 From: araikes Date: Thu, 26 Mar 2026 13:05:26 -0700 Subject: [PATCH 03/23] Update DESCRIPTION Add markdown rendering for roxygen --- DESCRIPTION | 1 + 1 file changed, 1 insertion(+) diff --git a/DESCRIPTION b/DESCRIPTION index 77882dd..fa3650f 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -41,6 +41,7 @@ Imports: tibble, tidyr RoxygenNote: 7.3.3 +Roxygen: list(markdown = TRUE) Suggests: rmarkdown, knitr, From 69107eee1569c89b74d54c504abc4c38f4bda984 Mon Sep 17 00:00:00 2001 From: araikes Date: Fri, 27 Mar 2026 15:36:10 -0700 Subject: [PATCH 04/23] Testing new S4 and constructor docs --- NAMESPACE | 1 + R/ModelArray_Constructor.R | 96 ++++++++++++++++++++++++++++++-------- man/ModelArray-class.Rd | 21 --------- 3 files changed, 77 insertions(+), 41 deletions(-) delete mode 100644 man/ModelArray-class.Rd diff --git a/NAMESPACE b/NAMESPACE index 18c0fe2..201c081 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -24,6 +24,7 @@ export(scalarNames) export(scalars) export(sources) export(writeResults) +exportClasses(ModelArray) exportMethods(analysisNames) exportMethods(elementMetadata) exportMethods(exampleElementData) diff --git a/R/ModelArray_Constructor.R b/R/ModelArray_Constructor.R index e0e2203..1f50a94 100644 --- a/R/ModelArray_Constructor.R +++ b/R/ModelArray_Constructor.R @@ -1,21 +1,45 @@ -# Exported Functions - #' ModelArray class #' -#' An S4 class to represent element-wise scalar data and statistics. +#' ModelArray is an S4 class that represents element-wise scalar data and +#' associated statistical results backed by an HDF5 file on disk. +#' +#' @description +#' A ModelArray wraps one or more element-wise scalar matrices (e.g., FD, FC, +#' log_FC for fixel data) read lazily via \pkg{DelayedArray}, along with any +#' previously saved analysis results. The object holds references to the +#' underlying HDF5 file and reads data on demand, making it suitable for +#' large-scale neuroimaging datasets. +#' +#' @details +#' Each scalar in the HDF5 file is stored at \code{/scalars//values} +#' as a matrix of elements (rows) by source files (columns). Source filenames +#' are read from HDF5 attributes or companion datasets. Analysis results, if +#' present, live under \code{/results//results_matrix}. +#' +#' ModelArray objects are typically created with the \code{\link{ModelArray}} +#' constructor function. Element-wise models are fit with +#' \code{\link{ModelArray.lm}}, \code{\link{ModelArray.gam}}, or +#' \code{\link{ModelArray.wrap}}. #' -#' @slot sources A list of source filenames -#' @slot scalars A list of element-wise scalar matrices -#' @slot results A list of statistical result matrices -#' @slot path Path to the h5 file on disk +#' @slot sources A named list of character vectors. Each element corresponds +#' to a scalar and contains the source filenames (one per input file/subject). +#' @slot scalars A named list of \linkS4class{DelayedArray} matrices. +#' Each matrix has elements as rows and source files as columns. +#' @slot results A named list of analysis results. Each element is itself a +#' list containing at minimum \code{results_matrix} (a +#' \linkS4class{DelayedArray}). +#' @slot path Character. Path(s) to the HDF5 file(s) on disk. +#' +#' @seealso \code{\link{ModelArray}} for the constructor, +#' \code{\link{ModelArray.lm}}, \code{\link{ModelArray.gam}}, +#' \code{\link{ModelArray.wrap}} for analysis, +#' \code{\link{scalars}}, \code{\link{sources}}, \code{\link{results}} for +#' accessors. #' -#' @name ModelArray-class -#' @rdname ModelArray-class -#' @keywords internal #' @importClassesFrom DelayedArray DelayedArray -NULL - -setClass( +#' @exportClass ModelArray +#' @aliases ModelArray-class +ModelArray <- setClass( "ModelArray", slots = c( results = "list", @@ -49,16 +73,48 @@ ModelArraySeed <- function(filepath, name, type = NA) { -#' Construct a ModelArray object +#' Load element-wise data from an HDF5 file as a ModelArray object +#' +#' @description +#' Reads scalar matrices and (optionally) saved analysis results from an HDF5 +#' file and returns a \linkS4class{ModelArray} object. +#' +#' @details +#' The constructor reads each scalar listed in \code{scalar_types} from +#' \code{/scalars//values} in the HDF5 file, wrapping them as +#' \linkS4class{DelayedArray} objects for lazy access. Source filenames are +#' extracted from HDF5 attributes or companion datasets. +#' +#' If \code{analysis_names} is non-empty, saved results are read from +#' \code{/results//results_matrix} along with column name metadata. +#' +#' \strong{Debugging tip:} If you encounter +#' \code{"Error in h(simpleError(msg, call)) : error in evaluating the +#' argument 'seed'..."}, check that \code{scalar_types} matches the groups +#' present in the file. Inspect with \code{rhdf5::h5ls(filepath)}. +#' +#' @param filepath Character. Path to an existing HDF5 (\code{.h5}) file +#' containing element-wise scalar data. +#' @param scalar_types Character vector. Names of scalar groups to read from +#' \code{/scalars/} in the HDF5 file. Default is \code{c("FD")}. Must match +#' group names in the file; verify with \code{rhdf5::h5ls(filepath)}. +#' @param analysis_names Character vector. Subfolder names under +#' \code{/results/} to load. Default is \code{character(0)} (no results +#' loaded). +#' +#' @return A \linkS4class{ModelArray} object. #' -#' Load element-wise data from an .h5 file as a `ModelArray` object. +#' @seealso \linkS4class{ModelArray} for the class definition, +#' \code{\link{h5summary}} for inspecting an HDF5 file without loading, +#' \code{\link{scalars}}, \code{\link{sources}} for accessing loaded data. #' -#' @param filepath Path to an .h5 file -#' @param scalar_types Expected scalars -#' @param analysis_names The subfolder names for results in the .h5 file. -#' If empty (default), results are not read. +#' @examples +#' \dontrun{ +#' ma <- ModelArray("path/to/data.h5", scalar_types = c("FD", "FC")) +#' ma +#' scalars(ma) +#' } #' -#' @return A `ModelArray` object #' @export #' @import methods #' @importFrom dplyr %>% diff --git a/man/ModelArray-class.Rd b/man/ModelArray-class.Rd deleted file mode 100644 index 2f89cd0..0000000 --- a/man/ModelArray-class.Rd +++ /dev/null @@ -1,21 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/ModelArray_Constructor.R -\name{ModelArray-class} -\alias{ModelArray-class} -\title{ModelArray class} -\description{ -An S4 class to represent element-wise scalar data and statistics. -} -\section{Slots}{ - -\describe{ -\item{\code{sources}}{A list of source filenames} - -\item{\code{scalars}}{A list of element-wise scalar matrices} - -\item{\code{results}}{A list of statistical result matrices} - -\item{\code{path}}{Path to the h5 file on disk} -}} - -\keyword{internal} From 27c24c0625c6000e99b7da87352209bd47cc44ee Mon Sep 17 00:00:00 2001 From: araikes Date: Fri, 27 Mar 2026 17:33:15 -0700 Subject: [PATCH 05/23] Include updated ModelArray.Rd --- man/ModelArray.Rd | 90 +++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 83 insertions(+), 7 deletions(-) diff --git a/man/ModelArray.Rd b/man/ModelArray.Rd index 6d13363..96de23c 100644 --- a/man/ModelArray.Rd +++ b/man/ModelArray.Rd @@ -2,21 +2,97 @@ % Please edit documentation in R/ModelArray_Constructor.R \name{ModelArray} \alias{ModelArray} -\title{Construct a ModelArray object} +\alias{ModelArray-class} +\title{ModelArray class} \usage{ +ModelArray(filepath, scalar_types = c("FD"), analysis_names = character(0)) + ModelArray(filepath, scalar_types = c("FD"), analysis_names = character(0)) } \arguments{ -\item{filepath}{Path to an .h5 file} +\item{filepath}{Character. Path to an existing HDF5 (\code{.h5}) file +containing element-wise scalar data.} -\item{scalar_types}{Expected scalars} +\item{scalar_types}{Character vector. Names of scalar groups to read from +\code{/scalars/} in the HDF5 file. Default is \code{c("FD")}. Must match +group names in the file; verify with \code{rhdf5::h5ls(filepath)}.} -\item{analysis_names}{The subfolder names for results in the .h5 file. -If empty (default), results are not read.} +\item{analysis_names}{Character vector. Subfolder names under +\code{/results/} to load. Default is \code{character(0)} (no results +loaded).} } \value{ -A `ModelArray` object +A \linkS4class{ModelArray} object. } \description{ -Load element-wise data from an .h5 file as a `ModelArray` object. +A ModelArray wraps one or more element-wise scalar matrices (e.g., FD, FC, +log_FC for fixel data) read lazily via \pkg{DelayedArray}, along with any +previously saved analysis results. The object holds references to the +underlying HDF5 file and reads data on demand, making it suitable for +large-scale neuroimaging datasets. + +Reads scalar matrices and (optionally) saved analysis results from an HDF5 +file and returns a \linkS4class{ModelArray} object. +} +\details{ +ModelArray is an S4 class that represents element-wise scalar data and +associated statistical results backed by an HDF5 file on disk. + +Each scalar in the HDF5 file is stored at \code{/scalars//values} +as a matrix of elements (rows) by source files (columns). Source filenames +are read from HDF5 attributes or companion datasets. Analysis results, if +present, live under \code{/results//results_matrix}. + +ModelArray objects are typically created with the \code{\link{ModelArray}} +constructor function. Element-wise models are fit with +\code{\link{ModelArray.lm}}, \code{\link{ModelArray.gam}}, or +\code{\link{ModelArray.wrap}}. + +The constructor reads each scalar listed in \code{scalar_types} from +\code{/scalars//values} in the HDF5 file, wrapping them as +\linkS4class{DelayedArray} objects for lazy access. Source filenames are +extracted from HDF5 attributes or companion datasets. + +If \code{analysis_names} is non-empty, saved results are read from +\code{/results//results_matrix} along with column name metadata. + +\strong{Debugging tip:} If you encounter +\code{"Error in h(simpleError(msg, call)) : error in evaluating the +argument 'seed'..."}, check that \code{scalar_types} matches the groups +present in the file. Inspect with \code{rhdf5::h5ls(filepath)}. +} +\section{Slots}{ + +\describe{ +\item{\code{sources}}{A named list of character vectors. Each element corresponds +to a scalar and contains the source filenames (one per input file/subject).} + +\item{\code{scalars}}{A named list of \linkS4class{DelayedArray} matrices. +Each matrix has elements as rows and source files as columns.} + +\item{\code{results}}{A named list of analysis results. Each element is itself a +list containing at minimum \code{results_matrix} (a +\linkS4class{DelayedArray}).} + +\item{\code{path}}{Character. Path(s) to the HDF5 file(s) on disk.} +}} + +\examples{ +\dontrun{ +ma <- ModelArray("path/to/data.h5", scalar_types = c("FD", "FC")) +ma +scalars(ma) +} + +} +\seealso{ +\code{\link{ModelArray}} for the constructor, +\code{\link{ModelArray.lm}}, \code{\link{ModelArray.gam}}, +\code{\link{ModelArray.wrap}} for analysis, +\code{\link{scalars}}, \code{\link{sources}}, \code{\link{results}} for +accessors. + +\linkS4class{ModelArray} for the class definition, +\code{\link{h5summary}} for inspecting an HDF5 file without loading, +\code{\link{scalars}}, \code{\link{sources}} for accessing loaded data. } From 421b9ed91fdec463db06b87be9960a964590d318 Mon Sep 17 00:00:00 2001 From: araikes Date: Mon, 30 Mar 2026 16:30:11 -0700 Subject: [PATCH 06/23] Test disambiguating class and function --- NAMESPACE | 2 +- R/ModelArray_Constructor.R | 5 +--- man/ModelArray-class.Rd | 51 +++++++++++++++++++++++++++++++++++++ man/ModelArray.Rd | 52 +++----------------------------------- 4 files changed, 57 insertions(+), 53 deletions(-) create mode 100644 man/ModelArray-class.Rd diff --git a/NAMESPACE b/NAMESPACE index 201c081..bf243de 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -2,7 +2,6 @@ S3method(print,h5summary) export("%>%") -export(ModelArray) export(ModelArray.gam) export(ModelArray.lm) export(ModelArray.wrap) @@ -16,6 +15,7 @@ export(gen_gamFormula_contIx) export(gen_gamFormula_fxSmooth) export(h5summary) export(mergeModelArrays) +export(modelarray) export(nElements) export(nInputFiles) export(numElementsTotal) diff --git a/R/ModelArray_Constructor.R b/R/ModelArray_Constructor.R index 1f50a94..b690186 100644 --- a/R/ModelArray_Constructor.R +++ b/R/ModelArray_Constructor.R @@ -38,7 +38,6 @@ #' #' @importClassesFrom DelayedArray DelayedArray #' @exportClass ModelArray -#' @aliases ModelArray-class ModelArray <- setClass( "ModelArray", slots = c( @@ -49,8 +48,6 @@ ModelArray <- setClass( ) ) - - #' ModelArraySeed #' #' Generates a "seed" for the h5 file format. A wrapper around HDF5ArraySeed @@ -120,7 +117,7 @@ ModelArraySeed <- function(filepath, name, type = NA) { #' @importFrom dplyr %>% #' @importFrom DelayedArray DelayedArray realize #' @importFrom rhdf5 h5readAttributes -ModelArray <- function(filepath, +modelarray <- function(filepath, scalar_types = c("FD"), analysis_names = character(0)) { # TODO: try and use hdf5r instead of rhdf5 and delayedarray here diff --git a/man/ModelArray-class.Rd b/man/ModelArray-class.Rd new file mode 100644 index 0000000..205b468 --- /dev/null +++ b/man/ModelArray-class.Rd @@ -0,0 +1,51 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/ModelArray_Constructor.R +\docType{class} +\name{ModelArray-class} +\alias{ModelArray-class} +\alias{ModelArray} +\title{ModelArray class} +\description{ +A ModelArray wraps one or more element-wise scalar matrices (e.g., FD, FC, +log_FC for fixel data) read lazily via \pkg{DelayedArray}, along with any +previously saved analysis results. The object holds references to the +underlying HDF5 file and reads data on demand, making it suitable for +large-scale neuroimaging datasets. +} +\details{ +ModelArray is an S4 class that represents element-wise scalar data and +associated statistical results backed by an HDF5 file on disk. + +Each scalar in the HDF5 file is stored at \code{/scalars//values} +as a matrix of elements (rows) by source files (columns). Source filenames +are read from HDF5 attributes or companion datasets. Analysis results, if +present, live under \code{/results//results_matrix}. + +ModelArray objects are typically created with the \code{\link{ModelArray}} +constructor function. Element-wise models are fit with +\code{\link{ModelArray.lm}}, \code{\link{ModelArray.gam}}, or +\code{\link{ModelArray.wrap}}. +} +\section{Slots}{ + +\describe{ +\item{\code{sources}}{A named list of character vectors. Each element corresponds +to a scalar and contains the source filenames (one per input file/subject).} + +\item{\code{scalars}}{A named list of \linkS4class{DelayedArray} matrices. +Each matrix has elements as rows and source files as columns.} + +\item{\code{results}}{A named list of analysis results. Each element is itself a +list containing at minimum \code{results_matrix} (a +\linkS4class{DelayedArray}).} + +\item{\code{path}}{Character. Path(s) to the HDF5 file(s) on disk.} +}} + +\seealso{ +\code{\link{ModelArray}} for the constructor, +\code{\link{ModelArray.lm}}, \code{\link{ModelArray.gam}}, +\code{\link{ModelArray.wrap}} for analysis, +\code{\link{scalars}}, \code{\link{sources}}, \code{\link{results}} for +accessors. +} diff --git a/man/ModelArray.Rd b/man/ModelArray.Rd index 96de23c..f2bdb38 100644 --- a/man/ModelArray.Rd +++ b/man/ModelArray.Rd @@ -1,13 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/ModelArray_Constructor.R -\name{ModelArray} -\alias{ModelArray} -\alias{ModelArray-class} -\title{ModelArray class} +\name{modelarray} +\alias{modelarray} +\title{Load element-wise data from an HDF5 file as a ModelArray object} \usage{ -ModelArray(filepath, scalar_types = c("FD"), analysis_names = character(0)) - -ModelArray(filepath, scalar_types = c("FD"), analysis_names = character(0)) +modelarray(filepath, scalar_types = c("FD"), analysis_names = character(0)) } \arguments{ \item{filepath}{Character. Path to an existing HDF5 (\code{.h5}) file @@ -25,29 +22,10 @@ loaded).} A \linkS4class{ModelArray} object. } \description{ -A ModelArray wraps one or more element-wise scalar matrices (e.g., FD, FC, -log_FC for fixel data) read lazily via \pkg{DelayedArray}, along with any -previously saved analysis results. The object holds references to the -underlying HDF5 file and reads data on demand, making it suitable for -large-scale neuroimaging datasets. - Reads scalar matrices and (optionally) saved analysis results from an HDF5 file and returns a \linkS4class{ModelArray} object. } \details{ -ModelArray is an S4 class that represents element-wise scalar data and -associated statistical results backed by an HDF5 file on disk. - -Each scalar in the HDF5 file is stored at \code{/scalars//values} -as a matrix of elements (rows) by source files (columns). Source filenames -are read from HDF5 attributes or companion datasets. Analysis results, if -present, live under \code{/results//results_matrix}. - -ModelArray objects are typically created with the \code{\link{ModelArray}} -constructor function. Element-wise models are fit with -\code{\link{ModelArray.lm}}, \code{\link{ModelArray.gam}}, or -\code{\link{ModelArray.wrap}}. - The constructor reads each scalar listed in \code{scalar_types} from \code{/scalars//values} in the HDF5 file, wrapping them as \linkS4class{DelayedArray} objects for lazy access. Source filenames are @@ -61,22 +39,6 @@ If \code{analysis_names} is non-empty, saved results are read from argument 'seed'..."}, check that \code{scalar_types} matches the groups present in the file. Inspect with \code{rhdf5::h5ls(filepath)}. } -\section{Slots}{ - -\describe{ -\item{\code{sources}}{A named list of character vectors. Each element corresponds -to a scalar and contains the source filenames (one per input file/subject).} - -\item{\code{scalars}}{A named list of \linkS4class{DelayedArray} matrices. -Each matrix has elements as rows and source files as columns.} - -\item{\code{results}}{A named list of analysis results. Each element is itself a -list containing at minimum \code{results_matrix} (a -\linkS4class{DelayedArray}).} - -\item{\code{path}}{Character. Path(s) to the HDF5 file(s) on disk.} -}} - \examples{ \dontrun{ ma <- ModelArray("path/to/data.h5", scalar_types = c("FD", "FC")) @@ -86,12 +48,6 @@ scalars(ma) } \seealso{ -\code{\link{ModelArray}} for the constructor, -\code{\link{ModelArray.lm}}, \code{\link{ModelArray.gam}}, -\code{\link{ModelArray.wrap}} for analysis, -\code{\link{scalars}}, \code{\link{sources}}, \code{\link{results}} for -accessors. - \linkS4class{ModelArray} for the class definition, \code{\link{h5summary}} for inspecting an HDF5 file without loading, \code{\link{scalars}}, \code{\link{sources}} for accessing loaded data. From 9f26ac620bbb0ce428bd4472879135ad965eb82d Mon Sep 17 00:00:00 2001 From: araikes Date: Mon, 30 Mar 2026 16:56:19 -0700 Subject: [PATCH 07/23] Disambiguate with aliases instead --- NAMESPACE | 2 +- R/ModelArray_Constructor.R | 45 +++++++++++++++++++------------------- man/ModelArray-class.Rd | 8 ++++--- man/ModelArray.Rd | 45 ++++++++++++++++++-------------------- 4 files changed, 50 insertions(+), 50 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index bf243de..201c081 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -2,6 +2,7 @@ S3method(print,h5summary) export("%>%") +export(ModelArray) export(ModelArray.gam) export(ModelArray.lm) export(ModelArray.wrap) @@ -15,7 +16,6 @@ export(gen_gamFormula_contIx) export(gen_gamFormula_fxSmooth) export(h5summary) export(mergeModelArrays) -export(modelarray) export(nElements) export(nInputFiles) export(numElementsTotal) diff --git a/R/ModelArray_Constructor.R b/R/ModelArray_Constructor.R index b690186..c1c8075 100644 --- a/R/ModelArray_Constructor.R +++ b/R/ModelArray_Constructor.R @@ -36,6 +36,8 @@ #' \code{\link{scalars}}, \code{\link{sources}}, \code{\link{results}} for #' accessors. #' +#' @aliases ModelArray-class +#' @rdname ModelArray-class #' @importClassesFrom DelayedArray DelayedArray #' @exportClass ModelArray ModelArray <- setClass( @@ -70,54 +72,53 @@ ModelArraySeed <- function(filepath, name, type = NA) { -#' Load element-wise data from an HDF5 file as a ModelArray object +#' Load element-wise data from an HDF5 file #' #' @description -#' Reads scalar matrices and (optionally) saved analysis results from an HDF5 -#' file and returns a \linkS4class{ModelArray} object. +#' Reads scalar matrices and (optionally) saved analysis results from +#' an HDF5 file and returns a \linkS4class{ModelArray} object. #' #' @details #' The constructor reads each scalar listed in \code{scalar_types} from -#' \code{/scalars//values} in the HDF5 file, wrapping them as -#' \linkS4class{DelayedArray} objects for lazy access. Source filenames are -#' extracted from HDF5 attributes or companion datasets. +#' \code{/scalars//values}, wrapping them as +#' \linkS4class{DelayedArray} objects. Source filenames are extracted +#' from HDF5 attributes or companion datasets. #' -#' If \code{analysis_names} is non-empty, saved results are read from -#' \code{/results//results_matrix} along with column name metadata. +#' If \code{analysis_names} is non-empty, saved results are loaded from +#' \code{/results//results_matrix}. #' #' \strong{Debugging tip:} If you encounter -#' \code{"Error in h(simpleError(msg, call)) : error in evaluating the -#' argument 'seed'..."}, check that \code{scalar_types} matches the groups -#' present in the file. Inspect with \code{rhdf5::h5ls(filepath)}. +#' \code{"error in evaluating the argument 'seed'..."}, check that +#' \code{scalar_types} matches groups in the file. Inspect with +#' \code{rhdf5::h5ls(filepath)}. #' -#' @param filepath Character. Path to an existing HDF5 (\code{.h5}) file -#' containing element-wise scalar data. -#' @param scalar_types Character vector. Names of scalar groups to read from -#' \code{/scalars/} in the HDF5 file. Default is \code{c("FD")}. Must match -#' group names in the file; verify with \code{rhdf5::h5ls(filepath)}. +#' @param filepath Character. Path to an existing HDF5 (\code{.h5}) +#' file containing element-wise scalar data. +#' @param scalar_types Character vector. Names of scalar groups to read +#' from \code{/scalars/} in the HDF5 file. Default is \code{c("FD")}. +#' Must match group names in the file. #' @param analysis_names Character vector. Subfolder names under -#' \code{/results/} to load. Default is \code{character(0)} (no results -#' loaded). +#' \code{/results/} to load. Default is \code{character(0)} (none). #' #' @return A \linkS4class{ModelArray} object. #' #' @seealso \linkS4class{ModelArray} for the class definition, -#' \code{\link{h5summary}} for inspecting an HDF5 file without loading, -#' \code{\link{scalars}}, \code{\link{sources}} for accessing loaded data. +#' \code{\link{h5summary}} for inspecting an HDF5 file. #' #' @examples #' \dontrun{ #' ma <- ModelArray("path/to/data.h5", scalar_types = c("FD", "FC")) #' ma -#' scalars(ma) #' } #' +#' @rdname ModelArray +#' @aliases ModelArray #' @export #' @import methods #' @importFrom dplyr %>% #' @importFrom DelayedArray DelayedArray realize #' @importFrom rhdf5 h5readAttributes -modelarray <- function(filepath, +ModelArray <- function(filepath, scalar_types = c("FD"), analysis_names = character(0)) { # TODO: try and use hdf5r instead of rhdf5 and delayedarray here diff --git a/man/ModelArray-class.Rd b/man/ModelArray-class.Rd index 205b468..95abad0 100644 --- a/man/ModelArray-class.Rd +++ b/man/ModelArray-class.Rd @@ -1,10 +1,12 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/ModelArray_Constructor.R -\docType{class} -\name{ModelArray-class} -\alias{ModelArray-class} +\name{ModelArray} \alias{ModelArray} +\alias{ModelArray-class} \title{ModelArray class} +\usage{ +ModelArray(filepath, scalar_types = c("FD"), analysis_names = character(0)) +} \description{ A ModelArray wraps one or more element-wise scalar matrices (e.g., FD, FC, log_FC for fixel data) read lazily via \pkg{DelayedArray}, along with any diff --git a/man/ModelArray.Rd b/man/ModelArray.Rd index f2bdb38..d23c8d2 100644 --- a/man/ModelArray.Rd +++ b/man/ModelArray.Rd @@ -1,54 +1,51 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/ModelArray_Constructor.R -\name{modelarray} -\alias{modelarray} -\title{Load element-wise data from an HDF5 file as a ModelArray object} +\name{ModelArray} +\alias{ModelArray} +\title{Load element-wise data from an HDF5 file} \usage{ -modelarray(filepath, scalar_types = c("FD"), analysis_names = character(0)) +ModelArray(filepath, scalar_types = c("FD"), analysis_names = character(0)) } \arguments{ -\item{filepath}{Character. Path to an existing HDF5 (\code{.h5}) file -containing element-wise scalar data.} +\item{filepath}{Character. Path to an existing HDF5 (\code{.h5}) +file containing element-wise scalar data.} -\item{scalar_types}{Character vector. Names of scalar groups to read from -\code{/scalars/} in the HDF5 file. Default is \code{c("FD")}. Must match -group names in the file; verify with \code{rhdf5::h5ls(filepath)}.} +\item{scalar_types}{Character vector. Names of scalar groups to read +from \code{/scalars/} in the HDF5 file. Default is \code{c("FD")}. +Must match group names in the file.} \item{analysis_names}{Character vector. Subfolder names under -\code{/results/} to load. Default is \code{character(0)} (no results -loaded).} +\code{/results/} to load. Default is \code{character(0)} (none).} } \value{ A \linkS4class{ModelArray} object. } \description{ -Reads scalar matrices and (optionally) saved analysis results from an HDF5 -file and returns a \linkS4class{ModelArray} object. +Reads scalar matrices and (optionally) saved analysis results from +an HDF5 file and returns a \linkS4class{ModelArray} object. } \details{ The constructor reads each scalar listed in \code{scalar_types} from -\code{/scalars//values} in the HDF5 file, wrapping them as -\linkS4class{DelayedArray} objects for lazy access. Source filenames are -extracted from HDF5 attributes or companion datasets. +\code{/scalars//values}, wrapping them as +\linkS4class{DelayedArray} objects. Source filenames are extracted +from HDF5 attributes or companion datasets. -If \code{analysis_names} is non-empty, saved results are read from -\code{/results//results_matrix} along with column name metadata. +If \code{analysis_names} is non-empty, saved results are loaded from +\code{/results//results_matrix}. \strong{Debugging tip:} If you encounter -\code{"Error in h(simpleError(msg, call)) : error in evaluating the -argument 'seed'..."}, check that \code{scalar_types} matches the groups -present in the file. Inspect with \code{rhdf5::h5ls(filepath)}. +\code{"error in evaluating the argument 'seed'..."}, check that +\code{scalar_types} matches groups in the file. Inspect with +\code{rhdf5::h5ls(filepath)}. } \examples{ \dontrun{ ma <- ModelArray("path/to/data.h5", scalar_types = c("FD", "FC")) ma -scalars(ma) } } \seealso{ \linkS4class{ModelArray} for the class definition, -\code{\link{h5summary}} for inspecting an HDF5 file without loading, -\code{\link{scalars}}, \code{\link{sources}} for accessing loaded data. +\code{\link{h5summary}} for inspecting an HDF5 file. } From dc32033f25568f3b7acb65b41c66272dd6918e89 Mon Sep 17 00:00:00 2001 From: araikes Date: Mon, 30 Mar 2026 17:10:49 -0700 Subject: [PATCH 08/23] Update S4 methods documentation --- man/ModelArray-class.Rd | 16 +++++++++++++- man/analysisNames.Rd | 21 ++++++++++++++++--- man/elementMetadata.Rd | 22 ++++++++++++++++---- man/nElements.Rd | 22 ++++++++++++++++---- man/nInputFiles.Rd | 22 ++++++++++++++++---- man/results.Rd | 39 +++++++++++++++++++++++++++++++---- man/scalarNames.Rd | 18 +++++++++++++--- man/scalars.Rd | 26 +++++++++++++++++++---- man/show-ModelArray-method.Rd | 15 -------------- 9 files changed, 159 insertions(+), 42 deletions(-) delete mode 100644 man/show-ModelArray-method.Rd diff --git a/man/ModelArray-class.Rd b/man/ModelArray-class.Rd index 95abad0..2deb8af 100644 --- a/man/ModelArray-class.Rd +++ b/man/ModelArray-class.Rd @@ -1,11 +1,22 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/ModelArray_Constructor.R +% Please edit documentation in R/ModelArray_Constructor.R, +% R/ModelArray_S4Methods.R \name{ModelArray} \alias{ModelArray} \alias{ModelArray-class} +\alias{show,ModelArray-method} \title{ModelArray class} \usage{ ModelArray(filepath, scalar_types = c("FD"), analysis_names = character(0)) + +\S4method{show}{ModelArray}(object) +} +\arguments{ +\item{object}{A \linkS4class{ModelArray} object.} +} +\value{ +Invisible \code{NULL}. Called for its side effect of printing +to the console. } \description{ A ModelArray wraps one or more element-wise scalar matrices (e.g., FD, FC, @@ -13,6 +24,9 @@ log_FC for fixel data) read lazily via \pkg{DelayedArray}, along with any previously saved analysis results. The object holds references to the underlying HDF5 file and reads data on demand, making it suitable for large-scale neuroimaging datasets. + +Prints a summary of the ModelArray including file path, scalar dimensions, +and any saved analysis names. } \details{ ModelArray is an S4 class that represents element-wise scalar data and diff --git a/man/analysisNames.Rd b/man/analysisNames.Rd index 2279fd7..88470c6 100644 --- a/man/analysisNames.Rd +++ b/man/analysisNames.Rd @@ -10,11 +10,26 @@ analysisNames(x) \S4method{analysisNames}{ModelArray}(x) } \arguments{ -\item{x}{A ModelArray object} +\item{x}{A \linkS4class{ModelArray} object.} } \value{ -Character vector of analysis names +Character vector of analysis names. Returns \code{character(0)} +if no analyses have been loaded or saved. } \description{ -Names of analyses in a ModelArray +Returns the names of all analysis result sets currently loaded in a +\linkS4class{ModelArray}. These correspond to subfolder names under +\code{/results/} in the HDF5 file. +} +\examples{ +\dontrun{ +ma <- ModelArray("data.h5", scalar_types = c("FD"), + analysis_names = c("lm_age")) +analysisNames(ma) # "lm_age" +} + +} +\seealso{ +\code{\link{results}}, \code{\link{scalarNames}}, +\code{\link{writeResults}} } diff --git a/man/elementMetadata.Rd b/man/elementMetadata.Rd index fb35fc1..7590f4e 100644 --- a/man/elementMetadata.Rd +++ b/man/elementMetadata.Rd @@ -10,12 +10,26 @@ elementMetadata(x) \S4method{elementMetadata}{ModelArray}(x) } \arguments{ -\item{x}{A ModelArray object} +\item{x}{A \linkS4class{ModelArray} object.} } \value{ -A matrix or data.frame of element metadata, or NULL +A matrix or data.frame of element metadata if found, or \code{NULL} +if no known metadata dataset exists in the HDF5 file. } \description{ -Reads element metadata (e.g., greyordinates for cifti data) from the h5 file -if present. Returns NULL if no element metadata is found. +Reads element metadata (e.g., greyordinates for CIFTI data, or fixel/voxel +coordinate information) from the HDF5 file if present. The function searches +for known metadata dataset names (\code{"greyordinates"}, \code{"fixels"}, +\code{"voxels"}) at the top level of the HDF5 file. +} +\examples{ +\dontrun{ +ma <- ModelArray("data.h5", scalar_types = c("FD")) +meta <- elementMetadata(ma) +if (!is.null(meta)) head(meta) +} + +} +\seealso{ +\code{\link{nElements}}, \code{\link{scalars}} } diff --git a/man/nElements.Rd b/man/nElements.Rd index 0c0b0da..e0b264c 100644 --- a/man/nElements.Rd +++ b/man/nElements.Rd @@ -10,13 +10,27 @@ nElements(x, scalar = NULL) \S4method{nElements}{ModelArray}(x, scalar = NULL) } \arguments{ -\item{x}{A ModelArray object} +\item{x}{A \linkS4class{ModelArray} object.} -\item{scalar}{Optional scalar name. Defaults to the first scalar.} +\item{scalar}{Optional character string. Name of the scalar to query. +Defaults to the first scalar in \code{names(scalars(x))}.} } \value{ -Integer, number of elements (rows) +Integer. The number of elements (rows) in the scalar matrix. } \description{ -Number of elements in a ModelArray +Returns the number of elements (e.g., fixels or voxels) for a given scalar +in a \linkS4class{ModelArray}. This is the row count of the scalar matrix. +} +\examples{ +\dontrun{ +ma <- ModelArray("data.h5", scalar_types = c("FD")) +nElements(ma) +nElements(ma, "FD") +} + +} +\seealso{ +\code{\link{nInputFiles}}, \code{\link{numElementsTotal}}, +\code{\link{scalars}}, \code{\link{scalarNames}} } diff --git a/man/nInputFiles.Rd b/man/nInputFiles.Rd index 8ce751d..4667fb2 100644 --- a/man/nInputFiles.Rd +++ b/man/nInputFiles.Rd @@ -10,13 +10,27 @@ nInputFiles(x, scalar = NULL) \S4method{nInputFiles}{ModelArray}(x, scalar = NULL) } \arguments{ -\item{x}{A ModelArray object} +\item{x}{A \linkS4class{ModelArray} object.} -\item{scalar}{Optional scalar name. Defaults to the first scalar.} +\item{scalar}{Optional character string. Name of the scalar to query. +Defaults to the first scalar in \code{names(scalars(x))}.} } \value{ -Integer, number of input files (columns) +Integer. The number of input files (columns) in the scalar matrix. } \description{ -Number of input files in a ModelArray +Returns the number of input files (i.e., subjects or source files) for a +given scalar in a \linkS4class{ModelArray}. This is the column count of the +scalar matrix. +} +\examples{ +\dontrun{ +ma <- ModelArray("data.h5", scalar_types = c("FD")) +nInputFiles(ma) +} + +} +\seealso{ +\code{\link{nElements}}, \code{\link{sources}}, +\code{\link{scalarNames}} } diff --git a/man/results.Rd b/man/results.Rd index 72f516d..862db7f 100644 --- a/man/results.Rd +++ b/man/results.Rd @@ -10,13 +10,44 @@ results(x, ...) \S4method{results}{ModelArray}(x, ...) } \arguments{ -\item{x}{A ModelArray object} +\item{x}{A \linkS4class{ModelArray} object.} -\item{...}{Additional arguments. Currently accepts an analysis name (character).} +\item{...}{Optional: a single character string giving the analysis name to +extract. If omitted, the entire named list is returned.} } \value{ -Statistical results in this ModelArray object +If called with no extra arguments, a named list of result lists. +If called with an analysis name, the corresponding result list (containing +at minimum \code{results_matrix}). } \description{ -Statistical results of a ModelArray object +Retrieve previously saved analysis results from a \linkS4class{ModelArray}. +When called with no additional arguments, returns the full named list of all +result sets. When called with a single analysis name, returns that one +result set. +} +\details{ +Each result set is itself a list containing at minimum +\code{results_matrix} (a \linkS4class{DelayedArray} with elements as rows +and statistics as columns). Column names are stored alongside the matrix +in the HDF5 file. + +Results are only available if \code{analysis_names} was supplied when the +\linkS4class{ModelArray} was constructed, or if results have been written +back with \code{\link{writeResults}}. +} +\examples{ +\dontrun{ +ma <- ModelArray("data.h5", scalar_types = c("FD"), + analysis_names = c("lm_age")) +results(ma) # named list of all results +results(ma, "lm_age") # single result set +results(ma, "lm_age")$results_matrix +} + +} +\seealso{ +\code{\link{sources}}, \code{\link{scalars}}, +\code{\link{analysisNames}}, \code{\link{writeResults}}, +\code{\link{ModelArray.lm}}, \code{\link{ModelArray.gam}} } diff --git a/man/scalarNames.Rd b/man/scalarNames.Rd index 95c918f..f2449be 100644 --- a/man/scalarNames.Rd +++ b/man/scalarNames.Rd @@ -10,11 +10,23 @@ scalarNames(x) \S4method{scalarNames}{ModelArray}(x) } \arguments{ -\item{x}{A ModelArray object} +\item{x}{A \linkS4class{ModelArray} object.} } \value{ -Character vector of scalar names +Character vector of scalar names. } \description{ -Names of scalars in a ModelArray +Returns the names of all scalar datasets loaded in a +\linkS4class{ModelArray} (e.g. \code{"FD"}, \code{"FC"}, \code{"log_FC"}). +} +\examples{ +\dontrun{ +ma <- ModelArray("data.h5", scalar_types = c("FD", "FC")) +scalarNames(ma) # c("FD", "FC") +} + +} +\seealso{ +\code{\link{scalars}}, \code{\link{analysisNames}}, +\code{\link{nElements}} } diff --git a/man/scalars.Rd b/man/scalars.Rd index 57e9513..7b6d504 100644 --- a/man/scalars.Rd +++ b/man/scalars.Rd @@ -10,13 +10,31 @@ scalars(x, ...) \S4method{scalars}{ModelArray}(x, ...) } \arguments{ -\item{x}{A ModelArray object} +\item{x}{A \linkS4class{ModelArray} object.} -\item{...}{Additional arguments. Currently accepts a scalar name (character).} +\item{...}{Optional: a single character string giving the scalar name to +extract. If omitted, the entire named list is returned.} } \value{ -A matrix of element-wise scalar data: elements (row) by source files (column). +If called with no extra arguments, a named list of +\linkS4class{DelayedArray} matrices (elements as rows, source files as +columns). If called with a scalar name, the corresponding single +\linkS4class{DelayedArray} matrix. } \description{ -Element-wise scalar data of a ModelArray object +Retrieve scalar matrices from a \linkS4class{ModelArray}. When called +with no additional arguments, returns the full named list of all scalar +matrices. When called with a single scalar name, returns that one matrix. +} +\examples{ +\dontrun{ +ma <- ModelArray("data.h5", scalar_types = c("FD", "FC")) +scalars(ma) # named list of all scalars +scalars(ma, "FD") # single DelayedArray matrix +} + +} +\seealso{ +\code{\link{sources}}, \code{\link{results}}, +\code{\link{scalarNames}}, \code{\link{nElements}} } diff --git a/man/show-ModelArray-method.Rd b/man/show-ModelArray-method.Rd deleted file mode 100644 index ecd0e43..0000000 --- a/man/show-ModelArray-method.Rd +++ /dev/null @@ -1,15 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/ModelArray_S4Methods.R -\name{show,ModelArray-method} -\alias{show,ModelArray-method} -\title{Show ModelArray object} -\usage{ -\S4method{show}{ModelArray}(object) -} -\arguments{ -\item{object}{An ModelArray object} -} -\description{ -Print the basic information for an ModelArray object, including number of source files, -scalar names, and any analysis names. -} From f177e7f0a9b533a689084d187321633f9dbdfc41 Mon Sep 17 00:00:00 2001 From: araikes Date: Mon, 30 Mar 2026 17:26:05 -0700 Subject: [PATCH 09/23] Update ModelArray.lm roxygen --- R/ModelArray_S4Methods.R | 279 ++++++++++++++++++++++++++++++++------ R/analyse.R | 202 ++++++++++++++++----------- man/ModelArray.lm.Rd | 197 +++++++++++++++++---------- man/exampleElementData.Rd | 50 +++++-- man/sources.Rd | 22 ++- 5 files changed, 539 insertions(+), 211 deletions(-) diff --git a/R/ModelArray_S4Methods.R b/R/ModelArray_S4Methods.R index 0cfe44c..274bc72 100644 --- a/R/ModelArray_S4Methods.R +++ b/R/ModelArray_S4Methods.R @@ -1,13 +1,17 @@ ### Methods of "ModelArray" ### ### Show ModelArray ##### -#' Show ModelArray object +#' @rdname ModelArray-class #' #' @description -#' Print the basic information for an ModelArray object, including number of source files, -#' scalar names, and any analysis names. +#' Prints a summary of the ModelArray including file path, scalar dimensions, +#' and any saved analysis names. +#' +#' @param object A \linkS4class{ModelArray} object. +#' +#' @return Invisible \code{NULL}. Called for its side effect of printing +#' to the console. #' -#' @param object An ModelArray object #' @export setMethod("show", "ModelArray", function(object) { # , group_name_results="results" @@ -50,8 +54,27 @@ setMethod("show", "ModelArray", function(object) { # , group_name_results="resul #' Source filenames of a ModelArray object #' -#' @param x A ModelArray object -#' @return A list of source filenames +#' @description +#' Retrieve the named list of source filename vectors from a +#' \linkS4class{ModelArray}. Each element of the list corresponds to one +#' scalar and contains a character vector of filenames, one per input +#' file/subject. +#' +#' @param x A \linkS4class{ModelArray} object. +#' +#' @return A named list of character vectors. Names correspond to scalar names +#' (e.g. \code{"FD"}, \code{"FC"}). +#' +#' @seealso \code{\link{scalars}}, \code{\link{results}}, +#' \code{\link{scalarNames}}, \code{\link{nInputFiles}} +#' +#' @examples +#' \dontrun{ +#' ma <- ModelArray("data.h5", scalar_types = c("FD")) +#' sources(ma) # named list +#' sources(ma)[["FD"]] # character vector of filenames +#' } +#' #' @name sources #' @export setGeneric("sources", function(x) standardGeneric("sources")) @@ -63,9 +86,30 @@ setMethod("sources", "ModelArray", function(x) x@sources) #' Element-wise scalar data of a ModelArray object #' -#' @param x A ModelArray object -#' @param ... Additional arguments. Currently accepts a scalar name (character). -#' @return A matrix of element-wise scalar data: elements (row) by source files (column). +#' @description +#' Retrieve scalar matrices from a \linkS4class{ModelArray}. When called +#' with no additional arguments, returns the full named list of all scalar +#' matrices. When called with a single scalar name, returns that one matrix. +#' +#' @param x A \linkS4class{ModelArray} object. +#' @param ... Optional: a single character string giving the scalar name to +#' extract. If omitted, the entire named list is returned. +#' +#' @return If called with no extra arguments, a named list of +#' \linkS4class{DelayedArray} matrices (elements as rows, source files as +#' columns). If called with a scalar name, the corresponding single +#' \linkS4class{DelayedArray} matrix. +#' +#' @seealso \code{\link{sources}}, \code{\link{results}}, +#' \code{\link{scalarNames}}, \code{\link{nElements}} +#' +#' @examples +#' \dontrun{ +#' ma <- ModelArray("data.h5", scalar_types = c("FD", "FC")) +#' scalars(ma) # named list of all scalars +#' scalars(ma, "FD") # single DelayedArray matrix +#' } +#' #' @name scalars #' @export setGeneric("scalars", function(x, ...) standardGeneric("scalars")) @@ -89,9 +133,43 @@ setMethod( #' Statistical results of a ModelArray object #' -#' @param x A ModelArray object -#' @param ... Additional arguments. Currently accepts an analysis name (character). -#' @return Statistical results in this ModelArray object +#' @description +#' Retrieve previously saved analysis results from a \linkS4class{ModelArray}. +#' When called with no additional arguments, returns the full named list of all +#' result sets. When called with a single analysis name, returns that one +#' result set. +#' +#' @details +#' Each result set is itself a list containing at minimum +#' \code{results_matrix} (a \linkS4class{DelayedArray} with elements as rows +#' and statistics as columns). Column names are stored alongside the matrix +#' in the HDF5 file. +#' +#' Results are only available if \code{analysis_names} was supplied when the +#' \linkS4class{ModelArray} was constructed, or if results have been written +#' back with \code{\link{writeResults}}. +#' +#' @param x A \linkS4class{ModelArray} object. +#' @param ... Optional: a single character string giving the analysis name to +#' extract. If omitted, the entire named list is returned. +#' +#' @return If called with no extra arguments, a named list of result lists. +#' If called with an analysis name, the corresponding result list (containing +#' at minimum \code{results_matrix}). +#' +#' @seealso \code{\link{sources}}, \code{\link{scalars}}, +#' \code{\link{analysisNames}}, \code{\link{writeResults}}, +#' \code{\link{ModelArray.lm}}, \code{\link{ModelArray.gam}} +#' +#' @examples +#' \dontrun{ +#' ma <- ModelArray("data.h5", scalar_types = c("FD"), +#' analysis_names = c("lm_age")) +#' results(ma) # named list of all results +#' results(ma, "lm_age") # single result set +#' results(ma, "lm_age")$results_matrix +#' } +#' #' @name results #' @export setGeneric("results", function(x, ...) standardGeneric("results")) @@ -138,31 +216,48 @@ setMethod( #' Example per-element data.frame for user functions #' -#' @title Example per-element data.frame for user functions -#' @name exampleElementData -#' @rdname exampleElementData #' @description -#' Generic for constructing a per-element data.frame from a `ModelArray`. -#' See the `ModelArray` method for details. +#' Constructs a per-element data.frame from a \linkS4class{ModelArray} that +#' mirrors the \code{data} argument passed to user functions by +#' \code{\link{ModelArray.wrap}}. This is useful for testing and debugging +#' user-supplied functions outside of the full element-wise analysis loop. +#' +#' @details +#' Returns a copy of \code{phenotypes} with an extra column named by +#' \code{scalar} populated with the selected element's values from the +#' \linkS4class{ModelArray}. This mirrors the per-element data that +#' \code{\link{ModelArray.wrap}} passes to user functions (as \code{data = dat}). +#' +#' Use this to verify that your custom function works correctly on a single +#' element before committing to a full \code{\link{ModelArray.wrap}} run. #' -#' @param x A `ModelArray` object (or compatible type) -#' @param ... Additional arguments (ignored) +#' @param x A \linkS4class{ModelArray} object. +#' @param ... Additional arguments passed to the method (currently unused +#' at the generic level). +#' +#' @seealso \code{\link{ModelArray.wrap}}, \code{\link{scalars}} +#' +#' @rdname exampleElementData #' @export setGeneric("exampleElementData", function(x, ...) standardGeneric("exampleElementData")) -#' Example per-element data.frame for user functions #' @rdname exampleElementData #' -#' @description -#' Returns a copy of `phenotypes` with an extra column named by `scalar` populated -#' with the selected element's values from the `ModelArray`. This mirrors the -#' per-element data that `ModelArray.wrap` passes to user functions (`data = dat`). -#' -#' @param x An ModelArray object -#' @param scalar A character. The name of the element-wise scalar to append -#' @param i_element An integer, the i_th element (1-based) -#' @param phenotypes A data.frame of the cohort with independent variables/covariates -#' @return A data.frame with the additional response column named by `scalar` +#' @param x A \linkS4class{ModelArray} object. +#' @param scalar Character. The name of the element-wise scalar to append +#' as a column. Must be one of \code{names(scalars(x))}. Default is +#' \code{"FD"}. +#' @param i_element Integer. The 1-based index of the element whose values +#' should be extracted. Must be between 1 and the number of elements for +#' the given scalar. +#' @param phenotypes A data.frame of the cohort with columns of independent +#' variables and covariates. Must have the same number of rows as the +#' number of source files in the \linkS4class{ModelArray}. +#' +#' @return A data.frame: the input \code{phenotypes} with one additional +#' column named by \code{scalar} containing that element's values across +#' all subjects. +#' #' @examples #' \dontrun{ #' h5_path <- system.file("extdata", "n50_fixels.h5", package = "ModelArray") @@ -170,7 +265,15 @@ setGeneric("exampleElementData", function(x, ...) standardGeneric("exampleElemen #' ma <- ModelArray(h5_path, scalar_types = c("FD")) #' phen <- read.csv(csv_path) #' df <- exampleElementData(ma, scalar = "FD", i_element = 1, phenotypes = phen) +#' +#' # Now test your custom function on this single element: +#' my_fun <- function(data, ...) { +#' mod <- lm(FD ~ age + sex, data = data) +#' broom::tidy(mod) +#' } +#' my_fun(data = df) #' } +#' #' @export setMethod( "exampleElementData", @@ -197,9 +300,27 @@ setMethod( #' Number of elements in a ModelArray #' -#' @param x A ModelArray object -#' @param scalar Optional scalar name. Defaults to the first scalar. -#' @return Integer, number of elements (rows) +#' @description +#' Returns the number of elements (e.g., fixels or voxels) for a given scalar +#' in a \linkS4class{ModelArray}. This is the row count of the scalar matrix. +#' +#' @param x A \linkS4class{ModelArray} object. +#' @param scalar Optional character string. Name of the scalar to query. +#' Defaults to the first scalar in \code{names(scalars(x))}. +#' +#' @return Integer. The number of elements (rows) in the scalar matrix. +#' +#' @seealso \code{\link{nInputFiles}}, \code{\link{numElementsTotal}}, +#' \code{\link{scalars}}, \code{\link{scalarNames}} +#' +#' @examples +#' \dontrun{ +#' ma <- ModelArray("data.h5", scalar_types = c("FD")) +#' nElements(ma) +#' nElements(ma, "FD") +#' } +#' +#' @name nElements #' @export setGeneric("nElements", function(x, scalar = NULL) standardGeneric("nElements")) @@ -212,9 +333,27 @@ setMethod("nElements", "ModelArray", function(x, scalar = NULL) { #' Number of input files in a ModelArray #' -#' @param x A ModelArray object -#' @param scalar Optional scalar name. Defaults to the first scalar. -#' @return Integer, number of input files (columns) +#' @description +#' Returns the number of input files (i.e., subjects or source files) for a +#' given scalar in a \linkS4class{ModelArray}. This is the column count of the +#' scalar matrix. +#' +#' @param x A \linkS4class{ModelArray} object. +#' @param scalar Optional character string. Name of the scalar to query. +#' Defaults to the first scalar in \code{names(scalars(x))}. +#' +#' @return Integer. The number of input files (columns) in the scalar matrix. +#' +#' @seealso \code{\link{nElements}}, \code{\link{sources}}, +#' \code{\link{scalarNames}} +#' +#' @examples +#' \dontrun{ +#' ma <- ModelArray("data.h5", scalar_types = c("FD")) +#' nInputFiles(ma) +#' } +#' +#' @name nInputFiles #' @export setGeneric("nInputFiles", function(x, scalar = NULL) standardGeneric("nInputFiles")) @@ -227,8 +366,24 @@ setMethod("nInputFiles", "ModelArray", function(x, scalar = NULL) { #' Names of scalars in a ModelArray #' -#' @param x A ModelArray object -#' @return Character vector of scalar names +#' @description +#' Returns the names of all scalar datasets loaded in a +#' \linkS4class{ModelArray} (e.g. \code{"FD"}, \code{"FC"}, \code{"log_FC"}). +#' +#' @param x A \linkS4class{ModelArray} object. +#' +#' @return Character vector of scalar names. +#' +#' @seealso \code{\link{scalars}}, \code{\link{analysisNames}}, +#' \code{\link{nElements}} +#' +#' @examples +#' \dontrun{ +#' ma <- ModelArray("data.h5", scalar_types = c("FD", "FC")) +#' scalarNames(ma) # c("FD", "FC") +#' } +#' +#' @name scalarNames #' @export setGeneric("scalarNames", function(x) standardGeneric("scalarNames")) @@ -238,10 +393,30 @@ setMethod("scalarNames", "ModelArray", function(x) { names(x@scalars) }) + #' Names of analyses in a ModelArray #' -#' @param x A ModelArray object -#' @return Character vector of analysis names +#' @description +#' Returns the names of all analysis result sets currently loaded in a +#' \linkS4class{ModelArray}. These correspond to subfolder names under +#' \code{/results/} in the HDF5 file. +#' +#' @param x A \linkS4class{ModelArray} object. +#' +#' @return Character vector of analysis names. Returns \code{character(0)} +#' if no analyses have been loaded or saved. +#' +#' @seealso \code{\link{results}}, \code{\link{scalarNames}}, +#' \code{\link{writeResults}} +#' +#' @examples +#' \dontrun{ +#' ma <- ModelArray("data.h5", scalar_types = c("FD"), +#' analysis_names = c("lm_age")) +#' analysisNames(ma) # "lm_age" +#' } +#' +#' @name analysisNames #' @export setGeneric("analysisNames", function(x) standardGeneric("analysisNames")) @@ -254,11 +429,26 @@ setMethod("analysisNames", "ModelArray", function(x) { #' Element metadata from a ModelArray #' #' @description -#' Reads element metadata (e.g., greyordinates for cifti data) from the h5 file -#' if present. Returns NULL if no element metadata is found. +#' Reads element metadata (e.g., greyordinates for CIFTI data, or fixel/voxel +#' coordinate information) from the HDF5 file if present. The function searches +#' for known metadata dataset names (\code{"greyordinates"}, \code{"fixels"}, +#' \code{"voxels"}) at the top level of the HDF5 file. +#' +#' @param x A \linkS4class{ModelArray} object. +#' +#' @return A matrix or data.frame of element metadata if found, or \code{NULL} +#' if no known metadata dataset exists in the HDF5 file. +#' +#' @seealso \code{\link{nElements}}, \code{\link{scalars}} #' -#' @param x A ModelArray object -#' @return A matrix or data.frame of element metadata, or NULL +#' @examples +#' \dontrun{ +#' ma <- ModelArray("data.h5", scalar_types = c("FD")) +#' meta <- elementMetadata(ma) +#' if (!is.null(meta)) head(meta) +#' } +#' +#' @name elementMetadata #' @export setGeneric("elementMetadata", function(x) standardGeneric("elementMetadata")) @@ -279,5 +469,6 @@ setMethod("elementMetadata", "ModelArray", function(x) { } NULL }) + # setMethod("lm", # signature = c("formula", "ModelArray", "data.frame", "character", "integer")) diff --git a/R/analyse.R b/R/analyse.R index cb0ec6c..5f3c2c0 100644 --- a/R/analyse.R +++ b/R/analyse.R @@ -1,93 +1,139 @@ -# This file provides the functions for model fitting of (all) elements - - -#' Fit linear model for element-wise data +#' Fit element-wise linear models #' #' @description -#' `ModelArray.lm` fits linear model (`stats::lm()`) for each of elements requested, and returns a tibble -#' dataframe of requested model statistics. +#' `ModelArray.lm` fits a linear model at each requested element in a +#' \linkS4class{ModelArray} and returns a tibble of requested model statistics. #' #' @details -#' You may request returning specific statistical variables by setting \code{var.*}, -#' or you can get all by setting \code{full.outputs=TRUE}. -#' Note that statistics covered by \code{full.outputs} or \code{var.*} are the ones from broom::tidy() -#' and broom::glance() only, and do not include corrected p-values. -#' However FDR-corrected p-values ("fdr") are generated by default. +#' You may request returning specific statistical variables by setting +#' \code{var.*}, or you can get all by setting \code{full.outputs = TRUE}. +#' Note that statistics covered by \code{full.outputs} or \code{var.*} are +#' the ones from \code{broom::tidy()}, \code{broom::glance()} only, and do +#' not include corrected p-values. However FDR-corrected p-values +#' (\code{"fdr"}) are generated by default. +#' #' List of acceptable statistic names for each of \code{var.*}: #' \itemize{ -#' \item \code{var.terms}: c("estimate","std.error","statistic","p.value"); -#' For interpretation please see \link[broom]{tidy.lm}. -#' \item \code{var.model}: c("r.squared", "adj.r.squared", "sigma", "statistic", -#' "p.value", "df", "logLik", "AIC", "BIC", "deviance", "df.residual", "nobs"); -#' For interpretation please see \link[broom]{glance.lm}. +#' \item \code{var.terms}: \code{c("estimate", "std.error", "statistic", +#' "p.value")}; For interpretation please see \link[broom]{tidy.lm}. +#' \item \code{var.model}: \code{c("r.squared", "adj.r.squared", "sigma", +#' "statistic", "p.value", "df", "logLik", "AIC", "BIC", "deviance", +#' "df.residual", "nobs")}; For interpretation please see +#' \link[broom]{glance.lm}. #' } -#' For p-value corrections (arguments \code{correct.p.value.*}), -#' supported methods include all methods in `p.adjust.methods` except "none". -#' Can be more than one method. FDR-corrected p-values ("fdr") are calculated by default. -#' Turn it off by setting to "none". \cr -#' Arguments \code{num.subj.lthr.abs} and \code{num.subj.lthr.rel} -#' are mainly for input data with subject-specific masks, -#' i.e. currently only for volume data. For fixel-wise data, you may ignore these arguments. #' -#' @param formula Formula (passed to `stats::lm()`) -#' @param data ModelArray class -#' @param phenotypes A data.frame of the cohort with columns of independent variables -#' and covariates to be added to the model. It should contains a column called "source_file", -#' and this column should match to that in \code{data}. -#' @param scalar A character. The name of the element-wise scalar to be analysed -#' @param element.subset A list of positive integers (min = 1, max = number of elements). -#' The subset of elements you want to run. Default is `NULL`, i.e. requesting all elements in `data`. -#' @param full.outputs TRUE or FALSE, Whether to return full set of outputs. -#' If FALSE, it will only return those requested in arguments \code{var.*} and \code{correct.p.value.*}; -#' if TRUE, arguments \code{var.*} will be ignored, and will return all possible statistics for -#' \code{var.*} and any options requested in arguments \code{correct.p.value.*}. -#' @param var.terms A list of characters. -#' The list of variables to save for terms (got from `broom::tidy()`). -#' See "Details" section for more. -#' @param var.model A list of characters. -#' The list of variables to save for the model (got from `broom::glance()`). -#' See "Details" section for more. -#' @param correct.p.value.terms A list of characters. -#' To perform and add a column for p.value correction for each term. Default: "fdr". -#' See "Details" section for more. -#' @param correct.p.value.model A list of characters. -#' To perform and add a column for p.value correction for the model. -#' Default: "fdr". See "Details" section for more. -#' @param num.subj.lthr.abs -#' An integer, lower threshold of absolute number of subjects. -#' For an element, if number of subjects who have finite values (defined by `is.finite()`, -#' i.e. not NaN or NA or Inf) in h5 file > \code{num.subj.lthr.abs}, -#' then this element will be run normally; otherwise, -#' this element will be skipped and statistical outputs will be set as NaN. -#' Default is 10. -#' @param num.subj.lthr.rel A value between 0-1, lower threshold of relative number of subjects. -#' Similar to \code{num.subj.lthr.abs}, -#' if proportion of subjects who have valid value > \code{num.subj.lthr.rel}, -#' then this element will be run normally; otherwise, -#' this element will be skipped and statistical outputs will be set as NaN. -#' Default is 0.2. -#' @param verbose TRUE or FALSE, to print verbose message or not -#' @param pbar TRUE or FALSE, to print progress bar or not -#' @param n_cores Positive integer, The number of CPU cores to run with -#' @param on_error Character: one of "stop", "skip", or "debug". When an error occurs -#' while fitting an element, choose whether to stop, skip returning all-NaN values for -#' that element, or drop into `browser()` (if interactive) then skip. Default: "stop". -#' @param write_results_name Optional analysis name for incremental writes to -#' `results//results_matrix`. -#' @param write_results_file Optional HDF5 file path used when `write_results_name` is provided. -#' @param write_results_flush_every Positive integer number of elements per write block. -#' @param write_results_storage_mode Storage mode for results writes (e.g., `"double"`). -#' @param write_results_compression_level Gzip compression level (0-9) for results writes. -#' @param return_output If TRUE (default), return the combined data.frame. If FALSE, -#' returns `invisible(NULL)`; useful for streaming large runs to HDF5. -#' @param ... Additional arguments for `stats::lm()` -#' @return Tibble with the summarized model statistics for all elements requested +#' For p-value corrections (arguments \code{correct.p.value.*}), supported +#' methods include all methods in \code{p.adjust.methods} except +#' \code{"none"}. You can request more than one method. FDR-corrected +#' p-values (\code{"fdr"}) are calculated by default. Turn it off by +#' setting to \code{"none"}. +#' +#' Arguments \code{num.subj.lthr.abs} and \code{num.subj.lthr.rel} are +#' mainly for input data with subject-specific masks, i.e. currently only +#' for volume data. For fixel-wise data, you may ignore these arguments. +#' +#' @param formula Formula (passed to \code{\link[stats]{lm}}). +#' @param data A \linkS4class{ModelArray} object. +#' @param phenotypes A data.frame of the cohort with columns of independent +#' variables and covariates to be added to the model. It must contain a +#' column called \code{"source_file"} whose entries match those in +#' \code{sources(data)[[scalar]]}. +#' @param scalar Character. The name of the element-wise scalar to analyse. +#' Must be one of \code{names(scalars(data))}. +#' @param element.subset Integer vector of element indices (1-based) to run. +#' Default is \code{NULL}, i.e. all elements in \code{data}. +#' @param full.outputs Logical. If \code{TRUE}, return the full set of +#' statistics (ignoring \code{var.*} arguments). If \code{FALSE} (default), +#' only return those requested in \code{var.*} and +#' \code{correct.p.value.*}. +#' @param var.terms Character vector. Statistics to save per term, from +#' \code{broom::tidy()}. See Details. +#' @param var.model Character vector. Statistics to save for the overall +#' model, from \code{broom::glance()}. See Details. +#' @param correct.p.value.terms Character vector. P-value correction +#' method(s) for each term. Default: \code{"fdr"}. See Details. +#' @param correct.p.value.model Character vector. P-value correction +#' method(s) for the model-level p-value. Default: \code{"fdr"}. +#' See Details. +#' @param num.subj.lthr.abs Integer. Lower threshold for the absolute +#' number of subjects with finite scalar values (not \code{NaN}, +#' \code{NA}, or \code{Inf}) required per element. Elements below this +#' threshold are skipped (outputs set to \code{NaN}). Default is 10. +#' @param num.subj.lthr.rel Numeric between 0 and 1. Lower threshold for +#' the proportion of subjects with finite values. Used together with +#' \code{num.subj.lthr.abs} (the effective threshold is the maximum of +#' the two). Default is 0.2. +#' @param verbose Logical. Print progress messages. Default \code{TRUE}. +#' @param pbar Logical. Show progress bar. Default \code{TRUE}. +#' @param n_cores Positive integer. Number of CPU cores for parallel +#' processing via \code{\link[parallel]{mclapply}}. Default is 1 +#' (serial). +#' @param on_error Character: one of \code{"stop"}, \code{"skip"}, or +#' \code{"debug"}. When an error occurs fitting one element: \code{"stop"} +#' halts execution; \code{"skip"} returns all-\code{NaN} for that element; +#' \code{"debug"} drops into \code{\link{browser}} (if interactive) then +#' skips. Default: \code{"stop"}. +#' @param write_results_name Optional character. If provided, results are +#' incrementally written to +#' \code{results//results_matrix} in the HDF5 file +#' specified by \code{write_results_file}. +#' @param write_results_file Optional character. HDF5 file path for +#' incremental result writes. Required when \code{write_results_name} +#' is provided. +#' @param write_results_flush_every Positive integer. Number of elements +#' per write block. Default 1000. +#' @param write_results_storage_mode Character. Storage mode for HDF5 +#' writes (e.g. \code{"double"}). Default \code{"double"}. +#' @param write_results_compression_level Integer 0--9. Gzip compression +#' level for HDF5 writes. Default 4. +#' @param return_output Logical. If \code{TRUE} (default), return the +#' combined data.frame. If \code{FALSE}, return \code{invisible(NULL)}; +#' useful when writing large outputs directly to HDF5. +#' @param ... Additional arguments passed to \code{\link[stats]{lm}}. +#' +#' @return A tibble with one row per element. The first column is +#' \code{element_id} (0-based). Remaining columns contain the requested +#' statistics, named as \code{.} for per-term statistics +#' and \code{model.} for model-level statistics. If p-value +#' corrections were requested, additional columns are appended with the +#' correction method as suffix (e.g. \code{.p.value.fdr}). +#' +#' @seealso \code{\link{ModelArray.gam}} for generalized additive models, +#' \code{\link{ModelArray.wrap}} for user-supplied functions, +#' \linkS4class{ModelArray} for the input class, +#' \code{\link{ModelArray}} for the constructor, +#' \code{\link{exampleElementData}} for testing formulas on a single +#' element. +#' +#' @examplesIf interactive() +#' ma <- ModelArray("path/to/data.h5", scalar_types = c("FD")) +#' phenotypes <- read.csv("cohort.csv") +#' +#' # Fit linear model with default outputs +#' results <- ModelArray.lm( +#' FD ~ age + sex, +#' data = ma, +#' phenotypes = phenotypes, +#' scalar = "FD" +#' ) +#' head(results) +#' +#' # Full outputs, no p-value correction +#' results_full <- ModelArray.lm( +#' FD ~ age + sex, +#' data = ma, +#' phenotypes = phenotypes, +#' scalar = "FD", +#' full.outputs = TRUE, +#' correct.p.value.terms = "none", +#' correct.p.value.model = "none" +#' ) +#' +#' @rdname ModelArray.lm #' @importFrom dplyr %>% #' @import doParallel #' @import tibble -#' @importFrom stats p.adjust lm #' @importFrom glue glue -#' @importFrom rlang := #' @export ModelArray.lm <- function(formula, data, phenotypes, scalar, element.subset = NULL, full.outputs = FALSE, diff --git a/man/ModelArray.lm.Rd b/man/ModelArray.lm.Rd index 9188bbf..20c55e9 100644 --- a/man/ModelArray.lm.Rd +++ b/man/ModelArray.lm.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/analyse.R \name{ModelArray.lm} \alias{ModelArray.lm} -\title{Fit linear model for element-wise data} +\title{Fit element-wise linear models} \usage{ ModelArray.lm( formula, @@ -31,107 +31,158 @@ ModelArray.lm( ) } \arguments{ -\item{formula}{Formula (passed to `stats::lm()`)} +\item{formula}{Formula (passed to \code{\link[stats]{lm}}).} -\item{data}{ModelArray class} +\item{data}{A \linkS4class{ModelArray} object.} -\item{phenotypes}{A data.frame of the cohort with columns of independent variables -and covariates to be added to the model. It should contains a column called "source_file", -and this column should match to that in \code{data}.} +\item{phenotypes}{A data.frame of the cohort with columns of independent +variables and covariates to be added to the model. It must contain a +column called \code{"source_file"} whose entries match those in +\code{sources(data)[[scalar]]}.} -\item{scalar}{A character. The name of the element-wise scalar to be analysed} +\item{scalar}{Character. The name of the element-wise scalar to analyse. +Must be one of \code{names(scalars(data))}.} -\item{element.subset}{A list of positive integers (min = 1, max = number of elements). -The subset of elements you want to run. Default is `NULL`, i.e. requesting all elements in `data`.} +\item{element.subset}{Integer vector of element indices (1-based) to run. +Default is \code{NULL}, i.e. all elements in \code{data}.} -\item{full.outputs}{TRUE or FALSE, Whether to return full set of outputs. -If FALSE, it will only return those requested in arguments \code{var.*} and \code{correct.p.value.*}; -if TRUE, arguments \code{var.*} will be ignored, and will return all possible statistics for -\code{var.*} and any options requested in arguments \code{correct.p.value.*}.} +\item{full.outputs}{Logical. If \code{TRUE}, return the full set of +statistics (ignoring \code{var.*} arguments). If \code{FALSE} (default), +only return those requested in \code{var.*} and +\code{correct.p.value.*}.} -\item{var.terms}{A list of characters. -The list of variables to save for terms (got from `broom::tidy()`). -See "Details" section for more.} +\item{var.terms}{Character vector. Statistics to save per term, from +\code{broom::tidy()}. See Details.} -\item{var.model}{A list of characters. -The list of variables to save for the model (got from `broom::glance()`). -See "Details" section for more.} +\item{var.model}{Character vector. Statistics to save for the overall +model, from \code{broom::glance()}. See Details.} -\item{correct.p.value.terms}{A list of characters. -To perform and add a column for p.value correction for each term. Default: "fdr". -See "Details" section for more.} +\item{correct.p.value.terms}{Character vector. P-value correction +method(s) for each term. Default: \code{"fdr"}. See Details.} -\item{correct.p.value.model}{A list of characters. -To perform and add a column for p.value correction for the model. -Default: "fdr". See "Details" section for more.} +\item{correct.p.value.model}{Character vector. P-value correction +method(s) for the model-level p-value. Default: \code{"fdr"}. +See Details.} -\item{num.subj.lthr.abs}{An integer, lower threshold of absolute number of subjects. -For an element, if number of subjects who have finite values (defined by `is.finite()`, -i.e. not NaN or NA or Inf) in h5 file > \code{num.subj.lthr.abs}, -then this element will be run normally; otherwise, -this element will be skipped and statistical outputs will be set as NaN. -Default is 10.} +\item{num.subj.lthr.abs}{Integer. Lower threshold for the absolute +number of subjects with finite scalar values (not \code{NaN}, +\code{NA}, or \code{Inf}) required per element. Elements below this +threshold are skipped (outputs set to \code{NaN}). Default is 10.} -\item{num.subj.lthr.rel}{A value between 0-1, lower threshold of relative number of subjects. -Similar to \code{num.subj.lthr.abs}, -if proportion of subjects who have valid value > \code{num.subj.lthr.rel}, -then this element will be run normally; otherwise, -this element will be skipped and statistical outputs will be set as NaN. -Default is 0.2.} +\item{num.subj.lthr.rel}{Numeric between 0 and 1. Lower threshold for +the proportion of subjects with finite values. Used together with +\code{num.subj.lthr.abs} (the effective threshold is the maximum of +the two). Default is 0.2.} -\item{verbose}{TRUE or FALSE, to print verbose message or not} +\item{verbose}{Logical. Print progress messages. Default \code{TRUE}.} -\item{pbar}{TRUE or FALSE, to print progress bar or not} +\item{pbar}{Logical. Show progress bar. Default \code{TRUE}.} -\item{n_cores}{Positive integer, The number of CPU cores to run with} +\item{n_cores}{Positive integer. Number of CPU cores for parallel +processing via \code{\link[parallel]{mclapply}}. Default is 1 +(serial).} -\item{on_error}{Character: one of "stop", "skip", or "debug". When an error occurs -while fitting an element, choose whether to stop, skip returning all-NaN values for -that element, or drop into `browser()` (if interactive) then skip. Default: "stop".} +\item{on_error}{Character: one of \code{"stop"}, \code{"skip"}, or +\code{"debug"}. When an error occurs fitting one element: \code{"stop"} +halts execution; \code{"skip"} returns all-\code{NaN} for that element; +\code{"debug"} drops into \code{\link{browser}} (if interactive) then +skips. Default: \code{"stop"}.} -\item{write_results_name}{Optional analysis name for incremental writes to -`results//results_matrix`.} +\item{write_results_name}{Optional character. If provided, results are +incrementally written to +\code{results//results_matrix} in the HDF5 file +specified by \code{write_results_file}.} -\item{write_results_file}{Optional HDF5 file path used when `write_results_name` is provided.} +\item{write_results_file}{Optional character. HDF5 file path for +incremental result writes. Required when \code{write_results_name} +is provided.} -\item{write_results_flush_every}{Positive integer number of elements per write block.} +\item{write_results_flush_every}{Positive integer. Number of elements +per write block. Default 1000.} -\item{write_results_storage_mode}{Storage mode for results writes (e.g., `"double"`).} +\item{write_results_storage_mode}{Character. Storage mode for HDF5 +writes (e.g. \code{"double"}). Default \code{"double"}.} -\item{write_results_compression_level}{Gzip compression level (0-9) for results writes.} +\item{write_results_compression_level}{Integer 0--9. Gzip compression +level for HDF5 writes. Default 4.} -\item{return_output}{If TRUE (default), return the combined data.frame. If FALSE, -returns `invisible(NULL)`; useful for streaming large runs to HDF5.} +\item{return_output}{Logical. If \code{TRUE} (default), return the +combined data.frame. If \code{FALSE}, return \code{invisible(NULL)}; +useful when writing large outputs directly to HDF5.} -\item{...}{Additional arguments for `stats::lm()`} +\item{...}{Additional arguments passed to \code{\link[stats]{lm}}.} } \value{ -Tibble with the summarized model statistics for all elements requested when -`return_output = TRUE`; otherwise `invisible(NULL)`. +A tibble with one row per element. The first column is +\code{element_id} (0-based). Remaining columns contain the requested +statistics, named as \code{.} for per-term statistics +and \code{model.} for model-level statistics. If p-value +corrections were requested, additional columns are appended with the +correction method as suffix (e.g. \code{.p.value.fdr}). } \description{ -`ModelArray.lm` fits linear model (`stats::lm()`) for each of elements requested, and returns a tibble -dataframe of requested model statistics. +\code{ModelArray.lm} fits a linear model at each requested element in a +\linkS4class{ModelArray} and returns a tibble of requested model statistics. } \details{ -You may request returning specific statistical variables by setting \code{var.*}, -or you can get all by setting \code{full.outputs=TRUE}. -Note that statistics covered by \code{full.outputs} or \code{var.*} are the ones from broom::tidy() -and broom::glance() only, and do not include corrected p-values. -However FDR-corrected p-values ("fdr") are generated by default. +You may request returning specific statistical variables by setting +\code{var.*}, or you can get all by setting \code{full.outputs = TRUE}. +Note that statistics covered by \code{full.outputs} or \code{var.*} are +the ones from \code{broom::tidy()}, \code{broom::glance()} only, and do +not include corrected p-values. However FDR-corrected p-values +(\code{"fdr"}) are generated by default. + List of acceptable statistic names for each of \code{var.*}: \itemize{ - \item \code{var.terms}: c("estimate","std.error","statistic","p.value"); - For interpretation please see \link[broom]{tidy.lm}. - \item \code{var.model}: c("r.squared", "adj.r.squared", "sigma", "statistic", - "p.value", "df", "logLik", "AIC", "BIC", "deviance", "df.residual", "nobs"); - For interpretation please see \link[broom]{glance.lm}. +\item \code{var.terms}: \code{c("estimate", "std.error", "statistic", + "p.value")}; For interpretation please see \link[broom]{tidy.lm}. +\item \code{var.model}: \code{c("r.squared", "adj.r.squared", "sigma", + "statistic", "p.value", "df", "logLik", "AIC", "BIC", "deviance", + "df.residual", "nobs")}; For interpretation please see +\link[broom]{glance.lm}. +} + +For p-value corrections (arguments \code{correct.p.value.*}), supported +methods include all methods in \code{p.adjust.methods} except +\code{"none"}. You can request more than one method. FDR-corrected +p-values (\code{"fdr"}) are calculated by default. Turn it off by +setting to \code{"none"}. + +Arguments \code{num.subj.lthr.abs} and \code{num.subj.lthr.rel} are +mainly for input data with subject-specific masks, i.e. currently only +for volume data. For fixel-wise data, you may ignore these arguments. +} +\examples{ +\dontshow{if (interactive()) withAutoprint(\{ # examplesIf} +ma <- ModelArray("path/to/data.h5", scalar_types = c("FD")) +phenotypes <- read.csv("cohort.csv") + +# Fit linear model with default outputs +results <- ModelArray.lm( + FD ~ age + sex, + data = ma, + phenotypes = phenotypes, + scalar = "FD" +) +head(results) + +# Full outputs, no p-value correction +results_full <- ModelArray.lm( + FD ~ age + sex, + data = ma, + phenotypes = phenotypes, + scalar = "FD", + full.outputs = TRUE, + correct.p.value.terms = "none", + correct.p.value.model = "none" +) +\dontshow{\}) # examplesIf} } -For p-value corrections (arguments \code{correct.p.value.*}), -supported methods include all methods in `p.adjust.methods` except "none". -Can be more than one method. FDR-corrected p-values ("fdr") are calculated by default. -Turn it off by setting to "none". \cr -Arguments \code{num.subj.lthr.abs} and \code{num.subj.lthr.rel} -are mainly for input data with subject-specific masks, - i.e. currently only for volume data. For fixel-wise data, you may ignore these arguments. +\seealso{ +\code{\link{ModelArray.gam}} for generalized additive models, +\code{\link{ModelArray.wrap}} for user-supplied functions, +\linkS4class{ModelArray} for the input class, +\code{\link{ModelArray}} for the constructor, +\code{\link{exampleElementData}} for testing formulas on a single +element. } diff --git a/man/exampleElementData.Rd b/man/exampleElementData.Rd index 1699b25..f6e1383 100644 --- a/man/exampleElementData.Rd +++ b/man/exampleElementData.Rd @@ -10,29 +10,42 @@ exampleElementData(x, ...) \S4method{exampleElementData}{ModelArray}(x, scalar = "FD", i_element = 1L, phenotypes) } \arguments{ -\item{x}{An ModelArray object} +\item{x}{A \linkS4class{ModelArray} object.} -\item{...}{Additional arguments (ignored)} +\item{...}{Additional arguments passed to the method (currently unused +at the generic level).} -\item{scalar}{A character. The name of the element-wise scalar to append} +\item{scalar}{Character. The name of the element-wise scalar to append +as a column. Must be one of \code{names(scalars(x))}. Default is +\code{"FD"}.} -\item{i_element}{An integer, the i_th element (1-based)} +\item{i_element}{Integer. The 1-based index of the element whose values +should be extracted. Must be between 1 and the number of elements for +the given scalar.} -\item{phenotypes}{A data.frame of the cohort with independent variables/covariates} +\item{phenotypes}{A data.frame of the cohort with columns of independent +variables and covariates. Must have the same number of rows as the +number of source files in the \linkS4class{ModelArray}.} } \value{ -A data.frame with the additional response column named by `scalar` +A data.frame: the input \code{phenotypes} with one additional +column named by \code{scalar} containing that element's values across +all subjects. } \description{ -Generic for constructing a per-element data.frame from a `ModelArray`. -See the `ModelArray` method for details. - -Returns a copy of `phenotypes` with an extra column named by `scalar` populated -with the selected element's values from the `ModelArray`. This mirrors the -per-element data that `ModelArray.wrap` passes to user functions (`data = dat`). +Constructs a per-element data.frame from a \linkS4class{ModelArray} that +mirrors the \code{data} argument passed to user functions by +\code{\link{ModelArray.wrap}}. This is useful for testing and debugging +user-supplied functions outside of the full element-wise analysis loop. } \details{ -Example per-element data.frame for user functions +Returns a copy of \code{phenotypes} with an extra column named by +\code{scalar} populated with the selected element's values from the +\linkS4class{ModelArray}. This mirrors the per-element data that +\code{\link{ModelArray.wrap}} passes to user functions (as \code{data = dat}). + +Use this to verify that your custom function works correctly on a single +element before committing to a full \code{\link{ModelArray.wrap}} run. } \examples{ \dontrun{ @@ -41,5 +54,16 @@ csv_path <- system.file("extdata", "n50_cohort.csv", package = "ModelArray") ma <- ModelArray(h5_path, scalar_types = c("FD")) phen <- read.csv(csv_path) df <- exampleElementData(ma, scalar = "FD", i_element = 1, phenotypes = phen) + +# Now test your custom function on this single element: +my_fun <- function(data, ...) { + mod <- lm(FD ~ age + sex, data = data) + broom::tidy(mod) +} +my_fun(data = df) +} + } +\seealso{ +\code{\link{ModelArray.wrap}}, \code{\link{scalars}} } diff --git a/man/sources.Rd b/man/sources.Rd index 67af997..f55f6e9 100644 --- a/man/sources.Rd +++ b/man/sources.Rd @@ -10,11 +10,27 @@ sources(x) \S4method{sources}{ModelArray}(x) } \arguments{ -\item{x}{A ModelArray object} +\item{x}{A \linkS4class{ModelArray} object.} } \value{ -A list of source filenames +A named list of character vectors. Names correspond to scalar names +(e.g. \code{"FD"}, \code{"FC"}). } \description{ -Source filenames of a ModelArray object +Retrieve the named list of source filename vectors from a +\linkS4class{ModelArray}. Each element of the list corresponds to one +scalar and contains a character vector of filenames, one per input +file/subject. +} +\examples{ +\dontrun{ +ma <- ModelArray("data.h5", scalar_types = c("FD")) +sources(ma) # named list +sources(ma)[["FD"]] # character vector of filenames +} + +} +\seealso{ +\code{\link{scalars}}, \code{\link{results}}, +\code{\link{scalarNames}}, \code{\link{nInputFiles}} } From 2bdf5b75ab09808233703619d53bcb26c7ee52de Mon Sep 17 00:00:00 2001 From: araikes Date: Mon, 30 Mar 2026 17:47:58 -0700 Subject: [PATCH 10/23] Update gam and wrap functions --- NAMESPACE | 5 - R/analyse.R | 342 ++++++++++++++++++----------------------- man/ModelArray.gam.Rd | 250 +++++++++++++----------------- man/ModelArray.wrap.Rd | 182 ++++++++++++++++------ 4 files changed, 387 insertions(+), 392 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 201c081..525532f 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -50,7 +50,6 @@ importFrom(crayon,black) importFrom(dplyr,"%>%") importFrom(dplyr,bind_cols) importFrom(dplyr,filter) -importFrom(dplyr,mutate) importFrom(dplyr,select) importFrom(glue,glue) importFrom(magrittr,"%>%") @@ -61,12 +60,8 @@ importFrom(mgcv,ti) importFrom(rhdf5,h5closeAll) importFrom(rhdf5,h5ls) importFrom(rhdf5,h5readAttributes) -importFrom(rlang,":=") importFrom(rlang,.data) importFrom(stats,as.formula) -importFrom(stats,drop.terms) importFrom(stats,lm) -importFrom(stats,p.adjust) importFrom(stats,p.adjust.methods) -importFrom(stats,terms) importFrom(utils,head) diff --git a/R/analyse.R b/R/analyse.R index 5f3c2c0..b7547f2 100644 --- a/R/analyse.R +++ b/R/analyse.R @@ -353,163 +353,73 @@ ModelArray.lm <- function(formula, data, phenotypes, scalar, element.subset = NU invisible(NULL) } -#' Run GAM for element-wise data +#' Fit element-wise generalized additive models +#' no model-level p-value for GAMs, so there is no +#' \code{correct.p.value.model} argument. #' -#' @description -#' `ModelArray.gam` fits gam model for each of elements requested, -#' and returns a tibble data.frame of requested model statistics. +#' @inheritParams ModelArray.lm #' -#' @details -#' You may request returning specific statistical variables by setting \code{var.*}, -#' or you can get all by setting \code{full.outputs=TRUE}. -#' Note that statistics covered by \code{full.outputs} or -#' \code{var.*} are the ones from broom::tidy(), broom::glance(), and summary() only, -#' and do not include delta adjusted R-squared or partial R-squared or corrected p-values. -#' However FDR-corrected p-values ("fdr") are generated by default. +#' @param formula Formula (passed to \code{\link[mgcv]{gam}}). +#' @param var.smoothTerms Character vector. Statistics to save for smooth +#' terms, from \code{broom::tidy(parametric = FALSE)}. See Details. +#' @param var.parametricTerms Character vector. Statistics to save for +#' parametric terms, from \code{broom::tidy(parametric = TRUE)}. +#' See Details. +#' @param var.model Character vector. Statistics to save for the overall +#' model, from \code{broom::glance()} and \code{summary()}. See Details. +#' @param changed.rsq.term.index A list of positive integers. Each value +#' is the index of a term on the right-hand side of \code{formula} for +#' which delta adjusted R-squared and partial R-squared should be +#' computed. Usually the term of interest is a smooth term or interaction +#' term. Default \code{NULL} (not computed). See Details for warnings. +#' @param correct.p.value.smoothTerms Character vector. P-value +#' correction method(s) for each smooth term. Default: \code{"fdr"}. +#' @param correct.p.value.parametricTerms Character vector. P-value +#' correction method(s) for each parametric term. Default: \code{"fdr"}. +#' @param ... Additional arguments passed to \code{\link[mgcv]{gam}}. #' -#' List of acceptable statistic names for each of \code{var.*}: -#' \itemize{ -#' \item \code{var.smoothTerms}: c("edf","ref.df","statistic","p.value"); -#' For interpretation please see \link[broom]{tidy.gam} with `parametric=FALSE`. -#' \item \code{var.parametricTerms}: c("estimate", "std.error","statistic","p.value"); -#' For interpretation please see \link[broom]{tidy.gam} with `parametric=TRUE`. -#' \item \code{var.model}: c("adj.r.squared","dev.expl", "sp.criterion", "scale", "df", -#' "logLik","AIC", "BIC", "deviance", "df.residual", "nobs"); "adj.r.squared" is \code{r.sq} from -#' \link[mgcv]{summary.gam}; "sp.criterion" is \code{sp.criterion} from \link[mgcv]{summary.gam}; -#' For interpretation please see \link[broom]{glance.gam} and \link[mgcv]{summary.gam}. -#' } +#' @return A tibble with one row per element. The first column is +#' \code{element_id} (0-based). Remaining columns contain the requested +#' statistics, named as \code{.}. If +#' \code{changed.rsq.term.index} was requested, additional columns +#' \code{.delta.adj.rsq} and \code{.partial.rsq} are +#' appended. #' -#' Regarding formula: So far these kinds of formula are tested: -#' \itemize{ -#' \item formula with smooth term, but without any interactions. -#' Examples like \code{y ~ s(x) + orderedFactor}; \code{y ~ s(x) + s(z)} -#' \item formula with interaction, but limited to only one interaction term, and in the formats of: -#' \itemize{ -#' \item Formula #1: \code{y ~ orderedFactor + s(x) + s(x, by=orderedFactor) + other_covariate}, -#' where \code{orderedFactor} should be discrete variables and generated by `ordered`. -#' The interaction term will be displayed as "s_x_BYorderedFactor" -#' in the column name in returned data.frame. -#' You may use function `gen_gamFormula_fxSmooth()` to generate one. -#' \item Formula #2: \code{y ~ ti(x) + ti(z) + ti(x,z) + other_covariate}, -#' where \code{x} and \code{z} should be continuous variables. -#' The interaction term will be displayed as "ti_x_z" in the column name in the returned data.frame. -#' You may use function `gen_gamFormula_contIx()` to generate one. -#' } -#' } -#' You may be interested in how important a term is in a model. -#' We provide two ways of quantification (see below). -#' Both of them require running the reduced model without this term of interest, -#' thus it will take longer time to run. -#' You can make such request via argument \code{changed.rsq.term.index}, and you'll get both quantifications. -#' \itemize{ -#' \item Delta adjusted R-squared (\code{delta.adj.rsq}) is defined as the difference between -#' adjusted R-squared of full model (full formula in \code{formula}) and that of reduced model -#' (formula without the term of interest). Notice that adjusted R-squared includes the penalty -#' from the model complexity. -#' \item Partial R-squared (\code{partial.rsq}) is defined as: -#' \code{(sse.reduced.model - sse.full.model) / sse.reduced.model}, -#' where \code{sse} is the error sum of squares (or, residual sum of squares). -#' It quantifies the amount of variance in the response variable that cannot be explained by the reduced model -#' (model without term of interest), but can be explained by the term of interest in the full model. +#' @seealso \code{\link{ModelArray.lm}} for linear models, +#' \code{\link{ModelArray.wrap}} for user-supplied functions, +#' \code{\link{gen_gamFormula_fxSmooth}} and +#' \code{\link{gen_gamFormula_contIx}} for formula helpers, +#' \linkS4class{ModelArray} for the input class. +#' +#' @examples{ +#' \dontrun{ +#' ma <- ModelArray("path/to/data.h5", scalar_types = c("FD")) +#' phenotypes <- read.csv("cohort.csv") +#' +#' results <- ModelArray.gam( +#' FD ~ s(age, fx = TRUE) + sex, +#' data = ma, +#' phenotypes = phenotypes, +#' scalar = "FD" +#' ) +#' head(results) +#' +#' # With changed R-squared for the smooth term (term index 1) +#' results_rsq <- ModelArray.gam( +#' FD ~ s(age, fx = TRUE) + sex, +#' data = ma, +#' phenotypes = phenotypes, +#' scalar = "FD", +#' changed.rsq.term.index = list(1) +#' ) #' } -#' \strong{___!!! WARNING !!!___}: -#' If you want to request \code{changed.rsq.term.index} for a term that has missing values, -#' before feeding \code{phenotypes} into \code{ModelArray.gam()}, -#' you must exclude those observations (i.e., those rows in \code{phenotypes}) -#' who have missing values in this term of interest from \code{phenotypes}. -#' You should do the same for each term you'd like to request in \code{changed.rsq.term.index}, -#' if that term has missing values. -#' Without such exclusion, the full and reduced models would include different number of subjects, -#' causing inaccuracy of calculation of \code{delta.adj.rsq} and \code{partial.rsq}. \cr \cr -#' Other notes on \code{changed.rsq.term.index}: -#' \itemize{ -#' \item When requesting \code{changed.rsq.term.index}, -#' \code{fx} should be set as \code{TRUE}, so that degree of freedom is fixed. -#' \item For formula with interactions, only formula in above formats are tested, -#' and only the values of interaction term are valid. -#' The delta.adj.rsq and partial.rsq for main effect (such as s(x) in Formula #1) -#' may not "functionally" be these metrics, as their definitions should be changed to -#' reduced formula without both main effect and interaction term. #' } -#' For p-value corrections (arguments \code{correct.p.value.*}), -#' supported methods include all methods in `p.adjust.methods` except "none". -#' You can request more than one method. FDR-corrected p-values ("fdr") are calculated by default. -#' Turn it off by setting to "none". -#' Please notice that different from `ModelArray.lm`, -#' there is no p.value for the GAM model, so no "correct.p.value.model" for GAM model. \cr -#' Arguments \code{num.subj.lthr.abs} and \code{num.subj.lthr.rel} -#' are mainly for input data with subject-specific masks, i.e. currently only for volume data. -#' For fixel-wise data, you may ignore these arguments. #' -#' @param formula Formula (passed to `mgcv::gam()`) -#' @param data ModelArray class -#' @param phenotypes A data.frame of the cohort with columns of independent variables and covariates -#' to be added to the model. It should contains a column called "source_file", -#' and this column should match to that in \code{data}. -#' @param scalar A character. The name of the element-wise scalar to be analysed -#' @param element.subset A list of positive integers (min = 1, max = number of elements). -#' The subset of elements you want to run. Default is `NULL`, i.e. requesting all elements in `data`. -#' @param full.outputs TRUE or FALSE, Whether to return full set of outputs. -#' If FALSE, it will only return those requested in arguments \code{var.*} and \code{correct.p.value.*}; -#' if TRUE, arguments \code{var.*} will be ignored, and will return all possible statistics for \code{var.*} -#' and any options requested in arguments \code{correct.p.value.*}. -#' @param var.smoothTerms A list of characters. -#' The list of variables to save for smooth terms (got from `broom::tidy(parametric = FALSE)`). -#' Example smooth term: age in formula "outcome ~ s(age)". See "Details" section for more. -#' @param var.parametricTerms A list of characters. -#' The list of variables to save for parametric terms (got from `broom::tidy(parametric = TRUE)`). -#' Example parametric term: sex in formula "outcome ~ s(age) + sex". -#' See "Details" section for more. -#' @param var.model A list of characters. -#' The list of variables to save for the model (got from `broom::glance()` and `summary()`). -#' See "Details" section for more. -#' @param changed.rsq.term.index A list of (one or several) positive integers. -#' Each element in the list means the i-th term of the formula's right hand side -#' as the term of interest for changed R-squared between with and without it. -#' Both delta adjusted R-squared and partial R-squared will be calculated for each of term requested. -#' Usually term of interest is smooth term, or interaction term in models with interactions. -#' See "Details" section for more, especially the "WARNING" in Details section for cases with caution!! -#' @param correct.p.value.smoothTerms A list of characters. -#' To perform and add a column for p.value correction for each smooth term. -#' Default: "fdr". See "Details" section for more. -#' @param correct.p.value.parametricTerms A list of characters. -#' To perform and add a column for p.value correction for each parametric term. Default: "fdr". -#' See "Details" section for more. -#' @param num.subj.lthr.abs An integer, lower threshold of absolute number of subjects. -#' For an element, if number of subjects who have finite values (defined by `is.finite()`, -#' i.e. not NaN or NA or Inf) in h5 file > \code{num.subj.lthr.abs}, -#' then this element will be run normally; -#' otherwise, this element will be skipped and statistical outputs will be set as NaN. -#' Default is 10. -#' @param num.subj.lthr.rel A value between 0-1, lower threshold of relative number of subjects. -#' Similar to \code{num.subj.lthr.abs}, -#' if proportion of subjects who have valid value > \code{num.subj.lthr.rel}, -#' then this element will be run normally; otherwise, -#' this element will be skipped and statistical outputs will be set as NaN. -#' Default is 0.2. -#' @param verbose TRUE or FALSE, to print verbose messages or not -#' @param pbar TRUE or FALSE, to print progress bar or not -#' @param n_cores Positive integer, The number of CPU cores to run with -#' @param on_error Character: one of "stop", "skip", or "debug". When an error occurs -#' while fitting an element, choose whether to stop, skip returning all-NaN values for -#' that element, or drop into `browser()` (if interactive) then skip. Default: "stop". -#' @param write_results_name Optional analysis name for incremental writes to -#' `results//results_matrix`. -#' @param write_results_file Optional HDF5 file path used when `write_results_name` is provided. -#' @param write_results_flush_every Positive integer number of elements per write block. -#' @param write_results_storage_mode Storage mode for results writes (e.g., `"double"`). -#' @param write_results_compression_level Gzip compression level (0-9) for results writes. -#' @param return_output If TRUE (default), return the combined data.frame. If FALSE, -#' returns `invisible(NULL)`; useful for streaming large runs to HDF5. -#' @param ... Additional arguments for `mgcv::gam()` -#' @return Tibble with the summarized model statistics for all elements requested -#' @importFrom dplyr %>% mutate +#' @rdname ModelArray.gam +#' @importFrom dplyr %>% #' @import doParallel #' @import tibble -#' @import mgcv -#' @importFrom stats terms as.formula drop.terms p.adjust #' @importFrom glue glue -#' @importFrom rlang := #' @export ModelArray.gam <- function(formula, data, phenotypes, scalar, element.subset = NULL, full.outputs = FALSE, @@ -905,57 +815,101 @@ ModelArray.gam <- function(formula, data, phenotypes, scalar, element.subset = N #' Run a user-supplied function for element-wise data #' #' @description -#' `ModelArray.wrap` runs a user-supplied function `FUN` for each requested element and -#' returns a tibble/data.frame of results combined across elements. +#' `ModelArray.wrap` runs a user-supplied function \code{FUN} at each +#' requested element and returns a tibble of results combined across +#' elements. #' #' @details -#' This provides a generic framework reusing ModelArray's per-element looping, alignment, -#' subject-thresholding, and parallelization. The user function will be called as -#' `FUN(data = dat, ...)` where `dat` is `phenotypes` with the response column named `scalar` -#' appended for the current element. The return value from `FUN` for a single element must be -#' either a one-row data.frame/tibble, a named list, or a named atomic vector. The column -#' names from the first successful element determine the final schema. +#' This provides a generic framework reusing ModelArray's per-element +#' looping, alignment, subject-thresholding, and parallelization. The user +#' function is called as \code{FUN(data = dat, ...)} where \code{dat} is +#' \code{phenotypes} with all scalar columns appended for the current +#' element. The return value from \code{FUN} for a single element must be +#' one of: +#' \itemize{ +#' \item a one-row \code{data.frame} or \code{tibble} +#' \item a named list +#' \item a named atomic vector +#' } +#' The column names from the first successful element determine the final +#' schema. +#' +#' Note: \code{ModelArray.wrap} never performs any p-value corrections or +#' modifications. If you need adjusted p-values (e.g. FDR), implement +#' them inside \code{FUN}. +#' +#' Use \code{\link{exampleElementData}} to construct a sample per-element +#' data.frame for testing your function before committing to a full run. #' -#' Note: `ModelArray.wrap` never performs any p-value corrections or modifications. -#' If you need adjusted p-values (e.g., FDR), implement them inside your provided `FUN`. +#' @inheritParams ModelArray.lm +#' +#' @param FUN A function that accepts at least \code{data} (a data.frame) +#' and returns a one-row data.frame/tibble, a named list, or a named +#' vector of results for that element. +#' @param write_scalar_name Optional character. If provided, selected +#' output columns are written into +#' \code{scalars//values} in the HDF5 file specified +#' by \code{write_scalar_file}. +#' @param write_scalar_file Optional character. HDF5 output file path. +#' Required when \code{write_scalar_name} is provided. +#' @param write_scalar_columns Optional character or integer vector +#' selecting which output columns to save as scalar values. If +#' \code{NULL} (default), uses all output columns except +#' \code{element_id}. +#' @param write_scalar_column_names Optional character vector of source +#' names saved as scalar \code{column_names}. If \code{NULL}, uses +#' \code{phenotypes$source_file}. +#' @param write_scalar_flush_every Positive integer. Elements per write +#' block for scalar writes. Default 1000. +#' @param write_scalar_storage_mode Character. Storage mode for scalar +#' writes (e.g. \code{"double"}). Default \code{"double"}. +#' @param write_scalar_compression_level Integer 0--9. Gzip compression +#' level for scalar writes. Default 4. +#' @param ... Additional arguments forwarded to \code{FUN}. +#' +#' @return A tibble with one row per element and first column +#' \code{element_id} (0-based). Remaining columns are determined by +#' the return value of \code{FUN}. +#' +#' @seealso \code{\link{ModelArray.lm}} for linear models, +#' \code{\link{ModelArray.gam}} for GAMs, +#' \code{\link{exampleElementData}} for building test data, +#' \linkS4class{ModelArray} for the input class. +#' +#' @examples{ +#' \dontrun{ +#' ma <- ModelArray("path/to/data.h5", scalar_types = c("FD")) +#' phenotypes <- read.csv("cohort.csv") +#' +#' # Simple custom function +#' my_fun <- function(data, ...) { +#' mod <- lm(FD ~ age + sex, data = data) +#' tidy_out <- broom::tidy(mod) +#' # Return a one-row tibble +#' tibble::tibble( +#' age_estimate = tidy_out$estimate[tidy_out$term == "age"], +#' age_pvalue = tidy_out$p.value[tidy_out$term == "age"] +#' ) +#' } +#' +#' +#' # Test on one element first +#' test_df <- exampleElementData(ma, scalar = "FD", +#' i_element = 1, +#' phenotypes = phenotypes) +#' my_fun(data = test_df) +#' +#' # Run across all elements +#' results <- ModelArray.wrap( +#' FUN = my_fun, +#' data = ma, +#' phenotypes = phenotypes, +#' scalar = "FD" +#' ) +#' } +#' } #' -#' @param FUN A function that accepts at least `data` and returns a one-row data.frame/tibble, -#' a named list, or a named vector of results for that element. -#' @param data ModelArray class -#' @param phenotypes A data.frame with cohort covariates; must contain column `source_file`. -#' It must match `sources(data)[[scalar]]` order and contents (reordered if needed). -#' @param scalar Character, the scalar name to analyze -#' @param element.subset Optional integer vector of element indices (1-based); defaults to all -#' @param num.subj.lthr.abs Integer lower threshold for subjects with finite values (default 10) -#' @param num.subj.lthr.rel Relative lower threshold (0-1) (default 0.2) -#' @param verbose TRUE/FALSE to print messages -#' @param pbar TRUE/FALSE to show progress bar -#' @param n_cores Positive integer number of CPU cores -#' @param on_error Character: one of "stop", "skip", or "debug". When an error occurs in -#' the user function for an element, choose whether to stop, skip returning all-NaN values -#' for that element, or drop into `browser()` (if interactive) then skip. Default: "stop". -#' @param write_scalar_name Optional character scalar name. If provided, `ModelArray.wrap` -#' writes selected output columns into `scalars//values` while processing. -#' @param write_scalar_file Optional character HDF5 output filename used when -#' `write_scalar_name` is provided. -#' @param write_scalar_columns Optional character/integer selector for output columns to save -#' as scalar values. If NULL, uses all wrap output columns except `element_id`. -#' @param write_scalar_column_names Optional character vector of source names saved as -#' scalar `column_names`. If NULL, uses `phenotypes$source_file`. -#' @param write_scalar_flush_every Positive integer number of elements per write block. -#' @param write_scalar_storage_mode Storage mode for scalar writes (e.g., `"double"`). -#' @param write_scalar_compression_level Gzip compression level (0-9) for scalar writes. -#' @param write_results_name Optional analysis name for incremental writes to -#' `results//results_matrix`. -#' @param write_results_file Optional HDF5 file path used when `write_results_name` is provided. -#' @param write_results_flush_every Positive integer number of elements per write block for -#' results writes. -#' @param write_results_storage_mode Storage mode for results writes (e.g., `"double"`). -#' @param write_results_compression_level Gzip compression level (0-9) for results writes. -#' @param return_output If TRUE (default), return the combined data.frame. If FALSE, -#' returns `invisible(NULL)`; useful when writing large outputs directly to HDF5. -#' @param ... Additional arguments forwarded to `FUN` -#' @return Tibble/data.frame with one row per element and first column `element_id` +#' @rdname ModelArray.wrap #' @importFrom dplyr %>% #' @import doParallel #' @import tibble diff --git a/man/ModelArray.gam.Rd b/man/ModelArray.gam.Rd index ffa2990..74fbe72 100644 --- a/man/ModelArray.gam.Rd +++ b/man/ModelArray.gam.Rd @@ -2,7 +2,9 @@ % Please edit documentation in R/analyse.R \name{ModelArray.gam} \alias{ModelArray.gam} -\title{Run GAM for element-wise data} +\title{Fit element-wise generalized additive models +no model-level p-value for GAMs, so there is no +\code{correct.p.value.model} argument.} \usage{ ModelArray.gam( formula, @@ -33,179 +35,139 @@ ModelArray.gam( ) } \arguments{ -\item{formula}{Formula (passed to `mgcv::gam()`)} +\item{formula}{Formula (passed to \code{\link[mgcv]{gam}}).} -\item{data}{ModelArray class} +\item{data}{A \linkS4class{ModelArray} object.} -\item{phenotypes}{A data.frame of the cohort with columns of independent variables and covariates -to be added to the model. It should contains a column called "source_file", -and this column should match to that in \code{data}.} +\item{phenotypes}{A data.frame of the cohort with columns of independent +variables and covariates to be added to the model. It must contain a +column called \code{"source_file"} whose entries match those in +\code{sources(data)[[scalar]]}.} -\item{scalar}{A character. The name of the element-wise scalar to be analysed} +\item{scalar}{Character. The name of the element-wise scalar to analyse. +Must be one of \code{names(scalars(data))}.} -\item{element.subset}{A list of positive integers (min = 1, max = number of elements). -The subset of elements you want to run. Default is `NULL`, i.e. requesting all elements in `data`.} +\item{element.subset}{Integer vector of element indices (1-based) to run. +Default is \code{NULL}, i.e. all elements in \code{data}.} -\item{full.outputs}{TRUE or FALSE, Whether to return full set of outputs. -If FALSE, it will only return those requested in arguments \code{var.*} and \code{correct.p.value.*}; -if TRUE, arguments \code{var.*} will be ignored, and will return all possible statistics for \code{var.*} -and any options requested in arguments \code{correct.p.value.*}.} +\item{full.outputs}{Logical. If \code{TRUE}, return the full set of +statistics (ignoring \code{var.*} arguments). If \code{FALSE} (default), +only return those requested in \code{var.*} and +\code{correct.p.value.*}.} -\item{var.smoothTerms}{A list of characters. -The list of variables to save for smooth terms (got from `broom::tidy(parametric = FALSE)`). -Example smooth term: age in formula "outcome ~ s(age)". See "Details" section for more.} +\item{var.smoothTerms}{Character vector. Statistics to save for smooth +terms, from \code{broom::tidy(parametric = FALSE)}. See Details.} -\item{var.parametricTerms}{A list of characters. -The list of variables to save for parametric terms (got from `broom::tidy(parametric = TRUE)`). -Example parametric term: sex in formula "outcome ~ s(age) + sex". -See "Details" section for more.} +\item{var.parametricTerms}{Character vector. Statistics to save for +parametric terms, from \code{broom::tidy(parametric = TRUE)}. +See Details.} -\item{var.model}{A list of characters. -The list of variables to save for the model (got from `broom::glance()` and `summary()`). -See "Details" section for more.} +\item{var.model}{Character vector. Statistics to save for the overall +model, from \code{broom::glance()} and \code{summary()}. See Details.} -\item{changed.rsq.term.index}{A list of (one or several) positive integers. -Each element in the list means the i-th term of the formula's right hand side -as the term of interest for changed R-squared between with and without it. -Both delta adjusted R-squared and partial R-squared will be calculated for each of term requested. -Usually term of interest is smooth term, or interaction term in models with interactions. - See "Details" section for more, especially the "WARNING" in Details section for cases with caution!!} +\item{changed.rsq.term.index}{A list of positive integers. Each value +is the index of a term on the right-hand side of \code{formula} for +which delta adjusted R-squared and partial R-squared should be +computed. Usually the term of interest is a smooth term or interaction +term. Default \code{NULL} (not computed). See Details for warnings.} -\item{correct.p.value.smoothTerms}{A list of characters. -To perform and add a column for p.value correction for each smooth term. -Default: "fdr". See "Details" section for more.} +\item{correct.p.value.smoothTerms}{Character vector. P-value +correction method(s) for each smooth term. Default: \code{"fdr"}.} -\item{correct.p.value.parametricTerms}{A list of characters. -To perform and add a column for p.value correction for each parametric term. Default: "fdr". -See "Details" section for more.} +\item{correct.p.value.parametricTerms}{Character vector. P-value +correction method(s) for each parametric term. Default: \code{"fdr"}.} -\item{num.subj.lthr.abs}{An integer, lower threshold of absolute number of subjects. -For an element, if number of subjects who have finite values (defined by `is.finite()`, -i.e. not NaN or NA or Inf) in h5 file > \code{num.subj.lthr.abs}, -then this element will be run normally; -otherwise, this element will be skipped and statistical outputs will be set as NaN. -Default is 10.} +\item{num.subj.lthr.abs}{Integer. Lower threshold for the absolute +number of subjects with finite scalar values (not \code{NaN}, +\code{NA}, or \code{Inf}) required per element. Elements below this +threshold are skipped (outputs set to \code{NaN}). Default is 10.} -\item{num.subj.lthr.rel}{A value between 0-1, lower threshold of relative number of subjects. -Similar to \code{num.subj.lthr.abs}, -if proportion of subjects who have valid value > \code{num.subj.lthr.rel}, -then this element will be run normally; otherwise, -this element will be skipped and statistical outputs will be set as NaN. -Default is 0.2.} +\item{num.subj.lthr.rel}{Numeric between 0 and 1. Lower threshold for +the proportion of subjects with finite values. Used together with +\code{num.subj.lthr.abs} (the effective threshold is the maximum of +the two). Default is 0.2.} -\item{verbose}{TRUE or FALSE, to print verbose messages or not} +\item{verbose}{Logical. Print progress messages. Default \code{TRUE}.} -\item{pbar}{TRUE or FALSE, to print progress bar or not} +\item{pbar}{Logical. Show progress bar. Default \code{TRUE}.} -\item{n_cores}{Positive integer, The number of CPU cores to run with} +\item{n_cores}{Positive integer. Number of CPU cores for parallel +processing via \code{\link[parallel]{mclapply}}. Default is 1 +(serial).} -\item{on_error}{Character: one of "stop", "skip", or "debug". When an error occurs -while fitting an element, choose whether to stop, skip returning all-NaN values for -that element, or drop into `browser()` (if interactive) then skip. Default: "stop".} +\item{on_error}{Character: one of \code{"stop"}, \code{"skip"}, or +\code{"debug"}. When an error occurs fitting one element: \code{"stop"} +halts execution; \code{"skip"} returns all-\code{NaN} for that element; +\code{"debug"} drops into \code{\link{browser}} (if interactive) then +skips. Default: \code{"stop"}.} -\item{write_results_name}{Optional analysis name for incremental writes to -`results//results_matrix`.} +\item{write_results_name}{Optional character. If provided, results are +incrementally written to +\code{results//results_matrix} in the HDF5 file +specified by \code{write_results_file}.} -\item{write_results_file}{Optional HDF5 file path used when `write_results_name` is provided.} +\item{write_results_file}{Optional character. HDF5 file path for +incremental result writes. Required when \code{write_results_name} +is provided.} -\item{write_results_flush_every}{Positive integer number of elements per write block.} +\item{write_results_flush_every}{Positive integer. Number of elements +per write block. Default 1000.} -\item{write_results_storage_mode}{Storage mode for results writes (e.g., `"double"`).} +\item{write_results_storage_mode}{Character. Storage mode for HDF5 +writes (e.g. \code{"double"}). Default \code{"double"}.} -\item{write_results_compression_level}{Gzip compression level (0-9) for results writes.} +\item{write_results_compression_level}{Integer 0--9. Gzip compression +level for HDF5 writes. Default 4.} -\item{return_output}{If TRUE (default), return the combined data.frame. If FALSE, -returns `invisible(NULL)`; useful for streaming large runs to HDF5.} +\item{return_output}{Logical. If \code{TRUE} (default), return the +combined data.frame. If \code{FALSE}, return \code{invisible(NULL)}; +useful when writing large outputs directly to HDF5.} -\item{...}{Additional arguments for `mgcv::gam()`} +\item{...}{Additional arguments passed to \code{\link[mgcv]{gam}}.} } \value{ -Tibble with the summarized model statistics for all elements requested when -`return_output = TRUE`; otherwise `invisible(NULL)`. +A tibble with one row per element. The first column is +\code{element_id} (0-based). Remaining columns contain the requested +statistics, named as \code{.}. If +\code{changed.rsq.term.index} was requested, additional columns +\code{.delta.adj.rsq} and \code{.partial.rsq} are +appended. } \description{ -`ModelArray.gam` fits gam model for each of elements requested, -and returns a tibble data.frame of requested model statistics. +Fit element-wise generalized additive models +no model-level p-value for GAMs, so there is no +\code{correct.p.value.model} argument. } -\details{ -You may request returning specific statistical variables by setting \code{var.*}, -or you can get all by setting \code{full.outputs=TRUE}. -Note that statistics covered by \code{full.outputs} or -\code{var.*} are the ones from broom::tidy(), broom::glance(), and summary() only, -and do not include delta adjusted R-squared or partial R-squared or corrected p-values. -However FDR-corrected p-values ("fdr") are generated by default. - -List of acceptable statistic names for each of \code{var.*}: -\itemize{ - \item \code{var.smoothTerms}: c("edf","ref.df","statistic","p.value"); - For interpretation please see \link[broom]{tidy.gam} with `parametric=FALSE`. - \item \code{var.parametricTerms}: c("estimate", "std.error","statistic","p.value"); - For interpretation please see \link[broom]{tidy.gam} with `parametric=TRUE`. - \item \code{var.model}: c("adj.r.squared","dev.expl", "sp.criterion", "scale", "df", - "logLik","AIC", "BIC", "deviance", "df.residual", "nobs"); "adj.r.squared" is \code{r.sq} from - \link[mgcv]{summary.gam}; "sp.criterion" is \code{sp.criterion} from \link[mgcv]{summary.gam}; - For interpretation please see \link[broom]{glance.gam} and \link[mgcv]{summary.gam}. -} - -Regarding formula: So far these kinds of formula are tested: -\itemize{ - \item formula with smooth term, but without any interactions. - Examples like \code{y ~ s(x) + orderedFactor}; \code{y ~ s(x) + s(z)} - \item formula with interaction, but limited to only one interaction term, and in the formats of: - \itemize{ - \item Formula #1: \code{y ~ orderedFactor + s(x) + s(x, by=orderedFactor) + other_covariate}, - where \code{orderedFactor} should be discrete variables and generated by `ordered`. - The interaction term will be displayed as "s_x_BYorderedFactor" - in the column name in returned data.frame. - You may use function `gen_gamFormula_fxSmooth()` to generate one. - \item Formula #2: \code{y ~ ti(x) + ti(z) + ti(x,z) + other_covariate}, - where \code{x} and \code{z} should be continuous variables. - The interaction term will be displayed as "ti_x_z" in the column name in the returned data.frame. - You may use function `gen_gamFormula_contIx()` to generate one. - } +\examples{ +{ +\dontrun{ +ma <- ModelArray("path/to/data.h5", scalar_types = c("FD")) +phenotypes <- read.csv("cohort.csv") + +results <- ModelArray.gam( + FD ~ s(age, fx = TRUE) + sex, + data = ma, + phenotypes = phenotypes, + scalar = "FD" +) +head(results) + +# With changed R-squared for the smooth term (term index 1) +results_rsq <- ModelArray.gam( + FD ~ s(age, fx = TRUE) + sex, + data = ma, + phenotypes = phenotypes, + scalar = "FD", + changed.rsq.term.index = list(1) +) } -You may be interested in how important a term is in a model. -We provide two ways of quantification (see below). -Both of them require running the reduced model without this term of interest, -thus it will take longer time to run. -You can make such request via argument \code{changed.rsq.term.index}, and you'll get both quantifications. -\itemize{ - \item Delta adjusted R-squared (\code{delta.adj.rsq}) is defined as the difference between - adjusted R-squared of full model (full formula in \code{formula}) and that of reduced model - (formula without the term of interest). Notice that adjusted R-squared includes the penalty - from the model complexity. - \item Partial R-squared (\code{partial.rsq}) is defined as: - \code{(sse.reduced.model - sse.full.model) / sse.reduced.model}, - where \code{sse} is the error sum of squares (or, residual sum of squares). - It quantifies the amount of variance in the response variable that cannot be explained by the reduced model - (model without term of interest), but can be explained by the term of interest in the full model. } -\strong{___!!! WARNING !!!___}: -If you want to request \code{changed.rsq.term.index} for a term that has missing values, -before feeding \code{phenotypes} into \code{ModelArray.gam()}, -you must exclude those observations (i.e., those rows in \code{phenotypes}) -who have missing values in this term of interest from \code{phenotypes}. -You should do the same for each term you'd like to request in \code{changed.rsq.term.index}, -if that term has missing values. -Without such exclusion, the full and reduced models would include different number of subjects, -causing inaccuracy of calculation of \code{delta.adj.rsq} and \code{partial.rsq}. \cr \cr -Other notes on \code{changed.rsq.term.index}: -\itemize{ - \item When requesting \code{changed.rsq.term.index}, - \code{fx} should be set as \code{TRUE}, so that degree of freedom is fixed. - \item For formula with interactions, only formula in above formats are tested, - and only the values of interaction term are valid. - The delta.adj.rsq and partial.rsq for main effect (such as s(x) in Formula #1) - may not "functionally" be these metrics, as their definitions should be changed to - reduced formula without both main effect and interaction term. + } -For p-value corrections (arguments \code{correct.p.value.*}), -supported methods include all methods in `p.adjust.methods` except "none". -You can request more than one method. FDR-corrected p-values ("fdr") are calculated by default. -Turn it off by setting to "none". -Please notice that different from `ModelArray.lm`, -there is no p.value for the GAM model, so no "correct.p.value.model" for GAM model. \cr -Arguments \code{num.subj.lthr.abs} and \code{num.subj.lthr.rel} -are mainly for input data with subject-specific masks, i.e. currently only for volume data. -For fixel-wise data, you may ignore these arguments. +\seealso{ +\code{\link{ModelArray.lm}} for linear models, +\code{\link{ModelArray.wrap}} for user-supplied functions, +\code{\link{gen_gamFormula_fxSmooth}} and +\code{\link{gen_gamFormula_contIx}} for formula helpers, +\linkS4class{ModelArray} for the input class. } diff --git a/man/ModelArray.wrap.Rd b/man/ModelArray.wrap.Rd index 19b5835..10d0686 100644 --- a/man/ModelArray.wrap.Rd +++ b/man/ModelArray.wrap.Rd @@ -33,83 +33,167 @@ ModelArray.wrap( ) } \arguments{ -\item{FUN}{A function that accepts at least `data` and returns a one-row data.frame/tibble, -a named list, or a named vector of results for that element.} +\item{FUN}{A function that accepts at least \code{data} (a data.frame) +and returns a one-row data.frame/tibble, a named list, or a named +vector of results for that element.} -\item{data}{ModelArray class} +\item{data}{A \linkS4class{ModelArray} object.} -\item{phenotypes}{A data.frame with cohort covariates; must contain column `source_file`. -It must match `sources(data)[[scalar]]` order and contents (reordered if needed).} +\item{phenotypes}{A data.frame of the cohort with columns of independent +variables and covariates to be added to the model. It must contain a +column called \code{"source_file"} whose entries match those in +\code{sources(data)[[scalar]]}.} -\item{scalar}{Character, the scalar name to analyze} +\item{scalar}{Character. The name of the element-wise scalar to analyse. +Must be one of \code{names(scalars(data))}.} -\item{element.subset}{Optional integer vector of element indices (1-based); defaults to all} +\item{element.subset}{Integer vector of element indices (1-based) to run. +Default is \code{NULL}, i.e. all elements in \code{data}.} -\item{num.subj.lthr.abs}{Integer lower threshold for subjects with finite values (default 10)} +\item{num.subj.lthr.abs}{Integer. Lower threshold for the absolute +number of subjects with finite scalar values (not \code{NaN}, +\code{NA}, or \code{Inf}) required per element. Elements below this +threshold are skipped (outputs set to \code{NaN}). Default is 10.} -\item{num.subj.lthr.rel}{Relative lower threshold (0-1) (default 0.2)} +\item{num.subj.lthr.rel}{Numeric between 0 and 1. Lower threshold for +the proportion of subjects with finite values. Used together with +\code{num.subj.lthr.abs} (the effective threshold is the maximum of +the two). Default is 0.2.} -\item{verbose}{TRUE/FALSE to print messages} +\item{verbose}{Logical. Print progress messages. Default \code{TRUE}.} -\item{pbar}{TRUE/FALSE to show progress bar} +\item{pbar}{Logical. Show progress bar. Default \code{TRUE}.} -\item{n_cores}{Positive integer number of CPU cores} +\item{n_cores}{Positive integer. Number of CPU cores for parallel +processing via \code{\link[parallel]{mclapply}}. Default is 1 +(serial).} -\item{on_error}{Character: one of "stop", "skip", or "debug". When an error occurs in -the user function for an element, choose whether to stop, skip returning all-NaN values -for that element, or drop into `browser()` (if interactive) then skip. Default: "stop".} +\item{on_error}{Character: one of \code{"stop"}, \code{"skip"}, or +\code{"debug"}. When an error occurs fitting one element: \code{"stop"} +halts execution; \code{"skip"} returns all-\code{NaN} for that element; +\code{"debug"} drops into \code{\link{browser}} (if interactive) then +skips. Default: \code{"stop"}.} -\item{write_scalar_name}{Optional character scalar name. If provided, `ModelArray.wrap` -writes selected output columns into `scalars//values` while processing.} +\item{write_scalar_name}{Optional character. If provided, selected +output columns are written into +\code{scalars//values} in the HDF5 file specified +by \code{write_scalar_file}.} -\item{write_scalar_file}{Optional character HDF5 output filename used when -`write_scalar_name` is provided.} +\item{write_scalar_file}{Optional character. HDF5 output file path. +Required when \code{write_scalar_name} is provided.} -\item{write_scalar_columns}{Optional character/integer selector for output columns to save -as scalar values. If NULL, uses all wrap output columns except `element_id`.} +\item{write_scalar_columns}{Optional character or integer vector +selecting which output columns to save as scalar values. If +\code{NULL} (default), uses all output columns except +\code{element_id}.} -\item{write_scalar_column_names}{Optional character vector of source names saved as -scalar `column_names`. If NULL, uses `phenotypes$source_file`.} +\item{write_scalar_column_names}{Optional character vector of source +names saved as scalar \code{column_names}. If \code{NULL}, uses +\code{phenotypes$source_file}.} -\item{write_scalar_flush_every}{Positive integer number of elements per write block.} +\item{write_scalar_flush_every}{Positive integer. Elements per write +block for scalar writes. Default 1000.} -\item{write_scalar_storage_mode}{Storage mode for scalar writes (e.g., `"double"`).} +\item{write_scalar_storage_mode}{Character. Storage mode for scalar +writes (e.g. \code{"double"}). Default \code{"double"}.} -\item{write_scalar_compression_level}{Gzip compression level (0-9) for scalar writes.} +\item{write_scalar_compression_level}{Integer 0--9. Gzip compression +level for scalar writes. Default 4.} -\item{write_results_name}{Optional analysis name for incremental writes to -`results//results_matrix`.} +\item{write_results_name}{Optional character. If provided, results are +incrementally written to +\code{results//results_matrix} in the HDF5 file +specified by \code{write_results_file}.} -\item{write_results_file}{Optional HDF5 file path used when `write_results_name` is provided.} +\item{write_results_file}{Optional character. HDF5 file path for +incremental result writes. Required when \code{write_results_name} +is provided.} -\item{write_results_flush_every}{Positive integer number of elements per write block for -results writes.} +\item{write_results_flush_every}{Positive integer. Number of elements +per write block. Default 1000.} -\item{write_results_storage_mode}{Storage mode for results writes (e.g., `"double"`).} +\item{write_results_storage_mode}{Character. Storage mode for HDF5 +writes (e.g. \code{"double"}). Default \code{"double"}.} -\item{write_results_compression_level}{Gzip compression level (0-9) for results writes.} +\item{write_results_compression_level}{Integer 0--9. Gzip compression +level for HDF5 writes. Default 4.} -\item{return_output}{If TRUE (default), return the combined data.frame. If FALSE, -returns `invisible(NULL)`; useful when writing large outputs directly to HDF5.} +\item{return_output}{Logical. If \code{TRUE} (default), return the +combined data.frame. If \code{FALSE}, return \code{invisible(NULL)}; +useful when writing large outputs directly to HDF5.} -\item{...}{Additional arguments forwarded to `FUN`} +\item{...}{Additional arguments forwarded to \code{FUN}.} } \value{ -Tibble/data.frame with one row per element and first column `element_id` -when `return_output = TRUE`; otherwise `invisible(NULL)`. +A tibble with one row per element and first column +\code{element_id} (0-based). Remaining columns are determined by +the return value of \code{FUN}. } \description{ -`ModelArray.wrap` runs a user-supplied function `FUN` for each requested element and -returns a tibble/data.frame of results combined across elements. +\code{ModelArray.wrap} runs a user-supplied function \code{FUN} at each +requested element and returns a tibble of results combined across +elements. } \details{ -This provides a generic framework reusing ModelArray's per-element looping, alignment, -subject-thresholding, and parallelization. The user function will be called as -`FUN(data = dat, ...)` where `dat` is `phenotypes` with the response column named `scalar` -appended for the current element. The return value from `FUN` for a single element must be -either a one-row data.frame/tibble, a named list, or a named atomic vector. The column -names from the first successful element determine the final schema. - -Note: `ModelArray.wrap` never performs any p-value corrections or modifications. -If you need adjusted p-values (e.g., FDR), implement them inside your provided `FUN`. +This provides a generic framework reusing ModelArray's per-element +looping, alignment, subject-thresholding, and parallelization. The user +function is called as \code{FUN(data = dat, ...)} where \code{dat} is +\code{phenotypes} with all scalar columns appended for the current +element. The return value from \code{FUN} for a single element must be +one of: +\itemize{ +\item a one-row \code{data.frame} or \code{tibble} +\item a named list +\item a named atomic vector +} +The column names from the first successful element determine the final +schema. + +Note: \code{ModelArray.wrap} never performs any p-value corrections or +modifications. If you need adjusted p-values (e.g. FDR), implement +them inside \code{FUN}. + +Use \code{\link{exampleElementData}} to construct a sample per-element +data.frame for testing your function before committing to a full run. +} +\examples{ +{ +\dontrun{ +ma <- ModelArray("path/to/data.h5", scalar_types = c("FD")) +phenotypes <- read.csv("cohort.csv") + +# Simple custom function +my_fun <- function(data, ...) { + mod <- lm(FD ~ age + sex, data = data) + tidy_out <- broom::tidy(mod) + # Return a one-row tibble + tibble::tibble( + age_estimate = tidy_out$estimate[tidy_out$term == "age"], + age_pvalue = tidy_out$p.value[tidy_out$term == "age"] + ) +} + + +# Test on one element first +test_df <- exampleElementData(ma, scalar = "FD", + i_element = 1, + phenotypes = phenotypes) +my_fun(data = test_df) + +# Run across all elements +results <- ModelArray.wrap( + FUN = my_fun, + data = ma, + phenotypes = phenotypes, + scalar = "FD" +) +} +} + +} +\seealso{ +\code{\link{ModelArray.lm}} for linear models, +\code{\link{ModelArray.gam}} for GAMs, +\code{\link{exampleElementData}} for building test data, +\linkS4class{ModelArray} for the input class. } From 37eeb60a3a4aee7e35879a69a42b031565eb9d2a Mon Sep 17 00:00:00 2001 From: araikes Date: Tue, 31 Mar 2026 10:33:54 -0700 Subject: [PATCH 11/23] Update analyzeOneElement function documentation --- R/ModelArray_Constructor.R | 256 +++++++++++++++++++++++-------------- 1 file changed, 160 insertions(+), 96 deletions(-) diff --git a/R/ModelArray_Constructor.R b/R/ModelArray_Constructor.R index c1c8075..456fd8c 100644 --- a/R/ModelArray_Constructor.R +++ b/R/ModelArray_Constructor.R @@ -71,7 +71,6 @@ ModelArraySeed <- function(filepath, name, type = NA) { } - #' Load element-wise data from an HDF5 file #' #' @description @@ -107,7 +106,7 @@ ModelArraySeed <- function(filepath, name, type = NA) { #' #' @examples #' \dontrun{ -#' ma <- ModelArray("path/to/data.h5", scalar_types = c("FD", "FC")) +#' ma <- ModelArray("path/to/data.h5", scalar_types = c("FD")) #' ma #' } #' @@ -399,42 +398,65 @@ numElementsTotal <- function(modelarray, scalar_name = "FD") { numElementsTotal } -#' Fit linear model for one element. +#' Fit a linear model for a single element +#' If the number of subjects with finite scalar values (not \code{NaN}, +#' \code{NA}, or \code{Inf}) does not exceed \code{num.subj.lthr}, the +#' element is skipped and all statistics are set to \code{NaN}. #' -#' @description -#' `analyseOneElement.lm` fits a linear model for one element data, and returns requested model statistics. +#' @param i_element Integer. The 1-based index of the element to analyse. +#' @param formula A \code{\link[stats]{formula}} passed to +#' \code{\link[stats]{lm}}. +#' @param modelarray A \linkS4class{ModelArray} object. +#' @param phenotypes A data.frame of the cohort with columns of independent +#' variables and covariates. Must contain a \code{"source_file"} column +#' matching \code{sources(modelarray)[[scalar]]}. +#' @param scalar Character. The name of the element-wise scalar to analyse. +#' Must be one of \code{names(scalars(modelarray))}. +#' @param var.terms Character vector. Statistics to extract per term from +#' \code{\link[broom]{tidy.lm}} (e.g. \code{"estimate"}, \code{"statistic"}, +#' \code{"p.value"}). +#' @param var.model Character vector. Statistics to extract for the overall +#' model from \code{\link[broom]{glance.lm}} (e.g. \code{"adj.r.squared"}, +#' \code{"p.value"}). +#' @param num.subj.lthr Numeric. The pre-computed minimum number of subjects +#' with finite values required for this element to be analysed. Elements +#' below this threshold are skipped. This value is typically computed by +#' the parent function from \code{num.subj.lthr.abs} and +#' \code{num.subj.lthr.rel}. +#' @param num.stat.output Integer or \code{NULL}. The total number of output +#' columns (including \code{element_id}). Used when +#' \code{flag_initiate = FALSE} to generate an all-\code{NaN} row for +#' skipped elements. Must be \code{NULL} when \code{flag_initiate = TRUE}. +#' @param flag_initiate Logical. If \code{TRUE}, fit the model once and return +#' metadata for initialising the output data.frame (column names and term +#' names). If \code{FALSE}, return a numeric vector of results for this +#' element. +#' @param on_error Character. One of \code{"stop"}, \code{"skip"}, or +#' \code{"debug"}. When an error occurs fitting the model: \code{"stop"} +#' halts execution; \code{"skip"} returns all-\code{NaN} for this element; +#' \code{"debug"} drops into \code{\link{browser}} (if interactive) then +#' skips. Default: \code{"stop"}. +#' @param ... Additional arguments passed to \code{\link[stats]{lm}}. #' -#' @details -#' `ModelArray.lm` iteratively calls this function to get statistics for all requested elements. +#' @return If \code{flag_initiate = TRUE}, a list with components: +#' \describe{ +#' \item{column_names}{Character vector. The column names for the output +#' data.frame, with \code{"element_id"} first.} +#' \item{list.terms}{Character vector. The names of the model terms +#' (from \code{\link[broom]{tidy.lm}}).} +#' } +#' If \code{flag_initiate = FALSE}, a numeric vector of length +#' \code{num.stat.output} with \code{element_id} (0-based) as the first +#' value and the requested statistics in subsequent positions. All-\code{NaN} +#' (except \code{element_id}) if the element had insufficient valid subjects +#' or if an error occurred with \code{on_error = "skip"}. #' -#' @param i_element An integer, the i_th element, starting from 1. -#' For initiating (flag_initiate = TRUE), use i_element=1 -#' @param formula Formula (passed to `stats::lm()`) -#' @param modelarray ModelArray class -#' @param phenotypes A data.frame of the cohort with columns of independent variables -#' and covariates to be added to the model. -#' @param scalar A character. The name of the element-wise scalar to be analysed -#' @param var.terms A list of characters. The list of variables to save for terms (got from `broom::tidy()`). -#' @param var.model A list of characters. The list of variables to save for the model (got from `broom::glance()`). -#' @param num.subj.lthr The minimal number of subjects with valid value in input h5 file, -#' i.e. number of subjects with finite values (defined by `is.finite()`, -#' i.e. not NaN or NA or Inf) in h5 file > \code{num.subj.lthr}, -#' then this element will be run normally; -#' otherwise, this element will be skipped and statistical outputs will be set as NaN. -#' @param num.stat.output The number of output stat metrics -#' (for generating all NaN stat when # subjects does not meet criteria). -#' This includes column `element_id`. -#' This is required when flag_initiate = TRUE. -#' @param flag_initiate TRUE or FALSE, Whether this is to initiate the new analysis. -#' If TRUE, it will return column names etc to be used for initiating data.frame; -#' if FALSE, it will return the list of requested statistic values. -#' @param ... Additional arguments for `stats::lm()` -#' @param on_error Character: one of "stop", "skip", or "debug". When an error occurs while -#' fitting one element, choose whether to stop, skip returning all-NaN values for that element, -#' or drop into `browser()` (if interactive) then skip. Default: "stop". +#' @seealso \code{\link{ModelArray.lm}} which calls this function iteratively, +#' \code{\link{analyseOneElement.gam}} for the GAM equivalent, +#' \code{\link{analyseOneElement.wrap}} for user-supplied functions. #' -#' @return If flag_initiate==TRUE, returns column names, and list of term names of final results; -#' if flag_initiate==FALSE, it will return the list of requested statistic values for a element. +#' @keywords internal +#' @rdname analyseOneElement.lm #' @export #' @importFrom stats lm #' @import broom @@ -707,47 +729,60 @@ analyseOneElement.lm <- function(i_element, } } } -#' Fit GAM for one element + +#' Fit a GAM for a single element +#' returns metadata (column names, smooth term names, parametric term names, +#' and the smoothing parameter criterion attribute name) used by +#' \code{\link{ModelArray.gam}} to initialise the output data.frame. When +#' \code{flag_initiate = FALSE}, it returns a numeric vector representing one +#' row of the final results matrix. #' -#' @description -#' `analyseOneElement.gam` fits a GAM model for one element data, and returns requested model statistics. +#' If the number of subjects with finite scalar values does not exceed +#' \code{num.subj.lthr}, the element is skipped and all statistics are set +#' to \code{NaN}. #' -#' @details -#' `ModelArray.gam` iteratively calls this function to get statistics for all requested elements. +#' @inheritParams analyseOneElement.lm #' -#' @param i_element An integer, the i_th element, starting from 1. -#' For initiating (flag_initiate = TRUE), use i_element=1 -#' @param formula A formula (passed to `mgcv::gam()`) -#' @param modelarray ModelArray class -#' @param phenotypes A data.frame of the cohort with columns of independent variables -#' and covariates to be added to the model -#' @param scalar A character. The name of the element-wise scalar to be analysed -#' @param var.smoothTerms The list of variables to save for smooth terms -#' (got from broom::tidy(parametric = FALSE)). Example smooth term: age in formula "outcome ~ s(age)". -#' @param var.parametricTerms The list of variables to save for parametric terms -#' (got from broom::tidy(parametric = TRUE)). Example parametric term: sex in formula "outcome ~ s(age) + sex". -#' @param var.model The list of variables to save for the model (got from broom::glance() and summary()). -#' @param num.subj.lthr The minimal number of subjects with valid value in input h5 file, -#' i.e. number of subjects with finite values (defined by `is.finite()`, -#' i.e. not NaN or NA or Inf) in h5 file > \code{num.subj.lthr}, -#' then this element will be run normally; -#' otherwise, this element will be skipped and statistical outputs will be set as NaN. -#' @param num.stat.output The number of output stat metrics -#' (for generating all NaN stat when # subjects does not meet criteria). -#' This includes column `element_id`. -#' This is required when flag_initiate = TRUE. -#' @param flag_initiate TRUE or FALSE, Whether this is to initiate the new analysis. -#' If TRUE, it will return column names etc to be used for initiating data.frame; -#' if FALSE, it will return the list of requested statistic values. -#' @param flag_sse TRUE or FALSE, Whether to calculate SSE (sum of squared error) for the model (`model.sse`). -#' SSE is needed for calculating partial R-squared. -#' @param ... Additional arguments for `mgcv::gam()` -#' @param on_error Character: one of "stop", "skip", or "debug". When an error occurs while -#' fitting one element, choose whether to stop, skip returning all-NaN values for that element, -#' or drop into `browser()` (if interactive) then skip. Default: "stop". -#' @return If flag_initiate==TRUE, returns column names, -#' list of term names of final results, and attr.name of sp.criterion; -#' if flag_initiate==FALSE, it will return the list of requested statistic values for a element. +#' @param formula A \code{\link[stats]{formula}} passed to +#' \code{\link[mgcv]{gam}}. +#' @param var.smoothTerms Character vector. Statistics to extract for smooth +#' terms from \code{\link[broom]{tidy.gam}} with \code{parametric = FALSE} +#' (e.g. \code{"edf"}, \code{"ref.df"}, \code{"statistic"}, +#' \code{"p.value"}). +#' @param var.parametricTerms Character vector. Statistics to extract for +#' parametric terms from \code{\link[broom]{tidy.gam}} with +#' \code{parametric = TRUE} (e.g. \code{"estimate"}, \code{"std.error"}, +#' \code{"statistic"}, \code{"p.value"}). +#' @param var.model Character vector. Statistics to extract for the overall +#' model from \code{\link[broom]{glance.gam}} and +#' \code{\link[mgcv]{summary.gam}} (e.g. \code{"adj.r.squared"}, +#' \code{"dev.expl"}, \code{"sp.criterion"}). +#' @param flag_sse Logical. If \code{TRUE}, also compute the error sum of +#' squares (\code{model.sse}) for the model, which is needed for +#' partial R-squared calculations in \code{\link{ModelArray.gam}}. +#' Default: \code{FALSE}. +#' @param ... Additional arguments passed to \code{\link[mgcv]{gam}}. +#' +#' @return If \code{flag_initiate = TRUE}, a list with components: +#' \describe{ +#' \item{column_names}{Character vector of output column names.} +#' \item{list.smoothTerms}{Character vector of smooth term names.} +#' \item{list.parametricTerms}{Character vector of parametric term names.} +#' \item{sp.criterion.attr.name}{Character. The name attribute of the +#' smoothing parameter selection criterion (e.g. \code{"REML"} or +#' \code{"GCV.Cp"}).} +#' } +#' If \code{flag_initiate = FALSE}, a numeric vector of length +#' \code{num.stat.output} with \code{element_id} (0-based) first and +#' requested statistics in subsequent positions. All-\code{NaN} (except +#' \code{element_id}) if the element was skipped. +#' +#' @seealso \code{\link{ModelArray.gam}} which calls this function iteratively, +#' \code{\link{analyseOneElement.lm}} for the linear model equivalent, +#' \code{\link{analyseOneElement.wrap}} for user-supplied functions. +#' +#' @keywords internal +#' @rdname analyseOneElement.gam #' @export #' @import mgcv #' @import broom @@ -1138,36 +1173,65 @@ analyseOneElement.gam <- function(i_element, } -#' Run a user-supplied function for one element +#' Run a user-supplied function for a single element #' #' @description -#' `analyseOneElement.wrap` runs a user-supplied function `FUN` on a single element's data. -#' It prepares the per-element data by attaching the element values as a new column named by `scalar` -#' to the provided `phenotypes` data.frame, then calls `FUN(data = dat, ...)`. +#' Runs a user-supplied function on one element's data, preparing the +#' per-element data.frame by attaching all scalar values as new columns to +#' the provided \code{phenotypes}. This is the per-element workhorse called +#' iteratively by \code{\link{ModelArray.wrap}}. #' #' @details -#' The user-supplied `FUN` should return either a one-row data.frame/tibble, a named list, or a named vector. -#' The result will be coerced to a one-row tibble and combined into the final results matrix across elements. +#' Most users should call \code{\link{ModelArray.wrap}} directly, which handles +#' looping, parallelisation, and result assembly. +#' \code{analyseOneElement.wrap} is exported for advanced use cases such as +#' debugging a single element or building custom analysis loops. #' -#' @param i_element An integer, the i_th element, starting from 1. -#' @param user_fun A function that accepts at least an argument named `data` (the per-element -#' `phenotypes` with the response column appended) and returns a one-row data.frame/tibble, -#' named list, or named vector. -#' @param modelarray ModelArray class -#' @param phenotypes A data.frame of the cohort with columns of independent variables and covariates -#' @param scalar A character. The name of the element-wise scalar to be analysed -#' @param num.subj.lthr The minimal number of subjects with valid value in input h5 file -#' @param num.stat.output The number of output stat metrics (including `element_id`). -#' Required when `flag_initiate = TRUE`. -#' @param flag_initiate TRUE or FALSE, whether this is to initiate the new analysis to get column names -#' @param on_error Character: one of "stop", "skip", or "debug". When an error occurs while -#' executing the user function, choose whether to stop, skip returning all-NaN values for this -#' element, or drop into `browser()` (if interactive) then skip. Default: "stop". -#' @param ... Additional arguments forwarded to `FUN` +#' The user-supplied \code{user_fun} is called as +#' \code{user_fun(data = dat, ...)} where \code{dat} is \code{phenotypes} with +#' columns appended for \strong{all} scalars in the \linkS4class{ModelArray} +#' (not just the one named by \code{scalar}). The function should return a +#' one-row data.frame/tibble, a named list, or a named atomic vector. The +#' result is coerced to a numeric vector for assembly into the final results +#' matrix. +#' +#' Unlike \code{\link{analyseOneElement.lm}} and +#' \code{\link{analyseOneElement.gam}}, this function appends \strong{all} +#' scalar columns (not just the response) and checks for column name +#' collisions between scalar names and existing \code{phenotypes} columns. +#' +#' If the number of subjects with finite values across all scalars does not +#' exceed \code{num.subj.lthr}, the element is skipped and all statistics +#' are set to \code{NaN}. +#' +#' @inheritParams analyseOneElement.lm +#' +#' @param user_fun A function that accepts at least an argument named +#' \code{data} (a data.frame: \code{phenotypes} with scalar columns +#' appended for the current element) and returns a one-row +#' data.frame/tibble, a named list, or a named atomic vector. +#' @param ... Additional arguments forwarded to \code{user_fun}. +#' +#' @return If \code{flag_initiate = TRUE}, a list with one component: +#' \describe{ +#' \item{column_names}{Character vector. The column names derived from +#' the return value of \code{user_fun}, with \code{"element_id"} +#' prepended.} +#' } +#' If \code{flag_initiate = FALSE}, a numeric vector of length +#' \code{num.stat.output} with \code{element_id} (0-based) first and +#' the coerced output of \code{user_fun} in subsequent positions. +#' All-\code{NaN} (except \code{element_id}) if the element was skipped +#' or if an error occurred with \code{on_error = "skip"}. +#' +#' @seealso \code{\link{ModelArray.wrap}} which calls this function +#' iteratively, \code{\link{exampleElementData}} for building a test +#' data.frame matching the format passed to \code{user_fun}, +#' \code{\link{analyseOneElement.lm}} for the linear model equivalent, +#' \code{\link{analyseOneElement.gam}} for the GAM equivalent. #' -#' @return If `flag_initiate==TRUE`, returns a list with `column_names`. -#' If `flag_initiate==FALSE`, returns a numeric vector representing the one-row result for this element -#' with `element_id` as the first value. +#' @keywords internal +#' @rdname analyseOneElement.wrap #' @export #' @importFrom dplyr %>% #' @import tibble From 9985f6d58f3fd02f5d371b34129427f6bb6191bb Mon Sep 17 00:00:00 2001 From: araikes Date: Tue, 31 Mar 2026 10:40:07 -0700 Subject: [PATCH 12/23] Update documentation for exported utility functions --- R/utils.R | 214 +++++++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 173 insertions(+), 41 deletions(-) diff --git a/R/utils.R b/R/utils.R index d16d9d5..b31b7ae 100644 --- a/R/utils.R +++ b/R/utils.R @@ -346,27 +346,83 @@ checker_gam_formula <- function(formula, gam.formula.breakdown, onemodel = NULL) #' Generate GAM formula with factor-smooth interaction #' #' @description -#' This function will generate a formula in the following format: -#' \code{y ~ orderedFactor + s(x) + s(x, by=orderedFactor)}, +#' Generates a formula in the format +#' \code{y ~ orderedFactor + s(x) + s(x, by = orderedFactor)}, #' where \code{y} is \code{response.var}, \code{x} is \code{smooth.var}, -#' and \code{orderedFactor} is \code{factor.var} - see \code{factor.var} for more. -#' The formula generated could be further modified, e.g. adding covariates. -#' -#' @param response.var character class, the variable name for response -#' @param factor.var character class, the variable name for factor. It should be an ordered factor. -#' If not, it will generate it as a new column in `phenotypes`, which requires `reference.group`. -#' @param smooth.var character class, the variable name in smooth term as main effect -#' @param phenotypes data.frame class, the cohort matrix with columns of independent variables (including -#' \code{factor.var} and \code{smooth.var}) to be added to the model -#' @param reference.group character class, the reference group for ordered factor of `factor.var`; required when -#' `factor.var` in `phenotypes` is not an ordered factor. -#' @param prefix.ordered.factor character class, the prefix for ordered factor; required when `factor.var` in -#' `phenotypes` is not an ordered factor. -#' @param fx TRUE or FALSE, to be used in smooth term s(). Recommend TRUE. -#' @param k integer, to be used in smooth term including the interaction term. If NULL (no entry), will use default -#' value as in mgcv::s() -#' @return a list, including: 1) formula generated; 2) data.frame phenotypes - updated if argument factor.var -#' is not an ordered factor +#' and \code{orderedFactor} is \code{factor.var}. +#' The formula generated can be further modified, e.g. by adding covariates. +#' +#' @details +#' This helper exists because setting up factor-smooth interactions in +#' \code{\link[mgcv]{gam}} requires an ordered factor and a specific formula +#' structure. If \code{factor.var} in \code{phenotypes} is not already an +#' ordered factor, this function creates one using \code{reference.group} as +#' the baseline level and adds it as a new column (named with +#' \code{prefix.ordered.factor} prepended to \code{factor.var}) [2]. +#' +#' The returned \code{phenotypes} data.frame must be used in the subsequent +#' \code{\link{ModelArray.gam}} call so that the ordered factor column is +#' available to the model. +#' +#' @param response.var Character. The variable name for the response +#' (dependent variable), typically a scalar name like \code{"FD"}. +#' @param factor.var Character. The variable name for the factor. It should +#' be an ordered factor in \code{phenotypes}. If not, an ordered factor +#' will be generated as a new column, which requires \code{reference.group} [2]. +#' @param smooth.var Character. The variable name for the smooth term main +#' effect (e.g. \code{"age"}). +#' @param phenotypes A data.frame of the cohort with columns of independent +#' variables, including \code{factor.var} and \code{smooth.var}. +#' @param reference.group Character. The reference (baseline) group for the +#' ordered factor of \code{factor.var}. Required when \code{factor.var} +#' in \code{phenotypes} is not already an ordered factor [2]. +#' @param prefix.ordered.factor Character. Prefix for the ordered factor +#' column name. Required when \code{factor.var} in \code{phenotypes} is +#' not already an ordered factor. Default is \code{"o"} [2]. +#' @param fx Logical. Passed to \code{\link[mgcv]{s}}. If \code{TRUE} +#' (recommended), the smooth is treated as fixed degrees of freedom. +#' Default is \code{TRUE}. +#' @param k Integer or \code{NULL}. Basis dimension passed to +#' \code{\link[mgcv]{s}} for both the main smooth and interaction terms. +#' If \code{NULL} (default), uses the default from \code{mgcv::s()} [2]. +#' +#' @return A list with two components: +#' \describe{ +#' \item{formula}{The generated \code{\link[stats]{formula}} object.} +#' \item{phenotypes}{The (possibly updated) data.frame. If +#' \code{factor.var} was not already an ordered factor, a new column +#' named \code{paste0(prefix.ordered.factor, factor.var)} is added. +#' Otherwise identical to the input [2].} +#' } +#' +#' @seealso \code{\link{gen_gamFormula_contIx}} for continuous-by-continuous +#' interactions, \code{\link{ModelArray.gam}} which accepts the generated +#' formula. +#' +#' @examples +#' \dontrun{ +#' phenotypes <- read.csv("cohort.csv") +#' +#' # factor.var is not yet ordered - function creates it +#' result <- gen_gamFormula_fxSmooth( +#' response.var = "FD", +#' factor.var = "sex", +#' smooth.var = "age", +#' phenotypes = phenotypes, +#' reference.group = "female" +#' ) +#' result$formula +#' +#' # Use the updated phenotypes (contains the ordered factor column) +#' results <- ModelArray.gam( +#' result$formula, +#' data = ma, +#' phenotypes = result$phenotypes, +#' scalar = "FD" +#' ) +#' } +#' +#' @rdname gen_gamFormula_fxSmooth #' @importFrom mgcv s #' @importFrom stats as.formula #' @export @@ -436,24 +492,64 @@ gen_gamFormula_fxSmooth <- function(response.var, factor.var, smooth.var, phenot -#' Generate GAM formula with continuous*continuous interaction +#' Generate GAM formula with continuous-by-continuous interaction #' #' @description -#' This function will generate a formula in the following format: \code{y ~ ti(x) + ti(z) + ti(x,z)}, -#' where \code{y} is \code{response.var}, \code{x} is \code{cont1.var}, and \code{z} is \code{cont2.var}. -#' The formula generated could be further modified, e.g. adding covariates. -#' -#' @param response.var character class, the variable name for response -#' @param cont1.var character class, the name of the first continuous variable -#' @param cont2.var character class, the name of the second continuous variable -#' @param fx TRUE or FALSE, to be used in smooth term s(). Recommend TRUE. -#' @param k integer, to be used in smooth term including the interaction term. -#' If NULL (no entry), will use default value as in mgcv::s() -#' @return The formula generated +#' Generates a formula in the format +#' \code{y ~ ti(x) + ti(z) + ti(x, z)}, where \code{y} is +#' \code{response.var}, \code{x} is \code{cont1.var}, and \code{z} is +#' \code{cont2.var}. The formula generated can be further modified, e.g. +#' by adding covariates. +#' +#' @details +#' This helper uses \code{\link[mgcv]{ti}} (tensor product interaction) +#' terms so that the interaction \code{ti(x, z)} captures only the +#' interaction effect, separate from the main effects \code{ti(x)} and +#' \code{ti(z)}. This decomposition is important for interpretability and +#' for requesting \code{changed.rsq.term.index} in +#' \code{\link{ModelArray.gam}} [4]. +#' +#' @param response.var Character. The variable name for the response +#' (dependent variable), typically a scalar name like \code{"FD"}. +#' @param cont1.var Character. The name of the first continuous variable. +#' @param cont2.var Character. The name of the second continuous variable. +#' @param fx Logical. Passed to \code{\link[mgcv]{ti}}. If \code{TRUE} +#' (recommended), the smooth is treated as fixed degrees of freedom. +#' Default is \code{TRUE} [2]. +#' @param k Integer or \code{NULL}. Basis dimension passed to +#' \code{\link[mgcv]{ti}} for all three terms (both main effects and the +#' interaction). If \code{NULL} (default), uses the default from +#' \code{mgcv::ti()} [2]. +#' +#' @return A \code{\link[stats]{formula}} object. +#' +#' @seealso \code{\link{gen_gamFormula_fxSmooth}} for factor-smooth +#' interactions, \code{\link{ModelArray.gam}} which accepts the generated +#' formula. +#' +#' @examples +#' \dontrun{ +#' formula <- gen_gamFormula_contIx( +#' response.var = "FD", +#' cont1.var = "age", +#' cont2.var = "cognition" +#' ) +#' formula +#' +#' # Use in ModelArray.gam with changed R-squared for the interaction +#' results <- ModelArray.gam( +#' formula, +#' data = ma, +#' phenotypes = phenotypes, +#' scalar = "FD", +#' changed.rsq.term.index = list(3) +#' ) +#' } +#' +#' @rdname gen_gamFormula_contIx #' @importFrom mgcv ti #' @importFrom stats as.formula #' @export -#' gen_gamFormula_contIx <- function(response.var, cont1.var, cont2.var, fx = TRUE, k = NULL) { if (is.null(k)) { @@ -491,18 +587,45 @@ bind_cols_check_emptyTibble <- function(a, b) { } -#' Summarize an h5 file without loading the full ModelArray +#' Summarize an HDF5 file without loading a full ModelArray #' #' @description -#' Reads the h5 file structure and returns a summary of available scalars, +#' Reads the HDF5 file structure and returns a summary of available scalars, #' their dimensions, and any saved analyses. Useful for inspecting large files -#' without constructing a full ModelArray object. +#' without constructing a full \linkS4class{ModelArray} object. +#' +#' @details +#' This function opens the HDF5 file read-only via \code{\link[rhdf5]{h5ls}}, +#' inspects the group structure under \code{/scalars/} and \code{/results/}, +#' and closes the file. It does not load any data into memory. The returned +#' object has a \code{print} method that displays a formatted summary [2]. +#' +#' @param filepath Character. Path to an HDF5 (\code{.h5}) file. #' -#' @param filepath Path to an h5 file -#' @return A list with components: -#' \item{scalars}{A data.frame with columns: name, nElements, nInputFiles} -#' \item{analyses}{Character vector of analysis names} -#' \item{filepath}{The input filepath} +#' @return An object of class \code{"h5summary"}, which is a list with +#' components: +#' \describe{ +#' \item{scalars}{A data.frame with columns \code{name}, +#' \code{nElements}, and \code{nInputFiles} [2].} +#' \item{analyses}{Character vector of analysis names found under +#' \code{/results/} [2].} +#' \item{filepath}{The input filepath.} +#' } +#' +#' @seealso \code{\link{ModelArray}} for loading the full object, +#' \linkS4class{ModelArray} for the class definition. +#' +#' @examples +#' \dontrun{ +#' h5summary("path/to/data.h5") +#' +#' # Inspect before deciding which scalars to load +#' info <- h5summary("path/to/data.h5") +#' info$scalars$name +#' ma <- ModelArray("path/to/data.h5", scalar_types = info$scalars$name) +#' } +#' +#' @rdname h5summary #' @importFrom rhdf5 h5ls h5closeAll #' @export h5summary <- function(filepath) { @@ -549,6 +672,15 @@ h5summary <- function(filepath) { ) } +#' @rdname h5summary +#' +#' @param x An \code{h5summary} object as returned by +#' \code{\link{h5summary}}. +#' @param ... Additional arguments (currently ignored). +#' +#' @return Invisible \code{x}. Called for its side effect of printing a +#' human-readable summary to the console. +#' #' @method print h5summary #' @export print.h5summary <- function(x, ...) { From 564757c5a96a2c5daeb0ab45daf827f65c888466 Mon Sep 17 00:00:00 2001 From: araikes Date: Tue, 31 Mar 2026 10:40:43 -0700 Subject: [PATCH 13/23] Update documentation for writeResults --- R/ModelArray_Constructor.R | 75 +++++++++++++++++++++++++++++++++----- 1 file changed, 65 insertions(+), 10 deletions(-) diff --git a/R/ModelArray_Constructor.R b/R/ModelArray_Constructor.R index 456fd8c..13173db 100644 --- a/R/ModelArray_Constructor.R +++ b/R/ModelArray_Constructor.R @@ -1387,21 +1387,76 @@ analyseOneElement.wrap <- function(i_element, } } -#' Write outputs from element-wise statistical analysis to the HDF5 file. +#' Write outputs from element-wise statistical analysis to an HDF5 file #' #' @description -#' Create a group named `analysis_name` in HDF5 file, -#' then write the statistical results data.frame (i.e. for one analysis) in it. +#' Creates a group named \code{analysis_name} under \code{/results/} in the +#' HDF5 file, then writes the statistical results data.frame (i.e. for one +#' analysis) into it as \code{results_matrix} along with column names [7]. #' #' @details -#' debug tip: For "Error in H5File.open(filename, mode, file_create_pl, file_access_pl)", -#' check if there is message 'No such file or directory'. Try absolute .h5 filename. +#' The results are stored at +#' \code{/results//results_matrix} with column names saved +#' as a separate dataset at +#' \code{/results//column_names} [7]. #' -#' @param fn.output A character, The HDF5 (.h5) filename for the output -#' @param df.output A data.frame object with element-wise statistical results, returned from `ModelArray.lm()` etc -#' @param analysis_name A character, the name of the results -#' @param overwrite If a group with the same analysis_name exists in HDF5 file, -#' whether overwrite it (TRUE) or not (FALSE) +#' If any column of \code{df.output} is not numeric or integer, it is +#' coerced to numeric via \code{factor()} and the factor levels are saved +#' as a look-up table at +#' \code{/results//lut_forcol} [7]. +#' +#' \strong{Debugging tip:} If you encounter +#' \code{"Error in H5File.open(filename, mode, file_create_pl, file_access_pl)"}, +#' check if the message mentions "No such file or directory". Try using an +#' absolute path for the \code{fn.output} argument [7]. +#' +#' @param fn.output Character. The HDF5 (\code{.h5}) filename for the output. +#' The file must already exist; use an absolute path if you encounter +#' file-not-found errors. +#' @param df.output A data.frame of element-wise statistical results, as +#' returned by \code{\link{ModelArray.lm}}, +#' \code{\link{ModelArray.gam}}, or \code{\link{ModelArray.wrap}}. +#' Must inherit from \code{data.frame} [7]. +#' @param analysis_name Character. The name for this set of results. Used +#' as the group name under \code{/results/} in the HDF5 file. +#' Default is \code{"myAnalysis"} [7]. +#' @param overwrite Logical. If a group with the same \code{analysis_name} +#' already exists in the HDF5 file, whether to overwrite it (\code{TRUE}) +#' or skip with a warning (\code{FALSE}). Default is \code{TRUE} [7]. +#' +#' @return Invisible \code{NULL}. Called for its side effect of writing +#' results to the HDF5 file. +#' +#' @seealso \code{\link{ModelArray.lm}}, \code{\link{ModelArray.gam}}, +#' \code{\link{ModelArray.wrap}} which produce the \code{df.output}, +#' \code{\link{results}} for reading results back from a +#' \linkS4class{ModelArray}, \code{\link{h5summary}} for inspecting what +#' has been written. +#' +#' @examples +#' \dontrun{ +#' ma <- ModelArray("data.h5", scalar_types = c("FD")) +#' phenotypes <- read.csv("cohort.csv") +#' +#' results <- ModelArray.lm( +#' FD ~ age + sex, +#' data = ma, +#' phenotypes = phenotypes, +#' scalar = "FD" +#' ) +#' +#' writeResults( +#' fn.output = "data.h5", +#' df.output = results, +#' analysis_name = "lm_age_sex", +#' overwrite = TRUE +#' ) +#' +#' # Verify +#' h5summary("data.h5") +#' } +#' +#' @rdname writeResults #' @import hdf5r #' @export writeResults <- function(fn.output, From 1b68ef0a3e3312ba03551b9db97172a7982a8c70 Mon Sep 17 00:00:00 2001 From: araikes Date: Tue, 31 Mar 2026 10:41:41 -0700 Subject: [PATCH 14/23] Fix invalid syntax in printAdditionalArgu docs --- R/utils.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/utils.R b/R/utils.R index b31b7ae..6f0a2fe 100644 --- a/R/utils.R +++ b/R/utils.R @@ -77,7 +77,7 @@ flagAnalysisExistInh5 <- function(fn_h5, analysis_name) { #' print the additional arguments settings #' @param FUN The function, e.g. mgcv::gam, without "()" #' @param argu_name The argument name of the function -#' @param dots: list of additional arguments +#' @param dots list of additional arguments #' @param message_default The message for default #' @param message_usr_input The message describing user's input #' @importFrom crayon black From 93ebb2b0b7efa937219409cdf410755e824bd74d Mon Sep 17 00:00:00 2001 From: araikes Date: Tue, 31 Mar 2026 10:47:18 -0700 Subject: [PATCH 15/23] Update documentation for mergeModelArrays --- R/merge.R | 102 +++++++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 89 insertions(+), 13 deletions(-) diff --git a/R/merge.R b/R/merge.R index 1e82a4e..60fc3c1 100644 --- a/R/merge.R +++ b/R/merge.R @@ -1,19 +1,95 @@ -#' Merge multiple ModelArrays from different h5 files +#' Merge multiple ModelArrays from different HDF5 files #' #' @description -#' Combines scalars from multiple ModelArray objects into a single ModelArray, -#' aligning subjects via phenotype columns. Uses `DelayedArray::acbind()` for -#' virtual column-binding — no h5 rewriting is needed. +#' Combines scalars from multiple \linkS4class{ModelArray} objects into a +#' single \linkS4class{ModelArray}, aligning subjects via shared phenotype +#' columns. Uses \code{\link[DelayedArray]{acbind}} for virtual +#' column-binding — no HDF5 rewriting is needed. #' -#' @param modelarrays A list of ModelArray objects -#' @param phenotypes_list A list of data.frames, one per ModelArray. Each must -#' contain a `source_file` column matching its corresponding ModelArray's sources. -#' @param merge_on Character vector of column names to join phenotypes on -#' (e.g., `c("subject_id")`). These columns must exist in all phenotypes data.frames. -#' @return A list with: -#' \item{data}{A combined ModelArray with scalars from all inputs} -#' \item{phenotypes}{The inner-joined phenotypes data.frame. The original -#' `source_file` columns are renamed to `source_file.`.} +#' @details +#' The merge performs an inner join of the phenotype data.frames on the +#' columns specified by \code{merge_on}. Only subjects present in all +#' phenotype data.frames are retained. Scalar matrices from each input +#' \linkS4class{ModelArray} are column-subsetted and reordered to match +#' the joined subject list [6]. +#' +#' A unified \code{source_file} column is created from the \code{merge_on} +#' columns so that downstream analysis functions +#' (\code{\link{ModelArray.lm}}, \code{\link{ModelArray.gam}}, +#' \code{\link{ModelArray.wrap}}) can align phenotypes to scalars. The +#' original \code{source_file} columns are renamed to +#' \code{source_file.} for each input +#' \linkS4class{ModelArray} [6]. +#' +#' Scalar names must be unique across all input ModelArrays. If two +#' ModelArrays share a scalar name (e.g. both have \code{"FD"}), the +#' function will error. Element counts (number of rows) must match +#' across all scalars [6]. +#' +#' If element metadata is available (see \code{\link{elementMetadata}}), +#' the function checks that it is consistent across inputs and warns if +#' it differs or is only partially available [6]. +#' +#' @param modelarrays A list of at least two \linkS4class{ModelArray} +#' objects, each constructed from a different HDF5 file. +#' @param phenotypes_list A list of data.frames, one per +#' \linkS4class{ModelArray} in \code{modelarrays}. Each must contain a +#' \code{source_file} column whose entries match the corresponding +#' ModelArray's sources (i.e. \code{sources(modelarrays[[i]])}). +#' Each must also contain all columns named in \code{merge_on} [6]. +#' @param merge_on Character vector of column names present in all +#' data.frames in \code{phenotypes_list}, used to inner-join subjects +#' across sessions/modalities (e.g. \code{c("subject_id")}). The +#' combination of these columns must uniquely identify each subject +#' within each data.frame [6]. +#' +#' @return A list with two components: +#' \describe{ +#' \item{data}{A combined \linkS4class{ModelArray} containing scalars +#' from all inputs. Each scalar's columns are subsetted and reordered +#' to match the inner-joined subject list.} +#' \item{phenotypes}{The inner-joined data.frame. Original +#' \code{source_file} columns are renamed to +#' \code{source_file.} and a new unified +#' \code{source_file} column is added for use with analysis +#' functions [6].} +#' } +#' +#' @seealso \code{\link{ModelArray}} for constructing individual +#' ModelArray objects, \code{\link{ModelArray.lm}}, +#' \code{\link{ModelArray.gam}}, \code{\link{ModelArray.wrap}} for +#' fitting models on the merged object, +#' \code{\link{elementMetadata}} for element correspondence checks. +#' +#' @examples +#' \dontrun{ +#' # Load two sessions from different h5 files +#' ma1 <- ModelArray("session1.h5", scalar_types = c("FD")) +#' ma2 <- ModelArray("session2.h5", scalar_types = c("FC")) +#' phen1 <- read.csv("session1_cohort.csv") +#' phen2 <- read.csv("session2_cohort.csv") +#' +#' # Merge on subject ID +#' merged <- mergeModelArrays( +#' modelarrays = list(ma1, ma2), +#' phenotypes_list = list(phen1, phen2), +#' merge_on = "subject_id" +#' ) +#' +#' # Use the merged object for cross-scalar analysis +#' merged$data +#' scalarNames(merged$data) # c("FD", "FC") +#' head(merged$phenotypes) +#' +#' results <- ModelArray.lm( +#' FD ~ age + sex, +#' data = merged$data, +#' phenotypes = merged$phenotypes, +#' scalar = "FD" +#' ) +#' } +#' +#' @rdname mergeModelArrays #' @importFrom DelayedArray acbind #' @importFrom utils head #' @export From ab243e5d1aa4416f1d4689c1fed5069b06eec79d Mon Sep 17 00:00:00 2001 From: araikes Date: Tue, 31 Mar 2026 10:50:16 -0700 Subject: [PATCH 16/23] Fix style issues --- R/ModelArray_S4Methods.R | 32 +++++++++++++++++++------------- R/analyse-helpers.R | 6 ++++-- R/merge.R | 2 +- R/utils.R | 2 -- 4 files changed, 24 insertions(+), 18 deletions(-) diff --git a/R/ModelArray_S4Methods.R b/R/ModelArray_S4Methods.R index 274bc72..7c67cc1 100644 --- a/R/ModelArray_S4Methods.R +++ b/R/ModelArray_S4Methods.R @@ -71,8 +71,8 @@ setMethod("show", "ModelArray", function(object) { # , group_name_results="resul #' @examples #' \dontrun{ #' ma <- ModelArray("data.h5", scalar_types = c("FD")) -#' sources(ma) # named list -#' sources(ma)[["FD"]] # character vector of filenames +#' sources(ma) # named list +#' sources(ma)[["FD"]] # character vector of filenames #' } #' #' @name sources @@ -106,8 +106,8 @@ setMethod("sources", "ModelArray", function(x) x@sources) #' @examples #' \dontrun{ #' ma <- ModelArray("data.h5", scalar_types = c("FD", "FC")) -#' scalars(ma) # named list of all scalars -#' scalars(ma, "FD") # single DelayedArray matrix +#' scalars(ma) # named list of all scalars +#' scalars(ma, "FD") # single DelayedArray matrix #' } #' #' @name scalars @@ -163,10 +163,12 @@ setMethod( #' #' @examples #' \dontrun{ -#' ma <- ModelArray("data.h5", scalar_types = c("FD"), -#' analysis_names = c("lm_age")) -#' results(ma) # named list of all results -#' results(ma, "lm_age") # single result set +#' ma <- ModelArray("data.h5", +#' scalar_types = c("FD"), +#' analysis_names = c("lm_age") +#' ) +#' results(ma) # named list of all results +#' results(ma, "lm_age") # single result set #' results(ma, "lm_age")$results_matrix #' } #' @@ -380,7 +382,7 @@ setMethod("nInputFiles", "ModelArray", function(x, scalar = NULL) { #' @examples #' \dontrun{ #' ma <- ModelArray("data.h5", scalar_types = c("FD", "FC")) -#' scalarNames(ma) # c("FD", "FC") +#' scalarNames(ma) # c("FD", "FC") #' } #' #' @name scalarNames @@ -411,9 +413,11 @@ setMethod("scalarNames", "ModelArray", function(x) { #' #' @examples #' \dontrun{ -#' ma <- ModelArray("data.h5", scalar_types = c("FD"), -#' analysis_names = c("lm_age")) -#' analysisNames(ma) # "lm_age" +#' ma <- ModelArray("data.h5", +#' scalar_types = c("FD"), +#' analysis_names = c("lm_age") +#' ) +#' analysisNames(ma) # "lm_age" #' } #' #' @name analysisNames @@ -465,7 +469,9 @@ setMethod("elementMetadata", "ModelArray", function(x) { rhdf5::h5read(filepath, p), error = function(e) NULL ) - if (!is.null(result)) return(result) + if (!is.null(result)) { + return(result) + } } NULL }) diff --git a/R/analyse-helpers.R b/R/analyse-helpers.R index d3bce7a..73771e2 100644 --- a/R/analyse-helpers.R +++ b/R/analyse-helpers.R @@ -219,12 +219,14 @@ } } else { if (pbar) { - fits <- pbapply::pblapply(element.subset, + fits <- pbapply::pblapply( + element.subset, FUN, ... ) } else { - fits <- lapply(element.subset, + fits <- lapply( + element.subset, FUN, ... ) diff --git a/R/merge.R b/R/merge.R index 60fc3c1..f96a31b 100644 --- a/R/merge.R +++ b/R/merge.R @@ -78,7 +78,7 @@ #' #' # Use the merged object for cross-scalar analysis #' merged$data -#' scalarNames(merged$data) # c("FD", "FC") +#' scalarNames(merged$data) # c("FD", "FC") #' head(merged$phenotypes) #' #' results <- ModelArray.lm( diff --git a/R/utils.R b/R/utils.R index 6f0a2fe..2b6d0ea 100644 --- a/R/utils.R +++ b/R/utils.R @@ -25,7 +25,6 @@ flagObjectExistInh5 <- function(fn_h5, group_name = "/results", object_name = "m } - #' check if h5 group "results" exist in current .h5 file #' @param fn_h5 filename of the .h5 file #' @noRd @@ -491,7 +490,6 @@ gen_gamFormula_fxSmooth <- function(response.var, factor.var, smooth.var, phenot } - #' Generate GAM formula with continuous-by-continuous interaction #' #' @description From e73bc8954767546397b19bd4b410bec1df34f063 Mon Sep 17 00:00:00 2001 From: araikes Date: Tue, 31 Mar 2026 10:58:11 -0700 Subject: [PATCH 17/23] Introduce headings into reference page. --- _pkgdown.yml | 65 +++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 64 insertions(+), 1 deletion(-) diff --git a/_pkgdown.yml b/_pkgdown.yml index 95f2ddd..cf5b3e0 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -1,7 +1,11 @@ url: https://pennlinc.github.io/ModelArray/ + template: bootstrap: 5 +development: + mode: auto + navbar: structure: left: [intro, introductions, case-studies, reference, news] @@ -37,10 +41,69 @@ navbar: - text: "Developer Documentation" href: articles/doc_for_developer.html +reference: +- title: "ModelArray Class" + desc: > + Create, load, and combine ModelArray objects backed by HDF5 files. + contents: + - ModelArray-class + - ModelArray + - h5summary + - mergeModelArrays + +- title: "Accessors" + desc: > + Extract scalar data, source filenames, results, and metadata from a + ModelArray object. + contents: + - scalars + - sources + - results + - scalarNames + - analysisNames + - nElements + - nInputFiles + - numElementsTotal + - elementMetadata + +- title: "Element-wise Analysis" + desc: > + Fit statistical models at every element (fixel, voxel, or vertex) + in a ModelArray and write results back to HDF5. + contents: + - ModelArray.lm + - ModelArray.gam + - ModelArray.wrap + - writeResults + +- title: "Testing and Debugging" + desc: > + Build per-element test data and inspect your setup before committing + to a full analysis run. + contents: + - exampleElementData + +- title: "Formula Helpers" + desc: > + Generate GAM formulas for common interaction structures used with + ModelArray.gam. + contents: + - gen_gamFormula_fxSmooth + - gen_gamFormula_contIx + +- title: "Internal" + desc: > + Per-element workhorse functions called by the analysis functions. + Most users should not need to call these directly. + contents: + - analyseOneElement.lm + - analyseOneElement.gam + - analyseOneElement.wrap + redirects: - ["articles/wrap.html", "articles/modelling.html"] - ["articles/wrap_function.html", "articles/modelling.html"] - ["articles/voxel-wise_data.html", "articles/modelling.html"] - ["articles/basic_r_intro.html", "articles/installations.html"] - ["articles/faq.html", "articles/exploring-h5.html"] - - ["articles/debugging.html", "articles/exploring-h5.html"] + - ["articles/debugging.html", "articles/exploring-h5.html"] \ No newline at end of file From 7e4da429828586fc16604dc4e73e16f9186e62d0 Mon Sep 17 00:00:00 2001 From: araikes Date: Tue, 31 Mar 2026 11:06:11 -0700 Subject: [PATCH 18/23] Regenerate Rd files --- man/ModelArray.Rd | 2 +- man/analyseOneElement.gam.Rd | 108 ++++++++++++++++++++++----------- man/analyseOneElement.lm.Rd | 90 +++++++++++++++++---------- man/analyseOneElement.wrap.Rd | 99 ++++++++++++++++++++++-------- man/analysisNames.Rd | 8 ++- man/gen_gamFormula_contIx.Rd | 62 +++++++++++++++---- man/gen_gamFormula_fxSmooth.Rd | 93 ++++++++++++++++++++++------ man/h5summary.Rd | 53 +++++++++++++--- man/mergeModelArrays.Rd | 101 ++++++++++++++++++++++++++---- man/pipe.Rd | 2 +- man/results.Rd | 10 +-- man/scalarNames.Rd | 2 +- man/scalars.Rd | 4 +- man/sources.Rd | 4 +- man/writeResults.Rd | 76 ++++++++++++++++++++--- 15 files changed, 547 insertions(+), 167 deletions(-) diff --git a/man/ModelArray.Rd b/man/ModelArray.Rd index d23c8d2..0a82c63 100644 --- a/man/ModelArray.Rd +++ b/man/ModelArray.Rd @@ -40,7 +40,7 @@ If \code{analysis_names} is non-empty, saved results are loaded from } \examples{ \dontrun{ -ma <- ModelArray("path/to/data.h5", scalar_types = c("FD", "FC")) +ma <- ModelArray("path/to/data.h5", scalar_types = c("FD")) ma } diff --git a/man/analyseOneElement.gam.Rd b/man/analyseOneElement.gam.Rd index 7dd5329..58733da 100644 --- a/man/analyseOneElement.gam.Rd +++ b/man/analyseOneElement.gam.Rd @@ -2,7 +2,12 @@ % Please edit documentation in R/ModelArray_Constructor.R \name{analyseOneElement.gam} \alias{analyseOneElement.gam} -\title{Fit GAM for one element} +\title{Fit a GAM for a single element +returns metadata (column names, smooth term names, parametric term names, +and the smoothing parameter criterion attribute name) used by +\code{\link{ModelArray.gam}} to initialise the output data.frame. When +\code{flag_initiate = FALSE}, it returns a numeric vector representing one +row of the final results matrix.} \usage{ analyseOneElement.gam( i_element, @@ -22,58 +27,87 @@ analyseOneElement.gam( ) } \arguments{ -\item{i_element}{An integer, the i_th element, starting from 1. -For initiating (flag_initiate = TRUE), use i_element=1} +\item{i_element}{Integer. The 1-based index of the element to analyse.} -\item{formula}{A formula (passed to `mgcv::gam()`)} +\item{formula}{A \code{\link[stats]{formula}} passed to +\code{\link[mgcv]{gam}}.} -\item{modelarray}{ModelArray class} +\item{modelarray}{A \linkS4class{ModelArray} object.} -\item{phenotypes}{A data.frame of the cohort with columns of independent variables -and covariates to be added to the model} +\item{phenotypes}{A data.frame of the cohort with columns of independent +variables and covariates. Must contain a \code{"source_file"} column +matching \code{sources(modelarray)[[scalar]]}.} -\item{scalar}{A character. The name of the element-wise scalar to be analysed} +\item{scalar}{Character. The name of the element-wise scalar to analyse. +Must be one of \code{names(scalars(modelarray))}.} -\item{var.smoothTerms}{The list of variables to save for smooth terms -(got from broom::tidy(parametric = FALSE)). Example smooth term: age in formula "outcome ~ s(age)".} +\item{var.smoothTerms}{Character vector. Statistics to extract for smooth +terms from \code{\link[broom]{tidy.gam}} with \code{parametric = FALSE} +(e.g. \code{"edf"}, \code{"ref.df"}, \code{"statistic"}, +\code{"p.value"}).} -\item{var.parametricTerms}{The list of variables to save for parametric terms -(got from broom::tidy(parametric = TRUE)). Example parametric term: sex in formula "outcome ~ s(age) + sex".} +\item{var.parametricTerms}{Character vector. Statistics to extract for +parametric terms from \code{\link[broom]{tidy.gam}} with +\code{parametric = TRUE} (e.g. \code{"estimate"}, \code{"std.error"}, +\code{"statistic"}, \code{"p.value"}).} -\item{var.model}{The list of variables to save for the model (got from broom::glance() and summary()).} +\item{var.model}{Character vector. Statistics to extract for the overall +model from \code{\link[broom]{glance.gam}} and +\code{\link[mgcv]{summary.gam}} (e.g. \code{"adj.r.squared"}, +\code{"dev.expl"}, \code{"sp.criterion"}).} -\item{num.subj.lthr}{The minimal number of subjects with valid value in input h5 file, -i.e. number of subjects with finite values (defined by `is.finite()`, -i.e. not NaN or NA or Inf) in h5 file > \code{num.subj.lthr}, -then this element will be run normally; -otherwise, this element will be skipped and statistical outputs will be set as NaN.} +\item{num.subj.lthr}{Numeric. The pre-computed minimum number of subjects +with finite values required for this element to be analysed. Elements +below this threshold are skipped. This value is typically computed by +the parent function from \code{num.subj.lthr.abs} and +\code{num.subj.lthr.rel}.} -\item{num.stat.output}{The number of output stat metrics -(for generating all NaN stat when # subjects does not meet criteria). -This includes column `element_id`. -This is required when flag_initiate = TRUE.} +\item{num.stat.output}{Integer or \code{NULL}. The total number of output +columns (including \code{element_id}). Used when +\code{flag_initiate = FALSE} to generate an all-\code{NaN} row for +skipped elements. Must be \code{NULL} when \code{flag_initiate = TRUE}.} -\item{flag_initiate}{TRUE or FALSE, Whether this is to initiate the new analysis. -If TRUE, it will return column names etc to be used for initiating data.frame; -if FALSE, it will return the list of requested statistic values.} +\item{flag_initiate}{Logical. If \code{TRUE}, fit the model once and return +metadata for initialising the output data.frame (column names and term +names). If \code{FALSE}, return a numeric vector of results for this +element.} -\item{flag_sse}{TRUE or FALSE, Whether to calculate SSE (sum of squared error) for the model (`model.sse`). -SSE is needed for calculating partial R-squared.} +\item{flag_sse}{Logical. If \code{TRUE}, also compute the error sum of +squares (\code{model.sse}) for the model, which is needed for +partial R-squared calculations in \code{\link{ModelArray.gam}}. +Default: \code{FALSE}.} -\item{on_error}{Character: one of "stop", "skip", or "debug". When an error occurs while -fitting one element, choose whether to stop, skip returning all-NaN values for that element, -or drop into `browser()` (if interactive) then skip. Default: "stop".} +\item{on_error}{Character. One of \code{"stop"}, \code{"skip"}, or +\code{"debug"}. When an error occurs fitting the model: \code{"stop"} +halts execution; \code{"skip"} returns all-\code{NaN} for this element; +\code{"debug"} drops into \code{\link{browser}} (if interactive) then +skips. Default: \code{"stop"}.} -\item{...}{Additional arguments for `mgcv::gam()`} +\item{...}{Additional arguments passed to \code{\link[mgcv]{gam}}.} } \value{ -If flag_initiate==TRUE, returns column names, -list of term names of final results, and attr.name of sp.criterion; -if flag_initiate==FALSE, it will return the list of requested statistic values for a element. +If \code{flag_initiate = TRUE}, a list with components: +\describe{ +\item{column_names}{Character vector of output column names.} +\item{list.smoothTerms}{Character vector of smooth term names.} +\item{list.parametricTerms}{Character vector of parametric term names.} +\item{sp.criterion.attr.name}{Character. The name attribute of the +smoothing parameter selection criterion (e.g. \code{"REML"} or +\code{"GCV.Cp"}).} +} +If \code{flag_initiate = FALSE}, a numeric vector of length +\code{num.stat.output} with \code{element_id} (0-based) first and +requested statistics in subsequent positions. All-\code{NaN} (except +\code{element_id}) if the element was skipped. } \description{ -`analyseOneElement.gam` fits a GAM model for one element data, and returns requested model statistics. +If the number of subjects with finite scalar values does not exceed +\code{num.subj.lthr}, the element is skipped and all statistics are set +to \code{NaN}. } -\details{ -`ModelArray.gam` iteratively calls this function to get statistics for all requested elements. +\seealso{ +\code{\link{ModelArray.gam}} which calls this function iteratively, +\code{\link{analyseOneElement.lm}} for the linear model equivalent, +\code{\link{analyseOneElement.wrap}} for user-supplied functions. } +\keyword{internal} diff --git a/man/analyseOneElement.lm.Rd b/man/analyseOneElement.lm.Rd index 52987a4..1069a1d 100644 --- a/man/analyseOneElement.lm.Rd +++ b/man/analyseOneElement.lm.Rd @@ -2,7 +2,10 @@ % Please edit documentation in R/ModelArray_Constructor.R \name{analyseOneElement.lm} \alias{analyseOneElement.lm} -\title{Fit linear model for one element.} +\title{Fit a linear model for a single element +If the number of subjects with finite scalar values (not \code{NaN}, +\code{NA}, or \code{Inf}) does not exceed \code{num.subj.lthr}, the +element is skipped and all statistics are set to \code{NaN}.} \usage{ analyseOneElement.lm( i_element, @@ -20,50 +23,75 @@ analyseOneElement.lm( ) } \arguments{ -\item{i_element}{An integer, the i_th element, starting from 1. -For initiating (flag_initiate = TRUE), use i_element=1} +\item{i_element}{Integer. The 1-based index of the element to analyse.} -\item{formula}{Formula (passed to `stats::lm()`)} +\item{formula}{A \code{\link[stats]{formula}} passed to +\code{\link[stats]{lm}}.} -\item{modelarray}{ModelArray class} +\item{modelarray}{A \linkS4class{ModelArray} object.} -\item{phenotypes}{A data.frame of the cohort with columns of independent variables -and covariates to be added to the model.} +\item{phenotypes}{A data.frame of the cohort with columns of independent +variables and covariates. Must contain a \code{"source_file"} column +matching \code{sources(modelarray)[[scalar]]}.} -\item{scalar}{A character. The name of the element-wise scalar to be analysed} +\item{scalar}{Character. The name of the element-wise scalar to analyse. +Must be one of \code{names(scalars(modelarray))}.} -\item{var.terms}{A list of characters. The list of variables to save for terms (got from `broom::tidy()`).} +\item{var.terms}{Character vector. Statistics to extract per term from +\code{\link[broom]{tidy.lm}} (e.g. \code{"estimate"}, \code{"statistic"}, +\code{"p.value"}).} -\item{var.model}{A list of characters. The list of variables to save for the model (got from `broom::glance()`).} +\item{var.model}{Character vector. Statistics to extract for the overall +model from \code{\link[broom]{glance.lm}} (e.g. \code{"adj.r.squared"}, +\code{"p.value"}).} -\item{num.subj.lthr}{The minimal number of subjects with valid value in input h5 file, -i.e. number of subjects with finite values (defined by `is.finite()`, -i.e. not NaN or NA or Inf) in h5 file > \code{num.subj.lthr}, -then this element will be run normally; -otherwise, this element will be skipped and statistical outputs will be set as NaN.} +\item{num.subj.lthr}{Numeric. The pre-computed minimum number of subjects +with finite values required for this element to be analysed. Elements +below this threshold are skipped. This value is typically computed by +the parent function from \code{num.subj.lthr.abs} and +\code{num.subj.lthr.rel}.} -\item{num.stat.output}{The number of output stat metrics -(for generating all NaN stat when # subjects does not meet criteria). -This includes column `element_id`. -This is required when flag_initiate = TRUE.} +\item{num.stat.output}{Integer or \code{NULL}. The total number of output +columns (including \code{element_id}). Used when +\code{flag_initiate = FALSE} to generate an all-\code{NaN} row for +skipped elements. Must be \code{NULL} when \code{flag_initiate = TRUE}.} -\item{flag_initiate}{TRUE or FALSE, Whether this is to initiate the new analysis. -If TRUE, it will return column names etc to be used for initiating data.frame; -if FALSE, it will return the list of requested statistic values.} +\item{flag_initiate}{Logical. If \code{TRUE}, fit the model once and return +metadata for initialising the output data.frame (column names and term +names). If \code{FALSE}, return a numeric vector of results for this +element.} -\item{on_error}{Character: one of "stop", "skip", or "debug". When an error occurs while -fitting one element, choose whether to stop, skip returning all-NaN values for that element, -or drop into `browser()` (if interactive) then skip. Default: "stop".} +\item{on_error}{Character. One of \code{"stop"}, \code{"skip"}, or +\code{"debug"}. When an error occurs fitting the model: \code{"stop"} +halts execution; \code{"skip"} returns all-\code{NaN} for this element; +\code{"debug"} drops into \code{\link{browser}} (if interactive) then +skips. Default: \code{"stop"}.} -\item{...}{Additional arguments for `stats::lm()`} +\item{...}{Additional arguments passed to \code{\link[stats]{lm}}.} } \value{ -If flag_initiate==TRUE, returns column names, and list of term names of final results; -if flag_initiate==FALSE, it will return the list of requested statistic values for a element. +If \code{flag_initiate = TRUE}, a list with components: +\describe{ +\item{column_names}{Character vector. The column names for the output +data.frame, with \code{"element_id"} first.} +\item{list.terms}{Character vector. The names of the model terms +(from \code{\link[broom]{tidy.lm}}).} +} +If \code{flag_initiate = FALSE}, a numeric vector of length +\code{num.stat.output} with \code{element_id} (0-based) as the first +value and the requested statistics in subsequent positions. All-\code{NaN} +(except \code{element_id}) if the element had insufficient valid subjects +or if an error occurred with \code{on_error = "skip"}. } \description{ -`analyseOneElement.lm` fits a linear model for one element data, and returns requested model statistics. +Fit a linear model for a single element +If the number of subjects with finite scalar values (not \code{NaN}, +\code{NA}, or \code{Inf}) does not exceed \code{num.subj.lthr}, the +element is skipped and all statistics are set to \code{NaN}. } -\details{ -`ModelArray.lm` iteratively calls this function to get statistics for all requested elements. +\seealso{ +\code{\link{ModelArray.lm}} which calls this function iteratively, +\code{\link{analyseOneElement.gam}} for the GAM equivalent, +\code{\link{analyseOneElement.wrap}} for user-supplied functions. } +\keyword{internal} diff --git a/man/analyseOneElement.wrap.Rd b/man/analyseOneElement.wrap.Rd index 762e4db..1780548 100644 --- a/man/analyseOneElement.wrap.Rd +++ b/man/analyseOneElement.wrap.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/ModelArray_Constructor.R \name{analyseOneElement.wrap} \alias{analyseOneElement.wrap} -\title{Run a user-supplied function for one element} +\title{Run a user-supplied function for a single element} \usage{ analyseOneElement.wrap( i_element, @@ -18,42 +18,93 @@ analyseOneElement.wrap( ) } \arguments{ -\item{i_element}{An integer, the i_th element, starting from 1.} +\item{i_element}{Integer. The 1-based index of the element to analyse.} -\item{user_fun}{A function that accepts at least an argument named `data` (the per-element -`phenotypes` with the response column appended) and returns a one-row data.frame/tibble, -named list, or named vector.} +\item{user_fun}{A function that accepts at least an argument named +\code{data} (a data.frame: \code{phenotypes} with scalar columns +appended for the current element) and returns a one-row +data.frame/tibble, a named list, or a named atomic vector.} -\item{modelarray}{ModelArray class} +\item{modelarray}{A \linkS4class{ModelArray} object.} -\item{phenotypes}{A data.frame of the cohort with columns of independent variables and covariates} +\item{phenotypes}{A data.frame of the cohort with columns of independent +variables and covariates. Must contain a \code{"source_file"} column +matching \code{sources(modelarray)[[scalar]]}.} -\item{scalar}{A character. The name of the element-wise scalar to be analysed} +\item{scalar}{Character. The name of the element-wise scalar to analyse. +Must be one of \code{names(scalars(modelarray))}.} -\item{num.subj.lthr}{The minimal number of subjects with valid value in input h5 file} +\item{num.subj.lthr}{Numeric. The pre-computed minimum number of subjects +with finite values required for this element to be analysed. Elements +below this threshold are skipped. This value is typically computed by +the parent function from \code{num.subj.lthr.abs} and +\code{num.subj.lthr.rel}.} -\item{num.stat.output}{The number of output stat metrics (including `element_id`). -Required when `flag_initiate = TRUE`.} +\item{num.stat.output}{Integer or \code{NULL}. The total number of output +columns (including \code{element_id}). Used when +\code{flag_initiate = FALSE} to generate an all-\code{NaN} row for +skipped elements. Must be \code{NULL} when \code{flag_initiate = TRUE}.} -\item{flag_initiate}{TRUE or FALSE, whether this is to initiate the new analysis to get column names} +\item{flag_initiate}{Logical. If \code{TRUE}, fit the model once and return +metadata for initialising the output data.frame (column names and term +names). If \code{FALSE}, return a numeric vector of results for this +element.} -\item{on_error}{Character: one of "stop", "skip", or "debug". When an error occurs while -executing the user function, choose whether to stop, skip returning all-NaN values for this -element, or drop into `browser()` (if interactive) then skip. Default: "stop".} +\item{on_error}{Character. One of \code{"stop"}, \code{"skip"}, or +\code{"debug"}. When an error occurs fitting the model: \code{"stop"} +halts execution; \code{"skip"} returns all-\code{NaN} for this element; +\code{"debug"} drops into \code{\link{browser}} (if interactive) then +skips. Default: \code{"stop"}.} -\item{...}{Additional arguments forwarded to `FUN`} +\item{...}{Additional arguments forwarded to \code{user_fun}.} } \value{ -If `flag_initiate==TRUE`, returns a list with `column_names`. -If `flag_initiate==FALSE`, returns a numeric vector representing the one-row result for this element -with `element_id` as the first value. +If \code{flag_initiate = TRUE}, a list with one component: +\describe{ +\item{column_names}{Character vector. The column names derived from +the return value of \code{user_fun}, with \code{"element_id"} +prepended.} +} +If \code{flag_initiate = FALSE}, a numeric vector of length +\code{num.stat.output} with \code{element_id} (0-based) first and +the coerced output of \code{user_fun} in subsequent positions. +All-\code{NaN} (except \code{element_id}) if the element was skipped +or if an error occurred with \code{on_error = "skip"}. } \description{ -`analyseOneElement.wrap` runs a user-supplied function `FUN` on a single element's data. -It prepares the per-element data by attaching the element values as a new column named by `scalar` -to the provided `phenotypes` data.frame, then calls `FUN(data = dat, ...)`. +Runs a user-supplied function on one element's data, preparing the +per-element data.frame by attaching all scalar values as new columns to +the provided \code{phenotypes}. This is the per-element workhorse called +iteratively by \code{\link{ModelArray.wrap}}. } \details{ -The user-supplied `FUN` should return either a one-row data.frame/tibble, a named list, or a named vector. -The result will be coerced to a one-row tibble and combined into the final results matrix across elements. +Most users should call \code{\link{ModelArray.wrap}} directly, which handles +looping, parallelisation, and result assembly. +\code{analyseOneElement.wrap} is exported for advanced use cases such as +debugging a single element or building custom analysis loops. + +The user-supplied \code{user_fun} is called as +\code{user_fun(data = dat, ...)} where \code{dat} is \code{phenotypes} with +columns appended for \strong{all} scalars in the \linkS4class{ModelArray} +(not just the one named by \code{scalar}). The function should return a +one-row data.frame/tibble, a named list, or a named atomic vector. The +result is coerced to a numeric vector for assembly into the final results +matrix. + +Unlike \code{\link{analyseOneElement.lm}} and +\code{\link{analyseOneElement.gam}}, this function appends \strong{all} +scalar columns (not just the response) and checks for column name +collisions between scalar names and existing \code{phenotypes} columns. + +If the number of subjects with finite values across all scalars does not +exceed \code{num.subj.lthr}, the element is skipped and all statistics +are set to \code{NaN}. +} +\seealso{ +\code{\link{ModelArray.wrap}} which calls this function +iteratively, \code{\link{exampleElementData}} for building a test +data.frame matching the format passed to \code{user_fun}, +\code{\link{analyseOneElement.lm}} for the linear model equivalent, +\code{\link{analyseOneElement.gam}} for the GAM equivalent. } +\keyword{internal} diff --git a/man/analysisNames.Rd b/man/analysisNames.Rd index 88470c6..2b27f7e 100644 --- a/man/analysisNames.Rd +++ b/man/analysisNames.Rd @@ -23,9 +23,11 @@ Returns the names of all analysis result sets currently loaded in a } \examples{ \dontrun{ -ma <- ModelArray("data.h5", scalar_types = c("FD"), - analysis_names = c("lm_age")) -analysisNames(ma) # "lm_age" +ma <- ModelArray("data.h5", + scalar_types = c("FD"), + analysis_names = c("lm_age") +) +analysisNames(ma) # "lm_age" } } diff --git a/man/gen_gamFormula_contIx.Rd b/man/gen_gamFormula_contIx.Rd index 9f778b1..eadf41d 100644 --- a/man/gen_gamFormula_contIx.Rd +++ b/man/gen_gamFormula_contIx.Rd @@ -2,27 +2,67 @@ % Please edit documentation in R/utils.R \name{gen_gamFormula_contIx} \alias{gen_gamFormula_contIx} -\title{Generate GAM formula with continuous*continuous interaction} +\title{Generate GAM formula with continuous-by-continuous interaction} \usage{ gen_gamFormula_contIx(response.var, cont1.var, cont2.var, fx = TRUE, k = NULL) } \arguments{ -\item{response.var}{character class, the variable name for response} +\item{response.var}{Character. The variable name for the response +(dependent variable), typically a scalar name like \code{"FD"}.} -\item{cont1.var}{character class, the name of the first continuous variable} +\item{cont1.var}{Character. The name of the first continuous variable.} -\item{cont2.var}{character class, the name of the second continuous variable} +\item{cont2.var}{Character. The name of the second continuous variable.} -\item{fx}{TRUE or FALSE, to be used in smooth term s(). Recommend TRUE.} +\item{fx}{Logical. Passed to \code{\link[mgcv]{ti}}. If \code{TRUE} +(recommended), the smooth is treated as fixed degrees of freedom. +Default is \code{TRUE} \link{2}.} -\item{k}{integer, to be used in smooth term including the interaction term. -If NULL (no entry), will use default value as in mgcv::s()} +\item{k}{Integer or \code{NULL}. Basis dimension passed to +\code{\link[mgcv]{ti}} for all three terms (both main effects and the +interaction). If \code{NULL} (default), uses the default from +\code{mgcv::ti()} \link{2}.} } \value{ -The formula generated +A \code{\link[stats]{formula}} object. } \description{ -This function will generate a formula in the following format: \code{y ~ ti(x) + ti(z) + ti(x,z)}, -where \code{y} is \code{response.var}, \code{x} is \code{cont1.var}, and \code{z} is \code{cont2.var}. -The formula generated could be further modified, e.g. adding covariates. +Generates a formula in the format +\code{y ~ ti(x) + ti(z) + ti(x, z)}, where \code{y} is +\code{response.var}, \code{x} is \code{cont1.var}, and \code{z} is +\code{cont2.var}. The formula generated can be further modified, e.g. +by adding covariates. +} +\details{ +This helper uses \code{\link[mgcv]{ti}} (tensor product interaction) +terms so that the interaction \code{ti(x, z)} captures only the +interaction effect, separate from the main effects \code{ti(x)} and +\code{ti(z)}. This decomposition is important for interpretability and +for requesting \code{changed.rsq.term.index} in +\code{\link{ModelArray.gam}} \link{4}. +} +\examples{ +\dontrun{ +formula <- gen_gamFormula_contIx( + response.var = "FD", + cont1.var = "age", + cont2.var = "cognition" +) +formula + +# Use in ModelArray.gam with changed R-squared for the interaction +results <- ModelArray.gam( + formula, + data = ma, + phenotypes = phenotypes, + scalar = "FD", + changed.rsq.term.index = list(3) +) +} + +} +\seealso{ +\code{\link{gen_gamFormula_fxSmooth}} for factor-smooth +interactions, \code{\link{ModelArray.gam}} which accepts the generated +formula. } diff --git a/man/gen_gamFormula_fxSmooth.Rd b/man/gen_gamFormula_fxSmooth.Rd index 87f9a74..b8d92c7 100644 --- a/man/gen_gamFormula_fxSmooth.Rd +++ b/man/gen_gamFormula_fxSmooth.Rd @@ -16,35 +16,90 @@ gen_gamFormula_fxSmooth( ) } \arguments{ -\item{response.var}{character class, the variable name for response} +\item{response.var}{Character. The variable name for the response +(dependent variable), typically a scalar name like \code{"FD"}.} -\item{factor.var}{character class, the variable name for factor. It should be an ordered factor. -If not, it will generate it as a new column in `phenotypes`, which requires `reference.group`.} +\item{factor.var}{Character. The variable name for the factor. It should +be an ordered factor in \code{phenotypes}. If not, an ordered factor +will be generated as a new column, which requires \code{reference.group} \link{2}.} -\item{smooth.var}{character class, the variable name in smooth term as main effect} +\item{smooth.var}{Character. The variable name for the smooth term main +effect (e.g. \code{"age"}).} -\item{phenotypes}{data.frame class, the cohort matrix with columns of independent variables (including -\code{factor.var} and \code{smooth.var}) to be added to the model} +\item{phenotypes}{A data.frame of the cohort with columns of independent +variables, including \code{factor.var} and \code{smooth.var}.} -\item{reference.group}{character class, the reference group for ordered factor of `factor.var`; required when -`factor.var` in `phenotypes` is not an ordered factor.} +\item{reference.group}{Character. The reference (baseline) group for the +ordered factor of \code{factor.var}. Required when \code{factor.var} +in \code{phenotypes} is not already an ordered factor \link{2}.} -\item{prefix.ordered.factor}{character class, the prefix for ordered factor; required when `factor.var` in -`phenotypes` is not an ordered factor.} +\item{prefix.ordered.factor}{Character. Prefix for the ordered factor +column name. Required when \code{factor.var} in \code{phenotypes} is +not already an ordered factor. Default is \code{"o"} \link{2}.} -\item{fx}{TRUE or FALSE, to be used in smooth term s(). Recommend TRUE.} +\item{fx}{Logical. Passed to \code{\link[mgcv]{s}}. If \code{TRUE} +(recommended), the smooth is treated as fixed degrees of freedom. +Default is \code{TRUE}.} -\item{k}{integer, to be used in smooth term including the interaction term. If NULL (no entry), will use default -value as in mgcv::s()} +\item{k}{Integer or \code{NULL}. Basis dimension passed to +\code{\link[mgcv]{s}} for both the main smooth and interaction terms. +If \code{NULL} (default), uses the default from \code{mgcv::s()} \link{2}.} } \value{ -a list, including: 1) formula generated; 2) data.frame phenotypes - updated if argument factor.var -is not an ordered factor +A list with two components: +\describe{ +\item{formula}{The generated \code{\link[stats]{formula}} object.} +\item{phenotypes}{The (possibly updated) data.frame. If +\code{factor.var} was not already an ordered factor, a new column +named \code{paste0(prefix.ordered.factor, factor.var)} is added. +Otherwise identical to the input \link{2}.} +} } \description{ -This function will generate a formula in the following format: -\code{y ~ orderedFactor + s(x) + s(x, by=orderedFactor)}, +Generates a formula in the format +\code{y ~ orderedFactor + s(x) + s(x, by = orderedFactor)}, where \code{y} is \code{response.var}, \code{x} is \code{smooth.var}, -and \code{orderedFactor} is \code{factor.var} - see \code{factor.var} for more. -The formula generated could be further modified, e.g. adding covariates. +and \code{orderedFactor} is \code{factor.var}. +The formula generated can be further modified, e.g. by adding covariates. +} +\details{ +This helper exists because setting up factor-smooth interactions in +\code{\link[mgcv]{gam}} requires an ordered factor and a specific formula +structure. If \code{factor.var} in \code{phenotypes} is not already an +ordered factor, this function creates one using \code{reference.group} as +the baseline level and adds it as a new column (named with +\code{prefix.ordered.factor} prepended to \code{factor.var}) \link{2}. + +The returned \code{phenotypes} data.frame must be used in the subsequent +\code{\link{ModelArray.gam}} call so that the ordered factor column is +available to the model. +} +\examples{ +\dontrun{ +phenotypes <- read.csv("cohort.csv") + +# factor.var is not yet ordered - function creates it +result <- gen_gamFormula_fxSmooth( + response.var = "FD", + factor.var = "sex", + smooth.var = "age", + phenotypes = phenotypes, + reference.group = "female" +) +result$formula + +# Use the updated phenotypes (contains the ordered factor column) +results <- ModelArray.gam( + result$formula, + data = ma, + phenotypes = result$phenotypes, + scalar = "FD" +) +} + +} +\seealso{ +\code{\link{gen_gamFormula_contIx}} for continuous-by-continuous +interactions, \code{\link{ModelArray.gam}} which accepts the generated +formula. } diff --git a/man/h5summary.Rd b/man/h5summary.Rd index d3f2836..3f76000 100644 --- a/man/h5summary.Rd +++ b/man/h5summary.Rd @@ -2,21 +2,58 @@ % Please edit documentation in R/utils.R \name{h5summary} \alias{h5summary} -\title{Summarize an h5 file without loading the full ModelArray} +\alias{print.h5summary} +\title{Summarize an HDF5 file without loading a full ModelArray} \usage{ h5summary(filepath) + +\method{print}{h5summary}(x, ...) } \arguments{ -\item{filepath}{Path to an h5 file} +\item{filepath}{Character. Path to an HDF5 (\code{.h5}) file.} + +\item{x}{An \code{h5summary} object as returned by +\code{\link{h5summary}}.} + +\item{...}{Additional arguments (currently ignored).} } \value{ -A list with components: - \item{scalars}{A data.frame with columns: name, nElements, nInputFiles} - \item{analyses}{Character vector of analysis names} - \item{filepath}{The input filepath} +An object of class \code{"h5summary"}, which is a list with +components: +\describe{ +\item{scalars}{A data.frame with columns \code{name}, +\code{nElements}, and \code{nInputFiles} \link{2}.} +\item{analyses}{Character vector of analysis names found under +\code{/results/} \link{2}.} +\item{filepath}{The input filepath.} +} + +Invisible \code{x}. Called for its side effect of printing a +human-readable summary to the console. } \description{ -Reads the h5 file structure and returns a summary of available scalars, +Reads the HDF5 file structure and returns a summary of available scalars, their dimensions, and any saved analyses. Useful for inspecting large files -without constructing a full ModelArray object. +without constructing a full \linkS4class{ModelArray} object. +} +\details{ +This function opens the HDF5 file read-only via \code{\link[rhdf5]{h5ls}}, +inspects the group structure under \code{/scalars/} and \code{/results/}, +and closes the file. It does not load any data into memory. The returned +object has a \code{print} method that displays a formatted summary \link{2}. +} +\examples{ +\dontrun{ +h5summary("path/to/data.h5") + +# Inspect before deciding which scalars to load +info <- h5summary("path/to/data.h5") +info$scalars$name +ma <- ModelArray("path/to/data.h5", scalar_types = info$scalars$name) +} + +} +\seealso{ +\code{\link{ModelArray}} for loading the full object, +\linkS4class{ModelArray} for the class definition. } diff --git a/man/mergeModelArrays.Rd b/man/mergeModelArrays.Rd index bfdbb0f..a7f279f 100644 --- a/man/mergeModelArrays.Rd +++ b/man/mergeModelArrays.Rd @@ -2,27 +2,102 @@ % Please edit documentation in R/merge.R \name{mergeModelArrays} \alias{mergeModelArrays} -\title{Merge multiple ModelArrays from different h5 files} +\title{Merge multiple ModelArrays from different HDF5 files} \usage{ mergeModelArrays(modelarrays, phenotypes_list, merge_on) } \arguments{ -\item{modelarrays}{A list of ModelArray objects} +\item{modelarrays}{A list of at least two \linkS4class{ModelArray} +objects, each constructed from a different HDF5 file.} -\item{phenotypes_list}{A list of data.frames, one per ModelArray. Each must -contain a `source_file` column matching its corresponding ModelArray's sources.} +\item{phenotypes_list}{A list of data.frames, one per +\linkS4class{ModelArray} in \code{modelarrays}. Each must contain a +\code{source_file} column whose entries match the corresponding +ModelArray's sources (i.e. \code{sources(modelarrays[[i]])}). +Each must also contain all columns named in \code{merge_on} \link{6}.} -\item{merge_on}{Character vector of column names to join phenotypes on -(e.g., `c("subject_id")`). These columns must exist in all phenotypes data.frames.} +\item{merge_on}{Character vector of column names present in all +data.frames in \code{phenotypes_list}, used to inner-join subjects +across sessions/modalities (e.g. \code{c("subject_id")}). The +combination of these columns must uniquely identify each subject +within each data.frame \link{6}.} } \value{ -A list with: - \item{data}{A combined ModelArray with scalars from all inputs} - \item{phenotypes}{The inner-joined phenotypes data.frame. The original - `source_file` columns are renamed to `source_file.`.} +A list with two components: +\describe{ +\item{data}{A combined \linkS4class{ModelArray} containing scalars +from all inputs. Each scalar's columns are subsetted and reordered +to match the inner-joined subject list.} +\item{phenotypes}{The inner-joined data.frame. Original +\code{source_file} columns are renamed to +\code{source_file.} and a new unified +\code{source_file} column is added for use with analysis +functions \link{6}.} +} } \description{ -Combines scalars from multiple ModelArray objects into a single ModelArray, -aligning subjects via phenotype columns. Uses `DelayedArray::acbind()` for -virtual column-binding — no h5 rewriting is needed. +Combines scalars from multiple \linkS4class{ModelArray} objects into a +single \linkS4class{ModelArray}, aligning subjects via shared phenotype +columns. Uses \code{\link[DelayedArray]{acbind}} for virtual +column-binding — no HDF5 rewriting is needed. +} +\details{ +The merge performs an inner join of the phenotype data.frames on the +columns specified by \code{merge_on}. Only subjects present in all +phenotype data.frames are retained. Scalar matrices from each input +\linkS4class{ModelArray} are column-subsetted and reordered to match +the joined subject list \link{6}. + +A unified \code{source_file} column is created from the \code{merge_on} +columns so that downstream analysis functions +(\code{\link{ModelArray.lm}}, \code{\link{ModelArray.gam}}, +\code{\link{ModelArray.wrap}}) can align phenotypes to scalars. The +original \code{source_file} columns are renamed to +\code{source_file.} for each input +\linkS4class{ModelArray} \link{6}. + +Scalar names must be unique across all input ModelArrays. If two +ModelArrays share a scalar name (e.g. both have \code{"FD"}), the +function will error. Element counts (number of rows) must match +across all scalars \link{6}. + +If element metadata is available (see \code{\link{elementMetadata}}), +the function checks that it is consistent across inputs and warns if +it differs or is only partially available \link{6}. +} +\examples{ +\dontrun{ +# Load two sessions from different h5 files +ma1 <- ModelArray("session1.h5", scalar_types = c("FD")) +ma2 <- ModelArray("session2.h5", scalar_types = c("FC")) +phen1 <- read.csv("session1_cohort.csv") +phen2 <- read.csv("session2_cohort.csv") + +# Merge on subject ID +merged <- mergeModelArrays( + modelarrays = list(ma1, ma2), + phenotypes_list = list(phen1, phen2), + merge_on = "subject_id" +) + +# Use the merged object for cross-scalar analysis +merged$data +scalarNames(merged$data) # c("FD", "FC") +head(merged$phenotypes) + +results <- ModelArray.lm( + FD ~ age + sex, + data = merged$data, + phenotypes = merged$phenotypes, + scalar = "FD" +) +} + +} +\seealso{ +\code{\link{ModelArray}} for constructing individual +ModelArray objects, \code{\link{ModelArray.lm}}, +\code{\link{ModelArray.gam}}, \code{\link{ModelArray.wrap}} for +fitting models on the merged object, +\code{\link{elementMetadata}} for element correspondence checks. } diff --git a/man/pipe.Rd b/man/pipe.Rd index 1f8f237..a648c29 100644 --- a/man/pipe.Rd +++ b/man/pipe.Rd @@ -12,7 +12,7 @@ lhs \%>\% rhs \item{rhs}{A function call using the magrittr semantics.} } \value{ -The result of calling `rhs(lhs)`. +The result of calling \code{rhs(lhs)}. } \description{ See \code{magrittr::\link[magrittr:pipe]{\%>\%}} for details. diff --git a/man/results.Rd b/man/results.Rd index 862db7f..bc346fc 100644 --- a/man/results.Rd +++ b/man/results.Rd @@ -38,10 +38,12 @@ back with \code{\link{writeResults}}. } \examples{ \dontrun{ -ma <- ModelArray("data.h5", scalar_types = c("FD"), - analysis_names = c("lm_age")) -results(ma) # named list of all results -results(ma, "lm_age") # single result set +ma <- ModelArray("data.h5", + scalar_types = c("FD"), + analysis_names = c("lm_age") +) +results(ma) # named list of all results +results(ma, "lm_age") # single result set results(ma, "lm_age")$results_matrix } diff --git a/man/scalarNames.Rd b/man/scalarNames.Rd index f2449be..84f846a 100644 --- a/man/scalarNames.Rd +++ b/man/scalarNames.Rd @@ -22,7 +22,7 @@ Returns the names of all scalar datasets loaded in a \examples{ \dontrun{ ma <- ModelArray("data.h5", scalar_types = c("FD", "FC")) -scalarNames(ma) # c("FD", "FC") +scalarNames(ma) # c("FD", "FC") } } diff --git a/man/scalars.Rd b/man/scalars.Rd index 7b6d504..13459a2 100644 --- a/man/scalars.Rd +++ b/man/scalars.Rd @@ -29,8 +29,8 @@ matrices. When called with a single scalar name, returns that one matrix. \examples{ \dontrun{ ma <- ModelArray("data.h5", scalar_types = c("FD", "FC")) -scalars(ma) # named list of all scalars -scalars(ma, "FD") # single DelayedArray matrix +scalars(ma) # named list of all scalars +scalars(ma, "FD") # single DelayedArray matrix } } diff --git a/man/sources.Rd b/man/sources.Rd index f55f6e9..b879979 100644 --- a/man/sources.Rd +++ b/man/sources.Rd @@ -25,8 +25,8 @@ file/subject. \examples{ \dontrun{ ma <- ModelArray("data.h5", scalar_types = c("FD")) -sources(ma) # named list -sources(ma)[["FD"]] # character vector of filenames +sources(ma) # named list +sources(ma)[["FD"]] # character vector of filenames } } diff --git a/man/writeResults.Rd b/man/writeResults.Rd index 927de8e..6743d90 100644 --- a/man/writeResults.Rd +++ b/man/writeResults.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/ModelArray_Constructor.R \name{writeResults} \alias{writeResults} -\title{Write outputs from element-wise statistical analysis to the HDF5 file.} +\title{Write outputs from element-wise statistical analysis to an HDF5 file} \usage{ writeResults( fn.output, @@ -12,20 +12,76 @@ writeResults( ) } \arguments{ -\item{fn.output}{A character, The HDF5 (.h5) filename for the output} +\item{fn.output}{Character. The HDF5 (\code{.h5}) filename for the output. +The file must already exist; use an absolute path if you encounter +file-not-found errors.} -\item{df.output}{A data.frame object with element-wise statistical results, returned from `ModelArray.lm()` etc} +\item{df.output}{A data.frame of element-wise statistical results, as +returned by \code{\link{ModelArray.lm}}, +\code{\link{ModelArray.gam}}, or \code{\link{ModelArray.wrap}}. +Must inherit from \code{data.frame} \link{7}.} -\item{analysis_name}{A character, the name of the results} +\item{analysis_name}{Character. The name for this set of results. Used +as the group name under \code{/results/} in the HDF5 file. +Default is \code{"myAnalysis"} \link{7}.} -\item{overwrite}{If a group with the same analysis_name exists in HDF5 file, -whether overwrite it (TRUE) or not (FALSE)} +\item{overwrite}{Logical. If a group with the same \code{analysis_name} +already exists in the HDF5 file, whether to overwrite it (\code{TRUE}) +or skip with a warning (\code{FALSE}). Default is \code{TRUE} \link{7}.} +} +\value{ +Invisible \code{NULL}. Called for its side effect of writing +results to the HDF5 file. } \description{ -Create a group named `analysis_name` in HDF5 file, -then write the statistical results data.frame (i.e. for one analysis) in it. +Creates a group named \code{analysis_name} under \code{/results/} in the +HDF5 file, then writes the statistical results data.frame (i.e. for one +analysis) into it as \code{results_matrix} along with column names \link{7}. } \details{ -debug tip: For "Error in H5File.open(filename, mode, file_create_pl, file_access_pl)", -check if there is message 'No such file or directory'. Try absolute .h5 filename. +The results are stored at +\code{/results//results_matrix} with column names saved +as a separate dataset at +\code{/results//column_names} \link{7}. + +If any column of \code{df.output} is not numeric or integer, it is +coerced to numeric via \code{factor()} and the factor levels are saved +as a look-up table at +\code{/results//lut_forcol} \link{7}. + +\strong{Debugging tip:} If you encounter +\code{"Error in H5File.open(filename, mode, file_create_pl, file_access_pl)"}, +check if the message mentions "No such file or directory". Try using an +absolute path for the \code{fn.output} argument \link{7}. +} +\examples{ +\dontrun{ +ma <- ModelArray("data.h5", scalar_types = c("FD")) +phenotypes <- read.csv("cohort.csv") + +results <- ModelArray.lm( + FD ~ age + sex, + data = ma, + phenotypes = phenotypes, + scalar = "FD" +) + +writeResults( + fn.output = "data.h5", + df.output = results, + analysis_name = "lm_age_sex", + overwrite = TRUE +) + +# Verify +h5summary("data.h5") +} + +} +\seealso{ +\code{\link{ModelArray.lm}}, \code{\link{ModelArray.gam}}, +\code{\link{ModelArray.wrap}} which produce the \code{df.output}, +\code{\link{results}} for reading results back from a +\linkS4class{ModelArray}, \code{\link{h5summary}} for inspecting what +has been written. } From 1d327f4567f6cea8f87bc509c3652db4feea5482 Mon Sep 17 00:00:00 2001 From: araikes Date: Tue, 31 Mar 2026 11:08:49 -0700 Subject: [PATCH 19/23] Fix aberrent linking objects introduced. --- R/ModelArray_Constructor.R | 14 +++++++------- R/merge.R | 14 +++++++------- R/utils.R | 24 ++++++++++++------------ 3 files changed, 26 insertions(+), 26 deletions(-) diff --git a/R/ModelArray_Constructor.R b/R/ModelArray_Constructor.R index 13173db..c7c1d7c 100644 --- a/R/ModelArray_Constructor.R +++ b/R/ModelArray_Constructor.R @@ -1392,23 +1392,23 @@ analyseOneElement.wrap <- function(i_element, #' @description #' Creates a group named \code{analysis_name} under \code{/results/} in the #' HDF5 file, then writes the statistical results data.frame (i.e. for one -#' analysis) into it as \code{results_matrix} along with column names [7]. +#' analysis) into it as \code{results_matrix} along with column names. #' #' @details #' The results are stored at #' \code{/results//results_matrix} with column names saved #' as a separate dataset at -#' \code{/results//column_names} [7]. +#' \code{/results//column_names}. #' #' If any column of \code{df.output} is not numeric or integer, it is #' coerced to numeric via \code{factor()} and the factor levels are saved #' as a look-up table at -#' \code{/results//lut_forcol} [7]. +#' \code{/results//lut_forcol}. #' #' \strong{Debugging tip:} If you encounter #' \code{"Error in H5File.open(filename, mode, file_create_pl, file_access_pl)"}, #' check if the message mentions "No such file or directory". Try using an -#' absolute path for the \code{fn.output} argument [7]. +#' absolute path for the \code{fn.output} argument. #' #' @param fn.output Character. The HDF5 (\code{.h5}) filename for the output. #' The file must already exist; use an absolute path if you encounter @@ -1416,13 +1416,13 @@ analyseOneElement.wrap <- function(i_element, #' @param df.output A data.frame of element-wise statistical results, as #' returned by \code{\link{ModelArray.lm}}, #' \code{\link{ModelArray.gam}}, or \code{\link{ModelArray.wrap}}. -#' Must inherit from \code{data.frame} [7]. +#' Must inherit from \code{data.frame}. #' @param analysis_name Character. The name for this set of results. Used #' as the group name under \code{/results/} in the HDF5 file. -#' Default is \code{"myAnalysis"} [7]. +#' Default is \code{"myAnalysis"}. #' @param overwrite Logical. If a group with the same \code{analysis_name} #' already exists in the HDF5 file, whether to overwrite it (\code{TRUE}) -#' or skip with a warning (\code{FALSE}). Default is \code{TRUE} [7]. +#' or skip with a warning (\code{FALSE}). Default is \code{TRUE}. #' #' @return Invisible \code{NULL}. Called for its side effect of writing #' results to the HDF5 file. diff --git a/R/merge.R b/R/merge.R index f96a31b..f8db780 100644 --- a/R/merge.R +++ b/R/merge.R @@ -11,7 +11,7 @@ #' columns specified by \code{merge_on}. Only subjects present in all #' phenotype data.frames are retained. Scalar matrices from each input #' \linkS4class{ModelArray} are column-subsetted and reordered to match -#' the joined subject list [6]. +#' the joined subject list. #' #' A unified \code{source_file} column is created from the \code{merge_on} #' columns so that downstream analysis functions @@ -19,16 +19,16 @@ #' \code{\link{ModelArray.wrap}}) can align phenotypes to scalars. The #' original \code{source_file} columns are renamed to #' \code{source_file.} for each input -#' \linkS4class{ModelArray} [6]. +#' \linkS4class{ModelArray}. #' #' Scalar names must be unique across all input ModelArrays. If two #' ModelArrays share a scalar name (e.g. both have \code{"FD"}), the #' function will error. Element counts (number of rows) must match -#' across all scalars [6]. +#' across all scalars. #' #' If element metadata is available (see \code{\link{elementMetadata}}), #' the function checks that it is consistent across inputs and warns if -#' it differs or is only partially available [6]. +#' it differs or is only partially available. #' #' @param modelarrays A list of at least two \linkS4class{ModelArray} #' objects, each constructed from a different HDF5 file. @@ -36,12 +36,12 @@ #' \linkS4class{ModelArray} in \code{modelarrays}. Each must contain a #' \code{source_file} column whose entries match the corresponding #' ModelArray's sources (i.e. \code{sources(modelarrays[[i]])}). -#' Each must also contain all columns named in \code{merge_on} [6]. +#' Each must also contain all columns named in \code{merge_on}. #' @param merge_on Character vector of column names present in all #' data.frames in \code{phenotypes_list}, used to inner-join subjects #' across sessions/modalities (e.g. \code{c("subject_id")}). The #' combination of these columns must uniquely identify each subject -#' within each data.frame [6]. +#' within each data.frame. #' #' @return A list with two components: #' \describe{ @@ -52,7 +52,7 @@ #' \code{source_file} columns are renamed to #' \code{source_file.} and a new unified #' \code{source_file} column is added for use with analysis -#' functions [6].} +#' functions.} #' } #' #' @seealso \code{\link{ModelArray}} for constructing individual diff --git a/R/utils.R b/R/utils.R index 2b6d0ea..69ae303 100644 --- a/R/utils.R +++ b/R/utils.R @@ -357,7 +357,7 @@ checker_gam_formula <- function(formula, gam.formula.breakdown, onemodel = NULL) #' structure. If \code{factor.var} in \code{phenotypes} is not already an #' ordered factor, this function creates one using \code{reference.group} as #' the baseline level and adds it as a new column (named with -#' \code{prefix.ordered.factor} prepended to \code{factor.var}) [2]. +#' \code{prefix.ordered.factor} prepended to \code{factor.var}). #' #' The returned \code{phenotypes} data.frame must be used in the subsequent #' \code{\link{ModelArray.gam}} call so that the ordered factor column is @@ -367,23 +367,23 @@ checker_gam_formula <- function(formula, gam.formula.breakdown, onemodel = NULL) #' (dependent variable), typically a scalar name like \code{"FD"}. #' @param factor.var Character. The variable name for the factor. It should #' be an ordered factor in \code{phenotypes}. If not, an ordered factor -#' will be generated as a new column, which requires \code{reference.group} [2]. +#' will be generated as a new column, which requires \code{reference.group}. #' @param smooth.var Character. The variable name for the smooth term main #' effect (e.g. \code{"age"}). #' @param phenotypes A data.frame of the cohort with columns of independent #' variables, including \code{factor.var} and \code{smooth.var}. #' @param reference.group Character. The reference (baseline) group for the #' ordered factor of \code{factor.var}. Required when \code{factor.var} -#' in \code{phenotypes} is not already an ordered factor [2]. +#' in \code{phenotypes} is not already an ordered factor. #' @param prefix.ordered.factor Character. Prefix for the ordered factor #' column name. Required when \code{factor.var} in \code{phenotypes} is -#' not already an ordered factor. Default is \code{"o"} [2]. +#' not already an ordered factor. Default is \code{"o"}. #' @param fx Logical. Passed to \code{\link[mgcv]{s}}. If \code{TRUE} #' (recommended), the smooth is treated as fixed degrees of freedom. #' Default is \code{TRUE}. #' @param k Integer or \code{NULL}. Basis dimension passed to #' \code{\link[mgcv]{s}} for both the main smooth and interaction terms. -#' If \code{NULL} (default), uses the default from \code{mgcv::s()} [2]. +#' If \code{NULL} (default), uses the default from \code{mgcv::s()}. #' #' @return A list with two components: #' \describe{ @@ -391,7 +391,7 @@ checker_gam_formula <- function(formula, gam.formula.breakdown, onemodel = NULL) #' \item{phenotypes}{The (possibly updated) data.frame. If #' \code{factor.var} was not already an ordered factor, a new column #' named \code{paste0(prefix.ordered.factor, factor.var)} is added. -#' Otherwise identical to the input [2].} +#' Otherwise identical to the input.} #' } #' #' @seealso \code{\link{gen_gamFormula_contIx}} for continuous-by-continuous @@ -505,7 +505,7 @@ gen_gamFormula_fxSmooth <- function(response.var, factor.var, smooth.var, phenot #' interaction effect, separate from the main effects \code{ti(x)} and #' \code{ti(z)}. This decomposition is important for interpretability and #' for requesting \code{changed.rsq.term.index} in -#' \code{\link{ModelArray.gam}} [4]. +#' \code{\link{ModelArray.gam}}. #' #' @param response.var Character. The variable name for the response #' (dependent variable), typically a scalar name like \code{"FD"}. @@ -513,11 +513,11 @@ gen_gamFormula_fxSmooth <- function(response.var, factor.var, smooth.var, phenot #' @param cont2.var Character. The name of the second continuous variable. #' @param fx Logical. Passed to \code{\link[mgcv]{ti}}. If \code{TRUE} #' (recommended), the smooth is treated as fixed degrees of freedom. -#' Default is \code{TRUE} [2]. +#' Default is \code{TRUE}. #' @param k Integer or \code{NULL}. Basis dimension passed to #' \code{\link[mgcv]{ti}} for all three terms (both main effects and the #' interaction). If \code{NULL} (default), uses the default from -#' \code{mgcv::ti()} [2]. +#' \code{mgcv::ti()}. #' #' @return A \code{\link[stats]{formula}} object. #' @@ -596,7 +596,7 @@ bind_cols_check_emptyTibble <- function(a, b) { #' This function opens the HDF5 file read-only via \code{\link[rhdf5]{h5ls}}, #' inspects the group structure under \code{/scalars/} and \code{/results/}, #' and closes the file. It does not load any data into memory. The returned -#' object has a \code{print} method that displays a formatted summary [2]. +#' object has a \code{print} method that displays a formatted summary. #' #' @param filepath Character. Path to an HDF5 (\code{.h5}) file. #' @@ -604,9 +604,9 @@ bind_cols_check_emptyTibble <- function(a, b) { #' components: #' \describe{ #' \item{scalars}{A data.frame with columns \code{name}, -#' \code{nElements}, and \code{nInputFiles} [2].} +#' \code{nElements}, and \code{nInputFiles}.} #' \item{analyses}{Character vector of analysis names found under -#' \code{/results/} [2].} +#' \code{/results/}.} #' \item{filepath}{The input filepath.} #' } #' From 9fe7ba578268b9c20e5d2068de20123a19697653 Mon Sep 17 00:00:00 2001 From: araikes Date: Tue, 31 Mar 2026 11:09:32 -0700 Subject: [PATCH 20/23] Update Rd files again --- man/gen_gamFormula_contIx.Rd | 6 +++--- man/gen_gamFormula_fxSmooth.Rd | 12 ++++++------ man/h5summary.Rd | 6 +++--- man/mergeModelArrays.Rd | 14 +++++++------- man/writeResults.Rd | 14 +++++++------- 5 files changed, 26 insertions(+), 26 deletions(-) diff --git a/man/gen_gamFormula_contIx.Rd b/man/gen_gamFormula_contIx.Rd index eadf41d..7dda277 100644 --- a/man/gen_gamFormula_contIx.Rd +++ b/man/gen_gamFormula_contIx.Rd @@ -16,12 +16,12 @@ gen_gamFormula_contIx(response.var, cont1.var, cont2.var, fx = TRUE, k = NULL) \item{fx}{Logical. Passed to \code{\link[mgcv]{ti}}. If \code{TRUE} (recommended), the smooth is treated as fixed degrees of freedom. -Default is \code{TRUE} \link{2}.} +Default is \code{TRUE}.} \item{k}{Integer or \code{NULL}. Basis dimension passed to \code{\link[mgcv]{ti}} for all three terms (both main effects and the interaction). If \code{NULL} (default), uses the default from -\code{mgcv::ti()} \link{2}.} +\code{mgcv::ti()}.} } \value{ A \code{\link[stats]{formula}} object. @@ -39,7 +39,7 @@ terms so that the interaction \code{ti(x, z)} captures only the interaction effect, separate from the main effects \code{ti(x)} and \code{ti(z)}. This decomposition is important for interpretability and for requesting \code{changed.rsq.term.index} in -\code{\link{ModelArray.gam}} \link{4}. +\code{\link{ModelArray.gam}}. } \examples{ \dontrun{ diff --git a/man/gen_gamFormula_fxSmooth.Rd b/man/gen_gamFormula_fxSmooth.Rd index b8d92c7..63b9a33 100644 --- a/man/gen_gamFormula_fxSmooth.Rd +++ b/man/gen_gamFormula_fxSmooth.Rd @@ -21,7 +21,7 @@ gen_gamFormula_fxSmooth( \item{factor.var}{Character. The variable name for the factor. It should be an ordered factor in \code{phenotypes}. If not, an ordered factor -will be generated as a new column, which requires \code{reference.group} \link{2}.} +will be generated as a new column, which requires \code{reference.group}.} \item{smooth.var}{Character. The variable name for the smooth term main effect (e.g. \code{"age"}).} @@ -31,11 +31,11 @@ variables, including \code{factor.var} and \code{smooth.var}.} \item{reference.group}{Character. The reference (baseline) group for the ordered factor of \code{factor.var}. Required when \code{factor.var} -in \code{phenotypes} is not already an ordered factor \link{2}.} +in \code{phenotypes} is not already an ordered factor.} \item{prefix.ordered.factor}{Character. Prefix for the ordered factor column name. Required when \code{factor.var} in \code{phenotypes} is -not already an ordered factor. Default is \code{"o"} \link{2}.} +not already an ordered factor. Default is \code{"o"}.} \item{fx}{Logical. Passed to \code{\link[mgcv]{s}}. If \code{TRUE} (recommended), the smooth is treated as fixed degrees of freedom. @@ -43,7 +43,7 @@ Default is \code{TRUE}.} \item{k}{Integer or \code{NULL}. Basis dimension passed to \code{\link[mgcv]{s}} for both the main smooth and interaction terms. -If \code{NULL} (default), uses the default from \code{mgcv::s()} \link{2}.} +If \code{NULL} (default), uses the default from \code{mgcv::s()}.} } \value{ A list with two components: @@ -52,7 +52,7 @@ A list with two components: \item{phenotypes}{The (possibly updated) data.frame. If \code{factor.var} was not already an ordered factor, a new column named \code{paste0(prefix.ordered.factor, factor.var)} is added. -Otherwise identical to the input \link{2}.} +Otherwise identical to the input.} } } \description{ @@ -68,7 +68,7 @@ This helper exists because setting up factor-smooth interactions in structure. If \code{factor.var} in \code{phenotypes} is not already an ordered factor, this function creates one using \code{reference.group} as the baseline level and adds it as a new column (named with -\code{prefix.ordered.factor} prepended to \code{factor.var}) \link{2}. +\code{prefix.ordered.factor} prepended to \code{factor.var}). The returned \code{phenotypes} data.frame must be used in the subsequent \code{\link{ModelArray.gam}} call so that the ordered factor column is diff --git a/man/h5summary.Rd b/man/h5summary.Rd index 3f76000..1788988 100644 --- a/man/h5summary.Rd +++ b/man/h5summary.Rd @@ -22,9 +22,9 @@ An object of class \code{"h5summary"}, which is a list with components: \describe{ \item{scalars}{A data.frame with columns \code{name}, -\code{nElements}, and \code{nInputFiles} \link{2}.} +\code{nElements}, and \code{nInputFiles}.} \item{analyses}{Character vector of analysis names found under -\code{/results/} \link{2}.} +\code{/results/}.} \item{filepath}{The input filepath.} } @@ -40,7 +40,7 @@ without constructing a full \linkS4class{ModelArray} object. This function opens the HDF5 file read-only via \code{\link[rhdf5]{h5ls}}, inspects the group structure under \code{/scalars/} and \code{/results/}, and closes the file. It does not load any data into memory. The returned -object has a \code{print} method that displays a formatted summary \link{2}. +object has a \code{print} method that displays a formatted summary. } \examples{ \dontrun{ diff --git a/man/mergeModelArrays.Rd b/man/mergeModelArrays.Rd index a7f279f..77dd090 100644 --- a/man/mergeModelArrays.Rd +++ b/man/mergeModelArrays.Rd @@ -14,13 +14,13 @@ objects, each constructed from a different HDF5 file.} \linkS4class{ModelArray} in \code{modelarrays}. Each must contain a \code{source_file} column whose entries match the corresponding ModelArray's sources (i.e. \code{sources(modelarrays[[i]])}). -Each must also contain all columns named in \code{merge_on} \link{6}.} +Each must also contain all columns named in \code{merge_on}.} \item{merge_on}{Character vector of column names present in all data.frames in \code{phenotypes_list}, used to inner-join subjects across sessions/modalities (e.g. \code{c("subject_id")}). The combination of these columns must uniquely identify each subject -within each data.frame \link{6}.} +within each data.frame.} } \value{ A list with two components: @@ -32,7 +32,7 @@ to match the inner-joined subject list.} \code{source_file} columns are renamed to \code{source_file.} and a new unified \code{source_file} column is added for use with analysis -functions \link{6}.} +functions.} } } \description{ @@ -46,7 +46,7 @@ The merge performs an inner join of the phenotype data.frames on the columns specified by \code{merge_on}. Only subjects present in all phenotype data.frames are retained. Scalar matrices from each input \linkS4class{ModelArray} are column-subsetted and reordered to match -the joined subject list \link{6}. +the joined subject list. A unified \code{source_file} column is created from the \code{merge_on} columns so that downstream analysis functions @@ -54,16 +54,16 @@ columns so that downstream analysis functions \code{\link{ModelArray.wrap}}) can align phenotypes to scalars. The original \code{source_file} columns are renamed to \code{source_file.} for each input -\linkS4class{ModelArray} \link{6}. +\linkS4class{ModelArray}. Scalar names must be unique across all input ModelArrays. If two ModelArrays share a scalar name (e.g. both have \code{"FD"}), the function will error. Element counts (number of rows) must match -across all scalars \link{6}. +across all scalars. If element metadata is available (see \code{\link{elementMetadata}}), the function checks that it is consistent across inputs and warns if -it differs or is only partially available \link{6}. +it differs or is only partially available. } \examples{ \dontrun{ diff --git a/man/writeResults.Rd b/man/writeResults.Rd index 6743d90..9e0653e 100644 --- a/man/writeResults.Rd +++ b/man/writeResults.Rd @@ -19,15 +19,15 @@ file-not-found errors.} \item{df.output}{A data.frame of element-wise statistical results, as returned by \code{\link{ModelArray.lm}}, \code{\link{ModelArray.gam}}, or \code{\link{ModelArray.wrap}}. -Must inherit from \code{data.frame} \link{7}.} +Must inherit from \code{data.frame}.} \item{analysis_name}{Character. The name for this set of results. Used as the group name under \code{/results/} in the HDF5 file. -Default is \code{"myAnalysis"} \link{7}.} +Default is \code{"myAnalysis"}.} \item{overwrite}{Logical. If a group with the same \code{analysis_name} already exists in the HDF5 file, whether to overwrite it (\code{TRUE}) -or skip with a warning (\code{FALSE}). Default is \code{TRUE} \link{7}.} +or skip with a warning (\code{FALSE}). Default is \code{TRUE}.} } \value{ Invisible \code{NULL}. Called for its side effect of writing @@ -36,23 +36,23 @@ results to the HDF5 file. \description{ Creates a group named \code{analysis_name} under \code{/results/} in the HDF5 file, then writes the statistical results data.frame (i.e. for one -analysis) into it as \code{results_matrix} along with column names \link{7}. +analysis) into it as \code{results_matrix} along with column names. } \details{ The results are stored at \code{/results//results_matrix} with column names saved as a separate dataset at -\code{/results//column_names} \link{7}. +\code{/results//column_names}. If any column of \code{df.output} is not numeric or integer, it is coerced to numeric via \code{factor()} and the factor levels are saved as a look-up table at -\code{/results//lut_forcol} \link{7}. +\code{/results//lut_forcol}. \strong{Debugging tip:} If you encounter \code{"Error in H5File.open(filename, mode, file_create_pl, file_access_pl)"}, check if the message mentions "No such file or directory". Try using an -absolute path for the \code{fn.output} argument \link{7}. +absolute path for the \code{fn.output} argument. } \examples{ \dontrun{ From b3c7a3c2ecfa387c0a1107b1979d9cb38c2d4522 Mon Sep 17 00:00:00 2001 From: araikes Date: Tue, 31 Mar 2026 11:12:55 -0700 Subject: [PATCH 21/23] Fix unresolved linking in utils --- R/utils.R | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/R/utils.R b/R/utils.R index 69ae303..8d96ccf 100644 --- a/R/utils.R +++ b/R/utils.R @@ -154,8 +154,8 @@ check_validity_correctPValue <- function(correct.list, name.correct.list, #' @details #' ref: https://www.rdocumentation.org/packages/mgcv/versions/1.8-38/topics/s #' -#' @param ofInterest got via: gam.formula.breakdown <- mgcv::interpret.gam(formula); -#' ofInterest <- gam.formula.breakdown$smooth.spec[[i]] +#' @param ofInterest got via: `gam.formula.breakdown <- mgcv::interpret.gam(formula)`; +#' `ofInterest <- gam.formula.breakdown$smooth.spec[[i]]` #' @importFrom mgcv s #' @importFrom dplyr %>% #' @importFrom crayon black @@ -233,8 +233,8 @@ checker_gam_s <- function(ofInterest) { #' ref: https://www.rdocumentation.org/packages/mgcv/versions/1.8-38/topics/te or /t2() #' #' @param FUN could be mgcv::te(), ti() or t2() -#' @param ofInterest got via: gam.formula.breakdown <- mgcv::interpret.gam(formula); -#' ofInterest <- gam.formula.breakdown$smooth.spec[[i]] +#' @param ofInterest got via: `gam.formula.breakdown <- mgcv::interpret.gam(formula)`; +#' `ofInterest <- gam.formula.breakdown$smooth.spec[[i]]` #' @importFrom mgcv te ti t2 #' @importFrom dplyr %>% #' @importFrom crayon black From 445e681d8bd81d5457710f466cf1e7c05242a4d3 Mon Sep 17 00:00:00 2001 From: araikes Date: Wed, 1 Apr 2026 10:45:57 -0700 Subject: [PATCH 22/23] Update merged ModelArray example. --- R/merge.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/merge.R b/R/merge.R index f8db780..55745c2 100644 --- a/R/merge.R +++ b/R/merge.R @@ -82,10 +82,10 @@ #' head(merged$phenotypes) #' #' results <- ModelArray.lm( -#' FD ~ age + sex, +#' FD ~ age + sex + FC, #' data = merged$data, #' phenotypes = merged$phenotypes, -#' scalar = "FD" +#' scalar = c("FD", "FC") #' ) #' } #' From 1a44faa4d1dd38a44fa56313ade22112269c6c53 Mon Sep 17 00:00:00 2001 From: araikes Date: Wed, 1 Apr 2026 10:48:33 -0700 Subject: [PATCH 23/23] Update mergeModelArrays.Rd --- man/mergeModelArrays.Rd | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/man/mergeModelArrays.Rd b/man/mergeModelArrays.Rd index 77dd090..328b773 100644 --- a/man/mergeModelArrays.Rd +++ b/man/mergeModelArrays.Rd @@ -86,10 +86,10 @@ scalarNames(merged$data) # c("FD", "FC") head(merged$phenotypes) results <- ModelArray.lm( - FD ~ age + sex, + FD ~ age + sex + FC, data = merged$data, phenotypes = merged$phenotypes, - scalar = "FD" + scalar = c("FD", "FC") ) }