Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions CRAN-SUBMISSION
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
Version: 0.9.1
Date: 2026-02-09 03:01:47 UTC
SHA: bec5f707f2ea68a70c794b3fc0fda77030d06139
3 changes: 2 additions & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -68,8 +68,9 @@ Imports:
grid,
stats,
utils,
dplyr,
dplyr,
ca,
igraph,
rgl,
colorspace,
gt,
Expand Down
6 changes: 6 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,9 @@ S3method(Summarise,default)
S3method(Summarise,glmlist)
S3method(Summarise,loglmlist)
S3method(assoc,glm)
S3method(assoc_graph,glm)
S3method(assoc_graph,list)
S3method(assoc_graph,loglm)
S3method(coef,glmlist)
S3method(color_table,data.frame)
S3method(color_table,default)
Expand All @@ -25,10 +28,12 @@ S3method(mosaic,loglmlist)
S3method(mosaic3d,default)
S3method(mosaic3d,loglm)
S3method(plot,HLtest)
S3method(plot,assoc_graph)
S3method(print,CMHtest)
S3method(print,GKgamma)
S3method(print,HLtest)
S3method(print,Kappa)
S3method(print,assoc_graph)
S3method(print,woolf_test)
S3method(rootogram,HLtest)
S3method(sieve,glm)
Expand All @@ -43,6 +48,7 @@ export(HLtest)
export(Kway)
export(LRstats)
export(Summarise)
export(assoc_graph)
export(blogits)
export(center3d)
export(collapse.table)
Expand Down
1 change: 1 addition & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
* Added a `label = c("name", "formula")` argument to `LRstats()` to provide for labeling models by their model formulas in the output using `get_models()`.
* Handle list (...) of models with formula labels more flexibly in `LRstats()`
* Document `get_model()` and `get_models()` together
* Added `assoc_graph() and a plot method for association graphs of loglinear models.


## Version 0.9.1 (2026-02-08)
Expand Down
253 changes: 253 additions & 0 deletions R/assoc_graph.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,253 @@
#' Association Graph for a Loglinear Model
#'
#' Construct an undirected graph representing the associations in a loglinear model.
#' Nodes represent variables and edges represent pairwise associations fitted in the model.
#' If two variables are not connected by an edge, they are conditionally independent
#' given the other variables.
#'
#' @param x An object specifying the model. Can be:
#' \itemize{
#' \item A \code{list} of character vectors (a margin/generating class list, as produced by
#' \code{\link{joint}}, \code{\link{conditional}}, etc.)
#' \item A fitted \code{\link[MASS]{loglm}} object
#' \item A fitted \code{\link[stats]{glm}} object (poisson family loglinear model)
#' }
#' @param result Type of result to return: \code{"igraph"} (default) returns an
#' \code{\link[igraph:igraph-package]{igraph}} object; \code{"matrix"} returns the
#' adjacency matrix; \code{"edge_list"} returns a two-column character matrix of edges.
#' @param \dots Additional arguments (currently unused).
#'
#' @return Depending on \code{result}:
#' \itemize{
#' \item \code{"igraph"}: An \code{igraph} undirected graph object of class
#' \code{c("assoc_graph", "igraph")}, with vertex names corresponding to
#' the variable names.
#' \item \code{"matrix"}: A symmetric adjacency matrix (0/1) with variable names as
#' row and column names.
#' \item \code{"edge_list"}: A two-column character matrix, each row an edge.
#' }
#'
#' @details
#' Each high-order term (margin) in a hierarchical loglinear model defines a clique
#' in the association graph. For example, the term \code{c("A", "B", "C")} generates
#' edges A--B, A--C, and B--C. Single-variable terms (as in mutual independence)
#' yield isolated nodes with no edges.
#'
#' For \code{loglm} objects, the margins are extracted from the \code{$margin} component.
#' For \code{glm} objects, the interaction terms are extracted from the model formula.
#'
#' @references
#' Khamis, H. J. (2011). \emph{The Association Graph and the Multigraph for Loglinear Models}.
#' SAGE Publications. \doi{10.4135/9781452226521}
#'
#' Darroch, J. N., Lauritzen, S. L., & Speed, T. P. (1980). Markov Fields and Log-Linear
#' Interaction Models for Contingency Tables. \emph{The Annals of Statistics}, 8(3), 522--539.
#' \doi{10.1214/aos/1176345006}
#'
#' Whittaker, J. (1990). \emph{Graphical Models in Applied Multivariate Statistics}.
#' John Wiley & Sons, Chichester.
#'
#' @seealso \code{\link{joint}}, \code{\link{conditional}}, \code{\link{mutual}},
#' \code{\link{saturated}}, \code{\link{loglin2string}}, \code{\link{seq_loglm}},
#' \code{\link{plot.assoc_graph}}
#'
#' @family loglinear models
#' @export
#' @examples
#' # Structural graphs from margin lists (3-way: A, B, C)
#' mutual(3, factors = c("A", "B", "C")) |> assoc_graph()
#' joint(3, factors = c("A", "B", "C")) |> assoc_graph()
#' conditional(3, factors = c("A", "B", "C")) |> assoc_graph()
#' saturated(3, factors = c("A", "B", "C")) |> assoc_graph()
#'
#' # Adjacency matrix form
#' conditional(3, factors = c("A", "B", "C")) |> assoc_graph(result = "matrix")
#'
#' # From a fitted loglm model (Berkeley admissions)
#' \dontrun{
#' mod <- MASS::loglm(~ (Admit + Gender) * Dept, data = UCBAdmissions)
#' assoc_graph(mod)
#' plot(assoc_graph(mod), main = "Berkeley: [AD] [GD]")
#' }
#'
#' # From glm models (Dayton Survey: cigarette, alcohol, marijuana, sex, race)
#' data(DaytonSurvey)
#'
#' # Mutual independence + sex*race: one edge only
#' mod.SR <- glm(Freq ~ . + sex*race, data = DaytonSurvey, family = poisson)
#' assoc_SRaph(mod.SR)
#' plot(assoc_SRaph(mod.SR), main = "Mutual indep. + [SR]")
#'
#' # [AM][AC][MC][AR][AS][RS]: {race, Sender} indep {marijuana, ciS} | alcohol
#' mod.cond <- glm(Freq ~ (cigarette + alcohol + marijuana)^2 +
#' (alcohol + sex + race)^2,
#' data = DaytonSurvey, family = poisson)
#' assoc_graph(mod.cond)
#' plot(assoc_graph(mod.cond),
#' groups = list(c("cigarette", "alcohol", "marijuana"),
#' c("sex", "race")),
#' main = "{R,S} indep {M,C} | A")
#'
assoc_graph <- function(x, ...) {
UseMethod("assoc_graph")
}

#' @rdname assoc_graph
#' @export
assoc_graph.list <- function(x, result = c("igraph", "matrix", "edge_list"), ...) {
result <- match.arg(result)
.margins_to_assoc_graph(x, result = result)
}

#' @rdname assoc_graph
#' @export
assoc_graph.loglm <- function(x, result = c("igraph", "matrix", "edge_list"), ...) {
result <- match.arg(result)
if (is.null(x$margin)) {
stop("Cannot extract margins from this loglm object")
}
.margins_to_assoc_graph(x$margin, result = result)
}

#' @rdname assoc_graph
#' @export
assoc_graph.glm <- function(x, result = c("igraph", "matrix", "edge_list"), ...) {
result <- match.arg(result)
margins <- .glm_to_margins(x)
.margins_to_assoc_graph(margins, result = result)
}


# --- Core helper: margin list -> assoc_graph ---

.margins_to_assoc_graph <- function(margins, result = "igraph") {

# all variable names (including isolated ones from single-variable terms)
all_vars <- unique(unlist(margins))

# pairwise edges from each clique
edge_list <- do.call(rbind, lapply(margins, function(m) {
if (length(m) >= 2) t(utils::combn(m, 2)) else NULL
}))

if (!is.null(edge_list) && nrow(edge_list) > 0) {
# deduplicate edges (sort each pair so A-B and B-A are treated the same)
edge_list <- unique(edge_list)
}

if (result == "edge_list") {
if (is.null(edge_list) || nrow(edge_list) == 0) {
return(matrix(character(0), ncol = 2, dimnames = list(NULL, c("from", "to"))))
}
colnames(edge_list) <- c("from", "to")
return(edge_list)
}

if (result == "matrix") {
nv <- length(all_vars)
adj <- matrix(0L, nv, nv, dimnames = list(all_vars, all_vars))
if (!is.null(edge_list) && nrow(edge_list) > 0) {
for (i in seq_len(nrow(edge_list))) {
adj[edge_list[i, 1], edge_list[i, 2]] <- 1L
adj[edge_list[i, 2], edge_list[i, 1]] <- 1L
}
}
return(adj)
}

# result == "igraph"
if (is.null(edge_list) || nrow(edge_list) == 0) {
g <- igraph::make_empty_graph(n = 0, directed = FALSE)
g <- igraph::add_vertices(g, length(all_vars), name = all_vars)
} else {
g <- igraph::graph_from_edgelist(edge_list, directed = FALSE)
# add any isolated nodes not covered by edges
missing <- setdiff(all_vars, igraph::V(g)$name)
if (length(missing) > 0) {
g <- igraph::add_vertices(g, length(missing), name = missing)
}
}

class(g) <- c("assoc_graph", class(g))
g
}


# --- Helper: extract generating class (margins) from a glm formula ---

.glm_to_margins <- function(object) {
tt <- stats::terms(object)
factors <- attr(tt, "factors")
order <- attr(tt, "order")

if (is.null(factors)) {
stop("Cannot extract model terms from this glm object")
}

# Get variable names involved in each term
# Only keep the highest-order terms (generating class for hierarchical model)
var_names <- rownames(factors)
term_names <- colnames(factors)

# Build list of variable sets for each term
term_vars <- lapply(seq_along(term_names), function(j) {
var_names[factors[, j] > 0]
})

# Filter to the generating class: remove terms that are subsets of other terms
is_maximal <- vapply(seq_along(term_vars), function(i) {
ti <- term_vars[[i]]
!any(vapply(seq_along(term_vars), function(j) {
if (i == j) return(FALSE)
all(ti %in% term_vars[[j]]) && length(term_vars[[j]]) > length(ti)
}, logical(1)))
}, logical(1))

margins <- term_vars[is_maximal]
names(margins) <- paste0("term", seq_along(margins))
margins
}


# --- Print method ---

#' @rdname assoc_graph
#' @export
print.assoc_graph <- function(x, ...) {
nv <- igraph::vcount(x)
ne <- igraph::ecount(x)
vnames <- igraph::V(x)$name

cat("Association graph: ", nv, " variables, ", ne, " edges\n", sep = "")
cat("Variables:", paste(vnames, collapse = ", "), "\n")

if (ne > 0) {
el <- igraph::as_edgelist(x)
edge_strings <- paste0(el[, 1], " -- ", el[, 2])
cat("Edges:", paste(edge_strings, collapse = ", "), "\n")
} else {
cat("Edges: (none -- mutual independence)\n")
}

# Show bracket notation
margins <- .graph_to_margins(x)
cat("Model:", loglin2string(margins), "\n")

invisible(x)
}


# --- Helper: recover generating class from the graph (maximal cliques) ---

.graph_to_margins <- function(g) {
if (igraph::ecount(g) == 0) {
# Mutual independence: each variable is its own term
margins <- as.list(igraph::V(g)$name)
} else {
# Maximal cliques give the generating class
cliques <- igraph::max_cliques(g)
margins <- lapply(cliques, function(cl) igraph::V(g)$name[cl])
}
names(margins) <- paste0("term", seq_along(margins))
margins
}
Loading