diff --git a/CRAN-SUBMISSION b/CRAN-SUBMISSION new file mode 100644 index 0000000..cb12312 --- /dev/null +++ b/CRAN-SUBMISSION @@ -0,0 +1,3 @@ +Version: 0.9.1 +Date: 2026-02-09 03:01:47 UTC +SHA: bec5f707f2ea68a70c794b3fc0fda77030d06139 diff --git a/DESCRIPTION b/DESCRIPTION index 07e8a03..f23cec9 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -68,8 +68,9 @@ Imports: grid, stats, utils, - dplyr, + dplyr, ca, + igraph, rgl, colorspace, gt, diff --git a/NAMESPACE b/NAMESPACE index d085347..ff85b07 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -9,6 +9,9 @@ S3method(Summarise,default) S3method(Summarise,glmlist) S3method(Summarise,loglmlist) S3method(assoc,glm) +S3method(assoc_graph,glm) +S3method(assoc_graph,list) +S3method(assoc_graph,loglm) S3method(coef,glmlist) S3method(color_table,data.frame) S3method(color_table,default) @@ -25,10 +28,12 @@ S3method(mosaic,loglmlist) S3method(mosaic3d,default) S3method(mosaic3d,loglm) S3method(plot,HLtest) +S3method(plot,assoc_graph) S3method(print,CMHtest) S3method(print,GKgamma) S3method(print,HLtest) S3method(print,Kappa) +S3method(print,assoc_graph) S3method(print,woolf_test) S3method(rootogram,HLtest) S3method(sieve,glm) @@ -43,6 +48,7 @@ export(HLtest) export(Kway) export(LRstats) export(Summarise) +export(assoc_graph) export(blogits) export(center3d) export(collapse.table) diff --git a/NEWS.md b/NEWS.md index 1572704..fc0e6ba 100644 --- a/NEWS.md +++ b/NEWS.md @@ -3,6 +3,7 @@ * Added a `label = c("name", "formula")` argument to `LRstats()` to provide for labeling models by their model formulas in the output using `get_models()`. * Handle list (...) of models with formula labels more flexibly in `LRstats()` * Document `get_model()` and `get_models()` together +* Added `assoc_graph()` and a plot method for association graphs of loglinear models. 
## Version 0.9.1 (2026-02-08) diff --git a/R/assoc_graph.R b/R/assoc_graph.R new file mode 100644 index 0000000..baff2bf --- /dev/null +++ b/R/assoc_graph.R @@ -0,0 +1,253 @@ +#' Association Graph for a Loglinear Model +#' +#' Construct an undirected graph representing the associations in a loglinear model. +#' Nodes represent variables and edges represent pairwise associations fitted in the model. +#' If two variables are not connected by an edge, they are conditionally independent +#' given the other variables. +#' +#' @param x An object specifying the model. Can be: +#' \itemize{ +#' \item A \code{list} of character vectors (a margin/generating class list, as produced by +#' \code{\link{joint}}, \code{\link{conditional}}, etc.) +#' \item A fitted \code{\link[MASS]{loglm}} object +#' \item A fitted \code{\link[stats]{glm}} object (poisson family loglinear model) +#' } +#' @param result Type of result to return: \code{"igraph"} (default) returns an +#' \code{\link[igraph:igraph-package]{igraph}} object; \code{"matrix"} returns the +#' adjacency matrix; \code{"edge_list"} returns a two-column character matrix of edges. +#' @param \dots Additional arguments (currently unused). +#' +#' @return Depending on \code{result}: +#' \itemize{ +#' \item \code{"igraph"}: An \code{igraph} undirected graph object of class +#' \code{c("assoc_graph", "igraph")}, with vertex names corresponding to +#' the variable names. +#' \item \code{"matrix"}: A symmetric adjacency matrix (0/1) with variable names as +#' row and column names. +#' \item \code{"edge_list"}: A two-column character matrix, each row an edge. +#' } +#' +#' @details +#' Each high-order term (margin) in a hierarchical loglinear model defines a clique +#' in the association graph. For example, the term \code{c("A", "B", "C")} generates +#' edges A--B, A--C, and B--C. Single-variable terms (as in mutual independence) +#' yield isolated nodes with no edges. 
+#' +#' For \code{loglm} objects, the margins are extracted from the \code{$margin} component. +#' For \code{glm} objects, the interaction terms are extracted from the model formula. +#' +#' @references +#' Khamis, H. J. (2011). \emph{The Association Graph and the Multigraph for Loglinear Models}. +#' SAGE Publications. \doi{10.4135/9781452226521} +#' +#' Darroch, J. N., Lauritzen, S. L., & Speed, T. P. (1980). Markov Fields and Log-Linear +#' Interaction Models for Contingency Tables. \emph{The Annals of Statistics}, 8(3), 522--539. +#' \doi{10.1214/aos/1176345006} +#' +#' Whittaker, J. (1990). \emph{Graphical Models in Applied Multivariate Statistics}. +#' John Wiley & Sons, Chichester. +#' +#' @seealso \code{\link{joint}}, \code{\link{conditional}}, \code{\link{mutual}}, +#' \code{\link{saturated}}, \code{\link{loglin2string}}, \code{\link{seq_loglm}}, +#' \code{\link{plot.assoc_graph}} +#' +#' @family loglinear models +#' @export +#' @examples +#' # Structural graphs from margin lists (3-way: A, B, C) +#' mutual(3, factors = c("A", "B", "C")) |> assoc_graph() +#' joint(3, factors = c("A", "B", "C")) |> assoc_graph() +#' conditional(3, factors = c("A", "B", "C")) |> assoc_graph() +#' saturated(3, factors = c("A", "B", "C")) |> assoc_graph() +#' +#' # Adjacency matrix form +#' conditional(3, factors = c("A", "B", "C")) |> assoc_graph(result = "matrix") +#' +#' # From a fitted loglm model (Berkeley admissions) +#' \dontrun{ +#' mod <- MASS::loglm(~ (Admit + Gender) * Dept, data = UCBAdmissions) +#' assoc_graph(mod) +#' plot(assoc_graph(mod), main = "Berkeley: [AD] [GD]") +#' } +#' +#' # From glm models (Dayton Survey: cigarette, alcohol, marijuana, sex, race) +#' data(DaytonSurvey) +#' +#' # Mutual independence + sex*race: one edge only +#' mod.SR <- glm(Freq ~ . + sex*race, data = DaytonSurvey, family = poisson) +#' assoc_graph(mod.SR) +#' plot(assoc_graph(mod.SR), main = "Mutual indep. 
+ [SR]") +#' +#' # [AM][AC][MC][AR][AS][RS]: {race, sex} indep {marijuana, cigarette} | alcohol +#' mod.cond <- glm(Freq ~ (cigarette + alcohol + marijuana)^2 + +#' (alcohol + sex + race)^2, +#' data = DaytonSurvey, family = poisson) +#' assoc_graph(mod.cond) +#' plot(assoc_graph(mod.cond), +#' groups = list(c("cigarette", "alcohol", "marijuana"), +#' c("sex", "race")), +#' main = "{R,S} indep {M,C} | A") +#' +assoc_graph <- function(x, ...) { + UseMethod("assoc_graph") +} + +#' @rdname assoc_graph +#' @export +assoc_graph.list <- function(x, result = c("igraph", "matrix", "edge_list"), ...) { + result <- match.arg(result) + .margins_to_assoc_graph(x, result = result) +} + +#' @rdname assoc_graph +#' @export +assoc_graph.loglm <- function(x, result = c("igraph", "matrix", "edge_list"), ...) { + result <- match.arg(result) + if (is.null(x$margin)) { + stop("Cannot extract margins from this loglm object") + } + .margins_to_assoc_graph(x$margin, result = result) +} + +#' @rdname assoc_graph +#' @export +assoc_graph.glm <- function(x, result = c("igraph", "matrix", "edge_list"), ...) 
{ + result <- match.arg(result) + margins <- .glm_to_margins(x) + .margins_to_assoc_graph(margins, result = result) +} + + +# --- Core helper: margin list -> assoc_graph --- + +.margins_to_assoc_graph <- function(margins, result = "igraph") { + + # all variable names (including isolated ones from single-variable terms) + all_vars <- unique(unlist(margins)) + + # pairwise edges from each clique + edge_list <- do.call(rbind, lapply(margins, function(m) { + if (length(m) >= 2) t(utils::combn(m, 2)) else NULL + })) + + if (!is.null(edge_list) && nrow(edge_list) > 0) { + # deduplicate undirected edges: compare sorted pairs so A-B and B-A match; keep first occurrence as given + edge_list <- edge_list[!duplicated(t(apply(edge_list, 1, sort))), , drop = FALSE] + } + + if (result == "edge_list") { + if (is.null(edge_list) || nrow(edge_list) == 0) { + return(matrix(character(0), ncol = 2, dimnames = list(NULL, c("from", "to")))) + } + colnames(edge_list) <- c("from", "to") + return(edge_list) + } + + if (result == "matrix") { + nv <- length(all_vars) + adj <- matrix(0L, nv, nv, dimnames = list(all_vars, all_vars)) + if (!is.null(edge_list) && nrow(edge_list) > 0) { + for (i in seq_len(nrow(edge_list))) { + adj[edge_list[i, 1], edge_list[i, 2]] <- 1L + adj[edge_list[i, 2], edge_list[i, 1]] <- 1L + } + } + return(adj) + } + + # result == "igraph" + if (is.null(edge_list) || nrow(edge_list) == 0) { + g <- igraph::make_empty_graph(n = 0, directed = FALSE) + g <- igraph::add_vertices(g, length(all_vars), name = all_vars) + } else { + g <- igraph::graph_from_edgelist(edge_list, directed = FALSE) + # add any isolated nodes not covered by edges + missing <- setdiff(all_vars, igraph::V(g)$name) + if (length(missing) > 0) { + g <- igraph::add_vertices(g, length(missing), name = missing) + } + } + + class(g) <- c("assoc_graph", class(g)) + g +} + + +# --- Helper: extract generating class (margins) from a glm formula --- + +.glm_to_margins <- function(object) { + tt <- stats::terms(object) + factors <- attr(tt, "factors") + order <- attr(tt, "order") + + if 
(is.null(factors)) { + stop("Cannot extract model terms from this glm object") + } + + # Get variable names involved in each term + # Only keep the highest-order terms (generating class for hierarchical model) + var_names <- rownames(factors) + term_names <- colnames(factors) + + # Build list of variable sets for each term + term_vars <- lapply(seq_along(term_names), function(j) { + var_names[factors[, j] > 0] + }) + + # Filter to the generating class: remove terms that are subsets of other terms + is_maximal <- vapply(seq_along(term_vars), function(i) { + ti <- term_vars[[i]] + !any(vapply(seq_along(term_vars), function(j) { + if (i == j) return(FALSE) + all(ti %in% term_vars[[j]]) && length(term_vars[[j]]) > length(ti) + }, logical(1))) + }, logical(1)) + + margins <- term_vars[is_maximal] + names(margins) <- paste0("term", seq_along(margins)) + margins +} + + +# --- Print method --- + +#' @rdname assoc_graph +#' @export +print.assoc_graph <- function(x, ...) { + nv <- igraph::vcount(x) + ne <- igraph::ecount(x) + vnames <- igraph::V(x)$name + + cat("Association graph: ", nv, " variables, ", ne, " edges\n", sep = "") + cat("Variables:", paste(vnames, collapse = ", "), "\n") + + if (ne > 0) { + el <- igraph::as_edgelist(x) + edge_strings <- paste0(el[, 1], " -- ", el[, 2]) + cat("Edges:", paste(edge_strings, collapse = ", "), "\n") + } else { + cat("Edges: (none -- mutual independence)\n") + } + + # Show bracket notation (terms rebuilt from the graph's maximal cliques) + margins <- .graph_to_margins(x) + cat("Model:", loglin2string(margins), "\n") + + invisible(x) +} + + +# --- Helper: recover generating class from the graph (maximal cliques) --- + +.graph_to_margins <- function(g) { + if (igraph::ecount(g) == 0) { + # Mutual independence: each variable is its own term + margins <- as.list(igraph::V(g)$name) + } else { + # Maximal cliques as terms; NOTE(review): exact only for graphical models + cliques <- igraph::max_cliques(g) + margins <- lapply(cliques, function(cl) igraph::V(g)$name[cl]) + } + names(margins) <- paste0("term", 
seq_along(margins)) + margins +} diff --git a/R/plot.assoc_graph.R b/R/plot.assoc_graph.R new file mode 100644 index 0000000..f9ad827 --- /dev/null +++ b/R/plot.assoc_graph.R @@ -0,0 +1,98 @@ +#' Plot an Association Graph +#' +#' Plot method for \code{\link{assoc_graph}} objects, displaying the association +#' structure of a loglinear model as a network diagram. +#' +#' @param x An \code{assoc_graph} object, as returned by \code{\link{assoc_graph}}. +#' @param layout Layout function or coordinate matrix for node positions. +#' Defaults to \code{\link[igraph]{layout_in_circle}} for up to 6 nodes, +#' \code{\link[igraph]{layout_with_fr}} otherwise. +#' @param groups Optional named list assigning variables to groups for coloring, +#' e.g., \code{list(response = "Survived", predictors = c("Class", "Sex", "Age"))}. +#' @param colors Character vector of colors for groups. Recycled as needed. +#' @param vertex.size Vertex size (default 30). +#' @param vertex.label.cex Label size for vertex names (default 1.2). +#' @param edge.width Edge width (default 2). If edge weights are present, widths are +#' scaled from the weights automatically. +#' @param edge.label Optional edge labels. If \code{TRUE} and edge weights are present, +#' the weight values are used as labels. +#' @param \dots Additional arguments passed to \code{\link[igraph]{plot.igraph}}, +#' such as \code{main} for a title. +#' +#' @return The \code{assoc_graph} object \code{x}, returned invisibly. 
+#' +#' @seealso \code{\link{assoc_graph}}, \code{\link[igraph]{plot.igraph}} +#' +#' @family loglinear models +#' @export +#' @examples +#' # Basic structural plot +#' g <- conditional(3, factors = c("A", "B", "C")) |> assoc_graph() +#' plot(g, main = "Conditional independence: [AC] [BC]") +#' +#' # With grouped node colors +#' g <- saturated(4, factors = c("A", "B", "C", "D")) |> assoc_graph() +#' plot(g, groups = list(c("A", "B"), c("C", "D")), +#' main = "Saturated model") +#' +plot.assoc_graph <- function(x, layout = NULL, + groups = NULL, + colors = c("lightblue", "lightyellow", "lightgreen", + "lightsalmon", "plum"), + vertex.size = 30, + vertex.label.cex = 1.2, + edge.width = 2, + edge.label = NULL, + ...) { + + if (is.null(layout)) { + layout <- if (igraph::vcount(x) <= 6) igraph::layout_in_circle(x) + else igraph::layout_with_fr(x) + } + + # Node colors from groups + vcol <- rep(colors[1], igraph::vcount(x)) + if (!is.null(groups)) { + vnames <- igraph::V(x)$name + if (is.list(groups)) { + # named list: list(response = "Survived", explanatory = c("Class", "Sex")) + for (i in seq_along(groups)) { + idx <- match(groups[[i]], vnames) + idx <- idx[!is.na(idx)] + vcol[idx] <- colors[((i - 1) %% length(colors)) + 1] + } + } + } + + # Edge weights -> width scaling + ew <- edge.width + if (!is.null(igraph::E(x)$weight)) { + w <- igraph::E(x)$weight + # Scale to range [1, 6] + if (max(w) > min(w)) { + ew <- 1 + 5 * (w - min(w)) / (max(w) - min(w)) + } else { + ew <- rep(3, length(w)) + } + } + + # Edge labels from weights; TRUE without weights means no labels (not a literal "TRUE") + el <- edge.label + if (isTRUE(el)) { + el <- if (is.null(igraph::E(x)$weight)) NULL else round(igraph::E(x)$weight, 2) + } + + igraph::plot.igraph(x, + layout = layout, + vertex.size = vertex.size, + vertex.color = vcol, + vertex.label = igraph::V(x)$name, + vertex.label.cex = vertex.label.cex, + vertex.frame.color = "gray40", + edge.width = ew, + edge.label = el, + edge.color = "gray30", + ...) 
+ + invisible(x) +} diff --git a/_pkgdown.yml b/_pkgdown.yml index 5d27044..801b496 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -44,6 +44,8 @@ reference: - title: Other graphical methods desc: Other functions for visualizing contingency tables contents: + - assoc_graph + - plot.assoc_graph - color_table - mcaplot diff --git a/dev/assoc-graph.md b/dev/assoc-graph.md index c45e4a1..6be53f8 100644 --- a/dev/assoc-graph.md +++ b/dev/assoc-graph.md @@ -1,7 +1,7 @@ # Association graphs for loglinear models -Association graphs for loglinear models represent variables as nodes and their partial associations +Association graphs for loglinear models represent variables as nodes and their partial associations between pairs of variables as edges. If two variables are not connected by an edge, they are conditionally independent given the other variables in the model. How can we use this in practice, to understand a model, or how well it fits a given dataset? @@ -10,16 +10,271 @@ How can we use this in practice, to understand a model, or how well it fits a gi * use this as a visual representation of a loglinear model in a network diagram of nodes and edges representing associations allowed (fitted) in the model. Allow nodes (variables) to be in different groups using different node shape / color, ... +reflecting different roles in models. + +This representation only allows to represent pairwise associations in a model, but this is similar to what is done in +multiple correspondence analysis. * use this to represent the actual associations fitted in a given loglinear model -- use edge width to represent the -strength of the pairwise associatopm +strength of the pairwise associations. This could be based on the decrease in G^2 when a term is dropped or +a measure of pairwise association, e.g., Cramer's V. In this, it would be good to print the value of the +association value on the edge. 
+ + Read the lecture slides, C:/Dropbox/Documents/psy6136/lectures/04-Loglin.pdf, slides 41-47 +There is also a large CRAN Task View on Graphical Models: https://cran.r-project.org/web/views/GraphicalModels.html + ## What and How to Do I want to develop this idea into R functions for representing loglinear models as graph structures. -There is a collection of old packages: gRbase, gRim, ... that implement these ideas. +There is a collection of old packages: gRbase, gRim, ... that implement these ideas. See: https://people.math.aau.dk/~sorenh/software/gR/ +However, they rely on old graphics packages (`graph`, `Rgraphiviz`), no longer available or very hard to +work with. + +It's not clear whether these should be in a new package, or be part of {vcdExtra}, but let's create a plan for this, +perhaps starting quite simply, but with a view to how this could be further developed. + +### Package / dependency choices + +* **igraph**: Use for graph construction and layout. Well-established, lightweight, already widely used. + The margin lists from `joint()`, `conditional()`, etc. and the `$margin` component of `loglm` objects + can be converted to igraph objects with a simple `combn()`-based helper. This avoids depending on gRbase. + +* **igraph** also has good `plot()` support for basic structural graphs (node colors, shapes, edge width/labels). + This may be sufficient for both levels of visualization described below, at least initially. + +* **qgraph**: A strong candidate for rendering *weighted* association networks, with built-in support for + edge width proportional to weight, edge labels, positive/negative coloring, and spring layouts. + However, it adds a dependency. Could be added to `Suggests:` for an enhanced plotting method. + +* **DiagrammeR**: More verbose and general-purpose (flowcharts, process diagrams). Not well suited here. + +**Decision**: Start with `igraph` only (add to `Imports:`). Consider `qgraph` later for a richer weighted-graph display. 
+ + +--- + +## Implementation plan + +### Phase 1: Graph construction -- `assoc_graph()` + +A function to convert a loglinear model specification into an igraph undirected graph. + +**File**: `R/assoc_graph.R` + +#### Input sources + +The function should accept several forms of input: + +1. A **margin list** -- e.g., `list(c("A","B"), c("B","C"))`, as produced by `joint()`, `conditional()`, etc. + and stored in `loglm` objects as `$margin`. +2. A **fitted `loglm` object** -- extract `$margin` from it. +3. A **fitted `glm` object** (poisson family) -- extract pairwise terms from the model formula. +4. A **model formula** -- e.g., `~ A*B + B*C`, parse the terms to find the generating class. + +A simple S3 generic with methods might be cleanest: + +```r +assoc_graph(x, ...) +assoc_graph.list(x, ...) # margin list +assoc_graph.loglm(x, ...) # loglm object +assoc_graph.glm(x, ...) # glm object +``` + +#### Core logic + +Each margin term (a character vector of variable names) is a *clique*: expand it into all +pairwise edges via `combn(term, 2)`. Union of all edges forms the graph. Single-variable +terms (from mutual independence) yield isolated nodes. 
+ +```r +# Core helper: +.margins_to_graph <- function(margins) { + # all variable names (including isolated ones) + all_vars <- unique(unlist(margins)) + + # pairwise edges from each clique + edge_list <- do.call(rbind, lapply(margins, function(m) { + if (length(m) >= 2) t(combn(m, 2)) else NULL + })) + + if (is.null(edge_list) || nrow(edge_list) == 0) { + # No edges: mutual independence + g <- igraph::make_empty_graph(n = 0, directed = FALSE) + g <- igraph::add_vertices(g, length(all_vars), name = all_vars) + } else { + edge_list <- unique(edge_list) + g <- igraph::graph_from_edgelist(edge_list, directed = FALSE) + # Add any isolated nodes not covered by edges + missing <- setdiff(all_vars, igraph::V(g)$name) + if (length(missing) > 0) g <- igraph::add_vertices(g, length(missing), name = missing) + } + g +} +``` + +#### Return value + +An igraph object with: +- vertex names = variable names from the table +- Optional vertex attributes: `group` (for coloring response vs. explanatory variables) +- Optional edge attributes: `weight` and `label` (for Phase 2) + +#### Print method + +Consider also a better print method for `igraph` objects. The default one is very compact, not very readable. +But there is also a result="matrix" argument. + +``` +> library(gRbase) +> uG1 <- ug(~ a:b:c + c:d) +> uG1 +IGRAPH 3d85909 UN-- 4 4 -- ++ attr: name (v/c) ++ edges from 3d85909 (vertex names): +[1] a--b a--c b--c c--d + +> ug(~a:b:c + c:d, result="matrix") + a b c d +a 0 1 1 0 +b 1 0 1 0 +c 1 1 0 1 +d 0 0 1 0 +``` + + +### Phase 2: Edge weights -- association strength + +Add an optional argument to compute pairwise association measures and store them as edge weights. + +#### Measures (options for a `measure` argument): + +* **`"chisq"`**: Partial chi-squared -- the decrease in G^2 when the corresponding two-way term is + dropped from the model. Requires a fitted model object and the data table. 
+ +* **`"cramer"`**: Cramer's V for each pair, computed from the marginal 2-way table + (not conditional on other variables, but simple and interpretable). + +* **`"none"`** (default): Structural graph only, no weights. + +For `"chisq"`, the approach: +- For each edge (pair of variables) in the model, fit the model without that term + and compute the change in G^2. This is analogous to Type III tests. +- `drop1()` or manual refitting via `update()`. + +Store results as `E(g)$weight` and `E(g)$label` (formatted value). + + +### Phase 3: Plotting -- `plot.assoc_graph()` + +A plot method (or just pass igraph object to `plot()` / `qgraph()`). Key visual features: + +#### Structural graph (no weights): +- Nodes: circles with variable names, colored by `group` if specified +- Edges: solid lines connecting associated pairs +- Missing edges: visually represent conditional independence +- Layout: `igraph::layout_in_circle()` for small graphs, `layout_with_fr()` for larger ones + +#### Weighted graph: +- Edge width proportional to association strength +- Edge labels showing the numeric value +- Optionally, edge color intensity proportional to strength + +#### Node grouping: +Allow a `groups` argument (named list or factor) to color nodes by role, e.g., +response vs. explanatory, or substantive groupings. This maps to vertex `color` attributes. + +#### Using igraph directly: + +```r +plot.assoc_graph <- function(x, layout = NULL, + edge.width = NULL, + edge.label = NULL, + groups = NULL, ...) 
{ + # Set defaults + if (is.null(layout)) { + layout <- if (igraph::vcount(x) <= 6) igraph::layout_in_circle + else igraph::layout_with_fr + } + # Map groups to colors + if (!is.null(groups)) { + igraph::V(x)$color <- groups_to_colors(x, groups) + } + # Use edge weights for width if available and not overridden + if (is.null(edge.width) && !is.null(igraph::E(x)$weight)) { + edge.width <- scale_weights(igraph::E(x)$weight) + } + igraph::plot.igraph(x, layout = layout, + edge.width = edge.width, + edge.label = edge.label, ...) +} +``` + +#### Optional qgraph rendering: + +If qgraph is available, offer an alternative via `assoc_graph_qgraph()` or a `renderer = "qgraph"` +argument. This would convert the igraph adjacency matrix (with weights) to a qgraph call, +getting automatic edge width scaling, positive/negative coloring, and spring layout. + + +### Phase 4: Comparing models visually + +Given a `loglmlist` (e.g., from `seq_loglm()`), show a panel of association graphs to visualize +how the model structure changes across a sequence. This connects to the existing `seq_mosaic()` idea. + +```r +# Sketch: +assoc_graphs <- function(x, ...) { + # x: loglmlist + op <- par(mfrow = layout_for_n(length(x))) + for (i in seq_along(x)) { + g <- assoc_graph(x[[i]]) + plot(g, main = get_model(x[[i]]), ...) + } + par(op) +} +``` + + +--- + +## Dependencies + +* `igraph`: Add to `Imports:`. Core graph construction and basic plotting. +* `qgraph`: Optionally add to `Suggests:`. Enhanced weighted graph rendering. + + +## Files + +* `R/assoc_graph.R` -- `assoc_graph()` generic + methods, `.margins_to_graph()` helper, `plot.assoc_graph()` +* `man/assoc_graph.Rd` -- documentation (via roxygen) +* `dev/assoc-graph-examples.R` -- development test script + + +## Examples to develop with + +```r +library(vcdExtra) +data(Titanic) + +# 1. 
Structural graphs from margin lists (3-way: A, B, C) +mutual(3, factors = c("A", "B", "C")) |> assoc_graph() |> plot() # no edges +joint(3, factors = c("A", "B", "C")) |> assoc_graph() |> plot() # A--B edge only +conditional(3, factors = c("A","B","C")) |> assoc_graph() |> plot() # A--C, B--C +saturated(3, factors = c("A", "B", "C")) |> assoc_graph() |> plot() # complete graph + +# 2. From a fitted loglm model +mod <- MASS::loglm(~ (Admit + Gender) * Dept, data = UCBAdmissions) +assoc_graph(mod) |> plot() # A--D, G--D edges (slide 47 example) + +# 3. Sequential models (Titanic) +tit.joint <- seq_loglm(Titanic, type = "joint") +assoc_graphs(tit.joint) # panel of 4 graphs showing growing structure -Not clear +# 4. Weighted graph +g <- assoc_graph(mod, measure = "chisq") +plot(g) # edge widths reflect partial chi-squared contributions +``` diff --git a/man/assoc_graph.Rd b/man/assoc_graph.Rd new file mode 100644 index 0000000..836d925 --- /dev/null +++ b/man/assoc_graph.Rd @@ -0,0 +1,121 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/assoc_graph.R +\name{assoc_graph} +\alias{assoc_graph} +\alias{assoc_graph.list} +\alias{assoc_graph.loglm} +\alias{assoc_graph.glm} +\alias{print.assoc_graph} +\title{Association Graph for a Loglinear Model} +\usage{ +assoc_graph(x, ...) + +\method{assoc_graph}{list}(x, result = c("igraph", "matrix", "edge_list"), ...) + +\method{assoc_graph}{loglm}(x, result = c("igraph", "matrix", "edge_list"), ...) + +\method{assoc_graph}{glm}(x, result = c("igraph", "matrix", "edge_list"), ...) + +\method{print}{assoc_graph}(x, ...) +} +\arguments{ +\item{x}{An object specifying the model. Can be: +\itemize{ +\item A \code{list} of character vectors (a margin/generating class list, as produced by +\code{\link{joint}}, \code{\link{conditional}}, etc.) 
+\item A fitted \code{\link[MASS]{loglm}} object +\item A fitted \code{\link[stats]{glm}} object (poisson family loglinear model) +}} + +\item{\dots}{Additional arguments (currently unused).} + +\item{result}{Type of result to return: \code{"igraph"} (default) returns an +\code{\link[igraph:igraph-package]{igraph}} object; \code{"matrix"} returns the +adjacency matrix; \code{"edge_list"} returns a two-column character matrix of edges.} +} +\value{ +Depending on \code{result}: +\itemize{ +\item \code{"igraph"}: An \code{igraph} undirected graph object of class +\code{c("assoc_graph", "igraph")}, with vertex names corresponding to +the variable names. +\item \code{"matrix"}: A symmetric adjacency matrix (0/1) with variable names as +row and column names. +\item \code{"edge_list"}: A two-column character matrix, each row an edge. +} +} +\description{ +Construct an undirected graph representing the associations in a loglinear model. +Nodes represent variables and edges represent pairwise associations fitted in the model. +If two variables are not connected by an edge, they are conditionally independent +given the other variables. +} +\details{ +Each high-order term (margin) in a hierarchical loglinear model defines a clique +in the association graph. For example, the term \code{c("A", "B", "C")} generates +edges A--B, A--C, and B--C. Single-variable terms (as in mutual independence) +yield isolated nodes with no edges. + +For \code{loglm} objects, the margins are extracted from the \code{$margin} component. +For \code{glm} objects, the interaction terms are extracted from the model formula. 
+} +\examples{ +# Structural graphs from margin lists (3-way: A, B, C) +mutual(3, factors = c("A", "B", "C")) |> assoc_graph() +joint(3, factors = c("A", "B", "C")) |> assoc_graph() +conditional(3, factors = c("A", "B", "C")) |> assoc_graph() +saturated(3, factors = c("A", "B", "C")) |> assoc_graph() + +# Adjacency matrix form +conditional(3, factors = c("A", "B", "C")) |> assoc_graph(result = "matrix") + +# From a fitted loglm model (Berkeley admissions) +\dontrun{ +mod <- MASS::loglm(~ (Admit + Gender) * Dept, data = UCBAdmissions) +assoc_graph(mod) +plot(assoc_graph(mod), main = "Berkeley: [AD] [GD]") +} + +# From glm models (Dayton Survey: cigarette, alcohol, marijuana, sex, race) +data(DaytonSurvey) + +# Mutual independence + sex*race: one edge only +mod.SR <- glm(Freq ~ . + sex*race, data = DaytonSurvey, family = poisson) +assoc_graph(mod.SR) +plot(assoc_graph(mod.SR), main = "Mutual indep. + [SR]") + +# [AM][AC][MC][AR][AS][RS]: {race, sex} indep {marijuana, cigarette} | alcohol +mod.cond <- glm(Freq ~ (cigarette + alcohol + marijuana)^2 + + (alcohol + sex + race)^2, + data = DaytonSurvey, family = poisson) +assoc_graph(mod.cond) +plot(assoc_graph(mod.cond), + groups = list(c("cigarette", "alcohol", "marijuana"), + c("sex", "race")), + main = "{R,S} indep {M,C} | A") + +} +\references{ +Khamis, H. J. (2011). \emph{The Association Graph and the Multigraph for Loglinear Models}. +SAGE Publications. \doi{10.4135/9781452226521} + +Darroch, J. N., Lauritzen, S. L., & Speed, T. P. (1980). Markov Fields and Log-Linear +Interaction Models for Contingency Tables. \emph{The Annals of Statistics}, 8(3), 522--539. +\doi{10.1214/aos/1176345006} + +Whittaker, J. (1990). \emph{Graphical Models in Applied Multivariate Statistics}. +John Wiley & Sons, Chichester. 
+} +\seealso{ +\code{\link{joint}}, \code{\link{conditional}}, \code{\link{mutual}}, +\code{\link{saturated}}, \code{\link{loglin2string}}, \code{\link{seq_loglm}}, +\code{\link{plot.assoc_graph}} + +Other loglinear models: +\code{\link{get_model}()}, +\code{\link{glmlist}()}, +\code{\link{joint}()}, +\code{\link{plot.assoc_graph}()}, +\code{\link{seq_loglm}()} +} +\concept{loglinear models} diff --git a/man/get_model.Rd b/man/get_model.Rd index 7c904b3..463dfa0 100644 --- a/man/get_model.Rd +++ b/man/get_model.Rd @@ -26,16 +26,19 @@ such as \code{sep} and \code{collapse}.} } \value{ For \code{get_model()}: a character string with the model formula or bracket notation. -For \code{get_models()}: a named character vector with the model formulas or bracket notations. + +\if{html}{\out{