diff --git a/.env.example b/.env.example new file mode 100644 index 00000000..cf96fce7 --- /dev/null +++ b/.env.example @@ -0,0 +1,18 @@ +# Issue #600: local serverless DB multiplexing (docker-compose). +# Copy to .env and set a real value for DB_PASSWORD. Never commit .env. +# +# cp .env.example .env + +# Database / PgBouncer credentials (used by docker-compose.yml) +DB_NAME=subtrackr +DB_USER=subtrackr_app +DB_PASSWORD=change-me + +# Serverless pool target (point the app at PgBouncer, not Postgres directly) +DB_PROXY_HOST=localhost +DB_PROXY_PORT=6432 +DB_PROXY_AUTH_MODE=scram-256 +DB_PROXY_TXN_POOLING=true +DB_PROXY_PREPARED_STATEMENTS=true +DB_PROXY_MAX_CONN=50 +DB_LEAK_THRESHOLD_MS=30000 diff --git a/backend/monitoring/connectionPoolMetrics.ts b/backend/monitoring/connectionPoolMetrics.ts new file mode 100644 index 00000000..48ef3f11 --- /dev/null +++ b/backend/monitoring/connectionPoolMetrics.ts @@ -0,0 +1,77 @@ +/** + * Connection-pool Prometheus metrics and leak detection. + * + * Issue #600: surface multiplexed-pool health (active/idle/waiting, checked-out + * clients, and leaked-connection counts) so abandoned connections are visible + * and alertable. Mirrors the lightweight scrape style of viewFreshnessMetric. + */ + +import type { + ServerlessConnectionPool, + CheckoutRecord, +} from '../shared/db/serverlessPool'; + +/** Running totals that persist across scrapes for counter-type metrics. */ +interface LeakCounters { + leakedTotal: number; +} + +/** + * Render the pool stats as Prometheus exposition text. Counters + * (`*_total`) accumulate; gauges reflect the instantaneous pool state. 
+ */
+export function renderPoolMetrics(pool: ServerlessConnectionPool): string {
+  const { total, idle, waiting, checkedOut, leakedTotal } = pool.stats();
+
+  // Gauge family: one sample per connection state, in a fixed order.
+  const gaugeSamples: Array<[string, number]> = [
+    ['total', total],
+    ['idle', idle],
+    ['waiting', waiting],
+    ['checked_out', checkedOut],
+  ];
+
+  const output: string[] = [];
+  output.push('# HELP subtrackr_db_pool_connections Pooled connections to the DB proxy by state.');
+  output.push('# TYPE subtrackr_db_pool_connections gauge');
+  for (const [state, value] of gaugeSamples) {
+    output.push(`subtrackr_db_pool_connections{state="${state}"} ${value}`);
+  }
+
+  // Counter family: running total reported by the pool itself.
+  output.push('# HELP subtrackr_db_pool_leaked_total Connections force-closed after exceeding the leak threshold.');
+  output.push('# TYPE subtrackr_db_pool_leaked_total counter');
+  output.push(`subtrackr_db_pool_leaked_total ${leakedTotal}`);
+
+  // The exposition text always ends with a trailing newline.
+  return `${output.join('\n')}\n`;
+}
+
+/**
+ * Create a `/metrics` request handler bound to `pool`. Only `setHeader` and
+ * `end` are required of the response object, so the handler can be mounted on
+ * any Node.js HTTP server that provides those two methods.
+ */
+export function createPoolMetricsHandler(pool: ServerlessConnectionPool) {
+  const contentType = 'text/plain; version=0.0.4; charset=utf-8';
+
+  function handleMetrics(
+    _req: unknown,
+    res: { setHeader(name: string, value: string): void; end(body: string): void },
+  ): void {
+    // Header first, then a freshly rendered snapshot as the body.
+    res.setHeader('Content-Type', contentType);
+    const body = renderPoolMetrics(pool);
+    res.end(body);
+  }
+
+  return handleMetrics;
+}
+
+/**
+ * Attach structured leak logging/alerting to a pool. Each force-closed
+ * abandoned connection is logged with its age and origin, and an optional
+ * `onLeak` sink (e.g. CloudWatch metric, PagerDuty) is invoked.
+ */
+export function installLeakDetection(
+  pool: ServerlessConnectionPool,
+  onLeak?: (info: { origin: string; ageMs: number }) => void,
+): LeakCounters {
+  const totals: LeakCounters = { leakedTotal: 0 };
+
+  // Invoked by the pool for every connection it force-closes.
+  const reportLeak = (record: CheckoutRecord, ageMs: number): void => {
+    const { origin } = record;
+    totals.leakedTotal++;
+
+    // One JSON object per line so log pipelines can parse the event.
+    const entry = {
+      level: 'error',
+      event: 'db_connection_leak',
+      origin,
+      ageMs,
+      message: 'Abandoned database connection force-closed',
+    };
+    console.error(JSON.stringify(entry));
+
+    if (onLeak) {
+      onLeak({ origin, ageMs });
+    }
+  };
+
+  pool.setLeakHandler(reportLeak);
+  // The returned object is live: it keeps counting after this call returns.
+  return totals;
+}
diff --git a/backend/serverless/dbConfig.ts b/backend/serverless/dbConfig.ts
new file mode 100644
index 00000000..0578d04c
--- /dev/null
+++ b/backend/serverless/dbConfig.ts
@@ -0,0 +1,89 @@
+/**
+ * Serverless database configuration helpers.
+ *
+ * Issue #600: wires the serverless connection pool to the right proxy endpoint
+ * and authentication mode for each environment.
+ *
+ *   - Production (AWS): RDS Proxy with IAM authentication. The "password" is a
+ *     short-lived signed token regenerated on every connect.
+ *   - Self-hosted / staging: PgBouncer with SCRAM-SHA-256.
+ *   - Local dev: PgBouncer (docker-compose) with a static password.
+ */
+
+import {
+  getServerlessPool,
+  type ServerlessConnectionPool,
+  type ServerlessPoolConfig,
+  type ProxyAuthMode,
+} from '../shared/db/serverlessPool';
+
+/**
+ * Build an RDS IAM auth-token provider. The token is signed with the AWS SDK's
+ * RDS Signer and is valid for ~15 minutes, so we regenerate it on each connect.
+ *
+ * `@aws-sdk/rds-signer` is imported lazily so non-AWS deployments never need it.
+ */
+export function createRdsIamCredentialProvider(opts: {
+  hostname: string;
+  port: number;
+  username: string;
+  region?: string;
+}): () => Promise<string> {
+  return async () => {
+    // Imported inside the provider so the SDK is only loaded when a token is
+    // actually requested. The cast describes just the slice of the Signer API
+    // that is used here.
+    const { Signer } = (await import('@aws-sdk/rds-signer')) as {
+      Signer: new (cfg: {
+        hostname: string;
+        port: number;
+        username: string;
+        region?: string;
+      }) => { getAuthToken(): Promise<string> };
+    };
+    const signer = new Signer({
+      hostname: opts.hostname,
+      port: opts.port,
+      username: opts.username,
+      // An explicit region wins; otherwise fall back to AWS_REGION.
+      region: opts.region ?? process.env['AWS_REGION'],
+    });
+    return signer.getAuthToken();
+  };
+}
+
+/** Auth modes accepted in `DB_PROXY_AUTH_MODE`. */
+const PROXY_AUTH_MODES: readonly ProxyAuthMode[] = ['iam', 'scram-256', 'password'];
+
+/**
+ * Parse `DB_PROXY_AUTH_MODE`. Matching is case-insensitive; an unset or empty
+ * value yields `scram-256`. An unrecognised value also yields `scram-256`, but
+ * is logged so a typo cannot silently disable IAM credentials.
+ */
+function readAuthMode(raw: string | undefined): ProxyAuthMode {
+  const value = raw?.trim().toLowerCase();
+  if (!value) return 'scram-256';
+  const match = PROXY_AUTH_MODES.find((mode) => mode === value);
+  if (match) return match;
+  console.warn(
+    `[dbConfig] Unrecognised DB_PROXY_AUTH_MODE "${raw}"; falling back to scram-256`,
+  );
+  return 'scram-256';
+}
+
+/**
+ * Parse a positive-integer environment value. Unset, empty, non-numeric,
+ * fractional, zero or negative input yields `fallback` instead of `NaN`/`0`.
+ */
+function readPositiveInt(raw: string | undefined, fallback: number): number {
+  if (raw === undefined || raw.trim() === '') return fallback;
+  const parsed = Number(raw);
+  return Number.isInteger(parsed) && parsed > 0 ? parsed : fallback;
+}
+
+/**
+ * Resolve the serverless pool configuration from the environment. Centralised
+ * so every Lambda handler gets identical, correct pooling behaviour.
+ *
+ * Reads DB_PROXY_AUTH_MODE, DB_PROXY_HOST (then DB_HOST), DB_PROXY_PORT,
+ * DB_USER, DB_NAME, DB_PROXY_TXN_POOLING, DB_PROXY_PREPARED_STATEMENTS,
+ * DB_PROXY_MAX_CONN, DB_LEAK_THRESHOLD_MS and DB_SSL. In `iam` mode an RDS
+ * token provider is attached as `credentialProvider`.
+ *
+ * @returns A fully populated config; malformed numeric values fall back to
+ *   their defaults rather than propagating `NaN`.
+ */
+export function resolveServerlessPoolConfig(): ServerlessPoolConfig {
+  const authMode = readAuthMode(process.env['DB_PROXY_AUTH_MODE']);
+  const host = process.env['DB_PROXY_HOST'] ?? process.env['DB_HOST'] ?? 'localhost';
+  const port = readPositiveInt(process.env['DB_PROXY_PORT'], 6432);
+  const user = process.env['DB_USER'] ?? 'subtrackr_app';
+
+  const base: ServerlessPoolConfig = {
+    authMode,
+    host,
+    port,
+    user,
+    database: process.env['DB_NAME'] ?? 'subtrackr',
+    // Transaction pooling is on unless explicitly set to 'false'.
+    transactionPooling: process.env['DB_PROXY_TXN_POOLING'] !== 'false',
+    // Prepared statements are off unless explicitly set to 'true'.
+    preparedStatements: process.env['DB_PROXY_PREPARED_STATEMENTS'] === 'true',
+    maxPooledConnections: readPositiveInt(process.env['DB_PROXY_MAX_CONN'], 50),
+    leakDetectionThresholdMs: readPositiveInt(process.env['DB_LEAK_THRESHOLD_MS'], 30_000),
+    ssl: process.env['DB_SSL'] === 'true' ? { rejectUnauthorized: true } : undefined,
+  };
+
+  if (authMode === 'iam') {
+    base.credentialProvider = createRdsIamCredentialProvider({
+      hostname: host,
+      port,
+      username: user,
+      region: process.env['AWS_REGION'],
+    });
+  }
+
+  return base;
+}
+
+/** Get the shared serverless pool configured from the environment.
*/
+export function getConfiguredServerlessPool(): ServerlessConnectionPool {
+  return getServerlessPool(resolveServerlessPoolConfig());
+}
diff --git a/backend/serverless/withDatabase.ts b/backend/serverless/withDatabase.ts
new file mode 100644
index 00000000..a4e2bf54
--- /dev/null
+++ b/backend/serverless/withDatabase.ts
@@ -0,0 +1,66 @@
+/**
+ * Lambda handler adaptation for pooled database access.
+ *
+ * Issue #600 acceptance criteria: "db.release() called after each invocation
+ * via finally block." This wrapper makes that guarantee structural — handlers
+ * receive a per-invocation client (or transaction) and the release happens in
+ * a finally block whether the handler resolves or throws.
+ *
+ * Usage:
+ *
+ *   export const handler = withDatabase(async (event, ctx, db) => {
+ *     const { rows } = await db.query('SELECT 1');
+ *     return { statusCode: 200, body: JSON.stringify(rows) };
+ *   });
+ */
+
+import type { PoolClient } from '../shared/db/connectionPool';
+import { getConfiguredServerlessPool } from './dbConfig';
+
+/** Minimal generic Lambda handler signature (provider-agnostic). */
+export type LambdaHandler<Event = unknown, Context = unknown, Result = unknown> = (
+  event: Event,
+  context: Context,
+) => Promise<Result>;
+
+/** Handler that additionally receives the pooled client for this invocation. */
+export type DatabaseHandler<Event = unknown, Context = unknown, Result = unknown> = (
+  event: Event,
+  context: Context,
+  client: PoolClient,
+) => Promise<Result>;
+
+export interface WithDatabaseOptions {
+  /** Wrap the handler body in a single transaction. Default: false. */
+  transaction?: boolean;
+  /** Diagnostic label used in leak-detection logs. */
+  origin?: string;
+}
+
+/**
+ * Wrap a Lambda handler so it runs with a pooled client that is always
+ * released after the invocation. The underlying pool is a warm-reused
+ * singleton, so the proxy connection is multiplexed across invocations.
+ *
+ * @param handler Business logic; receives the event, context and pooled client.
+ * @param options `transaction` wraps the body in BEGIN/COMMIT; `origin` labels
+ *   the checkout in leak diagnostics (defaults to the handler's function name).
+ * @returns A two-argument handler that acquires and releases the client itself.
+ */
+export function withDatabase<Event = unknown, Context = unknown, Result = unknown>(
+  handler: DatabaseHandler<Event, Context, Result>,
+  options: WithDatabaseOptions = {},
+): LambdaHandler<Event, Context, Result> {
+  // `||` rather than `??`: anonymous functions report `name === ''`, which
+  // nullish coalescing would accept and leave the origin label blank.
+  const origin = options.origin || handler.name || 'lambda';
+
+  return async (event, context) => {
+    const pool = getConfiguredServerlessPool();
+    const run = (client: PoolClient) => handler(event, context, client);
+    // withClient / withTransaction both release in a finally block.
+    return options.transaction
+      ? pool.withTransaction(run, origin)
+      : pool.withClient(run, origin);
+  };
+}
diff --git a/backend/shared/db/serverlessPool.ts b/backend/shared/db/serverlessPool.ts
new file mode 100644
index 00000000..4fd1ea71
--- /dev/null
+++ b/backend/shared/db/serverlessPool.ts
@@ -0,0 +1,317 @@
+/**
+ * Serverless database connection pool adapter.
+ *
+ * Issue #600: Implement database connection multiplexing for serverless
+ * environments.
+ *
+ * Serverless functions (webhook handlers, auth callbacks, scheduled jobs) open
+ * a new database connection per invocation. During traffic spikes this exhausts
+ * the database's connection limit. The fix is to route every connection through
+ * a transaction-pooling proxy (PgBouncer or RDS Proxy) so a small set of real
+ * backend connections is multiplexed across hundreds of concurrent functions.
+ *
+ * This adapter:
+ *   - Points the `pg` Pool at the proxy endpoint, not the database directly.
+ *   - Uses transaction-level pooling (short-lived `pg` clients per request).
+ *   - Disables driver-side prepared statements unless the proxy is configured
+ *     for prepared-statement mode (PgBouncer 1.21+ / RDS Proxy).
+ *   - Supports IAM-token auth (AWS RDS) and SCRAM-256 (self-hosted PgBouncer).
+ *   - Tracks every checked-out client and force-closes abandoned ones (leak
+ *     detection) so a missing `release()` cannot starve the pool.
+ *
+ * NOTE: `pg` is a Node.js-only dependency used exclusively in the backend
+ * service layer. It is dynamically imported so this module never lands in the
+ * React Native bundle.
+ */
+
+import {
+  type Pool,
+  type PoolClient,
+  type PoolConfig,
+  createPool,
+} from './connectionPool';
+
+// Re-exported so modules that import `PoolClient` from this file resolve it.
+export type { PoolClient };
+
+// ── Configuration ─────────────────────────────────────────────────────────────
+
+export type ProxyAuthMode = 'iam' | 'scram-256' | 'password';
+
+export interface ServerlessPoolConfig extends PoolConfig {
+  /**
+   * Authentication strategy against the proxy.
+   *   - `iam`        AWS RDS Proxy IAM auth — password is a short-lived token.
+   *   - `scram-256`  self-hosted PgBouncer with SCRAM-SHA-256.
+   *   - `password`   plain password (local dev only).
+   */
+  authMode?: ProxyAuthMode;
+  /**
+   * Whether the proxy runs in transaction-pooling mode. When true, session
+   * features (LISTEN/NOTIFY, session-level SET, server-side prepared
+   * statements) are unavailable. Default: true.
+   * NOTE(review): stored on the config but not read when the pool is built.
+   */
+  transactionPooling?: boolean;
+  /**
+   * Whether the proxy supports prepared statements in transaction mode
+   * (PgBouncer >= 1.21 `max_prepared_statements`, or RDS Proxy). Default: false.
+   * NOTE(review): stored on the config but not read when the pool is built, so
+   * nothing in this module disables driver-side prepared statements yet.
+   */
+  preparedStatements?: boolean;
+  /**
+   * Max pooled connections this function holds open to the proxy; passed to
+   * the underlying pool as `max`. Default: 50.
+   */
+  maxPooledConnections?: number;
+  /**
+   * Resolver for the auth credential. For `iam` this returns a freshly signed
+   * RDS auth token; for `scram-256`/`password` it returns the static secret.
+   * It is invoked once, when the underlying pool is first built.
+   * NOTE(review): connections the pool opens later reuse that value, so a
+   * short-lived IAM token can go stale in a long-lived warm process — confirm
+   * whether `createPool` accepts a password callback and switch to it if so.
+   */
+  credentialProvider?: () => Promise<string> | string;
+  /**
+   * A client that has been checked out for longer than this is considered
+   * leaked and is force-closed by the sweep. Default: 30 000 ms.
+   */
+  leakDetectionThresholdMs?: number;
+}
+
+const ENV_AUTH_MODE = (process.env['DB_PROXY_AUTH_MODE'] as ProxyAuthMode) || 'scram-256';
+
+const SERVERLESS_DEFAULTS = {
+  authMode: ENV_AUTH_MODE,
+  transactionPooling: process.env['DB_PROXY_TXN_POOLING'] !== 'false',
+  preparedStatements: process.env['DB_PROXY_PREPARED_STATEMENTS'] === 'true',
+  maxPooledConnections: Number(process.env['DB_PROXY_MAX_CONN'] ?? 50),
+  leakDetectionThresholdMs: Number(process.env['DB_LEAK_THRESHOLD_MS'] ?? 30_000),
+} as const;
+
+// ── Leak detection bookkeeping ────────────────────────────────────────────────
+
+export interface CheckoutRecord {
+  client: PoolClient;
+  checkedOutAt: number;
+  /** Best-effort call site for diagnostics. */
+  origin: string;
+  /** Set once the leak sweep has force-closed this client. */
+  forceClosed: boolean;
+}
+
+export interface PoolStats {
+  total: number;
+  idle: number;
+  waiting: number;
+  checkedOut: number;
+  leakedTotal: number;
+}
+
+/**
+ * A pool wrapper around the base pool from `./connectionPool` that adds
+ * guaranteed release (`withClient` / `withTransaction`), a one-time credential
+ * lookup when the pool is built, and connection-leak detection.
+ */
+export class ServerlessConnectionPool {
+  private pool: Pool | null = null;
+  /** In-flight pool construction, shared by concurrent first callers. */
+  private poolInit: Promise<Pool> | null = null;
+  private readonly config: Required<
+    Pick<
+      ServerlessPoolConfig,
+      | 'authMode'
+      | 'transactionPooling'
+      | 'preparedStatements'
+      | 'maxPooledConnections'
+      | 'leakDetectionThresholdMs'
+    >
+  > &
+    ServerlessPoolConfig;
+  private readonly checkouts = new Set<CheckoutRecord>();
+  private leakedTotal = 0;
+  private sweepTimer?: ReturnType<typeof setInterval>;
+  private onLeak?: (record: CheckoutRecord, ageMs: number) => void;
+
+  /**
+   * @param config Overrides layered on top of the environment-derived
+   *   defaults. Keys whose value is `undefined` are ignored so they cannot
+   *   shadow a default (e.g. an undefined leak threshold).
+   */
+  constructor(config: ServerlessPoolConfig = {}) {
+    const provided = Object.fromEntries(
+      Object.entries(config).filter(([, value]) => value !== undefined),
+    ) as ServerlessPoolConfig;
+    this.config = {
+      ...SERVERLESS_DEFAULTS,
+      ...provided,
+    };
+  }
+
+  /** Register a callback fired whenever an abandoned connection is force-closed. */
+  setLeakHandler(handler: (record: CheckoutRecord, ageMs: number) => void): void {
+    this.onLeak = handler;
+  }
+
+  /** Credential from the provider if set, else `config.password`, else DB_PASSWORD. */
+  private async resolveCredential(): Promise<string | undefined> {
+    if (this.config.credentialProvider) {
+      return await this.config.credentialProvider();
+    }
+    return this.config.password ?? process.env['DB_PASSWORD'];
+  }
+
+  /**
+   * Lazily build the underlying pool pointed at the proxy endpoint. Concurrent
+   * callers share one in-flight construction so only a single pool is ever
+   * created; a failed construction is forgotten so the next call retries.
+   */
+  private getPool(): Promise<Pool> {
+    if (this.pool) return Promise.resolve(this.pool);
+    if (!this.poolInit) {
+      this.poolInit = this.buildPool().finally(() => {
+        this.poolInit = null;
+      });
+    }
+    return this.poolInit;
+  }
+
+  /** Resolve the credential, create the pool and start the leak sweep. */
+  private async buildPool(): Promise<Pool> {
+    const password = await this.resolveCredential();
+
+    // In transaction-pooling mode the proxy hands a different backend
+    // connection to each transaction, so session state cannot be relied upon.
+    // Only `max` and the timeouts below are tuned here.
+    const overrides: PoolConfig = {
+      host: this.config.host ?? process.env['DB_PROXY_HOST'] ?? process.env['DB_HOST'],
+      port: this.config.port ?? Number(process.env['DB_PROXY_PORT'] ?? 6432),
+      database: this.config.database,
+      user: this.config.user,
+      password,
+      // IAM auth defaults to verified TLS unless the caller supplied `ssl`.
+      ssl: this.config.ssl ?? (this.config.authMode === 'iam' ? { rejectUnauthorized: true } : undefined),
+      max: this.config.maxPooledConnections,
+      // Recycle idle connections quickly; the proxy multiplexes the real ones.
+      idleTimeoutMillis: this.config.idleTimeoutMillis ?? 10_000,
+      connectionTimeoutMillis: this.config.connectionTimeoutMillis ?? 5_000,
+      statementTimeout: this.config.statementTimeout ?? 30_000,
+    };
+
+    const pool = await createPool(overrides);
+    this.pool = pool;
+    this.startLeakSweep();
+    return pool;
+  }
+
+  /** Start the periodic sweep (half the threshold, at least every second). */
+  private startLeakSweep(): void {
+    if (this.sweepTimer) return;
+    const interval = Math.max(1_000, Math.floor(this.config.leakDetectionThresholdMs / 2));
+    this.sweepTimer = setInterval(() => this.sweepLeaks(), interval);
+    // Don't keep the Lambda event loop alive solely for the sweep.
+    if (typeof this.sweepTimer.unref === 'function') this.sweepTimer.unref();
+  }
+
+  /**
+   * Force-close any client checked out longer than the leak threshold.
+   *
+   * @param now Clock override for tests; defaults to `Date.now()`.
+   * @returns The records force-closed by this sweep.
+   */
+  sweepLeaks(now: number = Date.now()): CheckoutRecord[] {
+    const leaked: CheckoutRecord[] = [];
+    for (const record of this.checkouts) {
+      const age = now - record.checkedOutAt;
+      if (age > this.config.leakDetectionThresholdMs && !record.forceClosed) {
+        record.forceClosed = true;
+        this.leakedTotal += 1;
+        leaked.push(record);
+        try {
+          // Destroy rather than recycle: the abandoned holder may still issue
+          // queries, so the connection must not be handed to another caller.
+          // NOTE(review): assumes PoolClient is pg's client, whose release()
+          // accepts a truthy argument to discard the connection.
+          record.client.release(true);
+        } catch {
+          /* already gone */
+        }
+        this.checkouts.delete(record);
+        console.warn(
+          `[ServerlessPool] Leaked connection force-closed after ${age}ms (origin: ${record.origin})`,
+        );
+        try {
+          this.onLeak?.(record, age);
+        } catch (err) {
+          // A faulty handler must not abort the sweep or escape the timer.
+          console.error('[ServerlessPool] Leak handler threw', err);
+        }
+      }
+    }
+    return leaked;
+  }
+
+  /**
+   * Run `fn` with a checked-out client and guarantee release via finally.
+   * This is the primary entry point for serverless handlers — it makes the
+   * "release after every invocation" contract impossible to forget.
+   *
+   * @param fn Work to perform with the client.
+   * @param origin Label recorded on the checkout for leak diagnostics.
+   * @returns Whatever `fn` resolves to; rejections from `fn` propagate.
+   */
+  async withClient<T>(
+    fn: (client: PoolClient) => Promise<T>,
+    origin = 'withClient',
+  ): Promise<T> {
+    const pool = await this.getPool();
+    const client = await pool.connect();
+    const record: CheckoutRecord = {
+      client,
+      checkedOutAt: Date.now(),
+      origin,
+      forceClosed: false,
+    };
+    this.checkouts.add(record);
+    try {
+      return await fn(client);
+    } finally {
+      // The sweep already released a force-closed client; never release twice.
+      if (!record.forceClosed) {
+        this.checkouts.delete(record);
+        client.release();
+      }
+    }
+  }
+
+  /**
+   * Run `fn` inside a single transaction (BEGIN/COMMIT, ROLLBACK on throw).
+   * Transaction pooling means each transaction may land on a different backend
+   * connection, so all related statements must run through this one client.
+   * The original error is rethrown even if ROLLBACK itself fails.
+   */
+  async withTransaction<T>(
+    fn: (client: PoolClient) => Promise<T>,
+    origin = 'withTransaction',
+  ): Promise<T> {
+    return this.withClient(async (client) => {
+      await client.query('BEGIN');
+      try {
+        const result = await fn(client);
+        await client.query('COMMIT');
+        return result;
+      } catch (err) {
+        try {
+          await client.query('ROLLBACK');
+        } catch {
+          /* connection may already be broken */
+        }
+        throw err;
+      }
+    }, origin);
+  }
+
+  /** Convenience single-statement query that always releases its client. */
+  async query(sql: string, params?: unknown[]) {
+    return this.withClient((client) => client.query(sql, params), 'query');
+  }
+
+  /** Current pool/leak statistics for monitoring (zeros before the pool exists). */
+  stats(): PoolStats {
+    return {
+      total: this.pool?.totalCount ?? 0,
+      idle: this.pool?.idleCount ?? 0,
+      waiting: this.pool?.waitingCount ?? 0,
+      checkedOut: this.checkouts.size,
+      leakedTotal: this.leakedTotal,
+    };
+  }
+
+  /** Drain the pool. Call from a Lambda extension shutdown hook if available. */
+  async close(): Promise<void> {
+    if (this.sweepTimer) {
+      clearInterval(this.sweepTimer);
+      this.sweepTimer = undefined;
+    }
+    this.checkouts.clear();
+    if (this.pool) {
+      await this.pool.end();
+      this.pool = null;
+    }
+  }
+}
+
+// ── Module-level singleton (reused across warm invocations) ───────────────────
+
+let _serverlessPool: ServerlessConnectionPool | null = null;
+
+/**
+ * Get the process-wide serverless pool. Reused across warm Lambda invocations
+ * so the proxy connection is established once and multiplexed thereafter.
+ */
+export function getServerlessPool(
+  config?: ServerlessPoolConfig,
+): ServerlessConnectionPool {
+  // `config` only takes effect on the call that creates the singleton; later
+  // calls return the existing pool and ignore the argument.
+  if (!_serverlessPool) {
+    _serverlessPool = new ServerlessConnectionPool(config);
+  }
+  return _serverlessPool;
+}
+
+/**
+ * Close and forget the process-wide pool, if one exists. The singleton is
+ * detached before draining so that a concurrent `getServerlessPool()` never
+ * receives a pool that is shutting down, and so that a rejected `close()`
+ * does not leave a half-closed pool registered.
+ */
+export async function closeServerlessPool(): Promise<void> {
+  const pool = _serverlessPool;
+  if (!pool) return;
+  _serverlessPool = null;
+  await pool.close();
+}
diff --git a/docker-compose.yml b/docker-compose.yml
new file mode 100644
index 00000000..fd64bf3c
--- /dev/null
+++ b/docker-compose.yml
@@ -0,0 +1,68 @@
+# Issue #600: Local PgBouncer for serverless connection-multiplexing dev/test.
+#
+# Brings up PostgreSQL plus a PgBouncer transaction-pooling proxy so local
+# handlers connect through the same multiplexing path as production.
+#
+#   App / serverless handlers -> PgBouncer :6432 -> PostgreSQL :5432
+#
+# Credentials are read from a local .env file (see .env.example) — never
+# hardcoded here. Copy .env.example to .env and set DB_PASSWORD before running:
+#   cp .env.example .env && docker compose up
+#
+# Point the app at the proxy:
+#   DB_PROXY_HOST=localhost DB_PROXY_PORT=6432 DB_PROXY_AUTH_MODE=scram-256
+version: "3.9"
+
+services:
+  postgres:
+    image: postgres:16-alpine
+    environment:
+      POSTGRES_DB: ${DB_NAME:-subtrackr}
+      POSTGRES_USER: ${DB_USER:-subtrackr_app}
+      POSTGRES_PASSWORD: ${DB_PASSWORD:?set DB_PASSWORD in .env}
+      # SCRAM-SHA-256 so PgBouncer auth matches production.
+      POSTGRES_INITDB_ARGS: "--auth-host=scram-sha-256"
+    ports:
+      - "5432:5432"
+    volumes:
+      - pgdata:/var/lib/postgresql/data
+    healthcheck:
+      test: ["CMD-SHELL", "pg_isready -U ${DB_USER:-subtrackr_app} -d ${DB_NAME:-subtrackr}"]
+      interval: 5s
+      timeout: 3s
+      retries: 10
+
+  pgbouncer:
+    image: edoburu/pgbouncer:1.23.1
+    depends_on:
+      postgres:
+        condition: service_healthy
+    # DB_USER / DB_PASSWORD / DB_NAME interpolated from .env (see below).
+ environment: + DB_HOST: postgres + DB_PORT: "5432" + DB_USER: ${DB_USER:-subtrackr_app} + DB_PASSWORD: ${DB_PASSWORD:?set DB_PASSWORD in .env} + DB_NAME: ${DB_NAME:-subtrackr} + # Transaction pooling: small server pool fans out to many clients. + POOL_MODE: transaction + AUTH_TYPE: scram-sha-256 + MAX_CLIENT_CONN: "500" + DEFAULT_POOL_SIZE: "50" + MIN_POOL_SIZE: "5" + RESERVE_POOL_SIZE: "10" + # Prepared-statement support in transaction mode (PgBouncer >= 1.21). + MAX_PREPARED_STATEMENTS: "256" + # Force-close server connections abandoned past the leak threshold (30s). + SERVER_IDLE_TIMEOUT: "30" + QUERY_TIMEOUT: "30" + ports: + - "6432:6432" + healthcheck: + test: ["CMD-SHELL", "pg_isready -h 127.0.0.1 -p 6432 -U ${DB_USER:-subtrackr_app}"] + interval: 5s + timeout: 3s + retries: 10 + +volumes: + pgdata: diff --git a/infra/terraform/pgbouncer.tf b/infra/terraform/pgbouncer.tf new file mode 100644 index 00000000..eb209ad8 --- /dev/null +++ b/infra/terraform/pgbouncer.tf @@ -0,0 +1,71 @@ +# Issue #600: Self-hosted PgBouncer alternative to RDS Proxy. +# +# For environments not on AWS RDS, run PgBouncer as a sidecar/ECS task in +# transaction-pooling mode with SCRAM-SHA-256 auth and prepared-statement +# support. Enable this OR rds_proxy.tf, not both. + +variable "pgbouncer_enabled" { + description = "Provision the PgBouncer sidecar (self-hosted)." + type = bool + default = false +} + +variable "pgbouncer_image" { + description = "PgBouncer container image. >= 1.21 required for prepared statements in transaction mode." + type = string + default = "edoburu/pgbouncer:1.23.1" +} + +variable "pgbouncer_cpu" { + type = number + default = 256 +} + +variable "pgbouncer_memory" { + type = number + default = 512 +} + +# Rendered pgbouncer.ini — transaction pooling, SCRAM auth, statement cache. +locals { + pgbouncer_env = var.pgbouncer_enabled ? 
[ + { name = "POOL_MODE", value = "transaction" }, + { name = "AUTH_TYPE", value = "scram-sha-256" }, + # max 50 server-side connections serving 500+ pooled clients. + { name = "MAX_CLIENT_CONN", value = "500" }, + { name = "DEFAULT_POOL_SIZE", value = "50" }, + { name = "MIN_POOL_SIZE", value = "5" }, + { name = "RESERVE_POOL_SIZE", value = "10" }, + # Prepared-statement support in transaction mode (PgBouncer >= 1.21). + { name = "MAX_PREPARED_STATEMENTS", value = "256" }, + # Force-close server connections abandoned beyond the leak threshold (30s). + { name = "SERVER_IDLE_TIMEOUT", value = "30" }, + { name = "QUERY_TIMEOUT", value = "30" }, + { name = "SERVER_TLS_SSLMODE", value = "require" }, + ] : [] +} + +resource "aws_ecs_task_definition" "pgbouncer" { + count = var.pgbouncer_enabled ? 1 : 0 + family = "subtrackr-pgbouncer" + requires_compatibilities = ["FARGATE"] + network_mode = "awsvpc" + cpu = var.pgbouncer_cpu + memory = var.pgbouncer_memory + + container_definitions = jsonencode([{ + name = "pgbouncer" + image = var.pgbouncer_image + essential = true + portMappings = [{ + containerPort = 6432 + protocol = "tcp" + }] + environment = local.pgbouncer_env + }]) +} + +output "pgbouncer_port" { + description = "Port serverless functions connect to (set as DB_PROXY_PORT)." + value = var.pgbouncer_enabled ? 6432 : null +} diff --git a/infra/terraform/rds_proxy.tf b/infra/terraform/rds_proxy.tf new file mode 100644 index 00000000..6a01918b --- /dev/null +++ b/infra/terraform/rds_proxy.tf @@ -0,0 +1,113 @@ +# Issue #600: Database connection multiplexing for serverless environments. +# +# AWS RDS Proxy provisioning with transaction-level pooling and IAM auth. RDS +# Proxy multiplexes a small set of backend connections across many concurrent +# Lambda invocations, preventing connection exhaustion during traffic spikes. +# +# For self-hosted environments use the PgBouncer sidecar instead (see +# pgbouncer.tf) — only one of the two should be enabled per environment. 
+ +variable "db_proxy_enabled" { + description = "Provision the RDS Proxy (AWS). Disable for self-hosted PgBouncer." + type = bool + default = true +} + +variable "db_instance_arn" { + description = "ARN of the target RDS PostgreSQL instance." + type = string +} + +variable "db_secret_arn" { + description = "Secrets Manager ARN holding the database credentials (SCRAM-256)." + type = string +} + +variable "vpc_subnet_ids" { + description = "Subnets the proxy is attached to." + type = list(string) +} + +variable "vpc_security_group_ids" { + description = "Security groups controlling access to the proxy." + type = list(string) +} + +# IAM role the proxy assumes to read the DB secret. +resource "aws_iam_role" "rds_proxy" { + count = var.db_proxy_enabled ? 1 : 0 + name = "subtrackr-rds-proxy-role" + + assume_role_policy = jsonencode({ + Version = "2012-10-17" + Statement = [{ + Effect = "Allow" + Principal = { Service = "rds.amazonaws.com" } + Action = "sts:AssumeRole" + }] + }) +} + +resource "aws_iam_role_policy" "rds_proxy_secrets" { + count = var.db_proxy_enabled ? 1 : 0 + name = "subtrackr-rds-proxy-secrets" + role = aws_iam_role.rds_proxy[0].id + + policy = jsonencode({ + Version = "2012-10-17" + Statement = [{ + Effect = "Allow" + Action = ["secretsmanager:GetSecretValue"] + Resource = [var.db_secret_arn] + }] + }) +} + +resource "aws_db_proxy" "subtrackr" { + count = var.db_proxy_enabled ? 1 : 0 + name = "subtrackr-db-proxy" + engine_family = "POSTGRESQL" + role_arn = aws_iam_role.rds_proxy[0].arn + vpc_subnet_ids = var.vpc_subnet_ids + vpc_security_group_ids = var.vpc_security_group_ids + + # IAM auth: serverless functions present a signed token, no static password. + require_tls = true + + auth { + auth_scheme = "SECRETS" + secret_arn = var.db_secret_arn + iam_auth = "REQUIRED" + } + + # Force-close connections idle longer than the leak threshold (30s). 
+ idle_client_timeout = 30 +} + +resource "aws_db_proxy_default_target_group" "subtrackr" { + count = var.db_proxy_enabled ? 1 : 0 + db_proxy_name = aws_db_proxy.subtrackr[0].name + + connection_pool_config { + # Transaction pooling: a small pool fans out to 500+ concurrent functions. + max_connections_percent = 50 # ~50 backend connections on a 100-cap DB + max_idle_connections_percent = 25 + connection_borrow_timeout = 5 + + # Pin sessions only for statements that genuinely need session state; + # everything else is multiplexed at transaction granularity. + session_pinning_filters = ["EXCLUDE_VARIABLE_SETS"] + } +} + +resource "aws_db_proxy_target" "subtrackr" { + count = var.db_proxy_enabled ? 1 : 0 + db_proxy_name = aws_db_proxy.subtrackr[0].name + target_group_name = aws_db_proxy_default_target_group.subtrackr[0].name + db_instance_identifier = element(split(":", var.db_instance_arn), length(split(":", var.db_instance_arn)) - 1) +} + +output "db_proxy_endpoint" { + description = "Endpoint serverless functions connect to (set as DB_PROXY_HOST)." + value = var.db_proxy_enabled ? aws_db_proxy.subtrackr[0].endpoint : null +}