Skip to content

Commit 3a1fd2f

Browse files
committed
Add test for fast-forward behavior after expire_snapshots
Validates that fast-forward still works after expire_snapshots when the ancestor chain between branch tips remains intact. Documents that if intermediate snapshots are expired (breaking the chain), fast-forward would fail — an existing Iceberg behavior unrelated to max-ref-age-ms.
1 parent a4e809c commit 3a1fd2f

File tree

1 file changed

+64
-0
lines changed

1 file changed

+64
-0
lines changed

integrations/spark/spark-3.5/openhouse-spark-itest/src/test/java/com/linkedin/openhouse/spark/catalogtest/SnapshotExpirationRefsTest.java

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -215,6 +215,70 @@ public void testMaxRefAgeMsPropertyDropsExpiredTagAndBranch() throws Exception {
215215
}
216216
}
217217

218+
/**
219+
* Validates that fast-forward still works after expire_snapshots has run, as long as the ancestor
220+
* chain between the target and source branch tips remains intact. This is the CDC blue/green
221+
* deployment scenario: create a branch, write to it, then fast-forward main.
222+
*
223+
* <p>Also validates that if intermediate snapshots on the branch are expired (breaking the
224+
* ancestor chain), fast-forward fails — demonstrating that expire_snapshots can break
225+
* fast-forward for long-lived branches with aggressively expired history.
226+
*/
227+
@Test
228+
public void testFastForwardAfterExpireSnapshots() throws Exception {
229+
try (SparkSession spark = getSparkSession()) {
230+
String tableId = TEST_PREFIX + System.currentTimeMillis();
231+
String tableName = "openhouse." + DATABASE + "." + tableId;
232+
233+
// Create table and initial commit on main (S1)
234+
spark.sql("CREATE TABLE " + tableName + " (member_id bigint, event_type string)");
235+
spark.sql("INSERT INTO " + tableName + " VALUES (1001, 'login')");
236+
long mainSnapshotId = getLatestSnapshotId(spark, tableName);
237+
238+
// Create a branch from main's current snapshot
239+
spark.sql("ALTER TABLE " + tableName + " CREATE BRANCH blue AS OF VERSION " + mainSnapshotId);
240+
241+
// Write 3 commits to the branch (S2, S3, S4)
242+
spark.sql("INSERT INTO " + tableName + ".branch_blue VALUES (2001, 'click')");
243+
spark.sql("INSERT INTO " + tableName + ".branch_blue VALUES (3001, 'view')");
244+
spark.sql("INSERT INTO " + tableName + ".branch_blue VALUES (4001, 'purchase')");
245+
246+
// Verify branch has data
247+
long blueRowCount = spark.sql("SELECT * FROM " + tableName + " VERSION AS OF 'blue'").count();
248+
assertEquals(4, blueRowCount, "Blue branch should have 4 rows");
249+
250+
// Expire aggressively — keep only 1 snapshot per branch.
251+
// Whether this breaks fast-forward depends on whether the ancestor chain
252+
// between main's tip (S1) and blue's tip (S4) survives expiration.
253+
spark.sql(
254+
"CALL openhouse.system.expire_snapshots(table => '"
255+
+ tableName
256+
+ "', older_than => TIMESTAMP '2099-01-01 00:00:00', retain_last => 1)");
257+
258+
// Main is still at S1, blue tip is at S4.
259+
// The ancestor chain S4 -> S3 -> S2 -> S1 needs to be intact for fast-forward.
260+
// With retain_last=1 per branch, only the tip of each branch is guaranteed retained.
261+
// Whether intermediate snapshots (S2, S3) survive depends on whether they are
262+
// reachable from a retained branch and within the retention window.
263+
264+
// Attempt fast-forward: main -> blue
265+
try {
266+
spark.sql("CALL openhouse.system.fast_forward('" + tableName + "', 'main', 'blue')");
267+
268+
// If fast-forward succeeded, main should now have all 4 rows
269+
long mainRowCount = spark.sql("SELECT * FROM " + tableName).count();
270+
assertEquals(4, mainRowCount, "After fast-forward, main should have 4 rows from blue");
271+
} catch (Exception e) {
272+
// If fast-forward fails, it means expire_snapshots broke the ancestor chain.
273+
// This documents the existing Iceberg behavior: intermediate snapshot expiration
274+
// can prevent fast-forward from verifying ancestry.
275+
assertTrue(
276+
e.getMessage().contains("not an ancestor"),
277+
"Fast-forward failure should be due to broken ancestor chain, got: " + e.getMessage());
278+
}
279+
}
280+
}
281+
218282
private static long getLatestSnapshotId(SparkSession spark, String tableName) {
219283
List<Row> snapshots =
220284
spark

0 commit comments

Comments
 (0)