Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 0 additions & 4 deletions .github/workflows/asf-allowlist-check.yml
Original file line number Diff line number Diff line change
Expand Up @@ -25,13 +25,9 @@ name: "ASF Allowlist Check"

on:
pull_request:
paths:
- ".github/**"
push:
branches:
- main
paths:
- ".github/**"

permissions:
contents: read
Expand Down
32 changes: 32 additions & 0 deletions api/src/main/java/org/apache/iceberg/ManifestFile.java
Original file line number Diff line number Diff line change
Expand Up @@ -186,6 +186,38 @@ default boolean hasDeletedFiles() {
/** Returns the total number of rows in all files with status DELETED in the manifest file. */
Long deletedRowsCount();

/**
* Returns the number of files with status REPLACED in the manifest file, or null if not tracked.
*
* <p>REPLACED files are the prior-state entries of v4 REPLACED/MODIFIED pairs and are not live.
* Returns null for manifest files written by pre-v4 writers.
*
* <p>This default implementation always returns null, so implementations that do not override it
* report the count as not tracked.
*
* @return the number of REPLACED files, or null if the count is not tracked
*/
default Integer replacedFilesCount() {
return null;
}

/**
* Returns the total number of rows in all files with status REPLACED in the manifest file, or
* null if not tracked.
*
* <p>Returns null for manifest files written by pre-v4 writers.
*
* <p>This default implementation always returns null, so implementations that do not override it
* report the row count as not tracked.
*
* @return the total row count across REPLACED files, or null if the count is not tracked
*/
default Long replacedRowsCount() {
return null;
}

/**
* Returns the writer format version of the manifest file. Used at the v4 root-manifest level to
* dispatch leaf-manifest reads: {@code 0} for legacy v1-v3 manifests (Avro {@code manifest_entry}
* shape), {@code 4} for v4 leaf manifests (Parquet {@code content_entry} shape).
*
* <p>Defaults to {@code 0} (legacy) for manifests that don't carry an explicit value — pre-v4
* manifest list entries don't have this field.
*
* <p>This default implementation always returns {@code 0}; implementations that track the writer
* version must override it.
*
* @return the writer format version, or {@code 0} when no explicit value is carried
*/
default int writerFormatVersion() {
return 0;
}

/**
* Returns a list of {@link PartitionFieldSummary partition field summaries}.
*
Expand Down
10 changes: 10 additions & 0 deletions api/src/main/java/org/apache/iceberg/Snapshot.java
Original file line number Diff line number Diff line change
Expand Up @@ -170,6 +170,16 @@ default Iterable<DeleteFile> removedDeleteFiles(FileIO io) {
*/
String manifestListLocation();

/**
* Returns the location of this snapshot's root manifest, or null if this snapshot uses a
* manifest list. Root manifests are introduced in format version 4 and replace manifest lists.
*
* <p>This default implementation always returns null, so implementations that do not override it
* report no root manifest.
*
* @return the location of the root manifest for this snapshot, or null
*/
default String rootManifestLocation() {
return null;
}

/**
* Return the id of the schema used when this snapshot was created, or null if this information is
* not available.
Expand Down
4 changes: 4 additions & 0 deletions core/src/main/java/org/apache/iceberg/BaseFile.java
Original file line number Diff line number Diff line change
Expand Up @@ -280,6 +280,10 @@ void setManifestLocation(String manifestLocation) {
this.manifestLocation = manifestLocation;
}

/**
* Stores the given ordinal in this file's {@code fileOrdinal} field, replacing any previous value.
*
* <p>NOTE(review): what the ordinal is relative to (for example, the file's position within its
* manifest) is not visible from this method — confirm against the callers.
*
* @param ordinal the ordinal value to record for this file
*/
void setFileOrdinal(long ordinal) {
this.fileOrdinal = ordinal;
}

@Override
public Long fileSequenceNumber() {
return fileSequenceNumber;
Expand Down
58 changes: 54 additions & 4 deletions core/src/main/java/org/apache/iceberg/BaseSnapshot.java
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,8 @@ class BaseSnapshot implements Snapshot {
private final long sequenceNumber;
private final long timestampMillis;
private final String manifestListLocation;
private final String rootManifestLocation;
private final int formatVersion;
private final String operation;
private final Map<String, String> summary;
private final Integer schemaId;
Expand Down Expand Up @@ -68,6 +70,41 @@ class BaseSnapshot implements Snapshot {
Long firstRowId,
Long addedRows,
String keyId) {
this(
2,
sequenceNumber,
snapshotId,
parentId,
timestampMillis,
operation,
summary,
schemaId,
manifestList,
null,
firstRowId,
addedRows,
keyId);
}

BaseSnapshot(
int formatVersion,
long sequenceNumber,
long snapshotId,
Long parentId,
long timestampMillis,
String operation,
Map<String, String> summary,
Integer schemaId,
String manifestList,
String rootManifest,
Long firstRowId,
Long addedRows,
String keyId) {
Preconditions.checkArgument(
(manifestList == null) != (rootManifest == null),
"Invalid snapshot: must have exactly one of manifest-list (%s) or root-manifest (%s)",
manifestList,
rootManifest);
Preconditions.checkArgument(
firstRowId == null || firstRowId >= 0,
"Invalid first-row-id (cannot be negative): %s",
Expand All @@ -79,6 +116,7 @@ class BaseSnapshot implements Snapshot {
Preconditions.checkArgument(
firstRowId == null || addedRows != null,
"Invalid added-rows (required when first-row-id is set): null");
this.formatVersion = formatVersion;
this.sequenceNumber = sequenceNumber;
this.snapshotId = snapshotId;
this.parentId = parentId;
Expand All @@ -87,6 +125,7 @@ class BaseSnapshot implements Snapshot {
this.summary = summary;
this.schemaId = schemaId;
this.manifestListLocation = manifestList;
this.rootManifestLocation = rootManifest;
this.v1ManifestLocations = null;
this.firstRowId = firstRowId;
this.addedRows = firstRowId != null ? addedRows : null;
Expand All @@ -102,6 +141,7 @@ class BaseSnapshot implements Snapshot {
Map<String, String> summary,
Integer schemaId,
String[] v1ManifestLocations) {
this.formatVersion = 1;
this.sequenceNumber = sequenceNumber;
this.snapshotId = snapshotId;
this.parentId = parentId;
Expand All @@ -110,6 +150,7 @@ class BaseSnapshot implements Snapshot {
this.summary = summary;
this.schemaId = schemaId;
this.manifestListLocation = null;
this.rootManifestLocation = null;
this.v1ManifestLocations = v1ManifestLocations;
this.firstRowId = null;
this.addedRows = null;
Expand Down Expand Up @@ -182,10 +223,14 @@ private void cacheManifests(FileIO fileIO) {

if (allManifests == null) {
// if manifests isn't set, then the snapshotFile is set and should be read to get the list
this.allManifests =
ManifestLists.read(
ManifestLists.newInputFile(
fileIO, new BaseManifestListFile(manifestListLocation, keyId)));
if (formatVersion >= 4) {
this.allManifests = RootManifests.read(fileIO.newInputFile(rootManifestLocation));
} else {
this.allManifests =
ManifestLists.read(
ManifestLists.newInputFile(
fileIO, new BaseManifestListFile(manifestListLocation, keyId)));
}
}

if (dataManifests == null || deleteManifests == null) {
Expand Down Expand Up @@ -261,6 +306,11 @@ public String manifestListLocation() {
return manifestListLocation;
}

// Returns the root manifest location this snapshot was constructed with; the field is null when
// the snapshot was built without a root manifest.
@Override
public String rootManifestLocation() {
return rootManifestLocation;
}

private void cacheDeleteFileChanges(FileIO fileIO) {
Preconditions.checkArgument(fileIO != null, "Cannot cache delete file changes: FileIO is null");

Expand Down
Loading