Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,11 @@
import com.linkedin.openhouse.tables.common.TableType;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.UUID;
import javax.validation.ConstraintViolation;
import javax.validation.Validator;
Expand All @@ -29,6 +32,25 @@
@Component
public class OpenHouseTablesApiValidator implements TablesApiValidator {

private static final Set<String> MEMBER_ID_CONTAINS_PATTERNS =
Collections.unmodifiableSet(
new HashSet<>(
Arrays.asList(
"actorid",
"customerid",
"destid",
"entityid",
"memberid",
"profileid",
"recipientid",
"recommenderid",
"reporterid",
"senderid",
"sourceid",
"viewerid")));

private static final Set<String> MEMBER_ID_EXACT_PATTERNS = Collections.singleton("mid");

@Autowired private Validator validator;

@Autowired private RetentionPolicySpecValidator retentionPolicySpecValidator;
Expand Down Expand Up @@ -106,6 +128,9 @@ && getSchemaFromSchemaJson(createUpdateTableRequestBody.getSchema()).columns().i
validationFailures.addAll(validateUUIDForReplicaTable(createUpdateTableRequestBody));
validationFailures.addAll(
validateUpdateTimestampForReplicatedTable(createUpdateTableRequestBody));
if (createUpdateTableRequestBody.getSchema() != null) {
validateMemberIdColumnTypes(createUpdateTableRequestBody.getSchema(), validationFailures);
Copy link
Copy Markdown
Member

@abhisheknath2011 abhisheknath2011 Mar 2, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Seems like this is specific usecase for LInkedIn memberId. Can we have this validation in the internal li repo as the validation is specific to LinkedIn usecase considering this is OSS codebase? We would need to extend OpenHouseTablesApiValidator in the internal repo and that can be enabled by adding something like @primary annotation.

}

if (!validationFailures.isEmpty()) {
throw new RequestValidationFailureException(validationFailures);
Expand Down Expand Up @@ -257,6 +282,9 @@ && getSchemaFromSchemaJson(createUpdateTableRequestBody.getSchema()).columns().i
createUpdateTableRequestBody.getSortOrder(),
createUpdateTableRequestBody.getSchema(),
validationFailures);
if (createUpdateTableRequestBody.getSchema() != null) {
validateMemberIdColumnTypes(createUpdateTableRequestBody.getSchema(), validationFailures);
}
if (!validationFailures.isEmpty()) {
throw new RequestValidationFailureException(validationFailures);
}
Expand Down Expand Up @@ -404,4 +432,39 @@ private void validateSortOrder(String sortOrder, String schema, List<String> val
}
}
}

/**
* Validates that Iceberg table schemas do not use INTEGER type for member identity columns.
* Member IDs will exceed 32-bit int max — all must use LONG. See go/project-2b.
*/
private void validateMemberIdColumnTypes(String schemaJson, List<String> validationFailures) {
try {
Schema icebergSchema = getSchemaFromSchemaJson(schemaJson);
for (org.apache.iceberg.types.Types.NestedField column : icebergSchema.columns()) {
if (column.type().typeId() == org.apache.iceberg.types.Type.TypeID.INTEGER) {
String normalized = column.name().toLowerCase().replace("_", "");
if (MEMBER_ID_EXACT_PATTERNS.contains(normalized)) {
validationFailures.add(
String.format(
"schema : column '%s' uses INTEGER type for a member identity field. "
+ "Use LONG instead to avoid overflow. See go/project-2b for details.",
column.name()));
} else {
for (String pattern : MEMBER_ID_CONTAINS_PATTERNS) {
if (normalized.contains(pattern)) {
validationFailures.add(
String.format(
"schema : column '%s' uses INTEGER type for a member identity field. "
+ "Use LONG instead to avoid overflow. See go/project-2b for details.",
column.name()));
break;
}
}
}
}
}
} catch (Exception e) {
// Schema parsing is already validated elsewhere; skip member ID check if unparseable
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -1345,4 +1345,166 @@ public void validateCreateTableWithPrimaryTableType() {
.tableType(TableType.PRIMARY_TABLE)
.build()));
}

@Test
public void validateCreateTable_memberIdLongColumnSucceeds() {
String schemaJson =
"{\"type\":\"struct\",\"fields\":["
+ "{\"id\":1,\"required\":true,\"name\":\"memberId\",\"type\":\"long\"},"
+ "{\"id\":2,\"required\":true,\"name\":\"name\",\"type\":\"string\"}"
+ "]}";
assertDoesNotThrow(
() ->
tablesApiValidator.validateCreateTable(
"c",
"d",
CreateUpdateTableRequestBody.builder()
.databaseId("d")
.tableId("t")
.clusterId("c")
.schema(schemaJson)
.tableProperties(ImmutableMap.of())
.baseTableVersion("base")
.build()));
}

@Test
public void validateCreateTable_memberIdIntColumnFails() {
String schemaJson =
"{\"type\":\"struct\",\"fields\":["
+ "{\"id\":1,\"required\":true,\"name\":\"memberId\",\"type\":\"int\"},"
+ "{\"id\":2,\"required\":true,\"name\":\"name\",\"type\":\"string\"}"
+ "]}";
assertThrows(
RequestValidationFailureException.class,
() ->
tablesApiValidator.validateCreateTable(
"c",
"d",
CreateUpdateTableRequestBody.builder()
.databaseId("d")
.tableId("t")
.clusterId("c")
.schema(schemaJson)
.tableProperties(ImmutableMap.of())
.baseTableVersion("base")
.build()));
}

@Test
public void validateCreateTable_nonMemberIdIntColumnSucceeds() {
String schemaJson =
"{\"type\":\"struct\",\"fields\":["
+ "{\"id\":1,\"required\":true,\"name\":\"retryCount\",\"type\":\"int\"},"
+ "{\"id\":2,\"required\":true,\"name\":\"name\",\"type\":\"string\"}"
+ "]}";
assertDoesNotThrow(
() ->
tablesApiValidator.validateCreateTable(
"c",
"d",
CreateUpdateTableRequestBody.builder()
.databaseId("d")
.tableId("t")
.clusterId("c")
.schema(schemaJson)
.tableProperties(ImmutableMap.of())
.baseTableVersion("base")
.build()));
}

@Test
public void validateCreateTable_memberIdNewPatternsFail() {
String schemaJson =
"{\"type\":\"struct\",\"fields\":["
+ "{\"id\":1,\"required\":true,\"name\":\"customerId\",\"type\":\"int\"},"
+ "{\"id\":2,\"required\":true,\"name\":\"sourceId\",\"type\":\"int\"},"
+ "{\"id\":3,\"required\":true,\"name\":\"destId\",\"type\":\"int\"},"
+ "{\"id\":4,\"required\":true,\"name\":\"name\",\"type\":\"string\"}"
+ "]}";
assertThrows(
RequestValidationFailureException.class,
() ->
tablesApiValidator.validateCreateTable(
"c",
"d",
CreateUpdateTableRequestBody.builder()
.databaseId("d")
.tableId("t")
.clusterId("c")
.schema(schemaJson)
.tableProperties(ImmutableMap.of())
.baseTableVersion("base")
.build()));
}

@Test
public void validateCreateTable_exactMatchMidFails() {
String schemaJson =
"{\"type\":\"struct\",\"fields\":["
+ "{\"id\":1,\"required\":true,\"name\":\"mid\",\"type\":\"int\"},"
+ "{\"id\":2,\"required\":true,\"name\":\"name\",\"type\":\"string\"}"
+ "]}";
assertThrows(
RequestValidationFailureException.class,
() ->
tablesApiValidator.validateCreateTable(
"c",
"d",
CreateUpdateTableRequestBody.builder()
.databaseId("d")
.tableId("t")
.clusterId("c")
.schema(schemaJson)
.tableProperties(ImmutableMap.of())
.baseTableVersion("base")
.build()));
}

@Test
public void validateCreateTable_underscoreMemberIdFails() {
String schemaJson =
"{\"type\":\"struct\",\"fields\":["
+ "{\"id\":1,\"required\":true,\"name\":\"member_id\",\"type\":\"int\"},"
+ "{\"id\":2,\"required\":true,\"name\":\"name\",\"type\":\"string\"}"
+ "]}";
assertThrows(
RequestValidationFailureException.class,
() ->
tablesApiValidator.validateCreateTable(
"c",
"d",
CreateUpdateTableRequestBody.builder()
.databaseId("d")
.tableId("t")
.clusterId("c")
.schema(schemaJson)
.tableProperties(ImmutableMap.of())
.baseTableVersion("base")
.build()));
}

@Test
public void validateUpdateTable_memberIdIntColumnFails() {
String schemaJson =
"{\"type\":\"struct\",\"fields\":["
+ "{\"id\":1,\"required\":true,\"name\":\"memberId\",\"type\":\"int\"},"
+ "{\"id\":2,\"required\":true,\"name\":\"name\",\"type\":\"string\"}"
+ "]}";
assertThrows(
RequestValidationFailureException.class,
() ->
tablesApiValidator.validateUpdateTable(
"c",
"d",
"t",
CreateUpdateTableRequestBody.builder()
.databaseId("d")
.tableId("t")
.clusterId("c")
.schema(schemaJson)
.tableProperties(ImmutableMap.of())
.baseTableVersion("base")
.build()));
}
}