diff --git a/api/src/main/java/org/apache/iceberg/util/UUIDUtil.java b/api/src/main/java/org/apache/iceberg/util/UUIDUtil.java
index 3146a3763cce..94ec7606b588 100644
--- a/api/src/main/java/org/apache/iceberg/util/UUIDUtil.java
+++ b/api/src/main/java/org/apache/iceberg/util/UUIDUtil.java
@@ -26,6 +26,9 @@
public class UUIDUtil {
private static final SecureRandom SECURE_RANDOM = new SecureRandom();
+ private static final byte[] HEX_DIGITS = {
+ '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f'
+ };
private UUIDUtil() {}
@@ -82,6 +85,106 @@ public static ByteBuffer convertToByteBuffer(UUID value, ByteBuffer reuse) {
return buffer;
}
+ /**
+ * Parses the ASCII bytes of a canonical UUID string (36 bytes, layout {@code 8-4-4-4-12})
+ * directly into the 16-byte big-endian representation, without allocating an intermediate {@link
+ * String} or {@link UUID}.
+ *
+ *
+ * <p>This is intended for hot write paths where the UUID is already available as the ASCII bytes
+ * of its string form (for example {@code UTF8String#getBytes()}). The output is byte-for-byte
+ * identical to {@code convertToByteBuffer(UUID.fromString(s), reuse)}.
+ *
+ * @param uuidStringBytes ASCII bytes of a canonical UUID string (length 36)
+ * @param reuse a 16-byte buffer to reuse, or null to allocate a new one
+ * @return a big-endian buffer holding the 16-byte UUID representation
+ */
+ public static ByteBuffer convertToByteBuffer(byte[] uuidStringBytes, ByteBuffer reuse) {
+ Preconditions.checkArgument(
+ uuidStringBytes.length == 36,
+ "Invalid UUID string: expected 36 ASCII bytes, got %s",
+ uuidStringBytes.length);
+ checkDash(uuidStringBytes, 8);
+ checkDash(uuidStringBytes, 13);
+ checkDash(uuidStringBytes, 18);
+ checkDash(uuidStringBytes, 23);
+
+ long mostSigBits = hexToLong(uuidStringBytes, 0, 8);
+ mostSigBits <<= 16;
+ mostSigBits |= hexToLong(uuidStringBytes, 9, 13);
+ mostSigBits <<= 16;
+ mostSigBits |= hexToLong(uuidStringBytes, 14, 18);
+
+ long leastSigBits = hexToLong(uuidStringBytes, 19, 23);
+ leastSigBits <<= 48;
+ leastSigBits |= hexToLong(uuidStringBytes, 24, 36);
+
+ ByteBuffer buffer = reuse != null ? reuse : ByteBuffer.allocate(16);
+ buffer.order(ByteOrder.BIG_ENDIAN);
+ buffer.putLong(0, mostSigBits);
+ buffer.putLong(8, leastSigBits);
+ return buffer;
+ }
+
+ /**
+ * Renders the 16-byte big-endian UUID representation as the ASCII bytes of its canonical string
+ * form (36 bytes, layout {@code 8-4-4-4-12}), without allocating an intermediate {@link UUID} or
+ * {@link String}.
+ *
+ *
+ * <p>This is intended for hot read paths: the result can be wrapped with {@code
+ * UTF8String#fromBytes} without an intermediate {@link String}. The output is byte-for-byte
+ * identical to {@code convert(uuidBytes).toString().getBytes(US_ASCII)}. Two longs are read
+ * relatively from the buffer's current position, mirroring {@link #convert(ByteBuffer)}.
+ *
+ * @param uuidBytes a buffer positioned at the 16 UUID bytes (big-endian)
+ * @param reuse a 36-byte array to reuse, or null to allocate a new one
+ * @return a 36-byte array holding the ASCII bytes of the canonical UUID string
+ */
+ public static byte[] convertToStringBytes(ByteBuffer uuidBytes, byte[] reuse) {
+ Preconditions.checkArgument(
+ reuse == null || reuse.length == 36,
+ "Invalid reuse buffer: expected 36 bytes, got %s",
+ reuse == null ? 0 : reuse.length);
+ long mostSigBits = uuidBytes.getLong();
+ long leastSigBits = uuidBytes.getLong();
+
+ byte[] out = reuse != null ? reuse : new byte[36];
+ formatHex(out, 0, mostSigBits >>> 32, 8);
+ out[8] = '-';
+ formatHex(out, 9, mostSigBits >>> 16, 4);
+ out[13] = '-';
+ formatHex(out, 14, mostSigBits, 4);
+ out[18] = '-';
+ formatHex(out, 19, leastSigBits >>> 48, 4);
+ out[23] = '-';
+ formatHex(out, 24, leastSigBits, 12);
+ return out;
+ }
+
+ private static void checkDash(byte[] bytes, int pos) {
+ Preconditions.checkArgument(
+ bytes[pos] == '-', "Invalid UUID string: expected '-' at position %s", pos);
+ }
+
+ private static long hexToLong(byte[] bytes, int start, int end) {
+ long result = 0;
+ for (int i = start; i < end; i += 1) {
+ int digit = Character.digit((char) (bytes[i] & 0xFF), 16);
+ Preconditions.checkArgument(
+ digit >= 0, "Invalid UUID string: not a hex digit at position %s", i);
+ result = (result << 4) | digit;
+ }
+
+ return result;
+ }
+
+ private static void formatHex(byte[] out, int offset, long value, int digits) {
+ long bits = value;
+ for (int i = digits - 1; i >= 0; i -= 1) {
+ out[offset + i] = HEX_DIGITS[(int) (bits & 0xF)];
+ bits >>>= 4;
+ }
+ }
+
/**
* Generate a RFC 9562 UUIDv7.
*
diff --git a/api/src/test/java/org/apache/iceberg/util/TestUUIDUtil.java b/api/src/test/java/org/apache/iceberg/util/TestUUIDUtil.java
index c5f85c2f20b3..db0efc69444b 100644
--- a/api/src/test/java/org/apache/iceberg/util/TestUUIDUtil.java
+++ b/api/src/test/java/org/apache/iceberg/util/TestUUIDUtil.java
@@ -19,7 +19,11 @@
package org.apache.iceberg.util;
import static org.assertj.core.api.Assertions.assertThat;
+import static org.assertj.core.api.Assertions.assertThatThrownBy;
+import java.nio.ByteBuffer;
+import java.nio.charset.StandardCharsets;
+import java.util.Locale;
import java.util.UUID;
import org.junit.jupiter.api.Test;
@@ -31,4 +35,123 @@ public void uuidV7HasVersionAndVariant() {
assertThat(uuid.version()).isEqualTo(7);
assertThat(uuid.variant()).isEqualTo(2);
}
+
+ @Test
+ public void convertStringBytesMatchesUuidConversion() {
+ for (int i = 0; i < 100; i += 1) {
+ UUID uuid = UUID.randomUUID();
+ byte[] stringBytes = uuid.toString().getBytes(StandardCharsets.US_ASCII);
+
+ byte[] fromString = UUIDUtil.convertToByteBuffer(stringBytes, null).array();
+ byte[] fromUuid = UUIDUtil.convertToByteBuffer(uuid, null).array();
+
+ assertThat(fromString).containsExactly(fromUuid).containsExactly(UUIDUtil.convert(uuid));
+ }
+ }
+
+ @Test
+ public void convertStringBytesHandlesEdgeValues() {
+ for (String value :
+ new String[] {
+ "00000000-0000-0000-0000-000000000000",
+ "ffffffff-ffff-ffff-ffff-ffffffffffff",
+ "12345678-90ab-cdef-1234-567890abcdef"
+ }) {
+ UUID uuid = UUID.fromString(value);
+ byte[] stringBytes = value.getBytes(StandardCharsets.US_ASCII);
+
+ assertThat(UUIDUtil.convertToByteBuffer(stringBytes, null).array())
+ .containsExactly(UUIDUtil.convert(uuid));
+ }
+ }
+
+ @Test
+ public void convertStringBytesAcceptsUppercaseHex() {
+ UUID uuid = UUID.randomUUID();
+ byte[] upper = uuid.toString().toUpperCase(Locale.ROOT).getBytes(StandardCharsets.US_ASCII);
+
+ assertThat(UUIDUtil.convertToByteBuffer(upper, null).array())
+ .containsExactly(UUIDUtil.convert(uuid));
+ }
+
+ @Test
+ public void convertStringBytesReusesBuffer() {
+ UUID uuid = UUID.randomUUID();
+ byte[] stringBytes = uuid.toString().getBytes(StandardCharsets.US_ASCII);
+
+ ByteBuffer reuse = ByteBuffer.allocate(16);
+ ByteBuffer result = UUIDUtil.convertToByteBuffer(stringBytes, reuse);
+
+ assertThat(result).isSameAs(reuse);
+ assertThat(result.array()).containsExactly(UUIDUtil.convert(uuid));
+ }
+
+ @Test
+ public void convertToStringBytesMatchesUuidToString() {
+ for (int i = 0; i < 100; i += 1) {
+ UUID uuid = UUID.randomUUID();
+ ByteBuffer raw = UUIDUtil.convertToByteBuffer(uuid, null);
+
+ byte[] stringBytes = UUIDUtil.convertToStringBytes(raw, null);
+
+ assertThat(new String(stringBytes, StandardCharsets.US_ASCII)).isEqualTo(uuid.toString());
+ }
+ }
+
+ @Test
+ public void convertToStringBytesIsRoundTrippable() {
+ UUID uuid = UUID.randomUUID();
+
+ byte[] stringBytes =
+ UUIDUtil.convertToStringBytes(UUIDUtil.convertToByteBuffer(uuid, null), null);
+ byte[] raw = UUIDUtil.convertToByteBuffer(stringBytes, null).array();
+
+ assertThat(UUIDUtil.convert(raw)).isEqualTo(uuid);
+ }
+
+ @Test
+ public void convertToStringBytesReusesBuffer() {
+ UUID uuid = UUID.randomUUID();
+
+ byte[] reuse = new byte[36];
+ byte[] result = UUIDUtil.convertToStringBytes(UUIDUtil.convertToByteBuffer(uuid, null), reuse);
+
+ assertThat(result).isSameAs(reuse);
+ assertThat(new String(result, StandardCharsets.US_ASCII)).isEqualTo(uuid.toString());
+ }
+
+ @Test
+ public void convertToByteBufferRejectsMalformedInput() {
+ assertThatThrownBy(
+ () ->
+ UUIDUtil.convertToByteBuffer("too-short".getBytes(StandardCharsets.US_ASCII), null))
+ .isInstanceOf(IllegalArgumentException.class)
+ .hasMessageContaining("expected 36 ASCII bytes");
+
+ // valid length, but the dash positions hold non-dash characters
+ assertThatThrownBy(
+ () ->
+ UUIDUtil.convertToByteBuffer(
+ "000000000000000000000000000000000000".getBytes(StandardCharsets.US_ASCII),
+ null))
+ .isInstanceOf(IllegalArgumentException.class)
+ .hasMessageContaining("expected '-'");
+
+ // valid layout, but a non-hex character in a hex group
+ assertThatThrownBy(
+ () ->
+ UUIDUtil.convertToByteBuffer(
+ "zzzzzzzz-0000-0000-0000-000000000000".getBytes(StandardCharsets.US_ASCII),
+ null))
+ .isInstanceOf(IllegalArgumentException.class)
+ .hasMessageContaining("not a hex digit");
+ }
+
+ @Test
+ public void convertToStringBytesRejectsWrongReuseLength() {
+ ByteBuffer raw = UUIDUtil.convertToByteBuffer(UUID.randomUUID(), null);
+ assertThatThrownBy(() -> UUIDUtil.convertToStringBytes(raw, new byte[16]))
+ .isInstanceOf(IllegalArgumentException.class)
+ .hasMessageContaining("expected 36 bytes");
+ }
}
diff --git a/spark/v3.4/spark/src/main/java/org/apache/iceberg/spark/data/SparkOrcValueReaders.java b/spark/v3.4/spark/src/main/java/org/apache/iceberg/spark/data/SparkOrcValueReaders.java
index 670537fbf872..52e8aa6d561e 100644
--- a/spark/v3.4/spark/src/main/java/org/apache/iceberg/spark/data/SparkOrcValueReaders.java
+++ b/spark/v3.4/spark/src/main/java/org/apache/iceberg/spark/data/SparkOrcValueReaders.java
@@ -186,7 +186,8 @@ public UTF8String nonNullRead(ColumnVector vector, int row) {
BytesColumnVector bytesVector = (BytesColumnVector) vector;
ByteBuffer buffer =
ByteBuffer.wrap(bytesVector.vector[row], bytesVector.start[row], bytesVector.length[row]);
- return UTF8String.fromString(UUIDUtil.convert(buffer).toString());
+ // a fresh array is required because UTF8String.fromBytes wraps it without copying
+ return UTF8String.fromBytes(UUIDUtil.convertToStringBytes(buffer, null));
}
}
diff --git a/spark/v3.4/spark/src/main/java/org/apache/iceberg/spark/data/SparkOrcValueWriters.java b/spark/v3.4/spark/src/main/java/org/apache/iceberg/spark/data/SparkOrcValueWriters.java
index 7f9810e4c60c..27029e757460 100644
--- a/spark/v3.4/spark/src/main/java/org/apache/iceberg/spark/data/SparkOrcValueWriters.java
+++ b/spark/v3.4/spark/src/main/java/org/apache/iceberg/spark/data/SparkOrcValueWriters.java
@@ -20,7 +20,6 @@
import java.nio.ByteBuffer;
import java.util.List;
-import java.util.UUID;
import java.util.stream.Stream;
import org.apache.iceberg.FieldMetrics;
import org.apache.iceberg.orc.OrcValueWriter;
@@ -88,7 +87,7 @@ public void nonNullWrite(int rowId, UTF8String data, ColumnVector output) {
// ((BytesColumnVector) output).setRef(..) just stores a reference to the passed byte[], so
// can't use a ThreadLocal ByteBuffer here like in other places because subsequent writes
// would then overwrite previous values
- ByteBuffer buffer = UUIDUtil.convertToByteBuffer(UUID.fromString(data.toString()));
+ ByteBuffer buffer = UUIDUtil.convertToByteBuffer(data.getBytes(), null);
((BytesColumnVector) output).setRef(rowId, buffer.array(), 0, buffer.array().length);
}
}
diff --git a/spark/v3.4/spark/src/main/java/org/apache/iceberg/spark/data/SparkParquetReaders.java b/spark/v3.4/spark/src/main/java/org/apache/iceberg/spark/data/SparkParquetReaders.java
index f9300099fe25..e9efe4562409 100644
--- a/spark/v3.4/spark/src/main/java/org/apache/iceberg/spark/data/SparkParquetReaders.java
+++ b/spark/v3.4/spark/src/main/java/org/apache/iceberg/spark/data/SparkParquetReaders.java
@@ -415,7 +415,9 @@ private static class UUIDReader extends PrimitiveReader {
@Override
@SuppressWarnings("ByteBufferBackingArray")
public UTF8String read(UTF8String ignored) {
- return UTF8String.fromString(UUIDUtil.convert(column.nextBinary().toByteBuffer()).toString());
+ // a fresh array is required because UTF8String.fromBytes wraps it without copying
+ return UTF8String.fromBytes(
+ UUIDUtil.convertToStringBytes(column.nextBinary().toByteBuffer(), null));
}
}
diff --git a/spark/v3.4/spark/src/main/java/org/apache/iceberg/spark/data/SparkParquetWriters.java b/spark/v3.4/spark/src/main/java/org/apache/iceberg/spark/data/SparkParquetWriters.java
index a1ed6c66f337..99a74d5c06b8 100644
--- a/spark/v3.4/spark/src/main/java/org/apache/iceberg/spark/data/SparkParquetWriters.java
+++ b/spark/v3.4/spark/src/main/java/org/apache/iceberg/spark/data/SparkParquetWriters.java
@@ -25,7 +25,6 @@
import java.util.Map;
import java.util.NoSuchElementException;
import java.util.Optional;
-import java.util.UUID;
import java.util.stream.IntStream;
import org.apache.iceberg.Schema;
import org.apache.iceberg.parquet.ParquetValueReaders.ReusableEntry;
@@ -436,8 +435,7 @@ private UUIDWriter(ColumnDescriptor desc) {
@Override
public void write(int repetitionLevel, UTF8String string) {
- UUID uuid = UUID.fromString(string.toString());
- ByteBuffer buffer = UUIDUtil.convertToByteBuffer(uuid, BUFFER.get());
+ ByteBuffer buffer = UUIDUtil.convertToByteBuffer(string.getBytes(), BUFFER.get());
column.writeBinary(repetitionLevel, Binary.fromReusedByteBuffer(buffer));
}
}
diff --git a/spark/v3.4/spark/src/main/java/org/apache/iceberg/spark/data/SparkValueReaders.java b/spark/v3.4/spark/src/main/java/org/apache/iceberg/spark/data/SparkValueReaders.java
index 7e65535f5ecb..81dc3c9bd5cc 100644
--- a/spark/v3.4/spark/src/main/java/org/apache/iceberg/spark/data/SparkValueReaders.java
+++ b/spark/v3.4/spark/src/main/java/org/apache/iceberg/spark/data/SparkValueReaders.java
@@ -141,7 +141,8 @@ public UTF8String read(Decoder decoder, Object reuse) throws IOException {
decoder.readFixed(buffer.array(), 0, 16);
- return UTF8String.fromString(UUIDUtil.convert(buffer).toString());
+ // a fresh array is required because UTF8String.fromBytes wraps it without copying
+ return UTF8String.fromBytes(UUIDUtil.convertToStringBytes(buffer, null));
}
}
diff --git a/spark/v3.4/spark/src/main/java/org/apache/iceberg/spark/data/SparkValueWriters.java b/spark/v3.4/spark/src/main/java/org/apache/iceberg/spark/data/SparkValueWriters.java
index bb8218bd83df..01870518c4b4 100644
--- a/spark/v3.4/spark/src/main/java/org/apache/iceberg/spark/data/SparkValueWriters.java
+++ b/spark/v3.4/spark/src/main/java/org/apache/iceberg/spark/data/SparkValueWriters.java
@@ -23,7 +23,6 @@
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.util.List;
-import java.util.UUID;
import org.apache.avro.io.Encoder;
import org.apache.avro.util.Utf8;
import org.apache.iceberg.avro.ValueWriter;
@@ -101,10 +100,8 @@ private UUIDWriter() {}
@Override
@SuppressWarnings("ByteBufferBackingArray")
public void write(UTF8String s, Encoder encoder) throws IOException {
- // TODO: direct conversion from string to byte buffer
- UUID uuid = UUID.fromString(s.toString());
// calling array() is safe because the buffer is always allocated by the thread-local
- encoder.writeFixed(UUIDUtil.convertToByteBuffer(uuid, BUFFER.get()).array());
+ encoder.writeFixed(UUIDUtil.convertToByteBuffer(s.getBytes(), BUFFER.get()).array());
}
}
diff --git a/spark/v3.5/spark/src/main/java/org/apache/iceberg/spark/data/SparkOrcValueReaders.java b/spark/v3.5/spark/src/main/java/org/apache/iceberg/spark/data/SparkOrcValueReaders.java
index 670537fbf872..52e8aa6d561e 100644
--- a/spark/v3.5/spark/src/main/java/org/apache/iceberg/spark/data/SparkOrcValueReaders.java
+++ b/spark/v3.5/spark/src/main/java/org/apache/iceberg/spark/data/SparkOrcValueReaders.java
@@ -186,7 +186,8 @@ public UTF8String nonNullRead(ColumnVector vector, int row) {
BytesColumnVector bytesVector = (BytesColumnVector) vector;
ByteBuffer buffer =
ByteBuffer.wrap(bytesVector.vector[row], bytesVector.start[row], bytesVector.length[row]);
- return UTF8String.fromString(UUIDUtil.convert(buffer).toString());
+ // a fresh array is required because UTF8String.fromBytes wraps it without copying
+ return UTF8String.fromBytes(UUIDUtil.convertToStringBytes(buffer, null));
}
}
diff --git a/spark/v3.5/spark/src/main/java/org/apache/iceberg/spark/data/SparkOrcValueWriters.java b/spark/v3.5/spark/src/main/java/org/apache/iceberg/spark/data/SparkOrcValueWriters.java
index 7f9810e4c60c..27029e757460 100644
--- a/spark/v3.5/spark/src/main/java/org/apache/iceberg/spark/data/SparkOrcValueWriters.java
+++ b/spark/v3.5/spark/src/main/java/org/apache/iceberg/spark/data/SparkOrcValueWriters.java
@@ -20,7 +20,6 @@
import java.nio.ByteBuffer;
import java.util.List;
-import java.util.UUID;
import java.util.stream.Stream;
import org.apache.iceberg.FieldMetrics;
import org.apache.iceberg.orc.OrcValueWriter;
@@ -88,7 +87,7 @@ public void nonNullWrite(int rowId, UTF8String data, ColumnVector output) {
// ((BytesColumnVector) output).setRef(..) just stores a reference to the passed byte[], so
// can't use a ThreadLocal ByteBuffer here like in other places because subsequent writes
// would then overwrite previous values
- ByteBuffer buffer = UUIDUtil.convertToByteBuffer(UUID.fromString(data.toString()));
+ ByteBuffer buffer = UUIDUtil.convertToByteBuffer(data.getBytes(), null);
((BytesColumnVector) output).setRef(rowId, buffer.array(), 0, buffer.array().length);
}
}
diff --git a/spark/v3.5/spark/src/main/java/org/apache/iceberg/spark/data/SparkParquetReaders.java b/spark/v3.5/spark/src/main/java/org/apache/iceberg/spark/data/SparkParquetReaders.java
index 87c97cc7a663..062ddff91760 100644
--- a/spark/v3.5/spark/src/main/java/org/apache/iceberg/spark/data/SparkParquetReaders.java
+++ b/spark/v3.5/spark/src/main/java/org/apache/iceberg/spark/data/SparkParquetReaders.java
@@ -378,7 +378,9 @@ private static class UUIDReader extends PrimitiveReader {
@Override
@SuppressWarnings("ByteBufferBackingArray")
public UTF8String read(UTF8String ignored) {
- return UTF8String.fromString(UUIDUtil.convert(column.nextBinary().toByteBuffer()).toString());
+ // a fresh array is required because UTF8String.fromBytes wraps it without copying
+ return UTF8String.fromBytes(
+ UUIDUtil.convertToStringBytes(column.nextBinary().toByteBuffer(), null));
}
}
diff --git a/spark/v3.5/spark/src/main/java/org/apache/iceberg/spark/data/SparkParquetWriters.java b/spark/v3.5/spark/src/main/java/org/apache/iceberg/spark/data/SparkParquetWriters.java
index a1ed6c66f337..99a74d5c06b8 100644
--- a/spark/v3.5/spark/src/main/java/org/apache/iceberg/spark/data/SparkParquetWriters.java
+++ b/spark/v3.5/spark/src/main/java/org/apache/iceberg/spark/data/SparkParquetWriters.java
@@ -25,7 +25,6 @@
import java.util.Map;
import java.util.NoSuchElementException;
import java.util.Optional;
-import java.util.UUID;
import java.util.stream.IntStream;
import org.apache.iceberg.Schema;
import org.apache.iceberg.parquet.ParquetValueReaders.ReusableEntry;
@@ -436,8 +435,7 @@ private UUIDWriter(ColumnDescriptor desc) {
@Override
public void write(int repetitionLevel, UTF8String string) {
- UUID uuid = UUID.fromString(string.toString());
- ByteBuffer buffer = UUIDUtil.convertToByteBuffer(uuid, BUFFER.get());
+ ByteBuffer buffer = UUIDUtil.convertToByteBuffer(string.getBytes(), BUFFER.get());
column.writeBinary(repetitionLevel, Binary.fromReusedByteBuffer(buffer));
}
}
diff --git a/spark/v3.5/spark/src/main/java/org/apache/iceberg/spark/data/SparkValueReaders.java b/spark/v3.5/spark/src/main/java/org/apache/iceberg/spark/data/SparkValueReaders.java
index 7e65535f5ecb..81dc3c9bd5cc 100644
--- a/spark/v3.5/spark/src/main/java/org/apache/iceberg/spark/data/SparkValueReaders.java
+++ b/spark/v3.5/spark/src/main/java/org/apache/iceberg/spark/data/SparkValueReaders.java
@@ -141,7 +141,8 @@ public UTF8String read(Decoder decoder, Object reuse) throws IOException {
decoder.readFixed(buffer.array(), 0, 16);
- return UTF8String.fromString(UUIDUtil.convert(buffer).toString());
+ // a fresh array is required because UTF8String.fromBytes wraps it without copying
+ return UTF8String.fromBytes(UUIDUtil.convertToStringBytes(buffer, null));
}
}
diff --git a/spark/v3.5/spark/src/main/java/org/apache/iceberg/spark/data/SparkValueWriters.java b/spark/v3.5/spark/src/main/java/org/apache/iceberg/spark/data/SparkValueWriters.java
index bb8218bd83df..01870518c4b4 100644
--- a/spark/v3.5/spark/src/main/java/org/apache/iceberg/spark/data/SparkValueWriters.java
+++ b/spark/v3.5/spark/src/main/java/org/apache/iceberg/spark/data/SparkValueWriters.java
@@ -23,7 +23,6 @@
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.util.List;
-import java.util.UUID;
import org.apache.avro.io.Encoder;
import org.apache.avro.util.Utf8;
import org.apache.iceberg.avro.ValueWriter;
@@ -101,10 +100,8 @@ private UUIDWriter() {}
@Override
@SuppressWarnings("ByteBufferBackingArray")
public void write(UTF8String s, Encoder encoder) throws IOException {
- // TODO: direct conversion from string to byte buffer
- UUID uuid = UUID.fromString(s.toString());
// calling array() is safe because the buffer is always allocated by the thread-local
- encoder.writeFixed(UUIDUtil.convertToByteBuffer(uuid, BUFFER.get()).array());
+ encoder.writeFixed(UUIDUtil.convertToByteBuffer(s.getBytes(), BUFFER.get()).array());
}
}
diff --git a/spark/v4.0/spark/src/main/java/org/apache/iceberg/spark/data/SparkOrcValueReaders.java b/spark/v4.0/spark/src/main/java/org/apache/iceberg/spark/data/SparkOrcValueReaders.java
index 670537fbf872..52e8aa6d561e 100644
--- a/spark/v4.0/spark/src/main/java/org/apache/iceberg/spark/data/SparkOrcValueReaders.java
+++ b/spark/v4.0/spark/src/main/java/org/apache/iceberg/spark/data/SparkOrcValueReaders.java
@@ -186,7 +186,8 @@ public UTF8String nonNullRead(ColumnVector vector, int row) {
BytesColumnVector bytesVector = (BytesColumnVector) vector;
ByteBuffer buffer =
ByteBuffer.wrap(bytesVector.vector[row], bytesVector.start[row], bytesVector.length[row]);
- return UTF8String.fromString(UUIDUtil.convert(buffer).toString());
+ // a fresh array is required because UTF8String.fromBytes wraps it without copying
+ return UTF8String.fromBytes(UUIDUtil.convertToStringBytes(buffer, null));
}
}
diff --git a/spark/v4.0/spark/src/main/java/org/apache/iceberg/spark/data/SparkOrcValueWriters.java b/spark/v4.0/spark/src/main/java/org/apache/iceberg/spark/data/SparkOrcValueWriters.java
index 7f9810e4c60c..27029e757460 100644
--- a/spark/v4.0/spark/src/main/java/org/apache/iceberg/spark/data/SparkOrcValueWriters.java
+++ b/spark/v4.0/spark/src/main/java/org/apache/iceberg/spark/data/SparkOrcValueWriters.java
@@ -20,7 +20,6 @@
import java.nio.ByteBuffer;
import java.util.List;
-import java.util.UUID;
import java.util.stream.Stream;
import org.apache.iceberg.FieldMetrics;
import org.apache.iceberg.orc.OrcValueWriter;
@@ -88,7 +87,7 @@ public void nonNullWrite(int rowId, UTF8String data, ColumnVector output) {
// ((BytesColumnVector) output).setRef(..) just stores a reference to the passed byte[], so
// can't use a ThreadLocal ByteBuffer here like in other places because subsequent writes
// would then overwrite previous values
- ByteBuffer buffer = UUIDUtil.convertToByteBuffer(UUID.fromString(data.toString()));
+ ByteBuffer buffer = UUIDUtil.convertToByteBuffer(data.getBytes(), null);
((BytesColumnVector) output).setRef(rowId, buffer.array(), 0, buffer.array().length);
}
}
diff --git a/spark/v4.0/spark/src/main/java/org/apache/iceberg/spark/data/SparkParquetReaders.java b/spark/v4.0/spark/src/main/java/org/apache/iceberg/spark/data/SparkParquetReaders.java
index 28a9a31c6a6e..141df685f2b6 100644
--- a/spark/v4.0/spark/src/main/java/org/apache/iceberg/spark/data/SparkParquetReaders.java
+++ b/spark/v4.0/spark/src/main/java/org/apache/iceberg/spark/data/SparkParquetReaders.java
@@ -395,7 +395,9 @@ private static class UUIDReader extends PrimitiveReader {
@Override
@SuppressWarnings("ByteBufferBackingArray")
public UTF8String read(UTF8String ignored) {
- return UTF8String.fromString(UUIDUtil.convert(column.nextBinary().toByteBuffer()).toString());
+ // a fresh array is required because UTF8String.fromBytes wraps it without copying
+ return UTF8String.fromBytes(
+ UUIDUtil.convertToStringBytes(column.nextBinary().toByteBuffer(), null));
}
}
diff --git a/spark/v4.0/spark/src/main/java/org/apache/iceberg/spark/data/SparkParquetWriters.java b/spark/v4.0/spark/src/main/java/org/apache/iceberg/spark/data/SparkParquetWriters.java
index ba816efc0ac8..c6411cd88472 100644
--- a/spark/v4.0/spark/src/main/java/org/apache/iceberg/spark/data/SparkParquetWriters.java
+++ b/spark/v4.0/spark/src/main/java/org/apache/iceberg/spark/data/SparkParquetWriters.java
@@ -26,7 +26,6 @@
import java.util.Map;
import java.util.NoSuchElementException;
import java.util.Optional;
-import java.util.UUID;
import java.util.stream.IntStream;
import java.util.stream.Stream;
import org.apache.iceberg.FieldMetrics;
@@ -456,8 +455,7 @@ private UUIDWriter(ColumnDescriptor desc) {
@Override
public void write(int repetitionLevel, UTF8String string) {
- UUID uuid = UUID.fromString(string.toString());
- ByteBuffer buffer = UUIDUtil.convertToByteBuffer(uuid, BUFFER.get());
+ ByteBuffer buffer = UUIDUtil.convertToByteBuffer(string.getBytes(), BUFFER.get());
column.writeBinary(repetitionLevel, Binary.fromReusedByteBuffer(buffer));
}
}
diff --git a/spark/v4.0/spark/src/main/java/org/apache/iceberg/spark/data/SparkValueReaders.java b/spark/v4.0/spark/src/main/java/org/apache/iceberg/spark/data/SparkValueReaders.java
index 7bcb3e3fae01..729a27ee294d 100644
--- a/spark/v4.0/spark/src/main/java/org/apache/iceberg/spark/data/SparkValueReaders.java
+++ b/spark/v4.0/spark/src/main/java/org/apache/iceberg/spark/data/SparkValueReaders.java
@@ -146,7 +146,8 @@ public UTF8String read(Decoder decoder, Object reuse) throws IOException {
decoder.readFixed(buffer.array(), 0, 16);
- return UTF8String.fromString(UUIDUtil.convert(buffer).toString());
+ // a fresh array is required because UTF8String.fromBytes wraps it without copying
+ return UTF8String.fromBytes(UUIDUtil.convertToStringBytes(buffer, null));
}
}
diff --git a/spark/v4.0/spark/src/main/java/org/apache/iceberg/spark/data/SparkValueWriters.java b/spark/v4.0/spark/src/main/java/org/apache/iceberg/spark/data/SparkValueWriters.java
index d0d3483a7690..9075ca397044 100644
--- a/spark/v4.0/spark/src/main/java/org/apache/iceberg/spark/data/SparkValueWriters.java
+++ b/spark/v4.0/spark/src/main/java/org/apache/iceberg/spark/data/SparkValueWriters.java
@@ -23,7 +23,6 @@
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.util.List;
-import java.util.UUID;
import org.apache.avro.io.Encoder;
import org.apache.avro.util.Utf8;
import org.apache.iceberg.avro.ValueWriter;
@@ -107,10 +106,8 @@ private UUIDWriter() {}
@Override
@SuppressWarnings("ByteBufferBackingArray")
public void write(UTF8String s, Encoder encoder) throws IOException {
- // TODO: direct conversion from string to byte buffer
- UUID uuid = UUID.fromString(s.toString());
// calling array() is safe because the buffer is always allocated by the thread-local
- encoder.writeFixed(UUIDUtil.convertToByteBuffer(uuid, BUFFER.get()).array());
+ encoder.writeFixed(UUIDUtil.convertToByteBuffer(s.getBytes(), BUFFER.get()).array());
}
}
diff --git a/spark/v4.1/spark/src/main/java/org/apache/iceberg/spark/data/SparkOrcValueReaders.java b/spark/v4.1/spark/src/main/java/org/apache/iceberg/spark/data/SparkOrcValueReaders.java
index 67664ac6c753..ca23c75538b7 100644
--- a/spark/v4.1/spark/src/main/java/org/apache/iceberg/spark/data/SparkOrcValueReaders.java
+++ b/spark/v4.1/spark/src/main/java/org/apache/iceberg/spark/data/SparkOrcValueReaders.java
@@ -193,7 +193,8 @@ public UTF8String nonNullRead(ColumnVector vector, int row) {
BytesColumnVector bytesVector = (BytesColumnVector) vector;
ByteBuffer buffer =
ByteBuffer.wrap(bytesVector.vector[row], bytesVector.start[row], bytesVector.length[row]);
- return UTF8String.fromString(UUIDUtil.convert(buffer).toString());
+ // a fresh array is required because UTF8String.fromBytes wraps it without copying
+ return UTF8String.fromBytes(UUIDUtil.convertToStringBytes(buffer, null));
}
}
diff --git a/spark/v4.1/spark/src/main/java/org/apache/iceberg/spark/data/SparkOrcValueWriters.java b/spark/v4.1/spark/src/main/java/org/apache/iceberg/spark/data/SparkOrcValueWriters.java
index 7f9810e4c60c..27029e757460 100644
--- a/spark/v4.1/spark/src/main/java/org/apache/iceberg/spark/data/SparkOrcValueWriters.java
+++ b/spark/v4.1/spark/src/main/java/org/apache/iceberg/spark/data/SparkOrcValueWriters.java
@@ -20,7 +20,6 @@
import java.nio.ByteBuffer;
import java.util.List;
-import java.util.UUID;
import java.util.stream.Stream;
import org.apache.iceberg.FieldMetrics;
import org.apache.iceberg.orc.OrcValueWriter;
@@ -88,7 +87,7 @@ public void nonNullWrite(int rowId, UTF8String data, ColumnVector output) {
// ((BytesColumnVector) output).setRef(..) just stores a reference to the passed byte[], so
// can't use a ThreadLocal ByteBuffer here like in other places because subsequent writes
// would then overwrite previous values
- ByteBuffer buffer = UUIDUtil.convertToByteBuffer(UUID.fromString(data.toString()));
+ ByteBuffer buffer = UUIDUtil.convertToByteBuffer(data.getBytes(), null);
((BytesColumnVector) output).setRef(rowId, buffer.array(), 0, buffer.array().length);
}
}
diff --git a/spark/v4.1/spark/src/main/java/org/apache/iceberg/spark/data/SparkParquetReaders.java b/spark/v4.1/spark/src/main/java/org/apache/iceberg/spark/data/SparkParquetReaders.java
index a19ed8060737..0ea70e3b0a26 100644
--- a/spark/v4.1/spark/src/main/java/org/apache/iceberg/spark/data/SparkParquetReaders.java
+++ b/spark/v4.1/spark/src/main/java/org/apache/iceberg/spark/data/SparkParquetReaders.java
@@ -397,7 +397,9 @@ private static class UUIDReader extends PrimitiveReader {
@Override
@SuppressWarnings("ByteBufferBackingArray")
public UTF8String read(UTF8String ignored) {
- return UTF8String.fromString(UUIDUtil.convert(column.nextBinary().toByteBuffer()).toString());
+ // a fresh array is required because UTF8String.fromBytes wraps it without copying
+ return UTF8String.fromBytes(
+ UUIDUtil.convertToStringBytes(column.nextBinary().toByteBuffer(), null));
}
}
diff --git a/spark/v4.1/spark/src/main/java/org/apache/iceberg/spark/data/SparkParquetWriters.java b/spark/v4.1/spark/src/main/java/org/apache/iceberg/spark/data/SparkParquetWriters.java
index ba816efc0ac8..c6411cd88472 100644
--- a/spark/v4.1/spark/src/main/java/org/apache/iceberg/spark/data/SparkParquetWriters.java
+++ b/spark/v4.1/spark/src/main/java/org/apache/iceberg/spark/data/SparkParquetWriters.java
@@ -26,7 +26,6 @@
import java.util.Map;
import java.util.NoSuchElementException;
import java.util.Optional;
-import java.util.UUID;
import java.util.stream.IntStream;
import java.util.stream.Stream;
import org.apache.iceberg.FieldMetrics;
@@ -456,8 +455,7 @@ private UUIDWriter(ColumnDescriptor desc) {
@Override
public void write(int repetitionLevel, UTF8String string) {
- UUID uuid = UUID.fromString(string.toString());
- ByteBuffer buffer = UUIDUtil.convertToByteBuffer(uuid, BUFFER.get());
+ ByteBuffer buffer = UUIDUtil.convertToByteBuffer(string.getBytes(), BUFFER.get());
column.writeBinary(repetitionLevel, Binary.fromReusedByteBuffer(buffer));
}
}
diff --git a/spark/v4.1/spark/src/main/java/org/apache/iceberg/spark/data/SparkValueReaders.java b/spark/v4.1/spark/src/main/java/org/apache/iceberg/spark/data/SparkValueReaders.java
index 7bcb3e3fae01..729a27ee294d 100644
--- a/spark/v4.1/spark/src/main/java/org/apache/iceberg/spark/data/SparkValueReaders.java
+++ b/spark/v4.1/spark/src/main/java/org/apache/iceberg/spark/data/SparkValueReaders.java
@@ -146,7 +146,8 @@ public UTF8String read(Decoder decoder, Object reuse) throws IOException {
decoder.readFixed(buffer.array(), 0, 16);
- return UTF8String.fromString(UUIDUtil.convert(buffer).toString());
+ // a fresh array is required because UTF8String.fromBytes wraps it without copying
+ return UTF8String.fromBytes(UUIDUtil.convertToStringBytes(buffer, null));
}
}
diff --git a/spark/v4.1/spark/src/main/java/org/apache/iceberg/spark/data/SparkValueWriters.java b/spark/v4.1/spark/src/main/java/org/apache/iceberg/spark/data/SparkValueWriters.java
index d0d3483a7690..9075ca397044 100644
--- a/spark/v4.1/spark/src/main/java/org/apache/iceberg/spark/data/SparkValueWriters.java
+++ b/spark/v4.1/spark/src/main/java/org/apache/iceberg/spark/data/SparkValueWriters.java
@@ -23,7 +23,6 @@
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.util.List;
-import java.util.UUID;
import org.apache.avro.io.Encoder;
import org.apache.avro.util.Utf8;
import org.apache.iceberg.avro.ValueWriter;
@@ -107,10 +106,8 @@ private UUIDWriter() {}
@Override
@SuppressWarnings("ByteBufferBackingArray")
public void write(UTF8String s, Encoder encoder) throws IOException {
- // TODO: direct conversion from string to byte buffer
- UUID uuid = UUID.fromString(s.toString());
// calling array() is safe because the buffer is always allocated by the thread-local
- encoder.writeFixed(UUIDUtil.convertToByteBuffer(uuid, BUFFER.get()).array());
+ encoder.writeFixed(UUIDUtil.convertToByteBuffer(s.getBytes(), BUFFER.get()).array());
}
}