Conversation
= aarch64 assembly (Apple M1 Pro, rustc 1.93.0)
== `len()`: 13 → 11 instructions
Before:
ldrb w8, [x0, #14]
ldrh w9, [x0, #12]
orr w8, w9, w8, lsl #16
ldr w9, [x0, #8]
orr x8, x9, x8, lsl #32
ldrb w9, [x0, #15]
sub x10, x9, #192
mov w11, #16
cmp x10, #16
csel x10, x10, x11, lo
cmp w9, #208
csel x0, x10, x8, lo
ret
After:
ldr x8, [x0, #8]
and x9, x8, #0xffffffffffffff
lsr x10, x8, #56
sub x10, x10, #192
mov w11, #16
cmp x10, #16
csel x10, x10, x11, lo
lsr x8, x8, #60
cmp x8, #13
csel x0, x10, x9, lo
ret
== `as_str()` / `as_bytes()`: 16 → 13 instructions
Before:
ldrb w8, [x0, #14]
ldrh w9, [x0, #12]
orr w8, w9, w8, lsl #16
ldr w9, [x0, #8]
orr x8, x9, x8, lsl #32
ldrb w9, [x0, #15]
sub x10, x9, #192
mov w11, #16
cmp x10, #16
csel x10, x10, x11, lo
cmp w9, #208
csel x1, x10, x8, lo
ldr x8, [x0]
cmp w9, #207
csel x0, x8, x0, hi
ret
After:
ldp x9, x8, [x0]
lsr x10, x8, #56
sub x10, x10, #192
mov w11, #16
cmp x10, #16
csel x10, x10, x11, lo
and x11, x8, #0xffffffffffffff
lsr x8, x8, #60
cmp x8, #13
csel x1, x10, x11, lo
cmp x8, #12
csel x0, x9, x0, hi
ret
= x86_64 assembly (Apple M1 Pro, --target=x86_64-unknown-linux-gnu, rustc 1.93.0)
== `len()`: 15 → 13 instructions
Before:
movzx eax, word ptr [rdi + 12]
movzx ecx, byte ptr [rdi + 14]
shl ecx, 16
or ecx, eax
shl rcx, 32
mov edx, dword ptr [rdi + 8]
or rdx, rcx
movzx ecx, byte ptr [rdi + 15]
lea rsi, [rcx - 192]
cmp rsi, 16
mov eax, 16
cmovb rax, rsi
cmp rcx, 208
cmovae rax, rdx
ret
After:
mov rcx, qword ptr [rdi + 8]
movabs rdx, 72057594037927935
and rdx, rcx
mov rsi, rcx
shr rsi, 56
add rsi, -192
cmp rsi, 16
mov eax, 16
cmovb rax, rsi
shr rcx, 60
cmp ecx, 13
cmovae rax, rdx
ret
== `as_str()` / `as_bytes()`: 17 → 19 instructions
Before:
mov rax, rdi
movzx ecx, byte ptr [rdi + 15]
lea rsi, [rcx - 192]
cmp rsi, 16
mov edx, 16
cmovb rdx, rsi
cmp rcx, 208
jb .LBB11_2
movzx ecx, word ptr [rax + 12]
movzx esi, byte ptr [rax + 14]
shl esi, 16
or esi, ecx
shl rsi, 32
mov edx, dword ptr [rax + 8]
or rdx, rsi
mov rax, qword ptr [rax]
.LBB11_2:
ret
After:
mov rax, rdi
mov rcx, qword ptr [rdi + 8]
movabs rsi, 72057594037927935
and rsi, rcx
mov rdi, rcx
shr rdi, 56
add rdi, -192
cmp rdi, 16
mov edx, 16
cmovb rdx, rdi
movabs rdi, -3458764513820540929
inc rdi
cmp rcx, rdi
cmovae rdx, rsi
shr rcx, 60
cmp ecx, 13
jb .LBB11_2
mov rax, qword ptr [rax]
.LBB11_2:
ret
= aarch64 assembly (Apple M1 Pro, rustc 1.93.0)
== `len()`: 11 → 10 instructions
Before:
ldr x8, [x0, #8]
and x9, x8, #0xffffffffffffff
lsr x10, x8, #56
sub x10, x10, #192
mov w11, #16
cmp x10, #16
csel x10, x10, x11, lo
lsr x8, x8, #60
cmp x8, #13
csel x0, x10, x9, lo
ret
After:
ldrb w8, [x0, #15]
sub x9, x8, #192
mov w10, #16
cmp x9, #16
csel x9, x9, x10, lo
ldr x10, [x0, #8]
and x10, x10, #0xffffffffffffff
cmp w8, #208
csel x0, x9, x10, lo
ret
== `as_str()` / `as_bytes()`: 13 → 12 instructions
Before:
ldp x9, x8, [x0]
lsr x10, x8, #56
sub x10, x10, #192
mov w11, #16
cmp x10, #16
csel x10, x10, x11, lo
and x11, x8, #0xffffffffffffff
lsr x8, x8, #60
cmp x8, #13
csel x1, x10, x11, lo
cmp x8, #12
csel x0, x9, x0, hi
ret
After:
ldrb w8, [x0, #15]
sub x9, x8, #192
mov w10, #16
cmp x9, #16
csel x9, x9, x10, lo
ldp x11, x10, [x0]
cmp w8, #207
csel x0, x11, x0, hi
and x10, x10, #0xffffffffffffff
cmp w8, #208
csel x1, x9, x10, lo
ret
= x86_64 assembly (Apple M1 Pro, --target=x86_64-unknown-linux-gnu, rustc 1.93.0)
== `len()`: 13 → 10 instructions
Before:
mov rcx, qword ptr [rdi + 8]
movabs rdx, 72057594037927935
and rdx, rcx
mov rsi, rcx
shr rsi, 56
add rsi, -192
cmp rsi, 16
mov eax, 16
cmovb rax, rsi
shr rcx, 60
cmp ecx, 13
cmovae rax, rdx
ret
After:
movzx ecx, byte ptr [rdi + 15]
lea rax, [rcx - 192]
cmp rax, 16
mov edx, 16
cmovb rdx, rax
movabs rax, 72057594037927935
and rax, qword ptr [rdi + 8]
cmp rcx, 208
cmovb rax, rdx
ret
== `as_str()` / `as_bytes()`: 19 → 12 instructions
Before:
mov rax, rdi
mov rcx, qword ptr [rdi + 8]
movabs rsi, 72057594037927935
and rsi, rcx
mov rdi, rcx
shr rdi, 56
add rdi, -192
cmp rdi, 16
mov edx, 16
cmovb rdx, rdi
movabs rdi, -3458764513820540929
inc rdi
cmp rcx, rdi
cmovae rdx, rsi
shr rcx, 60
cmp ecx, 13
jb .LBB2_2
mov rax, qword ptr [rax]
.LBB2_2:
ret
After:
mov rax, rdi
movzx ecx, byte ptr [rdi + 15]
lea rsi, [rcx - 192]
cmp rsi, 16
mov edx, 16
cmovb rdx, rsi
cmp rcx, 208
jb .LBB2_2
movabs rdx, 72057594037927935
and rdx, qword ptr [rax + 8]
mov rax, qword ptr [rax]
.LBB2_2:
ret
Contributor
There was a problem hiding this comment.
Pull request overview
This PR refactors the 64-bit Repr::len() implementation to compute the stored length by loading the “tail” word as a usize and masking off the marker byte, avoiding the previous byte-array reconstruction.
Changes:
- Replaced tail-length extraction in `Repr::len()` (64-bit) with a `usize` load + `from_le` + bitmask.
- Kept the inline-length fast-path logic unchanged.
💡 Add Copilot custom instructions for smarter, more guided reviews. Learn how to get started.
Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
len() instruction countslen()
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Add this suggestion to a batch that can be applied as a single commit.This suggestion is invalid because no changes were made to the code.Suggestions cannot be applied while the pull request is closed.Suggestions cannot be applied while viewing a subset of changes.Only one suggestion per line can be applied in a batch.Add this suggestion to a batch that can be applied as a single commit.Applying suggestions on deleted lines is not supported.You must change the existing code in this line in order to create a valid suggestion.Outdated suggestions cannot be applied.This suggestion has been applied or marked resolved.Suggestions cannot be applied from pending reviews.Suggestions cannot be applied on multi-line comments.Suggestions cannot be applied while the pull request is queued to merge.Suggestion cannot be applied right now. Please check back later.
Reduced instruction counts for `LeanString::len()` (and `LeanString::as_str()` / `as_bytes()`) on both AArch64 and x86_64.
Assembly results
Direct `LeanString::as_bytes` is emitted as an alias of `LeanString::as_str`, so the `as_str` diff below covers both.
AArch64 (`cargo asm --lib`)
`LeanString::len` | `LeanString::as_str` / `as_bytes`
`LeanString::len`
Before:
After:
`LeanString::as_str()` / `as_bytes()`
Before:
After:
x86_64 (`cargo asm --lib --target x86_64-unknown-linux-gnu`)
`LeanString::len` | `LeanString::as_str` / `as_bytes`
`LeanString::len`
Before:
After:
`LeanString::as_str()` / `as_bytes()`
Before:
After: