-
Notifications
You must be signed in to change notification settings - Fork 67
macos: fall back to RTF/HTML when pasted plain text is encoding-lossy #45
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
|
|
@@ -36,6 +36,8 @@ extension NSPasteboard { | |||||
| /// Does these things in order: | ||||||
| /// - Tries to get the absolute filesystem path of the file in the pasteboard if there is one and ensures the file path is properly escaped. | ||||||
| /// - Tries to get any string from the pasteboard. | ||||||
| /// - If the plain-text variant looks encoding-lossy, falls back to | ||||||
| /// extracting plain text from the RTF or HTML variant. | ||||||
| /// If all of the above fail, returns None. | ||||||
| func getOpinionatedStringContents() -> String? { | ||||||
| if let urls = readObjects(forClasses: [NSURL.self]) as? [URL], | ||||||
|
|
@@ -45,7 +47,74 @@ extension NSPasteboard { | |||||
| .joined(separator: " ") | ||||||
| } | ||||||
|
|
||||||
| return self.string(forType: .string) | ||||||
| let plainText = self.string(forType: .string) | ||||||
|
|
||||||
| // Some apps (certain IMs, Electron ports, legacy native apps) write a | ||||||
| // mangled plain-text variant alongside a correctly-encoded rich-text | ||||||
| // variant — e.g. non-ASCII chars arrive as U+FFFD or literal '?' runs | ||||||
| // because the app's plain-text writer fell back to an ASCII encoding. | ||||||
| // Prefer the rich-text variant only when it demonstrably carries more | ||||||
| // non-ASCII content than the plain variant, so that legitimate user | ||||||
| // text containing '?' runs is not replaced by a stripped RTF/HTML | ||||||
| // rendering. | ||||||
| if let s = plainText, Self.looksEncodingLossy(s), | ||||||
| let recovered = self.richTextFallback(), | ||||||
| Self.nonASCIIScalarCount(recovered) > Self.nonASCIIScalarCount(s) { | ||||||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. P2: Lossy plain text containing U+FFFD can bypass rich-text recovery because U+FFFD is counted as non-ASCII, making the strict Prompt for AI agents
Suggested change
|
||||||
| return recovered | ||||||
| } | ||||||
|
|
||||||
| if let s = plainText { return s } | ||||||
| return self.richTextFallback() | ||||||
| } | ||||||
|
|
||||||
| private static func looksEncodingLossy(_ s: String) -> Bool { | ||||||
| if s.contains("\u{FFFD}") { return true } | ||||||
| var run = 0 | ||||||
| for scalar in s.unicodeScalars { | ||||||
| if scalar == "?" { | ||||||
| run += 1 | ||||||
| if run >= 3 { return true } | ||||||
| } else { | ||||||
| run = 0 | ||||||
| } | ||||||
| } | ||||||
| return false | ||||||
| } | ||||||
|
Comment on lines
+70
to
+82
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
With
private static func looksEncodingLossy(_ s: String) -> Bool {
if s.contains("\u{FFFD}") { return true }
var run = 0
for scalar in s.unicodeScalars {
if scalar == "?" {
run += 1
if run >= 5 { return true }
} else {
run = 0
}
}
return false
} |
||||||
|
|
||||||
| private static func nonASCIIScalarCount(_ s: String) -> Int { | ||||||
| s.unicodeScalars.reduce(0) { $1.value > 127 ? $0 + 1 : $0 } | ||||||
| } | ||||||
|
Comment on lines
+84
to
+86
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Consider filtering replacement characters out of the private func richTextFallback() -> String? {
// ... existing extraction ...
let cleaned = result.unicodeScalars
.filter { $0.value != 0xFFFC && $0.value != 0xFFFD }
.reduce(into: "") { $0.append(Character($1)) }
return cleaned.isEmpty ? nil : cleaned
} |
||||||
|
|
||||||
| private func richTextFallback() -> String? { | ||||||
| // Try RTF first: Apple's RTF parser is local and never fetches | ||||||
| // external resources. Fall back to HTML only when RTF is absent, | ||||||
| // since HTML parsing via NSAttributedString goes through WebKit | ||||||
| // and is heavier even when the attributed string is only used to | ||||||
| // extract `.string`. | ||||||
| if let data = self.data(forType: .rtf), | ||||||
| let attr = try? NSAttributedString( | ||||||
| data: data, | ||||||
| options: [.documentType: NSAttributedString.DocumentType.rtf], | ||||||
| documentAttributes: nil | ||||||
| ), | ||||||
| !attr.string.isEmpty { | ||||||
| return attr.string | ||||||
| } | ||||||
|
|
||||||
| if let data = self.data(forType: .html), | ||||||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. P2: Prompt for AI agents |
||||||
| let attr = try? NSAttributedString( | ||||||
| data: data, | ||||||
| options: [ | ||||||
| .documentType: NSAttributedString.DocumentType.html, | ||||||
| .characterEncoding: String.Encoding.utf8.rawValue, | ||||||
| ], | ||||||
| documentAttributes: nil | ||||||
| ), | ||||||
| !attr.string.isEmpty { | ||||||
| return attr.string | ||||||
| } | ||||||
|
Comment on lines
+104
to
+115
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 🧩 Analysis chain🏁 Script executed: #!/bin/bash
# Find all call sites of getOpinionatedStringContents to assess threading context.
rg -nP --type=swift -C3 '\bgetOpinionatedStringContents\s*\('Repository: manaflow-ai/ghostty Length of output: 2940 🌐 Web query:
💡 Result: Yes, initializing NSAttributedString with NSAttributedString.DocumentType.html requires the main thread on macOS. Apple's official documentation explicitly states: "Don’t call this method from a background thread if the options dictionary includes the documentType attribute with a value of html. If you do, the method tries to synchronize with the main thread, fails, and times out." This applies to the synchronous initializers like init(data:options:documentAttributes:). An older Apple document confirms that since OS X v10.4, NSAttributedString uses WebKit for HTML import, which is not thread-safe on background threads; on macOS (OS X v10.5+), it transfers to the main thread via performSelectorOnMainThread:withObject:waitUntilDone:, requiring the main thread's run loop to be running. Recent forum discussions (up to 2025) affirm this behavior persists on macOS. New asynchronous APIs like loadFromHTML(data:options:completionHandler:) exist but are separate from the synchronous init methods. Citations:
🏁 Script executed: # Examine the call site contexts to understand threading guarantees
sed -n '2100,2130p' macos/Sources/Ghostty/Surface\ View/SurfaceView_AppKit.swiftRepository: manaflow-ai/ghostty Length of output: 1001 🏁 Script executed: # Check the clipboard request handler context
sed -n '330,345p' macos/Sources/Ghostty/Ghostty.App.swiftRepository: manaflow-ai/ghostty Length of output: 738 🏁 Script executed: # Check the C interop callback pattern to understand threading context
rg -nB5 "completeClipboardRequest" macos/Sources/Ghostty/Ghostty.App.swift | head -30Repository: manaflow-ai/ghostty Length of output: 587 🏁 Script executed: # Look for the definition of the clipboard callback to understand its threading context
rg -nB10 "performable paste bindings" macos/Sources/Ghostty/Ghostty.App.swiftRepository: manaflow-ai/ghostty Length of output: 628 🏁 Script executed: # Search for where this callback function is registered/called to understand threading
rg -n "readClipboard\|clipboard.*callback" macos/Sources/Ghostty/Ghostty.App.swift | head -20Repository: manaflow-ai/ghostty Length of output: 45 🏁 Script executed: # Check if there's a dispatch or threading guard anywhere in the clipboard handling chain
rg -nB2 "readClipboard" macos/Sources/Ghostty/Ghostty.App.swift | head -40Repository: manaflow-ai/ghostty Length of output: 666 🏁 Script executed: # Check the C library interface to understand threading guarantees for clipboard callbacks
rg -n "read_clipboard_cb|ghostty_app_callbacks_s" macos/Sources/ -A3 | head -40Repository: manaflow-ai/ghostty Length of output: 870 🏁 Script executed: # Look for documentation or comments about the C callback threading model
rg -n "ghostty_clipboard_e|clipboard.*thread|read.*clipboard" macos/Sources/Ghostty/ -B2 -A2 | head -50Repository: manaflow-ai/ghostty Length of output: 3491 🏁 Script executed: # Search for any comments or documentation about threading in the C library headers
find . -name "*.h" -type f | head -20Repository: manaflow-ai/ghostty Length of output: 673 🏁 Script executed: # Check if there are any assertions or main-thread checks elsewhere in the codebase for clipboard handling
rg -n "dispatchPrecondition\|MainActor\|Thread\|main thread" macos/Sources/Ghostty/Ghostty.App.swift | head -20Repository: manaflow-ai/ghostty Length of output: 45 🏁 Script executed: # Check the main app event loop and threading model to understand if clipboard callbacks are main-thread guaranteed
head -100 macos/Sources/Ghostty/Ghostty.App.swift | grep -A20 "class App\|init\|runModal"Repository: manaflow-ai/ghostty Length of output: 3304 🏁 Script executed: # Look for documentation or comments about the C library's threading model in the header
grep -n "read_clipboard_cb\|thread\|main" ./include/ghostty.h | head -30Repository: manaflow-ai/ghostty Length of output: 398 Add main-thread guarantee to Apple's official documentation confirms that While current call sites ( Add either:
🤖 Prompt for AI Agents
Comment on lines
+104
to
+115
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Consider adding a runtime assertion to make the requirement explicit: if let data = self.data(forType: .html) {
assert(Thread.isMainThread, "NSAttributedString HTML init must be called on the main thread")
// ...
} |
||||||
|
|
||||||
| return nil | ||||||
| } | ||||||
|
|
||||||
| /// The pasteboard for the Ghostty enum type. | ||||||
|
|
||||||
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
P1:
NSAttributedString.stringemitsU+FFFC(Object Replacement Character) for every embedded image/attachment in RTF or HTML. Since0xFFFC > 127, each one inflatesnonASCIIScalarCount, which can cause an incorrect swap (e.g., mangled plain text with 0 non-ASCII vs. RTF with only an image →1 > 0triggers fallback). Worse, anyU+FFFCthat survives into the return value renders as garbage in the terminal.Filter
\u{FFFC}(and\u{FFFD}) out of the string returned byrichTextFallback()before using it.Prompt for AI agents