From 50904e2320d9986ade2dc2f93b5e37c0c0d2c541 Mon Sep 17 00:00:00 2001
From: "bolun.chen"
Date: Mon, 20 Apr 2026 13:28:26 +0800
Subject: [PATCH] macos: fall back to RTF/HTML when pasted plain text is encoding-lossy
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Some applications (certain IMs, Electron ports, legacy native apps)
write a mangled plain-text pasteboard variant alongside a
correctly-encoded RTF or HTML variant — non-ASCII characters arrive as
U+FFFD or literal '?' runs because the app's plain-text writer fell
back to an ASCII encoding. Pasting from such apps produced "?????" in
the terminal.

When getOpinionatedStringContents() detects an encoding-lossy
plain-text variant (contains U+FFFD, or runs of 3+ '?'), recover the
string from the rich-text variant provided it carries strictly more
non-ASCII characters than plain text (U+FFFD itself is not counted,
since it only marks lost data) — this avoids regressing legitimate
user text containing '?' runs. RTF parsing (local) is preferred; HTML
is tried only when RTF yields no usable text, minimizing WebKit
involvement.

Co-Authored-By: Claude Opus 4.7 (1M context)
---
 .../Extensions/NSPasteboard+Extension.swift   | 71 ++++++++++++++++++-
 1 file changed, 70 insertions(+), 1 deletion(-)

diff --git a/macos/Sources/Helpers/Extensions/NSPasteboard+Extension.swift b/macos/Sources/Helpers/Extensions/NSPasteboard+Extension.swift
index a54735fde21..501a61f3ca5 100644
--- a/macos/Sources/Helpers/Extensions/NSPasteboard+Extension.swift
+++ b/macos/Sources/Helpers/Extensions/NSPasteboard+Extension.swift
@@ -36,6 +36,8 @@ extension NSPasteboard {
     /// Does these things in order:
     /// - Tries to get the absolute filesystem path of the file in the pasteboard if there is one and ensures the file path is properly escaped.
     /// - Tries to get any string from the pasteboard.
+    /// - If the plain-text variant is missing or looks encoding-lossy, falls
+    ///   back to extracting plain text from the RTF or HTML variant.
     /// If all of the above fail, returns None.
     func getOpinionatedStringContents() -> String? {
         if let urls = readObjects(forClasses: [NSURL.self]) as? [URL],
@@ -45,7 +47,74 @@ extension NSPasteboard {
                 .joined(separator: " ")
         }
 
-        return self.string(forType: .string)
+        guard let plainText = self.string(forType: .string) else {
+            return self.richTextFallback()
+        }
+
+        // Some apps (certain IMs, Electron ports, legacy native apps) write a
+        // mangled plain-text variant alongside a correctly-encoded rich-text
+        // variant — e.g. non-ASCII chars arrive as U+FFFD or literal '?' runs
+        // because the app's plain-text writer fell back to an ASCII encoding.
+        // Prefer the rich-text variant only when it demonstrably carries more
+        // real (non-U+FFFD) non-ASCII content than the plain variant, so that
+        // legitimate user text containing '?' runs is not replaced by a
+        // stripped RTF/HTML rendering.
+        if Self.looksEncodingLossy(plainText),
+           let recovered = self.richTextFallback(),
+           Self.nonASCIIScalarCount(recovered) > Self.nonASCIIScalarCount(plainText) {
+            return recovered
+        }
+        return plainText
+    }
+
+    /// Whether `s` contains U+FFFD or a run of three or more '?' scalars.
+    private static func looksEncodingLossy(_ s: String) -> Bool {
+        var questionRun = 0
+        for scalar in s.unicodeScalars {
+            if scalar == "\u{FFFD}" { return true }
+            questionRun = (scalar == "?") ? questionRun + 1 : 0
+            if questionRun >= 3 { return true }
+        }
+        return false
+    }
+
+    /// Counts non-ASCII scalars in `s`, ignoring U+FFFD: the replacement
+    /// character marks lost data, so counting it would let a 1:1-mangled
+    /// plain-text variant tie with (and so beat) the intact rich-text one.
+    private static func nonASCIIScalarCount(_ s: String) -> Int {
+        s.unicodeScalars.reduce(0) { $1.value > 127 && $1.value != 0xFFFD ? $0 + 1 : $0 }
+    }
+
+    /// Plain text extracted from the RTF (preferred) or HTML pasteboard
+    /// variant, or nil when neither yields a non-empty string.
+    private func richTextFallback() -> String? {
+        // RTF first: Apple's RTF parser is local and never fetches external
+        // resources. HTML import via NSAttributedString goes through WebKit
+        // and is heavier, so it is tried only when RTF gives no usable text.
+        if let data = self.data(forType: .rtf),
+           let attr = try? NSAttributedString(
+               data: data,
+               options: [.documentType: NSAttributedString.DocumentType.rtf],
+               documentAttributes: nil
+           ),
+           !attr.string.isEmpty {
+            return attr.string
+        }
+
+        if let data = self.data(forType: .html),
+           let attr = try? NSAttributedString(
+               data: data,
+               options: [
+                   .documentType: NSAttributedString.DocumentType.html,
+                   .characterEncoding: String.Encoding.utf8.rawValue,
+               ],
+               documentAttributes: nil
+           ),
+           !attr.string.isEmpty {
+            return attr.string
+        }
+
+        return nil
     }
 
     /// The pasteboard for the Ghostty enum type.