diff --git a/graalpython/com.oracle.graal.python.shell/src/com/oracle/graal/python/shell/GraalPythonMain.java b/graalpython/com.oracle.graal.python.shell/src/com/oracle/graal/python/shell/GraalPythonMain.java index 9ff11e94e6..7608d199c7 100644 --- a/graalpython/com.oracle.graal.python.shell/src/com/oracle/graal/python/shell/GraalPythonMain.java +++ b/graalpython/com.oracle.graal.python.shell/src/com/oracle/graal/python/shell/GraalPythonMain.java @@ -34,6 +34,12 @@ import java.io.InputStream; import java.io.OutputStream; import java.lang.management.ManagementFactory; +import java.nio.ByteBuffer; +import java.nio.CharBuffer; +import java.nio.charset.Charset; +import java.nio.charset.CharsetDecoder; +import java.nio.charset.CoderResult; +import java.nio.charset.CodingErrorAction; import java.nio.file.Files; import java.nio.file.InvalidPathException; import java.nio.file.NoSuchFileException; @@ -80,6 +86,56 @@ public static void main(String[] args) { new GraalPythonMain().launch(args); } + @Override + protected String decodeArgument(byte[] argument) { + Charset charset = Charset.defaultCharset(); + String decoded = new String(argument, charset); + if (decoded.indexOf('\uFFFD') < 0) { + return decoded; + } + return decodeArgument(argument, charset); + } + + /* + * Match documented Unix sys.argv behavior: + * https://docs.python.org/3.12/library/sys.html#sys.argv + * "When you need original bytes, you can get it by `[os.fsencode(arg) for arg in sys.argv]`." + */ + private static String decodeArgument(byte[] argument, Charset charset) { + CharsetDecoder decoder = charset.newDecoder().onMalformedInput(CodingErrorAction.REPORT).onUnmappableCharacter(CodingErrorAction.REPORT); + StringBuilder builder = new StringBuilder(argument.length); + ByteBuffer in = ByteBuffer.wrap(argument); + CharBuffer out = CharBuffer.allocate(Math.max(1, (int) Math.ceil(argument.length * decoder.maxCharsPerByte()))); + while (true) { + CoderResult result = decoder.decode(in, out, true); + appendDecodedChars(builder, out); + if (result.isUnderflow()) { + break; + } else if (result.isOverflow()) { + continue; + } else if (result.isError()) { + int errorLength = result.length(); + for (int i = 0; i < errorLength && in.hasRemaining(); i++) { + builder.append((char) (0xDC00 + (in.get() & 0xff))); + } + } + } + while (true) { + CoderResult result = decoder.flush(out); + appendDecodedChars(builder, out); + if (result.isUnderflow()) { + break; + } + } + return builder.toString(); + } + + private static void appendDecodedChars(StringBuilder builder, CharBuffer out) { + out.flip(); + builder.append(out); + out.clear(); + } + private static final String LANGUAGE_ID = "python"; private static final String J_PYENVCFG = "pyvenv.cfg"; diff --git a/graalpython/com.oracle.graal.python.test/src/tests/test_subprocess.py b/graalpython/com.oracle.graal.python.test/src/tests/test_subprocess.py index b7f2dc1dea..15962f6fad 100644 --- a/graalpython/com.oracle.graal.python.test/src/tests/test_subprocess.py +++ b/graalpython/com.oracle.graal.python.test/src/tests/test_subprocess.py @@ -1,4 +1,4 @@ -# Copyright (c) 2018, 2025, Oracle and/or its affiliates. +# Copyright (c) 2018, 2026, Oracle and/or its affiliates. # Copyright (C) 1996-2017 Python Software Foundation # # Licensed under the PYTHON SOFTWARE FOUNDATION LICENSE VERSION 2 @@ -75,6 +75,17 @@ def test_check_output(self): [sys.executable, "-c", "print('BDFL')"]) self.assertIn(b'BDFL', output) + @unittest.skipIf(sys.platform == 'win32', "POSIX argv bytes specific") + def test_surrogateescape_non_utf8_argv(self): + code = ( + "import os, sys; " + "assert os.fsencode(sys.argv[-1]) == b'\\x8av'; " + "print(repr(sys.argv[-1]))" + ) + cmd = f"{shlex.quote(sys.executable)} -c {shlex.quote(code)} \"$(printf '\\212v')\"" + output = subprocess.check_output(cmd, shell=True, stderr=subprocess.PIPE, text=True) + self.assertEqual("'\\udc8av'\n", output) + def test_check_output_nonzero(self): # check_call() function with non-zero return code with self.assertRaises(subprocess.CalledProcessError) as c: diff --git a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/ast/Validator.java b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/ast/Validator.java index 1607f75056..e2ea6b7801 100644 --- a/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/ast/Validator.java +++ b/graalpython/com.oracle.graal.python/src/com/oracle/graal/python/builtins/modules/ast/Validator.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022, 2025, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2022, 2026, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * The Universal Permissive License (UPL), Version 1.0 @@ -1006,9 +1006,9 @@ public Void visit(TypeVarTuple node) { return null; } -/*- -// Validation of sequences -*/ + /*- + // Validation of sequences + */ // Equivalent of validate_stmts private void validateStmts(StmtTy[] stmts) {