diff --git a/.github/workflows/CI_build.yml b/.github/workflows/CI_build.yml index 453e08b0..94285c79 100644 --- a/.github/workflows/CI_build.yml +++ b/.github/workflows/CI_build.yml @@ -7,9 +7,10 @@ jobs: runs-on: windows-latest strategy: + fail-fast: false matrix: build_configuration: [Release, Debug] - build_platform: [x64, Win32] + build_platform: [x64, Win32, ARM64] steps: - name: Checkout repo @@ -43,8 +44,9 @@ jobs: if: matrix.build_configuration == 'Release' working-directory: installer run: | - $env:PYTHONBUILDDIR_X64='${{ github.workspace }}\packages\python.3.12.10\tools' - $env:PYTHONBUILDDIR='${{ github.workspace }}\packages\pythonx86.3.12.10\tools' + $env:PYTHONBUILDDIR_ARM64='${{ github.workspace }}\packages\pythonarm64.3.14.3\tools' + $env:PYTHONBUILDDIR_X64='${{ github.workspace }}\packages\python.3.14.3\tools' + $env:PYTHONBUILDDIR='${{ github.workspace }}\packages\pythonx86.3.14.3\tools' Rename-Item -Path ".\buildPaths.bat.orig" -NewName "buildPaths.bat" - dotnet tool install --global wix + dotnet tool install --global wix --version 6.0.2 .\buildAll.bat ${{ matrix.build_platform }} diff --git a/NppPlugin/project/NppPlugin.vcxproj b/NppPlugin/project/NppPlugin.vcxproj index 96952389..4842d3e2 100644 --- a/NppPlugin/project/NppPlugin.vcxproj +++ b/NppPlugin/project/NppPlugin.vcxproj @@ -1,6 +1,10 @@  + + Debug + ARM64 + Debug Win32 @@ -9,6 +13,10 @@ Debug x64 + + PythonDebug + ARM64 + PythonDebug Win32 @@ -17,6 +25,10 @@ PythonDebug x64 + + Release + ARM64 + Release Win32 @@ -45,6 +57,12 @@ true v143 + + StaticLibrary + Unicode + true + v143 + StaticLibrary Unicode @@ -55,6 +73,11 @@ Unicode v143 + + StaticLibrary + Unicode + v143 + StaticLibrary Unicode @@ -65,6 +88,11 @@ Unicode v143 + + StaticLibrary + Unicode + v143 + @@ -74,18 +102,27 @@ + + + + + + + + + <_ProjectFileVersion>10.0.30319.1 @@ -98,12 +135,18 @@ $(ProjectDir)\..\bin64\$(Configuration)\ $(ProjectDir)\..\bin64\$(Configuration)\ $(ProjectDir)\..\bin64\$(Configuration)\ + $(ProjectDir)\..\binarm64\$(Configuration)\ + $(ProjectDir)\..\binarm64\$(Configuration)\ + $(ProjectDir)\..\binarm64\$(Configuration)\ $(IncludePath) $(IncludePath) + $(IncludePath) $(IncludePath) $(IncludePath) + $(IncludePath) $(IncludePath) $(IncludePath) + $(IncludePath) @@ -135,6 +178,21 @@ stdc17 + + + Disabled + ..\include;..;%(AdditionalIncludeDirectories) + WIN32;_CRT_SECURE_NO_WARNINGS;_DEBUG;_LIB;%(PreprocessorDefinitions) + EnableFastChecks + MultiThreadedDebug + NotUsing + Level4 + ProgramDatabase + true + stdcpp17 + stdc17 + + Disabled @@ -165,6 +223,21 @@ stdc17 + + + Disabled + ..\include;..;%(AdditionalIncludeDirectories) + WIN32;_CRT_SECURE_NO_WARNINGS;_DEBUG;_LIB;%(PreprocessorDefinitions) + EnableFastChecks + MultiThreadedDebug + NotUsing + Level4 + ProgramDatabase + true + stdcpp17 + stdc17 + + ..\include;..;%(AdditionalIncludeDirectories) @@ -191,6 +264,19 @@ stdc17 + + + ..\include;..;%(AdditionalIncludeDirectories) + WIN32;_CRT_SECURE_NO_WARNINGS;NDEBUG;_LIB;%(PreprocessorDefinitions) + MultiThreaded + NotUsing + Level3 + ProgramDatabase + true + stdcpp17 + stdc17 + + @@ -198,10 +284,13 @@ Create Create + Create Create Create + Create Create Create + Create diff --git a/PythonLib/full/_android_support.py b/PythonLib/full/_android_support.py new file mode 100644 index 00000000..320dab52 --- /dev/null +++ b/PythonLib/full/_android_support.py @@ -0,0 +1,192 @@ +import io +import sys +from threading import RLock +from time import sleep, time + +# The maximum length of a log message in bytes, including the level marker and +# tag, is defined as LOGGER_ENTRY_MAX_PAYLOAD at +# https://cs.android.com/android/platform/superproject/+/android-14.0.0_r1:system/logging/liblog/include/log/log.h;l=71. +# Messages longer than this will be truncated by logcat. This limit has already +# been reduced at least once in the history of Android (from 4076 to 4068 between +# API level 23 and 26), so leave some headroom. +MAX_BYTES_PER_WRITE = 4000 + +# UTF-8 uses a maximum of 4 bytes per character, so limiting text writes to this +# size ensures that we can always avoid exceeding MAX_BYTES_PER_WRITE. +# However, if the actual number of bytes per character is smaller than that, +# then we may still join multiple consecutive text writes into binary +# writes containing a larger number of characters. +MAX_CHARS_PER_WRITE = MAX_BYTES_PER_WRITE // 4 + + +# When embedded in an app on current versions of Android, there's no easy way to +# monitor the C-level stdout and stderr. The testbed comes with a .c file to +# redirect them to the system log using a pipe, but that wouldn't be convenient +# or appropriate for all apps. So we redirect at the Python level instead. +def init_streams(android_log_write, stdout_prio, stderr_prio): + if sys.executable: + return # Not embedded in an app. + + global logcat + logcat = Logcat(android_log_write) + sys.stdout = TextLogStream(stdout_prio, "python.stdout", sys.stdout) + sys.stderr = TextLogStream(stderr_prio, "python.stderr", sys.stderr) + + +class TextLogStream(io.TextIOWrapper): + def __init__(self, prio, tag, original=None, **kwargs): + # Respect the -u option. + if original: + kwargs.setdefault("write_through", original.write_through) + fileno = original.fileno() + else: + fileno = None + + # The default is surrogateescape for stdout and backslashreplace for + # stderr, but in the context of an Android log, readability is more + # important than reversibility. + kwargs.setdefault("encoding", "UTF-8") + kwargs.setdefault("errors", "backslashreplace") + + super().__init__(BinaryLogStream(prio, tag, fileno), **kwargs) + self._lock = RLock() + self._pending_bytes = [] + self._pending_bytes_count = 0 + + def __repr__(self): + return f"" + + def write(self, s): + if not isinstance(s, str): + raise TypeError( + f"write() argument must be str, not {type(s).__name__}") + + # In case `s` is a str subclass that writes itself to stdout or stderr + # when we call its methods, convert it to an actual str. + s = str.__str__(s) + + # We want to emit one log message per line wherever possible, so split + # the string into lines first. Note that "".splitlines() == [], so + # nothing will be logged for an empty string. + with self._lock: + for line in s.splitlines(keepends=True): + while line: + chunk = line[:MAX_CHARS_PER_WRITE] + line = line[MAX_CHARS_PER_WRITE:] + self._write_chunk(chunk) + + return len(s) + + # The size and behavior of TextIOWrapper's buffer is not part of its public + # API, so we handle buffering ourselves to avoid truncation. + def _write_chunk(self, s): + b = s.encode(self.encoding, self.errors) + if self._pending_bytes_count + len(b) > MAX_BYTES_PER_WRITE: + self.flush() + + self._pending_bytes.append(b) + self._pending_bytes_count += len(b) + if ( + self.write_through + or b.endswith(b"\n") + or self._pending_bytes_count > MAX_BYTES_PER_WRITE + ): + self.flush() + + def flush(self): + with self._lock: + self.buffer.write(b"".join(self._pending_bytes)) + self._pending_bytes.clear() + self._pending_bytes_count = 0 + + # Since this is a line-based logging system, line buffering cannot be turned + # off, i.e. a newline always causes a flush. + @property + def line_buffering(self): + return True + + +class BinaryLogStream(io.RawIOBase): + def __init__(self, prio, tag, fileno=None): + self.prio = prio + self.tag = tag + self._fileno = fileno + + def __repr__(self): + return f"" + + def writable(self): + return True + + def write(self, b): + if type(b) is not bytes: + try: + b = bytes(memoryview(b)) + except TypeError: + raise TypeError( + f"write() argument must be bytes-like, not {type(b).__name__}" + ) from None + + # Writing an empty string to the stream should have no effect. + if b: + logcat.write(self.prio, self.tag, b) + return len(b) + + # This is needed by the test suite --timeout option, which uses faulthandler. + def fileno(self): + if self._fileno is None: + raise io.UnsupportedOperation("fileno") + return self._fileno + + +# When a large volume of data is written to logcat at once, e.g. when a test +# module fails in --verbose3 mode, there's a risk of overflowing logcat's own +# buffer and losing messages. We avoid this by imposing a rate limit using the +# token bucket algorithm, based on a conservative estimate of how fast `adb +# logcat` can consume data. +MAX_BYTES_PER_SECOND = 1024 * 1024 + +# The logcat buffer size of a device can be determined by running `logcat -g`. +# We set the token bucket size to half of the buffer size of our current minimum +# API level, because other things on the system will be producing messages as +# well. +BUCKET_SIZE = 128 * 1024 + +# https://cs.android.com/android/platform/superproject/+/android-14.0.0_r1:system/logging/liblog/include/log/log_read.h;l=39 +PER_MESSAGE_OVERHEAD = 28 + + +class Logcat: + def __init__(self, android_log_write): + self.android_log_write = android_log_write + self._lock = RLock() + self._bucket_level = 0 + self._prev_write_time = time() + + def write(self, prio, tag, message): + # Encode null bytes using "modified UTF-8" to avoid them truncating the + # message. + message = message.replace(b"\x00", b"\xc0\x80") + + # On API level 30 and higher, Logcat will strip any number of leading + # newlines. This is visible in all `logcat` modes, even --binary. Work + # around this by adding a leading space, which shouldn't make any + # difference to the log's usability. + if message.startswith(b"\n"): + message = b" " + message + + with self._lock: + now = time() + self._bucket_level += ( + (now - self._prev_write_time) * MAX_BYTES_PER_SECOND) + + # If the bucket level is still below zero, the clock must have gone + # backwards, so reset it to zero and continue. + self._bucket_level = max(0, min(self._bucket_level, BUCKET_SIZE)) + self._prev_write_time = now + + self._bucket_level -= PER_MESSAGE_OVERHEAD + len(tag) + len(message) + if self._bucket_level < 0: + sleep(-self._bucket_level / MAX_BYTES_PER_SECOND) + + self.android_log_write(prio, tag, message) diff --git a/PythonLib/full/_apple_support.py b/PythonLib/full/_apple_support.py new file mode 100644 index 00000000..92febdcf --- /dev/null +++ b/PythonLib/full/_apple_support.py @@ -0,0 +1,66 @@ +import io +import sys + + +def init_streams(log_write, stdout_level, stderr_level): + # Redirect stdout and stderr to the Apple system log. This method is + # invoked by init_apple_streams() (initconfig.c) if config->use_system_logger + # is enabled. + sys.stdout = SystemLog(log_write, stdout_level, errors=sys.stderr.errors) + sys.stderr = SystemLog(log_write, stderr_level, errors=sys.stderr.errors) + + +class SystemLog(io.TextIOWrapper): + def __init__(self, log_write, level, **kwargs): + kwargs.setdefault("encoding", "UTF-8") + kwargs.setdefault("line_buffering", True) + super().__init__(LogStream(log_write, level), **kwargs) + + def __repr__(self): + return f"" + + def write(self, s): + if not isinstance(s, str): + raise TypeError( + f"write() argument must be str, not {type(s).__name__}") + + # In case `s` is a str subclass that writes itself to stdout or stderr + # when we call its methods, convert it to an actual str. + s = str.__str__(s) + + # We want to emit one log message per line, so split + # the string before sending it to the superclass. + for line in s.splitlines(keepends=True): + super().write(line) + + return len(s) + + +class LogStream(io.RawIOBase): + def __init__(self, log_write, level): + self.log_write = log_write + self.level = level + + def __repr__(self): + return f"" + + def writable(self): + return True + + def write(self, b): + if type(b) is not bytes: + try: + b = bytes(memoryview(b)) + except TypeError: + raise TypeError( + f"write() argument must be bytes-like, not {type(b).__name__}" + ) from None + + # Writing an empty string to the stream should have no effect. + if b: + # Encode null bytes using "modified UTF-8" to avoid truncating the + # message. This should not affect the return value, as the caller + # may be expecting it to match the length of the input. + self.log_write(self.level, b.replace(b"\x00", b"\xc0\x80")) + + return len(b) diff --git a/PythonLib/full/_ast_unparse.py b/PythonLib/full/_ast_unparse.py new file mode 100644 index 00000000..1c8741b5 --- /dev/null +++ b/PythonLib/full/_ast_unparse.py @@ -0,0 +1,1161 @@ +# This module contains ``ast.unparse()``, defined here +# to improve the import time for the ``ast`` module. +import sys +from _ast import * +from ast import NodeVisitor +from contextlib import contextmanager, nullcontext +from enum import IntEnum, auto, _simple_enum + +# Large float and imaginary literals get turned into infinities in the AST. +# We unparse those infinities to INFSTR. +_INFSTR = "1e" + repr(sys.float_info.max_10_exp + 1) + +@_simple_enum(IntEnum) +class _Precedence: + """Precedence table that originated from python grammar.""" + + NAMED_EXPR = auto() # := + TUPLE = auto() # , + YIELD = auto() # 'yield', 'yield from' + TEST = auto() # 'if'-'else', 'lambda' + OR = auto() # 'or' + AND = auto() # 'and' + NOT = auto() # 'not' + CMP = auto() # '<', '>', '==', '>=', '<=', '!=', + # 'in', 'not in', 'is', 'is not' + EXPR = auto() + BOR = EXPR # '|' + BXOR = auto() # '^' + BAND = auto() # '&' + SHIFT = auto() # '<<', '>>' + ARITH = auto() # '+', '-' + TERM = auto() # '*', '@', '/', '%', '//' + FACTOR = auto() # unary '+', '-', '~' + POWER = auto() # '**' + AWAIT = auto() # 'await' + ATOM = auto() + + def next(self): + try: + return self.__class__(self + 1) + except ValueError: + return self + + +_SINGLE_QUOTES = ("'", '"') +_MULTI_QUOTES = ('"""', "'''") +_ALL_QUOTES = (*_SINGLE_QUOTES, *_MULTI_QUOTES) + +class Unparser(NodeVisitor): + """Methods in this class recursively traverse an AST and + output source code for the abstract syntax; original formatting + is disregarded.""" + + def __init__(self): + self._source = [] + self._precedences = {} + self._type_ignores = {} + self._indent = 0 + self._in_try_star = False + self._in_interactive = False + + def interleave(self, inter, f, seq): + """Call f on each item in seq, calling inter() in between.""" + seq = iter(seq) + try: + f(next(seq)) + except StopIteration: + pass + else: + for x in seq: + inter() + f(x) + + def items_view(self, traverser, items): + """Traverse and separate the given *items* with a comma and append it to + the buffer. If *items* is a single item sequence, a trailing comma + will be added.""" + if len(items) == 1: + traverser(items[0]) + self.write(",") + else: + self.interleave(lambda: self.write(", "), traverser, items) + + def maybe_newline(self): + """Adds a newline if it isn't the start of generated source""" + if self._source: + self.write("\n") + + def maybe_semicolon(self): + """Adds a "; " delimiter if it isn't the start of generated source""" + if self._source: + self.write("; ") + + def fill(self, text="", *, allow_semicolon=True): + """Indent a piece of text and append it, according to the current + indentation level, or only delineate with semicolon if applicable""" + if self._in_interactive and not self._indent and allow_semicolon: + self.maybe_semicolon() + self.write(text) + else: + self.maybe_newline() + self.write(" " * self._indent + text) + + def write(self, *text): + """Add new source parts""" + self._source.extend(text) + + @contextmanager + def buffered(self, buffer = None): + if buffer is None: + buffer = [] + + original_source = self._source + self._source = buffer + yield buffer + self._source = original_source + + @contextmanager + def block(self, *, extra = None): + """A context manager for preparing the source for blocks. It adds + the character':', increases the indentation on enter and decreases + the indentation on exit. If *extra* is given, it will be directly + appended after the colon character. + """ + self.write(":") + if extra: + self.write(extra) + self._indent += 1 + yield + self._indent -= 1 + + @contextmanager + def delimit(self, start, end): + """A context manager for preparing the source for expressions. It adds + *start* to the buffer and enters, after exit it adds *end*.""" + + self.write(start) + yield + self.write(end) + + def delimit_if(self, start, end, condition): + if condition: + return self.delimit(start, end) + else: + return nullcontext() + + def require_parens(self, precedence, node): + """Shortcut to adding precedence related parens""" + return self.delimit_if("(", ")", self.get_precedence(node) > precedence) + + def get_precedence(self, node): + return self._precedences.get(node, _Precedence.TEST) + + def set_precedence(self, precedence, *nodes): + for node in nodes: + self._precedences[node] = precedence + + def get_raw_docstring(self, node): + """If a docstring node is found in the body of the *node* parameter, + return that docstring node, None otherwise. + + Logic mirrored from ``_PyAST_GetDocString``.""" + if not isinstance( + node, (AsyncFunctionDef, FunctionDef, ClassDef, Module) + ) or len(node.body) < 1: + return None + node = node.body[0] + if not isinstance(node, Expr): + return None + node = node.value + if isinstance(node, Constant) and isinstance(node.value, str): + return node + + def get_type_comment(self, node): + comment = self._type_ignores.get(node.lineno) or node.type_comment + if comment is not None: + return f" # type: {comment}" + + def traverse(self, node): + if isinstance(node, list): + for item in node: + self.traverse(item) + else: + super().visit(node) + + # Note: as visit() resets the output text, do NOT rely on + # NodeVisitor.generic_visit to handle any nodes (as it calls back in to + # the subclass visit() method, which resets self._source to an empty list) + def visit(self, node): + """Outputs a source code string that, if converted back to an ast + (using ast.parse) will generate an AST equivalent to *node*""" + self._source = [] + self.traverse(node) + return "".join(self._source) + + def _write_docstring_and_traverse_body(self, node): + if (docstring := self.get_raw_docstring(node)): + self._write_docstring(docstring) + self.traverse(node.body[1:]) + else: + self.traverse(node.body) + + def visit_Module(self, node): + self._type_ignores = { + ignore.lineno: f"ignore{ignore.tag}" + for ignore in node.type_ignores + } + try: + self._write_docstring_and_traverse_body(node) + finally: + self._type_ignores.clear() + + def visit_Interactive(self, node): + self._in_interactive = True + try: + self._write_docstring_and_traverse_body(node) + finally: + self._in_interactive = False + + def visit_FunctionType(self, node): + with self.delimit("(", ")"): + self.interleave( + lambda: self.write(", "), self.traverse, node.argtypes + ) + + self.write(" -> ") + self.traverse(node.returns) + + def visit_Expr(self, node): + self.fill() + self.set_precedence(_Precedence.YIELD, node.value) + self.traverse(node.value) + + def visit_NamedExpr(self, node): + with self.require_parens(_Precedence.NAMED_EXPR, node): + self.set_precedence(_Precedence.ATOM, node.target, node.value) + self.traverse(node.target) + self.write(" := ") + self.traverse(node.value) + + def visit_Import(self, node): + self.fill("import ") + self.interleave(lambda: self.write(", "), self.traverse, node.names) + + def visit_ImportFrom(self, node): + self.fill("from ") + self.write("." * (node.level or 0)) + if node.module: + self.write(node.module) + self.write(" import ") + self.interleave(lambda: self.write(", "), self.traverse, node.names) + + def visit_Assign(self, node): + self.fill() + for target in node.targets: + self.set_precedence(_Precedence.TUPLE, target) + self.traverse(target) + self.write(" = ") + self.traverse(node.value) + if type_comment := self.get_type_comment(node): + self.write(type_comment) + + def visit_AugAssign(self, node): + self.fill() + self.traverse(node.target) + self.write(" " + self.binop[node.op.__class__.__name__] + "= ") + self.traverse(node.value) + + def visit_AnnAssign(self, node): + self.fill() + with self.delimit_if("(", ")", not node.simple and isinstance(node.target, Name)): + self.traverse(node.target) + self.write(": ") + self.traverse(node.annotation) + if node.value: + self.write(" = ") + self.traverse(node.value) + + def visit_Return(self, node): + self.fill("return") + if node.value: + self.write(" ") + self.traverse(node.value) + + def visit_Pass(self, node): + self.fill("pass") + + def visit_Break(self, node): + self.fill("break") + + def visit_Continue(self, node): + self.fill("continue") + + def visit_Delete(self, node): + self.fill("del ") + self.interleave(lambda: self.write(", "), self.traverse, node.targets) + + def visit_Assert(self, node): + self.fill("assert ") + self.traverse(node.test) + if node.msg: + self.write(", ") + self.traverse(node.msg) + + def visit_Global(self, node): + self.fill("global ") + self.interleave(lambda: self.write(", "), self.write, node.names) + + def visit_Nonlocal(self, node): + self.fill("nonlocal ") + self.interleave(lambda: self.write(", "), self.write, node.names) + + def visit_Await(self, node): + with self.require_parens(_Precedence.AWAIT, node): + self.write("await") + if node.value: + self.write(" ") + self.set_precedence(_Precedence.ATOM, node.value) + self.traverse(node.value) + + def visit_Yield(self, node): + with self.require_parens(_Precedence.YIELD, node): + self.write("yield") + if node.value: + self.write(" ") + self.set_precedence(_Precedence.ATOM, node.value) + self.traverse(node.value) + + def visit_YieldFrom(self, node): + with self.require_parens(_Precedence.YIELD, node): + self.write("yield from ") + if not node.value: + raise ValueError("Node can't be used without a value attribute.") + self.set_precedence(_Precedence.ATOM, node.value) + self.traverse(node.value) + + def visit_Raise(self, node): + self.fill("raise") + if not node.exc: + if node.cause: + raise ValueError(f"Node can't use cause without an exception.") + return + self.write(" ") + self.traverse(node.exc) + if node.cause: + self.write(" from ") + self.traverse(node.cause) + + def do_visit_try(self, node): + self.fill("try", allow_semicolon=False) + with self.block(): + self.traverse(node.body) + for ex in node.handlers: + self.traverse(ex) + if node.orelse: + self.fill("else", allow_semicolon=False) + with self.block(): + self.traverse(node.orelse) + if node.finalbody: + self.fill("finally", allow_semicolon=False) + with self.block(): + self.traverse(node.finalbody) + + def visit_Try(self, node): + prev_in_try_star = self._in_try_star + try: + self._in_try_star = False + self.do_visit_try(node) + finally: + self._in_try_star = prev_in_try_star + + def visit_TryStar(self, node): + prev_in_try_star = self._in_try_star + try: + self._in_try_star = True + self.do_visit_try(node) + finally: + self._in_try_star = prev_in_try_star + + def visit_ExceptHandler(self, node): + self.fill("except*" if self._in_try_star else "except", allow_semicolon=False) + if node.type: + self.write(" ") + self.traverse(node.type) + if node.name: + self.write(" as ") + self.write(node.name) + with self.block(): + self.traverse(node.body) + + def visit_ClassDef(self, node): + self.maybe_newline() + for deco in node.decorator_list: + self.fill("@", allow_semicolon=False) + self.traverse(deco) + self.fill("class " + node.name, allow_semicolon=False) + if hasattr(node, "type_params"): + self._type_params_helper(node.type_params) + with self.delimit_if("(", ")", condition = node.bases or node.keywords): + comma = False + for e in node.bases: + if comma: + self.write(", ") + else: + comma = True + self.traverse(e) + for e in node.keywords: + if comma: + self.write(", ") + else: + comma = True + self.traverse(e) + + with self.block(): + self._write_docstring_and_traverse_body(node) + + def visit_FunctionDef(self, node): + self._function_helper(node, "def") + + def visit_AsyncFunctionDef(self, node): + self._function_helper(node, "async def") + + def _function_helper(self, node, fill_suffix): + self.maybe_newline() + for deco in node.decorator_list: + self.fill("@", allow_semicolon=False) + self.traverse(deco) + def_str = fill_suffix + " " + node.name + self.fill(def_str, allow_semicolon=False) + if hasattr(node, "type_params"): + self._type_params_helper(node.type_params) + with self.delimit("(", ")"): + self.traverse(node.args) + if node.returns: + self.write(" -> ") + self.traverse(node.returns) + with self.block(extra=self.get_type_comment(node)): + self._write_docstring_and_traverse_body(node) + + def _type_params_helper(self, type_params): + if type_params is not None and len(type_params) > 0: + with self.delimit("[", "]"): + self.interleave(lambda: self.write(", "), self.traverse, type_params) + + def visit_TypeVar(self, node): + self.write(node.name) + if node.bound: + self.write(": ") + self.traverse(node.bound) + if node.default_value: + self.write(" = ") + self.traverse(node.default_value) + + def visit_TypeVarTuple(self, node): + self.write("*" + node.name) + if node.default_value: + self.write(" = ") + self.traverse(node.default_value) + + def visit_ParamSpec(self, node): + self.write("**" + node.name) + if node.default_value: + self.write(" = ") + self.traverse(node.default_value) + + def visit_TypeAlias(self, node): + self.fill("type ") + self.traverse(node.name) + self._type_params_helper(node.type_params) + self.write(" = ") + self.traverse(node.value) + + def visit_For(self, node): + self._for_helper("for ", node) + + def visit_AsyncFor(self, node): + self._for_helper("async for ", node) + + def _for_helper(self, fill, node): + self.fill(fill, allow_semicolon=False) + self.set_precedence(_Precedence.TUPLE, node.target) + self.traverse(node.target) + self.write(" in ") + self.traverse(node.iter) + with self.block(extra=self.get_type_comment(node)): + self.traverse(node.body) + if node.orelse: + self.fill("else", allow_semicolon=False) + with self.block(): + self.traverse(node.orelse) + + def visit_If(self, node): + self.fill("if ", allow_semicolon=False) + self.traverse(node.test) + with self.block(): + self.traverse(node.body) + # collapse nested ifs into equivalent elifs. + while node.orelse and len(node.orelse) == 1 and isinstance(node.orelse[0], If): + node = node.orelse[0] + self.fill("elif ", allow_semicolon=False) + self.traverse(node.test) + with self.block(): + self.traverse(node.body) + # final else + if node.orelse: + self.fill("else", allow_semicolon=False) + with self.block(): + self.traverse(node.orelse) + + def visit_While(self, node): + self.fill("while ", allow_semicolon=False) + self.traverse(node.test) + with self.block(): + self.traverse(node.body) + if node.orelse: + self.fill("else", allow_semicolon=False) + with self.block(): + self.traverse(node.orelse) + + def visit_With(self, node): + self.fill("with ", allow_semicolon=False) + self.interleave(lambda: self.write(", "), self.traverse, node.items) + with self.block(extra=self.get_type_comment(node)): + self.traverse(node.body) + + def visit_AsyncWith(self, node): + self.fill("async with ", allow_semicolon=False) + self.interleave(lambda: self.write(", "), self.traverse, node.items) + with self.block(extra=self.get_type_comment(node)): + self.traverse(node.body) + + def _str_literal_helper( + self, string, *, quote_types=_ALL_QUOTES, escape_special_whitespace=False + ): + """Helper for writing string literals, minimizing escapes. + Returns the tuple (string literal to write, possible quote types). + """ + def escape_char(c): + # \n and \t are non-printable, but we only escape them if + # escape_special_whitespace is True + if not escape_special_whitespace and c in "\n\t": + return c + # Always escape backslashes and other non-printable characters + if c == "\\" or not c.isprintable(): + return c.encode("unicode_escape").decode("ascii") + return c + + escaped_string = "".join(map(escape_char, string)) + possible_quotes = quote_types + if "\n" in escaped_string: + possible_quotes = [q for q in possible_quotes if q in _MULTI_QUOTES] + possible_quotes = [q for q in possible_quotes if q not in escaped_string] + if not possible_quotes: + # If there aren't any possible_quotes, fallback to using repr + # on the original string. Try to use a quote from quote_types, + # e.g., so that we use triple quotes for docstrings. + string = repr(string) + quote = next((q for q in quote_types if string[0] in q), string[0]) + return string[1:-1], [quote] + if escaped_string: + # Sort so that we prefer '''"''' over """\"""" + possible_quotes.sort(key=lambda q: q[0] == escaped_string[-1]) + # If we're using triple quotes and we'd need to escape a final + # quote, escape it + if possible_quotes[0][0] == escaped_string[-1]: + assert len(possible_quotes[0]) == 3 + escaped_string = escaped_string[:-1] + "\\" + escaped_string[-1] + return escaped_string, possible_quotes + + def _write_str_avoiding_backslashes(self, string, *, quote_types=_ALL_QUOTES): + """Write string literal value with a best effort attempt to avoid backslashes.""" + string, quote_types = self._str_literal_helper(string, quote_types=quote_types) + quote_type = quote_types[0] + self.write(f"{quote_type}{string}{quote_type}") + + def _ftstring_helper(self, parts): + new_parts = [] + quote_types = list(_ALL_QUOTES) + fallback_to_repr = False + for value, is_constant in parts: + if is_constant: + value, new_quote_types = self._str_literal_helper( + value, + quote_types=quote_types, + escape_special_whitespace=True, + ) + if set(new_quote_types).isdisjoint(quote_types): + fallback_to_repr = True + break + quote_types = new_quote_types + else: + if "\n" in value: + quote_types = [q for q in quote_types if q in _MULTI_QUOTES] + assert quote_types + + new_quote_types = [q for q in quote_types if q not in value] + if new_quote_types: + quote_types = new_quote_types + new_parts.append(value) + + if fallback_to_repr: + # If we weren't able to find a quote type that works for all parts + # of the JoinedStr, fallback to using repr and triple single quotes. + quote_types = ["'''"] + new_parts.clear() + for value, is_constant in parts: + if is_constant: + value = repr('"' + value) # force repr to use single quotes + expected_prefix = "'\"" + assert value.startswith(expected_prefix), repr(value) + value = value[len(expected_prefix):-1] + new_parts.append(value) + + value = "".join(new_parts) + quote_type = quote_types[0] + self.write(f"{quote_type}{value}{quote_type}") + + def _write_ftstring(self, values, prefix): + self.write(prefix) + fstring_parts = [] + for value in values: + with self.buffered() as buffer: + self._write_ftstring_inner(value) + fstring_parts.append( + ("".join(buffer), isinstance(value, Constant)) + ) + self._ftstring_helper(fstring_parts) + + def visit_JoinedStr(self, node): + self._write_ftstring(node.values, "f") + + def visit_TemplateStr(self, node): + self._write_ftstring(node.values, "t") + + def _write_ftstring_inner(self, node, is_format_spec=False): + if isinstance(node, JoinedStr): + # for both the f-string itself, and format_spec + for value in node.values: + self._write_ftstring_inner(value, is_format_spec=is_format_spec) + elif isinstance(node, Constant) and isinstance(node.value, str): + value = node.value.replace("{", "{{").replace("}", "}}") + + if is_format_spec: + value = value.replace("\\", "\\\\") + value = value.replace("'", "\\'") + value = value.replace('"', '\\"') + value = value.replace("\n", "\\n") + self.write(value) + elif isinstance(node, FormattedValue): + self.visit_FormattedValue(node) + elif isinstance(node, Interpolation): + self.visit_Interpolation(node) + else: + raise ValueError(f"Unexpected node inside JoinedStr, {node!r}") + + def _unparse_interpolation_value(self, inner): + unparser = type(self)() + unparser.set_precedence(_Precedence.TEST.next(), inner) + return unparser.visit(inner) + + def _write_interpolation(self, node, use_str_attr=False): + with self.delimit("{", "}"): + if use_str_attr: + expr = node.str + else: + expr = self._unparse_interpolation_value(node.value) + if expr.startswith("{"): + # Separate pair of opening brackets as "{ {" + self.write(" ") + self.write(expr) + if node.conversion != -1: + self.write(f"!{chr(node.conversion)}") + if node.format_spec: + self.write(":") + self._write_ftstring_inner(node.format_spec, is_format_spec=True) + + def visit_FormattedValue(self, node): + self._write_interpolation(node) + + def visit_Interpolation(self, node): + # If `str` is set to `None`, use the `value` to generate the source code. + self._write_interpolation(node, use_str_attr=node.str is not None) + + def visit_Name(self, node): + self.write(node.id) + + def _write_docstring(self, node): + self.fill(allow_semicolon=False) + if node.kind == "u": + self.write("u") + self._write_str_avoiding_backslashes(node.value, quote_types=_MULTI_QUOTES) + + def _write_constant(self, value): + if isinstance(value, (float, complex)): + # Substitute overflowing decimal literal for AST infinities, + # and inf - inf for NaNs. + self.write( + repr(value) + .replace("inf", _INFSTR) + .replace("nan", f"({_INFSTR}-{_INFSTR})") + ) + else: + self.write(repr(value)) + + def visit_Constant(self, node): + value = node.value + if isinstance(value, tuple): + with self.delimit("(", ")"): + self.items_view(self._write_constant, value) + elif value is ...: + self.write("...") + else: + if node.kind == "u": + self.write("u") + self._write_constant(node.value) + + def visit_List(self, node): + with self.delimit("[", "]"): + self.interleave(lambda: self.write(", "), self.traverse, node.elts) + + def visit_ListComp(self, node): + with self.delimit("[", "]"): + self.traverse(node.elt) + for gen in node.generators: + self.traverse(gen) + + def visit_GeneratorExp(self, node): + with self.delimit("(", ")"): + self.traverse(node.elt) + for gen in node.generators: + self.traverse(gen) + + def visit_SetComp(self, node): + with self.delimit("{", "}"): + self.traverse(node.elt) + for gen in node.generators: + self.traverse(gen) + + def visit_DictComp(self, node): + with self.delimit("{", "}"): + self.traverse(node.key) + self.write(": ") + self.traverse(node.value) + for gen in node.generators: + self.traverse(gen) + + def visit_comprehension(self, node): + if node.is_async: + self.write(" async for ") + else: + self.write(" for ") + self.set_precedence(_Precedence.TUPLE, node.target) + self.traverse(node.target) + self.write(" in ") + self.set_precedence(_Precedence.TEST.next(), node.iter, *node.ifs) + self.traverse(node.iter) + for if_clause in node.ifs: + self.write(" if ") + self.traverse(if_clause) + + def visit_IfExp(self, node): + with self.require_parens(_Precedence.TEST, node): + self.set_precedence(_Precedence.TEST.next(), node.body, node.test) + self.traverse(node.body) + self.write(" if ") + self.traverse(node.test) + self.write(" else ") + self.set_precedence(_Precedence.TEST, node.orelse) + self.traverse(node.orelse) + + def visit_Set(self, node): + if node.elts: + with self.delimit("{", "}"): + self.interleave(lambda: self.write(", "), self.traverse, node.elts) + else: + # `{}` would be interpreted as a dictionary literal, and + # `set` might be shadowed. Thus: + self.write('{*()}') + + def visit_Dict(self, node): + def write_key_value_pair(k, v): + self.traverse(k) + self.write(": ") + self.traverse(v) + + def write_item(item): + k, v = item + if k is None: + # for dictionary unpacking operator in dicts {**{'y': 2}} + # see PEP 448 for details + self.write("**") + self.set_precedence(_Precedence.EXPR, v) + self.traverse(v) + else: + write_key_value_pair(k, v) + + with self.delimit("{", "}"): + self.interleave( + lambda: self.write(", "), write_item, zip(node.keys, node.values) + ) + + def visit_Tuple(self, node): + with self.delimit_if( + "(", + ")", + len(node.elts) == 0 or self.get_precedence(node) > _Precedence.TUPLE + ): + self.items_view(self.traverse, node.elts) + + unop = {"Invert": "~", "Not": "not", "UAdd": "+", "USub": "-"} + unop_precedence = { + "not": _Precedence.NOT, + "~": _Precedence.FACTOR, + "+": _Precedence.FACTOR, + "-": _Precedence.FACTOR, + } + + def visit_UnaryOp(self, node): + operator = self.unop[node.op.__class__.__name__] + operator_precedence = self.unop_precedence[operator] + with self.require_parens(operator_precedence, node): + self.write(operator) + # factor prefixes (+, -, ~) shouldn't be separated + # from the value they belong, (e.g: +1 instead of + 1) + if operator_precedence is not _Precedence.FACTOR: + self.write(" ") + self.set_precedence(operator_precedence, node.operand) + self.traverse(node.operand) + + binop = { + "Add": "+", + "Sub": "-", + "Mult": "*", + "MatMult": "@", + "Div": "/", + "Mod": "%", + "LShift": "<<", + "RShift": ">>", + "BitOr": "|", + "BitXor": "^", + "BitAnd": "&", + "FloorDiv": "//", + "Pow": "**", + } + + binop_precedence = { + "+": _Precedence.ARITH, + "-": _Precedence.ARITH, + "*": _Precedence.TERM, + "@": _Precedence.TERM, + "/": _Precedence.TERM, + "%": _Precedence.TERM, + "<<": _Precedence.SHIFT, + ">>": _Precedence.SHIFT, + "|": _Precedence.BOR, + "^": _Precedence.BXOR, + "&": _Precedence.BAND, + "//": _Precedence.TERM, + "**": _Precedence.POWER, + } + + binop_rassoc = frozenset(("**",)) + def visit_BinOp(self, node): + operator = self.binop[node.op.__class__.__name__] + operator_precedence = self.binop_precedence[operator] + with self.require_parens(operator_precedence, node): + if operator in self.binop_rassoc: + left_precedence = operator_precedence.next() + right_precedence = operator_precedence + else: + left_precedence = operator_precedence + right_precedence = operator_precedence.next() + + self.set_precedence(left_precedence, node.left) + self.traverse(node.left) + self.write(f" {operator} ") + self.set_precedence(right_precedence, node.right) + self.traverse(node.right) + + cmpops = { + "Eq": "==", + "NotEq": "!=", + "Lt": "<", + "LtE": "<=", + "Gt": ">", + "GtE": ">=", + "Is": "is", + "IsNot": "is not", + "In": "in", + "NotIn": "not in", + } + + def visit_Compare(self, node): + with self.require_parens(_Precedence.CMP, node): + self.set_precedence(_Precedence.CMP.next(), node.left, *node.comparators) + self.traverse(node.left) + for o, e in zip(node.ops, node.comparators): + self.write(" " + self.cmpops[o.__class__.__name__] + " ") + self.traverse(e) + + boolops = {"And": "and", "Or": "or"} + boolop_precedence = {"and": _Precedence.AND, "or": _Precedence.OR} + + def visit_BoolOp(self, node): + operator = self.boolops[node.op.__class__.__name__] + operator_precedence = self.boolop_precedence[operator] + + def increasing_level_traverse(node): + nonlocal operator_precedence + operator_precedence = operator_precedence.next() + self.set_precedence(operator_precedence, node) + self.traverse(node) + + with self.require_parens(operator_precedence, node): + s = f" {operator} " + self.interleave(lambda: self.write(s), increasing_level_traverse, node.values) + + def visit_Attribute(self, node): + self.set_precedence(_Precedence.ATOM, node.value) + self.traverse(node.value) + # Special case: 3.__abs__() is a syntax error, so if node.value + # is an integer literal then we need to either parenthesize + # it or add an extra space to get 3 .__abs__(). + if isinstance(node.value, Constant) and isinstance(node.value.value, int): + self.write(" ") + self.write(".") + self.write(node.attr) + + def visit_Call(self, node): + self.set_precedence(_Precedence.ATOM, node.func) + self.traverse(node.func) + with self.delimit("(", ")"): + comma = False + for e in node.args: + if comma: + self.write(", ") + else: + comma = True + self.traverse(e) + for e in node.keywords: + if comma: + self.write(", ") + else: + comma = True + self.traverse(e) + + def visit_Subscript(self, node): + def is_non_empty_tuple(slice_value): + return ( + isinstance(slice_value, Tuple) + and slice_value.elts + ) + + self.set_precedence(_Precedence.ATOM, node.value) + self.traverse(node.value) + with self.delimit("[", "]"): + if is_non_empty_tuple(node.slice): + # parentheses can be omitted if the tuple isn't empty + self.items_view(self.traverse, node.slice.elts) + else: + self.traverse(node.slice) + + def visit_Starred(self, node): + self.write("*") + self.set_precedence(_Precedence.EXPR, node.value) + self.traverse(node.value) + + def visit_Ellipsis(self, node): + self.write("...") + + def visit_Slice(self, node): + if node.lower: + self.traverse(node.lower) + self.write(":") + if node.upper: + self.traverse(node.upper) + if node.step: + self.write(":") + self.traverse(node.step) + + def visit_Match(self, node): + self.fill("match ", allow_semicolon=False) + self.traverse(node.subject) + with self.block(): + for case in node.cases: + self.traverse(case) + + def visit_arg(self, node): + self.write(node.arg) + if node.annotation: + self.write(": ") + self.traverse(node.annotation) + + def visit_arguments(self, node): + first = True + # normal arguments + all_args = node.posonlyargs + node.args + defaults = [None] * (len(all_args) - len(node.defaults)) + node.defaults + for index, elements in enumerate(zip(all_args, defaults), 1): + a, d = elements + if first: + first = False + else: + self.write(", ") + self.traverse(a) + if d: + self.write("=") + self.traverse(d) + if index == len(node.posonlyargs): + self.write(", /") + + # varargs, or bare '*' if no varargs but keyword-only arguments present + if node.vararg or node.kwonlyargs: + if first: + first = False + else: + self.write(", ") + self.write("*") + if node.vararg: + self.write(node.vararg.arg) + if node.vararg.annotation: + self.write(": ") + self.traverse(node.vararg.annotation) + + # keyword-only arguments + if node.kwonlyargs: + for a, d in zip(node.kwonlyargs, node.kw_defaults): + self.write(", ") + self.traverse(a) + if d: + self.write("=") + self.traverse(d) + + # kwargs + if node.kwarg: + if first: + first = False + else: + self.write(", ") + self.write("**" + node.kwarg.arg) + if node.kwarg.annotation: + self.write(": ") + self.traverse(node.kwarg.annotation) + + def visit_keyword(self, node): + if node.arg is None: + self.write("**") + else: + self.write(node.arg) + self.write("=") + self.traverse(node.value) + + def visit_Lambda(self, node): + with self.require_parens(_Precedence.TEST, node): + self.write("lambda") + with self.buffered() as buffer: + self.traverse(node.args) + if buffer: + self.write(" ", *buffer) + self.write(": ") + self.set_precedence(_Precedence.TEST, node.body) + self.traverse(node.body) + + def visit_alias(self, node): + self.write(node.name) + if node.asname: + self.write(" as " + node.asname) + + def visit_withitem(self, node): + self.traverse(node.context_expr) + if node.optional_vars: + self.write(" as ") + self.traverse(node.optional_vars) + + def visit_match_case(self, node): + self.fill("case ", allow_semicolon=False) + self.traverse(node.pattern) + if node.guard: + self.write(" if ") + self.traverse(node.guard) + with self.block(): + self.traverse(node.body) + + def visit_MatchValue(self, node): + self.traverse(node.value) + + def visit_MatchSingleton(self, node): + self._write_constant(node.value) + + def visit_MatchSequence(self, node): + with self.delimit("[", "]"): + self.interleave( + lambda: self.write(", "), self.traverse, node.patterns + ) + + def visit_MatchStar(self, node): + name = node.name + if name is None: + name = "_" + self.write(f"*{name}") + + def visit_MatchMapping(self, node): + def write_key_pattern_pair(pair): + k, p = pair + self.traverse(k) + self.write(": ") + self.traverse(p) + + with self.delimit("{", "}"): + keys = node.keys + self.interleave( + lambda: self.write(", "), + write_key_pattern_pair, + zip(keys, node.patterns, strict=True), + ) + rest = node.rest + if rest is not None: + if keys: + self.write(", ") + self.write(f"**{rest}") + + def visit_MatchClass(self, node): + self.set_precedence(_Precedence.ATOM, node.cls) + self.traverse(node.cls) + with self.delimit("(", ")"): + patterns = node.patterns + self.interleave( + lambda: self.write(", "), self.traverse, patterns + ) + attrs = node.kwd_attrs + if attrs: + def write_attr_pattern(pair): + attr, pattern = pair + self.write(f"{attr}=") + self.traverse(pattern) + + if patterns: + self.write(", ") + self.interleave( + lambda: self.write(", "), + write_attr_pattern, + zip(attrs, node.kwd_patterns, strict=True), + ) + + def visit_MatchAs(self, node): + name = node.name + pattern = node.pattern + if name is None: + self.write("_") + elif pattern is None: + self.write(node.name) + else: + with self.require_parens(_Precedence.TEST, node): + self.set_precedence(_Precedence.BOR, node.pattern) + self.traverse(node.pattern) + self.write(f" as {node.name}") + + def visit_MatchOr(self, node): + with self.require_parens(_Precedence.BOR, node): + self.set_precedence(_Precedence.BOR.next(), *node.patterns) + self.interleave(lambda: self.write(" | "), self.traverse, node.patterns) diff --git a/PythonLib/full/_collections_abc.py b/PythonLib/full/_collections_abc.py index 09745658..241d40d5 100644 --- a/PythonLib/full/_collections_abc.py +++ b/PythonLib/full/_collections_abc.py @@ -85,6 +85,10 @@ def _f(): pass dict_items = type({}.items()) ## misc ## mappingproxy = type(type.__dict__) +def _get_framelocalsproxy(): + return type(sys._getframe().f_locals) +framelocalsproxy = _get_framelocalsproxy() +del _get_framelocalsproxy generator = type((lambda: (yield))()) ## coroutine ## async def _coro(): pass @@ -481,9 +485,10 @@ def __new__(cls, origin, args): def __repr__(self): if len(self.__args__) == 2 and _is_param_expr(self.__args__[0]): return super().__repr__() + from annotationlib import type_repr return (f'collections.abc.Callable' - f'[[{", ".join([_type_repr(a) for a in self.__args__[:-1]])}], ' - f'{_type_repr(self.__args__[-1])}]') + f'[[{", ".join([type_repr(a) for a in self.__args__[:-1]])}], ' + f'{type_repr(self.__args__[-1])}]') def __reduce__(self): args = self.__args__ @@ -520,23 +525,6 @@ def _is_param_expr(obj): names = ('ParamSpec', '_ConcatenateGenericAlias') return obj.__module__ == 'typing' and any(obj.__name__ == name for name in names) -def _type_repr(obj): - """Return the repr() of an object, special-casing types (internal helper). - - Copied from :mod:`typing` since collections.abc - shouldn't depend on that module. - (Keep this roughly in sync with the typing version.) - """ - if isinstance(obj, type): - if obj.__module__ == 'builtins': - return obj.__qualname__ - return f'{obj.__module__}.{obj.__qualname__}' - if obj is Ellipsis: - return '...' - if isinstance(obj, FunctionType): - return obj.__name__ - return repr(obj) - class Callable(metaclass=ABCMeta): @@ -836,6 +824,7 @@ def __eq__(self, other): __reversed__ = None Mapping.register(mappingproxy) +Mapping.register(framelocalsproxy) class MappingView(Sized): @@ -1068,6 +1057,7 @@ def count(self, value): Sequence.register(tuple) Sequence.register(str) +Sequence.register(bytes) Sequence.register(range) Sequence.register(memoryview) @@ -1078,7 +1068,7 @@ def __new__(cls, name, bases, namespace, **kwargs): warnings._deprecated( "collections.abc.ByteString", - remove=(3, 14), + remove=(3, 17), ) return super().__new__(cls, name, bases, namespace, **kwargs) @@ -1087,14 +1077,18 @@ def __instancecheck__(cls, instance): warnings._deprecated( "collections.abc.ByteString", - remove=(3, 14), + remove=(3, 17), ) return super().__instancecheck__(instance) class ByteString(Sequence, metaclass=_DeprecateByteStringMeta): - """This unifies bytes and bytearray. + """Deprecated ABC serving as a common supertype of ``bytes`` and ``bytearray``. - XXX Should add all their methods. + This ABC is scheduled for removal in Python 3.17. + Use ``isinstance(obj, collections.abc.Buffer)`` to test if ``obj`` + implements the buffer protocol at runtime. For use in type annotations, + either use ``Buffer`` or a union that explicitly specifies the types your + code supports (e.g., ``bytes | bytearray | memoryview``). """ __slots__ = () @@ -1170,4 +1164,4 @@ def __iadd__(self, values): MutableSequence.register(list) -MutableSequence.register(bytearray) # Multiply inheriting, see ByteString +MutableSequence.register(bytearray) diff --git a/PythonLib/full/_colorize.py b/PythonLib/full/_colorize.py new file mode 100644 index 00000000..d6673f66 --- /dev/null +++ b/PythonLib/full/_colorize.py @@ -0,0 +1,355 @@ +import os +import sys + +from collections.abc import Callable, Iterator, Mapping +from dataclasses import dataclass, field, Field + +COLORIZE = True + + +# types +if False: + from typing import IO, Self, ClassVar + _theme: Theme + + +class ANSIColors: + RESET = "\x1b[0m" + + BLACK = "\x1b[30m" + BLUE = "\x1b[34m" + CYAN = "\x1b[36m" + GREEN = "\x1b[32m" + GREY = "\x1b[90m" + MAGENTA = "\x1b[35m" + RED = "\x1b[31m" + WHITE = "\x1b[37m" # more like LIGHT GRAY + YELLOW = "\x1b[33m" + + BOLD = "\x1b[1m" + BOLD_BLACK = "\x1b[1;30m" # DARK GRAY + BOLD_BLUE = "\x1b[1;34m" + BOLD_CYAN = "\x1b[1;36m" + BOLD_GREEN = "\x1b[1;32m" + BOLD_MAGENTA = "\x1b[1;35m" + BOLD_RED = "\x1b[1;31m" + BOLD_WHITE = "\x1b[1;37m" # actual WHITE + BOLD_YELLOW = "\x1b[1;33m" + + # intense = like bold but without being bold + INTENSE_BLACK = "\x1b[90m" + INTENSE_BLUE = "\x1b[94m" + INTENSE_CYAN = "\x1b[96m" + INTENSE_GREEN = "\x1b[92m" + INTENSE_MAGENTA = "\x1b[95m" + INTENSE_RED = "\x1b[91m" + INTENSE_WHITE = "\x1b[97m" + INTENSE_YELLOW = "\x1b[93m" + + BACKGROUND_BLACK = "\x1b[40m" + BACKGROUND_BLUE = "\x1b[44m" + BACKGROUND_CYAN = "\x1b[46m" + BACKGROUND_GREEN = "\x1b[42m" + BACKGROUND_MAGENTA = "\x1b[45m" + BACKGROUND_RED = "\x1b[41m" + BACKGROUND_WHITE = "\x1b[47m" + BACKGROUND_YELLOW = "\x1b[43m" + + INTENSE_BACKGROUND_BLACK = "\x1b[100m" + INTENSE_BACKGROUND_BLUE = "\x1b[104m" + INTENSE_BACKGROUND_CYAN = "\x1b[106m" + INTENSE_BACKGROUND_GREEN = "\x1b[102m" + INTENSE_BACKGROUND_MAGENTA = "\x1b[105m" + INTENSE_BACKGROUND_RED = "\x1b[101m" + INTENSE_BACKGROUND_WHITE = "\x1b[107m" + INTENSE_BACKGROUND_YELLOW = "\x1b[103m" + + +ColorCodes = set() +NoColors = ANSIColors() + +for attr, code in ANSIColors.__dict__.items(): + if not attr.startswith("__"): + ColorCodes.add(code) + setattr(NoColors, attr, "") + + +# +# Experimental theming support (see gh-133346) +# + +# - Create a theme by copying an existing `Theme` with one or more sections +# replaced, using `default_theme.copy_with()`; +# - create a theme section by copying an existing `ThemeSection` with one or +# more colors replaced, using for example `default_theme.syntax.copy_with()`; +# - create a theme from scratch by instantiating a `Theme` data class with +# the required sections (which are also dataclass instances). +# +# Then call `_colorize.set_theme(your_theme)` to set it. +# +# Put your theme configuration in $PYTHONSTARTUP for the interactive shell, +# or sitecustomize.py in your virtual environment or Python installation for +# other uses. Your applications can call `_colorize.set_theme()` too. +# +# Note that thanks to the dataclasses providing default values for all fields, +# creating a new theme or theme section from scratch is possible without +# specifying all keys. +# +# For example, here's a theme that makes punctuation and operators less prominent: +# +# try: +# from _colorize import set_theme, default_theme, Syntax, ANSIColors +# except ImportError: +# pass +# else: +# theme_with_dim_operators = default_theme.copy_with( +# syntax=Syntax(op=ANSIColors.INTENSE_BLACK), +# ) +# set_theme(theme_with_dim_operators) +# del set_theme, default_theme, Syntax, ANSIColors, theme_with_dim_operators +# +# Guarding the import ensures that your .pythonstartup file will still work in +# Python 3.13 and older. Deleting the variables ensures they don't remain in your +# interactive shell's global scope. + +class ThemeSection(Mapping[str, str]): + """A mixin/base class for theme sections. + + It enables dictionary access to a section, as well as implements convenience + methods. + """ + + # The two types below are just that: types to inform the type checker that the + # mixin will work in context of those fields existing + __dataclass_fields__: ClassVar[dict[str, Field[str]]] + _name_to_value: Callable[[str], str] + + def __post_init__(self) -> None: + name_to_value = {} + for color_name in self.__dataclass_fields__: + name_to_value[color_name] = getattr(self, color_name) + super().__setattr__('_name_to_value', name_to_value.__getitem__) + + def copy_with(self, **kwargs: str) -> Self: + color_state: dict[str, str] = {} + for color_name in self.__dataclass_fields__: + color_state[color_name] = getattr(self, color_name) + color_state.update(kwargs) + return type(self)(**color_state) + + @classmethod + def no_colors(cls) -> Self: + color_state: dict[str, str] = {} + for color_name in cls.__dataclass_fields__: + color_state[color_name] = "" + return cls(**color_state) + + def __getitem__(self, key: str) -> str: + return self._name_to_value(key) + + def __len__(self) -> int: + return len(self.__dataclass_fields__) + + def __iter__(self) -> Iterator[str]: + return iter(self.__dataclass_fields__) + + +@dataclass(frozen=True, kw_only=True) +class Argparse(ThemeSection): + usage: str = ANSIColors.BOLD_BLUE + prog: str = ANSIColors.BOLD_MAGENTA + prog_extra: str = ANSIColors.MAGENTA + heading: str = ANSIColors.BOLD_BLUE + summary_long_option: str = ANSIColors.CYAN + summary_short_option: str = ANSIColors.GREEN + summary_label: str = ANSIColors.YELLOW + summary_action: str = ANSIColors.GREEN + long_option: str = ANSIColors.BOLD_CYAN + short_option: str = ANSIColors.BOLD_GREEN + label: str = ANSIColors.BOLD_YELLOW + action: str = ANSIColors.BOLD_GREEN + reset: str = ANSIColors.RESET + + +@dataclass(frozen=True) +class Syntax(ThemeSection): + prompt: str = ANSIColors.BOLD_MAGENTA + keyword: str = ANSIColors.BOLD_BLUE + keyword_constant: str = ANSIColors.BOLD_BLUE + builtin: str = ANSIColors.CYAN + comment: str = ANSIColors.RED + string: str = ANSIColors.GREEN + number: str = ANSIColors.YELLOW + op: str = ANSIColors.RESET + definition: str = ANSIColors.BOLD + soft_keyword: str = ANSIColors.BOLD_BLUE + reset: str = ANSIColors.RESET + + +@dataclass(frozen=True) +class Traceback(ThemeSection): + type: str = ANSIColors.BOLD_MAGENTA + message: str = ANSIColors.MAGENTA + filename: str = ANSIColors.MAGENTA + line_no: str = ANSIColors.MAGENTA + frame: str = ANSIColors.MAGENTA + error_highlight: str = ANSIColors.BOLD_RED + error_range: str = ANSIColors.RED + reset: str = ANSIColors.RESET + + +@dataclass(frozen=True) +class Unittest(ThemeSection): + passed: str = ANSIColors.GREEN + warn: str = ANSIColors.YELLOW + fail: str = ANSIColors.RED + fail_info: str = ANSIColors.BOLD_RED + reset: str = ANSIColors.RESET + + +@dataclass(frozen=True) +class Theme: + """A suite of themes for all sections of Python. + + When adding a new one, remember to also modify `copy_with` and `no_colors` + below. + """ + argparse: Argparse = field(default_factory=Argparse) + syntax: Syntax = field(default_factory=Syntax) + traceback: Traceback = field(default_factory=Traceback) + unittest: Unittest = field(default_factory=Unittest) + + def copy_with( + self, + *, + argparse: Argparse | None = None, + syntax: Syntax | None = None, + traceback: Traceback | None = None, + unittest: Unittest | None = None, + ) -> Self: + """Return a new Theme based on this instance with some sections replaced. + + Themes are immutable to protect against accidental modifications that + could lead to invalid terminal states. + """ + return type(self)( + argparse=argparse or self.argparse, + syntax=syntax or self.syntax, + traceback=traceback or self.traceback, + unittest=unittest or self.unittest, + ) + + @classmethod + def no_colors(cls) -> Self: + """Return a new Theme where colors in all sections are empty strings. + + This allows writing user code as if colors are always used. The color + fields will be ANSI color code strings when colorization is desired + and possible, and empty strings otherwise. + """ + return cls( + argparse=Argparse.no_colors(), + syntax=Syntax.no_colors(), + traceback=Traceback.no_colors(), + unittest=Unittest.no_colors(), + ) + + +def get_colors( + colorize: bool = False, *, file: IO[str] | IO[bytes] | None = None +) -> ANSIColors: + if colorize or can_colorize(file=file): + return ANSIColors() + else: + return NoColors + + +def decolor(text: str) -> str: + """Remove ANSI color codes from a string.""" + for code in ColorCodes: + text = text.replace(code, "") + return text + + +def can_colorize(*, file: IO[str] | IO[bytes] | None = None) -> bool: + + def _safe_getenv(k: str, fallback: str | None = None) -> str | None: + """Exception-safe environment retrieval. See gh-128636.""" + try: + return os.environ.get(k, fallback) + except Exception: + return fallback + + if file is None: + file = sys.stdout + + if not sys.flags.ignore_environment: + if _safe_getenv("PYTHON_COLORS") == "0": + return False + if _safe_getenv("PYTHON_COLORS") == "1": + return True + if _safe_getenv("NO_COLOR"): + return False + if not COLORIZE: + return False + if _safe_getenv("FORCE_COLOR"): + return True + if _safe_getenv("TERM") == "dumb": + return False + + if not hasattr(file, "fileno"): + return False + + if sys.platform == "win32": + try: + import nt + + if not nt._supports_virtual_terminal(): + return False + except (ImportError, AttributeError): + return False + + try: + return os.isatty(file.fileno()) + except OSError: + return hasattr(file, "isatty") and file.isatty() + + +default_theme = Theme() +theme_no_color = default_theme.no_colors() + + +def get_theme( + *, + tty_file: IO[str] | IO[bytes] | None = None, + force_color: bool = False, + force_no_color: bool = False, +) -> Theme: + """Returns the currently set theme, potentially in a zero-color variant. + + In cases where colorizing is not possible (see `can_colorize`), the returned + theme contains all empty strings in all color definitions. + See `Theme.no_colors()` for more information. + + It is recommended not to cache the result of this function for extended + periods of time because the user might influence theme selection by + the interactive shell, a debugger, or application-specific code. The + environment (including environment variable state and console configuration + on Windows) can also change in the course of the application life cycle. + """ + if force_color or (not force_no_color and + can_colorize(file=tty_file)): + return _theme + return theme_no_color + + +def set_theme(t: Theme) -> None: + global _theme + + if not isinstance(t, Theme): + raise ValueError(f"Expected Theme object, found {t}") + + _theme = t + + +set_theme(default_theme) diff --git a/PythonLib/full/_compat_pickle.py b/PythonLib/full/_compat_pickle.py index 65a94b6b..439f8c02 100644 --- a/PythonLib/full/_compat_pickle.py +++ b/PythonLib/full/_compat_pickle.py @@ -22,7 +22,6 @@ 'tkMessageBox': 'tkinter.messagebox', 'ScrolledText': 'tkinter.scrolledtext', 'Tkconstants': 'tkinter.constants', - 'Tix': 'tkinter.tix', 'ttk': 'tkinter.ttk', 'Tkinter': 'tkinter', 'markupbase': '_markupbase', diff --git a/PythonLib/full/_compression.py b/PythonLib/full/_compression.py deleted file mode 100644 index e8b70aa0..00000000 --- a/PythonLib/full/_compression.py +++ /dev/null @@ -1,162 +0,0 @@ -"""Internal classes used by the gzip, lzma and bz2 modules""" - -import io -import sys - -BUFFER_SIZE = io.DEFAULT_BUFFER_SIZE # Compressed data read chunk size - - -class BaseStream(io.BufferedIOBase): - """Mode-checking helper functions.""" - - def _check_not_closed(self): - if self.closed: - raise ValueError("I/O operation on closed file") - - def _check_can_read(self): - if not self.readable(): - raise io.UnsupportedOperation("File not open for reading") - - def _check_can_write(self): - if not self.writable(): - raise io.UnsupportedOperation("File not open for writing") - - def _check_can_seek(self): - if not self.readable(): - raise io.UnsupportedOperation("Seeking is only supported " - "on files open for reading") - if not self.seekable(): - raise io.UnsupportedOperation("The underlying file object " - "does not support seeking") - - -class DecompressReader(io.RawIOBase): - """Adapts the decompressor API to a RawIOBase reader API""" - - def readable(self): - return True - - def __init__(self, fp, decomp_factory, trailing_error=(), **decomp_args): - self._fp = fp - self._eof = False - self._pos = 0 # Current offset in decompressed stream - - # Set to size of decompressed stream once it is known, for SEEK_END - self._size = -1 - - # Save the decompressor factory and arguments. - # If the file contains multiple compressed streams, each - # stream will need a separate decompressor object. A new decompressor - # object is also needed when implementing a backwards seek(). - self._decomp_factory = decomp_factory - self._decomp_args = decomp_args - self._decompressor = self._decomp_factory(**self._decomp_args) - - # Exception class to catch from decompressor signifying invalid - # trailing data to ignore - self._trailing_error = trailing_error - - def close(self): - self._decompressor = None - return super().close() - - def seekable(self): - return self._fp.seekable() - - def readinto(self, b): - with memoryview(b) as view, view.cast("B") as byte_view: - data = self.read(len(byte_view)) - byte_view[:len(data)] = data - return len(data) - - def read(self, size=-1): - if size < 0: - return self.readall() - - if not size or self._eof: - return b"" - data = None # Default if EOF is encountered - # Depending on the input data, our call to the decompressor may not - # return any data. In this case, try again after reading another block. - while True: - if self._decompressor.eof: - rawblock = (self._decompressor.unused_data or - self._fp.read(BUFFER_SIZE)) - if not rawblock: - break - # Continue to next stream. - self._decompressor = self._decomp_factory( - **self._decomp_args) - try: - data = self._decompressor.decompress(rawblock, size) - except self._trailing_error: - # Trailing data isn't a valid compressed stream; ignore it. - break - else: - if self._decompressor.needs_input: - rawblock = self._fp.read(BUFFER_SIZE) - if not rawblock: - raise EOFError("Compressed file ended before the " - "end-of-stream marker was reached") - else: - rawblock = b"" - data = self._decompressor.decompress(rawblock, size) - if data: - break - if not data: - self._eof = True - self._size = self._pos - return b"" - self._pos += len(data) - return data - - def readall(self): - chunks = [] - # sys.maxsize means the max length of output buffer is unlimited, - # so that the whole input buffer can be decompressed within one - # .decompress() call. - while data := self.read(sys.maxsize): - chunks.append(data) - - return b"".join(chunks) - - # Rewind the file to the beginning of the data stream. - def _rewind(self): - self._fp.seek(0) - self._eof = False - self._pos = 0 - self._decompressor = self._decomp_factory(**self._decomp_args) - - def seek(self, offset, whence=io.SEEK_SET): - # Recalculate offset as an absolute file position. - if whence == io.SEEK_SET: - pass - elif whence == io.SEEK_CUR: - offset = self._pos + offset - elif whence == io.SEEK_END: - # Seeking relative to EOF - we need to know the file's size. - if self._size < 0: - while self.read(io.DEFAULT_BUFFER_SIZE): - pass - offset = self._size + offset - else: - raise ValueError("Invalid value for whence: {}".format(whence)) - - # Make it so that offset is the number of bytes to skip forward. - if offset < self._pos: - self._rewind() - else: - offset -= self._pos - - # Read and discard data until we reach the desired position. - while offset > 0: - data = self.read(min(io.DEFAULT_BUFFER_SIZE, offset)) - if not data: - break - offset -= len(data) - - return self._pos - - def tell(self): - """Return the current file position.""" - return self._pos diff --git a/PythonLib/full/_ios_support.py b/PythonLib/full/_ios_support.py new file mode 100644 index 00000000..20467a7c --- /dev/null +++ b/PythonLib/full/_ios_support.py @@ -0,0 +1,71 @@ +import sys +try: + from ctypes import cdll, c_void_p, c_char_p, util +except ImportError: + # ctypes is an optional module. If it's not present, we're limited in what + # we can tell about the system, but we don't want to prevent the module + # from working. + print("ctypes isn't available; iOS system calls will not be available", file=sys.stderr) + objc = None +else: + # ctypes is available. Load the ObjC library, and wrap the objc_getClass, + # sel_registerName methods + lib = util.find_library("objc") + if lib is None: + # Failed to load the objc library + raise ImportError("ObjC runtime library couldn't be loaded") + + objc = cdll.LoadLibrary(lib) + objc.objc_getClass.restype = c_void_p + objc.objc_getClass.argtypes = [c_char_p] + objc.sel_registerName.restype = c_void_p + objc.sel_registerName.argtypes = [c_char_p] + + +def get_platform_ios(): + # Determine if this is a simulator using the multiarch value + is_simulator = sys.implementation._multiarch.endswith("simulator") + + # We can't use ctypes; abort + if not objc: + return None + + # Most of the methods return ObjC objects + objc.objc_msgSend.restype = c_void_p + # All the methods used have no arguments. + objc.objc_msgSend.argtypes = [c_void_p, c_void_p] + + # Equivalent of: + # device = [UIDevice currentDevice] + UIDevice = objc.objc_getClass(b"UIDevice") + SEL_currentDevice = objc.sel_registerName(b"currentDevice") + device = objc.objc_msgSend(UIDevice, SEL_currentDevice) + + # Equivalent of: + # device_systemVersion = [device systemVersion] + SEL_systemVersion = objc.sel_registerName(b"systemVersion") + device_systemVersion = objc.objc_msgSend(device, SEL_systemVersion) + + # Equivalent of: + # device_systemName = [device systemName] + SEL_systemName = objc.sel_registerName(b"systemName") + device_systemName = objc.objc_msgSend(device, SEL_systemName) + + # Equivalent of: + # device_model = [device model] + SEL_model = objc.sel_registerName(b"model") + device_model = objc.objc_msgSend(device, SEL_model) + + # UTF8String returns a const char*; + SEL_UTF8String = objc.sel_registerName(b"UTF8String") + objc.objc_msgSend.restype = c_char_p + + # Equivalent of: + # system = [device_systemName UTF8String] + # release = [device_systemVersion UTF8String] + # model = [device_model UTF8String] + system = objc.objc_msgSend(device_systemName, SEL_UTF8String).decode() + release = objc.objc_msgSend(device_systemVersion, SEL_UTF8String).decode() + model = objc.objc_msgSend(device_model, SEL_UTF8String).decode() + + return system, release, model, is_simulator diff --git a/PythonLib/full/_markupbase.py b/PythonLib/full/_markupbase.py index 3ad7e279..614f0cd1 100644 --- a/PythonLib/full/_markupbase.py +++ b/PythonLib/full/_markupbase.py @@ -13,7 +13,7 @@ _markedsectionclose = re.compile(r']\s*]\s*>') # An analysis of the MS-Word extensions is available at -# http://www.planetpublish.com/xmlarena/xap/Thursday/WordtoXML.pdf +# http://web.archive.org/web/20060321153828/http://www.planetpublish.com/xmlarena/xap/Thursday/WordtoXML.pdf _msmarkedsectionclose = re.compile(r']\s*>') diff --git a/PythonLib/full/_opcode_metadata.py b/PythonLib/full/_opcode_metadata.py new file mode 100644 index 00000000..b9304ec3 --- /dev/null +++ b/PythonLib/full/_opcode_metadata.py @@ -0,0 +1,371 @@ +# This file is generated by Tools/cases_generator/py_metadata_generator.py +# from: +# Python/bytecodes.c +# Do not edit! +_specializations = { + "RESUME": [ + "RESUME_CHECK", + ], + "LOAD_CONST": [ + "LOAD_CONST_MORTAL", + "LOAD_CONST_IMMORTAL", + ], + "TO_BOOL": [ + "TO_BOOL_ALWAYS_TRUE", + "TO_BOOL_BOOL", + "TO_BOOL_INT", + "TO_BOOL_LIST", + "TO_BOOL_NONE", + "TO_BOOL_STR", + ], + "BINARY_OP": [ + "BINARY_OP_MULTIPLY_INT", + "BINARY_OP_ADD_INT", + "BINARY_OP_SUBTRACT_INT", + "BINARY_OP_MULTIPLY_FLOAT", + "BINARY_OP_ADD_FLOAT", + "BINARY_OP_SUBTRACT_FLOAT", + "BINARY_OP_ADD_UNICODE", + "BINARY_OP_SUBSCR_LIST_INT", + "BINARY_OP_SUBSCR_LIST_SLICE", + "BINARY_OP_SUBSCR_TUPLE_INT", + "BINARY_OP_SUBSCR_STR_INT", + "BINARY_OP_SUBSCR_DICT", + "BINARY_OP_SUBSCR_GETITEM", + "BINARY_OP_EXTEND", + "BINARY_OP_INPLACE_ADD_UNICODE", + ], + "STORE_SUBSCR": [ + "STORE_SUBSCR_DICT", + "STORE_SUBSCR_LIST_INT", + ], + "SEND": [ + "SEND_GEN", + ], + "UNPACK_SEQUENCE": [ + "UNPACK_SEQUENCE_TWO_TUPLE", + "UNPACK_SEQUENCE_TUPLE", + "UNPACK_SEQUENCE_LIST", + ], + "STORE_ATTR": [ + "STORE_ATTR_INSTANCE_VALUE", + "STORE_ATTR_SLOT", + "STORE_ATTR_WITH_HINT", + ], + "LOAD_GLOBAL": [ + "LOAD_GLOBAL_MODULE", + "LOAD_GLOBAL_BUILTIN", + ], + "LOAD_SUPER_ATTR": [ + "LOAD_SUPER_ATTR_ATTR", + "LOAD_SUPER_ATTR_METHOD", + ], + "LOAD_ATTR": [ + "LOAD_ATTR_INSTANCE_VALUE", + "LOAD_ATTR_MODULE", + "LOAD_ATTR_WITH_HINT", + "LOAD_ATTR_SLOT", + "LOAD_ATTR_CLASS", + "LOAD_ATTR_CLASS_WITH_METACLASS_CHECK", + "LOAD_ATTR_PROPERTY", + "LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN", + "LOAD_ATTR_METHOD_WITH_VALUES", + "LOAD_ATTR_METHOD_NO_DICT", + "LOAD_ATTR_METHOD_LAZY_DICT", + "LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES", + "LOAD_ATTR_NONDESCRIPTOR_NO_DICT", + ], + "COMPARE_OP": [ + "COMPARE_OP_FLOAT", + "COMPARE_OP_INT", + "COMPARE_OP_STR", + ], + "CONTAINS_OP": [ + "CONTAINS_OP_SET", + "CONTAINS_OP_DICT", + ], + "JUMP_BACKWARD": [ + "JUMP_BACKWARD_NO_JIT", + "JUMP_BACKWARD_JIT", + ], + "FOR_ITER": [ + "FOR_ITER_LIST", + "FOR_ITER_TUPLE", + "FOR_ITER_RANGE", + "FOR_ITER_GEN", + ], + "CALL": [ + "CALL_BOUND_METHOD_EXACT_ARGS", + "CALL_PY_EXACT_ARGS", + "CALL_TYPE_1", + "CALL_STR_1", + "CALL_TUPLE_1", + "CALL_BUILTIN_CLASS", + "CALL_BUILTIN_O", + "CALL_BUILTIN_FAST", + "CALL_BUILTIN_FAST_WITH_KEYWORDS", + "CALL_LEN", + "CALL_ISINSTANCE", + "CALL_LIST_APPEND", + "CALL_METHOD_DESCRIPTOR_O", + "CALL_METHOD_DESCRIPTOR_FAST_WITH_KEYWORDS", + "CALL_METHOD_DESCRIPTOR_NOARGS", + "CALL_METHOD_DESCRIPTOR_FAST", + "CALL_ALLOC_AND_ENTER_INIT", + "CALL_PY_GENERAL", + "CALL_BOUND_METHOD_GENERAL", + "CALL_NON_PY_GENERAL", + ], + "CALL_KW": [ + "CALL_KW_BOUND_METHOD", + "CALL_KW_PY", + "CALL_KW_NON_PY", + ], +} + +_specialized_opmap = { + 'BINARY_OP_ADD_FLOAT': 129, + 'BINARY_OP_ADD_INT': 130, + 'BINARY_OP_ADD_UNICODE': 131, + 'BINARY_OP_EXTEND': 132, + 'BINARY_OP_INPLACE_ADD_UNICODE': 3, + 'BINARY_OP_MULTIPLY_FLOAT': 133, + 'BINARY_OP_MULTIPLY_INT': 134, + 'BINARY_OP_SUBSCR_DICT': 135, + 'BINARY_OP_SUBSCR_GETITEM': 136, + 'BINARY_OP_SUBSCR_LIST_INT': 137, + 'BINARY_OP_SUBSCR_LIST_SLICE': 138, + 'BINARY_OP_SUBSCR_STR_INT': 139, + 'BINARY_OP_SUBSCR_TUPLE_INT': 140, + 'BINARY_OP_SUBTRACT_FLOAT': 141, + 'BINARY_OP_SUBTRACT_INT': 142, + 'CALL_ALLOC_AND_ENTER_INIT': 143, + 'CALL_BOUND_METHOD_EXACT_ARGS': 144, + 'CALL_BOUND_METHOD_GENERAL': 145, + 'CALL_BUILTIN_CLASS': 146, + 'CALL_BUILTIN_FAST': 147, + 'CALL_BUILTIN_FAST_WITH_KEYWORDS': 148, + 'CALL_BUILTIN_O': 149, + 'CALL_ISINSTANCE': 150, + 'CALL_KW_BOUND_METHOD': 151, + 'CALL_KW_NON_PY': 152, + 'CALL_KW_PY': 153, + 'CALL_LEN': 154, + 'CALL_LIST_APPEND': 155, + 'CALL_METHOD_DESCRIPTOR_FAST': 156, + 'CALL_METHOD_DESCRIPTOR_FAST_WITH_KEYWORDS': 157, + 'CALL_METHOD_DESCRIPTOR_NOARGS': 158, + 'CALL_METHOD_DESCRIPTOR_O': 159, + 'CALL_NON_PY_GENERAL': 160, + 'CALL_PY_EXACT_ARGS': 161, + 'CALL_PY_GENERAL': 162, + 'CALL_STR_1': 163, + 'CALL_TUPLE_1': 164, + 'CALL_TYPE_1': 165, + 'COMPARE_OP_FLOAT': 166, + 'COMPARE_OP_INT': 167, + 'COMPARE_OP_STR': 168, + 'CONTAINS_OP_DICT': 169, + 'CONTAINS_OP_SET': 170, + 'FOR_ITER_GEN': 171, + 'FOR_ITER_LIST': 172, + 'FOR_ITER_RANGE': 173, + 'FOR_ITER_TUPLE': 174, + 'JUMP_BACKWARD_JIT': 175, + 'JUMP_BACKWARD_NO_JIT': 176, + 'LOAD_ATTR_CLASS': 177, + 'LOAD_ATTR_CLASS_WITH_METACLASS_CHECK': 178, + 'LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN': 179, + 'LOAD_ATTR_INSTANCE_VALUE': 180, + 'LOAD_ATTR_METHOD_LAZY_DICT': 181, + 'LOAD_ATTR_METHOD_NO_DICT': 182, + 'LOAD_ATTR_METHOD_WITH_VALUES': 183, + 'LOAD_ATTR_MODULE': 184, + 'LOAD_ATTR_NONDESCRIPTOR_NO_DICT': 185, + 'LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES': 186, + 'LOAD_ATTR_PROPERTY': 187, + 'LOAD_ATTR_SLOT': 188, + 'LOAD_ATTR_WITH_HINT': 189, + 'LOAD_CONST_IMMORTAL': 190, + 'LOAD_CONST_MORTAL': 191, + 'LOAD_GLOBAL_BUILTIN': 192, + 'LOAD_GLOBAL_MODULE': 193, + 'LOAD_SUPER_ATTR_ATTR': 194, + 'LOAD_SUPER_ATTR_METHOD': 195, + 'RESUME_CHECK': 196, + 'SEND_GEN': 197, + 'STORE_ATTR_INSTANCE_VALUE': 198, + 'STORE_ATTR_SLOT': 199, + 'STORE_ATTR_WITH_HINT': 200, + 'STORE_SUBSCR_DICT': 201, + 'STORE_SUBSCR_LIST_INT': 202, + 'TO_BOOL_ALWAYS_TRUE': 203, + 'TO_BOOL_BOOL': 204, + 'TO_BOOL_INT': 205, + 'TO_BOOL_LIST': 206, + 'TO_BOOL_NONE': 207, + 'TO_BOOL_STR': 208, + 'UNPACK_SEQUENCE_LIST': 209, + 'UNPACK_SEQUENCE_TUPLE': 210, + 'UNPACK_SEQUENCE_TWO_TUPLE': 211, +} + +opmap = { + 'CACHE': 0, + 'RESERVED': 17, + 'RESUME': 128, + 'INSTRUMENTED_LINE': 254, + 'ENTER_EXECUTOR': 255, + 'BINARY_SLICE': 1, + 'BUILD_TEMPLATE': 2, + 'CALL_FUNCTION_EX': 4, + 'CHECK_EG_MATCH': 5, + 'CHECK_EXC_MATCH': 6, + 'CLEANUP_THROW': 7, + 'DELETE_SUBSCR': 8, + 'END_FOR': 9, + 'END_SEND': 10, + 'EXIT_INIT_CHECK': 11, + 'FORMAT_SIMPLE': 12, + 'FORMAT_WITH_SPEC': 13, + 'GET_AITER': 14, + 'GET_ANEXT': 15, + 'GET_ITER': 16, + 'GET_LEN': 18, + 'GET_YIELD_FROM_ITER': 19, + 'INTERPRETER_EXIT': 20, + 'LOAD_BUILD_CLASS': 21, + 'LOAD_LOCALS': 22, + 'MAKE_FUNCTION': 23, + 'MATCH_KEYS': 24, + 'MATCH_MAPPING': 25, + 'MATCH_SEQUENCE': 26, + 'NOP': 27, + 'NOT_TAKEN': 28, + 'POP_EXCEPT': 29, + 'POP_ITER': 30, + 'POP_TOP': 31, + 'PUSH_EXC_INFO': 32, + 'PUSH_NULL': 33, + 'RETURN_GENERATOR': 34, + 'RETURN_VALUE': 35, + 'SETUP_ANNOTATIONS': 36, + 'STORE_SLICE': 37, + 'STORE_SUBSCR': 38, + 'TO_BOOL': 39, + 'UNARY_INVERT': 40, + 'UNARY_NEGATIVE': 41, + 'UNARY_NOT': 42, + 'WITH_EXCEPT_START': 43, + 'BINARY_OP': 44, + 'BUILD_INTERPOLATION': 45, + 'BUILD_LIST': 46, + 'BUILD_MAP': 47, + 'BUILD_SET': 48, + 'BUILD_SLICE': 49, + 'BUILD_STRING': 50, + 'BUILD_TUPLE': 51, + 'CALL': 52, + 'CALL_INTRINSIC_1': 53, + 'CALL_INTRINSIC_2': 54, + 'CALL_KW': 55, + 'COMPARE_OP': 56, + 'CONTAINS_OP': 57, + 'CONVERT_VALUE': 58, + 'COPY': 59, + 'COPY_FREE_VARS': 60, + 'DELETE_ATTR': 61, + 'DELETE_DEREF': 62, + 'DELETE_FAST': 63, + 'DELETE_GLOBAL': 64, + 'DELETE_NAME': 65, + 'DICT_MERGE': 66, + 'DICT_UPDATE': 67, + 'END_ASYNC_FOR': 68, + 'EXTENDED_ARG': 69, + 'FOR_ITER': 70, + 'GET_AWAITABLE': 71, + 'IMPORT_FROM': 72, + 'IMPORT_NAME': 73, + 'IS_OP': 74, + 'JUMP_BACKWARD': 75, + 'JUMP_BACKWARD_NO_INTERRUPT': 76, + 'JUMP_FORWARD': 77, + 'LIST_APPEND': 78, + 'LIST_EXTEND': 79, + 'LOAD_ATTR': 80, + 'LOAD_COMMON_CONSTANT': 81, + 'LOAD_CONST': 82, + 'LOAD_DEREF': 83, + 'LOAD_FAST': 84, + 'LOAD_FAST_AND_CLEAR': 85, + 'LOAD_FAST_BORROW': 86, + 'LOAD_FAST_BORROW_LOAD_FAST_BORROW': 87, + 'LOAD_FAST_CHECK': 88, + 'LOAD_FAST_LOAD_FAST': 89, + 'LOAD_FROM_DICT_OR_DEREF': 90, + 'LOAD_FROM_DICT_OR_GLOBALS': 91, + 'LOAD_GLOBAL': 92, + 'LOAD_NAME': 93, + 'LOAD_SMALL_INT': 94, + 'LOAD_SPECIAL': 95, + 'LOAD_SUPER_ATTR': 96, + 'MAKE_CELL': 97, + 'MAP_ADD': 98, + 'MATCH_CLASS': 99, + 'POP_JUMP_IF_FALSE': 100, + 'POP_JUMP_IF_NONE': 101, + 'POP_JUMP_IF_NOT_NONE': 102, + 'POP_JUMP_IF_TRUE': 103, + 'RAISE_VARARGS': 104, + 'RERAISE': 105, + 'SEND': 106, + 'SET_ADD': 107, + 'SET_FUNCTION_ATTRIBUTE': 108, + 'SET_UPDATE': 109, + 'STORE_ATTR': 110, + 'STORE_DEREF': 111, + 'STORE_FAST': 112, + 'STORE_FAST_LOAD_FAST': 113, + 'STORE_FAST_STORE_FAST': 114, + 'STORE_GLOBAL': 115, + 'STORE_NAME': 116, + 'SWAP': 117, + 'UNPACK_EX': 118, + 'UNPACK_SEQUENCE': 119, + 'YIELD_VALUE': 120, + 'INSTRUMENTED_END_FOR': 234, + 'INSTRUMENTED_POP_ITER': 235, + 'INSTRUMENTED_END_SEND': 236, + 'INSTRUMENTED_FOR_ITER': 237, + 'INSTRUMENTED_INSTRUCTION': 238, + 'INSTRUMENTED_JUMP_FORWARD': 239, + 'INSTRUMENTED_NOT_TAKEN': 240, + 'INSTRUMENTED_POP_JUMP_IF_TRUE': 241, + 'INSTRUMENTED_POP_JUMP_IF_FALSE': 242, + 'INSTRUMENTED_POP_JUMP_IF_NONE': 243, + 'INSTRUMENTED_POP_JUMP_IF_NOT_NONE': 244, + 'INSTRUMENTED_RESUME': 245, + 'INSTRUMENTED_RETURN_VALUE': 246, + 'INSTRUMENTED_YIELD_VALUE': 247, + 'INSTRUMENTED_END_ASYNC_FOR': 248, + 'INSTRUMENTED_LOAD_SUPER_ATTR': 249, + 'INSTRUMENTED_CALL': 250, + 'INSTRUMENTED_CALL_KW': 251, + 'INSTRUMENTED_CALL_FUNCTION_EX': 252, + 'INSTRUMENTED_JUMP_BACKWARD': 253, + 'ANNOTATIONS_PLACEHOLDER': 256, + 'JUMP': 257, + 'JUMP_IF_FALSE': 258, + 'JUMP_IF_TRUE': 259, + 'JUMP_NO_INTERRUPT': 260, + 'LOAD_CLOSURE': 261, + 'POP_BLOCK': 262, + 'SETUP_CLEANUP': 263, + 'SETUP_FINALLY': 264, + 'SETUP_WITH': 265, + 'STORE_FAST_MAYBE_NULL': 266, +} + +HAVE_ARGUMENT = 43 +MIN_INSTRUMENTED_OPCODE = 234 diff --git a/PythonLib/full/_py_warnings.py b/PythonLib/full/_py_warnings.py new file mode 100644 index 00000000..55f8c069 --- /dev/null +++ b/PythonLib/full/_py_warnings.py @@ -0,0 +1,869 @@ +"""Python part of the warnings subsystem.""" + +import sys +import _contextvars +import _thread + + +__all__ = ["warn", "warn_explicit", "showwarning", + "formatwarning", "filterwarnings", "simplefilter", + "resetwarnings", "catch_warnings", "deprecated"] + + +# Normally '_wm' is sys.modules['warnings'] but for unit tests it can be +# a different module. User code is allowed to reassign global attributes +# of the 'warnings' module, commonly 'filters' or 'showwarning'. So we +# need to lookup these global attributes dynamically on the '_wm' object, +# rather than binding them earlier. The code in this module consistently uses +# '_wm.' rather than using the globals of this module. If the +# '_warnings' C extension is in use, some globals are replaced by functions +# and variables defined in that extension. +_wm = None + + +def _set_module(module): + global _wm + _wm = module + + +# filters contains a sequence of filter 5-tuples +# The components of the 5-tuple are: +# - an action: error, ignore, always, all, default, module, or once +# - a compiled regex that must match the warning message +# - a class representing the warning category +# - a compiled regex that must match the module that is being warned +# - a line number for the line being warning, or 0 to mean any line +# If either if the compiled regexs are None, match anything. +filters = [] + + +defaultaction = "default" +onceregistry = {} +_lock = _thread.RLock() +_filters_version = 1 + + +# If true, catch_warnings() will use a context var to hold the modified +# filters list. Otherwise, catch_warnings() will operate on the 'filters' +# global of the warnings module. +_use_context = sys.flags.context_aware_warnings + + +class _Context: + def __init__(self, filters): + self._filters = filters + self.log = None # if set to a list, logging is enabled + + def copy(self): + context = _Context(self._filters[:]) + if self.log is not None: + context.log = self.log + return context + + def _record_warning(self, msg): + self.log.append(msg) + + +class _GlobalContext(_Context): + def __init__(self): + self.log = None + + @property + def _filters(self): + # Since there is quite a lot of code that assigns to + # warnings.filters, this needs to return the current value of + # the module global. + try: + return _wm.filters + except AttributeError: + # 'filters' global was deleted. Do we need to actually handle this case? + return [] + + +_global_context = _GlobalContext() + + +_warnings_context = _contextvars.ContextVar('warnings_context') + + +def _get_context(): + if not _use_context: + return _global_context + try: + return _wm._warnings_context.get() + except LookupError: + return _global_context + + +def _set_context(context): + assert _use_context + _wm._warnings_context.set(context) + + +def _new_context(): + assert _use_context + old_context = _wm._get_context() + new_context = old_context.copy() + _wm._set_context(new_context) + return old_context, new_context + + +def _get_filters(): + """Return the current list of filters. This is a non-public API used by + module functions and by the unit tests.""" + return _wm._get_context()._filters + + +def _filters_mutated_lock_held(): + _wm._filters_version += 1 + + +def showwarning(message, category, filename, lineno, file=None, line=None): + """Hook to write a warning to a file; replace if you like.""" + msg = _wm.WarningMessage(message, category, filename, lineno, file, line) + _wm._showwarnmsg_impl(msg) + + +def formatwarning(message, category, filename, lineno, line=None): + """Function to format a warning the standard way.""" + msg = _wm.WarningMessage(message, category, filename, lineno, None, line) + return _wm._formatwarnmsg_impl(msg) + + +def _showwarnmsg_impl(msg): + context = _wm._get_context() + if context.log is not None: + context._record_warning(msg) + return + file = msg.file + if file is None: + file = sys.stderr + if file is None: + # sys.stderr is None when run with pythonw.exe: + # warnings get lost + return + text = _wm._formatwarnmsg(msg) + try: + file.write(text) + except OSError: + # the file (probably stderr) is invalid - this warning gets lost. + pass + + +def _formatwarnmsg_impl(msg): + category = msg.category.__name__ + s = f"{msg.filename}:{msg.lineno}: {category}: {msg.message}\n" + + if msg.line is None: + try: + import linecache + line = linecache.getline(msg.filename, msg.lineno) + except Exception: + # When a warning is logged during Python shutdown, linecache + # and the import machinery don't work anymore + line = None + linecache = None + else: + line = msg.line + if line: + line = line.strip() + s += " %s\n" % line + + if msg.source is not None: + try: + import tracemalloc + # Logging a warning should not raise a new exception: + # catch Exception, not only ImportError and RecursionError. + except Exception: + # don't suggest to enable tracemalloc if it's not available + suggest_tracemalloc = False + tb = None + else: + try: + suggest_tracemalloc = not tracemalloc.is_tracing() + tb = tracemalloc.get_object_traceback(msg.source) + except Exception: + # When a warning is logged during Python shutdown, tracemalloc + # and the import machinery don't work anymore + suggest_tracemalloc = False + tb = None + + if tb is not None: + s += 'Object allocated at (most recent call last):\n' + for frame in tb: + s += (' File "%s", lineno %s\n' + % (frame.filename, frame.lineno)) + + try: + if linecache is not None: + line = linecache.getline(frame.filename, frame.lineno) + else: + line = None + except Exception: + line = None + if line: + line = line.strip() + s += ' %s\n' % line + elif suggest_tracemalloc: + s += (f'{category}: Enable tracemalloc to get the object ' + f'allocation traceback\n') + return s + + +# Keep a reference to check if the function was replaced +_showwarning_orig = showwarning + + +def _showwarnmsg(msg): + """Hook to write a warning to a file; replace if you like.""" + try: + sw = _wm.showwarning + except AttributeError: + pass + else: + if sw is not _showwarning_orig: + # warnings.showwarning() was replaced + if not callable(sw): + raise TypeError("warnings.showwarning() must be set to a " + "function or method") + + sw(msg.message, msg.category, msg.filename, msg.lineno, + msg.file, msg.line) + return + _wm._showwarnmsg_impl(msg) + + +# Keep a reference to check if the function was replaced +_formatwarning_orig = formatwarning + + +def _formatwarnmsg(msg): + """Function to format a warning the standard way.""" + try: + fw = _wm.formatwarning + except AttributeError: + pass + else: + if fw is not _formatwarning_orig: + # warnings.formatwarning() was replaced + return fw(msg.message, msg.category, + msg.filename, msg.lineno, msg.line) + return _wm._formatwarnmsg_impl(msg) + + +def filterwarnings(action, message="", category=Warning, module="", lineno=0, + append=False): + """Insert an entry into the list of warnings filters (at the front). + + 'action' -- one of "error", "ignore", "always", "all", "default", "module", + or "once" + 'message' -- a regex that the warning message must match + 'category' -- a class that the warning must be a subclass of + 'module' -- a regex that the module name must match + 'lineno' -- an integer line number, 0 matches all warnings + 'append' -- if true, append to the list of filters + """ + if action not in {"error", "ignore", "always", "all", "default", "module", "once"}: + raise ValueError(f"invalid action: {action!r}") + if not isinstance(message, str): + raise TypeError("message must be a string") + if not isinstance(category, type) or not issubclass(category, Warning): + raise TypeError("category must be a Warning subclass") + if not isinstance(module, str): + raise TypeError("module must be a string") + if not isinstance(lineno, int): + raise TypeError("lineno must be an int") + if lineno < 0: + raise ValueError("lineno must be an int >= 0") + + if message or module: + import re + + if message: + message = re.compile(message, re.I) + else: + message = None + if module: + module = re.compile(module) + else: + module = None + + _wm._add_filter(action, message, category, module, lineno, append=append) + + +def simplefilter(action, category=Warning, lineno=0, append=False): + """Insert a simple entry into the list of warnings filters (at the front). + + A simple filter matches all modules and messages. + 'action' -- one of "error", "ignore", "always", "all", "default", "module", + or "once" + 'category' -- a class that the warning must be a subclass of + 'lineno' -- an integer line number, 0 matches all warnings + 'append' -- if true, append to the list of filters + """ + if action not in {"error", "ignore", "always", "all", "default", "module", "once"}: + raise ValueError(f"invalid action: {action!r}") + if not isinstance(lineno, int): + raise TypeError("lineno must be an int") + if lineno < 0: + raise ValueError("lineno must be an int >= 0") + _wm._add_filter(action, None, category, None, lineno, append=append) + + +def _filters_mutated(): + # Even though this function is not part of the public API, it's used by + # a fair amount of user code. + with _wm._lock: + _wm._filters_mutated_lock_held() + + +def _add_filter(*item, append): + with _wm._lock: + filters = _wm._get_filters() + if not append: + # Remove possible duplicate filters, so new one will be placed + # in correct place. If append=True and duplicate exists, do nothing. + try: + filters.remove(item) + except ValueError: + pass + filters.insert(0, item) + else: + if item not in filters: + filters.append(item) + _wm._filters_mutated_lock_held() + + +def resetwarnings(): + """Clear the list of warning filters, so that no filters are active.""" + with _wm._lock: + del _wm._get_filters()[:] + _wm._filters_mutated_lock_held() + + +class _OptionError(Exception): + """Exception used by option processing helpers.""" + pass + + +# Helper to process -W options passed via sys.warnoptions +def _processoptions(args): + for arg in args: + try: + _wm._setoption(arg) + except _wm._OptionError as msg: + print("Invalid -W option ignored:", msg, file=sys.stderr) + + +# Helper for _processoptions() +def _setoption(arg): + parts = arg.split(':') + if len(parts) > 5: + raise _wm._OptionError("too many fields (max 5): %r" % (arg,)) + while len(parts) < 5: + parts.append('') + action, message, category, module, lineno = [s.strip() + for s in parts] + action = _wm._getaction(action) + category = _wm._getcategory(category) + if message or module: + import re + if message: + message = re.escape(message) + if module: + module = re.escape(module) + r'\z' + if lineno: + try: + lineno = int(lineno) + if lineno < 0: + raise ValueError + except (ValueError, OverflowError): + raise _wm._OptionError("invalid lineno %r" % (lineno,)) from None + else: + lineno = 0 + _wm.filterwarnings(action, message, category, module, lineno) + + +# Helper for _setoption() +def _getaction(action): + if not action: + return "default" + for a in ('default', 'always', 'all', 'ignore', 'module', 'once', 'error'): + if a.startswith(action): + return a + raise _wm._OptionError("invalid action: %r" % (action,)) + + +# Helper for _setoption() +def _getcategory(category): + if not category: + return Warning + if '.' not in category: + import builtins as m + klass = category + else: + module, _, klass = category.rpartition('.') + try: + m = __import__(module, None, None, [klass]) + except ImportError: + raise _wm._OptionError("invalid module name: %r" % (module,)) from None + try: + cat = getattr(m, klass) + except AttributeError: + raise _wm._OptionError("unknown warning category: %r" % (category,)) from None + if not issubclass(cat, Warning): + raise _wm._OptionError("invalid warning category: %r" % (category,)) + return cat + + +def _is_internal_filename(filename): + return 'importlib' in filename and '_bootstrap' in filename + + +def _is_filename_to_skip(filename, skip_file_prefixes): + return any(filename.startswith(prefix) for prefix in skip_file_prefixes) + + +def _is_internal_frame(frame): + """Signal whether the frame is an internal CPython implementation detail.""" + return _is_internal_filename(frame.f_code.co_filename) + + +def _next_external_frame(frame, skip_file_prefixes): + """Find the next frame that doesn't involve Python or user internals.""" + frame = frame.f_back + while frame is not None and ( + _is_internal_filename(filename := frame.f_code.co_filename) or + _is_filename_to_skip(filename, skip_file_prefixes)): + frame = frame.f_back + return frame + + +# Code typically replaced by _warnings +def warn(message, category=None, stacklevel=1, source=None, + *, skip_file_prefixes=()): + """Issue a warning, or maybe ignore it or raise an exception.""" + # Check if message is already a Warning object + if isinstance(message, Warning): + category = message.__class__ + # Check category argument + if category is None: + category = UserWarning + if not (isinstance(category, type) and issubclass(category, Warning)): + raise TypeError("category must be a Warning subclass, " + "not '{:s}'".format(type(category).__name__)) + if not isinstance(skip_file_prefixes, tuple): + # The C version demands a tuple for implementation performance. + raise TypeError('skip_file_prefixes must be a tuple of strs.') + if skip_file_prefixes: + stacklevel = max(2, stacklevel) + # Get context information + try: + if stacklevel <= 1 or _is_internal_frame(sys._getframe(1)): + # If frame is too small to care or if the warning originated in + # internal code, then do not try to hide any frames. + frame = sys._getframe(stacklevel) + else: + frame = sys._getframe(1) + # Look for one frame less since the above line starts us off. + for x in range(stacklevel-1): + frame = _next_external_frame(frame, skip_file_prefixes) + if frame is None: + raise ValueError + except ValueError: + globals = sys.__dict__ + filename = "" + lineno = 0 + else: + globals = frame.f_globals + filename = frame.f_code.co_filename + lineno = frame.f_lineno + if '__name__' in globals: + module = globals['__name__'] + else: + module = "" + registry = globals.setdefault("__warningregistry__", {}) + _wm.warn_explicit( + message, + category, + filename, + lineno, + module, + registry, + globals, + source=source, + ) + + +def warn_explicit(message, category, filename, lineno, + module=None, registry=None, module_globals=None, + source=None): + lineno = int(lineno) + if module is None: + module = filename or "" + if module[-3:].lower() == ".py": + module = module[:-3] # XXX What about leading pathname? + if isinstance(message, Warning): + text = str(message) + category = message.__class__ + else: + text = message + message = category(message) + key = (text, category, lineno) + with _wm._lock: + if registry is None: + registry = {} + if registry.get('version', 0) != _wm._filters_version: + registry.clear() + registry['version'] = _wm._filters_version + # Quick test for common case + if registry.get(key): + return + # Search the filters + for item in _wm._get_filters(): + action, msg, cat, mod, ln = item + if ((msg is None or msg.match(text)) and + issubclass(category, cat) and + (mod is None or mod.match(module)) and + (ln == 0 or lineno == ln)): + break + else: + action = _wm.defaultaction + # Early exit actions + if action == "ignore": + return + + if action == "error": + raise message + # Other actions + if action == "once": + registry[key] = 1 + oncekey = (text, category) + if _wm.onceregistry.get(oncekey): + return + _wm.onceregistry[oncekey] = 1 + elif action in {"always", "all"}: + pass + elif action == "module": + registry[key] = 1 + altkey = (text, category, 0) + if registry.get(altkey): + return + registry[altkey] = 1 + elif action == "default": + registry[key] = 1 + else: + # Unrecognized actions are errors + raise RuntimeError( + "Unrecognized action (%r) in warnings.filters:\n %s" % + (action, item)) + + # Prime the linecache for formatting, in case the + # "file" is actually in a zipfile or something. + import linecache + linecache.getlines(filename, module_globals) + + # Print message and context + msg = _wm.WarningMessage(message, category, filename, lineno, source=source) + _wm._showwarnmsg(msg) + + +class WarningMessage(object): + + _WARNING_DETAILS = ("message", "category", "filename", "lineno", "file", + "line", "source") + + def __init__(self, message, category, filename, lineno, file=None, + line=None, source=None): + self.message = message + self.category = category + self.filename = filename + self.lineno = lineno + self.file = file + self.line = line + self.source = source + self._category_name = category.__name__ if category else None + + def __str__(self): + return ("{message : %r, category : %r, filename : %r, lineno : %s, " + "line : %r}" % (self.message, self._category_name, + self.filename, self.lineno, self.line)) + + def __repr__(self): + return f'<{type(self).__qualname__} {self}>' + + +class catch_warnings(object): + + """A context manager that copies and restores the warnings filter upon + exiting the context. + + The 'record' argument specifies whether warnings should be captured by a + custom implementation of warnings.showwarning() and be appended to a list + returned by the context manager. Otherwise None is returned by the context + manager. The objects appended to the list are arguments whose attributes + mirror the arguments to showwarning(). + + The 'module' argument is to specify an alternative module to the module + named 'warnings' and imported under that name. This argument is only useful + when testing the warnings module itself. + + If the 'action' argument is not None, the remaining arguments are passed + to warnings.simplefilter() as if it were called immediately on entering the + context. + """ + + def __init__(self, *, record=False, module=None, + action=None, category=Warning, lineno=0, append=False): + """Specify whether to record warnings and if an alternative module + should be used other than sys.modules['warnings']. + + """ + self._record = record + self._module = sys.modules['warnings'] if module is None else module + self._entered = False + if action is None: + self._filter = None + else: + self._filter = (action, category, lineno, append) + + def __repr__(self): + args = [] + if self._record: + args.append("record=True") + if self._module is not sys.modules['warnings']: + args.append("module=%r" % self._module) + name = type(self).__name__ + return "%s(%s)" % (name, ", ".join(args)) + + def __enter__(self): + if self._entered: + raise RuntimeError("Cannot enter %r twice" % self) + self._entered = True + with _wm._lock: + if _use_context: + self._saved_context, context = self._module._new_context() + else: + context = None + self._filters = self._module.filters + self._module.filters = self._filters[:] + self._showwarning = self._module.showwarning + self._showwarnmsg_impl = self._module._showwarnmsg_impl + self._module._filters_mutated_lock_held() + if self._record: + if _use_context: + context.log = log = [] + else: + log = [] + self._module._showwarnmsg_impl = log.append + # Reset showwarning() to the default implementation to make sure + # that _showwarnmsg() calls _showwarnmsg_impl() + self._module.showwarning = self._module._showwarning_orig + else: + log = None + if self._filter is not None: + self._module.simplefilter(*self._filter) + return log + + def __exit__(self, *exc_info): + if not self._entered: + raise RuntimeError("Cannot exit %r without entering first" % self) + with _wm._lock: + if _use_context: + self._module._warnings_context.set(self._saved_context) + else: + self._module.filters = self._filters + self._module.showwarning = self._showwarning + self._module._showwarnmsg_impl = self._showwarnmsg_impl + self._module._filters_mutated_lock_held() + + +class deprecated: + """Indicate that a class, function or overload is deprecated. + + When this decorator is applied to an object, the type checker + will generate a diagnostic on usage of the deprecated object. + + Usage: + + @deprecated("Use B instead") + class A: + pass + + @deprecated("Use g instead") + def f(): + pass + + @overload + @deprecated("int support is deprecated") + def g(x: int) -> int: ... + @overload + def g(x: str) -> int: ... + + The warning specified by *category* will be emitted at runtime + on use of deprecated objects. For functions, that happens on calls; + for classes, on instantiation and on creation of subclasses. + If the *category* is ``None``, no warning is emitted at runtime. + The *stacklevel* determines where the + warning is emitted. If it is ``1`` (the default), the warning + is emitted at the direct caller of the deprecated object; if it + is higher, it is emitted further up the stack. + Static type checker behavior is not affected by the *category* + and *stacklevel* arguments. + + The deprecation message passed to the decorator is saved in the + ``__deprecated__`` attribute on the decorated object. + If applied to an overload, the decorator + must be after the ``@overload`` decorator for the attribute to + exist on the overload as returned by ``get_overloads()``. + + See PEP 702 for details. + + """ + def __init__( + self, + message: str, + /, + *, + category: type[Warning] | None = DeprecationWarning, + stacklevel: int = 1, + ) -> None: + if not isinstance(message, str): + raise TypeError( + f"Expected an object of type str for 'message', not {type(message).__name__!r}" + ) + self.message = message + self.category = category + self.stacklevel = stacklevel + + def __call__(self, arg, /): + # Make sure the inner functions created below don't + # retain a reference to self. + msg = self.message + category = self.category + stacklevel = self.stacklevel + if category is None: + arg.__deprecated__ = msg + return arg + elif isinstance(arg, type): + import functools + from types import MethodType + + original_new = arg.__new__ + + @functools.wraps(original_new) + def __new__(cls, /, *args, **kwargs): + if cls is arg: + _wm.warn(msg, category=category, stacklevel=stacklevel + 1) + if original_new is not object.__new__: + return original_new(cls, *args, **kwargs) + # Mirrors a similar check in object.__new__. + elif cls.__init__ is object.__init__ and (args or kwargs): + raise TypeError(f"{cls.__name__}() takes no arguments") + else: + return original_new(cls) + + arg.__new__ = staticmethod(__new__) + + if "__init_subclass__" in arg.__dict__: + # __init_subclass__ is directly present on the decorated class. + # Synthesize a wrapper that calls this method directly. + original_init_subclass = arg.__init_subclass__ + # We need slightly different behavior if __init_subclass__ + # is a bound method (likely if it was implemented in Python). + # Otherwise, it likely means it's a builtin such as + # object's implementation of __init_subclass__. + if isinstance(original_init_subclass, MethodType): + original_init_subclass = original_init_subclass.__func__ + + @functools.wraps(original_init_subclass) + def __init_subclass__(*args, **kwargs): + _wm.warn(msg, category=category, stacklevel=stacklevel + 1) + return original_init_subclass(*args, **kwargs) + else: + def __init_subclass__(cls, *args, **kwargs): + _wm.warn(msg, category=category, stacklevel=stacklevel + 1) + return super(arg, cls).__init_subclass__(*args, **kwargs) + + arg.__init_subclass__ = classmethod(__init_subclass__) + + arg.__deprecated__ = __new__.__deprecated__ = msg + __init_subclass__.__deprecated__ = msg + return arg + elif callable(arg): + import functools + import inspect + + @functools.wraps(arg) + def wrapper(*args, **kwargs): + _wm.warn(msg, category=category, stacklevel=stacklevel + 1) + return arg(*args, **kwargs) + + if inspect.iscoroutinefunction(arg): + wrapper = inspect.markcoroutinefunction(wrapper) + + arg.__deprecated__ = wrapper.__deprecated__ = msg + return wrapper + else: + raise TypeError( + "@deprecated decorator with non-None category must be applied to " + f"a class or callable, not {arg!r}" + ) + + +_DEPRECATED_MSG = "{name!r} is deprecated and slated for removal in Python {remove}" + + +def _deprecated(name, message=_DEPRECATED_MSG, *, remove, _version=sys.version_info): + """Warn that *name* is deprecated or should be removed. + + RuntimeError is raised if *remove* specifies a major/minor tuple older than + the current Python version or the same version but past the alpha. + + The *message* argument is formatted with *name* and *remove* as a Python + version tuple (e.g. (3, 11)). + + """ + remove_formatted = f"{remove[0]}.{remove[1]}" + if (_version[:2] > remove) or (_version[:2] == remove and _version[3] != "alpha"): + msg = f"{name!r} was slated for removal after Python {remove_formatted} alpha" + raise RuntimeError(msg) + else: + msg = message.format(name=name, remove=remove_formatted) + _wm.warn(msg, DeprecationWarning, stacklevel=3) + + +# Private utility function called by _PyErr_WarnUnawaitedCoroutine +def _warn_unawaited_coroutine(coro): + msg_lines = [ + f"coroutine '{coro.__qualname__}' was never awaited\n" + ] + if coro.cr_origin is not None: + import linecache, traceback + def extract(): + for filename, lineno, funcname in reversed(coro.cr_origin): + line = linecache.getline(filename, lineno) + yield (filename, lineno, funcname, line) + msg_lines.append("Coroutine created at (most recent call last)\n") + msg_lines += traceback.format_list(list(extract())) + msg = "".join(msg_lines).rstrip("\n") + # Passing source= here means that if the user happens to have tracemalloc + # enabled and tracking where the coroutine was created, the warning will + # contain that traceback. This does mean that if they have *both* + # coroutine origin tracking *and* tracemalloc enabled, they'll get two + # partially-redundant tracebacks. If we wanted to be clever we could + # probably detect this case and avoid it, but for now we don't bother. + _wm.warn( + msg, category=RuntimeWarning, stacklevel=2, source=coro + ) + + +def _setup_defaults(): + # Several warning categories are ignored by default in regular builds + if hasattr(sys, 'gettotalrefcount'): + return + _wm.filterwarnings("default", category=DeprecationWarning, module="__main__", append=1) + _wm.simplefilter("ignore", category=DeprecationWarning, append=1) + _wm.simplefilter("ignore", category=PendingDeprecationWarning, append=1) + _wm.simplefilter("ignore", category=ImportWarning, append=1) + _wm.simplefilter("ignore", category=ResourceWarning, append=1) diff --git a/PythonLib/full/_pydatetime.py b/PythonLib/full/_pydatetime.py index 6e9c3a9b..70251dbb 100644 --- a/PythonLib/full/_pydatetime.py +++ b/PythonLib/full/_pydatetime.py @@ -1,12 +1,10 @@ -"""Concrete date/time and related types. - -See http://www.iana.org/time-zones/repository/tz-link.html for -time zone and DST data sources. -""" +"""Pure Python implementation of the datetime module.""" __all__ = ("date", "datetime", "time", "timedelta", "timezone", "tzinfo", "MINYEAR", "MAXYEAR", "UTC") +__name__ = "datetime" + import time as _time import math as _math @@ -18,10 +16,10 @@ def _cmp(x, y): def _get_class_module(self): module_name = self.__class__.__module__ - if module_name == '_pydatetime': - return 'datetime' + if module_name == 'datetime': + return 'datetime.' else: - return module_name + return '' MINYEAR = 1 MAXYEAR = 9999 @@ -64,14 +62,14 @@ def _days_in_month(year, month): def _days_before_month(year, month): "year, month -> number of days in year preceding first day of month." - assert 1 <= month <= 12, 'month must be in 1..12' + assert 1 <= month <= 12, f"month must be in 1..12, not {month}" return _DAYS_BEFORE_MONTH[month] + (month > 2 and _is_leap(year)) def _ymd2ord(year, month, day): "year, month, day -> ordinal, considering 01-Jan-0001 as day 1." - assert 1 <= month <= 12, 'month must be in 1..12' + assert 1 <= month <= 12, f"month must be in 1..12, not {month}" dim = _days_in_month(year, month) - assert 1 <= day <= dim, ('day must be in 1..%d' % dim) + assert 1 <= day <= dim, f"day must be in 1..{dim}, not {day}" return (_days_before_year(year) + _days_before_month(year, month) + day) @@ -204,6 +202,17 @@ def _format_offset(off, sep=':'): s += '.%06d' % ss.microseconds return s +_normalize_century = None +def _need_normalize_century(): + global _normalize_century + if _normalize_century is None: + try: + _normalize_century = ( + _time.strftime("%Y", (99, 1, 1, 0, 0, 0, 0, 1, 0)) != "0099") + except ValueError: + _normalize_century = True + return _normalize_century + # Correctly substitute for %z and %Z escapes in strftime formats. def _wrap_strftime(object, format, timetuple): # Don't call utcoffset() or tzname() unless actually needed. @@ -261,6 +270,20 @@ def _wrap_strftime(object, format, timetuple): # strftime is going to have at this: escape % Zreplace = s.replace('%', '%%') newformat.append(Zreplace) + # Note that datetime(1000, 1, 1).strftime('%G') == '1000' so + # year 1000 for %G can go on the fast path. + elif ((ch in 'YG' or ch in 'FC') and + object.year < 1000 and _need_normalize_century()): + if ch == 'G': + year = int(_time.strftime("%G", timetuple)) + else: + year = object.year + if ch == 'C': + push('{:02}'.format(year // 100)) + else: + push('{:04}'.format(year)) + if ch == 'F': + push('-{:02}-{:02}'.format(*timetuple[1:3])) else: push('%') push(ch) @@ -399,7 +422,7 @@ def _parse_hh_mm_ss_ff(tstr): if pos < len_str: if tstr[pos] not in '.,': - raise ValueError("Invalid microsecond component") + raise ValueError("Invalid microsecond separator") else: pos += 1 if not all(map(_is_ascii_digit, tstr[pos:])): @@ -430,6 +453,17 @@ def _parse_isoformat_time(tstr): time_comps = _parse_hh_mm_ss_ff(timestr) + hour, minute, second, microsecond = time_comps + became_next_day = False + error_from_components = False + if (hour == 24): + if all(time_comp == 0 for time_comp in time_comps[1:]): + hour = 0 + time_comps[0] = hour + became_next_day = True + else: + error_from_components = True + tzi = None if tz_pos == len_str and tstr[-1] == 'Z': tzi = timezone.utc @@ -445,7 +479,7 @@ def _parse_isoformat_time(tstr): # HH:MM:SS len: 8 # HH:MM:SS.f+ len: 10+ - if len(tzstr) in (0, 1, 3): + if len(tzstr) in (0, 1, 3) or tstr[tz_pos-1] == 'Z': raise ValueError("Malformed time zone string") tz_comps = _parse_hh_mm_ss_ff(tzstr) @@ -462,13 +496,13 @@ def _parse_isoformat_time(tstr): time_comps.append(tzi) - return time_comps + return time_comps, became_next_day, error_from_components # tuple[int, int, int] -> tuple[int, int, int] version of date.fromisocalendar def _isoweek_to_gregorian(year, week, day): # Year is bounded this way because 9999-12-31 is (9999, 52, 5) if not MINYEAR <= year <= MAXYEAR: - raise ValueError(f"Year is out of range: {year}") + raise ValueError(f"year must be in {MINYEAR}..{MAXYEAR}, not {year}") if not 0 < week < 53: out_of_range = True @@ -501,7 +535,7 @@ def _isoweek_to_gregorian(year, week, day): def _check_tzname(name): if name is not None and not isinstance(name, str): raise TypeError("tzinfo.tzname() must return None or string, " - "not '%s'" % type(name)) + f"not {type(name).__name__!r}") # name is the offset-producing method, "utcoffset" or "dst". # offset is what it returned. @@ -514,24 +548,24 @@ def _check_utc_offset(name, offset): if offset is None: return if not isinstance(offset, timedelta): - raise TypeError("tzinfo.%s() must return None " - "or timedelta, not '%s'" % (name, type(offset))) + raise TypeError(f"tzinfo.{name}() must return None " + f"or timedelta, not {type(offset).__name__!r}") if not -timedelta(1) < offset < timedelta(1): - raise ValueError("%s()=%s, must be strictly between " - "-timedelta(hours=24) and timedelta(hours=24)" % - (name, offset)) + raise ValueError("offset must be a timedelta " + "strictly between -timedelta(hours=24) and " + f"timedelta(hours=24), not {offset!r}") def _check_date_fields(year, month, day): year = _index(year) month = _index(month) day = _index(day) if not MINYEAR <= year <= MAXYEAR: - raise ValueError('year must be in %d..%d' % (MINYEAR, MAXYEAR), year) + raise ValueError(f"year must be in {MINYEAR}..{MAXYEAR}, not {year}") if not 1 <= month <= 12: - raise ValueError('month must be in 1..12', month) + raise ValueError(f"month must be in 1..12, not {month}") dim = _days_in_month(year, month) if not 1 <= day <= dim: - raise ValueError('day must be in 1..%d' % dim, day) + raise ValueError(f"day {day} must be in range 1..{dim} for month {month} in year {year}") return year, month, day def _check_time_fields(hour, minute, second, microsecond, fold): @@ -540,24 +574,23 @@ def _check_time_fields(hour, minute, second, microsecond, fold): second = _index(second) microsecond = _index(microsecond) if not 0 <= hour <= 23: - raise ValueError('hour must be in 0..23', hour) + raise ValueError(f"hour must be in 0..23, not {hour}") if not 0 <= minute <= 59: - raise ValueError('minute must be in 0..59', minute) + raise ValueError(f"minute must be in 0..59, not {minute}") if not 0 <= second <= 59: - raise ValueError('second must be in 0..59', second) + raise ValueError(f"second must be in 0..59, not {second}") if not 0 <= microsecond <= 999999: - raise ValueError('microsecond must be in 0..999999', microsecond) + raise ValueError(f"microsecond must be in 0..999999, not {microsecond}") if fold not in (0, 1): - raise ValueError('fold must be either 0 or 1', fold) + raise ValueError(f"fold must be either 0 or 1, not {fold}") return hour, minute, second, microsecond, fold def _check_tzinfo_arg(tz): if tz is not None and not isinstance(tz, tzinfo): - raise TypeError("tzinfo argument must be None or of a tzinfo subclass") - -def _cmperror(x, y): - raise TypeError("can't compare '%s' to '%s'" % ( - type(x).__name__, type(y).__name__)) + raise TypeError( + "tzinfo argument must be None or of a tzinfo subclass, " + f"not {type(tz).__name__!r}" + ) def _divide_and_round(a, b): """divide a by b and round result to the nearest integer @@ -611,7 +644,19 @@ def __new__(cls, days=0, seconds=0, microseconds=0, # guide the C implementation; it's way more convoluted than speed- # ignoring auto-overflow-to-long idiomatic Python could be. - # XXX Check that all inputs are ints or floats. + for name, value in ( + ("days", days), + ("seconds", seconds), + ("microseconds", microseconds), + ("milliseconds", milliseconds), + ("minutes", minutes), + ("hours", hours), + ("weeks", weeks) + ): + if not isinstance(value, (int, float)): + raise TypeError( + f"unsupported type for timedelta {name} component: {type(value).__name__}" + ) # Final values, all integer. # s and us fit in 32-bit signed ints; d isn't bounded. @@ -712,9 +757,9 @@ def __repr__(self): args.append("microseconds=%d" % self._microseconds) if not args: args.append('0') - return "%s.%s(%s)" % (_get_class_module(self), - self.__class__.__qualname__, - ', '.join(args)) + return "%s%s(%s)" % (_get_class_module(self), + self.__class__.__qualname__, + ', '.join(args)) def __str__(self): mm, ss = divmod(self._seconds, 60) @@ -911,6 +956,7 @@ class date: fromtimestamp() today() fromordinal() + strptime() Operators: @@ -993,8 +1039,12 @@ def fromordinal(cls, n): @classmethod def fromisoformat(cls, date_string): """Construct a date from a string in ISO 8601 format.""" + if not isinstance(date_string, str): - raise TypeError('fromisoformat: argument must be str') + raise TypeError('Argument must be a str') + + if not date_string.isascii(): + raise ValueError('Argument must be an ASCII str') if len(date_string) not in (7, 8, 10): raise ValueError(f'Invalid isoformat string: {date_string!r}') @@ -1011,6 +1061,12 @@ def fromisocalendar(cls, year, week, day): This is the inverse of the date.isocalendar() function""" return cls(*_isoweek_to_gregorian(year, week, day)) + @classmethod + def strptime(cls, date_string, format): + """Parse a date string according to the given format (like time.strptime()).""" + import _strptime + return _strptime._strptime_datetime_date(cls, date_string, format) + # Conversions to string def __repr__(self): @@ -1020,11 +1076,11 @@ def __repr__(self): >>> repr(d) 'datetime.date(2010, 1, 1)' """ - return "%s.%s(%d, %d, %d)" % (_get_class_module(self), - self.__class__.__qualname__, - self._year, - self._month, - self._day) + return "%s%s(%d, %d, %d)" % (_get_class_module(self), + self.__class__.__qualname__, + self._year, + self._month, + self._day) # XXX These shouldn't depend on time.localtime(), because that # clips the usable dates to [1970 .. 2038). At least ctime() is # easily done without using strftime() -- that's better too because @@ -1060,8 +1116,8 @@ def isoformat(self): This is 'YYYY-MM-DD'. References: - - http://www.w3.org/TR/NOTE-datetime - - http://www.cl.cam.ac.uk/~mgk25/iso-time.html + - https://www.w3.org/TR/NOTE-datetime + - https://www.cl.cam.ac.uk/~mgk25/iso-time.html """ return "%04d-%02d-%02d" % (self._year, self._month, self._day) @@ -1109,35 +1165,38 @@ def replace(self, year=None, month=None, day=None): day = self._day return type(self)(year, month, day) + __replace__ = replace + # Comparisons of date objects with other. def __eq__(self, other): - if isinstance(other, date): + if isinstance(other, date) and not isinstance(other, datetime): return self._cmp(other) == 0 return NotImplemented def __le__(self, other): - if isinstance(other, date): + if isinstance(other, date) and not isinstance(other, datetime): return self._cmp(other) <= 0 return NotImplemented def __lt__(self, other): - if isinstance(other, date): + if isinstance(other, date) and not isinstance(other, datetime): return self._cmp(other) < 0 return NotImplemented def __ge__(self, other): - if isinstance(other, date): + if isinstance(other, date) and not isinstance(other, datetime): return self._cmp(other) >= 0 return NotImplemented def __gt__(self, other): - if isinstance(other, date): + if isinstance(other, date) and not isinstance(other, datetime): return self._cmp(other) > 0 return NotImplemented def _cmp(self, other): assert isinstance(other, date) + assert not isinstance(other, datetime) y, m, d = self._year, self._month, self._day y2, m2, d2 = other._year, other._month, other._day return _cmp((y, m, d), (y2, m2, d2)) @@ -1192,7 +1251,7 @@ def isocalendar(self): The first week is 1; Monday is 1 ... Sunday is 7. ISO calendar algorithm taken from - http://www.phys.uu.nl/~vgent/calendar/isocalendar.htm + https://www.phys.uu.nl/~vgent/calendar/isocalendar.htm (used with permission) """ year = self._year @@ -1328,6 +1387,7 @@ class time: Constructors: __new__() + strptime() Operators: @@ -1386,6 +1446,12 @@ def __new__(cls, hour=0, minute=0, second=0, microsecond=0, tzinfo=None, *, fold self._fold = fold return self + @classmethod + def strptime(cls, date_string, format): + """string, format -> new time parsed from a string (like time.strptime()).""" + import _strptime + return _strptime._strptime_datetime_time(cls, date_string, format) + # Read-only field accessors @property def hour(self): @@ -1514,7 +1580,7 @@ def __repr__(self): s = ", %d" % self._second else: s = "" - s= "%s.%s(%d, %d%s)" % (_get_class_module(self), + s = "%s%s(%d, %d%s)" % (_get_class_module(self), self.__class__.__qualname__, self._hour, self._minute, s) if self._tzinfo is not None: @@ -1556,7 +1622,7 @@ def fromisoformat(cls, time_string): time_string = time_string.removeprefix('T') try: - return cls(*_parse_isoformat_time(time_string)) + return cls(*_parse_isoformat_time(time_string)[0]) except Exception: raise ValueError(f'Invalid isoformat string: {time_string!r}') @@ -1634,6 +1700,8 @@ def replace(self, hour=None, minute=None, second=None, microsecond=None, fold = self._fold return type(self)(hour, minute, second, microsecond, tzinfo, fold=fold) + __replace__ = replace + # Pickle support. def _getstate(self, protocol=3): @@ -1681,7 +1749,7 @@ class datetime(date): The year, month and day arguments are required. tzinfo may be None, or an instance of a tzinfo subclass. The remaining arguments may be ints. """ - __slots__ = date.__slots__ + time.__slots__ + __slots__ = time.__slots__ def __new__(cls, year, month=None, day=None, hour=0, minute=0, second=0, microsecond=0, tzinfo=None, *, fold=0): @@ -1868,10 +1936,27 @@ def fromisoformat(cls, date_string): if tstr: try: - time_components = _parse_isoformat_time(tstr) + time_components, became_next_day, error_from_components = _parse_isoformat_time(tstr) except ValueError: raise ValueError( f'Invalid isoformat string: {date_string!r}') from None + else: + if error_from_components: + raise ValueError("minute, second, and microsecond must be 0 when hour is 24") + + if became_next_day: + year, month, day = date_components + # Only wrap day/month when it was previously valid + if month <= 12 and day <= (days_in_month := _days_in_month(year, month)): + # Calculate midnight of the next day + day += 1 + if day > days_in_month: + day = 1 + month += 1 + if month > 12: + month = 1 + year += 1 + date_components = [year, month, day] else: time_components = [0, 0, 0, 0, None] @@ -1980,6 +2065,8 @@ def replace(self, year=None, month=None, day=None, hour=None, return type(self)(year, month, day, hour, minute, second, microsecond, tzinfo, fold=fold) + __replace__ = replace + def _local_timezone(self): if self.tzinfo is None: ts = self._mktime() @@ -2041,7 +2128,7 @@ def isoformat(self, sep='T', timespec='auto'): By default, the fractional part is omitted if self.microsecond == 0. If self.tzinfo is not None, the UTC offset is also attached, giving - giving a full format of 'YYYY-MM-DD HH:MM:SS.mmmmmm+HH:MM'. + a full format of 'YYYY-MM-DD HH:MM:SS.mmmmmm+HH:MM'. Optional argument sep specifies the separator between date and time, default 'T'. @@ -2069,9 +2156,9 @@ def __repr__(self): del L[-1] if L[-1] == 0: del L[-1] - s = "%s.%s(%s)" % (_get_class_module(self), - self.__class__.__qualname__, - ", ".join(map(str, L))) + s = "%s%s(%s)" % (_get_class_module(self), + self.__class__.__qualname__, + ", ".join(map(str, L))) if self._tzinfo is not None: assert s[-1:] == ")" s = s[:-1] + ", tzinfo=%r" % self._tzinfo + ")" @@ -2088,7 +2175,7 @@ def __str__(self): def strptime(cls, date_string, format): 'string, format -> new datetime parsed from a string (like time.strptime()).' import _strptime - return _strptime._strptime_datetime(cls, date_string, format) + return _strptime._strptime_datetime_datetime(cls, date_string, format) def utcoffset(self): """Return the timezone offset as timedelta positive east of UTC (negative west of @@ -2132,42 +2219,32 @@ def dst(self): def __eq__(self, other): if isinstance(other, datetime): return self._cmp(other, allow_mixed=True) == 0 - elif not isinstance(other, date): - return NotImplemented else: - return False + return NotImplemented def __le__(self, other): if isinstance(other, datetime): return self._cmp(other) <= 0 - elif not isinstance(other, date): - return NotImplemented else: - _cmperror(self, other) + return NotImplemented def __lt__(self, other): if isinstance(other, datetime): return self._cmp(other) < 0 - elif not isinstance(other, date): - return NotImplemented else: - _cmperror(self, other) + return NotImplemented def __ge__(self, other): if isinstance(other, datetime): return self._cmp(other) >= 0 - elif not isinstance(other, date): - return NotImplemented else: - _cmperror(self, other) + return NotImplemented def __gt__(self, other): if isinstance(other, datetime): return self._cmp(other) > 0 - elif not isinstance(other, date): - return NotImplemented else: - _cmperror(self, other) + return NotImplemented def _cmp(self, other, allow_mixed=False): assert isinstance(other, datetime) @@ -2338,9 +2415,12 @@ def __new__(cls, offset, name=_Omitted): if not cls._minoffset <= offset <= cls._maxoffset: raise ValueError("offset must be a timedelta " "strictly between -timedelta(hours=24) and " - "timedelta(hours=24).") + f"timedelta(hours=24), not {offset!r}") return cls._create(offset, name) + def __init_subclass__(cls): + raise TypeError("type 'datetime.timezone' is not an acceptable base type") + @classmethod def _create(cls, offset, name=None): self = tzinfo.__new__(cls) @@ -2375,12 +2455,12 @@ def __repr__(self): if self is self.utc: return 'datetime.timezone.utc' if self._name is None: - return "%s.%s(%r)" % (_get_class_module(self), - self.__class__.__qualname__, - self._offset) - return "%s.%s(%r, %r)" % (_get_class_module(self), - self.__class__.__qualname__, - self._offset, self._name) + return "%s%s(%r)" % (_get_class_module(self), + self.__class__.__qualname__, + self._offset) + return "%s%s(%r, %r)" % (_get_class_module(self), + self.__class__.__qualname__, + self._offset, self._name) def __str__(self): return self.tzname(None) diff --git a/PythonLib/full/_pydecimal.py b/PythonLib/full/_pydecimal.py index ff80180a..97a629fe 100644 --- a/PythonLib/full/_pydecimal.py +++ b/PythonLib/full/_pydecimal.py @@ -38,10 +38,10 @@ 'ROUND_FLOOR', 'ROUND_UP', 'ROUND_HALF_DOWN', 'ROUND_05UP', # Functions for manipulating contexts - 'setcontext', 'getcontext', 'localcontext', + 'setcontext', 'getcontext', 'localcontext', 'IEEEContext', # Limits for the C version for compatibility - 'MAX_PREC', 'MAX_EMAX', 'MIN_EMIN', 'MIN_ETINY', + 'MAX_PREC', 'MAX_EMAX', 'MIN_EMIN', 'MIN_ETINY', 'IEEE_CONTEXT_MAX_BITS', # C version: compile time choice that enables the thread local context (deprecated, now always true) 'HAVE_THREADS', @@ -83,10 +83,12 @@ MAX_PREC = 999999999999999999 MAX_EMAX = 999999999999999999 MIN_EMIN = -999999999999999999 + IEEE_CONTEXT_MAX_BITS = 512 else: MAX_PREC = 425000000 MAX_EMAX = 425000000 MIN_EMIN = -425000000 + IEEE_CONTEXT_MAX_BITS = 256 MIN_ETINY = MIN_EMIN - (MAX_PREC-1) @@ -417,6 +419,27 @@ def sin(x): return ctx_manager +def IEEEContext(bits, /): + """ + Return a context object initialized to the proper values for one of the + IEEE interchange formats. The argument must be a multiple of 32 and less + than IEEE_CONTEXT_MAX_BITS. + """ + if bits <= 0 or bits > IEEE_CONTEXT_MAX_BITS or bits % 32: + raise ValueError("argument must be a multiple of 32, " + f"with a maximum of {IEEE_CONTEXT_MAX_BITS}") + + ctx = Context() + ctx.prec = 9 * (bits//32) - 2 + ctx.Emax = 3 * (1 << (bits//16 + 3)) + ctx.Emin = 1 - ctx.Emax + ctx.rounding = ROUND_HALF_EVEN + ctx.clamp = 1 + ctx.traps = dict.fromkeys(_signals, False) + + return ctx + + ##### Decimal class ####################################################### # Do not subclass Decimal from numbers.Real and do not register it as such @@ -582,6 +605,21 @@ def __new__(cls, value="0", context=None): raise TypeError("Cannot convert %r to Decimal" % value) + @classmethod + def from_number(cls, number): + """Converts a real number to a decimal number, exactly. + + >>> Decimal.from_number(314) # int + Decimal('314') + >>> Decimal.from_number(0.1) # float + Decimal('0.1000000000000000055511151231257827021181583404541015625') + >>> Decimal.from_number(Decimal('3.14')) # another decimal instance + Decimal('3.14') + """ + if isinstance(number, (int, Decimal, float)): + return cls(number) + raise TypeError("Cannot convert %r to Decimal" % number) + @classmethod def from_float(cls, f): """Converts a float to a decimal number, exactly. @@ -2425,12 +2463,12 @@ def __pow__(self, other, modulo=None, context=None): return ans - def __rpow__(self, other, context=None): + def __rpow__(self, other, modulo=None, context=None): """Swaps self/other and returns __pow__.""" other = _convert_other(other) if other is NotImplemented: return other - return other.__pow__(self, context=context) + return other.__pow__(self, modulo, context=context) def normalize(self, context=None): """Normalize- strip trailing 0s, change anything equal to 0 to 0e0""" @@ -3302,7 +3340,10 @@ def _fill_logical(self, context, opa, opb): return opa, opb def logical_and(self, other, context=None): - """Applies an 'and' operation between self and other's digits.""" + """Applies an 'and' operation between self and other's digits. + + Both self and other must be logical numbers. + """ if context is None: context = getcontext() @@ -3319,14 +3360,20 @@ def logical_and(self, other, context=None): return _dec_from_triple(0, result.lstrip('0') or '0', 0) def logical_invert(self, context=None): - """Invert all its digits.""" + """Invert all its digits. + + The self must be logical number. + """ if context is None: context = getcontext() return self.logical_xor(_dec_from_triple(0,'1'*context.prec,0), context) def logical_or(self, other, context=None): - """Applies an 'or' operation between self and other's digits.""" + """Applies an 'or' operation between self and other's digits. + + Both self and other must be logical numbers. + """ if context is None: context = getcontext() @@ -3343,7 +3390,10 @@ def logical_or(self, other, context=None): return _dec_from_triple(0, result.lstrip('0') or '0', 0) def logical_xor(self, other, context=None): - """Applies an 'xor' operation between self and other's digits.""" + """Applies an 'xor' operation between self and other's digits. + + Both self and other must be logical numbers. + """ if context is None: context = getcontext() @@ -6058,7 +6108,7 @@ def _convert_for_comparison(self, other, equality_op=False): (?P\d*) # with (possibly empty) diagnostic info. ) # \s* - \Z + \z """, re.VERBOSE | re.IGNORECASE).match _all_zeros = re.compile('0*$').match @@ -6082,11 +6132,15 @@ def _convert_for_comparison(self, other, equality_op=False): (?Pz)? (?P\#)? (?P0)? -(?P(?!0)\d+)? -(?P,)? -(?:\.(?P0|(?!0)\d+))? +(?P\d+)? +(?P[,_])? +(?:\. + (?=[\d,_]) # lookahead for digit or separator + (?P\d+)? + (?P[,_])? +)? (?P[eEfFgGn%])? -\Z +\z """, re.VERBOSE|re.DOTALL) del re @@ -6177,6 +6231,9 @@ def _parse_format_specifier(format_spec, _localeconv=None): format_dict['grouping'] = [3, 0] format_dict['decimal_point'] = '.' + if format_dict['frac_separators'] is None: + format_dict['frac_separators'] = '' + return format_dict def _format_align(sign, body, spec): @@ -6296,6 +6353,11 @@ def _format_number(is_negative, intpart, fracpart, exp, spec): sign = _format_sign(is_negative, spec) + frac_sep = spec['frac_separators'] + if fracpart and frac_sep: + fracpart = frac_sep.join(fracpart[pos:pos + 3] + for pos in range(0, len(fracpart), 3)) + if fracpart or spec['alt']: fracpart = spec['decimal_point'] + fracpart diff --git a/PythonLib/full/_pyio.py b/PythonLib/full/_pyio.py index 687076fb..116ce4f3 100644 --- a/PythonLib/full/_pyio.py +++ b/PythonLib/full/_pyio.py @@ -16,15 +16,16 @@ _setmode = None import io -from io import (__all__, SEEK_SET, SEEK_CUR, SEEK_END) +from io import (__all__, SEEK_SET, SEEK_CUR, SEEK_END, Reader, Writer) # noqa: F401 valid_seek_flags = {0, 1, 2} # Hardwired values if hasattr(os, 'SEEK_HOLE') : valid_seek_flags.add(os.SEEK_HOLE) valid_seek_flags.add(os.SEEK_DATA) -# open() uses st_blksize whenever we can -DEFAULT_BUFFER_SIZE = 8 * 1024 # bytes +# open() uses max(min(blocksize, 8 MiB), DEFAULT_BUFFER_SIZE) +# when the device block size is available. +DEFAULT_BUFFER_SIZE = 128 * 1024 # bytes # NOTE: Base classes defined here are registered with the "official" ABCs # defined in io.py. We don't use real inheritance though, because we don't want @@ -33,11 +34,8 @@ # Rebind for compatibility BlockingIOError = BlockingIOError -# Does io.IOBase finalizer log the exception if the close() method fails? -# The exception is ignored silently by default in release build. -_IOBASE_EMITS_UNRAISABLE = (hasattr(sys, "gettotalrefcount") or sys.flags.dev_mode) # Does open() check its 'errors' argument? -_CHECK_ERRORS = _IOBASE_EMITS_UNRAISABLE +_CHECK_ERRORS = (hasattr(sys, "gettotalrefcount") or sys.flags.dev_mode) def text_encoding(encoding, stacklevel=2): @@ -126,10 +124,10 @@ def open(file, mode="r", buffering=-1, encoding=None, errors=None, the size of a fixed-size chunk buffer. When no buffering argument is given, the default buffering policy works as follows: - * Binary files are buffered in fixed-size chunks; the size of the buffer - is chosen using a heuristic trying to determine the underlying device's - "block size" and falling back on `io.DEFAULT_BUFFER_SIZE`. - On many systems, the buffer will typically be 4096 or 8192 bytes long. + * Binary files are buffered in fixed-size chunks; the size of the buffer + is max(min(blocksize, 8 MiB), DEFAULT_BUFFER_SIZE) + when the device block size is available. + On most systems, the buffer will typically be 128 kilobytes long. * "Interactive" text files (files for which isatty() returns True) use line buffering. Other text files use the policy described above @@ -241,18 +239,11 @@ def open(file, mode="r", buffering=-1, encoding=None, errors=None, result = raw try: line_buffering = False - if buffering == 1 or buffering < 0 and raw.isatty(): + if buffering == 1 or buffering < 0 and raw._isatty_open_only(): buffering = -1 line_buffering = True if buffering < 0: - buffering = DEFAULT_BUFFER_SIZE - try: - bs = os.fstat(raw.fileno()).st_blksize - except (OSError, AttributeError): - pass - else: - if bs > 1: - buffering = bs + buffering = max(min(raw._blksize, 8192 * 1024), DEFAULT_BUFFER_SIZE) if buffering < 0: raise ValueError("invalid buffering size") if buffering == 0: @@ -416,18 +407,12 @@ def __del__(self): if closed: return - if _IOBASE_EMITS_UNRAISABLE: - self.close() - else: - # The try/except block is in case this is called at program - # exit time, when it's possible that globals have already been - # deleted, and then the close() call might fail. Since - # there's nothing we can do about such failures and they annoy - # the end users, we suppress the traceback. - try: - self.close() - except: - pass + if dealloc_warn := getattr(self, "_dealloc_warn", None): + dealloc_warn(self) + + # If close() fails, the caller logs the exception with + # sys.unraisablehook. close() must be called at the end at __del__(). + self.close() ### Inquiries ### @@ -632,6 +617,8 @@ def read(self, size=-1): n = self.readinto(b) if n is None: return None + if n < 0 or n > len(b): + raise ValueError(f"readinto returned {n} outside buffer size {len(b)}") del b[n:] return bytes(b) @@ -663,8 +650,6 @@ def write(self, b): self._unsupported("write") io.RawIOBase.register(RawIOBase) -from _io import FileIO -RawIOBase.register(FileIO) class BufferedIOBase(IOBase): @@ -871,6 +856,10 @@ def __repr__(self): else: return "<{}.{} name={!r}>".format(modname, clsname, name) + def _dealloc_warn(self, source): + if dealloc_warn := getattr(self.raw, "_dealloc_warn", None): + dealloc_warn(source) + ### Lower-level APIs ### def fileno(self): @@ -946,22 +935,22 @@ def read1(self, size=-1): return self.read(size) def write(self, b): - if self.closed: - raise ValueError("write to closed file") if isinstance(b, str): raise TypeError("can't write str to binary stream") with memoryview(b) as view: + if self.closed: + raise ValueError("write to closed file") + n = view.nbytes # Size of any bytes-like object - if n == 0: - return 0 - pos = self._pos - if pos > len(self._buffer): - # Inserts null bytes between the current end of the file - # and the new write position. - padding = b'\x00' * (pos - len(self._buffer)) - self._buffer += padding - self._buffer[pos:pos + n] = b - self._pos += n + if n == 0: + return 0 + + pos = self._pos + if pos > len(self._buffer): + # Pad buffer to pos with null bytes. + self._buffer.resize(pos) + self._buffer[pos:pos + n] = view + self._pos += n return n def seek(self, pos, whence=0): @@ -1475,6 +1464,17 @@ def write(self, b): return BufferedWriter.write(self, b) +def _new_buffersize(bytes_read): + # Parallels _io/fileio.c new_buffersize + if bytes_read > 65536: + addend = bytes_read >> 3 + else: + addend = 256 + bytes_read + if addend < DEFAULT_BUFFER_SIZE: + addend = DEFAULT_BUFFER_SIZE + return bytes_read + addend + + class FileIO(RawIOBase): _fd = -1 _created = False @@ -1499,6 +1499,7 @@ def __init__(self, file, mode='r', closefd=True, opener=None): """ if self._fd >= 0: # Have to close the existing file first. + self._stat_atopen = None try: if self._closefd: os.close(self._fd) @@ -1508,6 +1509,11 @@ def __init__(self, file, mode='r', closefd=True, opener=None): if isinstance(file, float): raise TypeError('integer argument expected, got float') if isinstance(file, int): + if isinstance(file, bool): + import warnings + warnings.warn("bool is used as a file descriptor", + RuntimeWarning, stacklevel=2) + file = int(file) fd = file if fd < 0: raise ValueError('negative file descriptor') @@ -1566,24 +1572,22 @@ def __init__(self, file, mode='r', closefd=True, opener=None): if not isinstance(fd, int): raise TypeError('expected integer from opener') if fd < 0: - raise OSError('Negative file descriptor') + # bpo-27066: Raise a ValueError for bad value. + raise ValueError(f'opener returned {fd}') owned_fd = fd if not noinherit_flag: os.set_inheritable(fd, False) self._closefd = closefd - fdfstat = os.fstat(fd) + self._stat_atopen = os.fstat(fd) try: - if stat.S_ISDIR(fdfstat.st_mode): + if stat.S_ISDIR(self._stat_atopen.st_mode): raise IsADirectoryError(errno.EISDIR, os.strerror(errno.EISDIR), file) except AttributeError: # Ignore the AttributeError if stat.S_ISDIR or errno.EISDIR # don't exist. pass - self._blksize = getattr(fdfstat, 'st_blksize', 0) - if self._blksize <= 1: - self._blksize = DEFAULT_BUFFER_SIZE if _setmode: # don't translate newlines (\r\n <=> \n) @@ -1600,17 +1604,17 @@ def __init__(self, file, mode='r', closefd=True, opener=None): if e.errno != errno.ESPIPE: raise except: + self._stat_atopen = None if owned_fd is not None: os.close(owned_fd) raise self._fd = fd - def __del__(self): + def _dealloc_warn(self, source): if self._fd >= 0 and self._closefd and not self.closed: import warnings - warnings.warn('unclosed file %r' % (self,), ResourceWarning, + warnings.warn(f'unclosed file {source!r}', ResourceWarning, stacklevel=2, source=self) - self.close() def __getstate__(self): raise TypeError(f"cannot pickle {self.__class__.__name__!r} object") @@ -1629,6 +1633,17 @@ def __repr__(self): return ('<%s name=%r mode=%r closefd=%r>' % (class_name, name, self.mode, self._closefd)) + @property + def _blksize(self): + if self._stat_atopen is None: + return DEFAULT_BUFFER_SIZE + + blksize = getattr(self._stat_atopen, "st_blksize", 0) + # WASI sets blsize to 0 + if not blksize: + return DEFAULT_BUFFER_SIZE + return blksize + def _checkReadable(self): if not self._readable: raise UnsupportedOperation('File not open for reading') @@ -1640,7 +1655,13 @@ def _checkWritable(self, msg=None): def read(self, size=None): """Read at most size bytes, returned as bytes. - Only makes one system call, so less data may be returned than requested + If size is less than 0, read all bytes in the file making + multiple read calls. See ``FileIO.readall``. + + Attempts to make only one system call, retrying only per + PEP 475 (EINTR). This means less data may be returned than + requested. + In non-blocking mode, returns None if no data is available. Return an empty bytes object at EOF. """ @@ -1656,45 +1677,57 @@ def read(self, size=None): def readall(self): """Read all data from the file, returned as bytes. - In non-blocking mode, returns as much as is immediately available, - or None if no data is available. Return an empty bytes object at EOF. + Reads until either there is an error or read() returns size 0 + (indicates EOF). If the file is already at EOF, returns an + empty bytes object. + + In non-blocking mode, returns as much data as could be read + before EAGAIN. If no data is available (EAGAIN is returned + before bytes are read) returns None. """ self._checkClosed() self._checkReadable() - bufsize = DEFAULT_BUFFER_SIZE - try: - pos = os.lseek(self._fd, 0, SEEK_CUR) - end = os.fstat(self._fd).st_size - if end >= pos: - bufsize = end - pos + 1 - except OSError: - pass + if self._stat_atopen is None or self._stat_atopen.st_size <= 0: + bufsize = DEFAULT_BUFFER_SIZE + else: + # In order to detect end of file, need a read() of at least 1 + # byte which returns size 0. Oversize the buffer by 1 byte so the + # I/O can be completed with two read() calls (one for all data, one + # for EOF) without needing to resize the buffer. + bufsize = self._stat_atopen.st_size + 1 - result = bytearray() - while True: - if len(result) >= bufsize: - bufsize = len(result) - bufsize += max(bufsize, DEFAULT_BUFFER_SIZE) - n = bufsize - len(result) - try: - chunk = os.read(self._fd, n) - except BlockingIOError: - if result: - break + if self._stat_atopen.st_size > 65536: + try: + pos = os.lseek(self._fd, 0, SEEK_CUR) + if self._stat_atopen.st_size >= pos: + bufsize = self._stat_atopen.st_size - pos + 1 + except OSError: + pass + + result = bytearray(bufsize) + bytes_read = 0 + try: + while n := os.readinto(self._fd, memoryview(result)[bytes_read:]): + bytes_read += n + if bytes_read >= len(result): + result.resize(_new_buffersize(bytes_read)) + except BlockingIOError: + if not bytes_read: return None - if not chunk: # reached the end of the file - break - result += chunk + assert len(result) - bytes_read >= 1, \ + "os.readinto buffer size 0 will result in erroneous EOF / returns 0" + result.resize(bytes_read) return bytes(result) - def readinto(self, b): + def readinto(self, buffer): """Same as RawIOBase.readinto().""" - m = memoryview(b).cast('B') - data = self.read(len(m)) - n = len(data) - m[:n] = data - return n + self._checkClosed() + self._checkReadable() + try: + return os.readinto(self._fd, buffer) + except BlockingIOError: + return None def write(self, b): """Write bytes b to file, return number written. @@ -1744,6 +1777,7 @@ def truncate(self, size=None): if size is None: size = self.tell() os.ftruncate(self._fd, size) + self._stat_atopen = None return size def close(self): @@ -1753,8 +1787,9 @@ def close(self): called more than once without error. """ if not self.closed: + self._stat_atopen = None try: - if self._closefd: + if self._closefd and self._fd >= 0: os.close(self._fd) finally: super().close() @@ -1791,6 +1826,21 @@ def isatty(self): self._checkClosed() return os.isatty(self._fd) + def _isatty_open_only(self): + """Checks whether the file is a TTY using an open-only optimization. + + TTYs are always character devices. If the interpreter knows a file is + not a character device when it would call ``isatty``, can skip that + call. Inside ``open()`` there is a fresh stat result that contains that + information. Use the stat result to skip a system call. Outside of that + context TOCTOU issues (the fd could be arbitrarily modified by + surrounding code). + """ + if (self._stat_atopen is not None + and not stat.S_ISCHR(self._stat_atopen.st_mode)): + return False + return os.isatty(self._fd) + @property def closefd(self): """True if the file descriptor will be closed by close().""" @@ -2015,8 +2065,7 @@ def __init__(self, buffer, encoding=None, errors=None, newline=None, raise ValueError("invalid encoding: %r" % encoding) if not codecs.lookup(encoding)._is_text_encoding: - msg = ("%r is not a text encoding; " - "use codecs.open() to handle arbitrary codecs") + msg = "%r is not a text encoding" raise LookupError(msg % encoding) if errors is None: @@ -2524,9 +2573,12 @@ def read(self, size=None): size = size_index() decoder = self._decoder or self._get_decoder() if size < 0: + chunk = self.buffer.read() + if chunk is None: + raise BlockingIOError("Read returned None.") # Read everything. result = (self._get_decoded_chars() + - decoder.decode(self.buffer.read(), final=True)) + decoder.decode(chunk, final=True)) if self._snapshot is not None: self._set_decoded_chars('') self._snapshot = None @@ -2646,6 +2698,10 @@ def readline(self, size=None): def newlines(self): return self._decoder.newlines if self._decoder else None + def _dealloc_warn(self, source): + if dealloc_warn := getattr(self.buffer, "_dealloc_warn", None): + dealloc_warn(source) + class StringIO(TextIOWrapper): """Text I/O implementation using an in-memory buffer. diff --git a/PythonLib/full/_pylong.py b/PythonLib/full/_pylong.py index 30bee6fc..be1acd17 100644 --- a/PythonLib/full/_pylong.py +++ b/PythonLib/full/_pylong.py @@ -19,6 +19,122 @@ except ImportError: _decimal = None +# A number of functions have this form, where `w` is a desired number of +# digits in base `base`: +# +# def inner(...w...): +# if w <= LIMIT: +# return something +# lo = w >> 1 +# hi = w - lo +# something involving base**lo, inner(...lo...), j, and inner(...hi...) +# figure out largest w needed +# result = inner(w) +# +# They all had some on-the-fly scheme to cache `base**lo` results for reuse. +# Power is costly. +# +# This routine aims to compute all amd only the needed powers in advance, as +# efficiently as reasonably possible. This isn't trivial, and all the +# on-the-fly methods did needless work in many cases. The driving code above +# changes to: +# +# figure out largest w needed +# mycache = compute_powers(w, base, LIMIT) +# result = inner(w) +# +# and `mycache[lo]` replaces `base**lo` in the inner function. +# +# If an algorithm wants the powers of ceiling(w/2) instead of the floor, +# pass keyword argument `need_hi=True`. +# +# While this does give minor speedups (a few percent at best), the +# primary intent is to simplify the functions using this, by eliminating +# the need for them to craft their own ad-hoc caching schemes. +# +# See code near end of file for a block of code that can be enabled to +# run millions of tests. +def compute_powers(w, base, more_than, *, need_hi=False, show=False): + seen = set() + need = set() + ws = {w} + while ws: + w = ws.pop() # any element is fine to use next + if w in seen or w <= more_than: + continue + seen.add(w) + lo = w >> 1 + hi = w - lo + # only _need_ one here; the other may, or may not, be needed + which = hi if need_hi else lo + need.add(which) + ws.add(which) + if lo != hi: + ws.add(w - which) + + # `need` is the set of exponents needed. To compute them all + # efficiently, possibly add other exponents to `extra`. The goal is + # to ensure that each exponent can be gotten from a smaller one via + # multiplying by the base, squaring it, or squaring and then + # multiplying by the base. + # + # If need_hi is False, this is already the case (w can always be + # gotten from w >> 1 via one of the squaring strategies). But we do + # the work anyway, just in case ;-) + # + # Note that speed is irrelevant. These loops are working on little + # ints (exponents) and go around O(log w) times. The total cost is + # insignificant compared to just one of the bigint multiplies. + cands = need.copy() + extra = set() + while cands: + w = max(cands) + cands.remove(w) + lo = w >> 1 + if lo > more_than and w-1 not in cands and lo not in cands: + extra.add(lo) + cands.add(lo) + assert need_hi or not extra + + d = {} + for n in sorted(need | extra): + lo = n >> 1 + hi = n - lo + if n-1 in d: + if show: + print("* base", end="") + result = d[n-1] * base # cheap! + elif lo in d: + # Multiplying a bigint by itself is about twice as fast + # in CPython provided it's the same object. + if show: + print("square", end="") + result = d[lo] * d[lo] # same object + if hi != lo: + if show: + print(" * base", end="") + assert 2 * lo + 1 == n + result *= base + else: # rare + if show: + print("pow", end='') + result = base ** n + if show: + print(" at", n, "needed" if n in need else "extra") + d[n] = result + + assert need <= d.keys() + if excess := d.keys() - need: + assert need_hi + for n in excess: + del d[n] + return d + +_unbounded_dec_context = decimal.getcontext().copy() +_unbounded_dec_context.prec = decimal.MAX_PREC +_unbounded_dec_context.Emax = decimal.MAX_EMAX +_unbounded_dec_context.Emin = decimal.MIN_EMIN +_unbounded_dec_context.traps[decimal.Inexact] = 1 # sanity check def int_to_decimal(n): """Asymptotically fast conversion of an 'int' to Decimal.""" @@ -33,57 +149,32 @@ def int_to_decimal(n): # "clever" recursive way. If we want a string representation, we # apply str to _that_. - D = decimal.Decimal - D2 = D(2) - - BITLIM = 128 - - mem = {} - - def w2pow(w): - """Return D(2)**w and store the result. Also possibly save some - intermediate results. In context, these are likely to be reused - across various levels of the conversion to Decimal.""" - if (result := mem.get(w)) is None: - if w <= BITLIM: - result = D2**w - elif w - 1 in mem: - result = (t := mem[w - 1]) + t - else: - w2 = w >> 1 - # If w happens to be odd, w-w2 is one larger then w2 - # now. Recurse on the smaller first (w2), so that it's - # in the cache and the larger (w-w2) can be handled by - # the cheaper `w-1 in mem` branch instead. - result = w2pow(w2) * w2pow(w - w2) - mem[w] = result - return result + from decimal import Decimal as D + BITLIM = 200 + # Don't bother caching the "lo" mask in this; the time to compute it is + # tiny compared to the multiply. def inner(n, w): if w <= BITLIM: return D(n) w2 = w >> 1 hi = n >> w2 - lo = n - (hi << w2) - return inner(lo, w2) + inner(hi, w - w2) * w2pow(w2) - - with decimal.localcontext() as ctx: - ctx.prec = decimal.MAX_PREC - ctx.Emax = decimal.MAX_EMAX - ctx.Emin = decimal.MIN_EMIN - ctx.traps[decimal.Inexact] = 1 + lo = n & ((1 << w2) - 1) + return inner(lo, w2) + inner(hi, w - w2) * w2pow[w2] + with decimal.localcontext(_unbounded_dec_context): + nbits = n.bit_length() + w2pow = compute_powers(nbits, D(2), BITLIM) if n < 0: negate = True n = -n else: negate = False - result = inner(n, n.bit_length()) + result = inner(n, nbits) if negate: result = -result return result - def int_to_decimal_string(n): """Asymptotically fast conversion of an 'int' to a decimal string.""" w = n.bit_length() @@ -97,14 +188,13 @@ def int_to_decimal_string(n): # available. This algorithm is asymptotically worse than the algorithm # using the decimal module, but better than the quadratic time # implementation in longobject.c. + + DIGLIM = 1000 def inner(n, w): - if w <= 1000: + if w <= DIGLIM: return str(n) w2 = w >> 1 - d = pow10_cache.get(w2) - if d is None: - d = pow10_cache[w2] = 5**w2 << w2 # 10**i = (5*2)**i = 5**i * 2**i - hi, lo = divmod(n, d) + hi, lo = divmod(n, pow10[w2]) return inner(hi, w - w2) + inner(lo, w2).zfill(w2) # The estimation of the number of decimal digits. @@ -115,7 +205,9 @@ def inner(n, w): # only if the number has way more than 10**15 digits, that exceeds # the 52-bit physical address limit in both Intel64 and AMD64. w = int(w * 0.3010299956639812 + 1) # log10(2) - pow10_cache = {} + pow10 = compute_powers(w, 5, DIGLIM) + for k, v in pow10.items(): + pow10[k] = v << k # 5**k << k == 5**k * 2**k == 10**k if n < 0: n = -n sign = '-' @@ -128,7 +220,6 @@ def inner(n, w): s = s.lstrip('0') return sign + s - def _str_to_int_inner(s): """Asymptotically fast conversion of a 'str' to an 'int'.""" @@ -144,38 +235,157 @@ def _str_to_int_inner(s): DIGLIM = 2048 - mem = {} - - def w5pow(w): - """Return 5**w and store the result. - Also possibly save some intermediate results. In context, these - are likely to be reused across various levels of the conversion - to 'int'. - """ - if (result := mem.get(w)) is None: - if w <= DIGLIM: - result = 5**w - elif w - 1 in mem: - result = mem[w - 1] * 5 - else: - w2 = w >> 1 - # If w happens to be odd, w-w2 is one larger then w2 - # now. Recurse on the smaller first (w2), so that it's - # in the cache and the larger (w-w2) can be handled by - # the cheaper `w-1 in mem` branch instead. - result = w5pow(w2) * w5pow(w - w2) - mem[w] = result - return result - def inner(a, b): if b - a <= DIGLIM: return int(s[a:b]) mid = (a + b + 1) >> 1 - return inner(mid, b) + ((inner(a, mid) * w5pow(b - mid)) << (b - mid)) + return (inner(mid, b) + + ((inner(a, mid) * w5pow[b - mid]) + << (b - mid))) + w5pow = compute_powers(len(s), 5, DIGLIM) return inner(0, len(s)) +# Asymptotically faster version, using the C decimal module. See +# comments at the end of the file. This uses decimal arithmetic to +# convert from base 10 to base 256. The latter is just a string of +# bytes, which CPython can convert very efficiently to a Python int. + +# log of 10 to base 256 with best-possible 53-bit precision. Obtained +# via: +# from mpmath import mp +# mp.prec = 1000 +# print(float(mp.log(10, 256)).hex()) +_LOG_10_BASE_256 = float.fromhex('0x1.a934f0979a371p-2') # about 0.415 + +# _spread is for internal testing. It maps a key to the number of times +# that condition obtained in _dec_str_to_int_inner: +# key 0 - quotient guess was right +# key 1 - quotient had to be boosted by 1, one time +# key 999 - one adjustment wasn't enough, so fell back to divmod +from collections import defaultdict +_spread = defaultdict(int) +del defaultdict + +def _dec_str_to_int_inner(s, *, GUARD=8): + # Yes, BYTELIM is "large". Large enough that CPython will usually + # use the Karatsuba _str_to_int_inner to convert the string. This + # allowed reducing the cutoff for calling _this_ function from 3.5M + # to 2M digits. We could almost certainly do even better by + # fine-tuning this and/or using a larger output base than 256. + BYTELIM = 100_000 + D = decimal.Decimal + result = bytearray() + # See notes at end of file for discussion of GUARD. + assert GUARD > 0 # if 0, `decimal` can blow up - .prec 0 not allowed + + def inner(n, w): + #assert n < D256 ** w # required, but too expensive to check + if w <= BYTELIM: + # XXX Stefan Pochmann discovered that, for 1024-bit ints, + # `int(Decimal)` took 2.5x longer than `int(str(Decimal))`. + # Worse, `int(Decimal) is still quadratic-time for much + # larger ints. So unless/until all that is repaired, the + # seemingly redundant `str(Decimal)` is crucial to speed. + result.extend(int(str(n)).to_bytes(w)) # big-endian default + return + w1 = w >> 1 + w2 = w - w1 + if 0: + # This is maximally clear, but "too slow". `decimal` + # division is asymptotically fast, but we have no way to + # tell it to reuse the high-precision reciprocal it computes + # for pow256[w2], so it has to recompute it over & over & + # over again :-( + hi, lo = divmod(n, pow256[w2][0]) + else: + p256, recip = pow256[w2] + # The integer part will have a number of digits about equal + # to the difference between the log10s of `n` and `pow256` + # (which, since these are integers, is roughly approximated + # by `.adjusted()`). That's the working precision we need, + ctx.prec = max(n.adjusted() - p256.adjusted(), 0) + GUARD + hi = +n * +recip # unary `+` chops back to ctx.prec digits + ctx.prec = decimal.MAX_PREC + hi = hi.to_integral_value() # lose the fractional digits + lo = n - hi * p256 + # Because we've been uniformly rounding down, `hi` is a + # lower bound on the correct quotient. + assert lo >= 0 + # Adjust quotient up if needed. It usually isn't. In random + # testing on inputs through 5 billion digit strings, the + # test triggered once in about 200 thousand tries. + count = 0 + if lo >= p256: + count = 1 + lo -= p256 + hi += 1 + if lo >= p256: + # Complete correction via an exact computation. I + # believe it's not possible to get here provided + # GUARD >= 3. It's tested by reducing GUARD below + # that. + count = 999 + hi2, lo = divmod(lo, p256) + hi += hi2 + _spread[count] += 1 + # The assert should always succeed, but way too slow to keep + # enabled. + #assert hi, lo == divmod(n, pow256[w2][0]) + inner(hi, w1) + del hi # at top levels, can free a lot of RAM "early" + inner(lo, w2) + + # How many base 256 digits are needed?. Mathematically, exactly + # floor(log256(int(s))) + 1. There is no cheap way to compute this. + # But we can get an upper bound, and that's necessary for our error + # analysis to make sense. int(s) < 10**len(s), so the log needed is + # < log256(10**len(s)) = len(s) * log256(10). However, using + # finite-precision floating point for this, it's possible that the + # computed value is a little less than the true value. If the true + # value is at - or a little higher than - an integer, we can get an + # off-by-1 error too low. So we add 2 instead of 1 if chopping lost + # a fraction > 0.9. + + # The "WASI" test platform can complain about `len(s)` if it's too + # large to fit in its idea of "an index-sized integer". + lenS = s.__len__() + log_ub = lenS * _LOG_10_BASE_256 + log_ub_as_int = int(log_ub) + w = log_ub_as_int + 1 + (log_ub - log_ub_as_int > 0.9) + # And what if we've plain exhausted the limits of HW floats? We + # could compute the log to any desired precision using `decimal`, + # but it's not plausible that anyone will pass a string requiring + # trillions of bytes (unless they're just trying to "break things"). + if w.bit_length() >= 46: + # "Only" had < 53 - 46 = 7 bits to spare in IEEE-754 double. + raise ValueError(f"cannot convert string of len {lenS} to int") + with decimal.localcontext(_unbounded_dec_context) as ctx: + D256 = D(256) + pow256 = compute_powers(w, D256, BYTELIM, need_hi=True) + rpow256 = compute_powers(w, 1 / D256, BYTELIM, need_hi=True) + # We're going to do inexact, chopped arithmetic, multiplying by + # an approximation to the reciprocal of 256**i. We chop to get a + # lower bound on the true integer quotient. Our approximation is + # a lower bound, the multiplication is chopped too, and + # to_integral_value() is also chopped. + ctx.traps[decimal.Inexact] = 0 + ctx.rounding = decimal.ROUND_DOWN + for k, v in pow256.items(): + # No need to save much more precision in the reciprocal than + # the power of 256 has, plus some guard digits to absorb + # most relevant rounding errors. This is highly significant: + # 1/2**i has the same number of significant decimal digits + # as 5**i, generally over twice the number in 2**i, + ctx.prec = v.adjusted() + GUARD + 1 + # The unary "+" chops the reciprocal back to that precision. + pow256[k] = v, +rpow256[k] + del rpow256 # exact reciprocals no longer needed + ctx.prec = decimal.MAX_PREC + inner(D(s), w) + return int.from_bytes(result) + def int_from_string(s): """Asymptotically fast version of PyLong_FromString(), conversion of a string of decimal digits into an 'int'.""" @@ -184,8 +394,10 @@ def int_from_string(s): # and underscores, and stripped leading whitespace. The input can still # contain underscores and have trailing whitespace. s = s.rstrip().replace('_', '') - return _str_to_int_inner(s) - + func = _str_to_int_inner + if len(s) >= 2_000_000 and _decimal is not None: + func = _dec_str_to_int_inner + return func(s) def str_to_int(s): """Asymptotically fast version of decimal string to 'int' conversion.""" @@ -318,7 +530,7 @@ def int_divmod(a, b): Its time complexity is O(n**1.58), where n = #bits(a) + #bits(b). """ if b == 0: - raise ZeroDivisionError + raise ZeroDivisionError('division by zero') elif b < 0: q, r = int_divmod(-a, -b) return q, -r @@ -327,3 +539,191 @@ def int_divmod(a, b): return ~q, b + ~r else: return _divmod_pos(a, b) + + +# Notes on _dec_str_to_int_inner: +# +# Stefan Pochmann worked up a str->int function that used the decimal +# module to, in effect, convert from base 10 to base 256. This is +# "unnatural", in that it requires multiplying and dividing by large +# powers of 2, which `decimal` isn't naturally suited to. But +# `decimal`'s `*` and `/` are asymptotically superior to CPython's, so +# at _some_ point it could be expected to win. +# +# Alas, the crossover point was too high to be of much real interest. I +# (Tim) then worked on ways to replace its division with multiplication +# by a cached reciprocal approximation instead, fixing up errors +# afterwards. This reduced the crossover point significantly, +# +# I revisited the code, and found ways to improve and simplify it. The +# crossover point is at about 3.4 million digits now. +# +# About .adjusted() +# ----------------- +# Restrict to Decimal values x > 0. We don't use negative numbers in the +# code, and I don't want to have to keep typing, e.g., "absolute value". +# +# For convenience, I'll use `x.a` to mean `x.adjusted()`. x.a doesn't +# look at the digits of x, but instead returns an integer giving x's +# order of magnitude. These are equivalent: +# +# - x.a is the power-of-10 exponent of x's most significant digit. +# - x.a = the infinitely precise floor(log10(x)) +# - x can be written in this form, where f is a real with 1 <= f < 10: +# x = f * 10**x.a +# +# Observation; if x is an integer, len(str(x)) = x.a + 1. +# +# Lemma 1: (x * y).a = x.a + y.a, or one larger +# +# Proof: Write x = f * 10**x.a and y = g * 10**y.a, where f and g are in +# [1, 10). Then x*y = f*g * 10**(x.a + y.a), where 1 <= f*g < 100. If +# f*g < 10, (x*y).a is x.a+y.a. Else divide f*g by 10 to bring it back +# into [1, 10], and add 1 to the exponent to compensate. Then (x*y).a is +# x.a+y.a+1. +# +# Lemma 2: ceiling(log10(x/y)) <= x.a - y.a + 1 +# +# Proof: Express x and y as in Lemma 1. Then x/y = f/g * 10**(x.a - +# y.a), where 1/10 < f/g < 10. If 1 <= f/g, (x/y).a is x.a-y.a. Else +# multiply f/g by 10 to bring it back into [1, 10], and subtract 1 from +# the exponent to compensate. Then (x/y).a is x.a-y.a-1. So the largest +# (x/y).a can be is x.a-y.a. Since that's the floor of log10(x/y). the +# ceiling is at most 1 larger (with equality iff f/g = 1 exactly). +# +# GUARD digits +# ------------ +# We only want the integer part of divisions, so don't need to build +# the full multiplication tree. But using _just_ the number of +# digits expected in the integer part ignores too much. What's left +# out can have a very significant effect on the quotient. So we use +# GUARD additional digits. +# +# The default 8 is more than enough so no more than 1 correction step +# was ever needed for all inputs tried through 2.5 billion digits. In +# fact, I believe 3 guard digits are always enough - but the proof is +# very involved, so better safe than sorry. +# +# Short course: +# +# If prec is the decimal precision in effect, and we're rounding down, +# the result of an operation is exactly equal to the infinitely precise +# result times 1-e for some real e with 0 <= e < 10**(1-prec). In +# +# ctx.prec = max(n.adjusted() - p256.adjusted(), 0) + GUARD +# hi = +n * +recip # unary `+` chops to ctx.prec digits +# +# we have 3 visible chopped operations, but there's also a 4th: +# precomputing a truncated `recip` as part of setup. +# +# So the computed product is exactly equal to the true product times +# (1-e1)*(1-e2)*(1-e3)*(1-e4); since the e's are all very small, an +# excellent approximation to the second factor is 1-(e1+e2+e3+e4) (the +# 2nd and higher order terms in the expanded product are too tiny to +# matter). If they're all as large as possible, that's +# +# 1 - 4*10**(1-prec). This, BTW, is all bog-standard FP error analysis. +# +# That implies the computed product is within 1 of the true product +# provided prec >= log10(true_product) + 1.602. +# +# Here are telegraphic details, rephrasing the initial condition in +# equivalent ways, step by step: +# +# prod - prod * (1 - 4*10**(1-prec)) <= 1 +# prod - prod + prod * 4*10**(1-prec)) <= 1 +# prod * 4*10**(1-prec)) <= 1 +# 10**(log10(prod)) * 4*10**(1-prec)) <= 1 +# 4*10**(1-prec+log10(prod))) <= 1 +# 10**(1-prec+log10(prod))) <= 1/4 +# 1-prec+log10(prod) <= log10(1/4) = -0.602 +# -prec <= -1.602 - log10(prod) +# prec >= log10(prod) + 1.602 +# +# The true product is the same as the true ratio n/p256. By Lemma 2 +# above, n.a - p256.a + 1 is an upper bound on the ceiling of +# log10(prod). Then 2 is the ceiling of 1.602. so n.a - p256.a + 3 is an +# upper bound on the right hand side of the inequality. Any prec >= that +# will work. +# +# But since this is just a sketch of a proof ;-), the code uses the +# empirically tested 8 instead of 3. 5 digits more or less makes no +# practical difference to speed - these ints are huge. And while +# increasing GUARD above 3 may not be necessary, every increase cuts the +# percentage of cases that need a correction at all. +# +# On Computing Reciprocals +# ------------------------ +# In general, the exact reciprocals we compute have over twice as many +# significant digits as needed. 1/256**i has the same number of +# significant decimal digits as 5**i. It's a significant waste of RAM +# to store all those unneeded digits. +# +# So we cut exact reciprocals back to the least precision that can +# be needed so that the error analysis above is valid, +# +# [Note: turns out it's very significantly faster to do it this way than +# to compute 1 / 256**i directly to the desired precision, because the +# power method doesn't require division. It's also faster than computing +# (1/256)**i directly to the desired precision - no material division +# there, but `compute_powers()` is much smarter about _how_ to compute +# all the powers needed than repeated applications of `**` - that +# function invokes `**` for at most the few smallest powers needed.] +# +# The hard part is that chopping back to a shorter width occurs +# _outside_ of `inner`. We can't know then what `prec` `inner()` will +# need. We have to pick, for each value of `w2`, the largest possible +# value `prec` can become when `inner()` is working on `w2`. +# +# This is the `prec` inner() uses: +# max(n.a - p256.a, 0) + GUARD +# and what setup uses (renaming its `v` to `p256` - same thing): +# p256.a + GUARD + 1 +# +# We need that the second is always at least as large as the first, +# which is the same as requiring +# +# n.a - 2 * p256.a <= 1 +# +# What's the largest n can be? n < 255**w = 256**(w2 + (w - w2)). The +# worst case in this context is when w ix even. and then w = 2*w2, so +# n < 256**(2*w2) = (256**w2)**2 = p256**2. By Lemma 1, then, n.a +# is at most p256.a + p256.a + 1. +# +# So the most n.a - 2 * p256.a can be is +# p256.a + p256.a + 1 - 2 * p256.a = 1. QED +# +# Note: an earlier version of the code split on floor(e/2) instead of on +# the ceiling. The worst case then is odd `w`, and a more involved proof +# was needed to show that adding 4 (instead of 1) may be necessary. +# Basically because, in that case, n may be up to 256 times larger than +# p256**2. Curiously enough, by splitting on the ceiling instead, +# nothing in any proof here actually depends on the output base (256). + +# Enable for brute-force testing of compute_powers(). This takes about a +# minute, because it tries millions of cases. +if 0: + def consumer(w, limit, need_hi): + seen = set() + need = set() + def inner(w): + if w <= limit: + return + if w in seen: + return + seen.add(w) + lo = w >> 1 + hi = w - lo + need.add(hi if need_hi else lo) + inner(lo) + inner(hi) + inner(w) + exp = compute_powers(w, 1, limit, need_hi=need_hi) + assert exp.keys() == need + + from itertools import chain + for need_hi in (False, True): + for limit in (0, 1, 10, 100, 1_000, 10_000, 100_000): + for w in chain(range(1, 100_000), + (10**i for i in range(5, 30))): + consumer(w, limit, need_hi) diff --git a/PythonLib/full/_pyrepl/__init__.py b/PythonLib/full/_pyrepl/__init__.py new file mode 100644 index 00000000..1693cbd0 --- /dev/null +++ b/PythonLib/full/_pyrepl/__init__.py @@ -0,0 +1,19 @@ +# Copyright 2000-2008 Michael Hudson-Doyle +# Armin Rigo +# +# All Rights Reserved +# +# +# Permission to use, copy, modify, and distribute this software and +# its documentation for any purpose is hereby granted without fee, +# provided that the above copyright notice appear in all copies and +# that both that copyright notice and this permission notice appear in +# supporting documentation. +# +# THE AUTHOR MICHAEL HUDSON DISCLAIMS ALL WARRANTIES WITH REGARD TO +# THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY +# AND FITNESS, IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, +# INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER +# RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF +# CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN +# CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. diff --git a/PythonLib/full/_pyrepl/__main__.py b/PythonLib/full/_pyrepl/__main__.py new file mode 100644 index 00000000..9c66812e --- /dev/null +++ b/PythonLib/full/_pyrepl/__main__.py @@ -0,0 +1,10 @@ +# Important: don't add things to this module, as they will end up in the REPL's +# default globals. Use _pyrepl.main instead. + +# Avoid caching this file by linecache and incorrectly report tracebacks. +# See https://github.com/python/cpython/issues/129098. +__spec__ = __loader__ = None + +if __name__ == "__main__": + from .main import interactive_console as __pyrepl_interactive_console + __pyrepl_interactive_console() diff --git a/PythonLib/full/_pyrepl/_module_completer.py b/PythonLib/full/_pyrepl/_module_completer.py new file mode 100644 index 00000000..2098d0a5 --- /dev/null +++ b/PythonLib/full/_pyrepl/_module_completer.py @@ -0,0 +1,426 @@ +from __future__ import annotations + +import importlib +import os +import pkgutil +import sys +import token +import tokenize +from importlib.machinery import FileFinder +from io import StringIO +from contextlib import contextmanager +from dataclasses import dataclass +from itertools import chain +from tokenize import TokenInfo + +TYPE_CHECKING = False + +if TYPE_CHECKING: + from typing import Any, Iterable, Iterator, Mapping + + +HARDCODED_SUBMODULES = { + # Standard library submodules that are not detected by pkgutil.iter_modules + # but can be imported, so should be proposed in completion + "collections": ["abc"], + "os": ["path"], + "xml.parsers.expat": ["errors", "model"], +} + + +def make_default_module_completer() -> ModuleCompleter: + # Inside pyrepl, __package__ is set to None by default + return ModuleCompleter(namespace={'__package__': None}) + + +class ModuleCompleter: + """A completer for Python import statements. + + Examples: + - import + - import foo + - import foo. + - import foo as bar, baz + + - from + - from foo + - from foo import + - from foo import bar + - from foo import (bar as baz, qux + """ + + def __init__(self, namespace: Mapping[str, Any] | None = None) -> None: + self.namespace = namespace or {} + self._global_cache: list[pkgutil.ModuleInfo] = [] + self._curr_sys_path: list[str] = sys.path[:] + self._stdlib_path = os.path.dirname(importlib.__path__[0]) + + def get_completions(self, line: str) -> list[str] | None: + """Return the next possible import completions for 'line'.""" + result = ImportParser(line).parse() + if not result: + return None + try: + return self.complete(*result) + except Exception: + # Some unexpected error occurred, make it look like + # no completions are available + return [] + + def complete(self, from_name: str | None, name: str | None) -> list[str]: + if from_name is None: + # import x.y.z + assert name is not None + path, prefix = self.get_path_and_prefix(name) + modules = self.find_modules(path, prefix) + return [self.format_completion(path, module) for module in modules] + + if name is None: + # from x.y.z + path, prefix = self.get_path_and_prefix(from_name) + modules = self.find_modules(path, prefix) + return [self.format_completion(path, module) for module in modules] + + # from x.y import z + return self.find_modules(from_name, name) + + def find_modules(self, path: str, prefix: str) -> list[str]: + """Find all modules under 'path' that start with 'prefix'.""" + modules = self._find_modules(path, prefix) + # Filter out invalid module names + # (for example those containing dashes that cannot be imported with 'import') + return [mod for mod in modules if mod.isidentifier()] + + def _find_modules(self, path: str, prefix: str) -> list[str]: + if not path: + # Top-level import (e.g. `import foo`` or `from foo`)` + builtin_modules = [name for name in sys.builtin_module_names + if self.is_suggestion_match(name, prefix)] + third_party_modules = [module.name for module in self.global_cache + if self.is_suggestion_match(module.name, prefix)] + return sorted(builtin_modules + third_party_modules) + + if path.startswith('.'): + # Convert relative path to absolute path + package = self.namespace.get('__package__', '') + path = self.resolve_relative_name(path, package) # type: ignore[assignment] + if path is None: + return [] + + modules: Iterable[pkgutil.ModuleInfo] = self.global_cache + imported_module = sys.modules.get(path.split('.')[0]) + if imported_module: + # Filter modules to those who name and specs match the + # imported module to avoid invalid suggestions + spec = imported_module.__spec__ + if spec: + modules = [mod for mod in modules + if mod.name == spec.name + and mod.module_finder.find_spec(mod.name, None) == spec] + else: + modules = [] + + is_stdlib_import: bool | None = None + for segment in path.split('.'): + modules = [mod_info for mod_info in modules + if mod_info.ispkg and mod_info.name == segment] + if is_stdlib_import is None: + # Top-level import decide if we import from stdlib or not + is_stdlib_import = all( + self._is_stdlib_module(mod_info) for mod_info in modules + ) + modules = self.iter_submodules(modules) + + module_names = [module.name for module in modules] + if is_stdlib_import: + module_names.extend(HARDCODED_SUBMODULES.get(path, ())) + return [module_name for module_name in module_names + if self.is_suggestion_match(module_name, prefix)] + + def _is_stdlib_module(self, module_info: pkgutil.ModuleInfo) -> bool: + return (isinstance(module_info.module_finder, FileFinder) + and module_info.module_finder.path == self._stdlib_path) + + def is_suggestion_match(self, module_name: str, prefix: str) -> bool: + if prefix: + return module_name.startswith(prefix) + # For consistency with attribute completion, which + # does not suggest private attributes unless requested. + return not module_name.startswith("_") + + def iter_submodules(self, parent_modules: list[pkgutil.ModuleInfo]) -> Iterator[pkgutil.ModuleInfo]: + """Iterate over all submodules of the given parent modules.""" + specs = [info.module_finder.find_spec(info.name, None) + for info in parent_modules if info.ispkg] + search_locations = set(chain.from_iterable( + getattr(spec, 'submodule_search_locations', []) + for spec in specs if spec + )) + return pkgutil.iter_modules(search_locations) + + def get_path_and_prefix(self, dotted_name: str) -> tuple[str, str]: + """ + Split a dotted name into an import path and a + final prefix that is to be completed. + + Examples: + 'foo.bar' -> 'foo', 'bar' + 'foo.' -> 'foo', '' + '.foo' -> '.', 'foo' + """ + if '.' not in dotted_name: + return '', dotted_name + if dotted_name.startswith('.'): + stripped = dotted_name.lstrip('.') + dots = '.' * (len(dotted_name) - len(stripped)) + if '.' not in stripped: + return dots, stripped + path, prefix = stripped.rsplit('.', 1) + return dots + path, prefix + path, prefix = dotted_name.rsplit('.', 1) + return path, prefix + + def format_completion(self, path: str, module: str) -> str: + if path == '' or path.endswith('.'): + return f'{path}{module}' + return f'{path}.{module}' + + def resolve_relative_name(self, name: str, package: str) -> str | None: + """Resolve a relative module name to an absolute name. + + Example: resolve_relative_name('.foo', 'bar') -> 'bar.foo' + """ + # taken from importlib._bootstrap + level = 0 + for character in name: + if character != '.': + break + level += 1 + bits = package.rsplit('.', level - 1) + if len(bits) < level: + return None + base = bits[0] + name = name[level:] + return f'{base}.{name}' if name else base + + @property + def global_cache(self) -> list[pkgutil.ModuleInfo]: + """Global module cache""" + if not self._global_cache or self._curr_sys_path != sys.path: + self._curr_sys_path = sys.path[:] + self._global_cache = list(pkgutil.iter_modules()) + return self._global_cache + + +class ImportParser: + """ + Parses incomplete import statements that are + suitable for autocomplete suggestions. + + Examples: + - import foo -> Result(from_name=None, name='foo') + - import foo. -> Result(from_name=None, name='foo.') + - from foo -> Result(from_name='foo', name=None) + - from foo import bar -> Result(from_name='foo', name='bar') + - from .foo import ( -> Result(from_name='.foo', name='') + + Note that the parser works in reverse order, starting from the + last token in the input string. This makes the parser more robust + when parsing multiple statements. + """ + _ignored_tokens = { + token.INDENT, token.DEDENT, token.COMMENT, + token.NL, token.NEWLINE, token.ENDMARKER + } + _keywords = {'import', 'from', 'as'} + + def __init__(self, code: str) -> None: + self.code = code + tokens = [] + try: + for t in tokenize.generate_tokens(StringIO(code).readline): + if t.type not in self._ignored_tokens: + tokens.append(t) + except tokenize.TokenError as e: + if 'unexpected EOF' not in str(e): + # unexpected EOF is fine, since we're parsing an + # incomplete statement, but other errors are not + # because we may not have all the tokens so it's + # safer to bail out + tokens = [] + except SyntaxError: + tokens = [] + self.tokens = TokenQueue(tokens[::-1]) + + def parse(self) -> tuple[str | None, str | None] | None: + if not (res := self._parse()): + return None + return res.from_name, res.name + + def _parse(self) -> Result | None: + with self.tokens.save_state(): + return self.parse_from_import() + with self.tokens.save_state(): + return self.parse_import() + + def parse_import(self) -> Result: + if self.code.rstrip().endswith('import') and self.code.endswith(' '): + return Result(name='') + if self.tokens.peek_string(','): + name = '' + else: + if self.code.endswith(' '): + raise ParseError('parse_import') + name = self.parse_dotted_name() + if name.startswith('.'): + raise ParseError('parse_import') + while self.tokens.peek_string(','): + self.tokens.pop() + self.parse_dotted_as_name() + if self.tokens.peek_string('import'): + return Result(name=name) + raise ParseError('parse_import') + + def parse_from_import(self) -> Result: + stripped = self.code.rstrip() + if stripped.endswith('import') and self.code.endswith(' '): + return Result(from_name=self.parse_empty_from_import(), name='') + if stripped.endswith('from') and self.code.endswith(' '): + return Result(from_name='') + if self.tokens.peek_string('(') or self.tokens.peek_string(','): + return Result(from_name=self.parse_empty_from_import(), name='') + if self.code.endswith(' '): + raise ParseError('parse_from_import') + name = self.parse_dotted_name() + if '.' in name: + self.tokens.pop_string('from') + return Result(from_name=name) + if self.tokens.peek_string('from'): + return Result(from_name=name) + from_name = self.parse_empty_from_import() + return Result(from_name=from_name, name=name) + + def parse_empty_from_import(self) -> str: + if self.tokens.peek_string(','): + self.tokens.pop() + self.parse_as_names() + if self.tokens.peek_string('('): + self.tokens.pop() + self.tokens.pop_string('import') + return self.parse_from() + + def parse_from(self) -> str: + from_name = self.parse_dotted_name() + self.tokens.pop_string('from') + return from_name + + def parse_dotted_as_name(self) -> str: + self.tokens.pop_name() + if self.tokens.peek_string('as'): + self.tokens.pop() + with self.tokens.save_state(): + return self.parse_dotted_name() + + def parse_dotted_name(self) -> str: + name = [] + if self.tokens.peek_string('.'): + name.append('.') + self.tokens.pop() + if (self.tokens.peek_name() + and (tok := self.tokens.peek()) + and tok.string not in self._keywords): + name.append(self.tokens.pop_name()) + if not name: + raise ParseError('parse_dotted_name') + while self.tokens.peek_string('.'): + name.append('.') + self.tokens.pop() + if (self.tokens.peek_name() + and (tok := self.tokens.peek()) + and tok.string not in self._keywords): + name.append(self.tokens.pop_name()) + else: + break + + while self.tokens.peek_string('.'): + name.append('.') + self.tokens.pop() + return ''.join(name[::-1]) + + def parse_as_names(self) -> None: + self.parse_as_name() + while self.tokens.peek_string(','): + self.tokens.pop() + self.parse_as_name() + + def parse_as_name(self) -> None: + self.tokens.pop_name() + if self.tokens.peek_string('as'): + self.tokens.pop() + self.tokens.pop_name() + + +class ParseError(Exception): + pass + + +@dataclass(frozen=True) +class Result: + from_name: str | None = None + name: str | None = None + + +class TokenQueue: + """Provides helper functions for working with a sequence of tokens.""" + + def __init__(self, tokens: list[TokenInfo]) -> None: + self.tokens: list[TokenInfo] = tokens + self.index: int = 0 + self.stack: list[int] = [] + + @contextmanager + def save_state(self) -> Any: + try: + self.stack.append(self.index) + yield + except ParseError: + self.index = self.stack.pop() + else: + self.stack.pop() + + def __bool__(self) -> bool: + return self.index < len(self.tokens) + + def peek(self) -> TokenInfo | None: + if not self: + return None + return self.tokens[self.index] + + def peek_name(self) -> bool: + if not (tok := self.peek()): + return False + return tok.type == token.NAME + + def pop_name(self) -> str: + tok = self.pop() + if tok.type != token.NAME: + raise ParseError('pop_name') + return tok.string + + def peek_string(self, string: str) -> bool: + if not (tok := self.peek()): + return False + return tok.string == string + + def pop_string(self, string: str) -> str: + tok = self.pop() + if tok.string != string: + raise ParseError('pop_string') + return tok.string + + def pop(self) -> TokenInfo: + if not self: + raise ParseError('pop') + tok = self.tokens[self.index] + self.index += 1 + return tok diff --git a/PythonLib/full/_pyrepl/_threading_handler.py b/PythonLib/full/_pyrepl/_threading_handler.py new file mode 100644 index 00000000..82f5e865 --- /dev/null +++ b/PythonLib/full/_pyrepl/_threading_handler.py @@ -0,0 +1,74 @@ +from __future__ import annotations + +from dataclasses import dataclass, field +import traceback + + +TYPE_CHECKING = False +if TYPE_CHECKING: + from threading import Thread + from types import TracebackType + from typing import Protocol + + class ExceptHookArgs(Protocol): + @property + def exc_type(self) -> type[BaseException]: ... + @property + def exc_value(self) -> BaseException | None: ... + @property + def exc_traceback(self) -> TracebackType | None: ... + @property + def thread(self) -> Thread | None: ... + + class ShowExceptions(Protocol): + def __call__(self) -> int: ... + def add(self, s: str) -> None: ... + + from .reader import Reader + + +def install_threading_hook(reader: Reader) -> None: + import threading + + @dataclass + class ExceptHookHandler: + lock: threading.Lock = field(default_factory=threading.Lock) + messages: list[str] = field(default_factory=list) + + def show(self) -> int: + count = 0 + with self.lock: + if not self.messages: + return 0 + reader.restore() + for tb in self.messages: + count += 1 + if tb: + print(tb) + self.messages.clear() + reader.scheduled_commands.append("ctrl-c") + reader.prepare() + return count + + def add(self, s: str) -> None: + with self.lock: + self.messages.append(s) + + def exception(self, args: ExceptHookArgs) -> None: + lines = traceback.format_exception( + args.exc_type, + args.exc_value, + args.exc_traceback, + colorize=reader.can_colorize, + ) # type: ignore[call-overload] + pre = f"\nException in {args.thread.name}:\n" if args.thread else "\n" + tb = pre + "".join(lines) + self.add(tb) + + def __call__(self) -> int: + return self.show() + + + handler = ExceptHookHandler() + reader.threading_hook = handler + threading.excepthook = handler.exception diff --git a/PythonLib/full/_pyrepl/base_eventqueue.py b/PythonLib/full/_pyrepl/base_eventqueue.py new file mode 100644 index 00000000..0589a0f4 --- /dev/null +++ b/PythonLib/full/_pyrepl/base_eventqueue.py @@ -0,0 +1,110 @@ +# Copyright 2000-2008 Michael Hudson-Doyle +# Armin Rigo +# +# All Rights Reserved +# +# +# Permission to use, copy, modify, and distribute this software and +# its documentation for any purpose is hereby granted without fee, +# provided that the above copyright notice appear in all copies and +# that both that copyright notice and this permission notice appear in +# supporting documentation. +# +# THE AUTHOR MICHAEL HUDSON DISCLAIMS ALL WARRANTIES WITH REGARD TO +# THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY +# AND FITNESS, IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, +# INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER +# RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF +# CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN +# CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + +""" +OS-independent base for an event and VT sequence scanner + +See unix_eventqueue and windows_eventqueue for subclasses. +""" + +from collections import deque + +from . import keymap +from .console import Event +from .trace import trace + +class BaseEventQueue: + def __init__(self, encoding: str, keymap_dict: dict[bytes, str]) -> None: + self.compiled_keymap = keymap.compile_keymap(keymap_dict) + self.keymap = self.compiled_keymap + trace("keymap {k!r}", k=self.keymap) + self.encoding = encoding + self.events: deque[Event] = deque() + self.buf = bytearray() + + def get(self) -> Event | None: + """ + Retrieves the next event from the queue. + """ + if self.events: + return self.events.popleft() + else: + return None + + def empty(self) -> bool: + """ + Checks if the queue is empty. + """ + return not self.events + + def flush_buf(self) -> bytearray: + """ + Flushes the buffer and returns its contents. + """ + old = self.buf + self.buf = bytearray() + return old + + def insert(self, event: Event) -> None: + """ + Inserts an event into the queue. + """ + trace('added event {event}', event=event) + self.events.append(event) + + def push(self, char: int | bytes) -> None: + """ + Processes a character by updating the buffer and handling special key mappings. + """ + assert isinstance(char, (int, bytes)) + ord_char = char if isinstance(char, int) else ord(char) + char = ord_char.to_bytes() + self.buf.append(ord_char) + + if char in self.keymap: + if self.keymap is self.compiled_keymap: + # sanity check, buffer is empty when a special key comes + assert len(self.buf) == 1 + k = self.keymap[char] + trace('found map {k!r}', k=k) + if isinstance(k, dict): + self.keymap = k + else: + self.insert(Event('key', k, bytes(self.flush_buf()))) + self.keymap = self.compiled_keymap + + elif self.buf and self.buf[0] == 27: # escape + # escape sequence not recognized by our keymap: propagate it + # outside so that i can be recognized as an M-... key (see also + # the docstring in keymap.py + trace('unrecognized escape sequence, propagating...') + self.keymap = self.compiled_keymap + self.insert(Event('key', '\033', b'\033')) + for _c in self.flush_buf()[1:]: + self.push(_c) + + else: + try: + decoded = bytes(self.buf).decode(self.encoding) + except UnicodeError: + return + else: + self.insert(Event('key', decoded, bytes(self.flush_buf()))) + self.keymap = self.compiled_keymap diff --git a/PythonLib/full/_pyrepl/commands.py b/PythonLib/full/_pyrepl/commands.py new file mode 100644 index 00000000..10127e58 --- /dev/null +++ b/PythonLib/full/_pyrepl/commands.py @@ -0,0 +1,505 @@ +# Copyright 2000-2010 Michael Hudson-Doyle +# Antonio Cuni +# Armin Rigo +# +# All Rights Reserved +# +# +# Permission to use, copy, modify, and distribute this software and +# its documentation for any purpose is hereby granted without fee, +# provided that the above copyright notice appear in all copies and +# that both that copyright notice and this permission notice appear in +# supporting documentation. +# +# THE AUTHOR MICHAEL HUDSON DISCLAIMS ALL WARRANTIES WITH REGARD TO +# THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY +# AND FITNESS, IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, +# INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER +# RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF +# CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN +# CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + +from __future__ import annotations +import os +import time + +# Categories of actions: +# killing +# yanking +# motion +# editing +# history +# finishing +# [completion] + +from .trace import trace + +# types +if False: + from .historical_reader import HistoricalReader + + +class Command: + finish: bool = False + kills_digit_arg: bool = True + + def __init__( + self, reader: HistoricalReader, event_name: str, event: list[str] + ) -> None: + # Reader should really be "any reader" but there's too much usage of + # HistoricalReader methods and fields in the code below for us to + # refactor at the moment. + + self.reader = reader + self.event = event + self.event_name = event_name + + def do(self) -> None: + pass + + +class KillCommand(Command): + def kill_range(self, start: int, end: int) -> None: + if start == end: + return + r = self.reader + b = r.buffer + text = b[start:end] + del b[start:end] + if is_kill(r.last_command): + if start < r.pos: + r.kill_ring[-1] = text + r.kill_ring[-1] + else: + r.kill_ring[-1] = r.kill_ring[-1] + text + else: + r.kill_ring.append(text) + r.pos = start + r.dirty = True + + +class YankCommand(Command): + pass + + +class MotionCommand(Command): + pass + + +class EditCommand(Command): + pass + + +class FinishCommand(Command): + finish = True + pass + + +def is_kill(command: type[Command] | None) -> bool: + return command is not None and issubclass(command, KillCommand) + + +def is_yank(command: type[Command] | None) -> bool: + return command is not None and issubclass(command, YankCommand) + + +# etc + + +class digit_arg(Command): + kills_digit_arg = False + + def do(self) -> None: + r = self.reader + c = self.event[-1] + if c == "-": + if r.arg is not None: + r.arg = -r.arg + else: + r.arg = -1 + else: + d = int(c) + if r.arg is None: + r.arg = d + else: + if r.arg < 0: + r.arg = 10 * r.arg - d + else: + r.arg = 10 * r.arg + d + r.dirty = True + + +class clear_screen(Command): + def do(self) -> None: + r = self.reader + r.console.clear() + r.dirty = True + + +class refresh(Command): + def do(self) -> None: + self.reader.dirty = True + + +class repaint(Command): + def do(self) -> None: + self.reader.dirty = True + self.reader.console.repaint() + + +class kill_line(KillCommand): + def do(self) -> None: + r = self.reader + b = r.buffer + eol = r.eol() + for c in b[r.pos : eol]: + if not c.isspace(): + self.kill_range(r.pos, eol) + return + else: + self.kill_range(r.pos, eol + 1) + + +class unix_line_discard(KillCommand): + def do(self) -> None: + r = self.reader + self.kill_range(r.bol(), r.pos) + + +class unix_word_rubout(KillCommand): + def do(self) -> None: + r = self.reader + for i in range(r.get_arg()): + self.kill_range(r.bow(), r.pos) + + +class kill_word(KillCommand): + def do(self) -> None: + r = self.reader + for i in range(r.get_arg()): + self.kill_range(r.pos, r.eow()) + + +class backward_kill_word(KillCommand): + def do(self) -> None: + r = self.reader + for i in range(r.get_arg()): + self.kill_range(r.bow(), r.pos) + + +class yank(YankCommand): + def do(self) -> None: + r = self.reader + if not r.kill_ring: + r.error("nothing to yank") + return + r.insert(r.kill_ring[-1]) + + +class yank_pop(YankCommand): + def do(self) -> None: + r = self.reader + b = r.buffer + if not r.kill_ring: + r.error("nothing to yank") + return + if not is_yank(r.last_command): + r.error("previous command was not a yank") + return + repl = len(r.kill_ring[-1]) + r.kill_ring.insert(0, r.kill_ring.pop()) + t = r.kill_ring[-1] + b[r.pos - repl : r.pos] = t + r.pos = r.pos - repl + len(t) + r.dirty = True + + +class interrupt(FinishCommand): + def do(self) -> None: + import signal + + self.reader.console.finish() + self.reader.finish() + os.kill(os.getpid(), signal.SIGINT) + + +class ctrl_c(Command): + def do(self) -> None: + self.reader.console.finish() + self.reader.finish() + raise KeyboardInterrupt + + +class suspend(Command): + def do(self) -> None: + import signal + + r = self.reader + p = r.pos + r.console.finish() + os.kill(os.getpid(), signal.SIGSTOP) + ## this should probably be done + ## in a handler for SIGCONT? + r.console.prepare() + r.pos = p + # r.posxy = 0, 0 # XXX this is invalid + r.dirty = True + r.console.screen = [] + + +class up(MotionCommand): + def do(self) -> None: + r = self.reader + for _ in range(r.get_arg()): + x, y = r.pos2xy() + new_y = y - 1 + + if r.bol() == 0: + if r.historyi > 0: + r.select_item(r.historyi - 1) + return + r.pos = 0 + r.error("start of buffer") + return + + if ( + x + > ( + new_x := r.max_column(new_y) + ) # we're past the end of the previous line + or x == r.max_column(y) + and any( + not i.isspace() for i in r.buffer[r.bol() :] + ) # move between eols + ): + x = new_x + + r.setpos_from_xy(x, new_y) + + +class down(MotionCommand): + def do(self) -> None: + r = self.reader + b = r.buffer + for _ in range(r.get_arg()): + x, y = r.pos2xy() + new_y = y + 1 + + if r.eol() == len(b): + if r.historyi < len(r.history): + r.select_item(r.historyi + 1) + r.pos = r.eol(0) + return + r.pos = len(b) + r.error("end of buffer") + return + + if ( + x + > ( + new_x := r.max_column(new_y) + ) # we're past the end of the previous line + or x == r.max_column(y) + and any( + not i.isspace() for i in r.buffer[r.bol() :] + ) # move between eols + ): + x = new_x + + r.setpos_from_xy(x, new_y) + + +class left(MotionCommand): + def do(self) -> None: + r = self.reader + for _ in range(r.get_arg()): + p = r.pos - 1 + if p >= 0: + r.pos = p + else: + self.reader.error("start of buffer") + + +class right(MotionCommand): + def do(self) -> None: + r = self.reader + b = r.buffer + for _ in range(r.get_arg()): + p = r.pos + 1 + if p <= len(b): + r.pos = p + else: + self.reader.error("end of buffer") + + +class beginning_of_line(MotionCommand): + def do(self) -> None: + self.reader.pos = self.reader.bol() + + +class end_of_line(MotionCommand): + def do(self) -> None: + self.reader.pos = self.reader.eol() + + +class home(MotionCommand): + def do(self) -> None: + self.reader.pos = 0 + + +class end(MotionCommand): + def do(self) -> None: + self.reader.pos = len(self.reader.buffer) + + +class forward_word(MotionCommand): + def do(self) -> None: + r = self.reader + for i in range(r.get_arg()): + r.pos = r.eow() + + +class backward_word(MotionCommand): + def do(self) -> None: + r = self.reader + for i in range(r.get_arg()): + r.pos = r.bow() + + +class self_insert(EditCommand): + def do(self) -> None: + r = self.reader + text = self.event * r.get_arg() + r.insert(text) + if r.paste_mode: + data = "" + ev = r.console.getpending() + data += ev.data + if data: + r.insert(data) + r.last_refresh_cache.invalidated = True + + +class insert_nl(EditCommand): + def do(self) -> None: + r = self.reader + r.insert("\n" * r.get_arg()) + + +class transpose_characters(EditCommand): + def do(self) -> None: + r = self.reader + b = r.buffer + s = r.pos - 1 + if s < 0: + r.error("cannot transpose at start of buffer") + else: + if s == len(b): + s -= 1 + t = min(s + r.get_arg(), len(b) - 1) + c = b[s] + del b[s] + b.insert(t, c) + r.pos = t + r.dirty = True + + +class backspace(EditCommand): + def do(self) -> None: + r = self.reader + b = r.buffer + for i in range(r.get_arg()): + if r.pos > 0: + r.pos -= 1 + del b[r.pos] + r.dirty = True + else: + self.reader.error("can't backspace at start") + + +class delete(EditCommand): + def do(self) -> None: + r = self.reader + b = r.buffer + if self.event[-1] == "\004": + if b and b[-1].endswith("\n"): + self.finish = True + elif ( + r.pos == 0 + and len(b) == 0 # this is something of a hack + ): + r.update_screen() + r.console.finish() + raise EOFError + + for i in range(r.get_arg()): + if r.pos != len(b): + del b[r.pos] + r.dirty = True + else: + self.reader.error("end of buffer") + + +class accept(FinishCommand): + def do(self) -> None: + pass + + +class help(Command): + def do(self) -> None: + import _sitebuiltins + + with self.reader.suspend(): + self.reader.msg = _sitebuiltins._Helper()() # type: ignore[assignment] + + +class invalid_key(Command): + def do(self) -> None: + pending = self.reader.console.getpending() + s = "".join(self.event) + pending.data + self.reader.error("`%r' not bound" % s) + + +class invalid_command(Command): + def do(self) -> None: + s = self.event_name + self.reader.error("command `%s' not known" % s) + + +class show_history(Command): + def do(self) -> None: + from .pager import get_pager + from site import gethistoryfile + + history = os.linesep.join(self.reader.history[:]) + self.reader.console.restore() + pager = get_pager() + pager(history, gethistoryfile()) + self.reader.console.prepare() + + # We need to copy over the state so that it's consistent between + # console and reader, and console does not overwrite/append stuff + self.reader.console.screen = self.reader.screen.copy() + self.reader.console.posxy = self.reader.cxy + + +class paste_mode(Command): + def do(self) -> None: + self.reader.paste_mode = not self.reader.paste_mode + self.reader.dirty = True + + +class perform_bracketed_paste(Command): + def do(self) -> None: + done = "\x1b[201~" + data = "" + start = time.time() + while done not in data: + ev = self.reader.console.getpending() + data += ev.data + trace( + "bracketed pasting of {l} chars done in {s:.2f}s", + l=len(data), + s=time.time() - start, + ) + self.reader.insert(data.replace(done, "")) + self.reader.last_refresh_cache.invalidated = True diff --git a/PythonLib/full/_pyrepl/completing_reader.py b/PythonLib/full/_pyrepl/completing_reader.py new file mode 100644 index 00000000..9d2d43be --- /dev/null +++ b/PythonLib/full/_pyrepl/completing_reader.py @@ -0,0 +1,299 @@ +# Copyright 2000-2010 Michael Hudson-Doyle +# Antonio Cuni +# +# All Rights Reserved +# +# +# Permission to use, copy, modify, and distribute this software and +# its documentation for any purpose is hereby granted without fee, +# provided that the above copyright notice appear in all copies and +# that both that copyright notice and this permission notice appear in +# supporting documentation. +# +# THE AUTHOR MICHAEL HUDSON DISCLAIMS ALL WARRANTIES WITH REGARD TO +# THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY +# AND FITNESS, IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, +# INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER +# RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF +# CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN +# CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + +from __future__ import annotations + +from dataclasses import dataclass, field + +import re +from . import commands, console, reader +from .reader import Reader + + +# types +Command = commands.Command +if False: + from .types import KeySpec, CommandName + + +def prefix(wordlist: list[str], j: int = 0) -> str: + d = {} + i = j + try: + while 1: + for word in wordlist: + d[word[i]] = 1 + if len(d) > 1: + return wordlist[0][j:i] + i += 1 + d = {} + except IndexError: + return wordlist[0][j:i] + return "" + + +STRIPCOLOR_REGEX = re.compile(r"\x1B\[([0-9]{1,3}(;[0-9]{1,2})?)?[m|K]") + +def stripcolor(s: str) -> str: + return STRIPCOLOR_REGEX.sub('', s) + + +def real_len(s: str) -> int: + return len(stripcolor(s)) + + +def left_align(s: str, maxlen: int) -> str: + stripped = stripcolor(s) + if len(stripped) > maxlen: + # too bad, we remove the color + return stripped[:maxlen] + padding = maxlen - len(stripped) + return s + ' '*padding + + +def build_menu( + cons: console.Console, + wordlist: list[str], + start: int, + use_brackets: bool, + sort_in_column: bool, +) -> tuple[list[str], int]: + if use_brackets: + item = "[ %s ]" + padding = 4 + else: + item = "%s " + padding = 2 + maxlen = min(max(map(real_len, wordlist)), cons.width - padding) + cols = int(cons.width / (maxlen + padding)) + rows = int((len(wordlist) - 1)/cols + 1) + + if sort_in_column: + # sort_in_column=False (default) sort_in_column=True + # A B C A D G + # D E F B E + # G C F + # + # "fill" the table with empty words, so we always have the same amount + # of rows for each column + missing = cols*rows - len(wordlist) + wordlist = wordlist + ['']*missing + indexes = [(i % cols) * rows + i // cols for i in range(len(wordlist))] + wordlist = [wordlist[i] for i in indexes] + menu = [] + i = start + for r in range(rows): + row = [] + for col in range(cols): + row.append(item % left_align(wordlist[i], maxlen)) + i += 1 + if i >= len(wordlist): + break + menu.append(''.join(row)) + if i >= len(wordlist): + i = 0 + break + if r + 5 > cons.height: + menu.append(" %d more... " % (len(wordlist) - i)) + break + return menu, i + +# this gets somewhat user interface-y, and as a result the logic gets +# very convoluted. +# +# To summarise the summary of the summary:- people are a problem. +# -- The Hitch-Hikers Guide to the Galaxy, Episode 12 + +#### Desired behaviour of the completions commands. +# the considerations are: +# (1) how many completions are possible +# (2) whether the last command was a completion +# (3) if we can assume that the completer is going to return the same set of +# completions: this is controlled by the ``assume_immutable_completions`` +# variable on the reader, which is True by default to match the historical +# behaviour of pyrepl, but e.g. False in the ReadlineAlikeReader to match +# more closely readline's semantics (this is needed e.g. by +# fancycompleter) +# +# if there's no possible completion, beep at the user and point this out. +# this is easy. +# +# if there's only one possible completion, stick it in. if the last thing +# user did was a completion, point out that he isn't getting anywhere, but +# only if the ``assume_immutable_completions`` is True. +# +# now it gets complicated. +# +# for the first press of a completion key: +# if there's a common prefix, stick it in. + +# irrespective of whether anything got stuck in, if the word is now +# complete, show the "complete but not unique" message + +# if there's no common prefix and if the word is not now complete, +# beep. + +# common prefix -> yes no +# word complete \/ +# yes "cbnu" "cbnu" +# no - beep + +# for the second bang on the completion key +# there will necessarily be no common prefix +# show a menu of the choices. + +# for subsequent bangs, rotate the menu around (if there are sufficient +# choices). + + +class complete(commands.Command): + def do(self) -> None: + r: CompletingReader + r = self.reader # type: ignore[assignment] + last_is_completer = r.last_command_is(self.__class__) + immutable_completions = r.assume_immutable_completions + completions_unchangable = last_is_completer and immutable_completions + stem = r.get_stem() + if not completions_unchangable: + r.cmpltn_menu_choices = r.get_completions(stem) + + completions = r.cmpltn_menu_choices + if not completions: + r.error("no matches") + elif len(completions) == 1: + if completions_unchangable and len(completions[0]) == len(stem): + r.msg = "[ sole completion ]" + r.dirty = True + r.insert(completions[0][len(stem):]) + else: + p = prefix(completions, len(stem)) + if p: + r.insert(p) + if last_is_completer: + r.cmpltn_menu_visible = True + r.cmpltn_message_visible = False + r.cmpltn_menu, r.cmpltn_menu_end = build_menu( + r.console, completions, r.cmpltn_menu_end, + r.use_brackets, r.sort_in_column) + r.dirty = True + elif not r.cmpltn_menu_visible: + r.cmpltn_message_visible = True + if stem + p in completions: + r.msg = "[ complete but not unique ]" + r.dirty = True + else: + r.msg = "[ not unique ]" + r.dirty = True + + +class self_insert(commands.self_insert): + def do(self) -> None: + r: CompletingReader + r = self.reader # type: ignore[assignment] + + commands.self_insert.do(self) + if r.cmpltn_menu_visible: + stem = r.get_stem() + if len(stem) < 1: + r.cmpltn_reset() + else: + completions = [w for w in r.cmpltn_menu_choices + if w.startswith(stem)] + if completions: + r.cmpltn_menu, r.cmpltn_menu_end = build_menu( + r.console, completions, 0, + r.use_brackets, r.sort_in_column) + else: + r.cmpltn_reset() + + +@dataclass +class CompletingReader(Reader): + """Adds completion support""" + + ### Class variables + # see the comment for the complete command + assume_immutable_completions = True + use_brackets = True # display completions inside [] + sort_in_column = False + + ### Instance variables + cmpltn_menu: list[str] = field(init=False) + cmpltn_menu_visible: bool = field(init=False) + cmpltn_message_visible: bool = field(init=False) + cmpltn_menu_end: int = field(init=False) + cmpltn_menu_choices: list[str] = field(init=False) + + def __post_init__(self) -> None: + super().__post_init__() + self.cmpltn_reset() + for c in (complete, self_insert): + self.commands[c.__name__] = c + self.commands[c.__name__.replace('_', '-')] = c + + def collect_keymap(self) -> tuple[tuple[KeySpec, CommandName], ...]: + return super().collect_keymap() + ( + (r'\t', 'complete'),) + + def after_command(self, cmd: Command) -> None: + super().after_command(cmd) + if not isinstance(cmd, (complete, self_insert)): + self.cmpltn_reset() + + def calc_screen(self) -> list[str]: + screen = super().calc_screen() + if self.cmpltn_menu_visible: + # We display the completions menu below the current prompt + ly = self.lxy[1] + 1 + screen[ly:ly] = self.cmpltn_menu + # If we're not in the middle of multiline edit, don't append to screeninfo + # since that screws up the position calculation in pos2xy function. + # This is a hack to prevent the cursor jumping + # into the completions menu when pressing left or down arrow. + if self.pos != len(self.buffer): + self.screeninfo[ly:ly] = [(0, [])]*len(self.cmpltn_menu) + return screen + + def finish(self) -> None: + super().finish() + self.cmpltn_reset() + + def cmpltn_reset(self) -> None: + self.cmpltn_menu = [] + self.cmpltn_menu_visible = False + self.cmpltn_message_visible = False + self.cmpltn_menu_end = 0 + self.cmpltn_menu_choices = [] + + def get_stem(self) -> str: + st = self.syntax_table + SW = reader.SYNTAX_WORD + b = self.buffer + p = self.pos - 1 + while p >= 0 and st.get(b[p], SW) == SW: + p -= 1 + return ''.join(b[p+1:self.pos]) + + def get_completions(self, stem: str) -> list[str]: + return [] + + def get_line(self) -> str: + """Return the current line until the cursor position.""" + return ''.join(self.buffer[:self.pos]) diff --git a/PythonLib/full/_pyrepl/console.py b/PythonLib/full/_pyrepl/console.py new file mode 100644 index 00000000..8956fb12 --- /dev/null +++ b/PythonLib/full/_pyrepl/console.py @@ -0,0 +1,229 @@ +# Copyright 2000-2004 Michael Hudson-Doyle +# +# All Rights Reserved +# +# +# Permission to use, copy, modify, and distribute this software and +# its documentation for any purpose is hereby granted without fee, +# provided that the above copyright notice appear in all copies and +# that both that copyright notice and this permission notice appear in +# supporting documentation. +# +# THE AUTHOR MICHAEL HUDSON DISCLAIMS ALL WARRANTIES WITH REGARD TO +# THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY +# AND FITNESS, IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, +# INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER +# RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF +# CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN +# CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + +from __future__ import annotations + +import _colorize + +from abc import ABC, abstractmethod +import ast +import code +import linecache +from dataclasses import dataclass, field +import os.path +import sys + + +TYPE_CHECKING = False + +if TYPE_CHECKING: + from typing import IO + from typing import Callable + + +@dataclass +class Event: + evt: str + data: str + raw: bytes = b"" + + +@dataclass +class Console(ABC): + posxy: tuple[int, int] + screen: list[str] = field(default_factory=list) + height: int = 25 + width: int = 80 + + def __init__( + self, + f_in: IO[bytes] | int = 0, + f_out: IO[bytes] | int = 1, + term: str = "", + encoding: str = "", + ): + self.encoding = encoding or sys.getdefaultencoding() + + if isinstance(f_in, int): + self.input_fd = f_in + else: + self.input_fd = f_in.fileno() + + if isinstance(f_out, int): + self.output_fd = f_out + else: + self.output_fd = f_out.fileno() + + @abstractmethod + def refresh(self, screen: list[str], xy: tuple[int, int]) -> None: ... + + @abstractmethod + def prepare(self) -> None: ... + + @abstractmethod + def restore(self) -> None: ... + + @abstractmethod + def move_cursor(self, x: int, y: int) -> None: ... + + @abstractmethod + def set_cursor_vis(self, visible: bool) -> None: ... + + @abstractmethod + def getheightwidth(self) -> tuple[int, int]: + """Return (height, width) where height and width are the height + and width of the terminal window in characters.""" + ... + + @abstractmethod + def get_event(self, block: bool = True) -> Event | None: + """Return an Event instance. Returns None if |block| is false + and there is no event pending, otherwise waits for the + completion of an event.""" + ... + + @abstractmethod + def push_char(self, char: int | bytes) -> None: + """ + Push a character to the console event queue. + """ + ... + + @abstractmethod + def beep(self) -> None: ... + + @abstractmethod + def clear(self) -> None: + """Wipe the screen""" + ... + + @abstractmethod + def finish(self) -> None: + """Move the cursor to the end of the display and otherwise get + ready for end. XXX could be merged with restore? Hmm.""" + ... + + @abstractmethod + def flushoutput(self) -> None: + """Flush all output to the screen (assuming there's some + buffering going on somewhere).""" + ... + + @abstractmethod + def forgetinput(self) -> None: + """Forget all pending, but not yet processed input.""" + ... + + @abstractmethod + def getpending(self) -> Event: + """Return the characters that have been typed but not yet + processed.""" + ... + + @abstractmethod + def wait(self, timeout: float | None) -> bool: + """Wait for an event. The return value is True if an event is + available, False if the timeout has been reached. If timeout is + None, wait forever. The timeout is in milliseconds.""" + ... + + @property + def input_hook(self) -> Callable[[], int] | None: + """Returns the current input hook.""" + ... + + @abstractmethod + def repaint(self) -> None: ... + + +class InteractiveColoredConsole(code.InteractiveConsole): + STATEMENT_FAILED = object() + + def __init__( + self, + locals: dict[str, object] | None = None, + filename: str = "", + *, + local_exit: bool = False, + ) -> None: + super().__init__(locals=locals, filename=filename, local_exit=local_exit) + self.can_colorize = _colorize.can_colorize() + + def showsyntaxerror(self, filename=None, **kwargs): + super().showsyntaxerror(filename=filename, **kwargs) + + def _excepthook(self, typ, value, tb): + import traceback + lines = traceback.format_exception( + typ, value, tb, + colorize=self.can_colorize, + limit=traceback.BUILTIN_EXCEPTION_LIMIT) + self.write(''.join(lines)) + + def runcode(self, code): + try: + exec(code, self.locals) + except SystemExit: + raise + except BaseException: + self.showtraceback() + return self.STATEMENT_FAILED + return None + + def runsource(self, source, filename="", symbol="single"): + try: + tree = self.compile.compiler( + source, + filename, + "exec", + ast.PyCF_ONLY_AST, + incomplete_input=False, + ) + except (SyntaxError, OverflowError, ValueError): + self.showsyntaxerror(filename, source=source) + return False + if tree.body: + *_, last_stmt = tree.body + for stmt in tree.body: + wrapper = ast.Interactive if stmt is last_stmt else ast.Module + the_symbol = symbol if stmt is last_stmt else "exec" + item = wrapper([stmt]) + try: + code = self.compile.compiler(item, filename, the_symbol) + linecache._register_code(code, source, filename) + except SyntaxError as e: + if e.args[0] == "'await' outside function": + python = os.path.basename(sys.executable) + e.add_note( + f"Try the asyncio REPL ({python} -m asyncio) to use" + f" top-level 'await' and run background asyncio tasks." + ) + self.showsyntaxerror(filename, source=source) + return False + except (OverflowError, ValueError): + self.showsyntaxerror(filename, source=source) + return False + + if code is None: + return True + + result = self.runcode(code) + if result is self.STATEMENT_FAILED: + break + return False diff --git a/PythonLib/full/_pyrepl/fancy_termios.py b/PythonLib/full/_pyrepl/fancy_termios.py new file mode 100644 index 00000000..8d5bd183 --- /dev/null +++ b/PythonLib/full/_pyrepl/fancy_termios.py @@ -0,0 +1,82 @@ +# Copyright 2000-2004 Michael Hudson-Doyle +# +# All Rights Reserved +# +# +# Permission to use, copy, modify, and distribute this software and +# its documentation for any purpose is hereby granted without fee, +# provided that the above copyright notice appear in all copies and +# that both that copyright notice and this permission notice appear in +# supporting documentation. +# +# THE AUTHOR MICHAEL HUDSON DISCLAIMS ALL WARRANTIES WITH REGARD TO +# THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY +# AND FITNESS, IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, +# INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER +# RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF +# CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN +# CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + +import termios + + +TYPE_CHECKING = False + +if TYPE_CHECKING: + from typing import cast +else: + cast = lambda typ, val: val + + +class TermState: + def __init__(self, attrs: list[int | list[bytes]]) -> None: + self.iflag = cast(int, attrs[0]) + self.oflag = cast(int, attrs[1]) + self.cflag = cast(int, attrs[2]) + self.lflag = cast(int, attrs[3]) + self.ispeed = cast(int, attrs[4]) + self.ospeed = cast(int, attrs[5]) + self.cc = cast(list[bytes], attrs[6]) + + def as_list(self) -> list[int | list[bytes]]: + return [ + self.iflag, + self.oflag, + self.cflag, + self.lflag, + self.ispeed, + self.ospeed, + # Always return a copy of the control characters list to ensure + # there are not any additional references to self.cc + self.cc[:], + ] + + def copy(self) -> "TermState": + return self.__class__(self.as_list()) + + +def tcgetattr(fd: int) -> TermState: + return TermState(termios.tcgetattr(fd)) + + +def tcsetattr(fd: int, when: int, attrs: TermState) -> None: + termios.tcsetattr(fd, when, attrs.as_list()) + + +class Term(TermState): + TS__init__ = TermState.__init__ + + def __init__(self, fd: int = 0) -> None: + self.TS__init__(termios.tcgetattr(fd)) + self.fd = fd + self.stack: list[list[int | list[bytes]]] = [] + + def save(self) -> None: + self.stack.append(self.as_list()) + + def set(self, when: int = termios.TCSANOW) -> None: + termios.tcsetattr(self.fd, when, self.as_list()) + + def restore(self) -> None: + self.TS__init__(self.stack.pop()) + self.set() diff --git a/PythonLib/full/_pyrepl/historical_reader.py b/PythonLib/full/_pyrepl/historical_reader.py new file mode 100644 index 00000000..c4b95fa2 --- /dev/null +++ b/PythonLib/full/_pyrepl/historical_reader.py @@ -0,0 +1,419 @@ +# Copyright 2000-2004 Michael Hudson-Doyle +# +# All Rights Reserved +# +# +# Permission to use, copy, modify, and distribute this software and +# its documentation for any purpose is hereby granted without fee, +# provided that the above copyright notice appear in all copies and +# that both that copyright notice and this permission notice appear in +# supporting documentation. +# +# THE AUTHOR MICHAEL HUDSON DISCLAIMS ALL WARRANTIES WITH REGARD TO +# THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY +# AND FITNESS, IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, +# INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER +# RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF +# CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN +# CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + +from __future__ import annotations + +from contextlib import contextmanager +from dataclasses import dataclass, field + +from . import commands, input +from .reader import Reader + + +if False: + from .types import SimpleContextManager, KeySpec, CommandName + + +isearch_keymap: tuple[tuple[KeySpec, CommandName], ...] = tuple( + [("\\%03o" % c, "isearch-end") for c in range(256) if chr(c) != "\\"] + + [(c, "isearch-add-character") for c in map(chr, range(32, 127)) if c != "\\"] + + [ + ("\\%03o" % c, "isearch-add-character") + for c in range(256) + if chr(c).isalpha() and chr(c) != "\\" + ] + + [ + ("\\\\", "self-insert"), + (r"\C-r", "isearch-backwards"), + (r"\C-s", "isearch-forwards"), + (r"\C-c", "isearch-cancel"), + (r"\C-g", "isearch-cancel"), + (r"\", "isearch-backspace"), + ] +) + +ISEARCH_DIRECTION_NONE = "" +ISEARCH_DIRECTION_BACKWARDS = "r" +ISEARCH_DIRECTION_FORWARDS = "f" + + +class next_history(commands.Command): + def do(self) -> None: + r = self.reader + if r.historyi == len(r.history): + r.error("end of history list") + return + r.select_item(r.historyi + 1) + + +class previous_history(commands.Command): + def do(self) -> None: + r = self.reader + if r.historyi == 0: + r.error("start of history list") + return + r.select_item(r.historyi - 1) + + +class history_search_backward(commands.Command): + def do(self) -> None: + r = self.reader + r.search_next(forwards=False) + + +class history_search_forward(commands.Command): + def do(self) -> None: + r = self.reader + r.search_next(forwards=True) + + +class restore_history(commands.Command): + def do(self) -> None: + r = self.reader + if r.historyi != len(r.history): + if r.get_unicode() != r.history[r.historyi]: + r.buffer = list(r.history[r.historyi]) + r.pos = len(r.buffer) + r.dirty = True + + +class first_history(commands.Command): + def do(self) -> None: + self.reader.select_item(0) + + +class last_history(commands.Command): + def do(self) -> None: + self.reader.select_item(len(self.reader.history)) + + +class operate_and_get_next(commands.FinishCommand): + def do(self) -> None: + self.reader.next_history = self.reader.historyi + 1 + + +class yank_arg(commands.Command): + def do(self) -> None: + r = self.reader + if r.last_command is self.__class__: + r.yank_arg_i += 1 + else: + r.yank_arg_i = 0 + if r.historyi < r.yank_arg_i: + r.error("beginning of history list") + return + a = r.get_arg(-1) + # XXX how to split? + words = r.get_item(r.historyi - r.yank_arg_i - 1).split() + if a < -len(words) or a >= len(words): + r.error("no such arg") + return + w = words[a] + b = r.buffer + if r.yank_arg_i > 0: + o = len(r.yank_arg_yanked) + else: + o = 0 + b[r.pos - o : r.pos] = list(w) + r.yank_arg_yanked = w + r.pos += len(w) - o + r.dirty = True + + +class forward_history_isearch(commands.Command): + def do(self) -> None: + r = self.reader + r.isearch_direction = ISEARCH_DIRECTION_FORWARDS + r.isearch_start = r.historyi, r.pos + r.isearch_term = "" + r.dirty = True + r.push_input_trans(r.isearch_trans) + + +class reverse_history_isearch(commands.Command): + def do(self) -> None: + r = self.reader + r.isearch_direction = ISEARCH_DIRECTION_BACKWARDS + r.dirty = True + r.isearch_term = "" + r.push_input_trans(r.isearch_trans) + r.isearch_start = r.historyi, r.pos + + +class isearch_cancel(commands.Command): + def do(self) -> None: + r = self.reader + r.isearch_direction = ISEARCH_DIRECTION_NONE + r.pop_input_trans() + r.select_item(r.isearch_start[0]) + r.pos = r.isearch_start[1] + r.dirty = True + + +class isearch_add_character(commands.Command): + def do(self) -> None: + r = self.reader + b = r.buffer + r.isearch_term += self.event[-1] + r.dirty = True + p = r.pos + len(r.isearch_term) - 1 + if b[p : p + 1] != [r.isearch_term[-1]]: + r.isearch_next() + + +class isearch_backspace(commands.Command): + def do(self) -> None: + r = self.reader + if len(r.isearch_term) > 0: + r.isearch_term = r.isearch_term[:-1] + r.dirty = True + else: + r.error("nothing to rubout") + + +class isearch_forwards(commands.Command): + def do(self) -> None: + r = self.reader + r.isearch_direction = ISEARCH_DIRECTION_FORWARDS + r.isearch_next() + + +class isearch_backwards(commands.Command): + def do(self) -> None: + r = self.reader + r.isearch_direction = ISEARCH_DIRECTION_BACKWARDS + r.isearch_next() + + +class isearch_end(commands.Command): + def do(self) -> None: + r = self.reader + r.isearch_direction = ISEARCH_DIRECTION_NONE + r.console.forgetinput() + r.pop_input_trans() + r.dirty = True + + +@dataclass +class HistoricalReader(Reader): + """Adds history support (with incremental history searching) to the + Reader class. + """ + + history: list[str] = field(default_factory=list) + historyi: int = 0 + next_history: int | None = None + transient_history: dict[int, str] = field(default_factory=dict) + isearch_term: str = "" + isearch_direction: str = ISEARCH_DIRECTION_NONE + isearch_start: tuple[int, int] = field(init=False) + isearch_trans: input.KeymapTranslator = field(init=False) + yank_arg_i: int = 0 + yank_arg_yanked: str = "" + + def __post_init__(self) -> None: + super().__post_init__() + for c in [ + next_history, + previous_history, + restore_history, + first_history, + last_history, + yank_arg, + forward_history_isearch, + reverse_history_isearch, + isearch_end, + isearch_add_character, + isearch_cancel, + isearch_add_character, + isearch_backspace, + isearch_forwards, + isearch_backwards, + operate_and_get_next, + history_search_backward, + history_search_forward, + ]: + self.commands[c.__name__] = c + self.commands[c.__name__.replace("_", "-")] = c + self.isearch_start = self.historyi, self.pos + self.isearch_trans = input.KeymapTranslator( + isearch_keymap, invalid_cls=isearch_end, character_cls=isearch_add_character + ) + + def collect_keymap(self) -> tuple[tuple[KeySpec, CommandName], ...]: + return super().collect_keymap() + ( + (r"\C-n", "next-history"), + (r"\C-p", "previous-history"), + (r"\C-o", "operate-and-get-next"), + (r"\C-r", "reverse-history-isearch"), + (r"\C-s", "forward-history-isearch"), + (r"\M-r", "restore-history"), + (r"\M-.", "yank-arg"), + (r"\", "history-search-forward"), + (r"\x1b[6~", "history-search-forward"), + (r"\", "history-search-backward"), + (r"\x1b[5~", "history-search-backward"), + ) + + def select_item(self, i: int) -> None: + self.transient_history[self.historyi] = self.get_unicode() + buf = self.transient_history.get(i) + if buf is None: + buf = self.history[i].rstrip() + self.buffer = list(buf) + self.historyi = i + self.pos = len(self.buffer) + self.dirty = True + self.last_refresh_cache.invalidated = True + + def get_item(self, i: int) -> str: + if i != len(self.history): + return self.transient_history.get(i, self.history[i]) + else: + return self.transient_history.get(i, self.get_unicode()) + + @contextmanager + def suspend(self) -> SimpleContextManager: + with super().suspend(), self.suspend_history(): + yield + + @contextmanager + def suspend_history(self) -> SimpleContextManager: + try: + old_history = self.history[:] + del self.history[:] + yield + finally: + self.history[:] = old_history + + def prepare(self) -> None: + super().prepare() + try: + self.transient_history = {} + if self.next_history is not None and self.next_history < len(self.history): + self.historyi = self.next_history + self.buffer[:] = list(self.history[self.next_history]) + self.pos = len(self.buffer) + self.transient_history[len(self.history)] = "" + else: + self.historyi = len(self.history) + self.next_history = None + except: + self.restore() + raise + + def get_prompt(self, lineno: int, cursor_on_line: bool) -> str: + if cursor_on_line and self.isearch_direction != ISEARCH_DIRECTION_NONE: + d = "rf"[self.isearch_direction == ISEARCH_DIRECTION_FORWARDS] + return "(%s-search `%s') " % (d, self.isearch_term) + else: + return super().get_prompt(lineno, cursor_on_line) + + def search_next(self, *, forwards: bool) -> None: + """Search history for the current line contents up to the cursor. + + Selects the first item found. If nothing is under the cursor, any next + item in history is selected. + """ + pos = self.pos + s = self.get_unicode() + history_index = self.historyi + + # In multiline contexts, we're only interested in the current line. + nl_index = s.rfind('\n', 0, pos) + prefix = s[nl_index + 1:pos] + pos = len(prefix) + + match_prefix = len(prefix) + len_item = 0 + if history_index < len(self.history): + len_item = len(self.get_item(history_index)) + if len_item and pos == len_item: + match_prefix = False + elif not pos: + match_prefix = False + + while 1: + if forwards: + out_of_bounds = history_index >= len(self.history) - 1 + else: + out_of_bounds = history_index == 0 + if out_of_bounds: + if forwards and not match_prefix: + self.pos = 0 + self.buffer = [] + self.dirty = True + else: + self.error("not found") + return + + history_index += 1 if forwards else -1 + s = self.get_item(history_index) + + if not match_prefix: + self.select_item(history_index) + return + + len_acc = 0 + for i, line in enumerate(s.splitlines(keepends=True)): + if line.startswith(prefix): + self.select_item(history_index) + self.pos = pos + len_acc + return + len_acc += len(line) + + def isearch_next(self) -> None: + st = self.isearch_term + p = self.pos + i = self.historyi + s = self.get_unicode() + forwards = self.isearch_direction == ISEARCH_DIRECTION_FORWARDS + while 1: + if forwards: + p = s.find(st, p + 1) + else: + p = s.rfind(st, 0, p + len(st) - 1) + if p != -1: + self.select_item(i) + self.pos = p + return + elif (forwards and i >= len(self.history) - 1) or (not forwards and i == 0): + self.error("not found") + return + else: + if forwards: + i += 1 + s = self.get_item(i) + p = -1 + else: + i -= 1 + s = self.get_item(i) + p = len(s) + + def finish(self) -> None: + super().finish() + ret = self.get_unicode() + for i, t in self.transient_history.items(): + if i < len(self.history) and i != self.historyi: + self.history[i] = t + if ret and should_auto_add_history: + self.history.append(ret) + + +should_auto_add_history = True diff --git a/PythonLib/full/_pyrepl/input.py b/PythonLib/full/_pyrepl/input.py new file mode 100644 index 00000000..21c24eb5 --- /dev/null +++ b/PythonLib/full/_pyrepl/input.py @@ -0,0 +1,114 @@ +# Copyright 2000-2004 Michael Hudson-Doyle +# +# All Rights Reserved +# +# +# Permission to use, copy, modify, and distribute this software and +# its documentation for any purpose is hereby granted without fee, +# provided that the above copyright notice appear in all copies and +# that both that copyright notice and this permission notice appear in +# supporting documentation. +# +# THE AUTHOR MICHAEL HUDSON DISCLAIMS ALL WARRANTIES WITH REGARD TO +# THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY +# AND FITNESS, IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, +# INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER +# RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF +# CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN +# CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + +# (naming modules after builtin functions is not such a hot idea...) + +# an KeyTrans instance translates Event objects into Command objects + +# hmm, at what level do we want [C-i] and [tab] to be equivalent? +# [meta-a] and [esc a]? obviously, these are going to be equivalent +# for the UnixConsole, but should they be for PygameConsole? + +# it would in any situation seem to be a bad idea to bind, say, [tab] +# and [C-i] to *different* things... but should binding one bind the +# other? + +# executive, temporary decision: [tab] and [C-i] are distinct, but +# [meta-key] is identified with [esc key]. We demand that any console +# class does quite a lot towards emulating a unix terminal. + +from __future__ import annotations + +from abc import ABC, abstractmethod +import unicodedata +from collections import deque + + +# types +if False: + from .types import EventTuple + + +class InputTranslator(ABC): + @abstractmethod + def push(self, evt: EventTuple) -> None: + pass + + @abstractmethod + def get(self) -> EventTuple | None: + return None + + @abstractmethod + def empty(self) -> bool: + return True + + +class KeymapTranslator(InputTranslator): + def __init__(self, keymap, verbose=False, invalid_cls=None, character_cls=None): + self.verbose = verbose + from .keymap import compile_keymap, parse_keys + + self.keymap = keymap + self.invalid_cls = invalid_cls + self.character_cls = character_cls + d = {} + for keyspec, command in keymap: + keyseq = tuple(parse_keys(keyspec)) + d[keyseq] = command + if self.verbose: + print(d) + self.k = self.ck = compile_keymap(d, ()) + self.results = deque() + self.stack = [] + + def push(self, evt): + if self.verbose: + print("pushed", evt.data, end="") + key = evt.data + d = self.k.get(key) + if isinstance(d, dict): + if self.verbose: + print("transition") + self.stack.append(key) + self.k = d + else: + if d is None: + if self.verbose: + print("invalid") + if self.stack or len(key) > 1 or unicodedata.category(key) == "C": + self.results.append((self.invalid_cls, self.stack + [key])) + else: + # small optimization: + self.k[key] = self.character_cls + self.results.append((self.character_cls, [key])) + else: + if self.verbose: + print("matched", d) + self.results.append((d, self.stack + [key])) + self.stack = [] + self.k = self.ck + + def get(self): + if self.results: + return self.results.popleft() + else: + return None + + def empty(self) -> bool: + return not self.results diff --git a/PythonLib/full/_pyrepl/keymap.py b/PythonLib/full/_pyrepl/keymap.py new file mode 100644 index 00000000..d11df4b5 --- /dev/null +++ b/PythonLib/full/_pyrepl/keymap.py @@ -0,0 +1,213 @@ +# Copyright 2000-2008 Michael Hudson-Doyle +# Armin Rigo +# +# All Rights Reserved +# +# +# Permission to use, copy, modify, and distribute this software and +# its documentation for any purpose is hereby granted without fee, +# provided that the above copyright notice appear in all copies and +# that both that copyright notice and this permission notice appear in +# supporting documentation. +# +# THE AUTHOR MICHAEL HUDSON DISCLAIMS ALL WARRANTIES WITH REGARD TO +# THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY +# AND FITNESS, IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, +# INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER +# RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF +# CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN +# CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + +""" +Keymap contains functions for parsing keyspecs and turning keyspecs into +appropriate sequences. + +A keyspec is a string representing a sequence of key presses that can +be bound to a command. All characters other than the backslash represent +themselves. In the traditional manner, a backslash introduces an escape +sequence. + +pyrepl uses its own keyspec format that is meant to be a strict superset of +readline's KEYSEQ format. This means that if a spec is found that readline +accepts that this doesn't, it should be logged as a bug. Note that this means +we're using the '\\C-o' style of readline's keyspec, not the 'Control-o' sort. + +The extension to readline is that the sequence \\ denotes the +sequence of characters produced by hitting KEY. + +Examples: +'a' - what you get when you hit the 'a' key +'\\EOA' - Escape - O - A (up, on my terminal) +'\\' - the up arrow key +'\\' - ditto (keynames are case-insensitive) +'\\C-o', '\\c-o' - control-o +'\\M-.' - meta-period +'\\E.' - ditto (that's how meta works for pyrepl) +'\\', '\\', '\\t', '\\011', '\\x09', '\\X09', '\\C-i', '\\C-I' + - all of these are the tab character. +""" + +_escapes = { + "\\": "\\", + "'": "'", + '"': '"', + "a": "\a", + "b": "\b", + "e": "\033", + "f": "\f", + "n": "\n", + "r": "\r", + "t": "\t", + "v": "\v", +} + +_keynames = { + "backspace": "backspace", + "delete": "delete", + "down": "down", + "end": "end", + "enter": "\r", + "escape": "\033", + "f1": "f1", + "f2": "f2", + "f3": "f3", + "f4": "f4", + "f5": "f5", + "f6": "f6", + "f7": "f7", + "f8": "f8", + "f9": "f9", + "f10": "f10", + "f11": "f11", + "f12": "f12", + "f13": "f13", + "f14": "f14", + "f15": "f15", + "f16": "f16", + "f17": "f17", + "f18": "f18", + "f19": "f19", + "f20": "f20", + "home": "home", + "insert": "insert", + "left": "left", + "page down": "page down", + "page up": "page up", + "return": "\r", + "right": "right", + "space": " ", + "tab": "\t", + "up": "up", +} + + +class KeySpecError(Exception): + pass + + +def parse_keys(keys: str) -> list[str]: + """Parse keys in keyspec format to a sequence of keys.""" + s = 0 + r: list[str] = [] + while s < len(keys): + k, s = _parse_single_key_sequence(keys, s) + r.extend(k) + return r + + +def _parse_single_key_sequence(key: str, s: int) -> tuple[list[str], int]: + ctrl = 0 + meta = 0 + ret = "" + while not ret and s < len(key): + if key[s] == "\\": + c = key[s + 1].lower() + if c in _escapes: + ret = _escapes[c] + s += 2 + elif c == "c": + if key[s + 2] != "-": + raise KeySpecError( + "\\C must be followed by `-' (char %d of %s)" + % (s + 2, repr(key)) + ) + if ctrl: + raise KeySpecError( + "doubled \\C- (char %d of %s)" % (s + 1, repr(key)) + ) + ctrl = 1 + s += 3 + elif c == "m": + if key[s + 2] != "-": + raise KeySpecError( + "\\M must be followed by `-' (char %d of %s)" + % (s + 2, repr(key)) + ) + if meta: + raise KeySpecError( + "doubled \\M- (char %d of %s)" % (s + 1, repr(key)) + ) + meta = 1 + s += 3 + elif c.isdigit(): + n = key[s + 1 : s + 4] + ret = chr(int(n, 8)) + s += 4 + elif c == "x": + n = key[s + 2 : s + 4] + ret = chr(int(n, 16)) + s += 4 + elif c == "<": + t = key.find(">", s) + if t == -1: + raise KeySpecError( + "unterminated \\< starting at char %d of %s" + % (s + 1, repr(key)) + ) + ret = key[s + 2 : t].lower() + if ret not in _keynames: + raise KeySpecError( + "unrecognised keyname `%s' at char %d of %s" + % (ret, s + 2, repr(key)) + ) + ret = _keynames[ret] + s = t + 1 + else: + raise KeySpecError( + "unknown backslash escape %s at char %d of %s" + % (repr(c), s + 2, repr(key)) + ) + else: + ret = key[s] + s += 1 + if ctrl: + if len(ret) == 1: + ret = chr(ord(ret) & 0x1F) # curses.ascii.ctrl() + elif ret in {"left", "right"}: + ret = f"ctrl {ret}" + else: + raise KeySpecError("\\C- followed by invalid key") + + result = [ret], s + if meta: + result[0].insert(0, "\033") + return result + + +def compile_keymap(keymap, empty=b""): + r = {} + for key, value in keymap.items(): + if isinstance(key, bytes): + first = key[:1] + else: + first = key[0] + r.setdefault(first, {})[key[1:]] = value + for key, value in r.items(): + if empty in value: + if len(value) != 1: + raise KeySpecError("key definitions for %s clash" % (value.values(),)) + else: + r[key] = value[empty] + else: + r[key] = compile_keymap(value, empty) + return r diff --git a/PythonLib/full/_pyrepl/main.py b/PythonLib/full/_pyrepl/main.py new file mode 100644 index 00000000..447eb1e5 --- /dev/null +++ b/PythonLib/full/_pyrepl/main.py @@ -0,0 +1,58 @@ +import errno +import os +import sys +import types + + +CAN_USE_PYREPL: bool +FAIL_REASON: str +try: + if sys.platform == "win32" and sys.getwindowsversion().build < 10586: + raise RuntimeError("Windows 10 TH2 or later required") + if not os.isatty(sys.stdin.fileno()): + raise OSError(errno.ENOTTY, "tty required", "stdin") + from .simple_interact import check + if err := check(): + raise RuntimeError(err) +except Exception as e: + CAN_USE_PYREPL = False + FAIL_REASON = f"warning: can't use pyrepl: {e}" +else: + CAN_USE_PYREPL = True + FAIL_REASON = "" + + +def interactive_console(mainmodule=None, quiet=False, pythonstartup=False): + if not CAN_USE_PYREPL: + if not os.getenv('PYTHON_BASIC_REPL') and FAIL_REASON: + from .trace import trace + trace(FAIL_REASON) + print(FAIL_REASON, file=sys.stderr) + return sys._baserepl() + + if not mainmodule: + mainmodule = types.ModuleType("__main__") + + namespace = mainmodule.__dict__ + + # sys._baserepl() above does this internally, we do it here + startup_path = os.getenv("PYTHONSTARTUP") + if pythonstartup and startup_path: + sys.audit("cpython.run_startup", startup_path) + + import tokenize + with tokenize.open(startup_path) as f: + startup_code = compile(f.read(), startup_path, "exec") + exec(startup_code, namespace) + + # set sys.{ps1,ps2} just before invoking the interactive interpreter. This + # mimics what CPython does in pythonrun.c + if not hasattr(sys, "ps1"): + sys.ps1 = ">>> " + if not hasattr(sys, "ps2"): + sys.ps2 = "... " + + from .console import InteractiveColoredConsole + from .simple_interact import run_multiline_interactive_console + console = InteractiveColoredConsole(namespace, filename="") + run_multiline_interactive_console(console) diff --git a/PythonLib/full/_pyrepl/mypy.ini b/PythonLib/full/_pyrepl/mypy.ini new file mode 100644 index 00000000..9375a55b --- /dev/null +++ b/PythonLib/full/_pyrepl/mypy.ini @@ -0,0 +1,25 @@ +# Config file for running mypy on _pyrepl. +# Run mypy by invoking `mypy --config-file Lib/_pyrepl/mypy.ini` +# on the command-line from the repo root + +[mypy] +files = Lib/_pyrepl +mypy_path = $MYPY_CONFIG_FILE_DIR/../../Misc/mypy +explicit_package_bases = True +python_version = 3.13 +platform = linux +pretty = True + +# Enable most stricter settings +enable_error_code = ignore-without-code,redundant-expr +strict = True + +# Various stricter settings that we can't yet enable +# Try to enable these in the following order: +disallow_untyped_calls = False +disallow_untyped_defs = False +check_untyped_defs = False + +# Various internal modules that typeshed deliberately doesn't have stubs for: +[mypy-_abc.*,_opcode.*,_overlapped.*,_testcapi.*,_testinternalcapi.*,test.*] +ignore_missing_imports = True diff --git a/PythonLib/full/_pyrepl/pager.py b/PythonLib/full/_pyrepl/pager.py new file mode 100644 index 00000000..1fddc63e --- /dev/null +++ b/PythonLib/full/_pyrepl/pager.py @@ -0,0 +1,175 @@ +from __future__ import annotations + +import io +import os +import re +import sys + + +# types +if False: + from typing import Protocol + class Pager(Protocol): + def __call__(self, text: str, title: str = "") -> None: + ... + + +def get_pager() -> Pager: + """Decide what method to use for paging through text.""" + if not hasattr(sys.stdin, "isatty"): + return plain_pager + if not hasattr(sys.stdout, "isatty"): + return plain_pager + if not sys.stdin.isatty() or not sys.stdout.isatty(): + return plain_pager + if sys.platform == "emscripten": + return plain_pager + use_pager = os.environ.get('MANPAGER') or os.environ.get('PAGER') + if use_pager: + if sys.platform == 'win32': # pipes completely broken in Windows + return lambda text, title='': tempfile_pager(plain(text), use_pager) + elif os.environ.get('TERM') in ('dumb', 'emacs'): + return lambda text, title='': pipe_pager(plain(text), use_pager, title) + else: + return lambda text, title='': pipe_pager(text, use_pager, title) + if os.environ.get('TERM') in ('dumb', 'emacs'): + return plain_pager + if sys.platform == 'win32': + return lambda text, title='': tempfile_pager(plain(text), 'more <') + if hasattr(os, 'system') and os.system('(pager) 2>/dev/null') == 0: + return lambda text, title='': pipe_pager(text, 'pager', title) + if hasattr(os, 'system') and os.system('(less) 2>/dev/null') == 0: + return lambda text, title='': pipe_pager(text, 'less', title) + + import tempfile + (fd, filename) = tempfile.mkstemp() + os.close(fd) + try: + if hasattr(os, 'system') and os.system('more "%s"' % filename) == 0: + return lambda text, title='': pipe_pager(text, 'more', title) + else: + return tty_pager + finally: + os.unlink(filename) + + +def escape_stdout(text: str) -> str: + # Escape non-encodable characters to avoid encoding errors later + encoding = getattr(sys.stdout, 'encoding', None) or 'utf-8' + return text.encode(encoding, 'backslashreplace').decode(encoding) + + +def escape_less(s: str) -> str: + return re.sub(r'([?:.%\\])', r'\\\1', s) + + +def plain(text: str) -> str: + """Remove boldface formatting from text.""" + return re.sub('.\b', '', text) + + +def tty_pager(text: str, title: str = '') -> None: + """Page through text on a text terminal.""" + lines = plain(escape_stdout(text)).split('\n') + has_tty = False + try: + import tty + import termios + fd = sys.stdin.fileno() + old = termios.tcgetattr(fd) + tty.setcbreak(fd) + has_tty = True + + def getchar() -> str: + return sys.stdin.read(1) + + except (ImportError, AttributeError, io.UnsupportedOperation): + def getchar() -> str: + return sys.stdin.readline()[:-1][:1] + + try: + try: + h = int(os.environ.get('LINES', 0)) + except ValueError: + h = 0 + if h <= 1: + h = 25 + r = inc = h - 1 + sys.stdout.write('\n'.join(lines[:inc]) + '\n') + while lines[r:]: + sys.stdout.write('-- more --') + sys.stdout.flush() + c = getchar() + + if c in ('q', 'Q'): + sys.stdout.write('\r \r') + break + elif c in ('\r', '\n'): + sys.stdout.write('\r \r' + lines[r] + '\n') + r = r + 1 + continue + if c in ('b', 'B', '\x1b'): + r = r - inc - inc + if r < 0: r = 0 + sys.stdout.write('\n' + '\n'.join(lines[r:r+inc]) + '\n') + r = r + inc + + finally: + if has_tty: + termios.tcsetattr(fd, termios.TCSAFLUSH, old) + + +def plain_pager(text: str, title: str = '') -> None: + """Simply print unformatted text. This is the ultimate fallback.""" + sys.stdout.write(plain(escape_stdout(text))) + + +def pipe_pager(text: str, cmd: str, title: str = '') -> None: + """Page through text by feeding it to another program.""" + import subprocess + env = os.environ.copy() + if title: + title += ' ' + esc_title = escape_less(title) + prompt_string = ( + f' {esc_title}' + + '?ltline %lt?L/%L.' + ':byte %bB?s/%s.' + '.' + '?e (END):?pB %pB\\%..' + ' (press h for help or q to quit)') + env['LESS'] = '-RmPm{0}$PM{0}$'.format(prompt_string) + proc = subprocess.Popen(cmd, shell=True, stdin=subprocess.PIPE, + errors='backslashreplace', env=env) + assert proc.stdin is not None + try: + with proc.stdin as pipe: + try: + pipe.write(text) + except KeyboardInterrupt: + # We've hereby abandoned whatever text hasn't been written, + # but the pager is still in control of the terminal. + pass + except OSError: + pass # Ignore broken pipes caused by quitting the pager program. + while True: + try: + proc.wait() + break + except KeyboardInterrupt: + # Ignore ctl-c like the pager itself does. Otherwise the pager is + # left running and the terminal is in raw mode and unusable. + pass + + +def tempfile_pager(text: str, cmd: str, title: str = '') -> None: + """Page through text by invoking a program on a temporary file.""" + import tempfile + with tempfile.TemporaryDirectory() as tempdir: + filename = os.path.join(tempdir, 'pydoc.out') + with open(filename, 'w', errors='backslashreplace', + encoding=os.device_encoding(0) if + sys.platform == 'win32' else None + ) as file: + file.write(text) + os.system(cmd + ' "' + filename + '"') diff --git a/PythonLib/full/_pyrepl/reader.py b/PythonLib/full/_pyrepl/reader.py new file mode 100644 index 00000000..9ab92f64 --- /dev/null +++ b/PythonLib/full/_pyrepl/reader.py @@ -0,0 +1,772 @@ +# Copyright 2000-2010 Michael Hudson-Doyle +# Antonio Cuni +# Armin Rigo +# +# All Rights Reserved +# +# +# Permission to use, copy, modify, and distribute this software and +# its documentation for any purpose is hereby granted without fee, +# provided that the above copyright notice appear in all copies and +# that both that copyright notice and this permission notice appear in +# supporting documentation. +# +# THE AUTHOR MICHAEL HUDSON DISCLAIMS ALL WARRANTIES WITH REGARD TO +# THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY +# AND FITNESS, IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, +# INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER +# RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF +# CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN +# CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + +from __future__ import annotations + +import sys +import _colorize + +from contextlib import contextmanager +from dataclasses import dataclass, field, fields + +from . import commands, console, input +from .utils import wlen, unbracket, disp_str, gen_colors, THEME +from .trace import trace + + +# types +Command = commands.Command +from .types import Callback, SimpleContextManager, KeySpec, CommandName + + +# syntax classes +SYNTAX_WHITESPACE, SYNTAX_WORD, SYNTAX_SYMBOL = range(3) + + +def make_default_syntax_table() -> dict[str, int]: + # XXX perhaps should use some unicodedata here? + st: dict[str, int] = {} + for c in map(chr, range(256)): + st[c] = SYNTAX_SYMBOL + for c in [a for a in map(chr, range(256)) if a.isalnum()]: + st[c] = SYNTAX_WORD + st["\n"] = st[" "] = SYNTAX_WHITESPACE + return st + + +def make_default_commands() -> dict[CommandName, type[Command]]: + result: dict[CommandName, type[Command]] = {} + for v in vars(commands).values(): + if isinstance(v, type) and issubclass(v, Command) and v.__name__[0].islower(): + result[v.__name__] = v + result[v.__name__.replace("_", "-")] = v + return result + + +default_keymap: tuple[tuple[KeySpec, CommandName], ...] = tuple( + [ + (r"\C-a", "beginning-of-line"), + (r"\C-b", "left"), + (r"\C-c", "interrupt"), + (r"\C-d", "delete"), + (r"\C-e", "end-of-line"), + (r"\C-f", "right"), + (r"\C-g", "cancel"), + (r"\C-h", "backspace"), + (r"\C-j", "accept"), + (r"\", "accept"), + (r"\C-k", "kill-line"), + (r"\C-l", "clear-screen"), + (r"\C-m", "accept"), + (r"\C-t", "transpose-characters"), + (r"\C-u", "unix-line-discard"), + (r"\C-w", "unix-word-rubout"), + (r"\C-x\C-u", "upcase-region"), + (r"\C-y", "yank"), + *(() if sys.platform == "win32" else ((r"\C-z", "suspend"), )), + (r"\M-b", "backward-word"), + (r"\M-c", "capitalize-word"), + (r"\M-d", "kill-word"), + (r"\M-f", "forward-word"), + (r"\M-l", "downcase-word"), + (r"\M-t", "transpose-words"), + (r"\M-u", "upcase-word"), + (r"\M-y", "yank-pop"), + (r"\M--", "digit-arg"), + (r"\M-0", "digit-arg"), + (r"\M-1", "digit-arg"), + (r"\M-2", "digit-arg"), + (r"\M-3", "digit-arg"), + (r"\M-4", "digit-arg"), + (r"\M-5", "digit-arg"), + (r"\M-6", "digit-arg"), + (r"\M-7", "digit-arg"), + (r"\M-8", "digit-arg"), + (r"\M-9", "digit-arg"), + (r"\M-\n", "accept"), + ("\\\\", "self-insert"), + (r"\x1b[200~", "perform-bracketed-paste"), + (r"\x03", "ctrl-c"), + ] + + [(c, "self-insert") for c in map(chr, range(32, 127)) if c != "\\"] + + [(c, "self-insert") for c in map(chr, range(128, 256)) if c.isalpha()] + + [ + (r"\", "up"), + (r"\", "down"), + (r"\", "left"), + (r"\C-\", "backward-word"), + (r"\", "right"), + (r"\C-\", "forward-word"), + (r"\", "delete"), + (r"\x1b[3~", "delete"), + (r"\", "backspace"), + (r"\M-\", "backward-kill-word"), + (r"\", "end-of-line"), # was 'end' + (r"\", "beginning-of-line"), # was 'home' + (r"\", "help"), + (r"\", "show-history"), + (r"\", "paste-mode"), + (r"\EOF", "end"), # the entries in the terminfo database for xterms + (r"\EOH", "home"), # seem to be wrong. this is a less than ideal + # workaround + ] +) + + +@dataclass(slots=True) +class Reader: + """The Reader class implements the bare bones of a command reader, + handling such details as editing and cursor motion. What it does + not support are such things as completion or history support - + these are implemented elsewhere. + + Instance variables of note include: + + * buffer: + A per-character list containing all the characters that have been + entered. Does not include color information. + * console: + Hopefully encapsulates the OS dependent stuff. + * pos: + A 0-based index into 'buffer' for where the insertion point + is. + * screeninfo: + A list of screen position tuples. Each list element is a tuple + representing information on visible line length for a given line. + Allows for efficient skipping of color escape sequences. + * cxy, lxy: + the position of the insertion point in screen ... + * syntax_table: + Dictionary mapping characters to 'syntax class'; read the + emacs docs to see what this means :-) + * commands: + Dictionary mapping command names to command classes. + * arg: + The emacs-style prefix argument. It will be None if no such + argument has been provided. + * dirty: + True if we need to refresh the display. + * kill_ring: + The emacs-style kill-ring; manipulated with yank & yank-pop + * ps1, ps2, ps3, ps4: + prompts. ps1 is the prompt for a one-line input; for a + multiline input it looks like: + ps2> first line of input goes here + ps3> second and further + ps3> lines get ps3 + ... + ps4> and the last one gets ps4 + As with the usual top-level, you can set these to instances if + you like; str() will be called on them (once) at the beginning + of each command. Don't put really long or newline containing + strings here, please! + This is just the default policy; you can change it freely by + overriding get_prompt() (and indeed some standard subclasses + do). + * finished: + handle1 will set this to a true value if a command signals + that we're done. + """ + + console: console.Console + + ## state + buffer: list[str] = field(default_factory=list) + pos: int = 0 + ps1: str = "->> " + ps2: str = "/>> " + ps3: str = "|.. " + ps4: str = R"\__ " + kill_ring: list[list[str]] = field(default_factory=list) + msg: str = "" + arg: int | None = None + dirty: bool = False + finished: bool = False + paste_mode: bool = False + commands: dict[str, type[Command]] = field(default_factory=make_default_commands) + last_command: type[Command] | None = None + syntax_table: dict[str, int] = field(default_factory=make_default_syntax_table) + keymap: tuple[tuple[str, str], ...] = () + input_trans: input.KeymapTranslator = field(init=False) + input_trans_stack: list[input.KeymapTranslator] = field(default_factory=list) + screen: list[str] = field(default_factory=list) + screeninfo: list[tuple[int, list[int]]] = field(init=False) + cxy: tuple[int, int] = field(init=False) + lxy: tuple[int, int] = field(init=False) + scheduled_commands: list[str] = field(default_factory=list) + can_colorize: bool = False + threading_hook: Callback | None = None + + ## cached metadata to speed up screen refreshes + @dataclass + class RefreshCache: + screen: list[str] = field(default_factory=list) + screeninfo: list[tuple[int, list[int]]] = field(init=False) + line_end_offsets: list[int] = field(default_factory=list) + pos: int = field(init=False) + cxy: tuple[int, int] = field(init=False) + dimensions: tuple[int, int] = field(init=False) + invalidated: bool = False + + def update_cache(self, + reader: Reader, + screen: list[str], + screeninfo: list[tuple[int, list[int]]], + ) -> None: + self.screen = screen.copy() + self.screeninfo = screeninfo.copy() + self.pos = reader.pos + self.cxy = reader.cxy + self.dimensions = reader.console.width, reader.console.height + self.invalidated = False + + def valid(self, reader: Reader) -> bool: + if self.invalidated: + return False + dimensions = reader.console.width, reader.console.height + dimensions_changed = dimensions != self.dimensions + return not dimensions_changed + + def get_cached_location(self, reader: Reader) -> tuple[int, int]: + if self.invalidated: + raise ValueError("Cache is invalidated") + offset = 0 + earliest_common_pos = min(reader.pos, self.pos) + num_common_lines = len(self.line_end_offsets) + while num_common_lines > 0: + offset = self.line_end_offsets[num_common_lines - 1] + if earliest_common_pos > offset: + break + num_common_lines -= 1 + else: + offset = 0 + return offset, num_common_lines + + last_refresh_cache: RefreshCache = field(default_factory=RefreshCache) + + def __post_init__(self) -> None: + # Enable the use of `insert` without a `prepare` call - necessary to + # facilitate the tab completion hack implemented for + # . + self.keymap = self.collect_keymap() + self.input_trans = input.KeymapTranslator( + self.keymap, invalid_cls="invalid-key", character_cls="self-insert" + ) + self.screeninfo = [(0, [])] + self.cxy = self.pos2xy() + self.lxy = (self.pos, 0) + self.can_colorize = _colorize.can_colorize() + + self.last_refresh_cache.screeninfo = self.screeninfo + self.last_refresh_cache.pos = self.pos + self.last_refresh_cache.cxy = self.cxy + self.last_refresh_cache.dimensions = (0, 0) + + def collect_keymap(self) -> tuple[tuple[KeySpec, CommandName], ...]: + return default_keymap + + def calc_screen(self) -> list[str]: + """Translate changes in self.buffer into changes in self.console.screen.""" + # Since the last call to calc_screen: + # screen and screeninfo may differ due to a completion menu being shown + # pos and cxy may differ due to edits, cursor movements, or completion menus + + # Lines that are above both the old and new cursor position can't have changed, + # unless the terminal has been resized (which might cause reflowing) or we've + # entered or left paste mode (which changes prompts, causing reflowing). + num_common_lines = 0 + offset = 0 + if self.last_refresh_cache.valid(self): + offset, num_common_lines = self.last_refresh_cache.get_cached_location(self) + + screen = self.last_refresh_cache.screen + del screen[num_common_lines:] + + screeninfo = self.last_refresh_cache.screeninfo + del screeninfo[num_common_lines:] + + last_refresh_line_end_offsets = self.last_refresh_cache.line_end_offsets + del last_refresh_line_end_offsets[num_common_lines:] + + pos = self.pos + pos -= offset + + prompt_from_cache = (offset and self.buffer[offset - 1] != "\n") + + if self.can_colorize: + colors = list(gen_colors(self.get_unicode())) + else: + colors = None + trace("colors = {colors}", colors=colors) + lines = "".join(self.buffer[offset:]).split("\n") + cursor_found = False + lines_beyond_cursor = 0 + for ln, line in enumerate(lines, num_common_lines): + line_len = len(line) + if 0 <= pos <= line_len: + self.lxy = pos, ln + cursor_found = True + elif cursor_found: + lines_beyond_cursor += 1 + if lines_beyond_cursor > self.console.height: + # No need to keep formatting lines. + # The console can't show them. + break + if prompt_from_cache: + # Only the first line's prompt can come from the cache + prompt_from_cache = False + prompt = "" + else: + prompt = self.get_prompt(ln, line_len >= pos >= 0) + while "\n" in prompt: + pre_prompt, _, prompt = prompt.partition("\n") + last_refresh_line_end_offsets.append(offset) + screen.append(pre_prompt) + screeninfo.append((0, [])) + pos -= line_len + 1 + prompt, prompt_len = self.process_prompt(prompt) + chars, char_widths = disp_str(line, colors, offset) + wrapcount = (sum(char_widths) + prompt_len) // self.console.width + if wrapcount == 0 or not char_widths: + offset += line_len + 1 # Takes all of the line plus the newline + last_refresh_line_end_offsets.append(offset) + screen.append(prompt + "".join(chars)) + screeninfo.append((prompt_len, char_widths)) + else: + pre = prompt + prelen = prompt_len + for wrap in range(wrapcount + 1): + index_to_wrap_before = 0 + column = 0 + for char_width in char_widths: + if column + char_width + prelen >= self.console.width: + break + index_to_wrap_before += 1 + column += char_width + if len(chars) > index_to_wrap_before: + offset += index_to_wrap_before + post = "\\" + after = [1] + else: + offset += index_to_wrap_before + 1 # Takes the newline + post = "" + after = [] + last_refresh_line_end_offsets.append(offset) + render = pre + "".join(chars[:index_to_wrap_before]) + post + render_widths = char_widths[:index_to_wrap_before] + after + screen.append(render) + screeninfo.append((prelen, render_widths)) + chars = chars[index_to_wrap_before:] + char_widths = char_widths[index_to_wrap_before:] + pre = "" + prelen = 0 + self.screeninfo = screeninfo + self.cxy = self.pos2xy() + if self.msg: + for mline in self.msg.split("\n"): + screen.append(mline) + screeninfo.append((0, [])) + + self.last_refresh_cache.update_cache(self, screen, screeninfo) + return screen + + @staticmethod + def process_prompt(prompt: str) -> tuple[str, int]: + r"""Return a tuple with the prompt string and its visible length. + + The prompt string has the zero-width brackets recognized by shells + (\x01 and \x02) removed. The length ignores anything between those + brackets as well as any ANSI escape sequences. + """ + out_prompt = unbracket(prompt, including_content=False) + visible_prompt = unbracket(prompt, including_content=True) + return out_prompt, wlen(visible_prompt) + + def bow(self, p: int | None = None) -> int: + """Return the 0-based index of the word break preceding p most + immediately. + + p defaults to self.pos; word boundaries are determined using + self.syntax_table.""" + if p is None: + p = self.pos + st = self.syntax_table + b = self.buffer + p -= 1 + while p >= 0 and st.get(b[p], SYNTAX_WORD) != SYNTAX_WORD: + p -= 1 + while p >= 0 and st.get(b[p], SYNTAX_WORD) == SYNTAX_WORD: + p -= 1 + return p + 1 + + def eow(self, p: int | None = None) -> int: + """Return the 0-based index of the word break following p most + immediately. + + p defaults to self.pos; word boundaries are determined using + self.syntax_table.""" + if p is None: + p = self.pos + st = self.syntax_table + b = self.buffer + while p < len(b) and st.get(b[p], SYNTAX_WORD) != SYNTAX_WORD: + p += 1 + while p < len(b) and st.get(b[p], SYNTAX_WORD) == SYNTAX_WORD: + p += 1 + return p + + def bol(self, p: int | None = None) -> int: + """Return the 0-based index of the line break preceding p most + immediately. + + p defaults to self.pos.""" + if p is None: + p = self.pos + b = self.buffer + p -= 1 + while p >= 0 and b[p] != "\n": + p -= 1 + return p + 1 + + def eol(self, p: int | None = None) -> int: + """Return the 0-based index of the line break following p most + immediately. + + p defaults to self.pos.""" + if p is None: + p = self.pos + b = self.buffer + while p < len(b) and b[p] != "\n": + p += 1 + return p + + def max_column(self, y: int) -> int: + """Return the last x-offset for line y""" + return self.screeninfo[y][0] + sum(self.screeninfo[y][1]) + + def max_row(self) -> int: + return len(self.screeninfo) - 1 + + def get_arg(self, default: int = 1) -> int: + """Return any prefix argument that the user has supplied, + returning 'default' if there is None. Defaults to 1. + """ + if self.arg is None: + return default + return self.arg + + def get_prompt(self, lineno: int, cursor_on_line: bool) -> str: + """Return what should be in the left-hand margin for line + 'lineno'.""" + if self.arg is not None and cursor_on_line: + prompt = f"(arg: {self.arg}) " + elif self.paste_mode: + prompt = "(paste) " + elif "\n" in self.buffer: + if lineno == 0: + prompt = self.ps2 + elif self.ps4 and lineno == self.buffer.count("\n"): + prompt = self.ps4 + else: + prompt = self.ps3 + else: + prompt = self.ps1 + + if self.can_colorize: + t = THEME() + prompt = f"{t.prompt}{prompt}{t.reset}" + return prompt + + def push_input_trans(self, itrans: input.KeymapTranslator) -> None: + self.input_trans_stack.append(self.input_trans) + self.input_trans = itrans + + def pop_input_trans(self) -> None: + self.input_trans = self.input_trans_stack.pop() + + def setpos_from_xy(self, x: int, y: int) -> None: + """Set pos according to coordinates x, y""" + pos = 0 + i = 0 + while i < y: + prompt_len, char_widths = self.screeninfo[i] + offset = len(char_widths) + in_wrapped_line = prompt_len + sum(char_widths) >= self.console.width + if in_wrapped_line: + pos += offset - 1 # -1 cause backslash is not in buffer + else: + pos += offset + 1 # +1 cause newline is in buffer + i += 1 + + j = 0 + cur_x = self.screeninfo[i][0] + while cur_x < x: + if self.screeninfo[i][1][j] == 0: + j += 1 # prevent potential future infinite loop + continue + cur_x += self.screeninfo[i][1][j] + j += 1 + pos += 1 + + self.pos = pos + + def pos2xy(self) -> tuple[int, int]: + """Return the x, y coordinates of position 'pos'.""" + + prompt_len, y = 0, 0 + char_widths: list[int] = [] + pos = self.pos + assert 0 <= pos <= len(self.buffer) + + # optimize for the common case: typing at the end of the buffer + if pos == len(self.buffer) and len(self.screeninfo) > 0: + y = len(self.screeninfo) - 1 + prompt_len, char_widths = self.screeninfo[y] + return prompt_len + sum(char_widths), y + + for prompt_len, char_widths in self.screeninfo: + offset = len(char_widths) + in_wrapped_line = prompt_len + sum(char_widths) >= self.console.width + if in_wrapped_line: + offset -= 1 # need to remove line-wrapping backslash + + if offset >= pos: + break + + if not in_wrapped_line: + offset += 1 # there's a newline in buffer + + pos -= offset + y += 1 + return prompt_len + sum(char_widths[:pos]), y + + def insert(self, text: str | list[str]) -> None: + """Insert 'text' at the insertion point.""" + self.buffer[self.pos : self.pos] = list(text) + self.pos += len(text) + self.dirty = True + + def update_cursor(self) -> None: + """Move the cursor to reflect changes in self.pos""" + self.cxy = self.pos2xy() + trace("update_cursor({pos}) = {cxy}", pos=self.pos, cxy=self.cxy) + self.console.move_cursor(*self.cxy) + + def after_command(self, cmd: Command) -> None: + """This function is called to allow post command cleanup.""" + if getattr(cmd, "kills_digit_arg", True): + if self.arg is not None: + self.dirty = True + self.arg = None + + def prepare(self) -> None: + """Get ready to run. Call restore when finished. You must not + write to the console in between the calls to prepare and + restore.""" + try: + self.console.prepare() + self.arg = None + self.finished = False + del self.buffer[:] + self.pos = 0 + self.dirty = True + self.last_command = None + self.calc_screen() + except BaseException: + self.restore() + raise + + while self.scheduled_commands: + cmd = self.scheduled_commands.pop() + self.do_cmd((cmd, [])) + + def last_command_is(self, cls: type) -> bool: + if not self.last_command: + return False + return issubclass(cls, self.last_command) + + def restore(self) -> None: + """Clean up after a run.""" + self.console.restore() + + @contextmanager + def suspend(self) -> SimpleContextManager: + """A context manager to delegate to another reader.""" + prev_state = {f.name: getattr(self, f.name) for f in fields(self)} + try: + self.restore() + yield + finally: + for arg in ("msg", "ps1", "ps2", "ps3", "ps4", "paste_mode"): + setattr(self, arg, prev_state[arg]) + self.prepare() + + @contextmanager + def suspend_colorization(self) -> SimpleContextManager: + try: + old_can_colorize = self.can_colorize + self.can_colorize = False + yield + finally: + self.can_colorize = old_can_colorize + + + def finish(self) -> None: + """Called when a command signals that we're finished.""" + pass + + def error(self, msg: str = "none") -> None: + self.msg = "! " + msg + " " + self.dirty = True + self.console.beep() + + def update_screen(self) -> None: + if self.dirty: + self.refresh() + + def refresh(self) -> None: + """Recalculate and refresh the screen.""" + # this call sets up self.cxy, so call it first. + self.screen = self.calc_screen() + self.console.refresh(self.screen, self.cxy) + self.dirty = False + + def do_cmd(self, cmd: tuple[str, list[str]]) -> None: + """`cmd` is a tuple of "event_name" and "event", which in the current + implementation is always just the "buffer" which happens to be a list + of single-character strings.""" + + trace("received command {cmd}", cmd=cmd) + if isinstance(cmd[0], str): + command_type = self.commands.get(cmd[0], commands.invalid_command) + elif isinstance(cmd[0], type): + command_type = cmd[0] + else: + return # nothing to do + + command = command_type(self, *cmd) # type: ignore[arg-type] + command.do() + + self.after_command(command) + + if self.dirty: + self.refresh() + else: + self.update_cursor() + + if not isinstance(cmd, commands.digit_arg): + self.last_command = command_type + + self.finished = bool(command.finish) + if self.finished: + self.console.finish() + self.finish() + + def run_hooks(self) -> None: + threading_hook = self.threading_hook + if threading_hook is None and 'threading' in sys.modules: + from ._threading_handler import install_threading_hook + install_threading_hook(self) + if threading_hook is not None: + try: + threading_hook() + except Exception: + pass + + input_hook = self.console.input_hook + if input_hook: + try: + input_hook() + except Exception: + pass + + def handle1(self, block: bool = True) -> bool: + """Handle a single event. Wait as long as it takes if block + is true (the default), otherwise return False if no event is + pending.""" + + if self.msg: + self.msg = "" + self.dirty = True + + while True: + # We use the same timeout as in readline.c: 100ms + self.run_hooks() + self.console.wait(100) + event = self.console.get_event(block=False) + if not event: + if block: + continue + return False + + translate = True + + if event.evt == "key": + self.input_trans.push(event) + elif event.evt == "scroll": + self.refresh() + elif event.evt == "resize": + self.refresh() + else: + translate = False + + if translate: + cmd = self.input_trans.get() + else: + cmd = [event.evt, event.data] + + if cmd is None: + if block: + continue + return False + + self.do_cmd(cmd) + return True + + def push_char(self, char: int | bytes) -> None: + self.console.push_char(char) + self.handle1(block=False) + + def readline(self, startup_hook: Callback | None = None) -> str: + """Read a line. The implementation of this method also shows + how to drive Reader if you want more control over the event + loop.""" + self.prepare() + try: + if startup_hook is not None: + startup_hook() + self.refresh() + while not self.finished: + self.handle1() + return self.get_unicode() + + finally: + self.restore() + + def bind(self, spec: KeySpec, command: CommandName) -> None: + self.keymap = self.keymap + ((spec, command),) + self.input_trans = input.KeymapTranslator( + self.keymap, invalid_cls="invalid-key", character_cls="self-insert" + ) + + def get_unicode(self) -> str: + """Return the current buffer as a unicode string.""" + return "".join(self.buffer) diff --git a/PythonLib/full/_pyrepl/readline.py b/PythonLib/full/_pyrepl/readline.py new file mode 100644 index 00000000..23b8fa6b --- /dev/null +++ b/PythonLib/full/_pyrepl/readline.py @@ -0,0 +1,620 @@ +# Copyright 2000-2010 Michael Hudson-Doyle +# Alex Gaynor +# Antonio Cuni +# Armin Rigo +# Holger Krekel +# +# All Rights Reserved +# +# +# Permission to use, copy, modify, and distribute this software and +# its documentation for any purpose is hereby granted without fee, +# provided that the above copyright notice appear in all copies and +# that both that copyright notice and this permission notice appear in +# supporting documentation. +# +# THE AUTHOR MICHAEL HUDSON DISCLAIMS ALL WARRANTIES WITH REGARD TO +# THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY +# AND FITNESS, IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, +# INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER +# RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF +# CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN +# CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + +"""A compatibility wrapper reimplementing the 'readline' standard module +on top of pyrepl. Not all functionalities are supported. Contains +extensions for multiline input. +""" + +from __future__ import annotations + +import warnings +from dataclasses import dataclass, field + +import os +from site import gethistoryfile +import sys +from rlcompleter import Completer as RLCompleter + +from . import commands, historical_reader +from .completing_reader import CompletingReader +from .console import Console as ConsoleType +from ._module_completer import ModuleCompleter, make_default_module_completer + +Console: type[ConsoleType] +_error: tuple[type[Exception], ...] | type[Exception] + +if os.name == "nt": + from .windows_console import WindowsConsole as Console, _error +else: + from .unix_console import UnixConsole as Console, _error + +ENCODING = sys.getdefaultencoding() or "latin1" + + +# types +Command = commands.Command +from collections.abc import Callable, Collection +from .types import Callback, Completer, KeySpec, CommandName + +TYPE_CHECKING = False + +if TYPE_CHECKING: + from typing import Any, Mapping + + +MoreLinesCallable = Callable[[str], bool] + + +__all__ = [ + "add_history", + "clear_history", + "get_begidx", + "get_completer", + "get_completer_delims", + "get_current_history_length", + "get_endidx", + "get_history_item", + "get_history_length", + "get_line_buffer", + "insert_text", + "parse_and_bind", + "read_history_file", + # "read_init_file", + # "redisplay", + "remove_history_item", + "replace_history_item", + "set_auto_history", + "set_completer", + "set_completer_delims", + "set_history_length", + # "set_pre_input_hook", + "set_startup_hook", + "write_history_file", + "append_history_file", + # ---- multiline extensions ---- + "multiline_input", +] + +# ____________________________________________________________ + +@dataclass +class ReadlineConfig: + readline_completer: Completer | None = None + completer_delims: frozenset[str] = frozenset(" \t\n`~!@#$%^&*()-=+[{]}\\|;:'\",<>/?") + module_completer: ModuleCompleter = field(default_factory=make_default_module_completer) + +@dataclass(kw_only=True) +class ReadlineAlikeReader(historical_reader.HistoricalReader, CompletingReader): + # Class fields + assume_immutable_completions = False + use_brackets = False + sort_in_column = True + + # Instance fields + config: ReadlineConfig + more_lines: MoreLinesCallable | None = None + last_used_indentation: str | None = None + + def __post_init__(self) -> None: + super().__post_init__() + self.commands["maybe_accept"] = maybe_accept + self.commands["maybe-accept"] = maybe_accept + self.commands["backspace_dedent"] = backspace_dedent + self.commands["backspace-dedent"] = backspace_dedent + + def error(self, msg: str = "none") -> None: + pass # don't show error messages by default + + def get_stem(self) -> str: + b = self.buffer + p = self.pos - 1 + completer_delims = self.config.completer_delims + while p >= 0 and b[p] not in completer_delims: + p -= 1 + return "".join(b[p + 1 : self.pos]) + + def get_completions(self, stem: str) -> list[str]: + module_completions = self.get_module_completions() + if module_completions is not None: + return module_completions + if len(stem) == 0 and self.more_lines is not None: + b = self.buffer + p = self.pos + while p > 0 and b[p - 1] != "\n": + p -= 1 + num_spaces = 4 - ((self.pos - p) % 4) + return [" " * num_spaces] + result = [] + function = self.config.readline_completer + if function is not None: + try: + stem = str(stem) # rlcompleter.py seems to not like unicode + except UnicodeEncodeError: + pass # but feed unicode anyway if we have no choice + state = 0 + while True: + try: + next = function(stem, state) + except Exception: + break + if not isinstance(next, str): + break + result.append(next) + state += 1 + # emulate the behavior of the standard readline that sorts + # the completions before displaying them. + result.sort() + return result + + def get_module_completions(self) -> list[str] | None: + line = self.get_line() + return self.config.module_completer.get_completions(line) + + def get_trimmed_history(self, maxlength: int) -> list[str]: + if maxlength >= 0: + cut = len(self.history) - maxlength + if cut < 0: + cut = 0 + else: + cut = 0 + return self.history[cut:] + + def update_last_used_indentation(self) -> None: + indentation = _get_first_indentation(self.buffer) + if indentation is not None: + self.last_used_indentation = indentation + + # --- simplified support for reading multiline Python statements --- + + def collect_keymap(self) -> tuple[tuple[KeySpec, CommandName], ...]: + return super().collect_keymap() + ( + (r"\n", "maybe-accept"), + (r"\", "backspace-dedent"), + ) + + def after_command(self, cmd: Command) -> None: + super().after_command(cmd) + if self.more_lines is None: + # Force single-line input if we are in raw_input() mode. + # Although there is no direct way to add a \n in this mode, + # multiline buffers can still show up using various + # commands, e.g. navigating the history. + try: + index = self.buffer.index("\n") + except ValueError: + pass + else: + self.buffer = self.buffer[:index] + if self.pos > len(self.buffer): + self.pos = len(self.buffer) + + +def set_auto_history(_should_auto_add_history: bool) -> None: + """Enable or disable automatic history""" + historical_reader.should_auto_add_history = bool(_should_auto_add_history) + + +def _get_this_line_indent(buffer: list[str], pos: int) -> int: + indent = 0 + while pos > 0 and buffer[pos - 1] in " \t": + indent += 1 + pos -= 1 + if pos > 0 and buffer[pos - 1] == "\n": + return indent + return 0 + + +def _get_previous_line_indent(buffer: list[str], pos: int) -> tuple[int, int | None]: + prevlinestart = pos + while prevlinestart > 0 and buffer[prevlinestart - 1] != "\n": + prevlinestart -= 1 + prevlinetext = prevlinestart + while prevlinetext < pos and buffer[prevlinetext] in " \t": + prevlinetext += 1 + if prevlinetext == pos: + indent = None + else: + indent = prevlinetext - prevlinestart + return prevlinestart, indent + + +def _get_first_indentation(buffer: list[str]) -> str | None: + indented_line_start = None + for i in range(len(buffer)): + if (i < len(buffer) - 1 + and buffer[i] == "\n" + and buffer[i + 1] in " \t" + ): + indented_line_start = i + 1 + elif indented_line_start is not None and buffer[i] not in " \t\n": + return ''.join(buffer[indented_line_start : i]) + return None + + +def _should_auto_indent(buffer: list[str], pos: int) -> bool: + # check if last character before "pos" is a colon, ignoring + # whitespaces and comments. + last_char = None + while pos > 0: + pos -= 1 + if last_char is None: + if buffer[pos] not in " \t\n#": # ignore whitespaces and comments + last_char = buffer[pos] + else: + # even if we found a non-whitespace character before + # original pos, we keep going back until newline is reached + # to make sure we ignore comments + if buffer[pos] == "\n": + break + if buffer[pos] == "#": + last_char = None + return last_char == ":" + + +class maybe_accept(commands.Command): + def do(self) -> None: + r: ReadlineAlikeReader + r = self.reader # type: ignore[assignment] + r.dirty = True # this is needed to hide the completion menu, if visible + + # if there are already several lines and the cursor + # is not on the last one, always insert a new \n. + text = r.get_unicode() + + if "\n" in r.buffer[r.pos :] or ( + r.more_lines is not None and r.more_lines(text) + ): + def _newline_before_pos(): + before_idx = r.pos - 1 + while before_idx > 0 and text[before_idx].isspace(): + before_idx -= 1 + return text[before_idx : r.pos].count("\n") > 0 + + # if there's already a new line before the cursor then + # even if the cursor is followed by whitespace, we assume + # the user is trying to terminate the block + if _newline_before_pos() and text[r.pos:].isspace(): + self.finish = True + return + + # auto-indent the next line like the previous line + prevlinestart, indent = _get_previous_line_indent(r.buffer, r.pos) + r.insert("\n") + if not self.reader.paste_mode: + if indent: + for i in range(prevlinestart, prevlinestart + indent): + r.insert(r.buffer[i]) + r.update_last_used_indentation() + if _should_auto_indent(r.buffer, r.pos): + if r.last_used_indentation is not None: + indentation = r.last_used_indentation + else: + # default + indentation = " " * 4 + r.insert(indentation) + elif not self.reader.paste_mode: + self.finish = True + else: + r.insert("\n") + + +class backspace_dedent(commands.Command): + def do(self) -> None: + r = self.reader + b = r.buffer + if r.pos > 0: + repeat = 1 + if b[r.pos - 1] != "\n": + indent = _get_this_line_indent(b, r.pos) + if indent > 0: + ls = r.pos - indent + while ls > 0: + ls, pi = _get_previous_line_indent(b, ls - 1) + if pi is not None and pi < indent: + repeat = indent - pi + break + r.pos -= repeat + del b[r.pos : r.pos + repeat] + r.dirty = True + else: + self.reader.error("can't backspace at start") + + +# ____________________________________________________________ + + +@dataclass(slots=True) +class _ReadlineWrapper: + f_in: int = -1 + f_out: int = -1 + reader: ReadlineAlikeReader | None = field(default=None, repr=False) + saved_history_length: int = -1 + startup_hook: Callback | None = None + config: ReadlineConfig = field(default_factory=ReadlineConfig, repr=False) + + def __post_init__(self) -> None: + if self.f_in == -1: + self.f_in = os.dup(0) + if self.f_out == -1: + self.f_out = os.dup(1) + + def get_reader(self) -> ReadlineAlikeReader: + if self.reader is None: + console = Console(self.f_in, self.f_out, encoding=ENCODING) + self.reader = ReadlineAlikeReader(console=console, config=self.config) + return self.reader + + def input(self, prompt: object = "") -> str: + try: + reader = self.get_reader() + except _error: + assert raw_input is not None + return raw_input(prompt) + prompt_str = str(prompt) + reader.ps1 = prompt_str + sys.audit("builtins.input", prompt_str) + result = reader.readline(startup_hook=self.startup_hook) + sys.audit("builtins.input/result", result) + return result + + def multiline_input(self, more_lines: MoreLinesCallable, ps1: str, ps2: str) -> str: + """Read an input on possibly multiple lines, asking for more + lines as long as 'more_lines(unicodetext)' returns an object whose + boolean value is true. + """ + reader = self.get_reader() + saved = reader.more_lines + try: + reader.more_lines = more_lines + reader.ps1 = ps1 + reader.ps2 = ps1 + reader.ps3 = ps2 + reader.ps4 = "" + with warnings.catch_warnings(action="ignore"): + return reader.readline() + finally: + reader.more_lines = saved + reader.paste_mode = False + + def parse_and_bind(self, string: str) -> None: + pass # XXX we don't support parsing GNU-readline-style init files + + def set_completer(self, function: Completer | None = None) -> None: + self.config.readline_completer = function + + def get_completer(self) -> Completer | None: + return self.config.readline_completer + + def set_completer_delims(self, delimiters: Collection[str]) -> None: + self.config.completer_delims = frozenset(delimiters) + + def get_completer_delims(self) -> str: + return "".join(sorted(self.config.completer_delims)) + + def _histline(self, line: str) -> str: + line = line.rstrip("\n") + return line + + def get_history_length(self) -> int: + return self.saved_history_length + + def set_history_length(self, length: int) -> None: + self.saved_history_length = length + + def get_current_history_length(self) -> int: + return len(self.get_reader().history) + + def read_history_file(self, filename: str = gethistoryfile()) -> None: + # multiline extension (really a hack) for the end of lines that + # are actually continuations inside a single multiline_input() + # history item: we use \r\n instead of just \n. If the history + # file is passed to GNU readline, the extra \r are just ignored. + history = self.get_reader().history + + with open(os.path.expanduser(filename), 'rb') as f: + is_editline = f.readline().startswith(b"_HiStOrY_V2_") + if is_editline: + encoding = "unicode-escape" + else: + f.seek(0) + encoding = "utf-8" + + lines = [line.decode(encoding, errors='replace') for line in f.read().split(b'\n')] + buffer = [] + for line in lines: + if line.endswith("\r"): + buffer.append(line+'\n') + else: + line = self._histline(line) + if buffer: + line = self._histline("".join(buffer).replace("\r", "") + line) + del buffer[:] + if line: + history.append(line) + self.set_history_length(self.get_current_history_length()) + + def write_history_file(self, filename: str = gethistoryfile()) -> None: + maxlength = self.saved_history_length + history = self.get_reader().get_trimmed_history(maxlength) + f = open(os.path.expanduser(filename), "w", + encoding="utf-8", newline="\n") + with f: + for entry in history: + entry = entry.replace("\n", "\r\n") # multiline history support + f.write(entry + "\n") + + def append_history_file(self, filename: str = gethistoryfile()) -> None: + reader = self.get_reader() + saved_length = self.get_history_length() + length = self.get_current_history_length() - saved_length + history = reader.get_trimmed_history(length) + f = open(os.path.expanduser(filename), "a", + encoding="utf-8", newline="\n") + with f: + for entry in history: + entry = entry.replace("\n", "\r\n") # multiline history support + f.write(entry + "\n") + self.set_history_length(saved_length + length) + + def clear_history(self) -> None: + del self.get_reader().history[:] + + def get_history_item(self, index: int) -> str | None: + history = self.get_reader().history + if 1 <= index <= len(history): + return history[index - 1] + else: + return None # like readline.c + + def remove_history_item(self, index: int) -> None: + history = self.get_reader().history + if 0 <= index < len(history): + del history[index] + else: + raise ValueError("No history item at position %d" % index) + # like readline.c + + def replace_history_item(self, index: int, line: str) -> None: + history = self.get_reader().history + if 0 <= index < len(history): + history[index] = self._histline(line) + else: + raise ValueError("No history item at position %d" % index) + # like readline.c + + def add_history(self, line: str) -> None: + self.get_reader().history.append(self._histline(line)) + + def set_startup_hook(self, function: Callback | None = None) -> None: + self.startup_hook = function + + def get_line_buffer(self) -> str: + return self.get_reader().get_unicode() + + def _get_idxs(self) -> tuple[int, int]: + start = cursor = self.get_reader().pos + buf = self.get_line_buffer() + for i in range(cursor - 1, -1, -1): + if buf[i] in self.get_completer_delims(): + break + start = i + return start, cursor + + def get_begidx(self) -> int: + return self._get_idxs()[0] + + def get_endidx(self) -> int: + return self._get_idxs()[1] + + def insert_text(self, text: str) -> None: + self.get_reader().insert(text) + + +_wrapper = _ReadlineWrapper() + +# ____________________________________________________________ +# Public API + +parse_and_bind = _wrapper.parse_and_bind +set_completer = _wrapper.set_completer +get_completer = _wrapper.get_completer +set_completer_delims = _wrapper.set_completer_delims +get_completer_delims = _wrapper.get_completer_delims +get_history_length = _wrapper.get_history_length +set_history_length = _wrapper.set_history_length +get_current_history_length = _wrapper.get_current_history_length +read_history_file = _wrapper.read_history_file +write_history_file = _wrapper.write_history_file +append_history_file = _wrapper.append_history_file +clear_history = _wrapper.clear_history +get_history_item = _wrapper.get_history_item +remove_history_item = _wrapper.remove_history_item +replace_history_item = _wrapper.replace_history_item +add_history = _wrapper.add_history +set_startup_hook = _wrapper.set_startup_hook +get_line_buffer = _wrapper.get_line_buffer +get_begidx = _wrapper.get_begidx +get_endidx = _wrapper.get_endidx +insert_text = _wrapper.insert_text + +# Extension +multiline_input = _wrapper.multiline_input + +# Internal hook +_get_reader = _wrapper.get_reader + +# ____________________________________________________________ +# Stubs + + +def _make_stub(_name: str, _ret: object) -> None: + def stub(*args: object, **kwds: object) -> None: + import warnings + + warnings.warn("readline.%s() not implemented" % _name, stacklevel=2) + + stub.__name__ = _name + globals()[_name] = stub + + +for _name, _ret in [ + ("read_init_file", None), + ("redisplay", None), + ("set_pre_input_hook", None), +]: + assert _name not in globals(), _name + _make_stub(_name, _ret) + +# ____________________________________________________________ + + +def _setup(namespace: Mapping[str, Any]) -> None: + global raw_input + if raw_input is not None: + return # don't run _setup twice + + try: + f_in = sys.stdin.fileno() + f_out = sys.stdout.fileno() + except (AttributeError, ValueError): + return + if not os.isatty(f_in) or not os.isatty(f_out): + return + + _wrapper.f_in = f_in + _wrapper.f_out = f_out + + # set up namespace in rlcompleter, which requires it to be a bona fide dict + if not isinstance(namespace, dict): + namespace = dict(namespace) + _wrapper.config.module_completer = ModuleCompleter(namespace) + _wrapper.config.readline_completer = RLCompleter(namespace).complete + + # this is not really what readline.c does. Better than nothing I guess + import builtins + raw_input = builtins.input + builtins.input = _wrapper.input + + +raw_input: Callable[[object], str] | None = None diff --git a/PythonLib/full/_pyrepl/simple_interact.py b/PythonLib/full/_pyrepl/simple_interact.py new file mode 100644 index 00000000..6508f023 --- /dev/null +++ b/PythonLib/full/_pyrepl/simple_interact.py @@ -0,0 +1,181 @@ +# Copyright 2000-2010 Michael Hudson-Doyle +# Armin Rigo +# +# All Rights Reserved +# +# +# Permission to use, copy, modify, and distribute this software and +# its documentation for any purpose is hereby granted without fee, +# provided that the above copyright notice appear in all copies and +# that both that copyright notice and this permission notice appear in +# supporting documentation. +# +# THE AUTHOR MICHAEL HUDSON DISCLAIMS ALL WARRANTIES WITH REGARD TO +# THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY +# AND FITNESS, IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, +# INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER +# RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF +# CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN +# CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + +"""This is an alternative to python_reader which tries to emulate +the CPython prompt as closely as possible, with the exception of +allowing multiline input and multiline history entries. +""" + +from __future__ import annotations + +import _sitebuiltins +import functools +import os +import sys +import code +import warnings +import errno + +from .readline import _get_reader, multiline_input, append_history_file + + +_error: tuple[type[Exception], ...] | type[Exception] +try: + from .unix_console import _error +except ModuleNotFoundError: + from .windows_console import _error + +def check() -> str: + """Returns the error message if there is a problem initializing the state.""" + try: + _get_reader() + except _error as e: + if term := os.environ.get("TERM", ""): + term = f"; TERM={term}" + return str(str(e) or repr(e) or "unknown error") + term + return "" + + +def _strip_final_indent(text: str) -> str: + # kill spaces and tabs at the end, but only if they follow '\n'. + # meant to remove the auto-indentation only (although it would of + # course also remove explicitly-added indentation). + short = text.rstrip(" \t") + n = len(short) + if n > 0 and text[n - 1] == "\n": + return short + return text + + +def _clear_screen(): + reader = _get_reader() + reader.scheduled_commands.append("clear_screen") + + +REPL_COMMANDS = { + "exit": _sitebuiltins.Quitter('exit', ''), + "quit": _sitebuiltins.Quitter('quit' ,''), + "copyright": _sitebuiltins._Printer('copyright', sys.copyright), + "help": _sitebuiltins._Helper(), + "clear": _clear_screen, + "\x1a": _sitebuiltins.Quitter('\x1a', ''), +} + + +def _more_lines(console: code.InteractiveConsole, unicodetext: str) -> bool: + # ooh, look at the hack: + src = _strip_final_indent(unicodetext) + try: + code = console.compile(src, "", "single") + except (OverflowError, SyntaxError, ValueError): + lines = src.splitlines(keepends=True) + if len(lines) == 1: + return False + + last_line = lines[-1] + was_indented = last_line.startswith((" ", "\t")) + not_empty = last_line.strip() != "" + incomplete = not last_line.endswith("\n") + return (was_indented or not_empty) and incomplete + else: + return code is None + + +def run_multiline_interactive_console( + console: code.InteractiveConsole, + *, + future_flags: int = 0, +) -> None: + from .readline import _setup + _setup(console.locals) + if future_flags: + console.compile.compiler.flags |= future_flags + + more_lines = functools.partial(_more_lines, console) + input_n = 0 + + _is_x_showrefcount_set = sys._xoptions.get("showrefcount") + _is_pydebug_build = hasattr(sys, "gettotalrefcount") + show_ref_count = _is_x_showrefcount_set and _is_pydebug_build + + def maybe_run_command(statement: str) -> bool: + statement = statement.strip() + if statement in console.locals or statement not in REPL_COMMANDS: + return False + + reader = _get_reader() + reader.history.pop() # skip internal commands in history + command = REPL_COMMANDS[statement] + if callable(command): + # Make sure that history does not change because of commands + with reader.suspend_history(), reader.suspend_colorization(): + command() + return True + return False + + while True: + try: + try: + sys.stdout.flush() + except Exception: + pass + + ps1 = getattr(sys, "ps1", ">>> ") + ps2 = getattr(sys, "ps2", "... ") + try: + statement = multiline_input(more_lines, ps1, ps2) + except EOFError: + break + + if maybe_run_command(statement): + continue + + input_name = f"" + more = console.push(_strip_final_indent(statement), filename=input_name, _symbol="single") # type: ignore[call-arg] + assert not more + try: + append_history_file() + except (FileNotFoundError, PermissionError, OSError) as e: + warnings.warn(f"failed to open the history file for writing: {e}") + + input_n += 1 + except KeyboardInterrupt: + r = _get_reader() + r.cmpltn_reset() + if r.input_trans is r.isearch_trans: + r.do_cmd(("isearch-end", [""])) + r.pos = len(r.get_unicode()) + r.dirty = True + r.refresh() + console.write("\nKeyboardInterrupt\n") + console.resetbuffer() + except MemoryError: + console.write("\nMemoryError\n") + console.resetbuffer() + except SystemExit: + raise + except: + console.showtraceback() + console.resetbuffer() + if show_ref_count: + console.write( + f"[{sys.gettotalrefcount()} refs," + f" {sys.getallocatedblocks()} blocks]\n" + ) diff --git a/PythonLib/full/_pyrepl/terminfo.py b/PythonLib/full/_pyrepl/terminfo.py new file mode 100644 index 00000000..d02ef69c --- /dev/null +++ b/PythonLib/full/_pyrepl/terminfo.py @@ -0,0 +1,488 @@ +"""Pure Python curses-like terminal capability queries.""" + +from dataclasses import dataclass, field +import errno +import os +from pathlib import Path +import re +import struct + + +# Terminfo constants +MAGIC16 = 0o432 # Magic number for 16-bit terminfo format +MAGIC32 = 0o1036 # Magic number for 32-bit terminfo format + +# Special values for absent/cancelled capabilities +ABSENT_BOOLEAN = -1 +ABSENT_NUMERIC = -1 +CANCELLED_NUMERIC = -2 +ABSENT_STRING = None +CANCELLED_STRING = None + + +# Standard string capability names from ncurses Caps file +# This matches the order used by ncurses when compiling terminfo +# fmt: off +_STRING_NAMES: tuple[str, ...] = ( + "cbt", "bel", "cr", "csr", "tbc", "clear", "el", "ed", "hpa", "cmdch", + "cup", "cud1", "home", "civis", "cub1", "mrcup", "cnorm", "cuf1", "ll", + "cuu1", "cvvis", "dch1", "dl1", "dsl", "hd", "smacs", "blink", "bold", + "smcup", "smdc", "dim", "smir", "invis", "prot", "rev", "smso", "smul", + "ech", "rmacs", "sgr0", "rmcup", "rmdc", "rmir", "rmso", "rmul", "flash", + "ff", "fsl", "is1", "is2", "is3", "if", "ich1", "il1", "ip", "kbs", "ktbc", + "kclr", "kctab", "kdch1", "kdl1", "kcud1", "krmir", "kel", "ked", "kf0", + "kf1", "kf10", "kf2", "kf3", "kf4", "kf5", "kf6", "kf7", "kf8", "kf9", + "khome", "kich1", "kil1", "kcub1", "kll", "knp", "kpp", "kcuf1", "kind", + "kri", "khts", "kcuu1", "rmkx", "smkx", "lf0", "lf1", "lf10", "lf2", "lf3", + "lf4", "lf5", "lf6", "lf7", "lf8", "lf9", "rmm", "smm", "nel", "pad", "dch", + "dl", "cud", "ich", "indn", "il", "cub", "cuf", "rin", "cuu", "pfkey", + "pfloc", "pfx", "mc0", "mc4", "mc5", "rep", "rs1", "rs2", "rs3", "rf", "rc", + "vpa", "sc", "ind", "ri", "sgr", "hts", "wind", "ht", "tsl", "uc", "hu", + "iprog", "ka1", "ka3", "kb2", "kc1", "kc3", "mc5p", "rmp", "acsc", "pln", + "kcbt", "smxon", "rmxon", "smam", "rmam", "xonc", "xoffc", "enacs", "smln", + "rmln", "kbeg", "kcan", "kclo", "kcmd", "kcpy", "kcrt", "kend", "kent", + "kext", "kfnd", "khlp", "kmrk", "kmsg", "kmov", "knxt", "kopn", "kopt", + "kprv", "kprt", "krdo", "kref", "krfr", "krpl", "krst", "kres", "ksav", + "kspd", "kund", "kBEG", "kCAN", "kCMD", "kCPY", "kCRT", "kDC", "kDL", + "kslt", "kEND", "kEOL", "kEXT", "kFND", "kHLP", "kHOM", "kIC", "kLFT", + "kMSG", "kMOV", "kNXT", "kOPT", "kPRV", "kPRT", "kRDO", "kRPL", "kRIT", + "kRES", "kSAV", "kSPD", "kUND", "rfi", "kf11", "kf12", "kf13", "kf14", + "kf15", "kf16", "kf17", "kf18", "kf19", "kf20", "kf21", "kf22", "kf23", + "kf24", "kf25", "kf26", "kf27", "kf28", "kf29", "kf30", "kf31", "kf32", + "kf33", "kf34", "kf35", "kf36", "kf37", "kf38", "kf39", "kf40", "kf41", + "kf42", "kf43", "kf44", "kf45", "kf46", "kf47", "kf48", "kf49", "kf50", + "kf51", "kf52", "kf53", "kf54", "kf55", "kf56", "kf57", "kf58", "kf59", + "kf60", "kf61", "kf62", "kf63", "el1", "mgc", "smgl", "smgr", "fln", "sclk", + "dclk", "rmclk", "cwin", "wingo", "hup","dial", "qdial", "tone", "pulse", + "hook", "pause", "wait", "u0", "u1", "u2", "u3", "u4", "u5", "u6", "u7", + "u8", "u9", "op", "oc", "initc", "initp", "scp", "setf", "setb", "cpi", + "lpi", "chr", "cvr", "defc", "swidm", "sdrfq", "sitm", "slm", "smicm", + "snlq", "snrmq", "sshm", "ssubm", "ssupm", "sum", "rwidm", "ritm", "rlm", + "rmicm", "rshm", "rsubm", "rsupm", "rum", "mhpa", "mcud1", "mcub1", "mcuf1", + "mvpa", "mcuu1", "porder", "mcud", "mcub", "mcuf", "mcuu", "scs", "smgb", + "smgbp", "smglp", "smgrp", "smgt", "smgtp", "sbim", "scsd", "rbim", "rcsd", + "subcs", "supcs", "docr", "zerom", "csnm", "kmous", "minfo", "reqmp", + "getm", "setaf", "setab", "pfxl", "devt", "csin", "s0ds", "s1ds", "s2ds", + "s3ds", "smglr", "smgtb", "birep", "binel", "bicr", "colornm", "defbi", + "endbi", "setcolor", "slines", "dispc", "smpch", "rmpch", "smsc", "rmsc", + "pctrm", "scesc", "scesa", "ehhlm", "elhlm", "elohlm", "erhlm", "ethlm", + "evhlm", "sgr1", "slength", "OTi2", "OTrs", "OTnl", "OTbc", "OTko", "OTma", + "OTG2", "OTG3", "OTG1", "OTG4", "OTGR", "OTGL", "OTGU", "OTGD", "OTGH", + "OTGV", "OTGC","meml", "memu", "box1" +) +# fmt: on + + +def _get_terminfo_dirs() -> list[Path]: + """Get list of directories to search for terminfo files. + + Based on ncurses behavior in: + - ncurses/tinfo/db_iterator.c:_nc_next_db() + - ncurses/tinfo/read_entry.c:_nc_read_entry() + """ + dirs = [] + + terminfo = os.environ.get("TERMINFO") + if terminfo: + dirs.append(terminfo) + + try: + home = Path.home() + dirs.append(str(home / ".terminfo")) + except RuntimeError: + pass + + # Check TERMINFO_DIRS + terminfo_dirs = os.environ.get("TERMINFO_DIRS", "") + if terminfo_dirs: + for d in terminfo_dirs.split(":"): + if d: + dirs.append(d) + + dirs.extend( + [ + "/etc/terminfo", + "/lib/terminfo", + "/usr/lib/terminfo", + "/usr/share/terminfo", + "/usr/share/lib/terminfo", + "/usr/share/misc/terminfo", + "/usr/local/lib/terminfo", + "/usr/local/share/terminfo", + ] + ) + + return [Path(d) for d in dirs if Path(d).is_dir()] + + +def _validate_terminal_name_or_raise(terminal_name: str) -> None: + if not isinstance(terminal_name, str): + raise TypeError("`terminal_name` must be a string") + + if not terminal_name: + raise ValueError("`terminal_name` cannot be empty") + + if "\x00" in terminal_name: + raise ValueError("NUL character found in `terminal_name`") + + t = Path(terminal_name) + if len(t.parts) > 1: + raise ValueError("`terminal_name` cannot contain path separators") + + +def _read_terminfo_file(terminal_name: str) -> bytes: + """Find and read terminfo file for given terminal name. + + Terminfo files are stored in directories using the first character + of the terminal name as a subdirectory. + """ + _validate_terminal_name_or_raise(terminal_name) + first_char = terminal_name[0].lower() + filename = terminal_name + + for directory in _get_terminfo_dirs(): + path = directory / first_char / filename + if path.is_file(): + return path.read_bytes() + + # Try with hex encoding of first char (for special chars) + hex_dir = "%02x" % ord(first_char) + path = directory / hex_dir / filename + if path.is_file(): + return path.read_bytes() + + raise FileNotFoundError(errno.ENOENT, os.strerror(errno.ENOENT), filename) + + +# Hard-coded terminal capabilities for common terminals +# This is a minimal subset needed by PyREPL +_TERMINAL_CAPABILITIES = { + # ANSI/xterm-compatible terminals + "ansi": { + # Bell + "bel": b"\x07", + # Cursor movement + "cub": b"\x1b[%p1%dD", # Move cursor left N columns + "cud": b"\x1b[%p1%dB", # Move cursor down N rows + "cuf": b"\x1b[%p1%dC", # Move cursor right N columns + "cuu": b"\x1b[%p1%dA", # Move cursor up N rows + "cub1": b"\x08", # Move cursor left 1 column + "cud1": b"\n", # Move cursor down 1 row + "cuf1": b"\x1b[C", # Move cursor right 1 column + "cuu1": b"\x1b[A", # Move cursor up 1 row + "cup": b"\x1b[%i%p1%d;%p2%dH", # Move cursor to row, column + "hpa": b"\x1b[%i%p1%dG", # Move cursor to column + # Clear operations + "clear": b"\x1b[H\x1b[2J", # Clear screen and home cursor + "el": b"\x1b[K", # Clear to end of line + # Insert/delete + "dch": b"\x1b[%p1%dP", # Delete N characters + "dch1": b"\x1b[P", # Delete 1 character + "ich": b"\x1b[%p1%d@", # Insert N characters + "ich1": b"", # Insert 1 character + # Cursor visibility + "civis": b"\x1b[?25l", # Make cursor invisible + "cnorm": b"\x1b[?12l\x1b[?25h", # Make cursor normal (visible) + # Scrolling + "ind": b"\n", # Scroll up one line + "ri": b"\x1bM", # Scroll down one line + # Keypad mode + "smkx": b"\x1b[?1h\x1b=", # Enable keypad mode + "rmkx": b"\x1b[?1l\x1b>", # Disable keypad mode + # Padding (not used in modern terminals) + "pad": b"", + # Function keys and special keys + "kdch1": b"\x1b[3~", # Delete key + "kcud1": b"\x1bOB", # Down arrow + "kend": b"\x1bOF", # End key + "kent": b"\x1bOM", # Enter key + "khome": b"\x1bOH", # Home key + "kich1": b"\x1b[2~", # Insert key + "kcub1": b"\x1bOD", # Left arrow + "knp": b"\x1b[6~", # Page down + "kpp": b"\x1b[5~", # Page up + "kcuf1": b"\x1bOC", # Right arrow + "kcuu1": b"\x1bOA", # Up arrow + # Function keys F1-F20 + "kf1": b"\x1bOP", + "kf2": b"\x1bOQ", + "kf3": b"\x1bOR", + "kf4": b"\x1bOS", + "kf5": b"\x1b[15~", + "kf6": b"\x1b[17~", + "kf7": b"\x1b[18~", + "kf8": b"\x1b[19~", + "kf9": b"\x1b[20~", + "kf10": b"\x1b[21~", + "kf11": b"\x1b[23~", + "kf12": b"\x1b[24~", + "kf13": b"\x1b[1;2P", + "kf14": b"\x1b[1;2Q", + "kf15": b"\x1b[1;2R", + "kf16": b"\x1b[1;2S", + "kf17": b"\x1b[15;2~", + "kf18": b"\x1b[17;2~", + "kf19": b"\x1b[18;2~", + "kf20": b"\x1b[19;2~", + }, + # Dumb terminal - minimal capabilities + "dumb": { + "bel": b"\x07", # Bell + "cud1": b"\n", # Move down 1 row (newline) + "ind": b"\n", # Scroll up one line (newline) + }, + # Linux console + "linux": { + # Bell + "bel": b"\x07", + # Cursor movement + "cub": b"\x1b[%p1%dD", # Move cursor left N columns + "cud": b"\x1b[%p1%dB", # Move cursor down N rows + "cuf": b"\x1b[%p1%dC", # Move cursor right N columns + "cuu": b"\x1b[%p1%dA", # Move cursor up N rows + "cub1": b"\x08", # Move cursor left 1 column (backspace) + "cud1": b"\n", # Move cursor down 1 row (newline) + "cuf1": b"\x1b[C", # Move cursor right 1 column + "cuu1": b"\x1b[A", # Move cursor up 1 row + "cup": b"\x1b[%i%p1%d;%p2%dH", # Move cursor to row, column + "hpa": b"\x1b[%i%p1%dG", # Move cursor to column + # Clear operations + "clear": b"\x1b[H\x1b[J", # Clear screen and home cursor (different from ansi!) + "el": b"\x1b[K", # Clear to end of line + # Insert/delete + "dch": b"\x1b[%p1%dP", # Delete N characters + "dch1": b"\x1b[P", # Delete 1 character + "ich": b"\x1b[%p1%d@", # Insert N characters + "ich1": b"\x1b[@", # Insert 1 character + # Cursor visibility + "civis": b"\x1b[?25l\x1b[?1c", # Make cursor invisible + "cnorm": b"\x1b[?25h\x1b[?0c", # Make cursor normal + # Scrolling + "ind": b"\n", # Scroll up one line + "ri": b"\x1bM", # Scroll down one line + # Keypad mode + "smkx": b"\x1b[?1h\x1b=", # Enable keypad mode + "rmkx": b"\x1b[?1l\x1b>", # Disable keypad mode + # Function keys and special keys + "kdch1": b"\x1b[3~", # Delete key + "kcud1": b"\x1b[B", # Down arrow + "kend": b"\x1b[4~", # End key (different from ansi!) + "khome": b"\x1b[1~", # Home key (different from ansi!) + "kich1": b"\x1b[2~", # Insert key + "kcub1": b"\x1b[D", # Left arrow + "knp": b"\x1b[6~", # Page down + "kpp": b"\x1b[5~", # Page up + "kcuf1": b"\x1b[C", # Right arrow + "kcuu1": b"\x1b[A", # Up arrow + # Function keys + "kf1": b"\x1b[[A", + "kf2": b"\x1b[[B", + "kf3": b"\x1b[[C", + "kf4": b"\x1b[[D", + "kf5": b"\x1b[[E", + "kf6": b"\x1b[17~", + "kf7": b"\x1b[18~", + "kf8": b"\x1b[19~", + "kf9": b"\x1b[20~", + "kf10": b"\x1b[21~", + "kf11": b"\x1b[23~", + "kf12": b"\x1b[24~", + "kf13": b"\x1b[25~", + "kf14": b"\x1b[26~", + "kf15": b"\x1b[28~", + "kf16": b"\x1b[29~", + "kf17": b"\x1b[31~", + "kf18": b"\x1b[32~", + "kf19": b"\x1b[33~", + "kf20": b"\x1b[34~", + }, +} + +# Map common TERM values to capability sets +_TERM_ALIASES = { + "xterm": "ansi", + "xterm-color": "ansi", + "xterm-256color": "ansi", + "screen": "ansi", + "screen-256color": "ansi", + "tmux": "ansi", + "tmux-256color": "ansi", + "vt100": "ansi", + "vt220": "ansi", + "rxvt": "ansi", + "rxvt-unicode": "ansi", + "rxvt-unicode-256color": "ansi", + "unknown": "dumb", +} + + +@dataclass +class TermInfo: + terminal_name: str | bytes | None + fallback: bool = True + + _capabilities: dict[str, bytes] = field(default_factory=dict) + + def __post_init__(self) -> None: + """Initialize terminal capabilities for the given terminal type. + + Based on ncurses implementation in: + - ncurses/tinfo/lib_setup.c:setupterm() and _nc_setupterm() + - ncurses/tinfo/lib_setup.c:TINFO_SETUP_TERM() + + This version first attempts to read terminfo database files like ncurses, + then, if `fallback` is True, falls back to hardcoded capabilities for + common terminal types. + """ + # If termstr is None or empty, try to get from environment + if not self.terminal_name: + self.terminal_name = os.environ.get("TERM") or "ANSI" + + if isinstance(self.terminal_name, bytes): + self.terminal_name = self.terminal_name.decode("ascii") + + try: + self._parse_terminfo_file(self.terminal_name) + except (OSError, ValueError): + if not self.fallback: + raise + + term_type = _TERM_ALIASES.get( + self.terminal_name, self.terminal_name + ) + if term_type not in _TERMINAL_CAPABILITIES: + term_type = "dumb" + self._capabilities = _TERMINAL_CAPABILITIES[term_type].copy() + + def _parse_terminfo_file(self, terminal_name: str) -> None: + """Parse a terminfo file. + + Populate the _capabilities dict for easy retrieval + + Based on ncurses implementation in: + - ncurses/tinfo/read_entry.c:_nc_read_termtype() + - ncurses/tinfo/read_entry.c:_nc_read_file_entry() + - ncurses/tinfo/lib_ti.c:tigetstr() + """ + data = _read_terminfo_file(terminal_name) + too_short = f"TermInfo file for {terminal_name!r} too short" + offset = 12 + if len(data) < offset: + raise ValueError(too_short) + + magic, name_size, bool_count, num_count, str_count, str_size = ( + struct.unpack(" len(data): + raise ValueError(too_short) + + # Read string offsets + end_offset = offset + 2 * str_count + if offset > len(data): + raise ValueError(too_short) + string_offset_data = data[offset:end_offset] + string_offsets = [ + off for [off] in struct.iter_unpack(" len(data): + raise ValueError(too_short) + string_table = data[offset : offset + str_size] + + # Extract strings from string table + capabilities = {} + for cap, off in zip(_STRING_NAMES, string_offsets): + if off < 0: + # CANCELLED_STRING; we do not store those + continue + elif off < len(string_table): + # Find null terminator + end = string_table.find(0, off) + if end >= 0: + capabilities[cap] = string_table[off:end] + # in other cases this is ABSENT_STRING; we don't store those. + + # Note: we don't support extended capabilities since PyREPL doesn't + # need them. + + self._capabilities = capabilities + + def get(self, cap: str) -> bytes | None: + """Get terminal capability string by name. + """ + if not isinstance(cap, str): + raise TypeError(f"`cap` must be a string, not {type(cap)}") + + return self._capabilities.get(cap) + + +def tparm(cap_bytes: bytes, *params: int) -> bytes: + """Parameterize a terminal capability string. + + Based on ncurses implementation in: + - ncurses/tinfo/lib_tparm.c:tparm() + - ncurses/tinfo/lib_tparm.c:tparam_internal() + + The ncurses version implements a full stack-based interpreter for + terminfo parameter strings. This pure Python version implements only + the subset of parameter substitution operations needed by PyREPL: + - %i (increment parameters for 1-based indexing) + - %p[1-9]%d (parameter substitution) + - %p[1-9]%{n}%+%d (parameter plus constant) + """ + if not isinstance(cap_bytes, bytes): + raise TypeError(f"`cap` must be bytes, not {type(cap_bytes)}") + + result = cap_bytes + + # %i - increment parameters (1-based instead of 0-based) + increment = b"%i" in result + if increment: + result = result.replace(b"%i", b"") + + # Replace %p1%d, %p2%d, etc. with actual parameter values + for i in range(len(params)): + pattern = b"%%p%d%%d" % (i + 1) + if pattern in result: + value = params[i] + if increment: + value += 1 + result = result.replace(pattern, str(value).encode("ascii")) + + # Handle %p1%{1}%+%d (parameter plus constant) + # Used in some cursor positioning sequences + pattern_re = re.compile(rb"%p(\d)%\{(\d+)\}%\+%d") + matches = list(pattern_re.finditer(result)) + for match in reversed(matches): # reversed to maintain positions + param_idx = int(match.group(1)) + constant = int(match.group(2)) + value = params[param_idx] + constant + result = ( + result[: match.start()] + + str(value).encode("ascii") + + result[match.end() :] + ) + + return result diff --git a/PythonLib/full/_pyrepl/trace.py b/PythonLib/full/_pyrepl/trace.py new file mode 100644 index 00000000..943ee12f --- /dev/null +++ b/PythonLib/full/_pyrepl/trace.py @@ -0,0 +1,34 @@ +from __future__ import annotations + +import os +import sys + +# types +if False: + from typing import IO + + +trace_file: IO[str] | None = None +if trace_filename := os.environ.get("PYREPL_TRACE"): + trace_file = open(trace_filename, "a") + + + +if sys.platform == "emscripten": + from posix import _emscripten_log + + def trace(line: str, *k: object, **kw: object) -> None: + if "PYREPL_TRACE" not in os.environ: + return + if k or kw: + line = line.format(*k, **kw) + _emscripten_log(line) + +else: + def trace(line: str, *k: object, **kw: object) -> None: + if trace_file is None: + return + if k or kw: + line = line.format(*k, **kw) + trace_file.write(line + "\n") + trace_file.flush() diff --git a/PythonLib/full/_pyrepl/types.py b/PythonLib/full/_pyrepl/types.py new file mode 100644 index 00000000..c5b7ebc1 --- /dev/null +++ b/PythonLib/full/_pyrepl/types.py @@ -0,0 +1,10 @@ +from collections.abc import Callable, Iterator + +type Callback = Callable[[], object] +type SimpleContextManager = Iterator[None] +type KeySpec = str # like r"\C-c" +type CommandName = str # like "interrupt" +type EventTuple = tuple[CommandName, str] +type Completer = Callable[[str, int], str | None] +type CharBuffer = list[str] +type CharWidths = list[int] diff --git a/PythonLib/full/_pyrepl/unix_console.py b/PythonLib/full/_pyrepl/unix_console.py new file mode 100644 index 00000000..937b5df6 --- /dev/null +++ b/PythonLib/full/_pyrepl/unix_console.py @@ -0,0 +1,843 @@ +# Copyright 2000-2010 Michael Hudson-Doyle +# Antonio Cuni +# Armin Rigo +# +# All Rights Reserved +# +# +# Permission to use, copy, modify, and distribute this software and +# its documentation for any purpose is hereby granted without fee, +# provided that the above copyright notice appear in all copies and +# that both that copyright notice and this permission notice appear in +# supporting documentation. +# +# THE AUTHOR MICHAEL HUDSON DISCLAIMS ALL WARRANTIES WITH REGARD TO +# THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY +# AND FITNESS, IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, +# INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER +# RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF +# CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN +# CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + +from __future__ import annotations + +import errno +import os +import re +import select +import signal +import struct +import termios +import time +import types +import platform +from fcntl import ioctl + +from . import terminfo +from .console import Console, Event +from .fancy_termios import tcgetattr, tcsetattr, TermState +from .trace import trace +from .unix_eventqueue import EventQueue +from .utils import wlen + +# declare posix optional to allow None assignment on other platforms +posix: types.ModuleType | None +try: + import posix +except ImportError: + posix = None + +TYPE_CHECKING = False + +# types +if TYPE_CHECKING: + from typing import AbstractSet, IO, Literal, overload, cast +else: + overload = lambda func: None + cast = lambda typ, val: val + + +class InvalidTerminal(RuntimeError): + def __init__(self, message: str) -> None: + super().__init__(errno.EIO, message) + + +_error = (termios.error, InvalidTerminal) +_error_codes_to_ignore = frozenset([errno.EIO, errno.ENXIO, errno.EPERM]) + +SIGWINCH_EVENT = "repaint" + +FIONREAD = getattr(termios, "FIONREAD", None) +TIOCGWINSZ = getattr(termios, "TIOCGWINSZ", None) + +# ------------ start of baudrate definitions ------------ + +# Add (possibly) missing baudrates (check termios man page) to termios + + +def add_baudrate_if_supported(dictionary: dict[int, int], rate: int) -> None: + baudrate_name = "B%d" % rate + if hasattr(termios, baudrate_name): + dictionary[getattr(termios, baudrate_name)] = rate + + +# Check the termios man page (Line speed) to know where these +# values come from. +potential_baudrates = [ + 0, + 110, + 115200, + 1200, + 134, + 150, + 1800, + 19200, + 200, + 230400, + 2400, + 300, + 38400, + 460800, + 4800, + 50, + 57600, + 600, + 75, + 9600, +] + +ratedict: dict[int, int] = {} +for rate in potential_baudrates: + add_baudrate_if_supported(ratedict, rate) + +# Clean up variables to avoid unintended usage +del rate, add_baudrate_if_supported + +# ------------ end of baudrate definitions ------------ + +delayprog = re.compile(b"\\$<([0-9]+)((?:/|\\*){0,2})>") + +try: + poll: type[select.poll] = select.poll +except AttributeError: + # this is exactly the minimum necessary to support what we + # do with poll objects + class MinimalPoll: + def __init__(self): + pass + + def register(self, fd, flag): + self.fd = fd + + # note: The 'timeout' argument is received as *milliseconds* + def poll(self, timeout: float | None = None) -> list[int]: + if timeout is None: + r, w, e = select.select([self.fd], [], []) + else: + r, w, e = select.select([self.fd], [], [], timeout / 1000) + return r + + poll = MinimalPoll # type: ignore[assignment] + + +class UnixConsole(Console): + def __init__( + self, + f_in: IO[bytes] | int = 0, + f_out: IO[bytes] | int = 1, + term: str = "", + encoding: str = "", + ): + """ + Initialize the UnixConsole. + + Parameters: + - f_in (int or file-like object): Input file descriptor or object. + - f_out (int or file-like object): Output file descriptor or object. + - term (str): Terminal name. + - encoding (str): Encoding to use for I/O operations. + """ + super().__init__(f_in, f_out, term, encoding) + + self.pollob = poll() + self.pollob.register(self.input_fd, select.POLLIN) + self.terminfo = terminfo.TermInfo(term or None) + self.term = term + self.is_apple_terminal = ( + platform.system() == "Darwin" + and os.getenv("TERM_PROGRAM") == "Apple_Terminal" + ) + + try: + self.__input_fd_set(tcgetattr(self.input_fd), ignore=frozenset()) + except _error as e: + raise RuntimeError(f"termios failure ({e.args[1]})") + + @overload + def _my_getstr( + cap: str, optional: Literal[False] = False + ) -> bytes: ... + + @overload + def _my_getstr(cap: str, optional: bool) -> bytes | None: ... + + def _my_getstr(cap: str, optional: bool = False) -> bytes | None: + r = self.terminfo.get(cap) + if not optional and r is None: + raise InvalidTerminal( + f"terminal doesn't have the required {cap} capability" + ) + return r + + self._bel = _my_getstr("bel") + self._civis = _my_getstr("civis", optional=True) + self._clear = _my_getstr("clear") + self._cnorm = _my_getstr("cnorm", optional=True) + self._cub = _my_getstr("cub", optional=True) + self._cub1 = _my_getstr("cub1", optional=True) + self._cud = _my_getstr("cud", optional=True) + self._cud1 = _my_getstr("cud1", optional=True) + self._cuf = _my_getstr("cuf", optional=True) + self._cuf1 = _my_getstr("cuf1", optional=True) + self._cup = _my_getstr("cup") + self._cuu = _my_getstr("cuu", optional=True) + self._cuu1 = _my_getstr("cuu1", optional=True) + self._dch1 = _my_getstr("dch1", optional=True) + self._dch = _my_getstr("dch", optional=True) + self._el = _my_getstr("el") + self._hpa = _my_getstr("hpa", optional=True) + self._ich = _my_getstr("ich", optional=True) + self._ich1 = _my_getstr("ich1", optional=True) + self._ind = _my_getstr("ind", optional=True) + self._pad = _my_getstr("pad", optional=True) + self._ri = _my_getstr("ri", optional=True) + self._rmkx = _my_getstr("rmkx", optional=True) + self._smkx = _my_getstr("smkx", optional=True) + + self.__setup_movement() + + self.event_queue = EventQueue( + self.input_fd, self.encoding, self.terminfo + ) + self.cursor_visible = 1 + + signal.signal(signal.SIGCONT, self._sigcont_handler) + + def _sigcont_handler(self, signum, frame): + self.restore() + self.prepare() + + def __read(self, n: int) -> bytes: + return os.read(self.input_fd, n) + + def change_encoding(self, encoding: str) -> None: + """ + Change the encoding used for I/O operations. + + Parameters: + - encoding (str): New encoding to use. + """ + self.encoding = encoding + + def refresh(self, screen, c_xy): + """ + Refresh the console screen. + + Parameters: + - screen (list): List of strings representing the screen contents. + - c_xy (tuple): Cursor position (x, y) on the screen. + """ + cx, cy = c_xy + if not self.__gone_tall: + while len(self.screen) < min(len(screen), self.height): + self.__hide_cursor() + if self.screen: + self.__move(0, len(self.screen) - 1) + self.__write("\n") + self.posxy = 0, len(self.screen) + self.screen.append("") + else: + while len(self.screen) < len(screen): + self.screen.append("") + + if len(screen) > self.height: + self.__gone_tall = 1 + self.__move = self.__move_tall + + px, py = self.posxy + old_offset = offset = self.__offset + height = self.height + + # we make sure the cursor is on the screen, and that we're + # using all of the screen if we can + if cy < offset: + offset = cy + elif cy >= offset + height: + offset = cy - height + 1 + elif offset > 0 and len(screen) < offset + height: + offset = max(len(screen) - height, 0) + screen.append("") + + oldscr = self.screen[old_offset : old_offset + height] + newscr = screen[offset : offset + height] + + # use hardware scrolling if we have it. + if old_offset > offset and self._ri: + self.__hide_cursor() + self.__write_code(self._cup, 0, 0) + self.posxy = 0, old_offset + for i in range(old_offset - offset): + self.__write_code(self._ri) + oldscr.pop(-1) + oldscr.insert(0, "") + elif old_offset < offset and self._ind: + self.__hide_cursor() + self.__write_code(self._cup, self.height - 1, 0) + self.posxy = 0, old_offset + self.height - 1 + for i in range(offset - old_offset): + self.__write_code(self._ind) + oldscr.pop(0) + oldscr.append("") + + self.__offset = offset + + for ( + y, + oldline, + newline, + ) in zip(range(offset, offset + height), oldscr, newscr): + if oldline != newline: + self.__write_changed_line(y, oldline, newline, px) + + y = len(newscr) + while y < len(oldscr): + self.__hide_cursor() + self.__move(0, y) + self.posxy = 0, y + self.__write_code(self._el) + y += 1 + + self.__show_cursor() + + self.screen = screen.copy() + self.move_cursor(cx, cy) + self.flushoutput() + + def move_cursor(self, x, y): + """ + Move the cursor to the specified position on the screen. + + Parameters: + - x (int): X coordinate. + - y (int): Y coordinate. + """ + if y < self.__offset or y >= self.__offset + self.height: + self.event_queue.insert(Event("scroll", None)) + else: + self.__move(x, y) + self.posxy = x, y + self.flushoutput() + + def prepare(self): + """ + Prepare the console for input/output operations. + """ + self.__buffer = [] + + self.__svtermstate = tcgetattr(self.input_fd) + raw = self.__svtermstate.copy() + raw.iflag &= ~(termios.INPCK | termios.ISTRIP | termios.IXON) + raw.oflag &= ~(termios.OPOST) + raw.cflag &= ~(termios.CSIZE | termios.PARENB) + raw.cflag |= termios.CS8 + raw.iflag |= termios.BRKINT + raw.lflag &= ~(termios.ICANON | termios.ECHO | termios.IEXTEN) + raw.lflag |= termios.ISIG + raw.cc[termios.VMIN] = 1 + raw.cc[termios.VTIME] = 0 + self.__input_fd_set(raw) + + # In macOS terminal we need to deactivate line wrap via ANSI escape code + if self.is_apple_terminal: + os.write(self.output_fd, b"\033[?7l") + + self.screen = [] + self.height, self.width = self.getheightwidth() + + self.posxy = 0, 0 + self.__gone_tall = 0 + self.__move = self.__move_short + self.__offset = 0 + + self.__maybe_write_code(self._smkx) + + try: + self.old_sigwinch = signal.signal(signal.SIGWINCH, self.__sigwinch) + except ValueError: + pass + + self.__enable_bracketed_paste() + + def restore(self): + """ + Restore the console to the default state + """ + self.__disable_bracketed_paste() + self.__maybe_write_code(self._rmkx) + self.flushoutput() + self.__input_fd_set(self.__svtermstate) + + if self.is_apple_terminal: + os.write(self.output_fd, b"\033[?7h") + + if hasattr(self, "old_sigwinch"): + try: + signal.signal(signal.SIGWINCH, self.old_sigwinch) + except ValueError as e: + import threading + if threading.current_thread() is threading.main_thread(): + raise e + del self.old_sigwinch + + def push_char(self, char: int | bytes) -> None: + """ + Push a character to the console event queue. + """ + trace("push char {char!r}", char=char) + self.event_queue.push(char) + + def get_event(self, block: bool = True) -> Event | None: + """ + Get an event from the console event queue. + + Parameters: + - block (bool): Whether to block until an event is available. + + Returns: + - Event: Event object from the event queue. + """ + if not block and not self.wait(timeout=0): + return None + + while self.event_queue.empty(): + while True: + try: + self.push_char(self.__read(1)) + except OSError as err: + if err.errno == errno.EINTR: + if not self.event_queue.empty(): + return self.event_queue.get() + else: + continue + elif err.errno == errno.EIO: + raise SystemExit(errno.EIO) + else: + raise + else: + break + return self.event_queue.get() + + def wait(self, timeout: float | None = None) -> bool: + """ + Wait for events on the console. + """ + return ( + not self.event_queue.empty() + or bool(self.pollob.poll(timeout)) + ) + + def set_cursor_vis(self, visible): + """ + Set the visibility of the cursor. + + Parameters: + - visible (bool): Visibility flag. + """ + if visible: + self.__show_cursor() + else: + self.__hide_cursor() + + if TIOCGWINSZ: + + def getheightwidth(self): + """ + Get the height and width of the console. + + Returns: + - tuple: Height and width of the console. + """ + try: + return int(os.environ["LINES"]), int(os.environ["COLUMNS"]) + except (KeyError, TypeError, ValueError): + try: + size = ioctl(self.input_fd, TIOCGWINSZ, b"\000" * 8) + except OSError: + return 25, 80 + height, width = struct.unpack("hhhh", size)[0:2] + if not height: + return 25, 80 + return height, width + + else: + + def getheightwidth(self): + """ + Get the height and width of the console. + + Returns: + - tuple: Height and width of the console. + """ + try: + return int(os.environ["LINES"]), int(os.environ["COLUMNS"]) + except (KeyError, TypeError, ValueError): + return 25, 80 + + def forgetinput(self): + """ + Discard any pending input on the console. + """ + termios.tcflush(self.input_fd, termios.TCIFLUSH) + + def flushoutput(self): + """ + Flush the output buffer. + """ + for text, iscode in self.__buffer: + if iscode: + self.__tputs(text) + else: + os.write(self.output_fd, text.encode(self.encoding, "replace")) + del self.__buffer[:] + + def finish(self): + """ + Finish console operations and flush the output buffer. + """ + y = len(self.screen) - 1 + while y >= 0 and not self.screen[y]: + y -= 1 + self.__move(0, min(y, self.height + self.__offset - 1)) + self.__write("\n\r") + self.flushoutput() + + def beep(self): + """ + Emit a beep sound. + """ + self.__maybe_write_code(self._bel) + self.flushoutput() + + if FIONREAD: + + def getpending(self): + """ + Get pending events from the console event queue. + + Returns: + - Event: Pending event from the event queue. + """ + e = Event("key", "", b"") + + while not self.event_queue.empty(): + e2 = self.event_queue.get() + e.data += e2.data + e.raw += e.raw + + amount = struct.unpack("i", ioctl(self.input_fd, FIONREAD, b"\0\0\0\0"))[0] + trace("getpending({a})", a=amount) + raw = self.__read(amount) + data = str(raw, self.encoding, "replace") + e.data += data + e.raw += raw + return e + + else: + + def getpending(self): + """ + Get pending events from the console event queue. + + Returns: + - Event: Pending event from the event queue. + """ + e = Event("key", "", b"") + + while not self.event_queue.empty(): + e2 = self.event_queue.get() + e.data += e2.data + e.raw += e.raw + + amount = 10000 + raw = self.__read(amount) + data = str(raw, self.encoding, "replace") + e.data += data + e.raw += raw + return e + + def clear(self): + """ + Clear the console screen. + """ + self.__write_code(self._clear) + self.__gone_tall = 1 + self.__move = self.__move_tall + self.posxy = 0, 0 + self.screen = [] + + @property + def input_hook(self): + # avoid inline imports here so the repl doesn't get flooded + # with import logging from -X importtime=2 + if posix is not None and posix._is_inputhook_installed(): + return posix._inputhook + + def __enable_bracketed_paste(self) -> None: + os.write(self.output_fd, b"\x1b[?2004h") + + def __disable_bracketed_paste(self) -> None: + os.write(self.output_fd, b"\x1b[?2004l") + + def __setup_movement(self): + """ + Set up the movement functions based on the terminal capabilities. + """ + if 0 and self._hpa: # hpa don't work in windows telnet :-( + self.__move_x = self.__move_x_hpa + elif self._cub and self._cuf: + self.__move_x = self.__move_x_cub_cuf + elif self._cub1 and self._cuf1: + self.__move_x = self.__move_x_cub1_cuf1 + else: + raise RuntimeError("insufficient terminal (horizontal)") + + if self._cuu and self._cud: + self.__move_y = self.__move_y_cuu_cud + elif self._cuu1 and self._cud1: + self.__move_y = self.__move_y_cuu1_cud1 + else: + raise RuntimeError("insufficient terminal (vertical)") + + if self._dch1: + self.dch1 = self._dch1 + elif self._dch: + self.dch1 = terminfo.tparm(self._dch, 1) + else: + self.dch1 = None + + if self._ich1: + self.ich1 = self._ich1 + elif self._ich: + self.ich1 = terminfo.tparm(self._ich, 1) + else: + self.ich1 = None + + self.__move = self.__move_short + + def __write_changed_line(self, y, oldline, newline, px_coord): + # this is frustrating; there's no reason to test (say) + # self.dch1 inside the loop -- but alternative ways of + # structuring this function are equally painful (I'm trying to + # avoid writing code generators these days...) + minlen = min(wlen(oldline), wlen(newline)) + x_pos = 0 + x_coord = 0 + + px_pos = 0 + j = 0 + for c in oldline: + if j >= px_coord: + break + j += wlen(c) + px_pos += 1 + + # reuse the oldline as much as possible, but stop as soon as we + # encounter an ESCAPE, because it might be the start of an escape + # sequence + while ( + x_coord < minlen + and oldline[x_pos] == newline[x_pos] + and newline[x_pos] != "\x1b" + ): + x_coord += wlen(newline[x_pos]) + x_pos += 1 + + # if we need to insert a single character right after the first detected change + if oldline[x_pos:] == newline[x_pos + 1 :] and self.ich1: + if ( + y == self.posxy[1] + and x_coord > self.posxy[0] + and oldline[px_pos:x_pos] == newline[px_pos + 1 : x_pos + 1] + ): + x_pos = px_pos + x_coord = px_coord + character_width = wlen(newline[x_pos]) + self.__move(x_coord, y) + self.__write_code(self.ich1) + self.__write(newline[x_pos]) + self.posxy = x_coord + character_width, y + + # if it's a single character change in the middle of the line + elif ( + x_coord < minlen + and oldline[x_pos + 1 :] == newline[x_pos + 1 :] + and wlen(oldline[x_pos]) == wlen(newline[x_pos]) + ): + character_width = wlen(newline[x_pos]) + self.__move(x_coord, y) + self.__write(newline[x_pos]) + self.posxy = x_coord + character_width, y + + # if this is the last character to fit in the line and we edit in the middle of the line + elif ( + self.dch1 + and self.ich1 + and wlen(newline) == self.width + and x_coord < wlen(newline) - 2 + and newline[x_pos + 1 : -1] == oldline[x_pos:-2] + ): + self.__hide_cursor() + self.__move(self.width - 2, y) + self.posxy = self.width - 2, y + self.__write_code(self.dch1) + + character_width = wlen(newline[x_pos]) + self.__move(x_coord, y) + self.__write_code(self.ich1) + self.__write(newline[x_pos]) + self.posxy = character_width + 1, y + + else: + self.__hide_cursor() + self.__move(x_coord, y) + if wlen(oldline) > wlen(newline): + self.__write_code(self._el) + self.__write(newline[x_pos:]) + self.posxy = wlen(newline), y + + if "\x1b" in newline: + # ANSI escape characters are present, so we can't assume + # anything about the position of the cursor. Moving the cursor + # to the left margin should work to get to a known position. + self.move_cursor(0, y) + + def __write(self, text): + self.__buffer.append((text, 0)) + + def __write_code(self, fmt, *args): + self.__buffer.append((terminfo.tparm(fmt, *args), 1)) + + def __maybe_write_code(self, fmt, *args): + if fmt: + self.__write_code(fmt, *args) + + def __move_y_cuu1_cud1(self, y): + assert self._cud1 is not None + assert self._cuu1 is not None + dy = y - self.posxy[1] + if dy > 0: + self.__write_code(dy * self._cud1) + elif dy < 0: + self.__write_code((-dy) * self._cuu1) + + def __move_y_cuu_cud(self, y): + dy = y - self.posxy[1] + if dy > 0: + self.__write_code(self._cud, dy) + elif dy < 0: + self.__write_code(self._cuu, -dy) + + def __move_x_hpa(self, x: int) -> None: + if x != self.posxy[0]: + self.__write_code(self._hpa, x) + + def __move_x_cub1_cuf1(self, x: int) -> None: + assert self._cuf1 is not None + assert self._cub1 is not None + dx = x - self.posxy[0] + if dx > 0: + self.__write_code(self._cuf1 * dx) + elif dx < 0: + self.__write_code(self._cub1 * (-dx)) + + def __move_x_cub_cuf(self, x: int) -> None: + dx = x - self.posxy[0] + if dx > 0: + self.__write_code(self._cuf, dx) + elif dx < 0: + self.__write_code(self._cub, -dx) + + def __move_short(self, x, y): + self.__move_x(x) + self.__move_y(y) + + def __move_tall(self, x, y): + assert 0 <= y - self.__offset < self.height, y - self.__offset + self.__write_code(self._cup, y - self.__offset, x) + + def __sigwinch(self, signum, frame): + self.height, self.width = self.getheightwidth() + self.event_queue.insert(Event("resize", None)) + + def __hide_cursor(self): + if self.cursor_visible: + self.__maybe_write_code(self._civis) + self.cursor_visible = 0 + + def __show_cursor(self): + if not self.cursor_visible: + self.__maybe_write_code(self._cnorm) + self.cursor_visible = 1 + + def repaint(self): + if not self.__gone_tall: + self.posxy = 0, self.posxy[1] + self.__write("\r") + ns = len(self.screen) * ["\000" * self.width] + self.screen = ns + else: + self.posxy = 0, self.__offset + self.__move(0, self.__offset) + ns = self.height * ["\000" * self.width] + self.screen = ns + + def __tputs(self, fmt, prog=delayprog): + """A Python implementation of the curses tputs function; the + curses one can't really be wrapped in a sane manner. + + I have the strong suspicion that this is complexity that + will never do anyone any good.""" + # using .get() means that things will blow up + # only if the bps is actually needed (which I'm + # betting is pretty unlikely) + bps = ratedict.get(self.__svtermstate.ospeed) + while True: + m = prog.search(fmt) + if not m: + os.write(self.output_fd, fmt) + break + x, y = m.span() + os.write(self.output_fd, fmt[:x]) + fmt = fmt[y:] + delay = int(m.group(1)) + if b"*" in m.group(2): + delay *= self.height + if self._pad and bps is not None: + nchars = (bps * delay) / 1000 + os.write(self.output_fd, self._pad * nchars) + else: + time.sleep(float(delay) / 1000.0) + + def __input_fd_set( + self, + state: TermState, + ignore: AbstractSet[int] = _error_codes_to_ignore, + ) -> bool: + try: + tcsetattr(self.input_fd, termios.TCSADRAIN, state) + except termios.error as te: + if te.args[0] not in ignore: + raise + return False + else: + return True diff --git a/PythonLib/full/_pyrepl/unix_eventqueue.py b/PythonLib/full/_pyrepl/unix_eventqueue.py new file mode 100644 index 00000000..2a9cca59 --- /dev/null +++ b/PythonLib/full/_pyrepl/unix_eventqueue.py @@ -0,0 +1,77 @@ +# Copyright 2000-2008 Michael Hudson-Doyle +# Armin Rigo +# +# All Rights Reserved +# +# +# Permission to use, copy, modify, and distribute this software and +# its documentation for any purpose is hereby granted without fee, +# provided that the above copyright notice appear in all copies and +# that both that copyright notice and this permission notice appear in +# supporting documentation. +# +# THE AUTHOR MICHAEL HUDSON DISCLAIMS ALL WARRANTIES WITH REGARD TO +# THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY +# AND FITNESS, IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, +# INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER +# RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF +# CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN +# CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + +from .terminfo import TermInfo +from .trace import trace +from .base_eventqueue import BaseEventQueue +from termios import tcgetattr, VERASE +import os + + +# Mapping of human-readable key names to their terminal-specific codes +TERMINAL_KEYNAMES = { + "delete": "kdch1", + "down": "kcud1", + "end": "kend", + "enter": "kent", + "home": "khome", + "insert": "kich1", + "left": "kcub1", + "page down": "knp", + "page up": "kpp", + "right": "kcuf1", + "up": "kcuu1", +} + + +# Function keys F1-F20 mapping +TERMINAL_KEYNAMES.update(("f%d" % i, "kf%d" % i) for i in range(1, 21)) + +# Known CTRL-arrow keycodes +CTRL_ARROW_KEYCODES= { + # for xterm, gnome-terminal, xfce terminal, etc. + b'\033[1;5D': 'ctrl left', + b'\033[1;5C': 'ctrl right', + # for rxvt + b'\033Od': 'ctrl left', + b'\033Oc': 'ctrl right', +} + +def get_terminal_keycodes(ti: TermInfo) -> dict[bytes, str]: + """ + Generates a dictionary mapping terminal keycodes to human-readable names. + """ + keycodes = {} + for key, terminal_code in TERMINAL_KEYNAMES.items(): + keycode = ti.get(terminal_code) + trace('key {key} tiname {terminal_code} keycode {keycode!r}', **locals()) + if keycode: + keycodes[keycode] = key + keycodes.update(CTRL_ARROW_KEYCODES) + return keycodes + + +class EventQueue(BaseEventQueue): + def __init__(self, fd: int, encoding: str, ti: TermInfo) -> None: + keycodes = get_terminal_keycodes(ti) + if os.isatty(fd): + backspace = tcgetattr(fd)[6][VERASE] + keycodes[backspace] = "backspace" + BaseEventQueue.__init__(self, encoding, keycodes) diff --git a/PythonLib/full/_pyrepl/utils.py b/PythonLib/full/_pyrepl/utils.py new file mode 100644 index 00000000..06cddef8 --- /dev/null +++ b/PythonLib/full/_pyrepl/utils.py @@ -0,0 +1,391 @@ +from __future__ import annotations +import builtins +import functools +import keyword +import re +import token as T +import tokenize +import unicodedata +import _colorize + +from collections import deque +from io import StringIO +from tokenize import TokenInfo as TI +from typing import Iterable, Iterator, Match, NamedTuple, Self + +from .types import CharBuffer, CharWidths +from .trace import trace + +ANSI_ESCAPE_SEQUENCE = re.compile(r"\x1b\[[ -@]*[A-~]") +ZERO_WIDTH_BRACKET = re.compile(r"\x01.*?\x02") +ZERO_WIDTH_TRANS = str.maketrans({"\x01": "", "\x02": ""}) +IDENTIFIERS_AFTER = {"def", "class"} +KEYWORD_CONSTANTS = {"True", "False", "None"} +BUILTINS = {str(name) for name in dir(builtins) if not name.startswith('_')} + + +def THEME(**kwargs): + # Not cached: the user can modify the theme inside the interactive session. + return _colorize.get_theme(**kwargs).syntax + + +class Span(NamedTuple): + """Span indexing that's inclusive on both ends.""" + + start: int + end: int + + @classmethod + def from_re(cls, m: Match[str], group: int | str) -> Self: + re_span = m.span(group) + return cls(re_span[0], re_span[1] - 1) + + @classmethod + def from_token(cls, token: TI, line_len: list[int]) -> Self: + end_offset = -1 + if (token.type in {T.FSTRING_MIDDLE, T.TSTRING_MIDDLE} + and token.string.endswith(("{", "}"))): + # gh-134158: a visible trailing brace comes from a double brace in input + end_offset += 1 + + return cls( + line_len[token.start[0] - 1] + token.start[1], + line_len[token.end[0] - 1] + token.end[1] + end_offset, + ) + + +class ColorSpan(NamedTuple): + span: Span + tag: str + + +@functools.cache +def str_width(c: str) -> int: + if ord(c) < 128: + return 1 + # gh-139246 for zero-width joiner and combining characters + if unicodedata.combining(c): + return 0 + category = unicodedata.category(c) + if category == "Cf" and c != "\u00ad": + return 0 + w = unicodedata.east_asian_width(c) + if w in ("N", "Na", "H", "A"): + return 1 + return 2 + + +def wlen(s: str) -> int: + if len(s) == 1 and s != "\x1a": + return str_width(s) + length = sum(str_width(i) for i in s) + # remove lengths of any escape sequences + sequence = ANSI_ESCAPE_SEQUENCE.findall(s) + ctrl_z_cnt = s.count("\x1a") + return length - sum(len(i) for i in sequence) + ctrl_z_cnt + + +def unbracket(s: str, including_content: bool = False) -> str: + r"""Return `s` with \001 and \002 characters removed. + + If `including_content` is True, content between \001 and \002 is also + stripped. + """ + if including_content: + return ZERO_WIDTH_BRACKET.sub("", s) + return s.translate(ZERO_WIDTH_TRANS) + + +def gen_colors(buffer: str) -> Iterator[ColorSpan]: + """Returns a list of index spans to color using the given color tag. + + The input `buffer` should be a valid start of a Python code block, i.e. + it cannot be a block starting in the middle of a multiline string. + """ + sio = StringIO(buffer) + line_lengths = [0] + [len(line) for line in sio.readlines()] + # make line_lengths cumulative + for i in range(1, len(line_lengths)): + line_lengths[i] += line_lengths[i-1] + + sio.seek(0) + gen = tokenize.generate_tokens(sio.readline) + last_emitted: ColorSpan | None = None + try: + for color in gen_colors_from_token_stream(gen, line_lengths): + yield color + last_emitted = color + except SyntaxError: + return + except tokenize.TokenError as te: + yield from recover_unterminated_string( + te, line_lengths, last_emitted, buffer + ) + + +def recover_unterminated_string( + exc: tokenize.TokenError, + line_lengths: list[int], + last_emitted: ColorSpan | None, + buffer: str, +) -> Iterator[ColorSpan]: + msg, loc = exc.args + if loc is None: + return + + line_no, column = loc + + if msg.startswith( + ( + "unterminated string literal", + "unterminated f-string literal", + "unterminated t-string literal", + "EOF in multi-line string", + "unterminated triple-quoted f-string literal", + "unterminated triple-quoted t-string literal", + ) + ): + start = line_lengths[line_no - 1] + column - 1 + end = line_lengths[-1] - 1 + + # in case FSTRING_START was already emitted + if last_emitted and start <= last_emitted.span.start: + trace("before last emitted = {s}", s=start) + start = last_emitted.span.end + 1 + + span = Span(start, end) + trace("yielding span {a} -> {b}", a=span.start, b=span.end) + yield ColorSpan(span, "string") + else: + trace( + "unhandled token error({buffer}) = {te}", + buffer=repr(buffer), + te=str(exc), + ) + + +def gen_colors_from_token_stream( + token_generator: Iterator[TI], + line_lengths: list[int], +) -> Iterator[ColorSpan]: + token_window = prev_next_window(token_generator) + + is_def_name = False + bracket_level = 0 + for prev_token, token, next_token in token_window: + assert token is not None + if token.start == token.end: + continue + + match token.type: + case ( + T.STRING + | T.FSTRING_START | T.FSTRING_MIDDLE | T.FSTRING_END + | T.TSTRING_START | T.TSTRING_MIDDLE | T.TSTRING_END + ): + span = Span.from_token(token, line_lengths) + yield ColorSpan(span, "string") + case T.COMMENT: + span = Span.from_token(token, line_lengths) + yield ColorSpan(span, "comment") + case T.NUMBER: + span = Span.from_token(token, line_lengths) + yield ColorSpan(span, "number") + case T.OP: + if token.string in "([{": + bracket_level += 1 + elif token.string in ")]}": + bracket_level -= 1 + span = Span.from_token(token, line_lengths) + yield ColorSpan(span, "op") + case T.NAME: + if is_def_name: + is_def_name = False + span = Span.from_token(token, line_lengths) + yield ColorSpan(span, "definition") + elif keyword.iskeyword(token.string): + span_cls = "keyword" + if token.string in KEYWORD_CONSTANTS: + span_cls = "keyword_constant" + span = Span.from_token(token, line_lengths) + yield ColorSpan(span, span_cls) + if token.string in IDENTIFIERS_AFTER: + is_def_name = True + elif ( + keyword.issoftkeyword(token.string) + and bracket_level == 0 + and is_soft_keyword_used(prev_token, token, next_token) + ): + span = Span.from_token(token, line_lengths) + yield ColorSpan(span, "soft_keyword") + elif ( + token.string in BUILTINS + and not (prev_token and prev_token.exact_type == T.DOT) + ): + span = Span.from_token(token, line_lengths) + yield ColorSpan(span, "builtin") + + +keyword_first_sets_match = {"False", "None", "True", "await", "lambda", "not"} +keyword_first_sets_case = {"False", "None", "True"} + + +def is_soft_keyword_used(*tokens: TI | None) -> bool: + """Returns True if the current token is a keyword in this context. + + For the `*tokens` to match anything, they have to be a three-tuple of + (previous, current, next). + """ + trace("is_soft_keyword_used{t}", t=tokens) + match tokens: + case ( + None | TI(T.NEWLINE) | TI(T.INDENT) | TI(string=":"), + TI(string="match"), + TI(T.NUMBER | T.STRING | T.FSTRING_START | T.TSTRING_START) + | TI(T.OP, string="(" | "*" | "[" | "{" | "~" | "...") + ): + return True + case ( + None | TI(T.NEWLINE) | TI(T.INDENT) | TI(string=":"), + TI(string="match"), + TI(T.NAME, string=s) + ): + if keyword.iskeyword(s): + return s in keyword_first_sets_match + return True + case ( + None | TI(T.NEWLINE) | TI(T.INDENT) | TI(T.DEDENT) | TI(string=":"), + TI(string="case"), + TI(T.NUMBER | T.STRING | T.FSTRING_START | T.TSTRING_START) + | TI(T.OP, string="(" | "*" | "-" | "[" | "{") + ): + return True + case ( + None | TI(T.NEWLINE) | TI(T.INDENT) | TI(T.DEDENT) | TI(string=":"), + TI(string="case"), + TI(T.NAME, string=s) + ): + if keyword.iskeyword(s): + return s in keyword_first_sets_case + return True + case (TI(string="case"), TI(string="_"), TI(string=":")): + return True + case ( + None | TI(T.NEWLINE) | TI(T.INDENT) | TI(T.DEDENT) | TI(string=":"), + TI(string="type"), + TI(T.NAME, string=s) + ): + return not keyword.iskeyword(s) + case _: + return False + + +def disp_str( + buffer: str, + colors: list[ColorSpan] | None = None, + start_index: int = 0, + force_color: bool = False, +) -> tuple[CharBuffer, CharWidths]: + r"""Decompose the input buffer into a printable variant with applied colors. + + Returns a tuple of two lists: + - the first list is the input buffer, character by character, with color + escape codes added (while those codes contain multiple ASCII characters, + each code is considered atomic *and is attached for the corresponding + visible character*); + - the second list is the visible width of each character in the input + buffer. + + Note on colors: + - The `colors` list, if provided, is partially consumed within. We're using + a list and not a generator since we need to hold onto the current + unfinished span between calls to disp_str in case of multiline strings. + - The `colors` list is computed from the start of the input block. `buffer` + is only a subset of that input block, a single line within. This is why + we need `start_index` to inform us which position is the start of `buffer` + actually within user input. This allows us to match color spans correctly. + + Examples: + >>> utils.disp_str("a = 9") + (['a', ' ', '=', ' ', '9'], [1, 1, 1, 1, 1]) + + >>> line = "while 1:" + >>> colors = list(utils.gen_colors(line)) + >>> utils.disp_str(line, colors=colors) + (['\x1b[1;34mw', 'h', 'i', 'l', 'e\x1b[0m', ' ', '1', ':'], [1, 1, 1, 1, 1, 1, 1, 1]) + + """ + chars: CharBuffer = [] + char_widths: CharWidths = [] + + if not buffer: + return chars, char_widths + + while colors and colors[0].span.end < start_index: + # move past irrelevant spans + colors.pop(0) + + theme = THEME(force_color=force_color) + pre_color = "" + post_color = "" + if colors and colors[0].span.start < start_index: + # looks like we're continuing a previous color (e.g. a multiline str) + pre_color = theme[colors[0].tag] + + for i, c in enumerate(buffer, start_index): + if colors and colors[0].span.start == i: # new color starts now + pre_color = theme[colors[0].tag] + + if c == "\x1a": # CTRL-Z on Windows + chars.append(c) + char_widths.append(2) + elif ord(c) < 128: + chars.append(c) + char_widths.append(1) + elif unicodedata.category(c).startswith("C"): + c = r"\u%04x" % ord(c) + chars.append(c) + char_widths.append(len(c)) + else: + chars.append(c) + char_widths.append(str_width(c)) + + if colors and colors[0].span.end == i: # current color ends now + post_color = theme.reset + colors.pop(0) + + chars[-1] = pre_color + chars[-1] + post_color + pre_color = "" + post_color = "" + + if colors and colors[0].span.start < i and colors[0].span.end > i: + # even though the current color should be continued, reset it for now. + # the next call to `disp_str()` will revive it. + chars[-1] += theme.reset + + return chars, char_widths + + +def prev_next_window[T]( + iterable: Iterable[T] +) -> Iterator[tuple[T | None, ...]]: + """Generates three-tuples of (previous, current, next) items. + + On the first iteration previous is None. On the last iteration next + is None. In case of exception next is None and the exception is re-raised + on a subsequent next() call. + + Inspired by `sliding_window` from `itertools` recipes. + """ + + iterator = iter(iterable) + window = deque((None, next(iterator)), maxlen=3) + try: + for x in iterator: + window.append(x) + yield tuple(window) + except Exception: + raise + finally: + window.append(None) + yield tuple(window) diff --git a/PythonLib/full/_pyrepl/windows_console.py b/PythonLib/full/_pyrepl/windows_console.py new file mode 100644 index 00000000..bc137ead --- /dev/null +++ b/PythonLib/full/_pyrepl/windows_console.py @@ -0,0 +1,735 @@ +# Copyright 2000-2004 Michael Hudson-Doyle +# +# All Rights Reserved +# +# +# Permission to use, copy, modify, and distribute this software and +# its documentation for any purpose is hereby granted without fee, +# provided that the above copyright notice appear in all copies and +# that both that copyright notice and this permission notice appear in +# supporting documentation. +# +# THE AUTHOR MICHAEL HUDSON DISCLAIMS ALL WARRANTIES WITH REGARD TO +# THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY +# AND FITNESS, IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, +# INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER +# RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF +# CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN +# CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + +from __future__ import annotations + +import io +import os +import sys + +import ctypes +import types +from ctypes.wintypes import ( + _COORD, + WORD, + SMALL_RECT, + BOOL, + HANDLE, + CHAR, + DWORD, + WCHAR, + SHORT, +) +from ctypes import Structure, POINTER, Union +from .console import Event, Console +from .trace import trace +from .utils import wlen +from .windows_eventqueue import EventQueue + +try: + from ctypes import get_last_error, GetLastError, WinDLL, windll, WinError # type: ignore[attr-defined] +except: + # Keep MyPy happy off Windows + from ctypes import CDLL as WinDLL, cdll as windll + + def GetLastError() -> int: + return 42 + + def get_last_error() -> int: + return 42 + + class WinError(OSError): # type: ignore[no-redef] + def __init__(self, err: int | None, descr: str | None = None) -> None: + self.err = err + self.descr = descr + +# declare nt optional to allow None assignment on other platforms +nt: types.ModuleType | None +try: + import nt +except ImportError: + nt = None + +TYPE_CHECKING = False + +if TYPE_CHECKING: + from typing import IO + +# Virtual-Key Codes: https://learn.microsoft.com/en-us/windows/win32/inputdev/virtual-key-codes +VK_MAP: dict[int, str] = { + 0x23: "end", # VK_END + 0x24: "home", # VK_HOME + 0x25: "left", # VK_LEFT + 0x26: "up", # VK_UP + 0x27: "right", # VK_RIGHT + 0x28: "down", # VK_DOWN + 0x2E: "delete", # VK_DELETE + 0x70: "f1", # VK_F1 + 0x71: "f2", # VK_F2 + 0x72: "f3", # VK_F3 + 0x73: "f4", # VK_F4 + 0x74: "f5", # VK_F5 + 0x75: "f6", # VK_F6 + 0x76: "f7", # VK_F7 + 0x77: "f8", # VK_F8 + 0x78: "f9", # VK_F9 + 0x79: "f10", # VK_F10 + 0x7A: "f11", # VK_F11 + 0x7B: "f12", # VK_F12 + 0x7C: "f13", # VK_F13 + 0x7D: "f14", # VK_F14 + 0x7E: "f15", # VK_F15 + 0x7F: "f16", # VK_F16 + 0x80: "f17", # VK_F17 + 0x81: "f18", # VK_F18 + 0x82: "f19", # VK_F19 + 0x83: "f20", # VK_F20 +} + +# Virtual terminal output sequences +# Reference: https://learn.microsoft.com/en-us/windows/console/console-virtual-terminal-sequences#output-sequences +# Check `windows_eventqueue.py` for input sequences +ERASE_IN_LINE = "\x1b[K" +MOVE_LEFT = "\x1b[{}D" +MOVE_RIGHT = "\x1b[{}C" +MOVE_UP = "\x1b[{}A" +MOVE_DOWN = "\x1b[{}B" +CLEAR = "\x1b[H\x1b[J" + +# State of control keys: https://learn.microsoft.com/en-us/windows/console/key-event-record-str +ALT_ACTIVE = 0x01 | 0x02 +CTRL_ACTIVE = 0x04 | 0x08 + +WAIT_TIMEOUT = 0x102 +WAIT_FAILED = 0xFFFFFFFF + +# from winbase.h +INFINITE = 0xFFFFFFFF + + +class _error(Exception): + pass + +def _supports_vt(): + try: + return nt._supports_virtual_terminal() + except AttributeError: + return False + +class WindowsConsole(Console): + def __init__( + self, + f_in: IO[bytes] | int = 0, + f_out: IO[bytes] | int = 1, + term: str = "", + encoding: str = "", + ): + super().__init__(f_in, f_out, term, encoding) + + self.__vt_support = _supports_vt() + + if self.__vt_support: + trace('console supports virtual terminal') + + # Save original console modes so we can recover on cleanup. + original_input_mode = DWORD() + GetConsoleMode(InHandle, original_input_mode) + trace(f'saved original input mode 0x{original_input_mode.value:x}') + self.__original_input_mode = original_input_mode.value + + SetConsoleMode( + OutHandle, + ENABLE_WRAP_AT_EOL_OUTPUT + | ENABLE_PROCESSED_OUTPUT + | ENABLE_VIRTUAL_TERMINAL_PROCESSING, + ) + + self.screen: list[str] = [] + self.width = 80 + self.height = 25 + self.__offset = 0 + self.event_queue = EventQueue(encoding) + try: + self.out = io._WindowsConsoleIO(self.output_fd, "w") # type: ignore[attr-defined] + except ValueError: + # Console I/O is redirected, fallback... + self.out = None + + def refresh(self, screen: list[str], c_xy: tuple[int, int]) -> None: + """ + Refresh the console screen. + + Parameters: + - screen (list): List of strings representing the screen contents. + - c_xy (tuple): Cursor position (x, y) on the screen. + """ + cx, cy = c_xy + + while len(self.screen) < min(len(screen), self.height): + self._hide_cursor() + if self.screen: + self._move_relative(0, len(self.screen) - 1) + self.__write("\n") + self.posxy = 0, len(self.screen) + self.screen.append("") + + px, py = self.posxy + old_offset = offset = self.__offset + height = self.height + + # we make sure the cursor is on the screen, and that we're + # using all of the screen if we can + if cy < offset: + offset = cy + elif cy >= offset + height: + offset = cy - height + 1 + scroll_lines = offset - old_offset + + # Scrolling the buffer as the current input is greater than the visible + # portion of the window. We need to scroll the visible portion and the + # entire history + self._scroll(scroll_lines, self._getscrollbacksize()) + self.posxy = self.posxy[0], self.posxy[1] + scroll_lines + self.__offset += scroll_lines + + for i in range(scroll_lines): + self.screen.append("") + elif offset > 0 and len(screen) < offset + height: + offset = max(len(screen) - height, 0) + screen.append("") + + oldscr = self.screen[old_offset : old_offset + height] + newscr = screen[offset : offset + height] + + self.__offset = offset + + self._hide_cursor() + for ( + y, + oldline, + newline, + ) in zip(range(offset, offset + height), oldscr, newscr): + if oldline != newline: + self.__write_changed_line(y, oldline, newline, px) + + y = len(newscr) + while y < len(oldscr): + self._move_relative(0, y) + self.posxy = 0, y + self._erase_to_end() + y += 1 + + self._show_cursor() + + self.screen = screen + self.move_cursor(cx, cy) + + @property + def input_hook(self): + # avoid inline imports here so the repl doesn't get flooded + # with import logging from -X importtime=2 + if nt is not None and nt._is_inputhook_installed(): + return nt._inputhook + + def __write_changed_line( + self, y: int, oldline: str, newline: str, px_coord: int + ) -> None: + # this is frustrating; there's no reason to test (say) + # self.dch1 inside the loop -- but alternative ways of + # structuring this function are equally painful (I'm trying to + # avoid writing code generators these days...) + minlen = min(wlen(oldline), wlen(newline)) + x_pos = 0 + x_coord = 0 + + px_pos = 0 + j = 0 + for c in oldline: + if j >= px_coord: + break + j += wlen(c) + px_pos += 1 + + # reuse the oldline as much as possible, but stop as soon as we + # encounter an ESCAPE, because it might be the start of an escape + # sequence + while ( + x_coord < minlen + and oldline[x_pos] == newline[x_pos] + and newline[x_pos] != "\x1b" + ): + x_coord += wlen(newline[x_pos]) + x_pos += 1 + + self._hide_cursor() + self._move_relative(x_coord, y) + if wlen(oldline) > wlen(newline): + self._erase_to_end() + + self.__write(newline[x_pos:]) + if wlen(newline) == self.width: + # If we wrapped we want to start at the next line + self._move_relative(0, y + 1) + self.posxy = 0, y + 1 + else: + self.posxy = wlen(newline), y + + if "\x1b" in newline or y != self.posxy[1] or '\x1a' in newline: + # ANSI escape characters are present, so we can't assume + # anything about the position of the cursor. Moving the cursor + # to the left margin should work to get to a known position. + self.move_cursor(0, y) + + def _scroll( + self, top: int, bottom: int, left: int | None = None, right: int | None = None + ) -> None: + scroll_rect = SMALL_RECT() + scroll_rect.Top = SHORT(top) + scroll_rect.Bottom = SHORT(bottom) + scroll_rect.Left = SHORT(0 if left is None else left) + scroll_rect.Right = SHORT( + self.getheightwidth()[1] - 1 if right is None else right + ) + destination_origin = _COORD() + fill_info = CHAR_INFO() + fill_info.UnicodeChar = " " + + if not ScrollConsoleScreenBuffer( + OutHandle, scroll_rect, None, destination_origin, fill_info + ): + raise WinError(GetLastError()) + + def _hide_cursor(self): + self.__write("\x1b[?25l") + + def _show_cursor(self): + self.__write("\x1b[?25h") + + def _enable_blinking(self): + self.__write("\x1b[?12h") + + def _disable_blinking(self): + self.__write("\x1b[?12l") + + def _enable_bracketed_paste(self) -> None: + self.__write("\x1b[?2004h") + + def _disable_bracketed_paste(self) -> None: + self.__write("\x1b[?2004l") + + def __write(self, text: str) -> None: + if "\x1a" in text: + text = ''.join(["^Z" if x == '\x1a' else x for x in text]) + + if self.out is not None: + self.out.write(text.encode(self.encoding, "replace")) + self.out.flush() + else: + os.write(self.output_fd, text.encode(self.encoding, "replace")) + + @property + def screen_xy(self) -> tuple[int, int]: + info = CONSOLE_SCREEN_BUFFER_INFO() + if not GetConsoleScreenBufferInfo(OutHandle, info): + raise WinError(GetLastError()) + return info.dwCursorPosition.X, info.dwCursorPosition.Y + + def _erase_to_end(self) -> None: + self.__write(ERASE_IN_LINE) + + def prepare(self) -> None: + trace("prepare") + self.screen = [] + self.height, self.width = self.getheightwidth() + + self.posxy = 0, 0 + self.__gone_tall = 0 + self.__offset = 0 + + if self.__vt_support: + SetConsoleMode(InHandle, self.__original_input_mode | ENABLE_VIRTUAL_TERMINAL_INPUT) + self._enable_bracketed_paste() + + def restore(self) -> None: + if self.__vt_support: + # Recover to original mode before running REPL + self._disable_bracketed_paste() + SetConsoleMode(InHandle, self.__original_input_mode) + + def _move_relative(self, x: int, y: int) -> None: + """Moves relative to the current posxy""" + dx = x - self.posxy[0] + dy = y - self.posxy[1] + if dx < 0: + self.__write(MOVE_LEFT.format(-dx)) + elif dx > 0: + self.__write(MOVE_RIGHT.format(dx)) + + if dy < 0: + self.__write(MOVE_UP.format(-dy)) + elif dy > 0: + self.__write(MOVE_DOWN.format(dy)) + + def move_cursor(self, x: int, y: int) -> None: + if x < 0 or y < 0: + raise ValueError(f"Bad cursor position {x}, {y}") + + if y < self.__offset or y >= self.__offset + self.height: + self.event_queue.insert(Event("scroll", "")) + else: + self._move_relative(x, y) + self.posxy = x, y + + def set_cursor_vis(self, visible: bool) -> None: + if visible: + self._show_cursor() + else: + self._hide_cursor() + + def getheightwidth(self) -> tuple[int, int]: + """Return (height, width) where height and width are the height + and width of the terminal window in characters.""" + info = CONSOLE_SCREEN_BUFFER_INFO() + if not GetConsoleScreenBufferInfo(OutHandle, info): + raise WinError(GetLastError()) + return ( + info.srWindow.Bottom - info.srWindow.Top + 1, + info.srWindow.Right - info.srWindow.Left + 1, + ) + + def _getscrollbacksize(self) -> int: + info = CONSOLE_SCREEN_BUFFER_INFO() + if not GetConsoleScreenBufferInfo(OutHandle, info): + raise WinError(GetLastError()) + + return info.srWindow.Bottom # type: ignore[no-any-return] + + def _read_input(self) -> INPUT_RECORD | None: + rec = INPUT_RECORD() + read = DWORD() + if not ReadConsoleInput(InHandle, rec, 1, read): + raise WinError(GetLastError()) + + return rec + + def _read_input_bulk( + self, n: int + ) -> tuple[ctypes.Array[INPUT_RECORD], int]: + rec = (n * INPUT_RECORD)() + read = DWORD() + if not ReadConsoleInput(InHandle, rec, n, read): + raise WinError(GetLastError()) + + return rec, read.value + + def get_event(self, block: bool = True) -> Event | None: + """Return an Event instance. Returns None if |block| is false + and there is no event pending, otherwise waits for the + completion of an event.""" + + if not block and not self.wait(timeout=0): + return None + + while self.event_queue.empty(): + rec = self._read_input() + if rec is None: + return None + + if rec.EventType == WINDOW_BUFFER_SIZE_EVENT: + return Event("resize", "") + + if rec.EventType != KEY_EVENT or not rec.Event.KeyEvent.bKeyDown: + # Only process keys and keydown events + if block: + continue + return None + + key_event = rec.Event.KeyEvent + raw_key = key = key_event.uChar.UnicodeChar + + if key == "\r": + # Make enter unix-like + return Event(evt="key", data="\n") + elif key_event.wVirtualKeyCode == 8: + # Turn backspace directly into the command + key = "backspace" + elif key == "\x00": + # Handle special keys like arrow keys and translate them into the appropriate command + key = VK_MAP.get(key_event.wVirtualKeyCode) + if key: + if key_event.dwControlKeyState & CTRL_ACTIVE: + key = f"ctrl {key}" + elif key_event.dwControlKeyState & ALT_ACTIVE: + # queue the key, return the meta command + self.event_queue.insert(Event(evt="key", data=key)) + return Event(evt="key", data="\033") # keymap.py uses this for meta + return Event(evt="key", data=key) + if block: + continue + + return None + elif self.__vt_support: + # If virtual terminal is enabled, scanning VT sequences + for char in raw_key.encode(self.event_queue.encoding, "replace"): + self.event_queue.push(char) + continue + + if key_event.dwControlKeyState & ALT_ACTIVE: + # Do not swallow characters that have been entered via AltGr: + # Windows internally converts AltGr to CTRL+ALT, see + # https://learn.microsoft.com/en-us/windows/win32/api/winuser/nf-winuser-vkkeyscanw + if not key_event.dwControlKeyState & CTRL_ACTIVE: + # queue the key, return the meta command + self.event_queue.insert(Event(evt="key", data=key)) + return Event(evt="key", data="\033") # keymap.py uses this for meta + + return Event(evt="key", data=key) + return self.event_queue.get() + + def push_char(self, char: int | bytes) -> None: + """ + Push a character to the console event queue. + """ + raise NotImplementedError("push_char not supported on Windows") + + def beep(self) -> None: + self.__write("\x07") + + def clear(self) -> None: + """Wipe the screen""" + self.__write(CLEAR) + self.posxy = 0, 0 + self.screen = [] + + def finish(self) -> None: + """Move the cursor to the end of the display and otherwise get + ready for end. XXX could be merged with restore? Hmm.""" + y = len(self.screen) - 1 + while y >= 0 and not self.screen[y]: + y -= 1 + self._move_relative(0, min(y, self.height + self.__offset - 1)) + self.__write("\r\n") + + def flushoutput(self) -> None: + """Flush all output to the screen (assuming there's some + buffering going on somewhere). + + All output on Windows is unbuffered so this is a nop""" + pass + + def forgetinput(self) -> None: + """Forget all pending, but not yet processed input.""" + if not FlushConsoleInputBuffer(InHandle): + raise WinError(GetLastError()) + + def getpending(self) -> Event: + """Return the characters that have been typed but not yet + processed.""" + e = Event("key", "", b"") + + while not self.event_queue.empty(): + e2 = self.event_queue.get() + if e2: + e.data += e2.data + + recs, rec_count = self._read_input_bulk(1024) + for i in range(rec_count): + rec = recs[i] + # In case of a legacy console, we do not only receive a keydown + # event, but also a keyup event - and for uppercase letters + # an additional SHIFT_PRESSED event. + if rec and rec.EventType == KEY_EVENT: + key_event = rec.Event.KeyEvent + if not key_event.bKeyDown: + continue + ch = key_event.uChar.UnicodeChar + if ch == "\x00": + # ignore SHIFT_PRESSED and special keys + continue + if ch == "\r": + ch += "\n" + e.data += ch + return e + + def wait_for_event(self, timeout: float | None) -> bool: + """Wait for an event.""" + if timeout is None: + timeout = INFINITE + else: + timeout = int(timeout) + ret = WaitForSingleObject(InHandle, timeout) + if ret == WAIT_FAILED: + raise WinError(get_last_error()) + elif ret == WAIT_TIMEOUT: + return False + return True + + def wait(self, timeout: float | None) -> bool: + """ + Wait for events on the console. + """ + return ( + not self.event_queue.empty() + or self.wait_for_event(timeout) + ) + + def repaint(self) -> None: + raise NotImplementedError("No repaint support") + + +# Windows interop +class CONSOLE_SCREEN_BUFFER_INFO(Structure): + _fields_ = [ + ("dwSize", _COORD), + ("dwCursorPosition", _COORD), + ("wAttributes", WORD), + ("srWindow", SMALL_RECT), + ("dwMaximumWindowSize", _COORD), + ] + + +class CONSOLE_CURSOR_INFO(Structure): + _fields_ = [ + ("dwSize", DWORD), + ("bVisible", BOOL), + ] + + +class CHAR_INFO(Structure): + _fields_ = [ + ("UnicodeChar", WCHAR), + ("Attributes", WORD), + ] + + +class Char(Union): + _fields_ = [ + ("UnicodeChar", WCHAR), + ("Char", CHAR), + ] + + +class KeyEvent(ctypes.Structure): + _fields_ = [ + ("bKeyDown", BOOL), + ("wRepeatCount", WORD), + ("wVirtualKeyCode", WORD), + ("wVirtualScanCode", WORD), + ("uChar", Char), + ("dwControlKeyState", DWORD), + ] + + +class WindowsBufferSizeEvent(ctypes.Structure): + _fields_ = [("dwSize", _COORD)] + + +class ConsoleEvent(ctypes.Union): + _fields_ = [ + ("KeyEvent", KeyEvent), + ("WindowsBufferSizeEvent", WindowsBufferSizeEvent), + ] + + +class INPUT_RECORD(Structure): + _fields_ = [("EventType", WORD), ("Event", ConsoleEvent)] + + +KEY_EVENT = 0x01 +FOCUS_EVENT = 0x10 +MENU_EVENT = 0x08 +MOUSE_EVENT = 0x02 +WINDOW_BUFFER_SIZE_EVENT = 0x04 + +ENABLE_PROCESSED_INPUT = 0x0001 +ENABLE_LINE_INPUT = 0x0002 +ENABLE_ECHO_INPUT = 0x0004 +ENABLE_MOUSE_INPUT = 0x0010 +ENABLE_INSERT_MODE = 0x0020 +ENABLE_VIRTUAL_TERMINAL_INPUT = 0x0200 + +ENABLE_PROCESSED_OUTPUT = 0x01 +ENABLE_WRAP_AT_EOL_OUTPUT = 0x02 +ENABLE_VIRTUAL_TERMINAL_PROCESSING = 0x04 + +STD_INPUT_HANDLE = -10 +STD_OUTPUT_HANDLE = -11 + +if sys.platform == "win32": + _KERNEL32 = WinDLL("kernel32", use_last_error=True) + + GetStdHandle = windll.kernel32.GetStdHandle + GetStdHandle.argtypes = [DWORD] + GetStdHandle.restype = HANDLE + + GetConsoleScreenBufferInfo = _KERNEL32.GetConsoleScreenBufferInfo + GetConsoleScreenBufferInfo.argtypes = [ + HANDLE, + ctypes.POINTER(CONSOLE_SCREEN_BUFFER_INFO), + ] + GetConsoleScreenBufferInfo.restype = BOOL + + ScrollConsoleScreenBuffer = _KERNEL32.ScrollConsoleScreenBufferW + ScrollConsoleScreenBuffer.argtypes = [ + HANDLE, + POINTER(SMALL_RECT), + POINTER(SMALL_RECT), + _COORD, + POINTER(CHAR_INFO), + ] + ScrollConsoleScreenBuffer.restype = BOOL + + GetConsoleMode = _KERNEL32.GetConsoleMode + GetConsoleMode.argtypes = [HANDLE, POINTER(DWORD)] + GetConsoleMode.restype = BOOL + + SetConsoleMode = _KERNEL32.SetConsoleMode + SetConsoleMode.argtypes = [HANDLE, DWORD] + SetConsoleMode.restype = BOOL + + ReadConsoleInput = _KERNEL32.ReadConsoleInputW + ReadConsoleInput.argtypes = [HANDLE, POINTER(INPUT_RECORD), DWORD, POINTER(DWORD)] + ReadConsoleInput.restype = BOOL + + + FlushConsoleInputBuffer = _KERNEL32.FlushConsoleInputBuffer + FlushConsoleInputBuffer.argtypes = [HANDLE] + FlushConsoleInputBuffer.restype = BOOL + + WaitForSingleObject = _KERNEL32.WaitForSingleObject + WaitForSingleObject.argtypes = [HANDLE, DWORD] + WaitForSingleObject.restype = DWORD + + OutHandle = GetStdHandle(STD_OUTPUT_HANDLE) + InHandle = GetStdHandle(STD_INPUT_HANDLE) +else: + + def _win_only(*args, **kwargs): + raise NotImplementedError("Windows only") + + GetStdHandle = _win_only + GetConsoleScreenBufferInfo = _win_only + ScrollConsoleScreenBuffer = _win_only + GetConsoleMode = _win_only + SetConsoleMode = _win_only + ReadConsoleInput = _win_only + FlushConsoleInputBuffer = _win_only + WaitForSingleObject = _win_only + OutHandle = 0 + InHandle = 0 diff --git a/PythonLib/full/_pyrepl/windows_eventqueue.py b/PythonLib/full/_pyrepl/windows_eventqueue.py new file mode 100644 index 00000000..d99722f9 --- /dev/null +++ b/PythonLib/full/_pyrepl/windows_eventqueue.py @@ -0,0 +1,42 @@ +""" +Windows event and VT sequence scanner +""" + +from .base_eventqueue import BaseEventQueue + + +# Reference: https://learn.microsoft.com/en-us/windows/console/console-virtual-terminal-sequences#input-sequences +VT_MAP: dict[bytes, str] = { + b'\x1b[A': 'up', + b'\x1b[B': 'down', + b'\x1b[C': 'right', + b'\x1b[D': 'left', + b'\x1b[1;5D': 'ctrl left', + b'\x1b[1;5C': 'ctrl right', + + b'\x1b[H': 'home', + b'\x1b[F': 'end', + + b'\x7f': 'backspace', + b'\x1b[2~': 'insert', + b'\x1b[3~': 'delete', + b'\x1b[5~': 'page up', + b'\x1b[6~': 'page down', + + b'\x1bOP': 'f1', + b'\x1bOQ': 'f2', + b'\x1bOR': 'f3', + b'\x1bOS': 'f4', + b'\x1b[15~': 'f5', + b'\x1b[17~': 'f6', + b'\x1b[18~': 'f7', + b'\x1b[19~': 'f8', + b'\x1b[20~': 'f9', + b'\x1b[21~': 'f10', + b'\x1b[23~': 'f11', + b'\x1b[24~': 'f12', +} + +class EventQueue(BaseEventQueue): + def __init__(self, encoding: str) -> None: + BaseEventQueue.__init__(self, encoding, VT_MAP) diff --git a/PythonLib/full/_sitebuiltins.py b/PythonLib/full/_sitebuiltins.py index c66269a5..81b36efc 100644 --- a/PythonLib/full/_sitebuiltins.py +++ b/PythonLib/full/_sitebuiltins.py @@ -36,7 +36,7 @@ def __init__(self, name, data, files=(), dirs=()): import os self.__name = name self.__data = data - self.__lines = None + self.__lines = [] self.__filenames = [os.path.join(dir, filename) for dir in dirs for filename in files] @@ -65,24 +65,12 @@ def __repr__(self): return "Type %s() to see the full %s text" % ((self.__name,)*2) def __call__(self): + from _pyrepl.pager import get_pager self.__setup() - prompt = 'Hit Return for more, or q (and Return) to quit: ' - lineno = 0 - while 1: - try: - for i in range(lineno, lineno + self.MAXLINES): - print(self.__lines[i]) - except IndexError: - break - else: - lineno += self.MAXLINES - key = None - while key is None: - key = input(prompt) - if key not in ('', 'q'): - key = None - if key == 'q': - break + + pager = get_pager() + text = "\n".join(self.__lines) + pager(text, title=self.__name) class _Helper(object): diff --git a/PythonLib/full/_strptime.py b/PythonLib/full/_strptime.py index 5d9df2b1..fc7e369c 100644 --- a/PythonLib/full/_strptime.py +++ b/PythonLib/full/_strptime.py @@ -10,9 +10,11 @@ strptime -- Calculates the time struct represented by the passed-in string """ +import os import time import locale import calendar +import re from re import compile as re_compile from re import sub as re_sub from re import IGNORECASE @@ -40,6 +42,29 @@ def _findall(haystack, needle): yield i i += len(needle) +def _fixmonths(months): + yield from months + # The lower case of 'İ' ('\u0130') is 'i\u0307'. + # The re module only supports 1-to-1 character matching in + # case-insensitive mode. + for s in months: + if 'i\u0307' in s: + yield s.replace('i\u0307', '\u0130') + +lzh_TW_alt_digits = ( + # 〇:一:二:三:四:五:六:七:八:九 + '\u3007', '\u4e00', '\u4e8c', '\u4e09', '\u56db', + '\u4e94', '\u516d', '\u4e03', '\u516b', '\u4e5d', + # 十:十一:十二:十三:十四:十五:十六:十七:十八:十九 + '\u5341', '\u5341\u4e00', '\u5341\u4e8c', '\u5341\u4e09', '\u5341\u56db', + '\u5341\u4e94', '\u5341\u516d', '\u5341\u4e03', '\u5341\u516b', '\u5341\u4e5d', + # 廿:廿一:廿二:廿三:廿四:廿五:廿六:廿七:廿八:廿九 + '\u5eff', '\u5eff\u4e00', '\u5eff\u4e8c', '\u5eff\u4e09', '\u5eff\u56db', + '\u5eff\u4e94', '\u5eff\u516d', '\u5eff\u4e03', '\u5eff\u516b', '\u5eff\u4e5d', + # 卅:卅一 + '\u5345', '\u5345\u4e00') + + class LocaleTime(object): """Stores and handles locale-specific information related to time. @@ -83,6 +108,7 @@ def __init__(self): self.__calc_weekday() self.__calc_month() self.__calc_am_pm() + self.__calc_alt_digits() self.__calc_timezone() self.__calc_date_time() if _getlang() != self.lang: @@ -118,9 +144,43 @@ def __calc_am_pm(self): am_pm.append(time.strftime("%p", time_tuple).lower().strip()) self.am_pm = am_pm + def __calc_alt_digits(self): + # Set self.LC_alt_digits by using time.strftime(). + + # The magic data should contain all decimal digits. + time_tuple = time.struct_time((1998, 1, 27, 10, 43, 56, 1, 27, 0)) + s = time.strftime("%x%X", time_tuple) + if s.isascii(): + # Fast path -- all digits are ASCII. + self.LC_alt_digits = () + return + + digits = ''.join(sorted(set(re.findall(r'\d', s)))) + if len(digits) == 10 and ord(digits[-1]) == ord(digits[0]) + 9: + # All 10 decimal digits from the same set. + if digits.isascii(): + # All digits are ASCII. + self.LC_alt_digits = () + return + + self.LC_alt_digits = [a + b for a in digits for b in digits] + # Test whether the numbers contain leading zero. + time_tuple2 = time.struct_time((2000, 1, 1, 1, 1, 1, 5, 1, 0)) + if self.LC_alt_digits[1] not in time.strftime("%x %X", time_tuple2): + self.LC_alt_digits[:10] = digits + return + + # Either non-Gregorian calendar or non-decimal numbers. + if {'\u4e00', '\u4e03', '\u4e5d', '\u5341', '\u5eff'}.issubset(s): + # lzh_TW + self.LC_alt_digits = lzh_TW_alt_digits + return + + self.LC_alt_digits = None + def __calc_date_time(self): - # Set self.date_time, self.date, & self.time by using - # time.strftime(). + # Set self.LC_date_time, self.LC_date, self.LC_time and + # self.LC_time_ampm by using time.strftime(). # Use (1999,3,17,22,44,55,2,76,0) for magic date because the amount of # overloaded numbers is minimized. The order in which searches for @@ -128,26 +188,32 @@ def __calc_date_time(self): # possible ambiguity for what something represents. time_tuple = time.struct_time((1999,3,17,22,44,55,2,76,0)) time_tuple2 = time.struct_time((1999,1,3,1,1,1,6,3,0)) - replacement_pairs = [ + replacement_pairs = [] + + # Non-ASCII digits + if self.LC_alt_digits or self.LC_alt_digits is None: + for n, d in [(19, '%OC'), (99, '%Oy'), (22, '%OH'), + (44, '%OM'), (55, '%OS'), (17, '%Od'), + (3, '%Om'), (2, '%Ow'), (10, '%OI')]: + if self.LC_alt_digits is None: + s = chr(0x660 + n // 10) + chr(0x660 + n % 10) + replacement_pairs.append((s, d)) + if n < 10: + replacement_pairs.append((s[1], d)) + elif len(self.LC_alt_digits) > n: + replacement_pairs.append((self.LC_alt_digits[n], d)) + else: + replacement_pairs.append((time.strftime(d, time_tuple), d)) + replacement_pairs += [ ('1999', '%Y'), ('99', '%y'), ('22', '%H'), ('44', '%M'), ('55', '%S'), ('76', '%j'), ('17', '%d'), ('03', '%m'), ('3', '%m'), # '3' needed for when no leading zero. ('2', '%w'), ('10', '%I'), - # Non-ASCII digits - ('\u0661\u0669\u0669\u0669', '%Y'), - ('\u0669\u0669', '%Oy'), - ('\u0662\u0662', '%OH'), - ('\u0664\u0664', '%OM'), - ('\u0665\u0665', '%OS'), - ('\u0661\u0667', '%Od'), - ('\u0660\u0663', '%Om'), - ('\u0663', '%Om'), - ('\u0662', '%Ow'), - ('\u0661\u0660', '%OI'), ] + date_time = [] - for directive in ('%c', '%x', '%X'): + for directive in ('%c', '%x', '%X', '%r'): current_format = time.strftime(directive, time_tuple).lower() current_format = current_format.replace('%', '%%') # The month and the day of the week formats are treated specially @@ -171,9 +237,10 @@ def __calc_date_time(self): if tz: current_format = current_format.replace(tz, "%Z") # Transform all non-ASCII digits to digits in range U+0660 to U+0669. - current_format = re_sub(r'\d(?3[0-1]|[1-2]\d|0[1-9]|[1-9]| [1-9])", 'f': r"(?P[0-9]{1,6})", - 'H': r"(?P2[0-3]|[0-1]\d|\d)", + 'H': r"(?P2[0-3]|[0-1]\d|\d| \d)", + 'k': r"(?P2[0-3]|[0-1]\d|\d| \d)", 'I': r"(?P1[0-2]|0[1-9]|[1-9]| [1-9])", + 'l': r"(?P1[0-2]|0[1-9]|[1-9]| [1-9])", 'G': r"(?P\d\d\d\d)", 'j': r"(?P36[0-6]|3[0-5]\d|[1-2]\d\d|0[1-9]\d|00[1-9]|[1-9]\d|0[1-9]|[1-9])", 'm': r"(?P1[0-2]|0[1-9]|[1-9])", @@ -304,23 +374,56 @@ def __init__(self, locale_time=None): 'z': r"(?P[+-]\d\d:?[0-5]\d(:?[0-5]\d(\.\d{1,6})?)?|(?-i:Z))", 'A': self.__seqToRE(self.locale_time.f_weekday, 'A'), 'a': self.__seqToRE(self.locale_time.a_weekday, 'a'), - 'B': self.__seqToRE(self.locale_time.f_month[1:], 'B'), - 'b': self.__seqToRE(self.locale_time.a_month[1:], 'b'), + 'B': self.__seqToRE(_fixmonths(self.locale_time.f_month[1:]), 'B'), + 'b': self.__seqToRE(_fixmonths(self.locale_time.a_month[1:]), 'b'), 'p': self.__seqToRE(self.locale_time.am_pm, 'p'), 'Z': self.__seqToRE((tz for tz_names in self.locale_time.timezone for tz in tz_names), 'Z'), '%': '%'} - for d in 'dmyHIMS': - mapping['O' + d] = r'(?P<%s>\d\d|\d| \d)' % d - mapping['Ow'] = r'(?P\d)' + if self.locale_time.LC_alt_digits is None: + for d in 'dmyCHIMS': + mapping['O' + d] = r'(?P<%s>\d\d|\d| \d)' % d + mapping['Ow'] = r'(?P\d)' + else: + mapping.update({ + 'Od': self.__seqToRE(self.locale_time.LC_alt_digits[1:32], 'd', + '3[0-1]|[1-2][0-9]|0[1-9]|[1-9]'), + 'Om': self.__seqToRE(self.locale_time.LC_alt_digits[1:13], 'm', + '1[0-2]|0[1-9]|[1-9]'), + 'Ow': self.__seqToRE(self.locale_time.LC_alt_digits[:7], 'w', + '[0-6]'), + 'Oy': self.__seqToRE(self.locale_time.LC_alt_digits, 'y', + '[0-9][0-9]'), + 'OC': self.__seqToRE(self.locale_time.LC_alt_digits, 'C', + '[0-9][0-9]'), + 'OH': self.__seqToRE(self.locale_time.LC_alt_digits[:24], 'H', + '2[0-3]|[0-1][0-9]|[0-9]'), + 'OI': self.__seqToRE(self.locale_time.LC_alt_digits[1:13], 'I', + '1[0-2]|0[1-9]|[1-9]'), + 'OM': self.__seqToRE(self.locale_time.LC_alt_digits[:60], 'M', + '[0-5][0-9]|[0-9]'), + 'OS': self.__seqToRE(self.locale_time.LC_alt_digits[:62], 'S', + '6[0-1]|[0-5][0-9]|[0-9]'), + }) + mapping.update({ + 'e': mapping['d'], + 'Oe': mapping['Od'], + 'P': mapping['p'], + 'Op': mapping['p'], + 'W': mapping['U'].replace('U', 'W'), + }) mapping['W'] = mapping['U'].replace('U', 'W') + base.__init__(mapping) + base.__setitem__('T', self.pattern('%H:%M:%S')) + base.__setitem__('R', self.pattern('%H:%M')) + base.__setitem__('r', self.pattern(self.locale_time.LC_time_ampm)) base.__setitem__('X', self.pattern(self.locale_time.LC_time)) base.__setitem__('x', self.pattern(self.locale_time.LC_date)) base.__setitem__('c', self.pattern(self.locale_time.LC_date_time)) - def __seqToRE(self, to_convert, directive): + def __seqToRE(self, to_convert, directive, altregex=None): """Convert a list to a regex string for matching a directive. Want possible matching values to be from longest to shortest. This @@ -336,8 +439,9 @@ def __seqToRE(self, to_convert, directive): else: return '' regex = '|'.join(re_escape(stuff) for stuff in to_convert) - regex = '(?P<%s>%s' % (directive, regex) - return '%s)' % regex + if altregex is not None: + regex += '|' + altregex + return '(?P<%s>%s)' % (directive, regex) def pattern(self, format): """Return regex pattern for the format string. @@ -352,9 +456,29 @@ def pattern(self, format): format = re_sub(r"([\\.^$*+?\(\){}\[\]|])", r"\\\1", format) format = re_sub(r'\s+', r'\\s+', format) format = re_sub(r"'", "['\u02bc]", format) # needed for br_FR + year_in_format = False + day_of_month_in_format = False def repl(m): - return self[m[1]] - format = re_sub(r'%(O?.)', repl, format) + format_char = m[1] + match format_char: + case 'Y' | 'y' | 'G': + nonlocal year_in_format + year_in_format = True + case 'd': + nonlocal day_of_month_in_format + day_of_month_in_format = True + return self[format_char] + format = re_sub(r'%[-_0^#]*[0-9]*([OE]?\\?.?)', repl, format) + if day_of_month_in_format and not year_in_format: + import warnings + warnings.warn("""\ +Parsing dates involving a day of month without a year specified is ambiguous +and fails to parse leap day. The default behavior will change in Python 3.15 +to either always raise an exception or to use a different default year (TBD). +To avoid trouble, add a specific year to the input & format. +See https://github.com/python/cpython/issues/70647.""", + DeprecationWarning, + skip_file_prefixes=(os.path.dirname(__file__),)) return format def compile(self, format): @@ -418,14 +542,13 @@ def _strptime(data_string, format="%a %b %d %H:%M:%S %Y"): # \\, in which case it was a stray % but with a space after it except KeyError as err: bad_directive = err.args[0] - if bad_directive == "\\": - bad_directive = "%" del err + bad_directive = bad_directive.replace('\\s', '') + if not bad_directive: + raise ValueError("stray %% in format '%s'" % format) from None + bad_directive = bad_directive.replace('\\', '', 1) raise ValueError("'%s' is a bad directive in format '%s'" % (bad_directive, format)) from None - # IndexError only occurs when the format string is "%" - except IndexError: - raise ValueError("stray %% in format '%s'" % format) from None _regex_cache[format] = format_regex found = format_regex.match(data_string) if not found: @@ -447,6 +570,15 @@ def _strptime(data_string, format="%a %b %d %H:%M:%S %Y"): # values weekday = julian = None found_dict = found.groupdict() + if locale_time.LC_alt_digits: + def parse_int(s): + try: + return locale_time.LC_alt_digits.index(s) + except ValueError: + return int(s) + else: + parse_int = int + for group_key in found_dict.keys(): # Directives not explicitly handled below: # c, x, X @@ -454,30 +586,34 @@ def _strptime(data_string, format="%a %b %d %H:%M:%S %Y"): # U, W # worthless without day of the week if group_key == 'y': - year = int(found_dict['y']) - # Open Group specification for strptime() states that a %y - #value in the range of [00, 68] is in the century 2000, while - #[69,99] is in the century 1900 - if year <= 68: - year += 2000 + year = parse_int(found_dict['y']) + if 'C' in found_dict: + century = parse_int(found_dict['C']) + year += century * 100 else: - year += 1900 + # Open Group specification for strptime() states that a %y + #value in the range of [00, 68] is in the century 2000, while + #[69,99] is in the century 1900 + if year <= 68: + year += 2000 + else: + year += 1900 elif group_key == 'Y': year = int(found_dict['Y']) elif group_key == 'G': iso_year = int(found_dict['G']) elif group_key == 'm': - month = int(found_dict['m']) + month = parse_int(found_dict['m']) elif group_key == 'B': month = locale_time.f_month.index(found_dict['B'].lower()) elif group_key == 'b': month = locale_time.a_month.index(found_dict['b'].lower()) elif group_key == 'd': - day = int(found_dict['d']) + day = parse_int(found_dict['d']) elif group_key == 'H': - hour = int(found_dict['H']) + hour = parse_int(found_dict['H']) elif group_key == 'I': - hour = int(found_dict['I']) + hour = parse_int(found_dict['I']) ampm = found_dict.get('p', '').lower() # If there was no AM/PM indicator, we'll treat this like AM if ampm in ('', locale_time.am_pm[0]): @@ -493,9 +629,9 @@ def _strptime(data_string, format="%a %b %d %H:%M:%S %Y"): if hour != 12: hour += 12 elif group_key == 'M': - minute = int(found_dict['M']) + minute = parse_int(found_dict['M']) elif group_key == 'S': - second = int(found_dict['S']) + second = parse_int(found_dict['S']) elif group_key == 'f': s = found_dict['f'] # Pad to always return microseconds. @@ -647,18 +783,40 @@ def _strptime_time(data_string, format="%a %b %d %H:%M:%S %Y"): tt = _strptime(data_string, format)[0] return time.struct_time(tt[:time._STRUCT_TM_ITEMS]) -def _strptime_datetime(cls, data_string, format="%a %b %d %H:%M:%S %Y"): - """Return a class cls instance based on the input string and the +def _strptime_datetime_date(cls, data_string, format="%a %b %d %Y"): + """Return a date instance based on the input string and the + format string.""" + tt, _, _ = _strptime(data_string, format) + args = tt[:3] + return cls(*args) + +def _parse_tz(tzname, gmtoff, gmtoff_fraction): + tzdelta = datetime_timedelta(seconds=gmtoff, microseconds=gmtoff_fraction) + if tzname: + return datetime_timezone(tzdelta, tzname) + else: + return datetime_timezone(tzdelta) + +def _strptime_datetime_time(cls, data_string, format="%H:%M:%S"): + """Return a time instance based on the input string and the format string.""" tt, fraction, gmtoff_fraction = _strptime(data_string, format) tzname, gmtoff = tt[-2:] - args = tt[:6] + (fraction,) - if gmtoff is not None: - tzdelta = datetime_timedelta(seconds=gmtoff, microseconds=gmtoff_fraction) - if tzname: - tz = datetime_timezone(tzdelta, tzname) - else: - tz = datetime_timezone(tzdelta) - args += (tz,) + args = tt[3:6] + (fraction,) + if gmtoff is None: + return cls(*args) + else: + tz = _parse_tz(tzname, gmtoff, gmtoff_fraction) + return cls(*args, tz) - return cls(*args) +def _strptime_datetime_datetime(cls, data_string, format="%a %b %d %H:%M:%S %Y"): + """Return a datetime instance based on the input string and the + format string.""" + tt, fraction, gmtoff_fraction = _strptime(data_string, format) + tzname, gmtoff = tt[-2:] + args = tt[:6] + (fraction,) + if gmtoff is None: + return cls(*args) + else: + tz = _parse_tz(tzname, gmtoff, gmtoff_fraction) + return cls(*args, tz) diff --git a/PythonLib/full/_threading_local.py b/PythonLib/full/_threading_local.py index b006d76c..0b9e5d3b 100644 --- a/PythonLib/full/_threading_local.py +++ b/PythonLib/full/_threading_local.py @@ -4,128 +4,6 @@ class. Depending on the version of Python you're using, there may be a faster one available. You should always import the `local` class from `threading`.) - -Thread-local objects support the management of thread-local data. -If you have data that you want to be local to a thread, simply create -a thread-local object and use its attributes: - - >>> mydata = local() - >>> mydata.number = 42 - >>> mydata.number - 42 - -You can also access the local-object's dictionary: - - >>> mydata.__dict__ - {'number': 42} - >>> mydata.__dict__.setdefault('widgets', []) - [] - >>> mydata.widgets - [] - -What's important about thread-local objects is that their data are -local to a thread. If we access the data in a different thread: - - >>> log = [] - >>> def f(): - ... items = sorted(mydata.__dict__.items()) - ... log.append(items) - ... mydata.number = 11 - ... log.append(mydata.number) - - >>> import threading - >>> thread = threading.Thread(target=f) - >>> thread.start() - >>> thread.join() - >>> log - [[], 11] - -we get different data. Furthermore, changes made in the other thread -don't affect data seen in this thread: - - >>> mydata.number - 42 - -Of course, values you get from a local object, including a __dict__ -attribute, are for whatever thread was current at the time the -attribute was read. For that reason, you generally don't want to save -these values across threads, as they apply only to the thread they -came from. - -You can create custom local objects by subclassing the local class: - - >>> class MyLocal(local): - ... number = 2 - ... def __init__(self, /, **kw): - ... self.__dict__.update(kw) - ... def squared(self): - ... return self.number ** 2 - -This can be useful to support default values, methods and -initialization. Note that if you define an __init__ method, it will be -called each time the local object is used in a separate thread. This -is necessary to initialize each thread's dictionary. - -Now if we create a local object: - - >>> mydata = MyLocal(color='red') - -Now we have a default number: - - >>> mydata.number - 2 - -an initial color: - - >>> mydata.color - 'red' - >>> del mydata.color - -And a method that operates on the data: - - >>> mydata.squared() - 4 - -As before, we can access the data in a separate thread: - - >>> log = [] - >>> thread = threading.Thread(target=f) - >>> thread.start() - >>> thread.join() - >>> log - [[('color', 'red')], 11] - -without affecting this thread's data: - - >>> mydata.number - 2 - >>> mydata.color - Traceback (most recent call last): - ... - AttributeError: 'MyLocal' object has no attribute 'color' - -Note that subclasses can define slots, but they are not thread -local. They are shared across threads: - - >>> class MyLocal(local): - ... __slots__ = 'number' - - >>> mydata = MyLocal() - >>> mydata.number = 42 - >>> mydata.color = 'red' - -So, the separate thread: - - >>> thread = threading.Thread(target=f) - >>> thread.start() - >>> thread.join() - -affects what we see: - - >>> mydata.number - 11 - ->>> del mydata """ from weakref import ref diff --git a/PythonLib/full/_weakrefset.py b/PythonLib/full/_weakrefset.py index 489eec71..d1c7fcae 100644 --- a/PythonLib/full/_weakrefset.py +++ b/PythonLib/full/_weakrefset.py @@ -8,69 +8,29 @@ __all__ = ['WeakSet'] -class _IterationGuard: - # This context manager registers itself in the current iterators of the - # weak container, such as to delay all removals until the context manager - # exits. - # This technique should be relatively thread-safe (since sets are). - - def __init__(self, weakcontainer): - # Don't create cycles - self.weakcontainer = ref(weakcontainer) - - def __enter__(self): - w = self.weakcontainer() - if w is not None: - w._iterating.add(self) - return self - - def __exit__(self, e, t, b): - w = self.weakcontainer() - if w is not None: - s = w._iterating - s.remove(self) - if not s: - w._commit_removals() - - class WeakSet: def __init__(self, data=None): self.data = set() + def _remove(item, selfref=ref(self)): self = selfref() if self is not None: - if self._iterating: - self._pending_removals.append(item) - else: - self.data.discard(item) + self.data.discard(item) + self._remove = _remove - # A list of keys to be removed - self._pending_removals = [] - self._iterating = set() if data is not None: self.update(data) - def _commit_removals(self): - pop = self._pending_removals.pop - discard = self.data.discard - while True: - try: - item = pop() - except IndexError: - return - discard(item) - def __iter__(self): - with _IterationGuard(self): - for itemref in self.data: - item = itemref() - if item is not None: - # Caveat: the iterator will keep a strong reference to - # `item` until it is resumed or closed. - yield item + for itemref in self.data.copy(): + item = itemref() + if item is not None: + # Caveat: the iterator will keep a strong reference to + # `item` until it is resumed or closed. + yield item def __len__(self): - return len(self.data) - len(self._pending_removals) + return len(self.data) def __contains__(self, item): try: @@ -83,21 +43,15 @@ def __reduce__(self): return self.__class__, (list(self),), self.__getstate__() def add(self, item): - if self._pending_removals: - self._commit_removals() self.data.add(ref(item, self._remove)) def clear(self): - if self._pending_removals: - self._commit_removals() self.data.clear() def copy(self): return self.__class__(self) def pop(self): - if self._pending_removals: - self._commit_removals() while True: try: itemref = self.data.pop() @@ -108,18 +62,12 @@ def pop(self): return item def remove(self, item): - if self._pending_removals: - self._commit_removals() self.data.remove(ref(item)) def discard(self, item): - if self._pending_removals: - self._commit_removals() self.data.discard(ref(item)) def update(self, other): - if self._pending_removals: - self._commit_removals() for element in other: self.add(element) @@ -136,8 +84,6 @@ def difference(self, other): def difference_update(self, other): self.__isub__(other) def __isub__(self, other): - if self._pending_removals: - self._commit_removals() if self is other: self.data.clear() else: @@ -151,8 +97,6 @@ def intersection(self, other): def intersection_update(self, other): self.__iand__(other) def __iand__(self, other): - if self._pending_removals: - self._commit_removals() self.data.intersection_update(ref(item) for item in other) return self @@ -184,8 +128,6 @@ def symmetric_difference(self, other): def symmetric_difference_update(self, other): self.__ixor__(other) def __ixor__(self, other): - if self._pending_removals: - self._commit_removals() if self is other: self.data.clear() else: diff --git a/PythonLib/full/aifc.py b/PythonLib/full/aifc.py deleted file mode 100644 index 5254987e..00000000 --- a/PythonLib/full/aifc.py +++ /dev/null @@ -1,984 +0,0 @@ -"""Stuff to parse AIFF-C and AIFF files. - -Unless explicitly stated otherwise, the description below is true -both for AIFF-C files and AIFF files. - -An AIFF-C file has the following structure. - - +-----------------+ - | FORM | - +-----------------+ - | | - +----+------------+ - | | AIFC | - | +------------+ - | | | - | | . | - | | . | - | | . | - +----+------------+ - -An AIFF file has the string "AIFF" instead of "AIFC". - -A chunk consists of an identifier (4 bytes) followed by a size (4 bytes, -big endian order), followed by the data. The size field does not include -the size of the 8 byte header. - -The following chunk types are recognized. - - FVER - (AIFF-C only). - MARK - <# of markers> (2 bytes) - list of markers: - (2 bytes, must be > 0) - (4 bytes) - ("pstring") - COMM - <# of channels> (2 bytes) - <# of sound frames> (4 bytes) - (2 bytes) - (10 bytes, IEEE 80-bit extended - floating point) - in AIFF-C files only: - (4 bytes) - ("pstring") - SSND - (4 bytes, not used by this program) - (4 bytes, not used by this program) - - -A pstring consists of 1 byte length, a string of characters, and 0 or 1 -byte pad to make the total length even. - -Usage. - -Reading AIFF files: - f = aifc.open(file, 'r') -where file is either the name of a file or an open file pointer. -The open file pointer must have methods read(), seek(), and close(). -In some types of audio files, if the setpos() method is not used, -the seek() method is not necessary. - -This returns an instance of a class with the following public methods: - getnchannels() -- returns number of audio channels (1 for - mono, 2 for stereo) - getsampwidth() -- returns sample width in bytes - getframerate() -- returns sampling frequency - getnframes() -- returns number of audio frames - getcomptype() -- returns compression type ('NONE' for AIFF files) - getcompname() -- returns human-readable version of - compression type ('not compressed' for AIFF files) - getparams() -- returns a namedtuple consisting of all of the - above in the above order - getmarkers() -- get the list of marks in the audio file or None - if there are no marks - getmark(id) -- get mark with the specified id (raises an error - if the mark does not exist) - readframes(n) -- returns at most n frames of audio - rewind() -- rewind to the beginning of the audio stream - setpos(pos) -- seek to the specified position - tell() -- return the current position - close() -- close the instance (make it unusable) -The position returned by tell(), the position given to setpos() and -the position of marks are all compatible and have nothing to do with -the actual position in the file. -The close() method is called automatically when the class instance -is destroyed. - -Writing AIFF files: - f = aifc.open(file, 'w') -where file is either the name of a file or an open file pointer. -The open file pointer must have methods write(), tell(), seek(), and -close(). - -This returns an instance of a class with the following public methods: - aiff() -- create an AIFF file (AIFF-C default) - aifc() -- create an AIFF-C file - setnchannels(n) -- set the number of channels - setsampwidth(n) -- set the sample width - setframerate(n) -- set the frame rate - setnframes(n) -- set the number of frames - setcomptype(type, name) - -- set the compression type and the - human-readable compression type - setparams(tuple) - -- set all parameters at once - setmark(id, pos, name) - -- add specified mark to the list of marks - tell() -- return current position in output file (useful - in combination with setmark()) - writeframesraw(data) - -- write audio frames without pathing up the - file header - writeframes(data) - -- write audio frames and patch up the file header - close() -- patch up the file header and close the - output file -You should set the parameters before the first writeframesraw or -writeframes. The total number of frames does not need to be set, -but when it is set to the correct value, the header does not have to -be patched up. -It is best to first set all parameters, perhaps possibly the -compression type, and then write audio frames using writeframesraw. -When all frames have been written, either call writeframes(b'') or -close() to patch up the sizes in the header. -Marks can be added anytime. If there are any marks, you must call -close() after all frames have been written. -The close() method is called automatically when the class instance -is destroyed. - -When a file is opened with the extension '.aiff', an AIFF file is -written, otherwise an AIFF-C file is written. This default can be -changed by calling aiff() or aifc() before the first writeframes or -writeframesraw. -""" - -import struct -import builtins -import warnings - -__all__ = ["Error", "open"] - - -warnings._deprecated(__name__, remove=(3, 13)) - - -class Error(Exception): - pass - -_AIFC_version = 0xA2805140 # Version 1 of AIFF-C - -def _read_long(file): - try: - return struct.unpack('>l', file.read(4))[0] - except struct.error: - raise EOFError from None - -def _read_ulong(file): - try: - return struct.unpack('>L', file.read(4))[0] - except struct.error: - raise EOFError from None - -def _read_short(file): - try: - return struct.unpack('>h', file.read(2))[0] - except struct.error: - raise EOFError from None - -def _read_ushort(file): - try: - return struct.unpack('>H', file.read(2))[0] - except struct.error: - raise EOFError from None - -def _read_string(file): - length = ord(file.read(1)) - if length == 0: - data = b'' - else: - data = file.read(length) - if length & 1 == 0: - dummy = file.read(1) - return data - -_HUGE_VAL = 1.79769313486231e+308 # See - -def _read_float(f): # 10 bytes - expon = _read_short(f) # 2 bytes - sign = 1 - if expon < 0: - sign = -1 - expon = expon + 0x8000 - himant = _read_ulong(f) # 4 bytes - lomant = _read_ulong(f) # 4 bytes - if expon == himant == lomant == 0: - f = 0.0 - elif expon == 0x7FFF: - f = _HUGE_VAL - else: - expon = expon - 16383 - f = (himant * 0x100000000 + lomant) * pow(2.0, expon - 63) - return sign * f - -def _write_short(f, x): - f.write(struct.pack('>h', x)) - -def _write_ushort(f, x): - f.write(struct.pack('>H', x)) - -def _write_long(f, x): - f.write(struct.pack('>l', x)) - -def _write_ulong(f, x): - f.write(struct.pack('>L', x)) - -def _write_string(f, s): - if len(s) > 255: - raise ValueError("string exceeds maximum pstring length") - f.write(struct.pack('B', len(s))) - f.write(s) - if len(s) & 1 == 0: - f.write(b'\x00') - -def _write_float(f, x): - import math - if x < 0: - sign = 0x8000 - x = x * -1 - else: - sign = 0 - if x == 0: - expon = 0 - himant = 0 - lomant = 0 - else: - fmant, expon = math.frexp(x) - if expon > 16384 or fmant >= 1 or fmant != fmant: # Infinity or NaN - expon = sign|0x7FFF - himant = 0 - lomant = 0 - else: # Finite - expon = expon + 16382 - if expon < 0: # denormalized - fmant = math.ldexp(fmant, expon) - expon = 0 - expon = expon | sign - fmant = math.ldexp(fmant, 32) - fsmant = math.floor(fmant) - himant = int(fsmant) - fmant = math.ldexp(fmant - fsmant, 32) - fsmant = math.floor(fmant) - lomant = int(fsmant) - _write_ushort(f, expon) - _write_ulong(f, himant) - _write_ulong(f, lomant) - -with warnings.catch_warnings(): - warnings.simplefilter("ignore", DeprecationWarning) - from chunk import Chunk -from collections import namedtuple - -_aifc_params = namedtuple('_aifc_params', - 'nchannels sampwidth framerate nframes comptype compname') - -_aifc_params.nchannels.__doc__ = 'Number of audio channels (1 for mono, 2 for stereo)' -_aifc_params.sampwidth.__doc__ = 'Sample width in bytes' -_aifc_params.framerate.__doc__ = 'Sampling frequency' -_aifc_params.nframes.__doc__ = 'Number of audio frames' -_aifc_params.comptype.__doc__ = 'Compression type ("NONE" for AIFF files)' -_aifc_params.compname.__doc__ = ("""\ -A human-readable version of the compression type -('not compressed' for AIFF files)""") - - -class Aifc_read: - # Variables used in this class: - # - # These variables are available to the user though appropriate - # methods of this class: - # _file -- the open file with methods read(), close(), and seek() - # set through the __init__() method - # _nchannels -- the number of audio channels - # available through the getnchannels() method - # _nframes -- the number of audio frames - # available through the getnframes() method - # _sampwidth -- the number of bytes per audio sample - # available through the getsampwidth() method - # _framerate -- the sampling frequency - # available through the getframerate() method - # _comptype -- the AIFF-C compression type ('NONE' if AIFF) - # available through the getcomptype() method - # _compname -- the human-readable AIFF-C compression type - # available through the getcomptype() method - # _markers -- the marks in the audio file - # available through the getmarkers() and getmark() - # methods - # _soundpos -- the position in the audio stream - # available through the tell() method, set through the - # setpos() method - # - # These variables are used internally only: - # _version -- the AIFF-C version number - # _decomp -- the decompressor from builtin module cl - # _comm_chunk_read -- 1 iff the COMM chunk has been read - # _aifc -- 1 iff reading an AIFF-C file - # _ssnd_seek_needed -- 1 iff positioned correctly in audio - # file for readframes() - # _ssnd_chunk -- instantiation of a chunk class for the SSND chunk - # _framesize -- size of one frame in the file - - _file = None # Set here since __del__ checks it - - def initfp(self, file): - self._version = 0 - self._convert = None - self._markers = [] - self._soundpos = 0 - self._file = file - chunk = Chunk(file) - if chunk.getname() != b'FORM': - raise Error('file does not start with FORM id') - formdata = chunk.read(4) - if formdata == b'AIFF': - self._aifc = 0 - elif formdata == b'AIFC': - self._aifc = 1 - else: - raise Error('not an AIFF or AIFF-C file') - self._comm_chunk_read = 0 - self._ssnd_chunk = None - while 1: - self._ssnd_seek_needed = 1 - try: - chunk = Chunk(self._file) - except EOFError: - break - chunkname = chunk.getname() - if chunkname == b'COMM': - self._read_comm_chunk(chunk) - self._comm_chunk_read = 1 - elif chunkname == b'SSND': - self._ssnd_chunk = chunk - dummy = chunk.read(8) - self._ssnd_seek_needed = 0 - elif chunkname == b'FVER': - self._version = _read_ulong(chunk) - elif chunkname == b'MARK': - self._readmark(chunk) - chunk.skip() - if not self._comm_chunk_read or not self._ssnd_chunk: - raise Error('COMM chunk and/or SSND chunk missing') - - def __init__(self, f): - if isinstance(f, str): - file_object = builtins.open(f, 'rb') - try: - self.initfp(file_object) - except: - file_object.close() - raise - else: - # assume it is an open file object already - self.initfp(f) - - def __enter__(self): - return self - - def __exit__(self, *args): - self.close() - - # - # User visible methods. - # - def getfp(self): - return self._file - - def rewind(self): - self._ssnd_seek_needed = 1 - self._soundpos = 0 - - def close(self): - file = self._file - if file is not None: - self._file = None - file.close() - - def tell(self): - return self._soundpos - - def getnchannels(self): - return self._nchannels - - def getnframes(self): - return self._nframes - - def getsampwidth(self): - return self._sampwidth - - def getframerate(self): - return self._framerate - - def getcomptype(self): - return self._comptype - - def getcompname(self): - return self._compname - -## def getversion(self): -## return self._version - - def getparams(self): - return _aifc_params(self.getnchannels(), self.getsampwidth(), - self.getframerate(), self.getnframes(), - self.getcomptype(), self.getcompname()) - - def getmarkers(self): - if len(self._markers) == 0: - return None - return self._markers - - def getmark(self, id): - for marker in self._markers: - if id == marker[0]: - return marker - raise Error('marker {0!r} does not exist'.format(id)) - - def setpos(self, pos): - if pos < 0 or pos > self._nframes: - raise Error('position not in range') - self._soundpos = pos - self._ssnd_seek_needed = 1 - - def readframes(self, nframes): - if self._ssnd_seek_needed: - self._ssnd_chunk.seek(0) - dummy = self._ssnd_chunk.read(8) - pos = self._soundpos * self._framesize - if pos: - self._ssnd_chunk.seek(pos + 8) - self._ssnd_seek_needed = 0 - if nframes == 0: - return b'' - data = self._ssnd_chunk.read(nframes * self._framesize) - if self._convert and data: - data = self._convert(data) - self._soundpos = self._soundpos + len(data) // (self._nchannels - * self._sampwidth) - return data - - # - # Internal methods. - # - - def _alaw2lin(self, data): - with warnings.catch_warnings(): - warnings.simplefilter('ignore', category=DeprecationWarning) - import audioop - return audioop.alaw2lin(data, 2) - - def _ulaw2lin(self, data): - with warnings.catch_warnings(): - warnings.simplefilter('ignore', category=DeprecationWarning) - import audioop - return audioop.ulaw2lin(data, 2) - - def _adpcm2lin(self, data): - with warnings.catch_warnings(): - warnings.simplefilter('ignore', category=DeprecationWarning) - import audioop - if not hasattr(self, '_adpcmstate'): - # first time - self._adpcmstate = None - data, self._adpcmstate = audioop.adpcm2lin(data, 2, self._adpcmstate) - return data - - def _sowt2lin(self, data): - with warnings.catch_warnings(): - warnings.simplefilter('ignore', category=DeprecationWarning) - import audioop - return audioop.byteswap(data, 2) - - def _read_comm_chunk(self, chunk): - self._nchannels = _read_short(chunk) - self._nframes = _read_long(chunk) - self._sampwidth = (_read_short(chunk) + 7) // 8 - self._framerate = int(_read_float(chunk)) - if self._sampwidth <= 0: - raise Error('bad sample width') - if self._nchannels <= 0: - raise Error('bad # of channels') - self._framesize = self._nchannels * self._sampwidth - if self._aifc: - #DEBUG: SGI's soundeditor produces a bad size :-( - kludge = 0 - if chunk.chunksize == 18: - kludge = 1 - warnings.warn('Warning: bad COMM chunk size') - chunk.chunksize = 23 - #DEBUG end - self._comptype = chunk.read(4) - #DEBUG start - if kludge: - length = ord(chunk.file.read(1)) - if length & 1 == 0: - length = length + 1 - chunk.chunksize = chunk.chunksize + length - chunk.file.seek(-1, 1) - #DEBUG end - self._compname = _read_string(chunk) - if self._comptype != b'NONE': - if self._comptype == b'G722': - self._convert = self._adpcm2lin - elif self._comptype in (b'ulaw', b'ULAW'): - self._convert = self._ulaw2lin - elif self._comptype in (b'alaw', b'ALAW'): - self._convert = self._alaw2lin - elif self._comptype in (b'sowt', b'SOWT'): - self._convert = self._sowt2lin - else: - raise Error('unsupported compression type') - self._sampwidth = 2 - else: - self._comptype = b'NONE' - self._compname = b'not compressed' - - def _readmark(self, chunk): - nmarkers = _read_short(chunk) - # Some files appear to contain invalid counts. - # Cope with this by testing for EOF. - try: - for i in range(nmarkers): - id = _read_short(chunk) - pos = _read_long(chunk) - name = _read_string(chunk) - if pos or name: - # some files appear to have - # dummy markers consisting of - # a position 0 and name '' - self._markers.append((id, pos, name)) - except EOFError: - w = ('Warning: MARK chunk contains only %s marker%s instead of %s' % - (len(self._markers), '' if len(self._markers) == 1 else 's', - nmarkers)) - warnings.warn(w) - -class Aifc_write: - # Variables used in this class: - # - # These variables are user settable through appropriate methods - # of this class: - # _file -- the open file with methods write(), close(), tell(), seek() - # set through the __init__() method - # _comptype -- the AIFF-C compression type ('NONE' in AIFF) - # set through the setcomptype() or setparams() method - # _compname -- the human-readable AIFF-C compression type - # set through the setcomptype() or setparams() method - # _nchannels -- the number of audio channels - # set through the setnchannels() or setparams() method - # _sampwidth -- the number of bytes per audio sample - # set through the setsampwidth() or setparams() method - # _framerate -- the sampling frequency - # set through the setframerate() or setparams() method - # _nframes -- the number of audio frames written to the header - # set through the setnframes() or setparams() method - # _aifc -- whether we're writing an AIFF-C file or an AIFF file - # set through the aifc() method, reset through the - # aiff() method - # - # These variables are used internally only: - # _version -- the AIFF-C version number - # _comp -- the compressor from builtin module cl - # _nframeswritten -- the number of audio frames actually written - # _datalength -- the size of the audio samples written to the header - # _datawritten -- the size of the audio samples actually written - - _file = None # Set here since __del__ checks it - - def __init__(self, f): - if isinstance(f, str): - file_object = builtins.open(f, 'wb') - try: - self.initfp(file_object) - except: - file_object.close() - raise - - # treat .aiff file extensions as non-compressed audio - if f.endswith('.aiff'): - self._aifc = 0 - else: - # assume it is an open file object already - self.initfp(f) - - def initfp(self, file): - self._file = file - self._version = _AIFC_version - self._comptype = b'NONE' - self._compname = b'not compressed' - self._convert = None - self._nchannels = 0 - self._sampwidth = 0 - self._framerate = 0 - self._nframes = 0 - self._nframeswritten = 0 - self._datawritten = 0 - self._datalength = 0 - self._markers = [] - self._marklength = 0 - self._aifc = 1 # AIFF-C is default - - def __del__(self): - self.close() - - def __enter__(self): - return self - - def __exit__(self, *args): - self.close() - - # - # User visible methods. - # - def aiff(self): - if self._nframeswritten: - raise Error('cannot change parameters after starting to write') - self._aifc = 0 - - def aifc(self): - if self._nframeswritten: - raise Error('cannot change parameters after starting to write') - self._aifc = 1 - - def setnchannels(self, nchannels): - if self._nframeswritten: - raise Error('cannot change parameters after starting to write') - if nchannels < 1: - raise Error('bad # of channels') - self._nchannels = nchannels - - def getnchannels(self): - if not self._nchannels: - raise Error('number of channels not set') - return self._nchannels - - def setsampwidth(self, sampwidth): - if self._nframeswritten: - raise Error('cannot change parameters after starting to write') - if sampwidth < 1 or sampwidth > 4: - raise Error('bad sample width') - self._sampwidth = sampwidth - - def getsampwidth(self): - if not self._sampwidth: - raise Error('sample width not set') - return self._sampwidth - - def setframerate(self, framerate): - if self._nframeswritten: - raise Error('cannot change parameters after starting to write') - if framerate <= 0: - raise Error('bad frame rate') - self._framerate = framerate - - def getframerate(self): - if not self._framerate: - raise Error('frame rate not set') - return self._framerate - - def setnframes(self, nframes): - if self._nframeswritten: - raise Error('cannot change parameters after starting to write') - self._nframes = nframes - - def getnframes(self): - return self._nframeswritten - - def setcomptype(self, comptype, compname): - if self._nframeswritten: - raise Error('cannot change parameters after starting to write') - if comptype not in (b'NONE', b'ulaw', b'ULAW', - b'alaw', b'ALAW', b'G722', b'sowt', b'SOWT'): - raise Error('unsupported compression type') - self._comptype = comptype - self._compname = compname - - def getcomptype(self): - return self._comptype - - def getcompname(self): - return self._compname - -## def setversion(self, version): -## if self._nframeswritten: -## raise Error, 'cannot change parameters after starting to write' -## self._version = version - - def setparams(self, params): - nchannels, sampwidth, framerate, nframes, comptype, compname = params - if self._nframeswritten: - raise Error('cannot change parameters after starting to write') - if comptype not in (b'NONE', b'ulaw', b'ULAW', - b'alaw', b'ALAW', b'G722', b'sowt', b'SOWT'): - raise Error('unsupported compression type') - self.setnchannels(nchannels) - self.setsampwidth(sampwidth) - self.setframerate(framerate) - self.setnframes(nframes) - self.setcomptype(comptype, compname) - - def getparams(self): - if not self._nchannels or not self._sampwidth or not self._framerate: - raise Error('not all parameters set') - return _aifc_params(self._nchannels, self._sampwidth, self._framerate, - self._nframes, self._comptype, self._compname) - - def setmark(self, id, pos, name): - if id <= 0: - raise Error('marker ID must be > 0') - if pos < 0: - raise Error('marker position must be >= 0') - if not isinstance(name, bytes): - raise Error('marker name must be bytes') - for i in range(len(self._markers)): - if id == self._markers[i][0]: - self._markers[i] = id, pos, name - return - self._markers.append((id, pos, name)) - - def getmark(self, id): - for marker in self._markers: - if id == marker[0]: - return marker - raise Error('marker {0!r} does not exist'.format(id)) - - def getmarkers(self): - if len(self._markers) == 0: - return None - return self._markers - - def tell(self): - return self._nframeswritten - - def writeframesraw(self, data): - if not isinstance(data, (bytes, bytearray)): - data = memoryview(data).cast('B') - self._ensure_header_written(len(data)) - nframes = len(data) // (self._sampwidth * self._nchannels) - if self._convert: - data = self._convert(data) - self._file.write(data) - self._nframeswritten = self._nframeswritten + nframes - self._datawritten = self._datawritten + len(data) - - def writeframes(self, data): - self.writeframesraw(data) - if self._nframeswritten != self._nframes or \ - self._datalength != self._datawritten: - self._patchheader() - - def close(self): - if self._file is None: - return - try: - self._ensure_header_written(0) - if self._datawritten & 1: - # quick pad to even size - self._file.write(b'\x00') - self._datawritten = self._datawritten + 1 - self._writemarkers() - if self._nframeswritten != self._nframes or \ - self._datalength != self._datawritten or \ - self._marklength: - self._patchheader() - finally: - # Prevent ref cycles - self._convert = None - f = self._file - self._file = None - f.close() - - # - # Internal methods. - # - - def _lin2alaw(self, data): - with warnings.catch_warnings(): - warnings.simplefilter('ignore', category=DeprecationWarning) - import audioop - return audioop.lin2alaw(data, 2) - - def _lin2ulaw(self, data): - with warnings.catch_warnings(): - warnings.simplefilter('ignore', category=DeprecationWarning) - import audioop - return audioop.lin2ulaw(data, 2) - - def _lin2adpcm(self, data): - with warnings.catch_warnings(): - warnings.simplefilter('ignore', category=DeprecationWarning) - import audioop - if not hasattr(self, '_adpcmstate'): - self._adpcmstate = None - data, self._adpcmstate = audioop.lin2adpcm(data, 2, self._adpcmstate) - return data - - def _lin2sowt(self, data): - with warnings.catch_warnings(): - warnings.simplefilter('ignore', category=DeprecationWarning) - import audioop - return audioop.byteswap(data, 2) - - def _ensure_header_written(self, datasize): - if not self._nframeswritten: - if self._comptype in (b'ULAW', b'ulaw', - b'ALAW', b'alaw', b'G722', - b'sowt', b'SOWT'): - if not self._sampwidth: - self._sampwidth = 2 - if self._sampwidth != 2: - raise Error('sample width must be 2 when compressing ' - 'with ulaw/ULAW, alaw/ALAW, sowt/SOWT ' - 'or G7.22 (ADPCM)') - if not self._nchannels: - raise Error('# channels not specified') - if not self._sampwidth: - raise Error('sample width not specified') - if not self._framerate: - raise Error('sampling rate not specified') - self._write_header(datasize) - - def _init_compression(self): - if self._comptype == b'G722': - self._convert = self._lin2adpcm - elif self._comptype in (b'ulaw', b'ULAW'): - self._convert = self._lin2ulaw - elif self._comptype in (b'alaw', b'ALAW'): - self._convert = self._lin2alaw - elif self._comptype in (b'sowt', b'SOWT'): - self._convert = self._lin2sowt - - def _write_header(self, initlength): - if self._aifc and self._comptype != b'NONE': - self._init_compression() - self._file.write(b'FORM') - if not self._nframes: - self._nframes = initlength // (self._nchannels * self._sampwidth) - self._datalength = self._nframes * self._nchannels * self._sampwidth - if self._datalength & 1: - self._datalength = self._datalength + 1 - if self._aifc: - if self._comptype in (b'ulaw', b'ULAW', b'alaw', b'ALAW'): - self._datalength = self._datalength // 2 - if self._datalength & 1: - self._datalength = self._datalength + 1 - elif self._comptype == b'G722': - self._datalength = (self._datalength + 3) // 4 - if self._datalength & 1: - self._datalength = self._datalength + 1 - try: - self._form_length_pos = self._file.tell() - except (AttributeError, OSError): - self._form_length_pos = None - commlength = self._write_form_length(self._datalength) - if self._aifc: - self._file.write(b'AIFC') - self._file.write(b'FVER') - _write_ulong(self._file, 4) - _write_ulong(self._file, self._version) - else: - self._file.write(b'AIFF') - self._file.write(b'COMM') - _write_ulong(self._file, commlength) - _write_short(self._file, self._nchannels) - if self._form_length_pos is not None: - self._nframes_pos = self._file.tell() - _write_ulong(self._file, self._nframes) - if self._comptype in (b'ULAW', b'ulaw', b'ALAW', b'alaw', b'G722'): - _write_short(self._file, 8) - else: - _write_short(self._file, self._sampwidth * 8) - _write_float(self._file, self._framerate) - if self._aifc: - self._file.write(self._comptype) - _write_string(self._file, self._compname) - self._file.write(b'SSND') - if self._form_length_pos is not None: - self._ssnd_length_pos = self._file.tell() - _write_ulong(self._file, self._datalength + 8) - _write_ulong(self._file, 0) - _write_ulong(self._file, 0) - - def _write_form_length(self, datalength): - if self._aifc: - commlength = 18 + 5 + len(self._compname) - if commlength & 1: - commlength = commlength + 1 - verslength = 12 - else: - commlength = 18 - verslength = 0 - _write_ulong(self._file, 4 + verslength + self._marklength + \ - 8 + commlength + 16 + datalength) - return commlength - - def _patchheader(self): - curpos = self._file.tell() - if self._datawritten & 1: - datalength = self._datawritten + 1 - self._file.write(b'\x00') - else: - datalength = self._datawritten - if datalength == self._datalength and \ - self._nframes == self._nframeswritten and \ - self._marklength == 0: - self._file.seek(curpos, 0) - return - self._file.seek(self._form_length_pos, 0) - dummy = self._write_form_length(datalength) - self._file.seek(self._nframes_pos, 0) - _write_ulong(self._file, self._nframeswritten) - self._file.seek(self._ssnd_length_pos, 0) - _write_ulong(self._file, datalength + 8) - self._file.seek(curpos, 0) - self._nframes = self._nframeswritten - self._datalength = datalength - - def _writemarkers(self): - if len(self._markers) == 0: - return - self._file.write(b'MARK') - length = 2 - for marker in self._markers: - id, pos, name = marker - length = length + len(name) + 1 + 6 - if len(name) & 1 == 0: - length = length + 1 - _write_ulong(self._file, length) - self._marklength = length + 8 - _write_short(self._file, len(self._markers)) - for marker in self._markers: - id, pos, name = marker - _write_short(self._file, id) - _write_ulong(self._file, pos) - _write_string(self._file, name) - -def open(f, mode=None): - if mode is None: - if hasattr(f, 'mode'): - mode = f.mode - else: - mode = 'rb' - if mode in ('r', 'rb'): - return Aifc_read(f) - elif mode in ('w', 'wb'): - return Aifc_write(f) - else: - raise Error("mode must be 'r', 'rb', 'w', or 'wb'") - - -if __name__ == '__main__': - import sys - if not sys.argv[1:]: - sys.argv.append('/usr/demos/data/audio/bach.aiff') - fn = sys.argv[1] - with open(fn, 'r') as f: - print("Reading", fn) - print("nchannels =", f.getnchannels()) - print("nframes =", f.getnframes()) - print("sampwidth =", f.getsampwidth()) - print("framerate =", f.getframerate()) - print("comptype =", f.getcomptype()) - print("compname =", f.getcompname()) - if sys.argv[2:]: - gn = sys.argv[2] - print("Writing", gn) - with open(gn, 'w') as g: - g.setparams(f.getparams()) - while 1: - data = f.readframes(1024) - if not data: - break - g.writeframes(data) - print("Done.") diff --git a/PythonLib/full/annotationlib.py b/PythonLib/full/annotationlib.py new file mode 100644 index 00000000..832d160d --- /dev/null +++ b/PythonLib/full/annotationlib.py @@ -0,0 +1,1152 @@ +"""Helpers for introspecting and wrapping annotations.""" + +import ast +import builtins +import enum +import keyword +import sys +import types + +__all__ = [ + "Format", + "ForwardRef", + "call_annotate_function", + "call_evaluate_function", + "get_annotate_from_class_namespace", + "get_annotations", + "annotations_to_string", + "type_repr", +] + + +class Format(enum.IntEnum): + VALUE = 1 + VALUE_WITH_FAKE_GLOBALS = 2 + FORWARDREF = 3 + STRING = 4 + + +_sentinel = object() +# Following `NAME_ERROR_MSG` in `ceval_macros.h`: +_NAME_ERROR_MSG = "name '{name:.200}' is not defined" + + +# Slots shared by ForwardRef and _Stringifier. The __forward__ names must be +# preserved for compatibility with the old typing.ForwardRef class. The remaining +# names are private. +_SLOTS = ( + "__forward_is_argument__", + "__forward_is_class__", + "__forward_module__", + "__weakref__", + "__arg__", + "__globals__", + "__extra_names__", + "__code__", + "__ast_node__", + "__cell__", + "__owner__", + "__stringifier_dict__", +) + + +class ForwardRef: + """Wrapper that holds a forward reference. + + Constructor arguments: + * arg: a string representing the code to be evaluated. + * module: the module where the forward reference was created. + Must be a string, not a module object. + * owner: The owning object (module, class, or function). + * is_argument: Does nothing, retained for compatibility. + * is_class: True if the forward reference was created in class scope. + + """ + + __slots__ = _SLOTS + + def __init__( + self, + arg, + *, + module=None, + owner=None, + is_argument=True, + is_class=False, + ): + if not isinstance(arg, str): + raise TypeError(f"Forward reference must be a string -- got {arg!r}") + + self.__arg__ = arg + self.__forward_is_argument__ = is_argument + self.__forward_is_class__ = is_class + self.__forward_module__ = module + self.__owner__ = owner + # These are always set to None here but may be non-None if a ForwardRef + # is created through __class__ assignment on a _Stringifier object. + self.__globals__ = None + # This may be either a cell object (for a ForwardRef referring to a single name) + # or a dict mapping cell names to cell objects (for a ForwardRef containing references + # to multiple names). + self.__cell__ = None + self.__extra_names__ = None + # These are initially None but serve as a cache and may be set to a non-None + # value later. + self.__code__ = None + self.__ast_node__ = None + + def __init_subclass__(cls, /, *args, **kwds): + raise TypeError("Cannot subclass ForwardRef") + + def evaluate( + self, + *, + globals=None, + locals=None, + type_params=None, + owner=None, + format=Format.VALUE, + ): + """Evaluate the forward reference and return the value. + + If the forward reference cannot be evaluated, raise an exception. + """ + match format: + case Format.STRING: + return self.__forward_arg__ + case Format.VALUE: + is_forwardref_format = False + case Format.FORWARDREF: + is_forwardref_format = True + case _: + raise NotImplementedError(format) + if isinstance(self.__cell__, types.CellType): + try: + return self.__cell__.cell_contents + except ValueError: + pass + if owner is None: + owner = self.__owner__ + + if globals is None and self.__forward_module__ is not None: + globals = getattr( + sys.modules.get(self.__forward_module__, None), "__dict__", None + ) + if globals is None: + globals = self.__globals__ + if globals is None: + if isinstance(owner, type): + module_name = getattr(owner, "__module__", None) + if module_name: + module = sys.modules.get(module_name, None) + if module: + globals = getattr(module, "__dict__", None) + elif isinstance(owner, types.ModuleType): + globals = getattr(owner, "__dict__", None) + elif callable(owner): + globals = getattr(owner, "__globals__", None) + + # If we pass None to eval() below, the globals of this module are used. + if globals is None: + globals = {} + + if type_params is None and owner is not None: + type_params = getattr(owner, "__type_params__", None) + + if locals is None: + locals = {} + if isinstance(owner, type): + locals.update(vars(owner)) + elif ( + type_params is not None + or isinstance(self.__cell__, dict) + or self.__extra_names__ + ): + # Create a new locals dict if necessary, + # to avoid mutating the argument. + locals = dict(locals) + + # "Inject" type parameters into the local namespace + # (unless they are shadowed by assignments *in* the local namespace), + # as a way of emulating annotation scopes when calling `eval()` + if type_params is not None: + for param in type_params: + locals.setdefault(param.__name__, param) + + # Similar logic can be used for nonlocals, which should not + # override locals. + if isinstance(self.__cell__, dict): + for cell_name, cell in self.__cell__.items(): + try: + cell_value = cell.cell_contents + except ValueError: + pass + else: + locals.setdefault(cell_name, cell_value) + + if self.__extra_names__: + locals.update(self.__extra_names__) + + arg = self.__forward_arg__ + if arg.isidentifier() and not keyword.iskeyword(arg): + if arg in locals: + return locals[arg] + elif arg in globals: + return globals[arg] + elif hasattr(builtins, arg): + return getattr(builtins, arg) + elif is_forwardref_format: + return self + else: + raise NameError(_NAME_ERROR_MSG.format(name=arg), name=arg) + else: + code = self.__forward_code__ + try: + return eval(code, globals=globals, locals=locals) + except Exception: + if not is_forwardref_format: + raise + + # All variables, in scoping order, should be checked before + # triggering __missing__ to create a _Stringifier. + new_locals = _StringifierDict( + {**builtins.__dict__, **globals, **locals}, + globals=globals, + owner=owner, + is_class=self.__forward_is_class__, + format=format, + ) + try: + result = eval(code, globals=globals, locals=new_locals) + except Exception: + return self + else: + new_locals.transmogrify(self.__cell__) + return result + + def _evaluate(self, globalns, localns, type_params=_sentinel, *, recursive_guard): + import typing + import warnings + + if type_params is _sentinel: + typing._deprecation_warning_for_no_type_params_passed( + "typing.ForwardRef._evaluate" + ) + type_params = () + warnings._deprecated( + "ForwardRef._evaluate", + "{name} is a private API and is retained for compatibility, but will be removed" + " in Python 3.16. Use ForwardRef.evaluate() or typing.evaluate_forward_ref() instead.", + remove=(3, 16), + ) + return typing.evaluate_forward_ref( + self, + globals=globalns, + locals=localns, + type_params=type_params, + _recursive_guard=recursive_guard, + ) + + @property + def __forward_arg__(self): + if self.__arg__ is not None: + return self.__arg__ + if self.__ast_node__ is not None: + self.__arg__ = ast.unparse(self.__ast_node__) + return self.__arg__ + raise AssertionError( + "Attempted to access '__forward_arg__' on an uninitialized ForwardRef" + ) + + @property + def __forward_code__(self): + if self.__code__ is not None: + return self.__code__ + arg = self.__forward_arg__ + try: + self.__code__ = compile(_rewrite_star_unpack(arg), "", "eval") + except SyntaxError: + raise SyntaxError(f"Forward reference must be an expression -- got {arg!r}") + return self.__code__ + + def __eq__(self, other): + if not isinstance(other, ForwardRef): + return NotImplemented + return ( + self.__forward_arg__ == other.__forward_arg__ + and self.__forward_module__ == other.__forward_module__ + # Use "is" here because we use id() for this in __hash__ + # because dictionaries are not hashable. + and self.__globals__ is other.__globals__ + and self.__forward_is_class__ == other.__forward_is_class__ + # Two separate cells are always considered unequal in forward refs. + and ( + {name: id(cell) for name, cell in self.__cell__.items()} + == {name: id(cell) for name, cell in other.__cell__.items()} + if isinstance(self.__cell__, dict) and isinstance(other.__cell__, dict) + else self.__cell__ is other.__cell__ + ) + and self.__owner__ == other.__owner__ + and ( + (tuple(sorted(self.__extra_names__.items())) if self.__extra_names__ else None) == + (tuple(sorted(other.__extra_names__.items())) if other.__extra_names__ else None) + ) + ) + + def __hash__(self): + return hash(( + self.__forward_arg__, + self.__forward_module__, + id(self.__globals__), # dictionaries are not hashable, so hash by identity + self.__forward_is_class__, + ( # cells are not hashable as well + tuple(sorted([(name, id(cell)) for name, cell in self.__cell__.items()])) + if isinstance(self.__cell__, dict) else id(self.__cell__), + ), + self.__owner__, + tuple(sorted(self.__extra_names__.items())) if self.__extra_names__ else None, + )) + + def __or__(self, other): + return types.UnionType[self, other] + + def __ror__(self, other): + return types.UnionType[other, self] + + def __repr__(self): + extra = [] + if self.__forward_module__ is not None: + extra.append(f", module={self.__forward_module__!r}") + if self.__forward_is_class__: + extra.append(", is_class=True") + if self.__owner__ is not None: + extra.append(f", owner={self.__owner__!r}") + return f"ForwardRef({self.__forward_arg__!r}{''.join(extra)})" + + +_Template = type(t"") + + +class _Stringifier: + # Must match the slots on ForwardRef, so we can turn an instance of one into an + # instance of the other in place. + __slots__ = _SLOTS + + def __init__( + self, + node, + globals=None, + owner=None, + is_class=False, + cell=None, + *, + stringifier_dict, + extra_names=None, + ): + # Either an AST node or a simple str (for the common case where a ForwardRef + # represent a single name). + assert isinstance(node, (ast.AST, str)) + self.__arg__ = None + self.__forward_is_argument__ = False + self.__forward_is_class__ = is_class + self.__forward_module__ = None + self.__code__ = None + self.__ast_node__ = node + self.__globals__ = globals + self.__extra_names__ = extra_names + self.__cell__ = cell + self.__owner__ = owner + self.__stringifier_dict__ = stringifier_dict + + def __convert_to_ast(self, other): + if isinstance(other, _Stringifier): + if isinstance(other.__ast_node__, str): + return ast.Name(id=other.__ast_node__), other.__extra_names__ + return other.__ast_node__, other.__extra_names__ + elif type(other) is _Template: + return _template_to_ast(other), None + elif ( + # In STRING format we don't bother with the create_unique_name() dance; + # it's better to emit the repr() of the object instead of an opaque name. + self.__stringifier_dict__.format == Format.STRING + or other is None + or type(other) in (str, int, float, bool, complex) + ): + return ast.Constant(value=other), None + elif type(other) is dict: + extra_names = {} + keys = [] + values = [] + for key, value in other.items(): + new_key, new_extra_names = self.__convert_to_ast(key) + if new_extra_names is not None: + extra_names.update(new_extra_names) + keys.append(new_key) + new_value, new_extra_names = self.__convert_to_ast(value) + if new_extra_names is not None: + extra_names.update(new_extra_names) + values.append(new_value) + return ast.Dict(keys, values), extra_names + elif type(other) in (list, tuple, set): + extra_names = {} + elts = [] + for elt in other: + new_elt, new_extra_names = self.__convert_to_ast(elt) + if new_extra_names is not None: + extra_names.update(new_extra_names) + elts.append(new_elt) + ast_class = {list: ast.List, tuple: ast.Tuple, set: ast.Set}[type(other)] + return ast_class(elts), extra_names + else: + name = self.__stringifier_dict__.create_unique_name() + return ast.Name(id=name), {name: other} + + def __convert_to_ast_getitem(self, other): + if isinstance(other, slice): + extra_names = {} + + def conv(obj): + if obj is None: + return None + new_obj, new_extra_names = self.__convert_to_ast(obj) + if new_extra_names is not None: + extra_names.update(new_extra_names) + return new_obj + + return ast.Slice( + lower=conv(other.start), + upper=conv(other.stop), + step=conv(other.step), + ), extra_names + else: + return self.__convert_to_ast(other) + + def __get_ast(self): + node = self.__ast_node__ + if isinstance(node, str): + return ast.Name(id=node) + return node + + def __make_new(self, node, extra_names=None): + new_extra_names = {} + if self.__extra_names__ is not None: + new_extra_names.update(self.__extra_names__) + if extra_names is not None: + new_extra_names.update(extra_names) + stringifier = _Stringifier( + node, + self.__globals__, + self.__owner__, + self.__forward_is_class__, + stringifier_dict=self.__stringifier_dict__, + extra_names=new_extra_names or None, + ) + self.__stringifier_dict__.stringifiers.append(stringifier) + return stringifier + + # Must implement this since we set __eq__. We hash by identity so that + # stringifiers in dict keys are kept separate. + def __hash__(self): + return id(self) + + def __getitem__(self, other): + # Special case, to avoid stringifying references to class-scoped variables + # as '__classdict__["x"]'. + if self.__ast_node__ == "__classdict__": + raise KeyError + if isinstance(other, tuple): + extra_names = {} + elts = [] + for elt in other: + new_elt, new_extra_names = self.__convert_to_ast_getitem(elt) + if new_extra_names is not None: + extra_names.update(new_extra_names) + elts.append(new_elt) + other = ast.Tuple(elts) + else: + other, extra_names = self.__convert_to_ast_getitem(other) + assert isinstance(other, ast.AST), repr(other) + return self.__make_new(ast.Subscript(self.__get_ast(), other), extra_names) + + def __getattr__(self, attr): + return self.__make_new(ast.Attribute(self.__get_ast(), attr)) + + def __call__(self, *args, **kwargs): + extra_names = {} + ast_args = [] + for arg in args: + new_arg, new_extra_names = self.__convert_to_ast(arg) + if new_extra_names is not None: + extra_names.update(new_extra_names) + ast_args.append(new_arg) + ast_kwargs = [] + for key, value in kwargs.items(): + new_value, new_extra_names = self.__convert_to_ast(value) + if new_extra_names is not None: + extra_names.update(new_extra_names) + ast_kwargs.append(ast.keyword(key, new_value)) + return self.__make_new(ast.Call(self.__get_ast(), ast_args, ast_kwargs), extra_names) + + def __iter__(self): + yield self.__make_new(ast.Starred(self.__get_ast())) + + def __repr__(self): + if isinstance(self.__ast_node__, str): + return self.__ast_node__ + return ast.unparse(self.__ast_node__) + + def __format__(self, format_spec): + raise TypeError("Cannot stringify annotation containing string formatting") + + def _make_binop(op: ast.AST): + def binop(self, other): + rhs, extra_names = self.__convert_to_ast(other) + return self.__make_new( + ast.BinOp(self.__get_ast(), op, rhs), extra_names + ) + + return binop + + __add__ = _make_binop(ast.Add()) + __sub__ = _make_binop(ast.Sub()) + __mul__ = _make_binop(ast.Mult()) + __matmul__ = _make_binop(ast.MatMult()) + __truediv__ = _make_binop(ast.Div()) + __mod__ = _make_binop(ast.Mod()) + __lshift__ = _make_binop(ast.LShift()) + __rshift__ = _make_binop(ast.RShift()) + __or__ = _make_binop(ast.BitOr()) + __xor__ = _make_binop(ast.BitXor()) + __and__ = _make_binop(ast.BitAnd()) + __floordiv__ = _make_binop(ast.FloorDiv()) + __pow__ = _make_binop(ast.Pow()) + + del _make_binop + + def _make_rbinop(op: ast.AST): + def rbinop(self, other): + new_other, extra_names = self.__convert_to_ast(other) + return self.__make_new( + ast.BinOp(new_other, op, self.__get_ast()), extra_names + ) + + return rbinop + + __radd__ = _make_rbinop(ast.Add()) + __rsub__ = _make_rbinop(ast.Sub()) + __rmul__ = _make_rbinop(ast.Mult()) + __rmatmul__ = _make_rbinop(ast.MatMult()) + __rtruediv__ = _make_rbinop(ast.Div()) + __rmod__ = _make_rbinop(ast.Mod()) + __rlshift__ = _make_rbinop(ast.LShift()) + __rrshift__ = _make_rbinop(ast.RShift()) + __ror__ = _make_rbinop(ast.BitOr()) + __rxor__ = _make_rbinop(ast.BitXor()) + __rand__ = _make_rbinop(ast.BitAnd()) + __rfloordiv__ = _make_rbinop(ast.FloorDiv()) + __rpow__ = _make_rbinop(ast.Pow()) + + del _make_rbinop + + def _make_compare(op): + def compare(self, other): + rhs, extra_names = self.__convert_to_ast(other) + return self.__make_new( + ast.Compare( + left=self.__get_ast(), + ops=[op], + comparators=[rhs], + ), + extra_names, + ) + + return compare + + __lt__ = _make_compare(ast.Lt()) + __le__ = _make_compare(ast.LtE()) + __eq__ = _make_compare(ast.Eq()) + __ne__ = _make_compare(ast.NotEq()) + __gt__ = _make_compare(ast.Gt()) + __ge__ = _make_compare(ast.GtE()) + + del _make_compare + + def _make_unary_op(op): + def unary_op(self): + return self.__make_new(ast.UnaryOp(op, self.__get_ast())) + + return unary_op + + __invert__ = _make_unary_op(ast.Invert()) + __pos__ = _make_unary_op(ast.UAdd()) + __neg__ = _make_unary_op(ast.USub()) + + del _make_unary_op + + +def _template_to_ast_constructor(template): + """Convert a `template` instance to a non-literal AST.""" + args = [] + for part in template: + match part: + case str(): + args.append(ast.Constant(value=part)) + case _: + interp = ast.Call( + func=ast.Name(id="Interpolation"), + args=[ + ast.Constant(value=part.value), + ast.Constant(value=part.expression), + ast.Constant(value=part.conversion), + ast.Constant(value=part.format_spec), + ] + ) + args.append(interp) + return ast.Call(func=ast.Name(id="Template"), args=args, keywords=[]) + + +def _template_to_ast_literal(template, parsed): + """Convert a `template` instance to a t-string literal AST.""" + values = [] + interp_count = 0 + for part in template: + match part: + case str(): + values.append(ast.Constant(value=part)) + case _: + interp = ast.Interpolation( + str=part.expression, + value=parsed[interp_count], + conversion=ord(part.conversion) if part.conversion else -1, + format_spec=ast.Constant(value=part.format_spec) + if part.format_spec + else None, + ) + values.append(interp) + interp_count += 1 + return ast.TemplateStr(values=values) + + +def _template_to_ast(template): + """Make a best-effort conversion of a `template` instance to an AST.""" + # gh-138558: Not all Template instances can be represented as t-string + # literals. Return the most accurate AST we can. See issue for details. + + # If any expr is empty or whitespace only, we cannot convert to a literal. + if any(part.expression.strip() == "" for part in template.interpolations): + return _template_to_ast_constructor(template) + + try: + # Wrap in parens to allow whitespace inside interpolation curly braces + parsed = tuple( + ast.parse(f"({part.expression})", mode="eval").body + for part in template.interpolations + ) + except SyntaxError: + return _template_to_ast_constructor(template) + + return _template_to_ast_literal(template, parsed) + + +class _StringifierDict(dict): + def __init__(self, namespace, *, globals=None, owner=None, is_class=False, format): + super().__init__(namespace) + self.namespace = namespace + self.globals = globals + self.owner = owner + self.is_class = is_class + self.stringifiers = [] + self.next_id = 1 + self.format = format + + def __missing__(self, key): + fwdref = _Stringifier( + key, + globals=self.globals, + owner=self.owner, + is_class=self.is_class, + stringifier_dict=self, + ) + self.stringifiers.append(fwdref) + return fwdref + + def transmogrify(self, cell_dict): + for obj in self.stringifiers: + obj.__class__ = ForwardRef + obj.__stringifier_dict__ = None # not needed for ForwardRef + if isinstance(obj.__ast_node__, str): + obj.__arg__ = obj.__ast_node__ + obj.__ast_node__ = None + if cell_dict is not None and obj.__cell__ is None: + obj.__cell__ = cell_dict + + def create_unique_name(self): + name = f"__annotationlib_name_{self.next_id}__" + self.next_id += 1 + return name + + +def call_evaluate_function(evaluate, format, *, owner=None): + """Call an evaluate function. Evaluate functions are normally generated for + the value of type aliases and the bounds, constraints, and defaults of + type parameter objects. + """ + return call_annotate_function(evaluate, format, owner=owner, _is_evaluate=True) + + +def call_annotate_function(annotate, format, *, owner=None, _is_evaluate=False): + """Call an __annotate__ function. __annotate__ functions are normally + generated by the compiler to defer the evaluation of annotations. They + can be called with any of the format arguments in the Format enum, but + compiler-generated __annotate__ functions only support the VALUE format. + This function provides additional functionality to call __annotate__ + functions with the FORWARDREF and STRING formats. + + *annotate* must be an __annotate__ function, which takes a single argument + and returns a dict of annotations. + + *format* must be a member of the Format enum or one of the corresponding + integer values. + + *owner* can be the object that owns the annotations (i.e., the module, + class, or function that the __annotate__ function derives from). With the + FORWARDREF format, it is used to provide better evaluation capabilities + on the generated ForwardRef objects. + + """ + if format == Format.VALUE_WITH_FAKE_GLOBALS: + raise ValueError("The VALUE_WITH_FAKE_GLOBALS format is for internal use only") + try: + return annotate(format) + except NotImplementedError: + pass + if format == Format.STRING: + # STRING is implemented by calling the annotate function in a special + # environment where every name lookup results in an instance of _Stringifier. + # _Stringifier supports every dunder operation and returns a new _Stringifier. + # At the end, we get a dictionary that mostly contains _Stringifier objects (or + # possibly constants if the annotate function uses them directly). We then + # convert each of those into a string to get an approximation of the + # original source. + + # Attempt to call with VALUE_WITH_FAKE_GLOBALS to check if it is implemented + # See: https://github.com/python/cpython/issues/138764 + # Only fail on NotImplementedError + try: + annotate(Format.VALUE_WITH_FAKE_GLOBALS) + except NotImplementedError: + # Both STRING and VALUE_WITH_FAKE_GLOBALS are not implemented: fallback to VALUE + return annotations_to_string(annotate(Format.VALUE)) + except Exception: + pass + + globals = _StringifierDict({}, format=format) + is_class = isinstance(owner, type) + closure, _ = _build_closure( + annotate, owner, is_class, globals, allow_evaluation=False + ) + func = types.FunctionType( + annotate.__code__, + globals, + closure=closure, + argdefs=annotate.__defaults__, + kwdefaults=annotate.__kwdefaults__, + ) + annos = func(Format.VALUE_WITH_FAKE_GLOBALS) + if _is_evaluate: + return _stringify_single(annos) + return { + key: _stringify_single(val) + for key, val in annos.items() + } + elif format == Format.FORWARDREF: + # FORWARDREF is implemented similarly to STRING, but there are two changes, + # at the beginning and the end of the process. + # First, while STRING uses an empty dictionary as the namespace, so that all + # name lookups result in _Stringifier objects, FORWARDREF uses the globals + # and builtins, so that defined names map to their real values. + # Second, instead of returning strings, we want to return either real values + # or ForwardRef objects. To do this, we keep track of all _Stringifier objects + # created while the annotation is being evaluated, and at the end we convert + # them all to ForwardRef objects by assigning to __class__. To make this + # technique work, we have to ensure that the _Stringifier and ForwardRef + # classes share the same attributes. + # We use this technique because while the annotations are being evaluated, + # we want to support all operations that the language allows, including even + # __getattr__ and __eq__, and return new _Stringifier objects so we can accurately + # reconstruct the source. But in the dictionary that we eventually return, we + # want to return objects with more user-friendly behavior, such as an __eq__ + # that returns a bool and an defined set of attributes. + namespace = {**annotate.__builtins__, **annotate.__globals__} + is_class = isinstance(owner, type) + globals = _StringifierDict( + namespace, + globals=annotate.__globals__, + owner=owner, + is_class=is_class, + format=format, + ) + closure, cell_dict = _build_closure( + annotate, owner, is_class, globals, allow_evaluation=True + ) + func = types.FunctionType( + annotate.__code__, + globals, + closure=closure, + argdefs=annotate.__defaults__, + kwdefaults=annotate.__kwdefaults__, + ) + try: + result = func(Format.VALUE_WITH_FAKE_GLOBALS) + except NotImplementedError: + # FORWARDREF and VALUE_WITH_FAKE_GLOBALS not supported, fall back to VALUE + return annotate(Format.VALUE) + except Exception: + pass + else: + globals.transmogrify(cell_dict) + return result + + # Try again, but do not provide any globals. This allows us to return + # a value in certain cases where an exception gets raised during evaluation. + globals = _StringifierDict( + {}, + globals=annotate.__globals__, + owner=owner, + is_class=is_class, + format=format, + ) + closure, cell_dict = _build_closure( + annotate, owner, is_class, globals, allow_evaluation=False + ) + func = types.FunctionType( + annotate.__code__, + globals, + closure=closure, + argdefs=annotate.__defaults__, + kwdefaults=annotate.__kwdefaults__, + ) + result = func(Format.VALUE_WITH_FAKE_GLOBALS) + globals.transmogrify(cell_dict) + if _is_evaluate: + if isinstance(result, ForwardRef): + return result.evaluate(format=Format.FORWARDREF) + else: + return result + else: + return { + key: ( + val.evaluate(format=Format.FORWARDREF) + if isinstance(val, ForwardRef) + else val + ) + for key, val in result.items() + } + elif format == Format.VALUE: + # Should be impossible because __annotate__ functions must not raise + # NotImplementedError for this format. + raise RuntimeError("annotate function does not support VALUE format") + else: + raise ValueError(f"Invalid format: {format!r}") + + +def _build_closure(annotate, owner, is_class, stringifier_dict, *, allow_evaluation): + if not annotate.__closure__: + return None, None + new_closure = [] + cell_dict = {} + for name, cell in zip(annotate.__code__.co_freevars, annotate.__closure__, strict=True): + cell_dict[name] = cell + new_cell = None + if allow_evaluation: + try: + cell.cell_contents + except ValueError: + pass + else: + new_cell = cell + if new_cell is None: + fwdref = _Stringifier( + name, + cell=cell, + owner=owner, + globals=annotate.__globals__, + is_class=is_class, + stringifier_dict=stringifier_dict, + ) + stringifier_dict.stringifiers.append(fwdref) + new_cell = types.CellType(fwdref) + new_closure.append(new_cell) + return tuple(new_closure), cell_dict + + +def _stringify_single(anno): + if anno is ...: + return "..." + # We have to handle str specially to support PEP 563 stringified annotations. + elif isinstance(anno, str): + return anno + elif isinstance(anno, _Template): + return ast.unparse(_template_to_ast(anno)) + else: + return repr(anno) + + +def get_annotate_from_class_namespace(obj): + """Retrieve the annotate function from a class namespace dictionary. + + Return None if the namespace does not contain an annotate function. + This is useful in metaclass ``__new__`` methods to retrieve the annotate function. + """ + try: + return obj["__annotate__"] + except KeyError: + return obj.get("__annotate_func__", None) + + +def get_annotations( + obj, *, globals=None, locals=None, eval_str=False, format=Format.VALUE +): + """Compute the annotations dict for an object. + + obj may be a callable, class, module, or other object with + __annotate__ or __annotations__ attributes. + Passing any other object raises TypeError. + + The *format* parameter controls the format in which annotations are returned, + and must be a member of the Format enum or its integer equivalent. + For the VALUE format, the __annotations__ is tried first; if it + does not exist, the __annotate__ function is called. The + FORWARDREF format uses __annotations__ if it exists and can be + evaluated, and otherwise falls back to calling the __annotate__ function. + The SOURCE format tries __annotate__ first, and falls back to + using __annotations__, stringified using annotations_to_string(). + + This function handles several details for you: + + * If eval_str is true, values of type str will + be un-stringized using eval(). This is intended + for use with stringized annotations + ("from __future__ import annotations"). + * If obj doesn't have an annotations dict, returns an + empty dict. (Functions and methods always have an + annotations dict; classes, modules, and other types of + callables may not.) + * Ignores inherited annotations on classes. If a class + doesn't have its own annotations dict, returns an empty dict. + * All accesses to object members and dict values are done + using getattr() and dict.get() for safety. + * Always, always, always returns a freshly-created dict. + + eval_str controls whether or not values of type str are replaced + with the result of calling eval() on those values: + + * If eval_str is true, eval() is called on values of type str. + * If eval_str is false (the default), values of type str are unchanged. + + globals and locals are passed in to eval(); see the documentation + for eval() for more information. If either globals or locals is + None, this function may replace that value with a context-specific + default, contingent on type(obj): + + * If obj is a module, globals defaults to obj.__dict__. + * If obj is a class, globals defaults to + sys.modules[obj.__module__].__dict__ and locals + defaults to the obj class namespace. + * If obj is a callable, globals defaults to obj.__globals__, + although if obj is a wrapped function (using + functools.update_wrapper()) it is first unwrapped. + """ + if eval_str and format != Format.VALUE: + raise ValueError("eval_str=True is only supported with format=Format.VALUE") + + match format: + case Format.VALUE: + # For VALUE, we first look at __annotations__ + ann = _get_dunder_annotations(obj) + + # If it's not there, try __annotate__ instead + if ann is None: + ann = _get_and_call_annotate(obj, format) + case Format.FORWARDREF: + # For FORWARDREF, we use __annotations__ if it exists + try: + ann = _get_dunder_annotations(obj) + except Exception: + pass + else: + if ann is not None: + return dict(ann) + + # But if __annotations__ threw a NameError, we try calling __annotate__ + ann = _get_and_call_annotate(obj, format) + if ann is None: + # If that didn't work either, we have a very weird object: evaluating + # __annotations__ threw NameError and there is no __annotate__. In that case, + # we fall back to trying __annotations__ again. + ann = _get_dunder_annotations(obj) + case Format.STRING: + # For STRING, we try to call __annotate__ + ann = _get_and_call_annotate(obj, format) + if ann is not None: + return dict(ann) + # But if we didn't get it, we use __annotations__ instead. + ann = _get_dunder_annotations(obj) + if ann is not None: + return annotations_to_string(ann) + case Format.VALUE_WITH_FAKE_GLOBALS: + raise ValueError("The VALUE_WITH_FAKE_GLOBALS format is for internal use only") + case _: + raise ValueError(f"Unsupported format {format!r}") + + if ann is None: + if isinstance(obj, type) or callable(obj): + return {} + raise TypeError(f"{obj!r} does not have annotations") + + if not ann: + return {} + + if not eval_str: + return dict(ann) + + if globals is None or locals is None: + if isinstance(obj, type): + # class + obj_globals = None + module_name = getattr(obj, "__module__", None) + if module_name: + module = sys.modules.get(module_name, None) + if module: + obj_globals = getattr(module, "__dict__", None) + obj_locals = dict(vars(obj)) + unwrap = obj + elif isinstance(obj, types.ModuleType): + # module + obj_globals = getattr(obj, "__dict__") + obj_locals = None + unwrap = None + elif callable(obj): + # this includes types.Function, types.BuiltinFunctionType, + # types.BuiltinMethodType, functools.partial, functools.singledispatch, + # "class funclike" from Lib/test/test_inspect... on and on it goes. + obj_globals = getattr(obj, "__globals__", None) + obj_locals = None + unwrap = obj + else: + obj_globals = obj_locals = unwrap = None + + if unwrap is not None: + while True: + if hasattr(unwrap, "__wrapped__"): + unwrap = unwrap.__wrapped__ + continue + if functools := sys.modules.get("functools"): + if isinstance(unwrap, functools.partial): + unwrap = unwrap.func + continue + break + if hasattr(unwrap, "__globals__"): + obj_globals = unwrap.__globals__ + + if globals is None: + globals = obj_globals + if locals is None: + locals = obj_locals + + # "Inject" type parameters into the local namespace + # (unless they are shadowed by assignments *in* the local namespace), + # as a way of emulating annotation scopes when calling `eval()` + if type_params := getattr(obj, "__type_params__", ()): + if locals is None: + locals = {} + locals = {param.__name__: param for param in type_params} | locals + + return_value = { + key: value if not isinstance(value, str) + else eval(_rewrite_star_unpack(value), globals, locals) + for key, value in ann.items() + } + return return_value + + +def type_repr(value): + """Convert a Python value to a format suitable for use with the STRING format. + + This is intended as a helper for tools that support the STRING format but do + not have access to the code that originally produced the annotations. It uses + repr() for most objects. + + """ + if isinstance(value, (type, types.FunctionType, types.BuiltinFunctionType)): + if value.__module__ == "builtins": + return value.__qualname__ + return f"{value.__module__}.{value.__qualname__}" + elif isinstance(value, _Template): + tree = _template_to_ast(value) + return ast.unparse(tree) + if value is ...: + return "..." + return repr(value) + + +def annotations_to_string(annotations): + """Convert an annotation dict containing values to approximately the STRING format. + + Always returns a fresh a dictionary. + """ + return { + n: t if isinstance(t, str) else type_repr(t) + for n, t in annotations.items() + } + + +def _rewrite_star_unpack(arg): + """If the given argument annotation expression is a star unpack e.g. `'*Ts'` + rewrite it to a valid expression. + """ + if arg.lstrip().startswith("*"): + return f"({arg},)[0]" # E.g. (*Ts,)[0] or (*tuple[int, int],)[0] + else: + return arg + + +def _get_and_call_annotate(obj, format): + """Get the __annotate__ function and call it. + + May not return a fresh dictionary. + """ + annotate = getattr(obj, "__annotate__", None) + if annotate is not None: + ann = call_annotate_function(annotate, format, owner=obj) + if not isinstance(ann, dict): + raise ValueError(f"{obj!r}.__annotate__ returned a non-dict") + return ann + return None + + +_BASE_GET_ANNOTATIONS = type.__dict__["__annotations__"].__get__ + + +def _get_dunder_annotations(obj): + """Return the annotations for an object, checking that it is a dictionary. + + Does not return a fresh dictionary. + """ + # This special case is needed to support types defined under + # from __future__ import annotations, where accessing the __annotations__ + # attribute directly might return annotations for the wrong class. + if isinstance(obj, type): + try: + ann = _BASE_GET_ANNOTATIONS(obj) + except AttributeError: + # For static types, the descriptor raises AttributeError. + return None + else: + ann = getattr(obj, "__annotations__", None) + if ann is None: + return None + + if not isinstance(ann, dict): + raise ValueError(f"{obj!r}.__annotations__ is neither a dict nor None") + return ann diff --git a/PythonLib/full/argparse.py b/PythonLib/full/argparse.py index 2a8b501a..1d7d34f9 100644 --- a/PythonLib/full/argparse.py +++ b/PythonLib/full/argparse.py @@ -18,11 +18,12 @@ 'integers', metavar='int', nargs='+', type=int, help='an integer to be summed') parser.add_argument( - '--log', default=sys.stdout, type=argparse.FileType('w'), + '--log', help='the file where the sum should be written') args = parser.parse_args() - args.log.write('%s' % sum(args.integers)) - args.log.close() + with (open(args.log, 'w') if args.log is not None + else contextlib.nullcontext(sys.stdout)) as log: + log.write('%s' % sum(args.integers)) The module contains the following public classes: @@ -39,7 +40,8 @@ - FileType -- A factory for defining types of files to be created. As the example above shows, instances of FileType are typically passed as - the type= argument of add_argument() calls. + the type= argument of add_argument() calls. Deprecated since + Python 3.14. - Action -- The base class for parser actions. Typically actions are selected by passing strings like 'store_true' or 'append_const' to @@ -89,8 +91,6 @@ import re as _re import sys as _sys -import warnings - from gettext import gettext as _, ngettext SUPPRESS = '==SUPPRESS==' @@ -161,18 +161,21 @@ class HelpFormatter(object): provided by the class are considered an implementation detail. """ - def __init__(self, - prog, - indent_increment=2, - max_help_position=24, - width=None): - + def __init__( + self, + prog, + indent_increment=2, + max_help_position=24, + width=None, + color=True, + ): # default setting for width if width is None: import shutil width = shutil.get_terminal_size().columns width -= 2 + self._set_color(color) self._prog = prog self._indent_increment = indent_increment self._max_help_position = min(max_help_position, @@ -189,9 +192,20 @@ def __init__(self, self._whitespace_matcher = _re.compile(r'\s+', _re.ASCII) self._long_break_matcher = _re.compile(r'\n\n\n+') + def _set_color(self, color): + from _colorize import can_colorize, decolor, get_theme + + if color and can_colorize(): + self._theme = get_theme(force_color=True).argparse + self._decolor = decolor + else: + self._theme = get_theme(force_no_color=True).argparse + self._decolor = lambda text: text + # =============================== # Section and indentation methods # =============================== + def _indent(self): self._current_indent += self._indent_increment self._level += 1 @@ -226,7 +240,11 @@ def format_help(self): if self.heading is not SUPPRESS and self.heading is not None: current_indent = self.formatter._current_indent heading_text = _('%(heading)s:') % dict(heading=self.heading) - heading = '%*s%s\n' % (current_indent, '', heading_text) + t = self.formatter._theme + heading = ( + f'{" " * current_indent}' + f'{t.heading}{heading_text}{t.reset}\n' + ) else: heading = '' @@ -239,6 +257,7 @@ def _add_item(self, func, args): # ======================== # Message building methods # ======================== + def start_section(self, heading): self._indent() section = self._Section(self, self._current_section, heading) @@ -262,7 +281,7 @@ def add_argument(self, action): if action.help is not SUPPRESS: # find all invocations - get_invocation = self._format_action_invocation + get_invocation = lambda x: self._decolor(self._format_action_invocation(x)) invocation_lengths = [len(get_invocation(action)) + self._current_indent] for subaction in self._iter_indented_subactions(action): invocation_lengths.append(len(get_invocation(subaction)) + self._current_indent) @@ -282,6 +301,7 @@ def add_arguments(self, actions): # ======================= # Help-formatting methods # ======================= + def format_help(self): help = self._root_section.format_help() if help: @@ -295,51 +315,39 @@ def _join_parts(self, part_strings): if part and part is not SUPPRESS]) def _format_usage(self, usage, actions, groups, prefix): + t = self._theme + if prefix is None: prefix = _('usage: ') # if usage is specified, use that if usage is not None: - usage = usage % dict(prog=self._prog) + usage = ( + t.prog_extra + + usage + % {"prog": f"{t.prog}{self._prog}{t.reset}{t.prog_extra}"} + + t.reset + ) # if no optionals or positionals are available, usage is just prog elif usage is None and not actions: - usage = '%(prog)s' % dict(prog=self._prog) + usage = f"{t.prog}{self._prog}{t.reset}" # if optionals and positionals are available, calculate usage elif usage is None: prog = '%(prog)s' % dict(prog=self._prog) - # split optionals from positionals - optionals = [] - positionals = [] - for action in actions: - if action.option_strings: - optionals.append(action) - else: - positionals.append(action) - + parts, pos_start = self._get_actions_usage_parts(actions, groups) # build full usage string - format = self._format_actions_usage - action_usage = format(optionals + positionals, groups) - usage = ' '.join([s for s in [prog, action_usage] if s]) + usage = ' '.join(filter(None, [prog, *parts])) # wrap the usage parts if it's too long text_width = self._width - self._current_indent - if len(prefix) + len(usage) > text_width: + if len(prefix) + len(self._decolor(usage)) > text_width: # break usage into wrappable parts - part_regexp = ( - r'\(.*?\)+(?=\s|$)|' - r'\[.*?\]+(?=\s|$)|' - r'\S+' - ) - opt_usage = format(optionals, groups) - pos_usage = format(positionals, groups) - opt_parts = _re.findall(part_regexp, opt_usage) - pos_parts = _re.findall(part_regexp, pos_usage) - assert ' '.join(opt_parts) == opt_usage - assert ' '.join(pos_parts) == pos_usage + opt_parts = parts[:pos_start] + pos_parts = parts[pos_start:] # helper for wrapping lines def get_lines(parts, indent, prefix=None): @@ -351,12 +359,13 @@ def get_lines(parts, indent, prefix=None): else: line_len = indent_length - 1 for part in parts: - if line_len + 1 + len(part) > text_width and line: + part_len = len(self._decolor(part)) + if line_len + 1 + part_len > text_width and line: lines.append(indent + ' '.join(line)) line = [] line_len = indent_length - 1 line.append(part) - line_len += len(part) + 1 + line_len += part_len + 1 if line: lines.append(indent + ' '.join(line)) if prefix is not None: @@ -364,8 +373,9 @@ def get_lines(parts, indent, prefix=None): return lines # if prog is short, follow it with optionals or positionals - if len(prefix) + len(prog) <= 0.75 * text_width: - indent = ' ' * (len(prefix) + len(prog) + 1) + prog_len = len(self._decolor(prog)) + if len(prefix) + prog_len <= 0.75 * text_width: + indent = ' ' * (len(prefix) + prog_len + 1) if opt_parts: lines = get_lines([prog] + opt_parts, indent, prefix) lines.extend(get_lines(pos_parts, indent)) @@ -388,123 +398,120 @@ def get_lines(parts, indent, prefix=None): # join lines into usage usage = '\n'.join(lines) + usage = usage.removeprefix(prog) + usage = f"{t.prog}{prog}{t.reset}{usage}" + # prefix with 'usage:' - return '%s%s\n\n' % (prefix, usage) + return f'{t.usage}{prefix}{t.reset}{usage}\n\n' - def _format_actions_usage(self, actions, groups): - # find group indices and identify actions in groups - group_actions = set() - inserts = {} - for group in groups: - if not group._group_actions: - raise ValueError(f'empty group {group}') + def _is_long_option(self, string): + return len(string) > 2 - try: - start = actions.index(group._group_actions[0]) - except ValueError: - continue - else: - group_action_count = len(group._group_actions) - end = start + group_action_count - if actions[start:end] == group._group_actions: - - suppressed_actions_count = 0 - for action in group._group_actions: - group_actions.add(action) - if action.help is SUPPRESS: - suppressed_actions_count += 1 - - exposed_actions_count = group_action_count - suppressed_actions_count - if not exposed_actions_count: - continue - - if not group.required: - if start in inserts: - inserts[start] += ' [' - else: - inserts[start] = '[' - if end in inserts: - inserts[end] += ']' - else: - inserts[end] = ']' - elif exposed_actions_count > 1: - if start in inserts: - inserts[start] += ' (' - else: - inserts[start] = '(' - if end in inserts: - inserts[end] += ')' - else: - inserts[end] = ')' - for i in range(start + 1, end): - inserts[i] = '|' + def _get_actions_usage_parts(self, actions, groups): + """Get usage parts with split index for optionals/positionals. + + Returns (parts, pos_start) where pos_start is the index in parts + where positionals begin. + This preserves mutually exclusive group formatting across the + optionals/positionals boundary (gh-75949). + """ + actions = [action for action in actions if action.help is not SUPPRESS] + # group actions by mutually exclusive groups + action_groups = dict.fromkeys(actions) + for group in groups: + for action in group._group_actions: + if action in action_groups: + action_groups[action] = group + # positional arguments keep their position + positionals = [] + for action in actions: + if not action.option_strings: + group = action_groups.pop(action) + if group: + group_actions = [ + action2 for action2 in group._group_actions + if action2.option_strings and + action_groups.pop(action2, None) + ] + [action] + positionals.append((group.required, group_actions)) + else: + positionals.append((None, [action])) + # the remaining optional arguments are sorted by the position of + # the first option in the group + optionals = [] + for action in actions: + if action.option_strings and action in action_groups: + group = action_groups.pop(action) + if group: + group_actions = [action] + [ + action2 for action2 in group._group_actions + if action2.option_strings and + action_groups.pop(action2, None) + ] + optionals.append((group.required, group_actions)) + else: + optionals.append((None, [action])) # collect all actions format strings parts = [] - for i, action in enumerate(actions): - - # suppressed arguments are marked with None - # remove | separators for suppressed arguments - if action.help is SUPPRESS: - parts.append(None) - if inserts.get(i) == '|': - inserts.pop(i) - elif inserts.get(i + 1) == '|': - inserts.pop(i + 1) - - # produce all arg strings - elif not action.option_strings: - default = self._get_default_metavar_for_positional(action) - part = self._format_args(action, default) - - # if it's in a group, strip the outer [] - if action in group_actions: - if part[0] == '[' and part[-1] == ']': - part = part[1:-1] - - # add the action string to the list - parts.append(part) - - # produce the first way to invoke the option in brackets - else: - option_string = action.option_strings[0] + t = self._theme + pos_start = None + for i, (required, group) in enumerate(optionals + positionals): + start = len(parts) + if i == len(optionals): + pos_start = start + in_group = len(group) > 1 + for action in group: + # produce all arg strings + if not action.option_strings: + default = self._get_default_metavar_for_positional(action) + part = self._format_args(action, default) + # if it's in a group, strip the outer [] + if in_group: + if part[0] == '[' and part[-1] == ']': + part = part[1:-1] + part = t.summary_action + part + t.reset + + # produce the first way to invoke the option in brackets + else: + option_string = action.option_strings[0] + if self._is_long_option(option_string): + option_color = t.summary_long_option + else: + option_color = t.summary_short_option - # if the Optional doesn't take a value, format is: - # -s or --long - if action.nargs == 0: - part = action.format_usage() + # if the Optional doesn't take a value, format is: + # -s or --long + if action.nargs == 0: + part = action.format_usage() + part = f"{option_color}{part}{t.reset}" - # if the Optional takes a value, format is: - # -s ARGS or --long ARGS - else: - default = self._get_default_metavar_for_optional(action) - args_string = self._format_args(action, default) - part = '%s %s' % (option_string, args_string) + # if the Optional takes a value, format is: + # -s ARGS or --long ARGS + else: + default = self._get_default_metavar_for_optional(action) + args_string = self._format_args(action, default) + part = ( + f"{option_color}{option_string} " + f"{t.summary_label}{args_string}{t.reset}" + ) - # make it look optional if it's not required or in a group - if not action.required and action not in group_actions: - part = '[%s]' % part + # make it look optional if it's not required or in a group + if not (action.required or required or in_group): + part = '[%s]' % part # add the action string to the list parts.append(part) - # insert things at the necessary indices - for i in sorted(inserts, reverse=True): - parts[i:i] = [inserts[i]] - - # join all the action items with spaces - text = ' '.join([item for item in parts if item is not None]) + if in_group: + parts[start] = ('(' if required else '[') + parts[start] + for i in range(start, len(parts) - 1): + parts[i] += ' |' + parts[-1] += ')' if required else ']' - # clean up separators for mutually exclusive groups - open = r'[\[(]' - close = r'[\])]' - text = _re.sub(r'(%s) ' % open, r'\1', text) - text = _re.sub(r' (%s)' % close, r'\1', text) - text = _re.sub(r'%s *%s' % (open, close), r'', text) - text = text.strip() - - # return the text - return text + if pos_start is None: + pos_start = len(parts) + return parts, pos_start def _format_text(self, text): if '%(prog)' in text: @@ -520,6 +527,7 @@ def _format_action(self, action): help_width = max(self._width - help_position, 11) action_width = help_position - self._current_indent - 2 action_header = self._format_action_invocation(action) + action_header_no_color = self._decolor(action_header) # no help; start on same line and add a final newline if not action.help: @@ -527,9 +535,15 @@ def _format_action(self, action): action_header = '%*s%s\n' % tup # short action name; start on the same line and pad two spaces - elif len(action_header) <= action_width: - tup = self._current_indent, '', action_width, action_header + elif len(action_header_no_color) <= action_width: + # calculate widths without color codes + action_header_color = action_header + tup = self._current_indent, '', action_width, action_header_no_color action_header = '%*s%-*s ' % tup + # swap in the colored header + action_header = action_header.replace( + action_header_no_color, action_header_color + ) indent_first = 0 # long action name; start on the next line @@ -562,27 +576,42 @@ def _format_action(self, action): return self._join_parts(parts) def _format_action_invocation(self, action): + t = self._theme + if not action.option_strings: default = self._get_default_metavar_for_positional(action) - return ' '.join(self._metavar_formatter(action, default)(1)) + return ( + t.action + + ' '.join(self._metavar_formatter(action, default)(1)) + + t.reset + ) else: - parts = [] + + def color_option_strings(strings): + parts = [] + for s in strings: + if self._is_long_option(s): + parts.append(f"{t.long_option}{s}{t.reset}") + else: + parts.append(f"{t.short_option}{s}{t.reset}") + return parts # if the Optional doesn't take a value, format is: # -s, --long if action.nargs == 0: - parts.extend(action.option_strings) + option_strings = color_option_strings(action.option_strings) + return ', '.join(option_strings) # if the Optional takes a value, format is: - # -s ARGS, --long ARGS + # -s, --long ARGS else: default = self._get_default_metavar_for_optional(action) - args_string = self._format_args(action, default) - for option_string in action.option_strings: - parts.append('%s %s' % (option_string, args_string)) - - return ', '.join(parts) + option_strings = color_option_strings(action.option_strings) + args_string = ( + f"{t.label}{self._format_args(action, default)}{t.reset}" + ) + return ', '.join(option_strings) + ' ' + args_string def _metavar_formatter(self, action, default_metavar): if action.metavar is not None: @@ -628,16 +657,19 @@ def _format_args(self, action, default_metavar): return result def _expand_help(self, action): + help_string = self._get_help_string(action) + if '%' not in help_string: + return help_string params = dict(vars(action), prog=self._prog) for name in list(params): - if params[name] is SUPPRESS: + value = params[name] + if value is SUPPRESS: del params[name] - for name in list(params): - if hasattr(params[name], '__name__'): - params[name] = params[name].__name__ + elif hasattr(value, '__name__'): + params[name] = value.__name__ if params.get('choices') is not None: params['choices'] = ', '.join(map(str, params['choices'])) - return self._get_help_string(action) % params + return help_string % params def _iter_indented_subactions(self, action): try: @@ -703,23 +735,18 @@ class ArgumentDefaultsHelpFormatter(HelpFormatter): """ def _get_help_string(self, action): - """ - Add the default value to the option help message. - - ArgumentDefaultsHelpFormatter and BooleanOptionalAction when it isn't - already present. This code will do that, detecting cornercases to - prevent duplicates or cases where it wouldn't make sense to the end - user. - """ help = action.help if help is None: help = '' - if '%(default)' not in help: - if action.default is not SUPPRESS: - defaulting_nargs = [OPTIONAL, ZERO_OR_MORE] - if action.option_strings or action.nargs in defaulting_nargs: - help += _(' (default: %(default)s)') + if ( + '%(default)' not in help + and action.default is not SUPPRESS + and not action.required + ): + defaulting_nargs = (OPTIONAL, ZERO_OR_MORE) + if action.option_strings or action.nargs in defaulting_nargs: + help += _(' (default: %(default)s)') return help @@ -856,7 +883,8 @@ def __init__(self, choices=None, required=False, help=None, - metavar=None): + metavar=None, + deprecated=False): self.option_strings = option_strings self.dest = dest self.nargs = nargs @@ -867,6 +895,7 @@ def __init__(self, self.required = required self.help = help self.metavar = metavar + self.deprecated = deprecated def _get_kwargs(self): names = [ @@ -880,6 +909,7 @@ def _get_kwargs(self): 'required', 'help', 'metavar', + 'deprecated', ] return [(name, getattr(self, name)) for name in names] @@ -887,59 +917,37 @@ def format_usage(self): return self.option_strings[0] def __call__(self, parser, namespace, values, option_string=None): - raise NotImplementedError(_('.__call__() not defined')) - + raise NotImplementedError('.__call__() not defined') -# FIXME: remove together with `BooleanOptionalAction` deprecated arguments. -_deprecated_default = object() class BooleanOptionalAction(Action): def __init__(self, option_strings, dest, default=None, - type=_deprecated_default, - choices=_deprecated_default, required=False, help=None, - metavar=_deprecated_default): + deprecated=False): _option_strings = [] for option_string in option_strings: _option_strings.append(option_string) if option_string.startswith('--'): + if option_string.startswith('--no-'): + raise ValueError(f'invalid option name {option_string!r} ' + f'for BooleanOptionalAction') option_string = '--no-' + option_string[2:] _option_strings.append(option_string) - # We need `_deprecated` special value to ban explicit arguments that - # match default value. Like: - # parser.add_argument('-f', action=BooleanOptionalAction, type=int) - for field_name in ('type', 'choices', 'metavar'): - if locals()[field_name] is not _deprecated_default: - warnings._deprecated( - field_name, - "{name!r} is deprecated as of Python 3.12 and will be " - "removed in Python {remove}.", - remove=(3, 14)) - - if type is _deprecated_default: - type = None - if choices is _deprecated_default: - choices = None - if metavar is _deprecated_default: - metavar = None - super().__init__( option_strings=_option_strings, dest=dest, nargs=0, default=default, - type=type, - choices=choices, required=required, help=help, - metavar=metavar) + deprecated=deprecated) def __call__(self, parser, namespace, values, option_string=None): @@ -962,7 +970,8 @@ def __init__(self, choices=None, required=False, help=None, - metavar=None): + metavar=None, + deprecated=False): if nargs == 0: raise ValueError('nargs for store actions must be != 0; if you ' 'have nothing to store, actions such as store ' @@ -979,7 +988,8 @@ def __init__(self, choices=choices, required=required, help=help, - metavar=metavar) + metavar=metavar, + deprecated=deprecated) def __call__(self, parser, namespace, values, option_string=None): setattr(namespace, self.dest, values) @@ -994,7 +1004,8 @@ def __init__(self, default=None, required=False, help=None, - metavar=None): + metavar=None, + deprecated=False): super(_StoreConstAction, self).__init__( option_strings=option_strings, dest=dest, @@ -1002,7 +1013,8 @@ def __init__(self, const=const, default=default, required=required, - help=help) + help=help, + deprecated=deprecated) def __call__(self, parser, namespace, values, option_string=None): setattr(namespace, self.dest, self.const) @@ -1015,14 +1027,16 @@ def __init__(self, dest, default=False, required=False, - help=None): + help=None, + deprecated=False): super(_StoreTrueAction, self).__init__( option_strings=option_strings, dest=dest, const=True, - default=default, + deprecated=deprecated, required=required, - help=help) + help=help, + default=default) class _StoreFalseAction(_StoreConstAction): @@ -1032,14 +1046,16 @@ def __init__(self, dest, default=True, required=False, - help=None): + help=None, + deprecated=False): super(_StoreFalseAction, self).__init__( option_strings=option_strings, dest=dest, const=False, default=default, required=required, - help=help) + help=help, + deprecated=deprecated) class _AppendAction(Action): @@ -1054,7 +1070,8 @@ def __init__(self, choices=None, required=False, help=None, - metavar=None): + metavar=None, + deprecated=False): if nargs == 0: raise ValueError('nargs for append actions must be != 0; if arg ' 'strings are not supplying the value to append, ' @@ -1071,7 +1088,8 @@ def __init__(self, choices=choices, required=required, help=help, - metavar=metavar) + metavar=metavar, + deprecated=deprecated) def __call__(self, parser, namespace, values, option_string=None): items = getattr(namespace, self.dest, None) @@ -1089,7 +1107,8 @@ def __init__(self, default=None, required=False, help=None, - metavar=None): + metavar=None, + deprecated=False): super(_AppendConstAction, self).__init__( option_strings=option_strings, dest=dest, @@ -1098,7 +1117,8 @@ def __init__(self, default=default, required=required, help=help, - metavar=metavar) + metavar=metavar, + deprecated=deprecated) def __call__(self, parser, namespace, values, option_string=None): items = getattr(namespace, self.dest, None) @@ -1114,14 +1134,16 @@ def __init__(self, dest, default=None, required=False, - help=None): + help=None, + deprecated=False): super(_CountAction, self).__init__( option_strings=option_strings, dest=dest, nargs=0, default=default, required=required, - help=help) + help=help, + deprecated=deprecated) def __call__(self, parser, namespace, values, option_string=None): count = getattr(namespace, self.dest, None) @@ -1136,13 +1158,15 @@ def __init__(self, option_strings, dest=SUPPRESS, default=SUPPRESS, - help=None): + help=None, + deprecated=False): super(_HelpAction, self).__init__( option_strings=option_strings, dest=dest, default=default, nargs=0, - help=help) + help=help, + deprecated=deprecated) def __call__(self, parser, namespace, values, option_string=None): parser.print_help() @@ -1156,7 +1180,8 @@ def __init__(self, version=None, dest=SUPPRESS, default=SUPPRESS, - help=None): + help=None, + deprecated=False): if help is None: help = _("show program's version number and exit") super(_VersionAction, self).__init__( @@ -1202,6 +1227,8 @@ def __init__(self, self._parser_class = parser_class self._name_parser_map = {} self._choices_actions = [] + self._deprecated = set() + self._color = True super(_SubParsersAction, self).__init__( option_strings=option_strings, @@ -1212,34 +1239,45 @@ def __init__(self, help=help, metavar=metavar) - def add_parser(self, name, **kwargs): + def add_parser(self, name, *, deprecated=False, **kwargs): # set prog from the existing prefix if kwargs.get('prog') is None: kwargs['prog'] = '%s %s' % (self._prog_prefix, name) + # set color + if kwargs.get('color') is None: + kwargs['color'] = self._color + aliases = kwargs.pop('aliases', ()) if name in self._name_parser_map: - raise ArgumentError(self, _('conflicting subparser: %s') % name) + raise ValueError(f'conflicting subparser: {name}') for alias in aliases: if alias in self._name_parser_map: - raise ArgumentError( - self, _('conflicting subparser alias: %s') % alias) + raise ValueError(f'conflicting subparser alias: {alias}') # create a pseudo-action to hold the choice help if 'help' in kwargs: help = kwargs.pop('help') choice_action = self._ChoicesPseudoAction(name, aliases, help) self._choices_actions.append(choice_action) + else: + choice_action = None # create the parser and add it to the map parser = self._parser_class(**kwargs) + if choice_action is not None: + parser._check_help(choice_action) self._name_parser_map[name] = parser # make parser available under aliases also for alias in aliases: self._name_parser_map[alias] = parser + if deprecated: + self._deprecated.add(name) + self._deprecated.update(aliases) + return parser def _get_subactions(self): @@ -1255,13 +1293,17 @@ def __call__(self, parser, namespace, values, option_string=None): # select the parser try: - parser = self._name_parser_map[parser_name] + subparser = self._name_parser_map[parser_name] except KeyError: args = {'parser_name': parser_name, 'choices': ', '.join(self._name_parser_map)} msg = _('unknown parser %(parser_name)r (choices: %(choices)s)') % args raise ArgumentError(self, msg) + if parser_name in self._deprecated: + parser._warning(_("command '%(parser_name)s' is deprecated") % + {'parser_name': parser_name}) + # parse all the remaining options into the namespace # store any unrecognized options on the object, so that the top # level parser can decide what to do with them @@ -1269,7 +1311,7 @@ def __call__(self, parser, namespace, values, option_string=None): # In case this subparser defines new defaults, we parse them # in a new namespace object and then update the original # namespace for the relevant parts. - subnamespace, arg_strings = parser.parse_known_args(arg_strings, None) + subnamespace, arg_strings = subparser.parse_known_args(arg_strings, None) for key, value in vars(subnamespace).items(): setattr(namespace, key, value) @@ -1290,7 +1332,7 @@ def __call__(self, parser, namespace, values, option_string=None): # ============== class FileType(object): - """Factory for creating file object types + """Deprecated factory for creating file object types Instances of FileType are typically passed as type= arguments to the ArgumentParser add_argument() method. @@ -1307,6 +1349,12 @@ class FileType(object): """ def __init__(self, mode='r', bufsize=-1, encoding=None, errors=None): + import warnings + warnings.warn( + "FileType is deprecated. Simply open files after parsing arguments.", + category=PendingDeprecationWarning, + stacklevel=2 + ) self._mode = mode self._bufsize = bufsize self._encoding = encoding @@ -1410,7 +1458,7 @@ def __init__(self, self._defaults = {} # determines whether an "option" looks like a negative number - self._negative_number_matcher = _re.compile(r'^-\d+$|^-\d*\.\d+$') + self._negative_number_matcher = _re.compile(r'-\.?\d') # whether or not there are any optionals that look like negative # numbers -- uses a list so it can be shared and edited @@ -1419,6 +1467,7 @@ def __init__(self, # ==================== # Registration methods # ==================== + def register(self, registry_name, value, object): registry = self._registries.setdefault(registry_name, {}) registry[value] = object @@ -1429,6 +1478,7 @@ def _registry_get(self, registry_name, value, default=None): # ================================== # Namespace default accessor methods # ================================== + def set_defaults(self, **kwargs): self._defaults.update(kwargs) @@ -1448,6 +1498,7 @@ def get_default(self, dest): # ======================= # Adding argument actions # ======================= + def add_argument(self, *args, **kwargs): """ add_argument(dest, ..., name=value, ...) @@ -1460,7 +1511,8 @@ def add_argument(self, *args, **kwargs): chars = self.prefix_chars if not args or len(args) == 1 and args[0][0] not in chars: if args and 'dest' in kwargs: - raise ValueError('dest supplied twice for positional argument') + raise TypeError('dest supplied twice for positional argument,' + ' did you mean metavar?') kwargs = self._get_positional_kwargs(*args, **kwargs) # otherwise, we're adding an optional argument @@ -1476,27 +1528,34 @@ def add_argument(self, *args, **kwargs): kwargs['default'] = self.argument_default # create the action object, and add it to the parser + action_name = kwargs.get('action') action_class = self._pop_action_class(kwargs) if not callable(action_class): - raise ValueError('unknown action "%s"' % (action_class,)) + raise ValueError(f'unknown action {action_class!r}') action = action_class(**kwargs) + # raise an error if action for positional argument does not + # consume arguments + if not action.option_strings and action.nargs == 0: + raise ValueError(f'action {action_name!r} is not valid for positional arguments') + # raise an error if the action type is not callable type_func = self._registry_get('type', action.type, action.type) if not callable(type_func): - raise ValueError('%r is not callable' % (type_func,)) + raise TypeError(f'{type_func!r} is not callable') if type_func is FileType: - raise ValueError('%r is a FileType class object, instance of it' - ' must be passed' % (type_func,)) + raise TypeError(f'{type_func!r} is a FileType class object, ' + f'instance of it must be passed') # raise an error if the metavar does not match the type - if hasattr(self, "_get_formatter"): + if hasattr(self, "_get_validation_formatter"): + formatter = self._get_validation_formatter() try: - self._get_formatter()._format_args(action, None) + formatter._format_args(action, None) except TypeError: raise ValueError("length of metavar tuple does not match nargs") - + self._check_help(action) return self._add_action(action) def add_argument_group(self, *args, **kwargs): @@ -1538,8 +1597,10 @@ def _add_container_actions(self, container): title_group_map = {} for group in self._action_groups: if group.title in title_group_map: - msg = _('cannot merge actions - two groups are named %r') - raise ValueError(msg % (group.title)) + # This branch could happen if a derived class added + # groups with duplicated titles in __init__ + msg = f'cannot merge actions - two groups are named {group.title!r}' + raise ValueError(msg) title_group_map[group.title] = group # map each action to its group @@ -1580,13 +1641,15 @@ def _add_container_actions(self, container): def _get_positional_kwargs(self, dest, **kwargs): # make sure required is not specified if 'required' in kwargs: - msg = _("'required' is an invalid argument for positionals") + msg = "'required' is an invalid argument for positionals" raise TypeError(msg) # mark positional arguments as required if at least one is # always required nargs = kwargs.get('nargs') - if nargs not in [OPTIONAL, ZERO_OR_MORE, REMAINDER, SUPPRESS, 0]: + if nargs == 0: + raise ValueError('nargs for positionals must be != 0') + if nargs not in [OPTIONAL, ZERO_OR_MORE, REMAINDER, SUPPRESS]: kwargs['required'] = True # return the keyword arguments with no option strings @@ -1599,11 +1662,9 @@ def _get_optional_kwargs(self, *args, **kwargs): for option_string in args: # error on strings that don't start with an appropriate prefix if not option_string[0] in self.prefix_chars: - args = {'option': option_string, - 'prefix_chars': self.prefix_chars} - msg = _('invalid option string %(option)r: ' - 'must start with a character %(prefix_chars)r') - raise ValueError(msg % args) + raise ValueError( + f'invalid option string {option_string!r}: ' + f'must start with a character {self.prefix_chars!r}') # strings starting with two prefix characters are long options option_strings.append(option_string) @@ -1619,8 +1680,8 @@ def _get_optional_kwargs(self, *args, **kwargs): dest_option_string = option_strings[0] dest = dest_option_string.lstrip(self.prefix_chars) if not dest: - msg = _('dest= is required for options like %r') - raise ValueError(msg % option_string) + msg = f'dest= is required for options like {option_string!r}' + raise TypeError(msg) dest = dest.replace('-', '_') # return the updated keyword arguments @@ -1636,8 +1697,8 @@ def _get_handler(self): try: return getattr(self, handler_func_name) except AttributeError: - msg = _('invalid conflict_resolution value: %r') - raise ValueError(msg % self.conflict_handler) + msg = f'invalid conflict_resolution value: {self.conflict_handler!r}' + raise ValueError(msg) def _check_conflict(self, action): @@ -1676,10 +1737,26 @@ def _handle_conflict_resolve(self, action, conflicting_actions): if not action.option_strings: action.container._remove_action(action) + def _check_help(self, action): + if action.help and hasattr(self, "_get_validation_formatter"): + formatter = self._get_validation_formatter() + try: + formatter._expand_help(action) + except (ValueError, TypeError, KeyError) as exc: + raise ValueError('badly formed help string') from exc + class _ArgumentGroup(_ActionsContainer): def __init__(self, container, title=None, description=None, **kwargs): + if 'prefix_chars' in kwargs: + import warnings + depr_msg = ( + "The use of the undocumented 'prefix_chars' parameter in " + "ArgumentParser.add_argument_group() is deprecated." + ) + warnings.warn(depr_msg, DeprecationWarning, stacklevel=3) + # add any missing keyword arguments by checking the container update = kwargs.setdefault update('conflict_handler', container.conflict_handler) @@ -1711,13 +1788,7 @@ def _remove_action(self, action): self._group_actions.remove(action) def add_argument_group(self, *args, **kwargs): - warnings.warn( - "Nesting argument groups is deprecated.", - category=DeprecationWarning, - stacklevel=2 - ) - return super().add_argument_group(*args, **kwargs) - + raise ValueError('argument groups cannot be nested') class _MutuallyExclusiveGroup(_ArgumentGroup): @@ -1728,7 +1799,7 @@ def __init__(self, container, required=False): def _add_action(self, action): if action.required: - msg = _('mutually exclusive arguments must be optional') + msg = 'mutually exclusive arguments must be optional' raise ValueError(msg) action = self._container._add_action(action) self._group_actions.append(action) @@ -1738,13 +1809,29 @@ def _remove_action(self, action): self._container._remove_action(action) self._group_actions.remove(action) - def add_mutually_exclusive_group(self, *args, **kwargs): - warnings.warn( - "Nesting mutually exclusive groups is deprecated.", - category=DeprecationWarning, - stacklevel=2 - ) - return super().add_mutually_exclusive_group(*args, **kwargs) + def add_mutually_exclusive_group(self, **kwargs): + raise ValueError('mutually exclusive groups cannot be nested') + +def _prog_name(prog=None): + if prog is not None: + return prog + arg0 = _sys.argv[0] + try: + modspec = _sys.modules['__main__'].__spec__ + except (KeyError, AttributeError): + # possibly PYTHONSTARTUP or -X presite or other weird edge case + # no good answer here, so fall back to the default + modspec = None + if modspec is None: + # simple script + return _os.path.basename(arg0) + py = _os.path.basename(_sys.executable) + if modspec.name != '__main__': + # imported module or package + modname = modspec.name.removesuffix('.__main__') + return f'{py} -m {modname}' + # directory or ZIP file + return f'{py} {arg0}' class ArgumentParser(_AttributeHolder, _ActionsContainer): @@ -1767,6 +1854,9 @@ class ArgumentParser(_AttributeHolder, _ActionsContainer): - allow_abbrev -- Allow long options to be abbreviated unambiguously - exit_on_error -- Determines whether or not ArgumentParser exits with error info when an error occurs + - suggest_on_error - Enables suggestions for mistyped argument choices + and subparser names (default: ``False``) + - color - Allow color output in help messages (default: ``False``) """ def __init__(self, @@ -1782,19 +1872,18 @@ def __init__(self, conflict_handler='error', add_help=True, allow_abbrev=True, - exit_on_error=True): - + exit_on_error=True, + *, + suggest_on_error=False, + color=True, + ): superinit = super(ArgumentParser, self).__init__ superinit(description=description, prefix_chars=prefix_chars, argument_default=argument_default, conflict_handler=conflict_handler) - # default setting for prog - if prog is None: - prog = _os.path.basename(_sys.argv[0]) - - self.prog = prog + self.prog = _prog_name(prog) self.usage = usage self.epilog = epilog self.formatter_class = formatter_class @@ -1802,6 +1891,11 @@ def __init__(self, self.add_help = add_help self.allow_abbrev = allow_abbrev self.exit_on_error = exit_on_error + self.suggest_on_error = suggest_on_error + self.color = color + + # Cached formatter for validation (avoids repeated _set_color calls) + self._cached_formatter = None add_group = self.add_argument_group self._positionals = add_group(_('positional arguments')) @@ -1824,17 +1918,16 @@ def identity(string): # add parent arguments and defaults for parent in parents: + if not isinstance(parent, ArgumentParser): + raise TypeError('parents must be a list of ArgumentParser') self._add_container_actions(parent) - try: - defaults = parent._defaults - except AttributeError: - pass - else: - self._defaults.update(defaults) + defaults = parent._defaults + self._defaults.update(defaults) # ======================= # Pretty __repr__ methods # ======================= + def _get_kwargs(self): names = [ 'prog', @@ -1849,9 +1942,10 @@ def _get_kwargs(self): # ================================== # Optional/Positional adding methods # ================================== + def add_subparsers(self, **kwargs): if self._subparsers is not None: - raise ArgumentError(None, _('cannot have multiple subparser arguments')) + raise ValueError('cannot have multiple subparser arguments') # add the parser class to the arguments if it's not present kwargs.setdefault('parser_class', type(self)) @@ -1866,15 +1960,19 @@ def add_subparsers(self, **kwargs): # prog defaults to the usage message of this parser, skipping # optional arguments and with no "usage:" prefix if kwargs.get('prog') is None: - formatter = self._get_formatter() + # Create formatter without color to avoid storing ANSI codes in prog + formatter = self.formatter_class(prog=self.prog) + formatter._set_color(False) positionals = self._get_positional_actions() groups = self._mutually_exclusive_groups - formatter.add_usage(self.usage, positionals, groups, '') + formatter.add_usage(None, positionals, groups, '') kwargs['prog'] = formatter.format_help().strip() # create the parsers action and add it to the positionals list parsers_class = self._pop_action_class(kwargs, 'parsers') action = parsers_class(option_strings=[], **kwargs) + action._color = self.color + self._check_help(action) self._subparsers._add_action(action) # return the created parsers action @@ -1900,6 +1998,7 @@ def _get_positional_actions(self): # ===================================== # Command line argument parsing methods # ===================================== + def parse_args(self, args=None, namespace=None): args, argv = self.parse_known_args(args, namespace) if argv: @@ -1997,6 +2096,7 @@ def _parse_known_args(self, arg_strings, namespace, intermixed): # converts arg strings to the appropriate and then takes the action seen_actions = set() seen_non_default_actions = set() + warned = set() def take_action(action, argument_strings, option_string=None): seen_actions.add(action) @@ -2110,6 +2210,10 @@ def consume_optional(start_index): # the Optional's string args stopped assert action_tuples for action, args, option_string in action_tuples: + if action.deprecated and option_string not in warned: + self._warning(_("option '%(option)s' is deprecated") % + {'option': option_string}) + warned.add(option_string) take_action(action, args, option_string) return stop @@ -2138,6 +2242,10 @@ def consume_positionals(start_index): start_index + arg_count) >= 0): args.remove('--') start_index += arg_count + if args and action.deprecated and action.dest not in warned: + self._warning(_("argument '%(argument_name)s' is deprecated") % + {'argument_name': action.dest}) + warned.add(action.dest) take_action(action, args) # slice off the Positionals that we just parsed and return the @@ -2157,10 +2265,11 @@ def consume_positionals(start_index): while start_index <= max_option_string_index: # consume any Positionals preceding the next option - next_option_string_index = min([ - index - for index in option_string_indices - if index >= start_index]) + next_option_string_index = start_index + while next_option_string_index <= max_option_string_index: + if next_option_string_index in option_string_indices: + break + next_option_string_index += 1 if not intermixed and start_index != next_option_string_index: positionals_end_index = consume_positionals(start_index) @@ -2484,6 +2593,7 @@ def parse_known_intermixed_args(self, args=None, namespace=None): # ======================== # Value conversion methods # ======================== + def _get_values(self, action, arg_strings): # optional argument produces a default when not present if not arg_strings and action.nargs == OPTIONAL: @@ -2493,7 +2603,6 @@ def _get_values(self, action, arg_strings): value = action.default if isinstance(value, str) and value is not SUPPRESS: value = self._get_value(action, value) - self._check_value(action, value) # when nargs='*' on a positional, if there were no command-line # args, use the default if it is anything other than None @@ -2501,11 +2610,8 @@ def _get_values(self, action, arg_strings): not action.option_strings): if action.default is not None: value = action.default - self._check_value(action, value) else: - # since arg_strings is always [] at this point - # there is no need to use self._check_value(action, value) - value = arg_strings + value = [] # single argument or optional argument produces a single value elif len(arg_strings) == 1 and action.nargs in [None, OPTIONAL]: @@ -2538,8 +2644,7 @@ def _get_values(self, action, arg_strings): def _get_value(self, action, arg_string): type_func = self._registry_get('type', action.type, action.type) if not callable(type_func): - msg = _('%r is not callable') - raise ArgumentError(action, msg % type_func) + raise TypeError(f'{type_func!r} is not callable') # convert the value to the appropriate type try: @@ -2563,18 +2668,32 @@ def _get_value(self, action, arg_string): def _check_value(self, action, value): # converted value must be one of the choices (if specified) choices = action.choices - if choices is not None: - if isinstance(choices, str): - choices = iter(choices) - if value not in choices: - args = {'value': str(value), - 'choices': ', '.join(map(str, action.choices))} - msg = _('invalid choice: %(value)r (choose from %(choices)s)') - raise ArgumentError(action, msg % args) + if choices is None: + return + + if isinstance(choices, str): + choices = iter(choices) + + if value not in choices: + args = {'value': str(value), + 'choices': ', '.join(map(str, action.choices))} + msg = _('invalid choice: %(value)r (choose from %(choices)s)') + + if self.suggest_on_error and isinstance(value, str): + if all(isinstance(choice, str) for choice in action.choices): + import difflib + suggestions = difflib.get_close_matches(value, action.choices, 1) + if suggestions: + args['closest'] = suggestions[0] + msg = _('invalid choice: %(value)r, maybe you meant %(closest)r? ' + '(choose from %(choices)s)') + + raise ArgumentError(action, msg % args) # ======================= # Help-formatting methods # ======================= + def format_usage(self): formatter = self._get_formatter() formatter.add_usage(self.usage, self._actions, @@ -2605,11 +2724,21 @@ def format_help(self): return formatter.format_help() def _get_formatter(self): - return self.formatter_class(prog=self.prog) + formatter = self.formatter_class(prog=self.prog) + formatter._set_color(self.color) + return formatter + + def _get_validation_formatter(self): + # Return cached formatter for read-only validation operations + # (_expand_help and _format_args). Avoids repeated slow _set_color calls. + if self._cached_formatter is None: + self._cached_formatter = self._get_formatter() + return self._cached_formatter # ===================== # Help-printing methods # ===================== + def print_usage(self, file=None): if file is None: file = _sys.stdout @@ -2631,6 +2760,7 @@ def _print_message(self, message, file=None): # =============== # Exiting methods # =============== + def exit(self, status=0, message=None): if message: self._print_message(message, _sys.stderr) @@ -2648,3 +2778,7 @@ def error(self, message): self.print_usage(_sys.stderr) args = {'prog': self.prog, 'message': message} self.exit(2, _('%(prog)s: error: %(message)s\n') % args) + + def _warning(self, message): + args = {'prog': self.prog, 'message': message} + self._print_message(_('%(prog)s: warning: %(message)s\n') % args, _sys.stderr) diff --git a/PythonLib/full/ast.py b/PythonLib/full/ast.py index 6d9785cc..2f11683e 100644 --- a/PythonLib/full/ast.py +++ b/PythonLib/full/ast.py @@ -1,44 +1,38 @@ """ - ast - ~~~ - - The `ast` module helps Python applications to process trees of the Python - abstract syntax grammar. The abstract syntax itself might change with - each Python release; this module helps to find out programmatically what - the current grammar looks like and allows modifications of it. - - An abstract syntax tree can be generated by passing `ast.PyCF_ONLY_AST` as - a flag to the `compile()` builtin function or by using the `parse()` - function from this module. The result will be a tree of objects whose - classes all inherit from `ast.AST`. - - A modified abstract syntax tree can be compiled into a Python code object - using the built-in `compile()` function. - - Additionally various helper functions are provided that make working with - the trees simpler. The main intention of the helper functions and this - module in general is to provide an easy to use interface for libraries - that work tightly with the python syntax (template engines for example). - - - :copyright: Copyright 2008 by Armin Ronacher. - :license: Python License. +The `ast` module helps Python applications to process trees of the Python +abstract syntax grammar. The abstract syntax itself might change with +each Python release; this module helps to find out programmatically what +the current grammar looks like and allows modifications of it. + +An abstract syntax tree can be generated by passing `ast.PyCF_ONLY_AST` as +a flag to the `compile()` builtin function or by using the `parse()` +function from this module. The result will be a tree of objects whose +classes all inherit from `ast.AST`. + +A modified abstract syntax tree can be compiled into a Python code object +using the built-in `compile()` function. + +Additionally various helper functions are provided that make working with +the trees simpler. The main intention of the helper functions and this +module in general is to provide an easy to use interface for libraries +that work tightly with the python syntax (template engines for example). + +:copyright: Copyright 2008 by Armin Ronacher. +:license: Python License. """ -import sys -import re from _ast import * -from contextlib import contextmanager, nullcontext -from enum import IntEnum, auto, _simple_enum def parse(source, filename='', mode='exec', *, - type_comments=False, feature_version=None): + type_comments=False, feature_version=None, optimize=-1): """ Parse the source into an AST node. Equivalent to compile(source, filename, mode, PyCF_ONLY_AST). Pass type_comments=True to get back type comments where the syntax allows. """ flags = PyCF_ONLY_AST + if optimize > 0: + flags |= PyCF_OPTIMIZED_AST if type_comments: flags |= PyCF_TYPE_COMMENTS if feature_version is None: @@ -50,7 +44,7 @@ def parse(source, filename='', mode='exec', *, feature_version = minor # Else it should be an int giving the minor version for 3.x. return compile(source, filename, mode, flags, - _feature_version=feature_version) + _feature_version=feature_version, optimize=optimize) def literal_eval(node_or_string): @@ -112,7 +106,11 @@ def _convert(node): return _convert(node_or_string) -def dump(node, annotate_fields=True, include_attributes=False, *, indent=None): +def dump( + node, annotate_fields=True, include_attributes=False, + *, + indent=None, show_empty=False, +): """ Return a formatted dump of the tree in node. This is mainly useful for debugging purposes. If annotate_fields is true (by default), @@ -123,6 +121,8 @@ def dump(node, annotate_fields=True, include_attributes=False, *, indent=None): include_attributes can be set to true. If indent is a non-negative integer or string, then the tree will be pretty-printed with that indent level. None (the default) selects the single line representation. + If show_empty is False, then empty lists and fields that are None + will be omitted from the output for better readability. """ def _format(node, level=0): if indent is not None: @@ -135,6 +135,7 @@ def _format(node, level=0): if isinstance(node, AST): cls = type(node) args = [] + args_buffer = [] allsimple = True keywords = annotate_fields for name in node._fields: @@ -146,6 +147,16 @@ def _format(node, level=0): if value is None and getattr(cls, name, ...) is None: keywords = True continue + if not show_empty: + if value == []: + field_type = cls._field_types.get(name, object) + if getattr(field_type, '__origin__', ...) is list: + if not keywords: + args_buffer.append(repr(value)) + continue + if not keywords: + args.extend(args_buffer) + args_buffer = [] value, simple = _format(value, level) allsimple = allsimple and simple if keywords: @@ -304,12 +315,18 @@ def get_docstring(node, clean=True): return text -_line_pattern = re.compile(r"(.*?(?:\r\n|\n|\r|$))") +_line_pattern = None def _splitlines_no_ff(source, maxlines=None): """Split a string into lines ignoring form feed and other chars. This mimics how the Python parser splits source code. """ + global _line_pattern + if _line_pattern is None: + # lazily computed to speedup import time of `ast` + import re + _line_pattern = re.compile(r"(.*?(?:\r\n|\n|\r|$))") + lines = [] for lineno, match in enumerate(_line_pattern.finditer(source), 1): if maxlines is not None and lineno > maxlines: @@ -380,6 +397,88 @@ def walk(node): yield node +def compare( + a, + b, + /, + *, + compare_attributes=False, +): + """Recursively compares two ASTs. + + compare_attributes affects whether AST attributes are considered + in the comparison. If compare_attributes is False (default), then + attributes are ignored. Otherwise they must all be equal. This + option is useful to check whether the ASTs are structurally equal but + might differ in whitespace or similar details. + """ + + sentinel = object() # handle the possibility of a missing attribute/field + + def _compare(a, b): + # Compare two fields on an AST object, which may themselves be + # AST objects, lists of AST objects, or primitive ASDL types + # like identifiers and constants. + if isinstance(a, AST): + return compare( + a, + b, + compare_attributes=compare_attributes, + ) + elif isinstance(a, list): + # If a field is repeated, then both objects will represent + # the value as a list. + if len(a) != len(b): + return False + for a_item, b_item in zip(a, b): + if not _compare(a_item, b_item): + return False + else: + return True + else: + return type(a) is type(b) and a == b + + def _compare_fields(a, b): + if a._fields != b._fields: + return False + for field in a._fields: + a_field = getattr(a, field, sentinel) + b_field = getattr(b, field, sentinel) + if a_field is sentinel and b_field is sentinel: + # both nodes are missing a field at runtime + continue + if a_field is sentinel or b_field is sentinel: + # one of the node is missing a field + return False + if not _compare(a_field, b_field): + return False + else: + return True + + def _compare_attributes(a, b): + if a._attributes != b._attributes: + return False + # Attributes are always ints. + for attr in a._attributes: + a_attr = getattr(a, attr, sentinel) + b_attr = getattr(b, attr, sentinel) + if a_attr is sentinel and b_attr is sentinel: + # both nodes are missing an attribute at runtime + continue + if a_attr != b_attr: + return False + else: + return True + + if type(a) is not type(b): + return False + if not _compare_fields(a, b): + return False + if compare_attributes and not _compare_attributes(a, b): + return False + return True + + class NodeVisitor(object): """ A node visitor base class that walks the abstract syntax tree and calls a @@ -416,27 +515,6 @@ def generic_visit(self, node): elif isinstance(value, AST): self.visit(value) - def visit_Constant(self, node): - value = node.value - type_name = _const_node_type_names.get(type(value)) - if type_name is None: - for cls, name in _const_node_type_names.items(): - if isinstance(value, cls): - type_name = name - break - if type_name is not None: - method = 'visit_' + type_name - try: - visitor = getattr(self, method) - except AttributeError: - pass - else: - import warnings - warnings.warn(f"{method} is deprecated; add visit_Constant", - DeprecationWarning, 2) - return visitor(node) - return self.generic_visit(node) - class NodeTransformer(NodeVisitor): """ @@ -496,151 +574,6 @@ def generic_visit(self, node): setattr(node, field, new_node) return node - -_DEPRECATED_VALUE_ALIAS_MESSAGE = ( - "{name} is deprecated and will be removed in Python {remove}; use value instead" -) -_DEPRECATED_CLASS_MESSAGE = ( - "{name} is deprecated and will be removed in Python {remove}; " - "use ast.Constant instead" -) - - -# If the ast module is loaded more than once, only add deprecated methods once -if not hasattr(Constant, 'n'): - # The following code is for backward compatibility. - # It will be removed in future. - - def _n_getter(self): - """Deprecated. Use value instead.""" - import warnings - warnings._deprecated( - "Attribute n", message=_DEPRECATED_VALUE_ALIAS_MESSAGE, remove=(3, 14) - ) - return self.value - - def _n_setter(self, value): - import warnings - warnings._deprecated( - "Attribute n", message=_DEPRECATED_VALUE_ALIAS_MESSAGE, remove=(3, 14) - ) - self.value = value - - def _s_getter(self): - """Deprecated. Use value instead.""" - import warnings - warnings._deprecated( - "Attribute s", message=_DEPRECATED_VALUE_ALIAS_MESSAGE, remove=(3, 14) - ) - return self.value - - def _s_setter(self, value): - import warnings - warnings._deprecated( - "Attribute s", message=_DEPRECATED_VALUE_ALIAS_MESSAGE, remove=(3, 14) - ) - self.value = value - - Constant.n = property(_n_getter, _n_setter) - Constant.s = property(_s_getter, _s_setter) - -class _ABC(type): - - def __init__(cls, *args): - cls.__doc__ = """Deprecated AST node class. Use ast.Constant instead""" - - def __instancecheck__(cls, inst): - if cls in _const_types: - import warnings - warnings._deprecated( - f"ast.{cls.__qualname__}", - message=_DEPRECATED_CLASS_MESSAGE, - remove=(3, 14) - ) - if not isinstance(inst, Constant): - return False - if cls in _const_types: - try: - value = inst.value - except AttributeError: - return False - else: - return ( - isinstance(value, _const_types[cls]) and - not isinstance(value, _const_types_not.get(cls, ())) - ) - return type.__instancecheck__(cls, inst) - -def _new(cls, *args, **kwargs): - for key in kwargs: - if key not in cls._fields: - # arbitrary keyword arguments are accepted - continue - pos = cls._fields.index(key) - if pos < len(args): - raise TypeError(f"{cls.__name__} got multiple values for argument {key!r}") - if cls in _const_types: - import warnings - warnings._deprecated( - f"ast.{cls.__qualname__}", message=_DEPRECATED_CLASS_MESSAGE, remove=(3, 14) - ) - return Constant(*args, **kwargs) - return Constant.__new__(cls, *args, **kwargs) - -class Num(Constant, metaclass=_ABC): - _fields = ('n',) - __new__ = _new - -class Str(Constant, metaclass=_ABC): - _fields = ('s',) - __new__ = _new - -class Bytes(Constant, metaclass=_ABC): - _fields = ('s',) - __new__ = _new - -class NameConstant(Constant, metaclass=_ABC): - __new__ = _new - -class Ellipsis(Constant, metaclass=_ABC): - _fields = () - - def __new__(cls, *args, **kwargs): - if cls is _ast_Ellipsis: - import warnings - warnings._deprecated( - "ast.Ellipsis", message=_DEPRECATED_CLASS_MESSAGE, remove=(3, 14) - ) - return Constant(..., *args, **kwargs) - return Constant.__new__(cls, *args, **kwargs) - -# Keep another reference to Ellipsis in the global namespace -# so it can be referenced in Ellipsis.__new__ -# (The original "Ellipsis" name is removed from the global namespace later on) -_ast_Ellipsis = Ellipsis - -_const_types = { - Num: (int, float, complex), - Str: (str,), - Bytes: (bytes,), - NameConstant: (type(None), bool), - Ellipsis: (type(...),), -} -_const_types_not = { - Num: (bool,), -} - -_const_node_type_names = { - bool: 'NameConstant', # should be before int - type(None): 'NameConstant', - int: 'Num', - float: 'Num', - complex: 'Num', - str: 'Str', - bytes: 'Bytes', - type(...): 'Ellipsis', -} - class slice(AST): """Deprecated AST node class.""" @@ -681,1143 +614,22 @@ class Param(expr_context): """Deprecated AST node class. Unused in Python 3.""" -# Large float and imaginary literals get turned into infinities in the AST. -# We unparse those infinities to INFSTR. -_INFSTR = "1e" + repr(sys.float_info.max_10_exp + 1) - -@_simple_enum(IntEnum) -class _Precedence: - """Precedence table that originated from python grammar.""" - - NAMED_EXPR = auto() # := - TUPLE = auto() # , - YIELD = auto() # 'yield', 'yield from' - TEST = auto() # 'if'-'else', 'lambda' - OR = auto() # 'or' - AND = auto() # 'and' - NOT = auto() # 'not' - CMP = auto() # '<', '>', '==', '>=', '<=', '!=', - # 'in', 'not in', 'is', 'is not' - EXPR = auto() - BOR = EXPR # '|' - BXOR = auto() # '^' - BAND = auto() # '&' - SHIFT = auto() # '<<', '>>' - ARITH = auto() # '+', '-' - TERM = auto() # '*', '@', '/', '%', '//' - FACTOR = auto() # unary '+', '-', '~' - POWER = auto() # '**' - AWAIT = auto() # 'await' - ATOM = auto() - - def next(self): - try: - return self.__class__(self + 1) - except ValueError: - return self - - -_SINGLE_QUOTES = ("'", '"') -_MULTI_QUOTES = ('"""', "'''") -_ALL_QUOTES = (*_SINGLE_QUOTES, *_MULTI_QUOTES) - -class _Unparser(NodeVisitor): - """Methods in this class recursively traverse an AST and - output source code for the abstract syntax; original formatting - is disregarded.""" - - def __init__(self, *, _avoid_backslashes=False): - self._source = [] - self._precedences = {} - self._type_ignores = {} - self._indent = 0 - self._avoid_backslashes = _avoid_backslashes - self._in_try_star = False - - def interleave(self, inter, f, seq): - """Call f on each item in seq, calling inter() in between.""" - seq = iter(seq) - try: - f(next(seq)) - except StopIteration: - pass - else: - for x in seq: - inter() - f(x) - - def items_view(self, traverser, items): - """Traverse and separate the given *items* with a comma and append it to - the buffer. If *items* is a single item sequence, a trailing comma - will be added.""" - if len(items) == 1: - traverser(items[0]) - self.write(",") - else: - self.interleave(lambda: self.write(", "), traverser, items) - - def maybe_newline(self): - """Adds a newline if it isn't the start of generated source""" - if self._source: - self.write("\n") - - def fill(self, text=""): - """Indent a piece of text and append it, according to the current - indentation level""" - self.maybe_newline() - self.write(" " * self._indent + text) - - def write(self, *text): - """Add new source parts""" - self._source.extend(text) - - @contextmanager - def buffered(self, buffer = None): - if buffer is None: - buffer = [] - - original_source = self._source - self._source = buffer - yield buffer - self._source = original_source - - @contextmanager - def block(self, *, extra = None): - """A context manager for preparing the source for blocks. It adds - the character':', increases the indentation on enter and decreases - the indentation on exit. If *extra* is given, it will be directly - appended after the colon character. - """ - self.write(":") - if extra: - self.write(extra) - self._indent += 1 - yield - self._indent -= 1 - - @contextmanager - def delimit(self, start, end): - """A context manager for preparing the source for expressions. It adds - *start* to the buffer and enters, after exit it adds *end*.""" - - self.write(start) - yield - self.write(end) - - def delimit_if(self, start, end, condition): - if condition: - return self.delimit(start, end) - else: - return nullcontext() - - def require_parens(self, precedence, node): - """Shortcut to adding precedence related parens""" - return self.delimit_if("(", ")", self.get_precedence(node) > precedence) - - def get_precedence(self, node): - return self._precedences.get(node, _Precedence.TEST) - - def set_precedence(self, precedence, *nodes): - for node in nodes: - self._precedences[node] = precedence - - def get_raw_docstring(self, node): - """If a docstring node is found in the body of the *node* parameter, - return that docstring node, None otherwise. - - Logic mirrored from ``_PyAST_GetDocString``.""" - if not isinstance( - node, (AsyncFunctionDef, FunctionDef, ClassDef, Module) - ) or len(node.body) < 1: - return None - node = node.body[0] - if not isinstance(node, Expr): - return None - node = node.value - if isinstance(node, Constant) and isinstance(node.value, str): - return node - - def get_type_comment(self, node): - comment = self._type_ignores.get(node.lineno) or node.type_comment - if comment is not None: - return f" # type: {comment}" - - def traverse(self, node): - if isinstance(node, list): - for item in node: - self.traverse(item) - else: - super().visit(node) - - # Note: as visit() resets the output text, do NOT rely on - # NodeVisitor.generic_visit to handle any nodes (as it calls back in to - # the subclass visit() method, which resets self._source to an empty list) - def visit(self, node): - """Outputs a source code string that, if converted back to an ast - (using ast.parse) will generate an AST equivalent to *node*""" - self._source = [] - self.traverse(node) - return "".join(self._source) - - def _write_docstring_and_traverse_body(self, node): - if (docstring := self.get_raw_docstring(node)): - self._write_docstring(docstring) - self.traverse(node.body[1:]) - else: - self.traverse(node.body) - - def visit_Module(self, node): - self._type_ignores = { - ignore.lineno: f"ignore{ignore.tag}" - for ignore in node.type_ignores - } - self._write_docstring_and_traverse_body(node) - self._type_ignores.clear() - - def visit_FunctionType(self, node): - with self.delimit("(", ")"): - self.interleave( - lambda: self.write(", "), self.traverse, node.argtypes - ) - - self.write(" -> ") - self.traverse(node.returns) - - def visit_Expr(self, node): - self.fill() - self.set_precedence(_Precedence.YIELD, node.value) - self.traverse(node.value) - - def visit_NamedExpr(self, node): - with self.require_parens(_Precedence.NAMED_EXPR, node): - self.set_precedence(_Precedence.ATOM, node.target, node.value) - self.traverse(node.target) - self.write(" := ") - self.traverse(node.value) - - def visit_Import(self, node): - self.fill("import ") - self.interleave(lambda: self.write(", "), self.traverse, node.names) - - def visit_ImportFrom(self, node): - self.fill("from ") - self.write("." * (node.level or 0)) - if node.module: - self.write(node.module) - self.write(" import ") - self.interleave(lambda: self.write(", "), self.traverse, node.names) - - def visit_Assign(self, node): - self.fill() - for target in node.targets: - self.set_precedence(_Precedence.TUPLE, target) - self.traverse(target) - self.write(" = ") - self.traverse(node.value) - if type_comment := self.get_type_comment(node): - self.write(type_comment) - - def visit_AugAssign(self, node): - self.fill() - self.traverse(node.target) - self.write(" " + self.binop[node.op.__class__.__name__] + "= ") - self.traverse(node.value) - - def visit_AnnAssign(self, node): - self.fill() - with self.delimit_if("(", ")", not node.simple and isinstance(node.target, Name)): - self.traverse(node.target) - self.write(": ") - self.traverse(node.annotation) - if node.value: - self.write(" = ") - self.traverse(node.value) - - def visit_Return(self, node): - self.fill("return") - if node.value: - self.write(" ") - self.traverse(node.value) - - def visit_Pass(self, node): - self.fill("pass") - - def visit_Break(self, node): - self.fill("break") - - def visit_Continue(self, node): - self.fill("continue") - - def visit_Delete(self, node): - self.fill("del ") - self.interleave(lambda: self.write(", "), self.traverse, node.targets) - - def visit_Assert(self, node): - self.fill("assert ") - self.traverse(node.test) - if node.msg: - self.write(", ") - self.traverse(node.msg) - - def visit_Global(self, node): - self.fill("global ") - self.interleave(lambda: self.write(", "), self.write, node.names) - - def visit_Nonlocal(self, node): - self.fill("nonlocal ") - self.interleave(lambda: self.write(", "), self.write, node.names) - - def visit_Await(self, node): - with self.require_parens(_Precedence.AWAIT, node): - self.write("await") - if node.value: - self.write(" ") - self.set_precedence(_Precedence.ATOM, node.value) - self.traverse(node.value) - - def visit_Yield(self, node): - with self.require_parens(_Precedence.YIELD, node): - self.write("yield") - if node.value: - self.write(" ") - self.set_precedence(_Precedence.ATOM, node.value) - self.traverse(node.value) - - def visit_YieldFrom(self, node): - with self.require_parens(_Precedence.YIELD, node): - self.write("yield from ") - if not node.value: - raise ValueError("Node can't be used without a value attribute.") - self.set_precedence(_Precedence.ATOM, node.value) - self.traverse(node.value) - - def visit_Raise(self, node): - self.fill("raise") - if not node.exc: - if node.cause: - raise ValueError(f"Node can't use cause without an exception.") - return - self.write(" ") - self.traverse(node.exc) - if node.cause: - self.write(" from ") - self.traverse(node.cause) - - def do_visit_try(self, node): - self.fill("try") - with self.block(): - self.traverse(node.body) - for ex in node.handlers: - self.traverse(ex) - if node.orelse: - self.fill("else") - with self.block(): - self.traverse(node.orelse) - if node.finalbody: - self.fill("finally") - with self.block(): - self.traverse(node.finalbody) - - def visit_Try(self, node): - prev_in_try_star = self._in_try_star - try: - self._in_try_star = False - self.do_visit_try(node) - finally: - self._in_try_star = prev_in_try_star - - def visit_TryStar(self, node): - prev_in_try_star = self._in_try_star - try: - self._in_try_star = True - self.do_visit_try(node) - finally: - self._in_try_star = prev_in_try_star - - def visit_ExceptHandler(self, node): - self.fill("except*" if self._in_try_star else "except") - if node.type: - self.write(" ") - self.traverse(node.type) - if node.name: - self.write(" as ") - self.write(node.name) - with self.block(): - self.traverse(node.body) - - def visit_ClassDef(self, node): - self.maybe_newline() - for deco in node.decorator_list: - self.fill("@") - self.traverse(deco) - self.fill("class " + node.name) - if hasattr(node, "type_params"): - self._type_params_helper(node.type_params) - with self.delimit_if("(", ")", condition = node.bases or node.keywords): - comma = False - for e in node.bases: - if comma: - self.write(", ") - else: - comma = True - self.traverse(e) - for e in node.keywords: - if comma: - self.write(", ") - else: - comma = True - self.traverse(e) - - with self.block(): - self._write_docstring_and_traverse_body(node) - - def visit_FunctionDef(self, node): - self._function_helper(node, "def") - - def visit_AsyncFunctionDef(self, node): - self._function_helper(node, "async def") - - def _function_helper(self, node, fill_suffix): - self.maybe_newline() - for deco in node.decorator_list: - self.fill("@") - self.traverse(deco) - def_str = fill_suffix + " " + node.name - self.fill(def_str) - if hasattr(node, "type_params"): - self._type_params_helper(node.type_params) - with self.delimit("(", ")"): - self.traverse(node.args) - if node.returns: - self.write(" -> ") - self.traverse(node.returns) - with self.block(extra=self.get_type_comment(node)): - self._write_docstring_and_traverse_body(node) - - def _type_params_helper(self, type_params): - if type_params is not None and len(type_params) > 0: - with self.delimit("[", "]"): - self.interleave(lambda: self.write(", "), self.traverse, type_params) - - def visit_TypeVar(self, node): - self.write(node.name) - if node.bound: - self.write(": ") - self.traverse(node.bound) - - def visit_TypeVarTuple(self, node): - self.write("*" + node.name) - - def visit_ParamSpec(self, node): - self.write("**" + node.name) - - def visit_TypeAlias(self, node): - self.fill("type ") - self.traverse(node.name) - self._type_params_helper(node.type_params) - self.write(" = ") - self.traverse(node.value) - - def visit_For(self, node): - self._for_helper("for ", node) - - def visit_AsyncFor(self, node): - self._for_helper("async for ", node) - - def _for_helper(self, fill, node): - self.fill(fill) - self.set_precedence(_Precedence.TUPLE, node.target) - self.traverse(node.target) - self.write(" in ") - self.traverse(node.iter) - with self.block(extra=self.get_type_comment(node)): - self.traverse(node.body) - if node.orelse: - self.fill("else") - with self.block(): - self.traverse(node.orelse) - - def visit_If(self, node): - self.fill("if ") - self.traverse(node.test) - with self.block(): - self.traverse(node.body) - # collapse nested ifs into equivalent elifs. - while node.orelse and len(node.orelse) == 1 and isinstance(node.orelse[0], If): - node = node.orelse[0] - self.fill("elif ") - self.traverse(node.test) - with self.block(): - self.traverse(node.body) - # final else - if node.orelse: - self.fill("else") - with self.block(): - self.traverse(node.orelse) - - def visit_While(self, node): - self.fill("while ") - self.traverse(node.test) - with self.block(): - self.traverse(node.body) - if node.orelse: - self.fill("else") - with self.block(): - self.traverse(node.orelse) - - def visit_With(self, node): - self.fill("with ") - self.interleave(lambda: self.write(", "), self.traverse, node.items) - with self.block(extra=self.get_type_comment(node)): - self.traverse(node.body) - - def visit_AsyncWith(self, node): - self.fill("async with ") - self.interleave(lambda: self.write(", "), self.traverse, node.items) - with self.block(extra=self.get_type_comment(node)): - self.traverse(node.body) - - def _str_literal_helper( - self, string, *, quote_types=_ALL_QUOTES, escape_special_whitespace=False - ): - """Helper for writing string literals, minimizing escapes. - Returns the tuple (string literal to write, possible quote types). - """ - def escape_char(c): - # \n and \t are non-printable, but we only escape them if - # escape_special_whitespace is True - if not escape_special_whitespace and c in "\n\t": - return c - # Always escape backslashes and other non-printable characters - if c == "\\" or not c.isprintable(): - return c.encode("unicode_escape").decode("ascii") - return c - - escaped_string = "".join(map(escape_char, string)) - possible_quotes = quote_types - if "\n" in escaped_string: - possible_quotes = [q for q in possible_quotes if q in _MULTI_QUOTES] - possible_quotes = [q for q in possible_quotes if q not in escaped_string] - if not possible_quotes: - # If there aren't any possible_quotes, fallback to using repr - # on the original string. Try to use a quote from quote_types, - # e.g., so that we use triple quotes for docstrings. - string = repr(string) - quote = next((q for q in quote_types if string[0] in q), string[0]) - return string[1:-1], [quote] - if escaped_string: - # Sort so that we prefer '''"''' over """\"""" - possible_quotes.sort(key=lambda q: q[0] == escaped_string[-1]) - # If we're using triple quotes and we'd need to escape a final - # quote, escape it - if possible_quotes[0][0] == escaped_string[-1]: - assert len(possible_quotes[0]) == 3 - escaped_string = escaped_string[:-1] + "\\" + escaped_string[-1] - return escaped_string, possible_quotes - - def _write_str_avoiding_backslashes(self, string, *, quote_types=_ALL_QUOTES): - """Write string literal value with a best effort attempt to avoid backslashes.""" - string, quote_types = self._str_literal_helper(string, quote_types=quote_types) - quote_type = quote_types[0] - self.write(f"{quote_type}{string}{quote_type}") - - def visit_JoinedStr(self, node): - self.write("f") - - fstring_parts = [] - for value in node.values: - with self.buffered() as buffer: - self._write_fstring_inner(value) - fstring_parts.append( - ("".join(buffer), isinstance(value, Constant)) - ) - - new_fstring_parts = [] - quote_types = list(_ALL_QUOTES) - fallback_to_repr = False - for value, is_constant in fstring_parts: - if is_constant: - value, new_quote_types = self._str_literal_helper( - value, - quote_types=quote_types, - escape_special_whitespace=True, - ) - if set(new_quote_types).isdisjoint(quote_types): - fallback_to_repr = True - break - quote_types = new_quote_types - else: - if "\n" in value: - quote_types = [q for q in quote_types if q in _MULTI_QUOTES] - assert quote_types - - new_quote_types = [q for q in quote_types if q not in value] - if new_quote_types: - quote_types = new_quote_types - new_fstring_parts.append(value) - - if fallback_to_repr: - # If we weren't able to find a quote type that works for all parts - # of the JoinedStr, fallback to using repr and triple single quotes. - quote_types = ["'''"] - new_fstring_parts.clear() - for value, is_constant in fstring_parts: - if is_constant: - value = repr('"' + value) # force repr to use single quotes - expected_prefix = "'\"" - assert value.startswith(expected_prefix), repr(value) - value = value[len(expected_prefix):-1] - new_fstring_parts.append(value) - - value = "".join(new_fstring_parts) - quote_type = quote_types[0] - self.write(f"{quote_type}{value}{quote_type}") - - def _write_fstring_inner(self, node, is_format_spec=False): - if isinstance(node, JoinedStr): - # for both the f-string itself, and format_spec - for value in node.values: - self._write_fstring_inner(value, is_format_spec=is_format_spec) - elif isinstance(node, Constant) and isinstance(node.value, str): - value = node.value.replace("{", "{{").replace("}", "}}") - - if is_format_spec: - value = value.replace("\\", "\\\\") - value = value.replace("'", "\\'") - value = value.replace('"', '\\"') - value = value.replace("\n", "\\n") - self.write(value) - elif isinstance(node, FormattedValue): - self.visit_FormattedValue(node) - else: - raise ValueError(f"Unexpected node inside JoinedStr, {node!r}") - - def visit_FormattedValue(self, node): - def unparse_inner(inner): - unparser = type(self)() - unparser.set_precedence(_Precedence.TEST.next(), inner) - return unparser.visit(inner) - - with self.delimit("{", "}"): - expr = unparse_inner(node.value) - if expr.startswith("{"): - # Separate pair of opening brackets as "{ {" - self.write(" ") - self.write(expr) - if node.conversion != -1: - self.write(f"!{chr(node.conversion)}") - if node.format_spec: - self.write(":") - self._write_fstring_inner(node.format_spec, is_format_spec=True) - - def visit_Name(self, node): - self.write(node.id) - - def _write_docstring(self, node): - self.fill() - if node.kind == "u": - self.write("u") - self._write_str_avoiding_backslashes(node.value, quote_types=_MULTI_QUOTES) - - def _write_constant(self, value): - if isinstance(value, (float, complex)): - # Substitute overflowing decimal literal for AST infinities, - # and inf - inf for NaNs. - self.write( - repr(value) - .replace("inf", _INFSTR) - .replace("nan", f"({_INFSTR}-{_INFSTR})") - ) - elif self._avoid_backslashes and isinstance(value, str): - self._write_str_avoiding_backslashes(value) - else: - self.write(repr(value)) - - def visit_Constant(self, node): - value = node.value - if isinstance(value, tuple): - with self.delimit("(", ")"): - self.items_view(self._write_constant, value) - elif value is ...: - self.write("...") - else: - if node.kind == "u": - self.write("u") - self._write_constant(node.value) - - def visit_List(self, node): - with self.delimit("[", "]"): - self.interleave(lambda: self.write(", "), self.traverse, node.elts) - - def visit_ListComp(self, node): - with self.delimit("[", "]"): - self.traverse(node.elt) - for gen in node.generators: - self.traverse(gen) - - def visit_GeneratorExp(self, node): - with self.delimit("(", ")"): - self.traverse(node.elt) - for gen in node.generators: - self.traverse(gen) - - def visit_SetComp(self, node): - with self.delimit("{", "}"): - self.traverse(node.elt) - for gen in node.generators: - self.traverse(gen) - - def visit_DictComp(self, node): - with self.delimit("{", "}"): - self.traverse(node.key) - self.write(": ") - self.traverse(node.value) - for gen in node.generators: - self.traverse(gen) - - def visit_comprehension(self, node): - if node.is_async: - self.write(" async for ") - else: - self.write(" for ") - self.set_precedence(_Precedence.TUPLE, node.target) - self.traverse(node.target) - self.write(" in ") - self.set_precedence(_Precedence.TEST.next(), node.iter, *node.ifs) - self.traverse(node.iter) - for if_clause in node.ifs: - self.write(" if ") - self.traverse(if_clause) - - def visit_IfExp(self, node): - with self.require_parens(_Precedence.TEST, node): - self.set_precedence(_Precedence.TEST.next(), node.body, node.test) - self.traverse(node.body) - self.write(" if ") - self.traverse(node.test) - self.write(" else ") - self.set_precedence(_Precedence.TEST, node.orelse) - self.traverse(node.orelse) - - def visit_Set(self, node): - if node.elts: - with self.delimit("{", "}"): - self.interleave(lambda: self.write(", "), self.traverse, node.elts) - else: - # `{}` would be interpreted as a dictionary literal, and - # `set` might be shadowed. Thus: - self.write('{*()}') - - def visit_Dict(self, node): - def write_key_value_pair(k, v): - self.traverse(k) - self.write(": ") - self.traverse(v) - - def write_item(item): - k, v = item - if k is None: - # for dictionary unpacking operator in dicts {**{'y': 2}} - # see PEP 448 for details - self.write("**") - self.set_precedence(_Precedence.EXPR, v) - self.traverse(v) - else: - write_key_value_pair(k, v) - - with self.delimit("{", "}"): - self.interleave( - lambda: self.write(", "), write_item, zip(node.keys, node.values) - ) - - def visit_Tuple(self, node): - with self.delimit_if( - "(", - ")", - len(node.elts) == 0 or self.get_precedence(node) > _Precedence.TUPLE - ): - self.items_view(self.traverse, node.elts) - - unop = {"Invert": "~", "Not": "not", "UAdd": "+", "USub": "-"} - unop_precedence = { - "not": _Precedence.NOT, - "~": _Precedence.FACTOR, - "+": _Precedence.FACTOR, - "-": _Precedence.FACTOR, - } - - def visit_UnaryOp(self, node): - operator = self.unop[node.op.__class__.__name__] - operator_precedence = self.unop_precedence[operator] - with self.require_parens(operator_precedence, node): - self.write(operator) - # factor prefixes (+, -, ~) shouldn't be separated - # from the value they belong, (e.g: +1 instead of + 1) - if operator_precedence is not _Precedence.FACTOR: - self.write(" ") - self.set_precedence(operator_precedence, node.operand) - self.traverse(node.operand) - - binop = { - "Add": "+", - "Sub": "-", - "Mult": "*", - "MatMult": "@", - "Div": "/", - "Mod": "%", - "LShift": "<<", - "RShift": ">>", - "BitOr": "|", - "BitXor": "^", - "BitAnd": "&", - "FloorDiv": "//", - "Pow": "**", - } - - binop_precedence = { - "+": _Precedence.ARITH, - "-": _Precedence.ARITH, - "*": _Precedence.TERM, - "@": _Precedence.TERM, - "/": _Precedence.TERM, - "%": _Precedence.TERM, - "<<": _Precedence.SHIFT, - ">>": _Precedence.SHIFT, - "|": _Precedence.BOR, - "^": _Precedence.BXOR, - "&": _Precedence.BAND, - "//": _Precedence.TERM, - "**": _Precedence.POWER, - } - - binop_rassoc = frozenset(("**",)) - def visit_BinOp(self, node): - operator = self.binop[node.op.__class__.__name__] - operator_precedence = self.binop_precedence[operator] - with self.require_parens(operator_precedence, node): - if operator in self.binop_rassoc: - left_precedence = operator_precedence.next() - right_precedence = operator_precedence - else: - left_precedence = operator_precedence - right_precedence = operator_precedence.next() - - self.set_precedence(left_precedence, node.left) - self.traverse(node.left) - self.write(f" {operator} ") - self.set_precedence(right_precedence, node.right) - self.traverse(node.right) - - cmpops = { - "Eq": "==", - "NotEq": "!=", - "Lt": "<", - "LtE": "<=", - "Gt": ">", - "GtE": ">=", - "Is": "is", - "IsNot": "is not", - "In": "in", - "NotIn": "not in", - } - - def visit_Compare(self, node): - with self.require_parens(_Precedence.CMP, node): - self.set_precedence(_Precedence.CMP.next(), node.left, *node.comparators) - self.traverse(node.left) - for o, e in zip(node.ops, node.comparators): - self.write(" " + self.cmpops[o.__class__.__name__] + " ") - self.traverse(e) - - boolops = {"And": "and", "Or": "or"} - boolop_precedence = {"and": _Precedence.AND, "or": _Precedence.OR} - - def visit_BoolOp(self, node): - operator = self.boolops[node.op.__class__.__name__] - operator_precedence = self.boolop_precedence[operator] - - def increasing_level_traverse(node): - nonlocal operator_precedence - operator_precedence = operator_precedence.next() - self.set_precedence(operator_precedence, node) - self.traverse(node) - - with self.require_parens(operator_precedence, node): - s = f" {operator} " - self.interleave(lambda: self.write(s), increasing_level_traverse, node.values) - - def visit_Attribute(self, node): - self.set_precedence(_Precedence.ATOM, node.value) - self.traverse(node.value) - # Special case: 3.__abs__() is a syntax error, so if node.value - # is an integer literal then we need to either parenthesize - # it or add an extra space to get 3 .__abs__(). - if isinstance(node.value, Constant) and isinstance(node.value.value, int): - self.write(" ") - self.write(".") - self.write(node.attr) - - def visit_Call(self, node): - self.set_precedence(_Precedence.ATOM, node.func) - self.traverse(node.func) - with self.delimit("(", ")"): - comma = False - for e in node.args: - if comma: - self.write(", ") - else: - comma = True - self.traverse(e) - for e in node.keywords: - if comma: - self.write(", ") - else: - comma = True - self.traverse(e) - - def visit_Subscript(self, node): - def is_non_empty_tuple(slice_value): - return ( - isinstance(slice_value, Tuple) - and slice_value.elts - ) - - self.set_precedence(_Precedence.ATOM, node.value) - self.traverse(node.value) - with self.delimit("[", "]"): - if is_non_empty_tuple(node.slice): - # parentheses can be omitted if the tuple isn't empty - self.items_view(self.traverse, node.slice.elts) - else: - self.traverse(node.slice) - - def visit_Starred(self, node): - self.write("*") - self.set_precedence(_Precedence.EXPR, node.value) - self.traverse(node.value) - - def visit_Ellipsis(self, node): - self.write("...") - - def visit_Slice(self, node): - if node.lower: - self.traverse(node.lower) - self.write(":") - if node.upper: - self.traverse(node.upper) - if node.step: - self.write(":") - self.traverse(node.step) - - def visit_Match(self, node): - self.fill("match ") - self.traverse(node.subject) - with self.block(): - for case in node.cases: - self.traverse(case) - - def visit_arg(self, node): - self.write(node.arg) - if node.annotation: - self.write(": ") - self.traverse(node.annotation) - - def visit_arguments(self, node): - first = True - # normal arguments - all_args = node.posonlyargs + node.args - defaults = [None] * (len(all_args) - len(node.defaults)) + node.defaults - for index, elements in enumerate(zip(all_args, defaults), 1): - a, d = elements - if first: - first = False - else: - self.write(", ") - self.traverse(a) - if d: - self.write("=") - self.traverse(d) - if index == len(node.posonlyargs): - self.write(", /") - - # varargs, or bare '*' if no varargs but keyword-only arguments present - if node.vararg or node.kwonlyargs: - if first: - first = False - else: - self.write(", ") - self.write("*") - if node.vararg: - self.write(node.vararg.arg) - if node.vararg.annotation: - self.write(": ") - self.traverse(node.vararg.annotation) - - # keyword-only arguments - if node.kwonlyargs: - for a, d in zip(node.kwonlyargs, node.kw_defaults): - self.write(", ") - self.traverse(a) - if d: - self.write("=") - self.traverse(d) - - # kwargs - if node.kwarg: - if first: - first = False - else: - self.write(", ") - self.write("**" + node.kwarg.arg) - if node.kwarg.annotation: - self.write(": ") - self.traverse(node.kwarg.annotation) - - def visit_keyword(self, node): - if node.arg is None: - self.write("**") - else: - self.write(node.arg) - self.write("=") - self.traverse(node.value) - - def visit_Lambda(self, node): - with self.require_parens(_Precedence.TEST, node): - self.write("lambda") - with self.buffered() as buffer: - self.traverse(node.args) - if buffer: - self.write(" ", *buffer) - self.write(": ") - self.set_precedence(_Precedence.TEST, node.body) - self.traverse(node.body) - - def visit_alias(self, node): - self.write(node.name) - if node.asname: - self.write(" as " + node.asname) - - def visit_withitem(self, node): - self.traverse(node.context_expr) - if node.optional_vars: - self.write(" as ") - self.traverse(node.optional_vars) - - def visit_match_case(self, node): - self.fill("case ") - self.traverse(node.pattern) - if node.guard: - self.write(" if ") - self.traverse(node.guard) - with self.block(): - self.traverse(node.body) - - def visit_MatchValue(self, node): - self.traverse(node.value) - - def visit_MatchSingleton(self, node): - self._write_constant(node.value) - - def visit_MatchSequence(self, node): - with self.delimit("[", "]"): - self.interleave( - lambda: self.write(", "), self.traverse, node.patterns - ) - - def visit_MatchStar(self, node): - name = node.name - if name is None: - name = "_" - self.write(f"*{name}") - - def visit_MatchMapping(self, node): - def write_key_pattern_pair(pair): - k, p = pair - self.traverse(k) - self.write(": ") - self.traverse(p) - - with self.delimit("{", "}"): - keys = node.keys - self.interleave( - lambda: self.write(", "), - write_key_pattern_pair, - zip(keys, node.patterns, strict=True), - ) - rest = node.rest - if rest is not None: - if keys: - self.write(", ") - self.write(f"**{rest}") - - def visit_MatchClass(self, node): - self.set_precedence(_Precedence.ATOM, node.cls) - self.traverse(node.cls) - with self.delimit("(", ")"): - patterns = node.patterns - self.interleave( - lambda: self.write(", "), self.traverse, patterns - ) - attrs = node.kwd_attrs - if attrs: - def write_attr_pattern(pair): - attr, pattern = pair - self.write(f"{attr}=") - self.traverse(pattern) - - if patterns: - self.write(", ") - self.interleave( - lambda: self.write(", "), - write_attr_pattern, - zip(attrs, node.kwd_patterns, strict=True), - ) - - def visit_MatchAs(self, node): - name = node.name - pattern = node.pattern - if name is None: - self.write("_") - elif pattern is None: - self.write(node.name) - else: - with self.require_parens(_Precedence.TEST, node): - self.set_precedence(_Precedence.BOR, node.pattern) - self.traverse(node.pattern) - self.write(f" as {node.name}") - - def visit_MatchOr(self, node): - with self.require_parens(_Precedence.BOR, node): - self.set_precedence(_Precedence.BOR.next(), *node.patterns) - self.interleave(lambda: self.write(" | "), self.traverse, node.patterns) - def unparse(ast_obj): - unparser = _Unparser() + global _Unparser + try: + unparser = _Unparser() + except NameError: + from _ast_unparse import Unparser as _Unparser + unparser = _Unparser() return unparser.visit(ast_obj) -_deprecated_globals = { - name: globals().pop(name) - for name in ('Num', 'Str', 'Bytes', 'NameConstant', 'Ellipsis') -} - -def __getattr__(name): - if name in _deprecated_globals: - globals()[name] = value = _deprecated_globals[name] - import warnings - warnings._deprecated( - f"ast.{name}", message=_DEPRECATED_CLASS_MESSAGE, remove=(3, 14) - ) - return value - raise AttributeError(f"module 'ast' has no attribute '{name}'") - - -def main(): +def main(args=None): import argparse + import sys - parser = argparse.ArgumentParser(prog='python -m ast') - parser.add_argument('infile', type=argparse.FileType(mode='rb'), nargs='?', - default='-', + parser = argparse.ArgumentParser(color=True) + parser.add_argument('infile', nargs='?', default='-', help='the file to parse; defaults to stdin') parser.add_argument('-m', '--mode', default='exec', choices=('exec', 'single', 'eval', 'func_type'), @@ -1829,12 +641,40 @@ def main(): 'column offsets') parser.add_argument('-i', '--indent', type=int, default=3, help='indentation of nodes (number of spaces)') - args = parser.parse_args() + parser.add_argument('--feature-version', + type=str, default=None, metavar='VERSION', + help='Python version in the format 3.x ' + '(for example, 3.10)') + parser.add_argument('-O', '--optimize', + type=int, default=-1, metavar='LEVEL', + help='optimization level for parser (default -1)') + parser.add_argument('--show-empty', default=False, action='store_true', + help='show empty lists and fields in dump output') + args = parser.parse_args(args) + + if args.infile == '-': + name = '' + source = sys.stdin.buffer.read() + else: + name = args.infile + with open(args.infile, 'rb') as infile: + source = infile.read() + + # Process feature_version + feature_version = None + if args.feature_version: + try: + major, minor = map(int, args.feature_version.split('.', 1)) + except ValueError: + parser.error('Invalid format for --feature-version; ' + 'expected format 3.x (for example, 3.10)') + + feature_version = (major, minor) - with args.infile as infile: - source = infile.read() - tree = parse(source, args.infile.name, args.mode, type_comments=args.no_type_comments) - print(dump(tree, include_attributes=args.include_attributes, indent=args.indent)) + tree = parse(source, name, args.mode, type_comments=args.no_type_comments, + feature_version=feature_version, optimize=args.optimize) + print(dump(tree, include_attributes=args.include_attributes, + indent=args.indent, show_empty=args.show_empty)) if __name__ == '__main__': main() diff --git a/PythonLib/full/asyncio/__init__.py b/PythonLib/full/asyncio/__init__.py index 03165a42..32a5dbae 100644 --- a/PythonLib/full/asyncio/__init__.py +++ b/PythonLib/full/asyncio/__init__.py @@ -10,6 +10,7 @@ from .events import * from .exceptions import * from .futures import * +from .graph import * from .locks import * from .protocols import * from .runners import * @@ -27,6 +28,7 @@ events.__all__ + exceptions.__all__ + futures.__all__ + + graph.__all__ + locks.__all__ + protocols.__all__ + runners.__all__ + @@ -45,3 +47,28 @@ else: from .unix_events import * # pragma: no cover __all__ += unix_events.__all__ + +def __getattr__(name: str): + import warnings + + match name: + case "AbstractEventLoopPolicy": + warnings._deprecated(f"asyncio.{name}", remove=(3, 16)) + return events._AbstractEventLoopPolicy + case "DefaultEventLoopPolicy": + warnings._deprecated(f"asyncio.{name}", remove=(3, 16)) + if sys.platform == 'win32': + return windows_events._DefaultEventLoopPolicy + return unix_events._DefaultEventLoopPolicy + case "WindowsSelectorEventLoopPolicy": + if sys.platform == 'win32': + warnings._deprecated(f"asyncio.{name}", remove=(3, 16)) + return windows_events._WindowsSelectorEventLoopPolicy + # Else fall through to the AttributeError below. + case "WindowsProactorEventLoopPolicy": + if sys.platform == 'win32': + warnings._deprecated(f"asyncio.{name}", remove=(3, 16)) + return windows_events._WindowsProactorEventLoopPolicy + # Else fall through to the AttributeError below. + + raise AttributeError(f"module {__name__!r} has no attribute {name!r}") diff --git a/PythonLib/full/asyncio/__main__.py b/PythonLib/full/asyncio/__main__.py index 29e528ae..0e100358 100644 --- a/PythonLib/full/asyncio/__main__.py +++ b/PythonLib/full/asyncio/__main__.py @@ -1,42 +1,53 @@ +import argparse import ast import asyncio -import code +import asyncio.tools import concurrent.futures import contextvars import inspect +import os +import site import sys import threading import types import warnings +from _colorize import get_theme +from _pyrepl.console import InteractiveColoredConsole + from . import futures -class AsyncIOInteractiveConsole(code.InteractiveConsole): +class AsyncIOInteractiveConsole(InteractiveColoredConsole): def __init__(self, locals, loop): - super().__init__(locals) + super().__init__(locals, filename="") self.compile.compiler.flags |= ast.PyCF_ALLOW_TOP_LEVEL_AWAIT + self.loop = loop self.context = contextvars.copy_context() def runcode(self, code): + global return_code future = concurrent.futures.Future() def callback(): + global return_code global repl_future - global repl_future_interrupted + global keyboard_interrupted repl_future = None - repl_future_interrupted = False + keyboard_interrupted = False func = types.FunctionType(code, self.locals) try: coro = func() - except SystemExit: - raise + except SystemExit as se: + return_code = se.code + self.loop.stop() + return except KeyboardInterrupt as ex: - repl_future_interrupted = True + keyboard_interrupted = True future.set_exception(ex) return except BaseException as ex: @@ -53,34 +64,68 @@ def callback(): except BaseException as exc: future.set_exception(exc) - loop.call_soon_threadsafe(callback, context=self.context) + self.loop.call_soon_threadsafe(callback, context=self.context) try: return future.result() - except SystemExit: - raise + except SystemExit as se: + return_code = se.code + self.loop.stop() + return except BaseException: - if repl_future_interrupted: - self.write("\nKeyboardInterrupt\n") + if keyboard_interrupted: + if not CAN_USE_PYREPL: + self.write("\nKeyboardInterrupt\n") else: self.showtraceback() - + return self.STATEMENT_FAILED class REPLThread(threading.Thread): def run(self): + global return_code + try: - banner = ( - f'asyncio REPL {sys.version} on {sys.platform}\n' - f'Use "await" directly instead of "asyncio.run()".\n' - f'Type "help", "copyright", "credits" or "license" ' - f'for more information.\n' - f'{getattr(sys, "ps1", ">>> ")}import asyncio' - ) - - console.interact( - banner=banner, - exitmsg='exiting asyncio REPL...') + if not sys.flags.quiet: + banner = ( + f'asyncio REPL {sys.version} on {sys.platform}\n' + f'Use "await" directly instead of "asyncio.run()".\n' + f'Type "help", "copyright", "credits" or "license" ' + f'for more information.\n' + ) + + console.write(banner) + + if not sys.flags.isolated and (startup_path := os.getenv("PYTHONSTARTUP")): + sys.audit("cpython.run_startup", startup_path) + + import tokenize + with tokenize.open(startup_path) as f: + startup_code = compile(f.read(), startup_path, "exec") + exec(startup_code, console.locals) + + ps1 = getattr(sys, "ps1", ">>> ") + if CAN_USE_PYREPL: + theme = get_theme().syntax + ps1 = f"{theme.prompt}{ps1}{theme.reset}" + console.write(f"{ps1}import asyncio\n") + + if CAN_USE_PYREPL: + from _pyrepl.simple_interact import ( + run_multiline_interactive_console, + ) + try: + run_multiline_interactive_console(console) + except SystemExit: + # expected via the `exit` and `quit` commands + pass + except BaseException: + # unexpected issue + console.showtraceback() + console.write("Internal error, ") + return_code = 1 + else: + console.interact(banner="", exitmsg="") finally: warnings.filterwarnings( 'ignore', @@ -89,10 +134,56 @@ def run(self): loop.call_soon_threadsafe(loop.stop) + def interrupt(self) -> None: + if not CAN_USE_PYREPL: + return + + from _pyrepl.simple_interact import _get_reader + r = _get_reader() + if r.threading_hook is not None: + r.threading_hook.add("") # type: ignore + if __name__ == '__main__': + parser = argparse.ArgumentParser( + prog="python3 -m asyncio", + description="Interactive asyncio shell and CLI tools", + color=True, + ) + subparsers = parser.add_subparsers(help="sub-commands", dest="command") + ps = subparsers.add_parser( + "ps", help="Display a table of all pending tasks in a process" + ) + ps.add_argument("pid", type=int, help="Process ID to inspect") + pstree = subparsers.add_parser( + "pstree", help="Display a tree of all pending tasks in a process" + ) + pstree.add_argument("pid", type=int, help="Process ID to inspect") + args = parser.parse_args() + match args.command: + case "ps": + asyncio.tools.display_awaited_by_tasks_table(args.pid) + sys.exit(0) + case "pstree": + asyncio.tools.display_awaited_by_tasks_tree(args.pid) + sys.exit(0) + case None: + pass # continue to the interactive shell + case _: + # shouldn't happen as an invalid command-line wouldn't parse + # but let's keep it for the next person adding a command + print(f"error: unhandled command {args.command}", file=sys.stderr) + parser.print_usage(file=sys.stderr) + sys.exit(1) + sys.audit("cpython.run_stdin") + if os.getenv('PYTHON_BASIC_REPL'): + CAN_USE_PYREPL = False + else: + from _pyrepl.main import CAN_USE_PYREPL + + return_code = 0 loop = asyncio.new_event_loop() asyncio.set_event_loop(loop) @@ -105,14 +196,31 @@ def run(self): console = AsyncIOInteractiveConsole(repl_locals, loop) repl_future = None - repl_future_interrupted = False + keyboard_interrupted = False try: import readline # NoQA except ImportError: - pass + readline = None + + interactive_hook = getattr(sys, "__interactivehook__", None) + + if interactive_hook is not None: + sys.audit("cpython.run_interactivehook", interactive_hook) + interactive_hook() - repl_thread = REPLThread() + if interactive_hook is site.register_readline: + # Fix the completer function to use the interactive console locals + try: + import rlcompleter + except: + pass + else: + if readline is not None: + completer = rlcompleter.Completer(console.locals) + readline.set_completer(completer.complete) + + repl_thread = REPLThread(name="Interactive thread") repl_thread.daemon = True repl_thread.start() @@ -120,9 +228,14 @@ def run(self): try: loop.run_forever() except KeyboardInterrupt: + keyboard_interrupted = True if repl_future and not repl_future.done(): repl_future.cancel() - repl_future_interrupted = True + repl_thread.interrupt() continue else: break + + console.write('exiting asyncio REPL...\n') + loop.close() + sys.exit(return_code) diff --git a/PythonLib/full/asyncio/base_events.py b/PythonLib/full/asyncio/base_events.py index 813a85db..8cbb71f7 100644 --- a/PythonLib/full/asyncio/base_events.py +++ b/PythonLib/full/asyncio/base_events.py @@ -278,7 +278,9 @@ def __init__(self, loop, sockets, protocol_factory, ssl_context, backlog, ssl_handshake_timeout, ssl_shutdown_timeout=None): self._loop = loop self._sockets = sockets - self._active_count = 0 + # Weak references so we don't break Transport's ability to + # detect abandoned transports + self._clients = weakref.WeakSet() self._waiters = [] self._protocol_factory = protocol_factory self._backlog = backlog @@ -291,14 +293,13 @@ def __init__(self, loop, sockets, protocol_factory, ssl_context, backlog, def __repr__(self): return f'<{self.__class__.__name__} sockets={self.sockets!r}>' - def _attach(self): + def _attach(self, transport): assert self._sockets is not None - self._active_count += 1 + self._clients.add(transport) - def _detach(self): - assert self._active_count > 0 - self._active_count -= 1 - if self._active_count == 0 and self._sockets is None: + def _detach(self, transport): + self._clients.discard(transport) + if len(self._clients) == 0 and self._sockets is None: self._wakeup() def _wakeup(self): @@ -347,9 +348,17 @@ def close(self): self._serving_forever_fut.cancel() self._serving_forever_fut = None - if self._active_count == 0: + if len(self._clients) == 0: self._wakeup() + def close_clients(self): + for transport in self._clients.copy(): + transport.close() + + def abort_clients(self): + for transport in self._clients.copy(): + transport.abort() + async def start_serving(self): self._start_serving() # Skip one loop iteration so that all 'loop.add_reader' @@ -421,6 +430,8 @@ def __init__(self): self._clock_resolution = time.get_clock_info('monotonic').resolution self._exception_handler = None self.set_debug(coroutines._is_debug_mode()) + # The preserved state of async generator hooks. + self._old_agen_hooks = None # In debug mode, if the execution of a callback or a step of a task # exceed this duration in seconds, the slow callback/task is logged. self.slow_callback_duration = 0.1 @@ -447,25 +458,18 @@ def create_future(self): """Create a Future object attached to the loop.""" return futures.Future(loop=self) - def create_task(self, coro, *, name=None, context=None): - """Schedule a coroutine object. + def create_task(self, coro, **kwargs): + """Schedule or begin executing a coroutine object. Return a task object. """ self._check_closed() - if self._task_factory is None: - task = tasks.Task(coro, loop=self, name=name, context=context) - if task._source_traceback: - del task._source_traceback[-1] - else: - if context is None: - # Use legacy API if context is not needed - task = self._task_factory(self, coro) - else: - task = self._task_factory(self, coro, context=context) - - tasks._set_task_name(task, name) + if self._task_factory is not None: + return self._task_factory(self, coro, **kwargs) + task = tasks.Task(coro, loop=self, **kwargs) + if task._source_traceback: + del task._source_traceback[-1] try: return task finally: @@ -479,9 +483,10 @@ def set_task_factory(self, factory): If factory is None the default task factory will be set. If factory is a callable, it should have a signature matching - '(loop, coro)', where 'loop' will be a reference to the active - event loop, 'coro' will be a coroutine object. The callable - must return a Future. + '(loop, coro, **kwargs)', where 'loop' will be a reference to the active + event loop, 'coro' will be a coroutine object, and **kwargs will be + arbitrary keyword arguments that should be passed on to Task. + The callable must return a Task. """ if factory is not None and not callable(factory): raise TypeError('task factory must be a callable or None') @@ -628,29 +633,52 @@ def _check_running(self): raise RuntimeError( 'Cannot run the event loop while another loop is running') - def run_forever(self): - """Run until stop() is called.""" + def _run_forever_setup(self): + """Prepare the run loop to process events. + + This method exists so that custom event loop subclasses (e.g., event loops + that integrate a GUI event loop with Python's event loop) have access to all the + loop setup logic. + """ self._check_closed() self._check_running() self._set_coroutine_origin_tracking(self._debug) - old_agen_hooks = sys.get_asyncgen_hooks() - try: - self._thread_id = threading.get_ident() - sys.set_asyncgen_hooks(firstiter=self._asyncgen_firstiter_hook, - finalizer=self._asyncgen_finalizer_hook) + self._old_agen_hooks = sys.get_asyncgen_hooks() + self._thread_id = threading.get_ident() + sys.set_asyncgen_hooks( + firstiter=self._asyncgen_firstiter_hook, + finalizer=self._asyncgen_finalizer_hook + ) + + events._set_running_loop(self) + + def _run_forever_cleanup(self): + """Clean up after an event loop finishes the looping over events. - events._set_running_loop(self) + This method exists so that custom event loop subclasses (e.g., event loops + that integrate a GUI event loop with Python's event loop) have access to all the + loop cleanup logic. + """ + self._stopping = False + self._thread_id = None + events._set_running_loop(None) + self._set_coroutine_origin_tracking(False) + # Restore any pre-existing async generator hooks. + if self._old_agen_hooks is not None: + sys.set_asyncgen_hooks(*self._old_agen_hooks) + self._old_agen_hooks = None + + def run_forever(self): + """Run until stop() is called.""" + self._run_forever_setup() + try: while True: self._run_once() if self._stopping: break finally: - self._stopping = False - self._thread_id = None - events._set_running_loop(None) - self._set_coroutine_origin_tracking(False) - sys.set_asyncgen_hooks(*old_agen_hooks) + self._run_forever_cleanup() def run_until_complete(self, future): """Run until the Future is done. @@ -807,7 +835,7 @@ def call_soon(self, callback, *args, context=None): def _check_callback(self, callback, method): if (coroutines.iscoroutine(callback) or - coroutines.iscoroutinefunction(callback)): + coroutines._iscoroutinefunction(callback)): raise TypeError( f"coroutines cannot be used with {method}()") if not callable(callback): @@ -844,7 +872,10 @@ def call_soon_threadsafe(self, callback, *args, context=None): self._check_closed() if self._debug: self._check_callback(callback, 'call_soon_threadsafe') - handle = self._call_soon(callback, args, context) + handle = events._ThreadSafeHandle(callback, args, self, context) + self._ready.append(handle) + if handle._source_traceback: + del handle._source_traceback[-1] if handle._source_traceback: del handle._source_traceback[-1] self._write_to_self() @@ -985,38 +1016,43 @@ async def _connect_sock(self, exceptions, addr_info, local_addr_infos=None): family, type_, proto, _, address = addr_info sock = None try: - sock = socket.socket(family=family, type=type_, proto=proto) - sock.setblocking(False) - if local_addr_infos is not None: - for lfamily, _, _, _, laddr in local_addr_infos: - # skip local addresses of different family - if lfamily != family: - continue - try: - sock.bind(laddr) - break - except OSError as exc: - msg = ( - f'error while attempting to bind on ' - f'address {laddr!r}: {str(exc).lower()}' - ) - exc = OSError(exc.errno, msg) - my_exceptions.append(exc) - else: # all bind attempts failed - if my_exceptions: - raise my_exceptions.pop() - else: - raise OSError(f"no matching local address with {family=} found") - await self.sock_connect(sock, address) - return sock - except OSError as exc: - my_exceptions.append(exc) - if sock is not None: - sock.close() - raise + try: + sock = socket.socket(family=family, type=type_, proto=proto) + sock.setblocking(False) + if local_addr_infos is not None: + for lfamily, _, _, _, laddr in local_addr_infos: + # skip local addresses of different family + if lfamily != family: + continue + try: + sock.bind(laddr) + break + except OSError as exc: + msg = ( + f'error while attempting to bind on ' + f'address {laddr!r}: {str(exc).lower()}' + ) + exc = OSError(exc.errno, msg) + my_exceptions.append(exc) + else: # all bind attempts failed + if my_exceptions: + raise my_exceptions.pop() + else: + raise OSError(f"no matching local address with {family=} found") + await self.sock_connect(sock, address) + return sock + except OSError as exc: + my_exceptions.append(exc) + raise except: if sock is not None: - sock.close() + try: + sock.close() + except OSError: + # An error when closing a newly created socket is + # not important, but it can overwrite more important + # non-OSError error. So ignore it. + pass raise finally: exceptions = my_exceptions = None @@ -1130,7 +1166,7 @@ async def create_connection( raise ExceptionGroup("create_connection failed", exceptions) if len(exceptions) == 1: raise exceptions[0] - else: + elif exceptions: # If they all have the same str(), raise one. model = str(exceptions[0]) if all(str(exc) == model for exc in exceptions): @@ -1139,6 +1175,9 @@ async def create_connection( # the various error messages. raise OSError('Multiple exceptions: {}'.format( ', '.join(str(exc) for exc in exceptions))) + else: + # No exceptions were collected, raise a timeout error + raise TimeoutError('create_connection failed') finally: exceptions = None @@ -1484,6 +1523,7 @@ async def create_server( ssl=None, reuse_address=None, reuse_port=None, + keep_alive=None, ssl_handshake_timeout=None, ssl_shutdown_timeout=None, start_serving=True): @@ -1559,6 +1599,9 @@ async def create_server( # on other address families than AF_INET/AF_INET6. if reuse_port and af in (socket.AF_INET, socket.AF_INET6): _set_reuseport(sock) + if keep_alive: + sock.setsockopt( + socket.SOL_SOCKET, socket.SO_KEEPALIVE, True) # Disable IPv4/IPv6 dual stack support (enabled by # default on Linux) which makes a single socket # listen on both address families. @@ -1631,8 +1674,7 @@ async def connect_accepted_socket( raise ValueError( 'ssl_shutdown_timeout is only meaningful with ssl') - if sock is not None: - _check_ssl_socket(sock) + _check_ssl_socket(sock) transport, protocol = await self._create_connection_transport( sock, protocol_factory, ssl, '', server_side=True, @@ -1845,6 +1887,8 @@ def call_exception_handler(self, context): - 'protocol' (optional): Protocol instance; - 'transport' (optional): Transport instance; - 'socket' (optional): Socket instance; + - 'source_traceback' (optional): Traceback of the source; + - 'handle_traceback' (optional): Traceback of the handle; - 'asyncgen' (optional): Asynchronous generator that caused the exception. @@ -1955,8 +1999,11 @@ def _run_once(self): timeout = 0 elif self._scheduled: # Compute the desired timeout. - when = self._scheduled[0]._when - timeout = min(max(0, when - self.time()), MAXIMUM_SELECT_TIMEOUT) + timeout = self._scheduled[0]._when - self.time() + if timeout > MAXIMUM_SELECT_TIMEOUT: + timeout = MAXIMUM_SELECT_TIMEOUT + elif timeout < 0: + timeout = 0 event_list = self._selector.select(timeout) self._process_events(event_list) diff --git a/PythonLib/full/asyncio/base_subprocess.py b/PythonLib/full/asyncio/base_subprocess.py index 4c9b0dd5..321a4e5d 100644 --- a/PythonLib/full/asyncio/base_subprocess.py +++ b/PythonLib/full/asyncio/base_subprocess.py @@ -1,6 +1,9 @@ import collections import subprocess import warnings +import os +import signal +import sys from . import protocols from . import transports @@ -23,6 +26,7 @@ def __init__(self, loop, protocol, args, shell, self._pending_calls = collections.deque() self._pipes = {} self._finished = False + self._pipes_connected = False if stdin == subprocess.PIPE: self._pipes[0] = None @@ -101,7 +105,12 @@ def close(self): for proto in self._pipes.values(): if proto is None: continue - proto.pipe.close() + # See gh-114177 + # skip closing the pipe if loop is already closed + # this can happen e.g. when loop is closed immediately after + # process is killed + if self._loop and not self._loop.is_closed(): + proto.pipe.close() if (self._proc is not None and # has the child process finished? @@ -115,7 +124,8 @@ def close(self): try: self._proc.kill() - except ProcessLookupError: + except (ProcessLookupError, PermissionError): + # the process may have already exited or may be running setuid pass # Don't clear the _proc reference yet: _post_init() may still run @@ -141,17 +151,31 @@ def _check_proc(self): if self._proc is None: raise ProcessLookupError() - def send_signal(self, signal): - self._check_proc() - self._proc.send_signal(signal) + if sys.platform == 'win32': + def send_signal(self, signal): + self._check_proc() + self._proc.send_signal(signal) + + def terminate(self): + self._check_proc() + self._proc.terminate() + + def kill(self): + self._check_proc() + self._proc.kill() + else: + def send_signal(self, signal): + self._check_proc() + try: + os.kill(self._proc.pid, signal) + except ProcessLookupError: + pass - def terminate(self): - self._check_proc() - self._proc.terminate() + def terminate(self): + self.send_signal(signal.SIGTERM) - def kill(self): - self._check_proc() - self._proc.kill() + def kill(self): + self.send_signal(signal.SIGKILL) async def _connect_pipes(self, waiter): try: @@ -190,6 +214,7 @@ async def _connect_pipes(self, waiter): else: if waiter is not None and not waiter.cancelled(): waiter.set_result(None) + self._pipes_connected = True def _call(self, cb, *data): if self._pending_calls is not None: @@ -233,6 +258,15 @@ def _try_finish(self): assert not self._finished if self._returncode is None: return + if not self._pipes_connected: + # self._pipes_connected can be False if not all pipes were connected + # because either the process failed to start or the self._connect_pipes task + # got cancelled. In this broken state we consider all pipes disconnected and + # to avoid hanging forever in self._wait as otherwise _exit_waiters + # would never be woken up, we wake them up here. + for waiter in self._exit_waiters: + if not waiter.cancelled(): + waiter.set_result(self._returncode) if all(p is not None and p.disconnected for p in self._pipes.values()): self._finished = True diff --git a/PythonLib/full/asyncio/coroutines.py b/PythonLib/full/asyncio/coroutines.py index ab4f30eb..a51319cb 100644 --- a/PythonLib/full/asyncio/coroutines.py +++ b/PythonLib/full/asyncio/coroutines.py @@ -18,7 +18,16 @@ def _is_debug_mode(): def iscoroutinefunction(func): + import warnings """Return True if func is a decorated coroutine function.""" + warnings._deprecated("asyncio.iscoroutinefunction", + f"{warnings._DEPRECATED_MSG}; " + "use inspect.iscoroutinefunction() instead", + remove=(3,16)) + return _iscoroutinefunction(func) + + +def _iscoroutinefunction(func): return (inspect.iscoroutinefunction(func) or getattr(func, '_is_coroutine', None) is _is_coroutine) diff --git a/PythonLib/full/asyncio/events.py b/PythonLib/full/asyncio/events.py index 01685288..a7fb5598 100644 --- a/PythonLib/full/asyncio/events.py +++ b/PythonLib/full/asyncio/events.py @@ -5,14 +5,18 @@ # SPDX-FileCopyrightText: Copyright (c) 2015-2021 MagicStack Inc. http://magic.io __all__ = ( - 'AbstractEventLoopPolicy', - 'AbstractEventLoop', 'AbstractServer', - 'Handle', 'TimerHandle', - 'get_event_loop_policy', 'set_event_loop_policy', - 'get_event_loop', 'set_event_loop', 'new_event_loop', - 'get_child_watcher', 'set_child_watcher', - '_set_running_loop', 'get_running_loop', - '_get_running_loop', + "AbstractEventLoop", + "AbstractServer", + "Handle", + "TimerHandle", + "get_event_loop_policy", + "set_event_loop_policy", + "get_event_loop", + "set_event_loop", + "new_event_loop", + "_set_running_loop", + "get_running_loop", + "_get_running_loop", ) import contextvars @@ -22,6 +26,7 @@ import subprocess import sys import threading +import warnings from . import format_helpers @@ -54,7 +59,8 @@ def _repr_info(self): info.append('cancelled') if self._callback is not None: info.append(format_helpers._format_callback_source( - self._callback, self._args)) + self._callback, self._args, + debug=self._loop.get_debug())) if self._source_traceback: frame = self._source_traceback[-1] info.append(f'created at {frame[0]}:{frame[1]}') @@ -90,7 +96,8 @@ def _run(self): raise except BaseException as exc: cb = format_helpers._format_callback_source( - self._callback, self._args) + self._callback, self._args, + debug=self._loop.get_debug()) msg = f'Exception in callback {cb}' context = { 'message': msg, @@ -102,6 +109,34 @@ def _run(self): self._loop.call_exception_handler(context) self = None # Needed to break cycles when an exception occurs. +# _ThreadSafeHandle is used for callbacks scheduled with call_soon_threadsafe +# and is thread safe unlike Handle which is not thread safe. +class _ThreadSafeHandle(Handle): + + __slots__ = ('_lock',) + + def __init__(self, callback, args, loop, context=None): + super().__init__(callback, args, loop, context) + self._lock = threading.RLock() + + def cancel(self): + with self._lock: + return super().cancel() + + def cancelled(self): + with self._lock: + return super().cancelled() + + def _run(self): + # The event loop checks for cancellation without holding the lock + # It is possible that the handle is cancelled after the check + # but before the callback is called so check it again after acquiring + # the lock and return without calling the callback if it is cancelled. + with self._lock: + if self._cancelled: + return + return super()._run() + class TimerHandle(Handle): """Object returned by timed callback registration methods.""" @@ -173,6 +208,14 @@ def close(self): """Stop serving. This leaves existing connections open.""" raise NotImplementedError + def close_clients(self): + """Close all active connections.""" + raise NotImplementedError + + def abort_clients(self): + """Close all active connections immediately.""" + raise NotImplementedError + def get_loop(self): """Get the event loop the Server object is attached to.""" raise NotImplementedError @@ -282,7 +325,7 @@ def create_future(self): # Method scheduling a coroutine object: create a task. - def create_task(self, coro, *, name=None, context=None): + def create_task(self, coro, **kwargs): raise NotImplementedError # Methods for interacting with threads. @@ -320,6 +363,7 @@ async def create_server( *, family=socket.AF_UNSPEC, flags=socket.AI_PASSIVE, sock=None, backlog=100, ssl=None, reuse_address=None, reuse_port=None, + keep_alive=None, ssl_handshake_timeout=None, ssl_shutdown_timeout=None, start_serving=True): @@ -358,6 +402,9 @@ async def create_server( they all set this flag when being created. This option is not supported on Windows. + keep_alive set to True keeps connections active by enabling the + periodic transmission of messages. + ssl_handshake_timeout is the time in seconds that an SSL server will wait for completion of the SSL handshake before aborting the connection. Default is 60s. @@ -615,7 +662,7 @@ def set_debug(self, enabled): raise NotImplementedError -class AbstractEventLoopPolicy: +class _AbstractEventLoopPolicy: """Abstract policy for accessing the event loop.""" def get_event_loop(self): @@ -638,18 +685,7 @@ def new_event_loop(self): the current context, set_event_loop must be called explicitly.""" raise NotImplementedError - # Child processes handling (Unix only). - - def get_child_watcher(self): - "Get the watcher for child processes." - raise NotImplementedError - - def set_child_watcher(self, watcher): - """Set the watcher for child processes.""" - raise NotImplementedError - - -class BaseDefaultEventLoopPolicy(AbstractEventLoopPolicy): +class _BaseDefaultEventLoopPolicy(_AbstractEventLoopPolicy): """Default policy implementation for accessing the event loop. In this policy, each thread has its own event loop. However, we @@ -666,7 +702,6 @@ class BaseDefaultEventLoopPolicy(AbstractEventLoopPolicy): class _Local(threading.local): _loop = None - _set_called = False def __init__(self): self._local = self._Local() @@ -676,28 +711,6 @@ def get_event_loop(self): Returns an instance of EventLoop or raises an exception. """ - if (self._local._loop is None and - not self._local._set_called and - threading.current_thread() is threading.main_thread()): - stacklevel = 2 - try: - f = sys._getframe(1) - except AttributeError: - pass - else: - # Move up the call stack so that the warning is attached - # to the line outside asyncio itself. - while f: - module = f.f_globals.get('__name__') - if not (module == 'asyncio' or module.startswith('asyncio.')): - break - f = f.f_back - stacklevel += 1 - import warnings - warnings.warn('There is no current event loop', - DeprecationWarning, stacklevel=stacklevel) - self.set_event_loop(self.new_event_loop()) - if self._local._loop is None: raise RuntimeError('There is no current event loop in thread %r.' % threading.current_thread().name) @@ -706,7 +719,6 @@ def get_event_loop(self): def set_event_loop(self, loop): """Set the event loop.""" - self._local._set_called = True if loop is not None and not isinstance(loop, AbstractEventLoop): raise TypeError(f"loop must be an instance of AbstractEventLoop or None, not '{type(loop).__name__}'") self._local._loop = loop @@ -776,26 +788,35 @@ def _init_event_loop_policy(): global _event_loop_policy with _lock: if _event_loop_policy is None: # pragma: no branch - from . import DefaultEventLoopPolicy - _event_loop_policy = DefaultEventLoopPolicy() + if sys.platform == 'win32': + from .windows_events import _DefaultEventLoopPolicy + else: + from .unix_events import _DefaultEventLoopPolicy + _event_loop_policy = _DefaultEventLoopPolicy() -def get_event_loop_policy(): +def _get_event_loop_policy(): """Get the current event loop policy.""" if _event_loop_policy is None: _init_event_loop_policy() return _event_loop_policy +def get_event_loop_policy(): + warnings._deprecated('asyncio.get_event_loop_policy', remove=(3, 16)) + return _get_event_loop_policy() -def set_event_loop_policy(policy): +def _set_event_loop_policy(policy): """Set the current event loop policy. If policy is None, the default policy is restored.""" global _event_loop_policy - if policy is not None and not isinstance(policy, AbstractEventLoopPolicy): + if policy is not None and not isinstance(policy, _AbstractEventLoopPolicy): raise TypeError(f"policy must be an instance of AbstractEventLoopPolicy or None, not '{type(policy).__name__}'") _event_loop_policy = policy +def set_event_loop_policy(policy): + warnings._deprecated('asyncio.set_event_loop_policy', remove=(3,16)) + _set_event_loop_policy(policy) def get_event_loop(): """Return an asyncio event loop. @@ -810,28 +831,17 @@ def get_event_loop(): current_loop = _get_running_loop() if current_loop is not None: return current_loop - return get_event_loop_policy().get_event_loop() + return _get_event_loop_policy().get_event_loop() def set_event_loop(loop): """Equivalent to calling get_event_loop_policy().set_event_loop(loop).""" - get_event_loop_policy().set_event_loop(loop) + _get_event_loop_policy().set_event_loop(loop) def new_event_loop(): """Equivalent to calling get_event_loop_policy().new_event_loop().""" - return get_event_loop_policy().new_event_loop() - - -def get_child_watcher(): - """Equivalent to calling get_event_loop_policy().get_child_watcher().""" - return get_event_loop_policy().get_child_watcher() - - -def set_child_watcher(watcher): - """Equivalent to calling - get_event_loop_policy().set_child_watcher(watcher).""" - return get_event_loop_policy().set_child_watcher(watcher) + return _get_event_loop_policy().new_event_loop() # Alias pure-Python implementations for testing purposes. @@ -861,7 +871,7 @@ def set_child_watcher(watcher): def on_fork(): # Reset the loop and wakeupfd in the forked child process. if _event_loop_policy is not None: - _event_loop_policy._local = BaseDefaultEventLoopPolicy._Local() + _event_loop_policy._local = _BaseDefaultEventLoopPolicy._Local() _set_running_loop(None) signal.set_wakeup_fd(-1) diff --git a/PythonLib/full/asyncio/format_helpers.py b/PythonLib/full/asyncio/format_helpers.py index 27d11fd4..93737b77 100644 --- a/PythonLib/full/asyncio/format_helpers.py +++ b/PythonLib/full/asyncio/format_helpers.py @@ -19,19 +19,26 @@ def _get_function_source(func): return None -def _format_callback_source(func, args): - func_repr = _format_callback(func, args, None) +def _format_callback_source(func, args, *, debug=False): + func_repr = _format_callback(func, args, None, debug=debug) source = _get_function_source(func) if source: func_repr += f' at {source[0]}:{source[1]}' return func_repr -def _format_args_and_kwargs(args, kwargs): +def _format_args_and_kwargs(args, kwargs, *, debug=False): """Format function arguments and keyword arguments. Special case for a single parameter: ('hello',) is formatted as ('hello'). + + Note that this function only returns argument details when + debug=True is specified, as arguments may contain sensitive + information. """ + if not debug: + return '()' + # use reprlib to limit the length of the output items = [] if args: @@ -41,10 +48,11 @@ def _format_args_and_kwargs(args, kwargs): return '({})'.format(', '.join(items)) -def _format_callback(func, args, kwargs, suffix=''): +def _format_callback(func, args, kwargs, *, debug=False, suffix=''): if isinstance(func, functools.partial): - suffix = _format_args_and_kwargs(args, kwargs) + suffix - return _format_callback(func.func, func.args, func.keywords, suffix) + suffix = _format_args_and_kwargs(args, kwargs, debug=debug) + suffix + return _format_callback(func.func, func.args, func.keywords, + debug=debug, suffix=suffix) if hasattr(func, '__qualname__') and func.__qualname__: func_repr = func.__qualname__ @@ -53,7 +61,7 @@ def _format_callback(func, args, kwargs, suffix=''): else: func_repr = repr(func) - func_repr += _format_args_and_kwargs(args, kwargs) + func_repr += _format_args_and_kwargs(args, kwargs, debug=debug) if suffix: func_repr += suffix return func_repr diff --git a/PythonLib/full/asyncio/futures.py b/PythonLib/full/asyncio/futures.py index 0c530bbd..e8a99556 100644 --- a/PythonLib/full/asyncio/futures.py +++ b/PythonLib/full/asyncio/futures.py @@ -2,6 +2,7 @@ __all__ = ( 'Future', 'wrap_future', 'isfuture', + 'future_add_to_awaited_by', 'future_discard_from_awaited_by', ) import concurrent.futures @@ -43,7 +44,6 @@ class Future: - This class is not compatible with the wait() and as_completed() methods in the concurrent.futures package. - (In Python 3.4 or later we may be able to unify the implementations.) """ # Class variables serving as defaults for instance variables. @@ -61,12 +61,15 @@ class Future: # the Future protocol (i.e. is intended to be duck-type compatible). # The value must also be not-None, to enable a subclass to declare # that it is not compatible by setting this to None. - # - It is set by __iter__() below so that Task._step() can tell + # - It is set by __iter__() below so that Task.__step() can tell # the difference between - # `await Future()` or`yield from Future()` (correct) vs. + # `await Future()` or `yield from Future()` (correct) vs. # `yield Future()` (incorrect). _asyncio_future_blocking = False + # Used by the capture_call_stack() API. + __asyncio_awaited_by = None + __log_traceback = False def __init__(self, *, loop=None): @@ -116,6 +119,12 @@ def _log_traceback(self, val): raise ValueError('_log_traceback can only be set to False') self.__log_traceback = False + @property + def _asyncio_awaited_by(self): + if self.__asyncio_awaited_by is None: + return None + return frozenset(self.__asyncio_awaited_by) + def get_loop(self): """Return the event loop the Future is bound to.""" loop = self._loop @@ -138,9 +147,6 @@ def _make_cancelled_error(self): exc = exceptions.CancelledError() else: exc = exceptions.CancelledError(self._cancel_message) - exc.__context__ = self._cancelled_exc - # Remove the reference since we don't need this anymore. - self._cancelled_exc = None return exc def cancel(self, msg=None): @@ -320,11 +326,9 @@ def _set_result_unless_cancelled(fut, result): def _convert_future_exc(exc): exc_class = type(exc) if exc_class is concurrent.futures.CancelledError: - return exceptions.CancelledError(*exc.args) - elif exc_class is concurrent.futures.TimeoutError: - return exceptions.TimeoutError(*exc.args) + return exceptions.CancelledError(*exc.args).with_traceback(exc.__traceback__) elif exc_class is concurrent.futures.InvalidStateError: - return exceptions.InvalidStateError(*exc.args) + return exceptions.InvalidStateError(*exc.args).with_traceback(exc.__traceback__) else: return exc @@ -388,7 +392,7 @@ def _set_state(future, other): def _call_check_cancel(destination): if destination.cancelled(): - if source_loop is None or source_loop is dest_loop: + if source_loop is None or source_loop is events._get_running_loop(): source.cancel() else: source_loop.call_soon_threadsafe(source.cancel) @@ -397,7 +401,7 @@ def _call_set_state(source): if (destination.cancelled() and dest_loop is not None and dest_loop.is_closed()): return - if dest_loop is None or dest_loop is source_loop: + if dest_loop is None or dest_loop is events._get_running_loop(): _set_state(destination, source) else: if dest_loop.is_closed(): @@ -421,6 +425,49 @@ def wrap_future(future, *, loop=None): return new_future +def future_add_to_awaited_by(fut, waiter, /): + """Record that `fut` is awaited on by `waiter`.""" + # For the sake of keeping the implementation minimal and assuming + # that most of asyncio users use the built-in Futures and Tasks + # (or their subclasses), we only support native Future objects + # and their subclasses. + # + # Longer version: tracking requires storing the caller-callee + # dependency somewhere. One obvious choice is to store that + # information right in the future itself in a dedicated attribute. + # This means that we'd have to require all duck-type compatible + # futures to implement a specific attribute used by asyncio for + # the book keeping. Another solution would be to store that in + # a global dictionary. The downside here is that that would create + # strong references and any scenario where the "add" call isn't + # followed by a "discard" call would lead to a memory leak. + # Using WeakDict would resolve that issue, but would complicate + # the C code (_asynciomodule.c). The bottom line here is that + # it's not clear that all this work would be worth the effort. + # + # Note that there's an accelerated version of this function + # shadowing this implementation later in this file. + if isinstance(fut, _PyFuture) and isinstance(waiter, _PyFuture): + if fut._Future__asyncio_awaited_by is None: + fut._Future__asyncio_awaited_by = set() + fut._Future__asyncio_awaited_by.add(waiter) + + +def future_discard_from_awaited_by(fut, waiter, /): + """Record that `fut` is no longer awaited on by `waiter`.""" + # See the comment in "future_add_to_awaited_by()" body for + # details on implementation. + # + # Note that there's an accelerated version of this function + # shadowing this implementation later in this file. + if isinstance(fut, _PyFuture) and isinstance(waiter, _PyFuture): + if fut._Future__asyncio_awaited_by is not None: + fut._Future__asyncio_awaited_by.discard(waiter) + + +_py_future_add_to_awaited_by = future_add_to_awaited_by +_py_future_discard_from_awaited_by = future_discard_from_awaited_by + try: import _asyncio except ImportError: @@ -428,3 +475,7 @@ def wrap_future(future, *, loop=None): else: # _CFuture is needed for tests. Future = _CFuture = _asyncio.Future + future_add_to_awaited_by = _asyncio.future_add_to_awaited_by + future_discard_from_awaited_by = _asyncio.future_discard_from_awaited_by + _c_future_add_to_awaited_by = future_add_to_awaited_by + _c_future_discard_from_awaited_by = future_discard_from_awaited_by diff --git a/PythonLib/full/asyncio/graph.py b/PythonLib/full/asyncio/graph.py new file mode 100644 index 00000000..b5bfeb16 --- /dev/null +++ b/PythonLib/full/asyncio/graph.py @@ -0,0 +1,276 @@ +"""Introspection utils for tasks call graphs.""" + +import dataclasses +import io +import sys +import types + +from . import events +from . import futures +from . import tasks + +__all__ = ( + 'capture_call_graph', + 'format_call_graph', + 'print_call_graph', + 'FrameCallGraphEntry', + 'FutureCallGraph', +) + +# Sadly, we can't re-use the traceback module's datastructures as those +# are tailored for error reporting, whereas we need to represent an +# async call graph. +# +# Going with pretty verbose names as we'd like to export them to the +# top level asyncio namespace, and want to avoid future name clashes. + + +@dataclasses.dataclass(frozen=True, slots=True) +class FrameCallGraphEntry: + frame: types.FrameType + + +@dataclasses.dataclass(frozen=True, slots=True) +class FutureCallGraph: + future: futures.Future + call_stack: tuple["FrameCallGraphEntry", ...] + awaited_by: tuple["FutureCallGraph", ...] + + +def _build_graph_for_future( + future: futures.Future, + *, + limit: int | None = None, +) -> FutureCallGraph: + if not isinstance(future, futures.Future): + raise TypeError( + f"{future!r} object does not appear to be compatible " + f"with asyncio.Future" + ) + + coro = None + if get_coro := getattr(future, 'get_coro', None): + coro = get_coro() if limit != 0 else None + + st: list[FrameCallGraphEntry] = [] + awaited_by: list[FutureCallGraph] = [] + + while coro is not None: + if hasattr(coro, 'cr_await'): + # A native coroutine or duck-type compatible iterator + st.append(FrameCallGraphEntry(coro.cr_frame)) + coro = coro.cr_await + elif hasattr(coro, 'ag_await'): + # A native async generator or duck-type compatible iterator + st.append(FrameCallGraphEntry(coro.cr_frame)) + coro = coro.ag_await + else: + break + + if future._asyncio_awaited_by: + for parent in future._asyncio_awaited_by: + awaited_by.append(_build_graph_for_future(parent, limit=limit)) + + if limit is not None: + if limit > 0: + st = st[:limit] + elif limit < 0: + st = st[limit:] + st.reverse() + return FutureCallGraph(future, tuple(st), tuple(awaited_by)) + + +def capture_call_graph( + future: futures.Future | None = None, + /, + *, + depth: int = 1, + limit: int | None = None, +) -> FutureCallGraph | None: + """Capture the async call graph for the current task or the provided Future. + + The graph is represented with three data structures: + + * FutureCallGraph(future, call_stack, awaited_by) + + Where 'future' is an instance of asyncio.Future or asyncio.Task. + + 'call_stack' is a tuple of FrameGraphEntry objects. + + 'awaited_by' is a tuple of FutureCallGraph objects. + + * FrameCallGraphEntry(frame) + + Where 'frame' is a frame object of a regular Python function + in the call stack. + + Receives an optional 'future' argument. If not passed, + the current task will be used. If there's no current task, the function + returns None. + + If "capture_call_graph()" is introspecting *the current task*, the + optional keyword-only 'depth' argument can be used to skip the specified + number of frames from top of the stack. + + If the optional keyword-only 'limit' argument is provided, each call stack + in the resulting graph is truncated to include at most ``abs(limit)`` + entries. If 'limit' is positive, the entries left are the closest to + the invocation point. If 'limit' is negative, the topmost entries are + left. If 'limit' is omitted or None, all entries are present. + If 'limit' is 0, the call stack is not captured at all, only + "awaited by" information is present. + """ + + loop = events._get_running_loop() + + if future is not None: + # Check if we're in a context of a running event loop; + # if yes - check if the passed future is the currently + # running task or not. + if loop is None or future is not tasks.current_task(loop=loop): + return _build_graph_for_future(future, limit=limit) + # else: future is the current task, move on. + else: + if loop is None: + raise RuntimeError( + 'capture_call_graph() is called outside of a running ' + 'event loop and no *future* to introspect was provided') + future = tasks.current_task(loop=loop) + + if future is None: + # This isn't a generic call stack introspection utility. If we + # can't determine the current task and none was provided, we + # just return. + return None + + if not isinstance(future, futures.Future): + raise TypeError( + f"{future!r} object does not appear to be compatible " + f"with asyncio.Future" + ) + + call_stack: list[FrameCallGraphEntry] = [] + + f = sys._getframe(depth) if limit != 0 else None + try: + while f is not None: + is_async = f.f_generator is not None + call_stack.append(FrameCallGraphEntry(f)) + + if is_async: + if f.f_back is not None and f.f_back.f_generator is None: + # We've reached the bottom of the coroutine stack, which + # must be the Task that runs it. + break + + f = f.f_back + finally: + del f + + awaited_by = [] + if future._asyncio_awaited_by: + for parent in future._asyncio_awaited_by: + awaited_by.append(_build_graph_for_future(parent, limit=limit)) + + if limit is not None: + limit *= -1 + if limit > 0: + call_stack = call_stack[:limit] + elif limit < 0: + call_stack = call_stack[limit:] + + return FutureCallGraph(future, tuple(call_stack), tuple(awaited_by)) + + +def format_call_graph( + future: futures.Future | None = None, + /, + *, + depth: int = 1, + limit: int | None = None, +) -> str: + """Return the async call graph as a string for `future`. + + If `future` is not provided, format the call graph for the current task. + """ + + def render_level(st: FutureCallGraph, buf: list[str], level: int) -> None: + def add_line(line: str) -> None: + buf.append(level * ' ' + line) + + if isinstance(st.future, tasks.Task): + add_line( + f'* Task(name={st.future.get_name()!r}, id={id(st.future):#x})' + ) + else: + add_line( + f'* Future(id={id(st.future):#x})' + ) + + if st.call_stack: + add_line( + f' + Call stack:' + ) + for ste in st.call_stack: + f = ste.frame + + if f.f_generator is None: + f = ste.frame + add_line( + f' | File {f.f_code.co_filename!r},' + f' line {f.f_lineno}, in' + f' {f.f_code.co_qualname}()' + ) + else: + c = f.f_generator + + try: + f = c.cr_frame + code = c.cr_code + tag = 'async' + except AttributeError: + try: + f = c.ag_frame + code = c.ag_code + tag = 'async generator' + except AttributeError: + f = c.gi_frame + code = c.gi_code + tag = 'generator' + + add_line( + f' | File {f.f_code.co_filename!r},' + f' line {f.f_lineno}, in' + f' {tag} {code.co_qualname}()' + ) + + if st.awaited_by: + add_line( + f' + Awaited by:' + ) + for fut in st.awaited_by: + render_level(fut, buf, level + 1) + + graph = capture_call_graph(future, depth=depth + 1, limit=limit) + if graph is None: + return "" + + buf: list[str] = [] + try: + render_level(graph, buf, 0) + finally: + # 'graph' has references to frames so we should + # make sure it's GC'ed as soon as we don't need it. + del graph + return '\n'.join(buf) + +def print_call_graph( + future: futures.Future | None = None, + /, + *, + file: io.Writer[str] | None = None, + depth: int = 1, + limit: int | None = None, +) -> None: + """Print the async call graph for the current task or the provided Future.""" + print(format_call_graph(future, depth=depth, limit=limit), file=file) diff --git a/PythonLib/full/asyncio/locks.py b/PythonLib/full/asyncio/locks.py index 588dca6c..fa3a9476 100644 --- a/PythonLib/full/asyncio/locks.py +++ b/PythonLib/full/asyncio/locks.py @@ -24,25 +24,23 @@ class Lock(_ContextManagerMixin, mixins._LoopBoundMixin): """Primitive lock objects. A primitive lock is a synchronization primitive that is not owned - by a particular coroutine when locked. A primitive lock is in one + by a particular task when locked. A primitive lock is in one of two states, 'locked' or 'unlocked'. It is created in the unlocked state. It has two basic methods, acquire() and release(). When the state is unlocked, acquire() changes the state to locked and returns immediately. When the state is locked, acquire() blocks until a call to release() in - another coroutine changes it to unlocked, then the acquire() call + another task changes it to unlocked, then the acquire() call resets it to locked and returns. The release() method should only be called in the locked state; it changes the state to unlocked and returns immediately. If an attempt is made to release an unlocked lock, a RuntimeError will be raised. - When more than one coroutine is blocked in acquire() waiting for - the state to turn to unlocked, only one coroutine proceeds when a - release() call resets the state to unlocked; first coroutine which - is blocked in acquire() is being processed. - - acquire() is a coroutine and should be called with 'await'. + When more than one task is blocked in acquire() waiting for + the state to turn to unlocked, only one task proceeds when a + release() call resets the state to unlocked; successive release() + calls will unblock tasks in FIFO order. Locks also support the asynchronous context management protocol. 'async with lock' statement should be used. @@ -95,6 +93,8 @@ async def acquire(self): This method blocks until the lock is unlocked, then sets it to locked and returns True. """ + # Implement fair scheduling, where thread always waits + # its turn. Jumping the queue if all are cancelled is an optimization. if (not self._locked and (self._waiters is None or all(w.cancelled() for w in self._waiters))): self._locked = True @@ -105,19 +105,22 @@ async def acquire(self): fut = self._get_loop().create_future() self._waiters.append(fut) - # Finally block should be called before the CancelledError - # handling as we don't want CancelledError to call - # _wake_up_first() and attempt to wake up itself. try: try: await fut finally: self._waiters.remove(fut) except exceptions.CancelledError: + # Currently the only exception designed be able to occur here. + + # Ensure the lock invariant: If lock is not claimed (or about + # to be claimed by us) and there is a Task in waiters, + # ensure that the Task at the head will run. if not self._locked: self._wake_up_first() raise + # assert self._locked is False self._locked = True return True @@ -125,7 +128,7 @@ def release(self): """Release a lock. When the lock is locked, reset it to unlocked, and return. - If any other coroutines are blocked waiting for the lock to become + If any other tasks are blocked waiting for the lock to become unlocked, allow exactly one of them to proceed. When invoked on an unlocked lock, a RuntimeError is raised. @@ -139,7 +142,7 @@ def release(self): raise RuntimeError('Lock is not acquired.') def _wake_up_first(self): - """Wake up the first waiter if it isn't done.""" + """Ensure that the first waiter will wake up.""" if not self._waiters: return try: @@ -147,9 +150,7 @@ def _wake_up_first(self): except StopIteration: return - # .done() necessarily means that a waiter will wake up later on and - # either take the lock, or, if it was cancelled and lock wasn't - # taken already, will hit this again and wake up a new waiter. + # .done() means that the waiter is already set to wake up. if not fut.done(): fut.set_result(True) @@ -179,8 +180,8 @@ def is_set(self): return self._value def set(self): - """Set the internal flag to true. All coroutines waiting for it to - become true are awakened. Coroutine that call wait() once the flag is + """Set the internal flag to true. All tasks waiting for it to + become true are awakened. Tasks that call wait() once the flag is true will not block at all. """ if not self._value: @@ -191,7 +192,7 @@ def set(self): fut.set_result(True) def clear(self): - """Reset the internal flag to false. Subsequently, coroutines calling + """Reset the internal flag to false. Subsequently, tasks calling wait() will block until set() is called to set the internal flag to true again.""" self._value = False @@ -200,7 +201,7 @@ async def wait(self): """Block until the internal flag is true. If the internal flag is true on entry, return True - immediately. Otherwise, block until another coroutine calls + immediately. Otherwise, block until another task calls set() to set the flag to true, then return True. """ if self._value: @@ -219,8 +220,8 @@ class Condition(_ContextManagerMixin, mixins._LoopBoundMixin): """Asynchronous equivalent to threading.Condition. This class implements condition variable objects. A condition variable - allows one or more coroutines to wait until they are notified by another - coroutine. + allows one or more tasks to wait until they are notified by another + task. A new Lock object is created and used as the underlying lock. """ @@ -247,45 +248,64 @@ def __repr__(self): async def wait(self): """Wait until notified. - If the calling coroutine has not acquired the lock when this + If the calling task has not acquired the lock when this method is called, a RuntimeError is raised. This method releases the underlying lock, and then blocks until it is awakened by a notify() or notify_all() call for - the same condition variable in another coroutine. Once + the same condition variable in another task. Once awakened, it re-acquires the lock and returns True. + + This method may return spuriously, + which is why the caller should always + re-check the state and be prepared to wait() again. """ if not self.locked(): raise RuntimeError('cannot wait on un-acquired lock') + fut = self._get_loop().create_future() self.release() try: - fut = self._get_loop().create_future() - self._waiters.append(fut) try: - await fut - return True - finally: - self._waiters.remove(fut) - - finally: - # Must reacquire lock even if wait is cancelled - cancelled = False - while True: + self._waiters.append(fut) try: - await self.acquire() - break - except exceptions.CancelledError: - cancelled = True + await fut + return True + finally: + self._waiters.remove(fut) - if cancelled: - raise exceptions.CancelledError + finally: + # Must re-acquire lock even if wait is cancelled. + # We only catch CancelledError here, since we don't want any + # other (fatal) errors with the future to cause us to spin. + err = None + while True: + try: + await self.acquire() + break + except exceptions.CancelledError as e: + err = e + + if err is not None: + try: + raise err # Re-raise most recent exception instance. + finally: + err = None # Break reference cycles. + except BaseException: + # Any error raised out of here _may_ have occurred after this Task + # believed to have been successfully notified. + # Make sure to notify another Task instead. This may result + # in a "spurious wakeup", which is allowed as part of the + # Condition Variable protocol. + self._notify(1) + raise async def wait_for(self, predicate): """Wait until a predicate becomes true. - The predicate should be a callable which result will be - interpreted as a boolean value. The final predicate value is + The predicate should be a callable whose result will be + interpreted as a boolean value. The method will repeatedly + wait() until it evaluates to true. The final predicate value is the return value. """ result = predicate() @@ -295,20 +315,22 @@ async def wait_for(self, predicate): return result def notify(self, n=1): - """By default, wake up one coroutine waiting on this condition, if any. - If the calling coroutine has not acquired the lock when this method + """By default, wake up one task waiting on this condition, if any. + If the calling task has not acquired the lock when this method is called, a RuntimeError is raised. - This method wakes up at most n of the coroutines waiting for the - condition variable; it is a no-op if no coroutines are waiting. + This method wakes up n of the tasks waiting for the condition + variable; if fewer than n are waiting, they are all awoken. - Note: an awakened coroutine does not actually return from its + Note: an awakened task does not actually return from its wait() call until it can reacquire the lock. Since notify() does not release the lock, its caller should. """ if not self.locked(): raise RuntimeError('cannot notify on un-acquired lock') + self._notify(n) + def _notify(self, n): idx = 0 for fut in self._waiters: if idx >= n: @@ -319,9 +341,9 @@ def notify(self, n=1): fut.set_result(False) def notify_all(self): - """Wake up all threads waiting on this condition. This method acts - like notify(), but wakes up all waiting threads instead of one. If the - calling thread has not acquired the lock when this method is called, + """Wake up all tasks waiting on this condition. This method acts + like notify(), but wakes up all waiting tasks instead of one. If the + calling task has not acquired the lock when this method is called, a RuntimeError is raised. """ self.notify(len(self._waiters)) @@ -357,6 +379,7 @@ def __repr__(self): def locked(self): """Returns True if semaphore cannot be acquired immediately.""" + # Due to state, or FIFO rules (must allow others to run first). return self._value == 0 or ( any(not w.cancelled() for w in (self._waiters or ()))) @@ -365,11 +388,12 @@ async def acquire(self): If the internal counter is larger than zero on entry, decrement it by one and return True immediately. If it is - zero on entry, block, waiting until some other coroutine has + zero on entry, block, waiting until some other task has called release() to make it larger than 0, and then return True. """ if not self.locked(): + # Maintain FIFO, wait for others to start even if _value > 0. self._value -= 1 return True @@ -378,29 +402,34 @@ async def acquire(self): fut = self._get_loop().create_future() self._waiters.append(fut) - # Finally block should be called before the CancelledError - # handling as we don't want CancelledError to call - # _wake_up_first() and attempt to wake up itself. try: try: await fut finally: self._waiters.remove(fut) except exceptions.CancelledError: - if not fut.cancelled(): + # Currently the only exception designed be able to occur here. + if fut.done() and not fut.cancelled(): + # Our Future was successfully set to True via _wake_up_next(), + # but we are not about to successfully acquire(). Therefore we + # must undo the bookkeeping already done and attempt to wake + # up someone else. self._value += 1 - self._wake_up_next() raise - if self._value > 0: - self._wake_up_next() + finally: + # New waiters may have arrived but had to wait due to FIFO. + # Wake up as many as are allowed. + while self._value > 0: + if not self._wake_up_next(): + break # There was no-one to wake up. return True def release(self): """Release a semaphore, incrementing the internal counter by one. - When it was zero on entry and another coroutine is waiting for it to - become larger than zero again, wake up that coroutine. + When it was zero on entry and another task is waiting for it to + become larger than zero again, wake up that task. """ self._value += 1 self._wake_up_next() @@ -408,13 +437,15 @@ def release(self): def _wake_up_next(self): """Wake up the first waiter that isn't done.""" if not self._waiters: - return + return False for fut in self._waiters: if not fut.done(): self._value -= 1 fut.set_result(True) - return + # `fut` is now `done()` and not `cancelled()`. + return True + return False class BoundedSemaphore(Semaphore): diff --git a/PythonLib/full/asyncio/proactor_events.py b/PythonLib/full/asyncio/proactor_events.py index 23ea29c6..f404273c 100644 --- a/PythonLib/full/asyncio/proactor_events.py +++ b/PythonLib/full/asyncio/proactor_events.py @@ -63,7 +63,7 @@ def __init__(self, loop, sock, protocol, waiter=None, self._called_connection_lost = False self._eof_written = False if self._server is not None: - self._server._attach() + self._server._attach(self) self._loop.call_soon(self._protocol.connection_made, self) if waiter is not None: # only wake up the waiter when connection_made() has been called @@ -167,7 +167,7 @@ def _call_connection_lost(self, exc): self._sock = None server = self._server if server is not None: - server._detach() + server._detach(self) self._server = None self._called_connection_lost = True @@ -460,6 +460,8 @@ def _pipe_closed(self, fut): class _ProactorDatagramTransport(_ProactorBasePipeTransport, transports.DatagramTransport): max_size = 256 * 1024 + _header_size = 8 + def __init__(self, loop, sock, protocol, address=None, waiter=None, extra=None): self._address = address @@ -487,9 +489,6 @@ def sendto(self, data, addr=None): raise TypeError('data argument must be bytes-like object (%r)', type(data)) - if not data: - return - if self._address is not None and addr not in (None, self._address): raise ValueError( f'Invalid address: must be None or {self._address}') @@ -502,7 +501,7 @@ def sendto(self, data, addr=None): # Ensure that what we buffer is immutable. self._buffer.append((bytes(data), addr)) - self._buffer_size += len(data) + self._buffer_size += len(data) + self._header_size if self._write_fut is None: # No current write operations are active, kick one off @@ -529,7 +528,7 @@ def _loop_writing(self, fut=None): return data, addr = self._buffer.popleft() - self._buffer_size -= len(data) + self._buffer_size -= len(data) + self._header_size if self._address is not None: self._write_fut = self._loop._proactor.send(self._sock, data) diff --git a/PythonLib/full/asyncio/queues.py b/PythonLib/full/asyncio/queues.py index a9656a6d..084fccaa 100644 --- a/PythonLib/full/asyncio/queues.py +++ b/PythonLib/full/asyncio/queues.py @@ -1,4 +1,11 @@ -__all__ = ('Queue', 'PriorityQueue', 'LifoQueue', 'QueueFull', 'QueueEmpty') +__all__ = ( + 'Queue', + 'PriorityQueue', + 'LifoQueue', + 'QueueFull', + 'QueueEmpty', + 'QueueShutDown', +) import collections import heapq @@ -18,6 +25,11 @@ class QueueFull(Exception): pass +class QueueShutDown(Exception): + """Raised when putting on to or getting from a shut-down Queue.""" + pass + + class Queue(mixins._LoopBoundMixin): """A queue, useful for coordinating producer and consumer coroutines. @@ -41,6 +53,7 @@ def __init__(self, maxsize=0): self._finished = locks.Event() self._finished.set() self._init(maxsize) + self._is_shutdown = False # These three are overridable in subclasses. @@ -81,6 +94,8 @@ def _format(self): result += f' _putters[{len(self._putters)}]' if self._unfinished_tasks: result += f' tasks={self._unfinished_tasks}' + if self._is_shutdown: + result += ' shutdown' return result def qsize(self): @@ -112,8 +127,12 @@ async def put(self, item): Put an item into the queue. If the queue is full, wait until a free slot is available before adding item. + + Raises QueueShutDown if the queue has been shut down. """ while self.full(): + if self._is_shutdown: + raise QueueShutDown putter = self._get_loop().create_future() self._putters.append(putter) try: @@ -125,7 +144,7 @@ async def put(self, item): self._putters.remove(putter) except ValueError: # The putter could be removed from self._putters by a - # previous get_nowait call. + # previous get_nowait call or a shutdown call. pass if not self.full() and not putter.cancelled(): # We were woken up by get_nowait(), but can't take @@ -138,7 +157,11 @@ def put_nowait(self, item): """Put an item into the queue without blocking. If no free slot is immediately available, raise QueueFull. + + Raises QueueShutDown if the queue has been shut down. """ + if self._is_shutdown: + raise QueueShutDown if self.full(): raise QueueFull self._put(item) @@ -150,8 +173,13 @@ async def get(self): """Remove and return an item from the queue. If queue is empty, wait until an item is available. + + Raises QueueShutDown if the queue has been shut down and is empty, or + if the queue has been shut down immediately. """ while self.empty(): + if self._is_shutdown and self.empty(): + raise QueueShutDown getter = self._get_loop().create_future() self._getters.append(getter) try: @@ -163,7 +191,7 @@ async def get(self): self._getters.remove(getter) except ValueError: # The getter could be removed from self._getters by a - # previous put_nowait call. + # previous put_nowait call, or a shutdown call. pass if not self.empty() and not getter.cancelled(): # We were woken up by put_nowait(), but can't take @@ -176,8 +204,13 @@ def get_nowait(self): """Remove and return an item from the queue. Return an item if one is immediately available, else raise QueueEmpty. + + Raises QueueShutDown if the queue has been shut down and is empty, or + if the queue has been shut down immediately. """ if self.empty(): + if self._is_shutdown: + raise QueueShutDown raise QueueEmpty item = self._get() self._wakeup_next(self._putters) @@ -214,6 +247,36 @@ async def join(self): if self._unfinished_tasks > 0: await self._finished.wait() + def shutdown(self, immediate=False): + """Shut-down the queue, making queue gets and puts raise QueueShutDown. + + By default, gets will only raise once the queue is empty. Set + 'immediate' to True to make gets raise immediately instead. + + All blocked callers of put() and get() will be unblocked. + + If 'immediate', the queue is drained and unfinished tasks + is reduced by the number of drained tasks. If unfinished tasks + is reduced to zero, callers of Queue.join are unblocked. + """ + self._is_shutdown = True + if immediate: + while not self.empty(): + self._get() + if self._unfinished_tasks > 0: + self._unfinished_tasks -= 1 + if self._unfinished_tasks == 0: + self._finished.set() + # All getters need to re-check queue-empty to raise ShutDown + while self._getters: + getter = self._getters.popleft() + if not getter.done(): + getter.set_result(None) + while self._putters: + putter = self._putters.popleft() + if not putter.done(): + putter.set_result(None) + class PriorityQueue(Queue): """A subclass of Queue; retrieves entries in priority order (lowest first). diff --git a/PythonLib/full/asyncio/runners.py b/PythonLib/full/asyncio/runners.py index 102ae780..ba37e003 100644 --- a/PythonLib/full/asyncio/runners.py +++ b/PythonLib/full/asyncio/runners.py @@ -3,6 +3,7 @@ import contextvars import enum import functools +import inspect import threading import signal from . import coroutines @@ -84,10 +85,7 @@ def get_loop(self): return self._loop def run(self, coro, *, context=None): - """Run a coroutine inside the embedded event loop.""" - if not coroutines.iscoroutine(coro): - raise ValueError("a coroutine was expected, got {!r}".format(coro)) - + """Run code in the embedded event loop.""" if events._get_running_loop() is not None: # fail fast with short traceback raise RuntimeError( @@ -95,8 +93,19 @@ def run(self, coro, *, context=None): self._lazy_init() + if not coroutines.iscoroutine(coro): + if inspect.isawaitable(coro): + async def _wrap_awaitable(awaitable): + return await awaitable + + coro = _wrap_awaitable(coro) + else: + raise TypeError('An asyncio.Future, a coroutine or an ' + 'awaitable is required') + if context is None: context = self._context + task = self._loop.create_task(coro, context=context) if (threading.current_thread() is threading.main_thread() diff --git a/PythonLib/full/asyncio/selector_events.py b/PythonLib/full/asyncio/selector_events.py index 160ed6ca..ff7e16df 100644 --- a/PythonLib/full/asyncio/selector_events.py +++ b/PythonLib/full/asyncio/selector_events.py @@ -173,16 +173,20 @@ def _accept_connection( # listening socket has triggered an EVENT_READ. There may be multiple # connections waiting for an .accept() so it is called in a loop. # See https://bugs.python.org/issue27906 for more details. - for _ in range(backlog): + for _ in range(backlog + 1): try: conn, addr = sock.accept() if self._debug: logger.debug("%r got a new connection from %r: %r", server, addr, conn) conn.setblocking(False) - except (BlockingIOError, InterruptedError, ConnectionAbortedError): - # Early exit because the socket accept buffer is empty. - return None + except ConnectionAbortedError: + # Discard connections that were aborted before accept(). + continue + except (BlockingIOError, InterruptedError): + # Early exit because of a signal or + # the socket accept buffer is empty. + return except OSError as exc: # There's nowhere to send the error, so just log it. if exc.errno in (errno.EMFILE, errno.ENFILE, @@ -265,22 +269,17 @@ def _ensure_fd_no_transport(self, fd): except (AttributeError, TypeError, ValueError): # This code matches selectors._fileobj_to_fd function. raise ValueError(f"Invalid file object: {fd!r}") from None - try: - transport = self._transports[fileno] - except KeyError: - pass - else: - if not transport.is_closing(): - raise RuntimeError( - f'File descriptor {fd!r} is used by transport ' - f'{transport!r}') + transport = self._transports.get(fileno) + if transport and not transport.is_closing(): + raise RuntimeError( + f'File descriptor {fd!r} is used by transport ' + f'{transport!r}') def _add_reader(self, fd, callback, *args): self._check_closed() handle = events.Handle(callback, args, self, None) - try: - key = self._selector.get_key(fd) - except KeyError: + key = self._selector.get_map().get(fd) + if key is None: self._selector.register(fd, selectors.EVENT_READ, (handle, None)) else: @@ -294,30 +293,27 @@ def _add_reader(self, fd, callback, *args): def _remove_reader(self, fd): if self.is_closed(): return False - try: - key = self._selector.get_key(fd) - except KeyError: + key = self._selector.get_map().get(fd) + if key is None: return False + mask, (reader, writer) = key.events, key.data + mask &= ~selectors.EVENT_READ + if not mask: + self._selector.unregister(fd) else: - mask, (reader, writer) = key.events, key.data - mask &= ~selectors.EVENT_READ - if not mask: - self._selector.unregister(fd) - else: - self._selector.modify(fd, mask, (None, writer)) + self._selector.modify(fd, mask, (None, writer)) - if reader is not None: - reader.cancel() - return True - else: - return False + if reader is not None: + reader.cancel() + return True + else: + return False def _add_writer(self, fd, callback, *args): self._check_closed() handle = events.Handle(callback, args, self, None) - try: - key = self._selector.get_key(fd) - except KeyError: + key = self._selector.get_map().get(fd) + if key is None: self._selector.register(fd, selectors.EVENT_WRITE, (None, handle)) else: @@ -332,24 +328,22 @@ def _remove_writer(self, fd): """Remove a writer callback.""" if self.is_closed(): return False - try: - key = self._selector.get_key(fd) - except KeyError: + key = self._selector.get_map().get(fd) + if key is None: return False + mask, (reader, writer) = key.events, key.data + # Remove both writer and connector. + mask &= ~selectors.EVENT_WRITE + if not mask: + self._selector.unregister(fd) else: - mask, (reader, writer) = key.events, key.data - # Remove both writer and connector. - mask &= ~selectors.EVENT_WRITE - if not mask: - self._selector.unregister(fd) - else: - self._selector.modify(fd, mask, (reader, None)) + self._selector.modify(fd, mask, (reader, None)) - if writer is not None: - writer.cancel() - return True - else: - return False + if writer is not None: + writer.cancel() + return True + else: + return False def add_reader(self, fd, callback, *args): """Add a reader callback.""" @@ -801,7 +795,7 @@ def __init__(self, loop, sock, protocol, extra=None, server=None): self._paused = False # Set when pause_reading() called if self._server is not None: - self._server._attach() + self._server._attach(self) loop._transports[self._sock_fd] = self def __repr__(self): @@ -878,6 +872,8 @@ def __del__(self, _warn=warnings.warn): if self._sock is not None: _warn(f"unclosed transport {self!r}", ResourceWarning, source=self) self._sock.close() + if self._server is not None: + self._server._detach(self) def _fatal_error(self, exc, message='Fatal error on transport'): # Should be called from exception handler only. @@ -916,7 +912,7 @@ def _call_connection_lost(self, exc): self._loop = None server = self._server if server is not None: - server._detach() + server._detach(self) self._server = None def get_write_buffer_size(self): @@ -1054,8 +1050,8 @@ def _read_ready__on_eof(self): def write(self, data): if not isinstance(data, (bytes, bytearray, memoryview)): - raise TypeError(f'data argument must be a bytes-like object, ' - f'not {type(data).__name__!r}') + raise TypeError(f'data argument must be a bytes, bytearray, or memoryview ' + f'object, not {type(data).__name__!r}') if self._eof: raise RuntimeError('Cannot call write() after write_eof()') if self._empty_waiter is not None: @@ -1178,6 +1174,13 @@ def writelines(self, list_of_data): raise RuntimeError('unable to writelines; sendfile is in progress') if not list_of_data: return + + if self._conn_lost: + if self._conn_lost >= constants.LOG_THRESHOLD_FOR_CONNLOST_WRITES: + logger.warning('socket.send() raised exception.') + self._conn_lost += 1 + return + self._buffer.extend([memoryview(data) for data in list_of_data]) self._write_ready() # If the entire buffer couldn't be written, register a write handler @@ -1216,6 +1219,7 @@ def close(self): class _SelectorDatagramTransport(_SelectorTransport, transports.DatagramTransport): _buffer_factory = collections.deque + _header_size = 8 def __init__(self, loop, sock, protocol, address=None, waiter=None, extra=None): @@ -1254,8 +1258,6 @@ def sendto(self, data, addr=None): if not isinstance(data, (bytes, bytearray, memoryview)): raise TypeError(f'data argument must be a bytes-like object, ' f'not {type(data).__name__!r}') - if not data: - return if self._address: if addr not in (None, self._address): @@ -1291,13 +1293,13 @@ def sendto(self, data, addr=None): # Ensure that what we buffer is immutable. self._buffer.append((bytes(data), addr)) - self._buffer_size += len(data) + self._buffer_size += len(data) + self._header_size self._maybe_pause_protocol() def _sendto_ready(self): while self._buffer: data, addr = self._buffer.popleft() - self._buffer_size -= len(data) + self._buffer_size -= len(data) + self._header_size try: if self._extra['peername']: self._sock.send(data) @@ -1305,7 +1307,7 @@ def _sendto_ready(self): self._sock.sendto(data, addr) except (BlockingIOError, InterruptedError): self._buffer.appendleft((data, addr)) # Try again later. - self._buffer_size += len(data) + self._buffer_size += len(data) + self._header_size break except OSError as exc: self._protocol.error_received(exc) diff --git a/PythonLib/full/asyncio/sslproto.py b/PythonLib/full/asyncio/sslproto.py index 29e72b1f..74c5f0d5 100644 --- a/PythonLib/full/asyncio/sslproto.py +++ b/PythonLib/full/asyncio/sslproto.py @@ -545,7 +545,7 @@ def _start_handshake(self): # start handshake timeout count down self._handshake_timeout_handle = \ self._loop.call_later(self._ssl_handshake_timeout, - lambda: self._check_handshake_timeout()) + self._check_handshake_timeout) self._do_handshake() @@ -626,7 +626,7 @@ def _start_shutdown(self): self._set_state(SSLProtocolState.FLUSHING) self._shutdown_timeout_handle = self._loop.call_later( self._ssl_shutdown_timeout, - lambda: self._check_shutdown_timeout() + self._check_shutdown_timeout ) self._do_flush() @@ -765,7 +765,7 @@ def _do_read__buffered(self): else: break else: - self._loop.call_soon(lambda: self._do_read()) + self._loop.call_soon(self._do_read) except SSLAgainErrors: pass if offset > 0: diff --git a/PythonLib/full/asyncio/staggered.py b/PythonLib/full/asyncio/staggered.py index 0afed64f..2ad65d86 100644 --- a/PythonLib/full/asyncio/staggered.py +++ b/PythonLib/full/asyncio/staggered.py @@ -8,6 +8,7 @@ from . import exceptions as exceptions_mod from . import locks from . import tasks +from . import futures async def staggered_race(coro_fns, delay, *, loop=None): @@ -63,6 +64,7 @@ async def staggered_race(coro_fns, delay, *, loop=None): """ # TODO: when we have aiter() and anext(), allow async iterables in coro_fns. loop = loop or events.get_running_loop() + parent_task = tasks.current_task(loop) enum_coro_fns = enumerate(coro_fns) winner_result = None winner_index = None @@ -73,6 +75,7 @@ async def staggered_race(coro_fns, delay, *, loop=None): def task_done(task): running_tasks.discard(task) + futures.future_discard_from_awaited_by(task, parent_task) if ( on_completed_fut is not None and not on_completed_fut.done() @@ -110,6 +113,7 @@ async def run_one_coro(ok_to_start, previous_failed) -> None: this_failed = locks.Event() next_ok_to_start = locks.Event() next_task = loop.create_task(run_one_coro(next_ok_to_start, this_failed)) + futures.future_add_to_awaited_by(next_task, parent_task) running_tasks.add(next_task) next_task.add_done_callback(task_done) # next_task has been appended to running_tasks so next_task is ok to @@ -148,6 +152,7 @@ async def run_one_coro(ok_to_start, previous_failed) -> None: try: ok_to_start = locks.Event() first_task = loop.create_task(run_one_coro(ok_to_start, None)) + futures.future_add_to_awaited_by(first_task, parent_task) running_tasks.add(first_task) first_task.add_done_callback(task_done) # first_task has been appended to running_tasks so first_task is ok to start. @@ -171,4 +176,4 @@ async def run_one_coro(ok_to_start, previous_failed) -> None: raise propagate_cancellation_error return winner_result, winner_index, exceptions finally: - del exceptions, propagate_cancellation_error, unhandled_exceptions + del exceptions, propagate_cancellation_error, unhandled_exceptions, parent_task diff --git a/PythonLib/full/asyncio/streams.py b/PythonLib/full/asyncio/streams.py index f310aa2f..64aac4cc 100644 --- a/PythonLib/full/asyncio/streams.py +++ b/PythonLib/full/asyncio/streams.py @@ -201,7 +201,6 @@ def __init__(self, stream_reader, client_connected_cb=None, loop=None): # is established. self._strong_reader = stream_reader self._reject_connection = False - self._stream_writer = None self._task = None self._transport = None self._client_connected_cb = client_connected_cb @@ -214,10 +213,8 @@ def _stream_reader(self): return None return self._stream_reader_wr() - def _replace_writer(self, writer): + def _replace_transport(self, transport): loop = self._loop - transport = writer.transport - self._stream_writer = writer self._transport = transport self._over_ssl = transport.get_extra_info('sslcontext') is not None @@ -239,11 +236,8 @@ def connection_made(self, transport): reader.set_transport(transport) self._over_ssl = transport.get_extra_info('sslcontext') is not None if self._client_connected_cb is not None: - self._stream_writer = StreamWriter(transport, self, - reader, - self._loop) - res = self._client_connected_cb(reader, - self._stream_writer) + writer = StreamWriter(transport, self, reader, self._loop) + res = self._client_connected_cb(reader, writer) if coroutines.iscoroutine(res): def callback(task): if task.cancelled(): @@ -405,9 +399,9 @@ async def start_tls(self, sslcontext, *, ssl_handshake_timeout=ssl_handshake_timeout, ssl_shutdown_timeout=ssl_shutdown_timeout) self._transport = new_transport - protocol._replace_writer(self) + protocol._replace_transport(new_transport) - def __del__(self): + def __del__(self, warnings=warnings): if not self._transport.is_closing(): if self._loop.is_closed(): warnings.warn("loop is closed", ResourceWarning) @@ -596,20 +590,34 @@ async def readuntil(self, separator=b'\n'): If the data cannot be read because of over limit, a LimitOverrunError exception will be raised, and the data will be left in the internal buffer, so it can be read again. + + The ``separator`` may also be a tuple of separators. In this + case the return value will be the shortest possible that has any + separator as the suffix. For the purposes of LimitOverrunError, + the shortest possible separator is considered to be the one that + matched. """ - seplen = len(separator) - if seplen == 0: + if isinstance(separator, tuple): + # Makes sure shortest matches wins + separator = sorted(separator, key=len) + else: + separator = [separator] + if not separator: + raise ValueError('Separator should contain at least one element') + min_seplen = len(separator[0]) + max_seplen = len(separator[-1]) + if min_seplen == 0: raise ValueError('Separator should be at least one-byte string') if self._exception is not None: raise self._exception # Consume whole buffer except last bytes, which length is - # one less than seplen. Let's check corner cases with - # separator='SEPARATOR': + # one less than max_seplen. Let's check corner cases with + # separator[-1]='SEPARATOR': # * we have received almost complete separator (without last # byte). i.e buffer='some textSEPARATO'. In this case we - # can safely consume len(separator) - 1 bytes. + # can safely consume max_seplen - 1 bytes. # * last byte of buffer is first byte of separator, i.e. # buffer='abcdefghijklmnopqrS'. We may safely consume # everything except that last byte, but this require to @@ -622,26 +630,35 @@ async def readuntil(self, separator=b'\n'): # messages :) # `offset` is the number of bytes from the beginning of the buffer - # where there is no occurrence of `separator`. + # where there is no occurrence of any `separator`. offset = 0 - # Loop until we find `separator` in the buffer, exceed the buffer size, + # Loop until we find a `separator` in the buffer, exceed the buffer size, # or an EOF has happened. while True: buflen = len(self._buffer) - # Check if we now have enough data in the buffer for `separator` to - # fit. - if buflen - offset >= seplen: - isep = self._buffer.find(separator, offset) - - if isep != -1: - # `separator` is in the buffer. `isep` will be used later - # to retrieve the data. + # Check if we now have enough data in the buffer for shortest + # separator to fit. + if buflen - offset >= min_seplen: + match_start = None + match_end = None + for sep in separator: + isep = self._buffer.find(sep, offset) + + if isep != -1: + # `separator` is in the buffer. `match_start` and + # `match_end` will be used later to retrieve the + # data. + end = isep + len(sep) + if match_end is None or end < match_end: + match_end = end + match_start = isep + if match_end is not None: break # see upper comment for explanation. - offset = buflen + 1 - seplen + offset = max(0, buflen + 1 - max_seplen) if offset > self._limit: raise exceptions.LimitOverrunError( 'Separator is not found, and chunk exceed the limit', @@ -650,7 +667,7 @@ async def readuntil(self, separator=b'\n'): # Complete message (with full separator) may be present in buffer # even when EOF flag is set. This may happen when the last chunk # adds data which makes separator be found. That's why we check for - # EOF *ater* inspecting the buffer. + # EOF *after* inspecting the buffer. if self._eof: chunk = bytes(self._buffer) self._buffer.clear() @@ -659,12 +676,12 @@ async def readuntil(self, separator=b'\n'): # _wait_for_data() will resume reading if stream was paused. await self._wait_for_data('readuntil') - if isep > self._limit: + if match_start > self._limit: raise exceptions.LimitOverrunError( - 'Separator is found, but chunk is longer than limit', isep) + 'Separator is found, but chunk is longer than limit', match_start) - chunk = self._buffer[:isep + seplen] - del self._buffer[:isep + seplen] + chunk = self._buffer[:match_end] + del self._buffer[:match_end] self._maybe_resume_transport() return bytes(chunk) diff --git a/PythonLib/full/asyncio/taskgroups.py b/PythonLib/full/asyncio/taskgroups.py index b2b953b0..00e8f6d5 100644 --- a/PythonLib/full/asyncio/taskgroups.py +++ b/PythonLib/full/asyncio/taskgroups.py @@ -6,6 +6,7 @@ from . import events from . import exceptions +from . import futures from . import tasks @@ -87,14 +88,10 @@ async def _aexit(self, et, exc): self._base_error is None): self._base_error = exc - propagate_cancellation_error = \ - exc if et is exceptions.CancelledError else None - if self._parent_cancel_requested: - # If this flag is set we *must* call uncancel(). - if self._parent_task.uncancel() == 0: - # If there are no pending cancellations left, - # don't propagate CancelledError. - propagate_cancellation_error = None + if et is not None and issubclass(et, exceptions.CancelledError): + propagate_cancellation_error = exc + else: + propagate_cancellation_error = None if et is not None: if not self._aborting: @@ -145,10 +142,17 @@ async def _aexit(self, et, exc): finally: exc = None + if self._parent_cancel_requested: + # If this flag is set we *must* call uncancel(). + if self._parent_task.uncancel() == 0: + # If there are no pending cancellations left, + # don't propagate CancelledError. + propagate_cancellation_error = None + # Propagate CancelledError if there is one, except if there # are other errors -- those have priority. try: - if propagate_cancellation_error and not self._errors: + if propagate_cancellation_error is not None and not self._errors: try: raise propagate_cancellation_error finally: @@ -156,10 +160,16 @@ async def _aexit(self, et, exc): finally: propagate_cancellation_error = None - if et is not None and et is not exceptions.CancelledError: + if et is not None and not issubclass(et, exceptions.CancelledError): self._errors.append(exc) if self._errors: + # If the parent task is being cancelled from the outside + # of the taskgroup, un-cancel and re-cancel the parent task, + # which will keep the cancel count stable. + if self._parent_task.cancelling(): + self._parent_task.uncancel() + self._parent_task.cancel() try: raise BaseExceptionGroup( 'unhandled errors in a TaskGroup', @@ -169,28 +179,28 @@ async def _aexit(self, et, exc): exc = None - def create_task(self, coro, *, name=None, context=None): + def create_task(self, coro, **kwargs): """Create a new task in this group and return it. Similar to `asyncio.create_task`. """ if not self._entered: + coro.close() raise RuntimeError(f"TaskGroup {self!r} has not been entered") if self._exiting and not self._tasks: + coro.close() raise RuntimeError(f"TaskGroup {self!r} is finished") if self._aborting: + coro.close() raise RuntimeError(f"TaskGroup {self!r} is shutting down") - if context is None: - task = self._loop.create_task(coro) - else: - task = self._loop.create_task(coro, context=context) - tasks._set_task_name(task, name) + task = self._loop.create_task(coro, **kwargs) + + futures.future_add_to_awaited_by(task, self._parent_task) # Always schedule the done callback even if the task is # already done (e.g. if the coro was able to complete eagerly), # otherwise if the task completes with an exception then it will cancel # the current task too early. gh-128550, gh-128588 - self._tasks.add(task) task.add_done_callback(self._on_task_done) try: @@ -218,6 +228,8 @@ def _abort(self): def _on_task_done(self, task): self._tasks.discard(task) + futures.future_discard_from_awaited_by(task, self._parent_task) + if self._on_completed_fut is not None and not self._tasks: if not self._on_completed_fut.done(): self._on_completed_fut.set_result(True) diff --git a/PythonLib/full/asyncio/tasks.py b/PythonLib/full/asyncio/tasks.py index 0b22e28d..fbd5c39a 100644 --- a/PythonLib/full/asyncio/tasks.py +++ b/PythonLib/full/asyncio/tasks.py @@ -15,8 +15,8 @@ import functools import inspect import itertools +import math import types -import warnings import weakref from types import GenericAlias @@ -25,6 +25,7 @@ from . import events from . import exceptions from . import futures +from . import queues from . import timeouts # Helper to generate new task names @@ -47,37 +48,9 @@ def all_tasks(loop=None): # capturing the set of eager tasks first, so if an eager task "graduates" # to a regular task in another thread, we don't risk missing it. eager_tasks = list(_eager_tasks) - # Looping over the WeakSet isn't safe as it can be updated from another - # thread, therefore we cast it to list prior to filtering. The list cast - # itself requires iteration, so we repeat it several times ignoring - # RuntimeErrors (which are not very likely to occur). - # See issues 34970 and 36607 for details. - scheduled_tasks = None - i = 0 - while True: - try: - scheduled_tasks = list(_scheduled_tasks) - except RuntimeError: - i += 1 - if i >= 1000: - raise - else: - break - return {t for t in itertools.chain(scheduled_tasks, eager_tasks) - if futures._get_loop(t) is loop and not t.done()} - -def _set_task_name(task, name): - if name is not None: - try: - set_name = task.set_name - except AttributeError: - warnings.warn("Task.set_name() was added in Python 3.8, " - "the method support will be mandatory for third-party " - "task implementations since 3.13.", - DeprecationWarning, stacklevel=3) - else: - set_name(name) + return {t for t in itertools.chain(_scheduled_tasks, eager_tasks) + if futures._get_loop(t) is loop and not t.done()} class Task(futures._PyFuture): # Inherit Python Task implementation @@ -137,7 +110,7 @@ def __init__(self, coro, *, loop=None, name=None, context=None, self.__eager_start() else: self._loop.call_soon(self.__step, context=self._context) - _register_task(self) + _py_register_task(self) def __del__(self): if self._state == futures._PENDING and self._log_destroy_pending: @@ -267,42 +240,44 @@ def uncancel(self): """ if self._num_cancels_requested > 0: self._num_cancels_requested -= 1 + if self._num_cancels_requested == 0: + self._must_cancel = False return self._num_cancels_requested def __eager_start(self): - prev_task = _swap_current_task(self._loop, self) + prev_task = _py_swap_current_task(self._loop, self) try: - _register_eager_task(self) + _py_register_eager_task(self) try: self._context.run(self.__step_run_and_handle_result, None) finally: - _unregister_eager_task(self) + _py_unregister_eager_task(self) finally: try: - curtask = _swap_current_task(self._loop, prev_task) + curtask = _py_swap_current_task(self._loop, prev_task) assert curtask is self finally: if self.done(): self._coro = None self = None # Needed to break cycles when an exception occurs. else: - _register_task(self) + _py_register_task(self) def __step(self, exc=None): if self.done(): raise exceptions.InvalidStateError( - f'_step(): already done: {self!r}, {exc!r}') + f'__step(): already done: {self!r}, {exc!r}') if self._must_cancel: if not isinstance(exc, exceptions.CancelledError): exc = self._make_cancelled_error() self._must_cancel = False self._fut_waiter = None - _enter_task(self._loop, self) + _py_enter_task(self._loop, self) try: self.__step_run_and_handle_result(exc) finally: - _leave_task(self._loop, self) + _py_leave_task(self._loop, self) self = None # Needed to break cycles when an exception occurs. def __step_run_and_handle_result(self, exc): @@ -347,6 +322,7 @@ def __step_run_and_handle_result(self, exc): self._loop.call_soon( self.__step, new_exc, context=self._context) else: + futures.future_add_to_awaited_by(result, self) result._asyncio_future_blocking = False result.add_done_callback( self.__wakeup, context=self._context) @@ -381,6 +357,7 @@ def __step_run_and_handle_result(self, exc): self = None # Needed to break cycles when an exception occurs. def __wakeup(self, future): + futures.future_discard_from_awaited_by(future, self) try: future.result() except BaseException as exc: @@ -389,7 +366,7 @@ def __wakeup(self, future): else: # Don't pass the value of `future.result()` explicitly, # as `Future.__iter__` and `Future.__await__` don't need it. - # If we call `_step(value, None)` instead of `_step()`, + # If we call `__step(value, None)` instead of `__step()`, # Python eval loop would use `.send(value)` method call, # instead of `__next__()`, which is slower for futures # that return non-generator iterators from their `__iter__`. @@ -409,20 +386,13 @@ def __wakeup(self, future): Task = _CTask = _asyncio.Task -def create_task(coro, *, name=None, context=None): +def create_task(coro, **kwargs): """Schedule the execution of a coroutine object in a spawn task. Return a Task object. """ loop = events.get_running_loop() - if context is None: - # Use legacy API if context is not needed - task = loop.create_task(coro) - else: - task = loop.create_task(coro, context=context) - - _set_task_name(task, name) - return task + return loop.create_task(coro, **kwargs) # wait() and as_completed() similar to those in PEP 3148. @@ -437,8 +407,6 @@ async def wait(fs, *, timeout=None, return_when=ALL_COMPLETED): The fs iterable must not be empty. - Coroutines will be wrapped in Tasks. - Returns two sets of Future: (done, pending). Usage: @@ -530,6 +498,7 @@ async def _wait(fs, timeout, return_when, loop): if timeout is not None: timeout_handle = loop.call_later(timeout, _release_waiter, waiter) counter = len(fs) + cur_task = current_task() def _on_completion(f): nonlocal counter @@ -542,9 +511,11 @@ def _on_completion(f): timeout_handle.cancel() if not waiter.done(): waiter.set_result(None) + futures.future_discard_from_awaited_by(f, cur_task) for f in fs: f.add_done_callback(_on_completion) + futures.future_add_to_awaited_by(f, cur_task) try: await waiter @@ -580,62 +551,125 @@ async def _cancel_and_wait(fut): fut.remove_done_callback(cb) -# This is *not* a @coroutine! It is just an iterator (yielding Futures). +class _AsCompletedIterator: + """Iterator of awaitables representing tasks of asyncio.as_completed. + + As an asynchronous iterator, iteration yields futures as they finish. As a + plain iterator, new coroutines are yielded that will return or raise the + result of the next underlying future to complete. + """ + def __init__(self, aws, timeout): + self._done = queues.Queue() + self._timeout_handle = None + + loop = events.get_event_loop() + todo = {ensure_future(aw, loop=loop) for aw in set(aws)} + for f in todo: + f.add_done_callback(self._handle_completion) + if todo and timeout is not None: + self._timeout_handle = ( + loop.call_later(timeout, self._handle_timeout) + ) + self._todo = todo + self._todo_left = len(todo) + + def __aiter__(self): + return self + + def __iter__(self): + return self + + async def __anext__(self): + if not self._todo_left: + raise StopAsyncIteration + assert self._todo_left > 0 + self._todo_left -= 1 + return await self._wait_for_one() + + def __next__(self): + if not self._todo_left: + raise StopIteration + assert self._todo_left > 0 + self._todo_left -= 1 + return self._wait_for_one(resolve=True) + + def _handle_timeout(self): + for f in self._todo: + f.remove_done_callback(self._handle_completion) + self._done.put_nowait(None) # Sentinel for _wait_for_one(). + self._todo.clear() # Can't do todo.remove(f) in the loop. + + def _handle_completion(self, f): + if not self._todo: + return # _handle_timeout() was here first. + self._todo.remove(f) + self._done.put_nowait(f) + if not self._todo and self._timeout_handle is not None: + self._timeout_handle.cancel() + + async def _wait_for_one(self, resolve=False): + # Wait for the next future to be done and return it unless resolve is + # set, in which case return either the result of the future or raise + # an exception. + f = await self._done.get() + if f is None: + # Dummy value from _handle_timeout(). + raise exceptions.TimeoutError + return f.result() if resolve else f + + def as_completed(fs, *, timeout=None): - """Return an iterator whose values are coroutines. + """Create an iterator of awaitables or their results in completion order. - When waiting for the yielded coroutines you'll get the results (or - exceptions!) of the original Futures (or coroutines), in the order - in which and as soon as they complete. + Run the supplied awaitables concurrently. The returned object can be + iterated to obtain the results of the awaitables as they finish. - This differs from PEP 3148; the proper way to use this is: + The object returned can be iterated as an asynchronous iterator or a plain + iterator. When asynchronous iteration is used, the originally-supplied + awaitables are yielded if they are tasks or futures. This makes it easy to + correlate previously-scheduled tasks with their results: - for f in as_completed(fs): - result = await f # The 'await' may raise. - # Use result. + ipv4_connect = create_task(open_connection("127.0.0.1", 80)) + ipv6_connect = create_task(open_connection("::1", 80)) + tasks = [ipv4_connect, ipv6_connect] - If a timeout is specified, the 'await' will raise - TimeoutError when the timeout occurs before all Futures are done. + async for earliest_connect in as_completed(tasks): + # earliest_connect is done. The result can be obtained by + # awaiting it or calling earliest_connect.result() + reader, writer = await earliest_connect - Note: The futures 'f' are not necessarily members of fs. - """ - if futures.isfuture(fs) or coroutines.iscoroutine(fs): - raise TypeError(f"expect an iterable of futures, not {type(fs).__name__}") + if earliest_connect is ipv6_connect: + print("IPv6 connection established.") + else: + print("IPv4 connection established.") - from .queues import Queue # Import here to avoid circular import problem. - done = Queue() + During asynchronous iteration, implicitly-created tasks will be yielded for + supplied awaitables that aren't tasks or futures. - loop = events.get_event_loop() - todo = {ensure_future(f, loop=loop) for f in set(fs)} - timeout_handle = None + When used as a plain iterator, each iteration yields a new coroutine that + returns the result or raises the exception of the next completed awaitable. + This pattern is compatible with Python versions older than 3.13: - def _on_timeout(): - for f in todo: - f.remove_done_callback(_on_completion) - done.put_nowait(None) # Queue a dummy value for _wait_for_one(). - todo.clear() # Can't do todo.remove(f) in the loop. + ipv4_connect = create_task(open_connection("127.0.0.1", 80)) + ipv6_connect = create_task(open_connection("::1", 80)) + tasks = [ipv4_connect, ipv6_connect] - def _on_completion(f): - if not todo: - return # _on_timeout() was here first. - todo.remove(f) - done.put_nowait(f) - if not todo and timeout_handle is not None: - timeout_handle.cancel() + for next_connect in as_completed(tasks): + # next_connect is not one of the original task objects. It must be + # awaited to obtain the result value or raise the exception of the + # awaitable that finishes next. + reader, writer = await next_connect - async def _wait_for_one(): - f = await done.get() - if f is None: - # Dummy value from _on_timeout(). - raise exceptions.TimeoutError - return f.result() # May raise f.exception(). + A TimeoutError is raised if the timeout occurs before all awaitables are + done. This is raised by the async for loop during asynchronous iteration or + by the coroutines yielded during plain iteration. + """ + if inspect.isawaitable(fs): + raise TypeError( + f"expects an iterable of awaitables, not {type(fs).__name__}" + ) - for f in todo: - f.add_done_callback(_on_completion) - if todo and timeout is not None: - timeout_handle = loop.call_later(timeout, _on_timeout) - for _ in range(len(todo)): - yield _wait_for_one() + return _AsCompletedIterator(fs, timeout) @types.coroutine @@ -656,6 +690,9 @@ async def sleep(delay, result=None): await __sleep0() return result + if math.isnan(delay): + raise ValueError("Invalid delay: NaN (not a number)") + loop = events.get_running_loop() future = loop.create_future() h = loop.call_later(delay, @@ -764,10 +801,19 @@ def gather(*coros_or_futures, return_exceptions=False): outer.set_result([]) return outer - def _done_callback(fut): + loop = events._get_running_loop() + if loop is not None: + cur_task = current_task(loop) + else: + cur_task = None + + def _done_callback(fut, cur_task=cur_task): nonlocal nfinished nfinished += 1 + if cur_task is not None: + futures.future_discard_from_awaited_by(fut, cur_task) + if outer is None or outer.done(): if not fut.cancelled(): # Mark exception retrieved. @@ -824,7 +870,6 @@ def _done_callback(fut): nfuts = 0 nfinished = 0 done_futs = [] - loop = None outer = None # bpo-46672 for arg in coros_or_futures: if arg not in arg_to_fut: @@ -837,12 +882,13 @@ def _done_callback(fut): # can't control it, disable the "destroy pending task" # warning. fut._log_destroy_pending = False - nfuts += 1 arg_to_fut[arg] = fut if fut.done(): done_futs.append(fut) else: + if cur_task is not None: + futures.future_add_to_awaited_by(fut, cur_task) fut.add_done_callback(_done_callback) else: @@ -862,6 +908,25 @@ def _done_callback(fut): return outer +def _log_on_exception(fut): + if fut.cancelled(): + return + + exc = fut.exception() + if exc is None: + return + + context = { + 'message': + f'{exc.__class__.__name__} exception in shielded future', + 'exception': exc, + 'future': fut, + } + if fut._source_traceback: + context['source_traceback'] = fut._source_traceback + fut._loop.call_exception_handler(context) + + def shield(arg): """Wait for a future, shielding it from cancellation. @@ -902,11 +967,16 @@ def shield(arg): loop = futures._get_loop(inner) outer = loop.create_future() + if loop is not None and (cur_task := current_task(loop)) is not None: + futures.future_add_to_awaited_by(inner, cur_task) + else: + cur_task = None + + def _clear_awaited_by_callback(inner): + futures.future_discard_from_awaited_by(inner, cur_task) + def _inner_done_callback(inner): if outer.cancelled(): - if not inner.cancelled(): - # Mark inner's result as retrieved. - inner.exception() return if inner.cancelled(): @@ -918,10 +988,16 @@ def _inner_done_callback(inner): else: outer.set_result(inner.result()) - def _outer_done_callback(outer): if not inner.done(): inner.remove_done_callback(_inner_done_callback) + # Keep only one callback to log on cancel + inner.remove_done_callback(_log_on_exception) + inner.add_done_callback(_log_on_exception) + + if cur_task is not None: + inner.add_done_callback(_clear_awaited_by_callback) + inner.add_done_callback(_inner_done_callback) outer.add_done_callback(_outer_done_callback) @@ -970,9 +1046,9 @@ def create_eager_task_factory(custom_task_constructor): used. E.g. `loop.set_task_factory(asyncio.eager_task_factory)`. """ - def factory(loop, coro, *, name=None, context=None): + def factory(loop, coro, *, eager_start=True, **kwargs): return custom_task_constructor( - coro, loop=loop, name=name, context=context, eager_start=True) + coro, loop=loop, eager_start=eager_start, **kwargs) return factory @@ -1044,14 +1120,13 @@ def _unregister_eager_task(task): _py_enter_task = _enter_task _py_leave_task = _leave_task _py_swap_current_task = _swap_current_task - +_py_all_tasks = all_tasks try: from _asyncio import (_register_task, _register_eager_task, _unregister_task, _unregister_eager_task, _enter_task, _leave_task, _swap_current_task, - _scheduled_tasks, _eager_tasks, _current_tasks, - current_task) + current_task, all_tasks) except ImportError: pass else: @@ -1063,3 +1138,4 @@ def _unregister_eager_task(task): _c_enter_task = _enter_task _c_leave_task = _leave_task _c_swap_current_task = _swap_current_task + _c_all_tasks = all_tasks diff --git a/PythonLib/full/asyncio/timeouts.py b/PythonLib/full/asyncio/timeouts.py index 30042abb..09342dc7 100644 --- a/PythonLib/full/asyncio/timeouts.py +++ b/PythonLib/full/asyncio/timeouts.py @@ -1,7 +1,6 @@ import enum from types import TracebackType -from typing import final, Optional, Type from . import events from . import exceptions @@ -23,14 +22,13 @@ class _State(enum.Enum): EXITED = "finished" -@final class Timeout: """Asynchronous context manager for cancelling overdue coroutines. Use `timeout()` or `timeout_at()` rather than instantiating this class directly. """ - def __init__(self, when: Optional[float]) -> None: + def __init__(self, when: float | None) -> None: """Schedule a timeout that will trigger at a given loop time. - If `when` is `None`, the timeout will never trigger. @@ -39,15 +37,15 @@ def __init__(self, when: Optional[float]) -> None: """ self._state = _State.CREATED - self._timeout_handler: Optional[events.TimerHandle] = None - self._task: Optional[tasks.Task] = None + self._timeout_handler: events.TimerHandle | None = None + self._task: tasks.Task | None = None self._when = when - def when(self) -> Optional[float]: + def when(self) -> float | None: """Return the current deadline.""" return self._when - def reschedule(self, when: Optional[float]) -> None: + def reschedule(self, when: float | None) -> None: """Reschedule the timeout.""" if self._state is not _State.ENTERED: if self._state is _State.CREATED: @@ -96,10 +94,10 @@ async def __aenter__(self) -> "Timeout": async def __aexit__( self, - exc_type: Optional[Type[BaseException]], - exc_val: Optional[BaseException], - exc_tb: Optional[TracebackType], - ) -> Optional[bool]: + exc_type: type[BaseException] | None, + exc_val: BaseException | None, + exc_tb: TracebackType | None, + ) -> bool | None: assert self._state in (_State.ENTERED, _State.EXPIRING) if self._timeout_handler is not None: @@ -109,10 +107,16 @@ async def __aexit__( if self._state is _State.EXPIRING: self._state = _State.EXPIRED - if self._task.uncancel() <= self._cancelling and exc_type is exceptions.CancelledError: + if self._task.uncancel() <= self._cancelling and exc_type is not None: # Since there are no new cancel requests, we're # handling this. - raise TimeoutError from exc_val + if issubclass(exc_type, exceptions.CancelledError): + raise TimeoutError from exc_val + elif exc_val is not None: + self._insert_timeout_error(exc_val) + if isinstance(exc_val, ExceptionGroup): + for exc in exc_val.exceptions: + self._insert_timeout_error(exc) elif self._state is _State.ENTERED: self._state = _State.EXITED @@ -125,8 +129,18 @@ def _on_timeout(self) -> None: # drop the reference early self._timeout_handler = None + @staticmethod + def _insert_timeout_error(exc_val: BaseException) -> None: + while exc_val.__context__ is not None: + if isinstance(exc_val.__context__, exceptions.CancelledError): + te = TimeoutError() + te.__context__ = te.__cause__ = exc_val.__context__ + exc_val.__context__ = te + break + exc_val = exc_val.__context__ -def timeout(delay: Optional[float]) -> Timeout: + +def timeout(delay: float | None) -> Timeout: """Timeout async context manager. Useful in cases when you want to apply timeout logic around block @@ -146,7 +160,7 @@ def timeout(delay: Optional[float]) -> Timeout: return Timeout(loop.time() + delay if delay is not None else None) -def timeout_at(when: Optional[float]) -> Timeout: +def timeout_at(when: float | None) -> Timeout: """Schedule the timeout at absolute time. Like timeout() but argument gives absolute time in the same clock system diff --git a/PythonLib/full/asyncio/tools.py b/PythonLib/full/asyncio/tools.py new file mode 100644 index 00000000..f39e11fd --- /dev/null +++ b/PythonLib/full/asyncio/tools.py @@ -0,0 +1,276 @@ +"""Tools to analyze tasks running in asyncio programs.""" + +from collections import defaultdict, namedtuple +from itertools import count +from enum import Enum +import sys +from _remote_debugging import RemoteUnwinder, FrameInfo + +class NodeType(Enum): + COROUTINE = 1 + TASK = 2 + + +class CycleFoundException(Exception): + """Raised when there is a cycle when drawing the call tree.""" + def __init__( + self, + cycles: list[list[int]], + id2name: dict[int, str], + ) -> None: + super().__init__(cycles, id2name) + self.cycles = cycles + self.id2name = id2name + + + +# ─── indexing helpers ─────────────────────────────────────────── +def _format_stack_entry(elem: str|FrameInfo) -> str: + if not isinstance(elem, str): + if elem.lineno == 0 and elem.filename == "": + return f"{elem.funcname}" + else: + return f"{elem.funcname} {elem.filename}:{elem.lineno}" + return elem + + +def _index(result): + id2name, awaits, task_stacks = {}, [], {} + for awaited_info in result: + for task_info in awaited_info.awaited_by: + task_id = task_info.task_id + task_name = task_info.task_name + id2name[task_id] = task_name + + # Store the internal coroutine stack for this task + if task_info.coroutine_stack: + for coro_info in task_info.coroutine_stack: + call_stack = coro_info.call_stack + internal_stack = [_format_stack_entry(frame) for frame in call_stack] + task_stacks[task_id] = internal_stack + + # Add the awaited_by relationships (external dependencies) + if task_info.awaited_by: + for coro_info in task_info.awaited_by: + call_stack = coro_info.call_stack + parent_task_id = coro_info.task_name + stack = [_format_stack_entry(frame) for frame in call_stack] + awaits.append((parent_task_id, stack, task_id)) + return id2name, awaits, task_stacks + + +def _build_tree(id2name, awaits, task_stacks): + id2label = {(NodeType.TASK, tid): name for tid, name in id2name.items()} + children = defaultdict(list) + cor_nodes = defaultdict(dict) # Maps parent -> {frame_name: node_key} + next_cor_id = count(1) + + def get_or_create_cor_node(parent, frame): + """Get existing coroutine node or create new one under parent""" + if frame in cor_nodes[parent]: + return cor_nodes[parent][frame] + + node_key = (NodeType.COROUTINE, f"c{next(next_cor_id)}") + id2label[node_key] = frame + children[parent].append(node_key) + cor_nodes[parent][frame] = node_key + return node_key + + # Build task dependency tree with coroutine frames + for parent_id, stack, child_id in awaits: + cur = (NodeType.TASK, parent_id) + for frame in reversed(stack): + cur = get_or_create_cor_node(cur, frame) + + child_key = (NodeType.TASK, child_id) + if child_key not in children[cur]: + children[cur].append(child_key) + + # Add coroutine stacks for leaf tasks + awaiting_tasks = {parent_id for parent_id, _, _ in awaits} + for task_id in id2name: + if task_id not in awaiting_tasks and task_id in task_stacks: + cur = (NodeType.TASK, task_id) + for frame in reversed(task_stacks[task_id]): + cur = get_or_create_cor_node(cur, frame) + + return id2label, children + + +def _roots(id2label, children): + all_children = {c for kids in children.values() for c in kids} + return [n for n in id2label if n not in all_children] + +# ─── detect cycles in the task-to-task graph ─────────────────────── +def _task_graph(awaits): + """Return {parent_task_id: {child_task_id, …}, …}.""" + g = defaultdict(set) + for parent_id, _stack, child_id in awaits: + g[parent_id].add(child_id) + return g + + +def _find_cycles(graph): + """ + Depth-first search for back-edges. + + Returns a list of cycles (each cycle is a list of task-ids) or an + empty list if the graph is acyclic. + """ + WHITE, GREY, BLACK = 0, 1, 2 + color = defaultdict(lambda: WHITE) + path, cycles = [], [] + + def dfs(v): + color[v] = GREY + path.append(v) + for w in graph.get(v, ()): + if color[w] == WHITE: + dfs(w) + elif color[w] == GREY: # back-edge → cycle! + i = path.index(w) + cycles.append(path[i:] + [w]) # make a copy + color[v] = BLACK + path.pop() + + for v in list(graph): + if color[v] == WHITE: + dfs(v) + return cycles + + +# ─── PRINT TREE FUNCTION ─────────────────────────────────────── +def get_all_awaited_by(pid): + unwinder = RemoteUnwinder(pid) + return unwinder.get_all_awaited_by() + + +def build_async_tree(result, task_emoji="(T)", cor_emoji=""): + """ + Build a list of strings for pretty-print an async call tree. + + The call tree is produced by `get_all_async_stacks()`, prefixing tasks + with `task_emoji` and coroutine frames with `cor_emoji`. + """ + id2name, awaits, task_stacks = _index(result) + g = _task_graph(awaits) + cycles = _find_cycles(g) + if cycles: + raise CycleFoundException(cycles, id2name) + labels, children = _build_tree(id2name, awaits, task_stacks) + + def pretty(node): + flag = task_emoji if node[0] == NodeType.TASK else cor_emoji + return f"{flag} {labels[node]}" + + def render(node, prefix="", last=True, buf=None): + if buf is None: + buf = [] + buf.append(f"{prefix}{'└── ' if last else '├── '}{pretty(node)}") + new_pref = prefix + (" " if last else "│ ") + kids = children.get(node, []) + for i, kid in enumerate(kids): + render(kid, new_pref, i == len(kids) - 1, buf) + return buf + + return [render(root) for root in _roots(labels, children)] + + +def build_task_table(result): + id2name, _, _ = _index(result) + table = [] + + for awaited_info in result: + thread_id = awaited_info.thread_id + for task_info in awaited_info.awaited_by: + # Get task info + task_id = task_info.task_id + task_name = task_info.task_name + + # Build coroutine stack string + frames = [frame for coro in task_info.coroutine_stack + for frame in coro.call_stack] + coro_stack = " -> ".join(_format_stack_entry(x).split(" ")[0] + for x in frames) + + # Handle tasks with no awaiters + if not task_info.awaited_by: + table.append([thread_id, hex(task_id), task_name, coro_stack, + "", "", "0x0"]) + continue + + # Handle tasks with awaiters + for coro_info in task_info.awaited_by: + parent_id = coro_info.task_name + awaiter_frames = [_format_stack_entry(x).split(" ")[0] + for x in coro_info.call_stack] + awaiter_chain = " -> ".join(awaiter_frames) + awaiter_name = id2name.get(parent_id, "Unknown") + parent_id_str = (hex(parent_id) if isinstance(parent_id, int) + else str(parent_id)) + + table.append([thread_id, hex(task_id), task_name, coro_stack, + awaiter_chain, awaiter_name, parent_id_str]) + + return table + +def _print_cycle_exception(exception: CycleFoundException): + print("ERROR: await-graph contains cycles - cannot print a tree!", file=sys.stderr) + print("", file=sys.stderr) + for c in exception.cycles: + inames = " → ".join(exception.id2name.get(tid, hex(tid)) for tid in c) + print(f"cycle: {inames}", file=sys.stderr) + + +def exit_with_permission_help_text(): + """ + Prints a message pointing to platform-specific permission help text and exits the program. + This function is called when a PermissionError is encountered while trying + to attach to a process. + """ + print( + "Error: The specified process cannot be attached to due to insufficient permissions.\n" + "See the Python documentation for details on required privileges and troubleshooting:\n" + "https://docs.python.org/3.14/howto/remote_debugging.html#permission-requirements\n" + ) + sys.exit(1) + + +def _get_awaited_by_tasks(pid: int) -> list: + try: + return get_all_awaited_by(pid) + except RuntimeError as e: + while e.__context__ is not None: + e = e.__context__ + print(f"Error retrieving tasks: {e}") + sys.exit(1) + except PermissionError as e: + exit_with_permission_help_text() + + +def display_awaited_by_tasks_table(pid: int) -> None: + """Build and print a table of all pending tasks under `pid`.""" + + tasks = _get_awaited_by_tasks(pid) + table = build_task_table(tasks) + # Print the table in a simple tabular format + print( + f"{'tid':<10} {'task id':<20} {'task name':<20} {'coroutine stack':<50} {'awaiter chain':<50} {'awaiter name':<15} {'awaiter id':<15}" + ) + print("-" * 180) + for row in table: + print(f"{row[0]:<10} {row[1]:<20} {row[2]:<20} {row[3]:<50} {row[4]:<50} {row[5]:<15} {row[6]:<15}") + + +def display_awaited_by_tasks_tree(pid: int) -> None: + """Build and print a tree of all pending tasks under `pid`.""" + + tasks = _get_awaited_by_tasks(pid) + try: + result = build_async_tree(tasks) + except CycleFoundException as e: + _print_cycle_exception(e) + sys.exit(1) + + for tree in result: + print("\n".join(tree)) diff --git a/PythonLib/full/asyncio/transports.py b/PythonLib/full/asyncio/transports.py index 30fd41d4..34c7ad44 100644 --- a/PythonLib/full/asyncio/transports.py +++ b/PythonLib/full/asyncio/transports.py @@ -181,6 +181,8 @@ def sendto(self, data, addr=None): to be sent out asynchronously. addr is target socket address. If addr is None use target address pointed on transport creation. + If data is an empty bytes object a zero-length datagram will be + sent. """ raise NotImplementedError diff --git a/PythonLib/full/asyncio/unix_events.py b/PythonLib/full/asyncio/unix_events.py index f2e920ad..1c145812 100644 --- a/PythonLib/full/asyncio/unix_events.py +++ b/PythonLib/full/asyncio/unix_events.py @@ -28,10 +28,7 @@ __all__ = ( 'SelectorEventLoop', - 'AbstractChildWatcher', 'SafeChildWatcher', - 'FastChildWatcher', 'PidfdChildWatcher', - 'MultiLoopChildWatcher', 'ThreadedChildWatcher', - 'DefaultEventLoopPolicy', + 'EventLoop', ) @@ -63,6 +60,11 @@ class _UnixSelectorEventLoop(selector_events.BaseSelectorEventLoop): def __init__(self, selector=None): super().__init__(selector) self._signal_handlers = {} + self._unix_server_sockets = {} + if can_use_pidfd(): + self._watcher = _PidfdChildWatcher() + else: + self._watcher = _ThreadedChildWatcher() def close(self): super().close() @@ -92,7 +94,7 @@ def add_signal_handler(self, sig, callback, *args): Raise RuntimeError if there is a problem setting up the handler. """ if (coroutines.iscoroutine(callback) or - coroutines.iscoroutinefunction(callback)): + coroutines._iscoroutinefunction(callback)): raise TypeError("coroutines cannot be used " "with add_signal_handler()") self._check_signal(sig) @@ -195,33 +197,22 @@ def _make_write_pipe_transport(self, pipe, protocol, waiter=None, async def _make_subprocess_transport(self, protocol, args, shell, stdin, stdout, stderr, bufsize, extra=None, **kwargs): - with warnings.catch_warnings(): - warnings.simplefilter('ignore', DeprecationWarning) - watcher = events.get_child_watcher() - - with watcher: - if not watcher.is_active(): - # Check early. - # Raising exception before process creation - # prevents subprocess execution if the watcher - # is not ready to handle it. - raise RuntimeError("asyncio.get_child_watcher() is not activated, " - "subprocess support is not installed.") - waiter = self.create_future() - transp = _UnixSubprocessTransport(self, protocol, args, shell, - stdin, stdout, stderr, bufsize, - waiter=waiter, extra=extra, - **kwargs) - watcher.add_child_handler(transp.get_pid(), - self._child_watcher_callback, transp) - try: - await waiter - except (SystemExit, KeyboardInterrupt): - raise - except BaseException: - transp.close() - await transp._wait() - raise + watcher = self._watcher + waiter = self.create_future() + transp = _UnixSubprocessTransport(self, protocol, args, shell, + stdin, stdout, stderr, bufsize, + waiter=waiter, extra=extra, + **kwargs) + watcher.add_child_handler(transp.get_pid(), + self._child_watcher_callback, transp) + try: + await waiter + except (SystemExit, KeyboardInterrupt): + raise + except BaseException: + transp.close() + await transp._wait() + raise return transp @@ -283,7 +274,7 @@ async def create_unix_server( sock=None, backlog=100, ssl=None, ssl_handshake_timeout=None, ssl_shutdown_timeout=None, - start_serving=True): + start_serving=True, cleanup_socket=True): if isinstance(ssl, bool): raise TypeError('ssl argument must be an SSLContext or None') @@ -339,6 +330,15 @@ async def create_unix_server( raise ValueError( f'A UNIX Domain Stream Socket was expected, got {sock!r}') + if cleanup_socket: + path = sock.getsockname() + # Check for abstract socket. `str` and `bytes` paths are supported. + if path[0] not in (0, '\x00'): + try: + self._unix_server_sockets[sock] = os.stat(path).st_ino + except FileNotFoundError: + pass + sock.setblocking(False) server = base_events.Server(self, [sock], protocol_factory, ssl, backlog, ssl_handshake_timeout, @@ -393,6 +393,9 @@ def _sock_sendfile_native_impl(self, fut, registered_fd, sock, fileno, fut.set_result(total_sent) return + # On 32-bit architectures truncate to 1GiB to avoid OverflowError + blocksize = min(blocksize, sys.maxsize//2 + 1) + try: sent = os.sendfile(fd, fileno, offset, blocksize) except (BlockingIOError, InterruptedError): @@ -456,6 +459,27 @@ def cb(fut): self.remove_writer(fd) fut.add_done_callback(cb) + def _stop_serving(self, sock): + # Is this a unix socket that needs cleanup? + if sock in self._unix_server_sockets: + path = sock.getsockname() + else: + path = None + + super()._stop_serving(sock) + + if path is not None: + prev_ino = self._unix_server_sockets[sock] + del self._unix_server_sockets[sock] + try: + if os.stat(path).st_ino == prev_ino: + os.unlink(path) + except FileNotFoundError: + pass + except OSError as err: + logger.error('Unable to clean up listening UNIX socket ' + '%r: %r', path, err) + class _UnixReadPipeTransport(transports.ReadTransport): @@ -830,93 +854,7 @@ def _start(self, args, shell, stdin, stdout, stderr, bufsize, **kwargs): stdin_w.close() -class AbstractChildWatcher: - """Abstract base class for monitoring child processes. - - Objects derived from this class monitor a collection of subprocesses and - report their termination or interruption by a signal. - - New callbacks are registered with .add_child_handler(). Starting a new - process must be done within a 'with' block to allow the watcher to suspend - its activity until the new process if fully registered (this is needed to - prevent a race condition in some implementations). - - Example: - with watcher: - proc = subprocess.Popen("sleep 1") - watcher.add_child_handler(proc.pid, callback) - - Notes: - Implementations of this class must be thread-safe. - - Since child watcher objects may catch the SIGCHLD signal and call - waitpid(-1), there should be only one active object per process. - """ - - def __init_subclass__(cls) -> None: - if cls.__module__ != __name__: - warnings._deprecated("AbstractChildWatcher", - "{name!r} is deprecated as of Python 3.12 and will be " - "removed in Python {remove}.", - remove=(3, 14)) - - def add_child_handler(self, pid, callback, *args): - """Register a new child handler. - - Arrange for callback(pid, returncode, *args) to be called when - process 'pid' terminates. Specifying another callback for the same - process replaces the previous handler. - - Note: callback() must be thread-safe. - """ - raise NotImplementedError() - - def remove_child_handler(self, pid): - """Removes the handler for process 'pid'. - - The function returns True if the handler was successfully removed, - False if there was nothing to remove.""" - - raise NotImplementedError() - - def attach_loop(self, loop): - """Attach the watcher to an event loop. - - If the watcher was previously attached to an event loop, then it is - first detached before attaching to the new loop. - - Note: loop may be None. - """ - raise NotImplementedError() - - def close(self): - """Close the watcher. - - This must be called to make sure that any underlying resource is freed. - """ - raise NotImplementedError() - - def is_active(self): - """Return ``True`` if the watcher is active and is used by the event loop. - - Return True if the watcher is installed and ready to handle process exit - notifications. - - """ - raise NotImplementedError() - - def __enter__(self): - """Enter the watcher's context and allow starting new processes - - This function must return self""" - raise NotImplementedError() - - def __exit__(self, a, b, c): - """Exit the watcher's context""" - raise NotImplementedError() - - -class PidfdChildWatcher(AbstractChildWatcher): +class _PidfdChildWatcher: """Child watcher implementation using Linux's pid file descriptors. This child watcher polls process file descriptors (pidfds) to await child @@ -928,21 +866,6 @@ class PidfdChildWatcher(AbstractChildWatcher): recent (5.3+) kernels. """ - def __enter__(self): - return self - - def __exit__(self, exc_type, exc_value, exc_traceback): - pass - - def is_active(self): - return True - - def close(self): - pass - - def attach_loop(self, loop): - pass - def add_child_handler(self, pid, callback, *args): loop = events.get_running_loop() pidfd = os.pidfd_open(pid) @@ -967,386 +890,7 @@ def _do_wait(self, pid, pidfd, callback, args): os.close(pidfd) callback(pid, returncode, *args) - def remove_child_handler(self, pid): - # asyncio never calls remove_child_handler() !!! - # The method is no-op but is implemented because - # abstract base classes require it. - return True - - -class BaseChildWatcher(AbstractChildWatcher): - - def __init__(self): - self._loop = None - self._callbacks = {} - - def close(self): - self.attach_loop(None) - - def is_active(self): - return self._loop is not None and self._loop.is_running() - - def _do_waitpid(self, expected_pid): - raise NotImplementedError() - - def _do_waitpid_all(self): - raise NotImplementedError() - - def attach_loop(self, loop): - assert loop is None or isinstance(loop, events.AbstractEventLoop) - - if self._loop is not None and loop is None and self._callbacks: - warnings.warn( - 'A loop is being detached ' - 'from a child watcher with pending handlers', - RuntimeWarning) - - if self._loop is not None: - self._loop.remove_signal_handler(signal.SIGCHLD) - - self._loop = loop - if loop is not None: - loop.add_signal_handler(signal.SIGCHLD, self._sig_chld) - - # Prevent a race condition in case a child terminated - # during the switch. - self._do_waitpid_all() - - def _sig_chld(self): - try: - self._do_waitpid_all() - except (SystemExit, KeyboardInterrupt): - raise - except BaseException as exc: - # self._loop should always be available here - # as '_sig_chld' is added as a signal handler - # in 'attach_loop' - self._loop.call_exception_handler({ - 'message': 'Unknown exception in SIGCHLD handler', - 'exception': exc, - }) - - -class SafeChildWatcher(BaseChildWatcher): - """'Safe' child watcher implementation. - - This implementation avoids disrupting other code spawning processes by - polling explicitly each process in the SIGCHLD handler instead of calling - os.waitpid(-1). - - This is a safe solution but it has a significant overhead when handling a - big number of children (O(n) each time SIGCHLD is raised) - """ - - def __init__(self): - super().__init__() - warnings._deprecated("SafeChildWatcher", - "{name!r} is deprecated as of Python 3.12 and will be " - "removed in Python {remove}.", - remove=(3, 14)) - - def close(self): - self._callbacks.clear() - super().close() - - def __enter__(self): - return self - - def __exit__(self, a, b, c): - pass - - def add_child_handler(self, pid, callback, *args): - self._callbacks[pid] = (callback, args) - - # Prevent a race condition in case the child is already terminated. - self._do_waitpid(pid) - - def remove_child_handler(self, pid): - try: - del self._callbacks[pid] - return True - except KeyError: - return False - - def _do_waitpid_all(self): - - for pid in list(self._callbacks): - self._do_waitpid(pid) - - def _do_waitpid(self, expected_pid): - assert expected_pid > 0 - - try: - pid, status = os.waitpid(expected_pid, os.WNOHANG) - except ChildProcessError: - # The child process is already reaped - # (may happen if waitpid() is called elsewhere). - pid = expected_pid - returncode = 255 - logger.warning( - "Unknown child process pid %d, will report returncode 255", - pid) - else: - if pid == 0: - # The child process is still alive. - return - - returncode = waitstatus_to_exitcode(status) - if self._loop.get_debug(): - logger.debug('process %s exited with returncode %s', - expected_pid, returncode) - - try: - callback, args = self._callbacks.pop(pid) - except KeyError: # pragma: no cover - # May happen if .remove_child_handler() is called - # after os.waitpid() returns. - if self._loop.get_debug(): - logger.warning("Child watcher got an unexpected pid: %r", - pid, exc_info=True) - else: - callback(pid, returncode, *args) - - -class FastChildWatcher(BaseChildWatcher): - """'Fast' child watcher implementation. - - This implementation reaps every terminated processes by calling - os.waitpid(-1) directly, possibly breaking other code spawning processes - and waiting for their termination. - - There is no noticeable overhead when handling a big number of children - (O(1) each time a child terminates). - """ - def __init__(self): - super().__init__() - self._lock = threading.Lock() - self._zombies = {} - self._forks = 0 - warnings._deprecated("FastChildWatcher", - "{name!r} is deprecated as of Python 3.12 and will be " - "removed in Python {remove}.", - remove=(3, 14)) - - def close(self): - self._callbacks.clear() - self._zombies.clear() - super().close() - - def __enter__(self): - with self._lock: - self._forks += 1 - - return self - - def __exit__(self, a, b, c): - with self._lock: - self._forks -= 1 - - if self._forks or not self._zombies: - return - - collateral_victims = str(self._zombies) - self._zombies.clear() - - logger.warning( - "Caught subprocesses termination from unknown pids: %s", - collateral_victims) - - def add_child_handler(self, pid, callback, *args): - assert self._forks, "Must use the context manager" - - with self._lock: - try: - returncode = self._zombies.pop(pid) - except KeyError: - # The child is running. - self._callbacks[pid] = callback, args - return - - # The child is dead already. We can fire the callback. - callback(pid, returncode, *args) - - def remove_child_handler(self, pid): - try: - del self._callbacks[pid] - return True - except KeyError: - return False - - def _do_waitpid_all(self): - # Because of signal coalescing, we must keep calling waitpid() as - # long as we're able to reap a child. - while True: - try: - pid, status = os.waitpid(-1, os.WNOHANG) - except ChildProcessError: - # No more child processes exist. - return - else: - if pid == 0: - # A child process is still alive. - return - - returncode = waitstatus_to_exitcode(status) - - with self._lock: - try: - callback, args = self._callbacks.pop(pid) - except KeyError: - # unknown child - if self._forks: - # It may not be registered yet. - self._zombies[pid] = returncode - if self._loop.get_debug(): - logger.debug('unknown process %s exited ' - 'with returncode %s', - pid, returncode) - continue - callback = None - else: - if self._loop.get_debug(): - logger.debug('process %s exited with returncode %s', - pid, returncode) - - if callback is None: - logger.warning( - "Caught subprocess termination from unknown pid: " - "%d -> %d", pid, returncode) - else: - callback(pid, returncode, *args) - - -class MultiLoopChildWatcher(AbstractChildWatcher): - """A watcher that doesn't require running loop in the main thread. - - This implementation registers a SIGCHLD signal handler on - instantiation (which may conflict with other code that - install own handler for this signal). - - The solution is safe but it has a significant overhead when - handling a big number of processes (*O(n)* each time a - SIGCHLD is received). - """ - - # Implementation note: - # The class keeps compatibility with AbstractChildWatcher ABC - # To achieve this it has empty attach_loop() method - # and doesn't accept explicit loop argument - # for add_child_handler()/remove_child_handler() - # but retrieves the current loop by get_running_loop() - - def __init__(self): - self._callbacks = {} - self._saved_sighandler = None - warnings._deprecated("MultiLoopChildWatcher", - "{name!r} is deprecated as of Python 3.12 and will be " - "removed in Python {remove}.", - remove=(3, 14)) - - def is_active(self): - return self._saved_sighandler is not None - - def close(self): - self._callbacks.clear() - if self._saved_sighandler is None: - return - - handler = signal.getsignal(signal.SIGCHLD) - if handler != self._sig_chld: - logger.warning("SIGCHLD handler was changed by outside code") - else: - signal.signal(signal.SIGCHLD, self._saved_sighandler) - self._saved_sighandler = None - - def __enter__(self): - return self - - def __exit__(self, exc_type, exc_val, exc_tb): - pass - - def add_child_handler(self, pid, callback, *args): - loop = events.get_running_loop() - self._callbacks[pid] = (loop, callback, args) - - # Prevent a race condition in case the child is already terminated. - self._do_waitpid(pid) - - def remove_child_handler(self, pid): - try: - del self._callbacks[pid] - return True - except KeyError: - return False - - def attach_loop(self, loop): - # Don't save the loop but initialize itself if called first time - # The reason to do it here is that attach_loop() is called from - # unix policy only for the main thread. - # Main thread is required for subscription on SIGCHLD signal - if self._saved_sighandler is not None: - return - - self._saved_sighandler = signal.signal(signal.SIGCHLD, self._sig_chld) - if self._saved_sighandler is None: - logger.warning("Previous SIGCHLD handler was set by non-Python code, " - "restore to default handler on watcher close.") - self._saved_sighandler = signal.SIG_DFL - - # Set SA_RESTART to limit EINTR occurrences. - signal.siginterrupt(signal.SIGCHLD, False) - - def _do_waitpid_all(self): - for pid in list(self._callbacks): - self._do_waitpid(pid) - - def _do_waitpid(self, expected_pid): - assert expected_pid > 0 - - try: - pid, status = os.waitpid(expected_pid, os.WNOHANG) - except ChildProcessError: - # The child process is already reaped - # (may happen if waitpid() is called elsewhere). - pid = expected_pid - returncode = 255 - logger.warning( - "Unknown child process pid %d, will report returncode 255", - pid) - debug_log = False - else: - if pid == 0: - # The child process is still alive. - return - - returncode = waitstatus_to_exitcode(status) - debug_log = True - try: - loop, callback, args = self._callbacks.pop(pid) - except KeyError: # pragma: no cover - # May happen if .remove_child_handler() is called - # after os.waitpid() returns. - logger.warning("Child watcher got an unexpected pid: %r", - pid, exc_info=True) - else: - if loop.is_closed(): - logger.warning("Loop %r that handles pid %r is closed", loop, pid) - else: - if debug_log and loop.get_debug(): - logger.debug('process %s exited with returncode %s', - expected_pid, returncode) - loop.call_soon_threadsafe(callback, pid, returncode, *args) - - def _sig_chld(self, signum, frame): - try: - self._do_waitpid_all() - except (SystemExit, KeyboardInterrupt): - raise - except BaseException: - logger.warning('Unknown exception in SIGCHLD handler', exc_info=True) - - -class ThreadedChildWatcher(AbstractChildWatcher): +class _ThreadedChildWatcher: """Threaded child watcher implementation. The watcher uses a thread per process @@ -1363,18 +907,6 @@ def __init__(self): self._pid_counter = itertools.count(0) self._threads = {} - def is_active(self): - return True - - def close(self): - pass - - def __enter__(self): - return self - - def __exit__(self, exc_type, exc_val, exc_tb): - pass - def __del__(self, _warn=warnings.warn): threads = [thread for thread in list(self._threads.values()) if thread.is_alive()] @@ -1392,15 +924,6 @@ def add_child_handler(self, pid, callback, *args): self._threads[pid] = thread thread.start() - def remove_child_handler(self, pid): - # asyncio never calls remove_child_handler() !!! - # The method is no-op but is implemented because - # abstract base classes require it. - return True - - def attach_loop(self, loop): - pass - def _do_waitpid(self, loop, expected_pid, callback, args): assert expected_pid > 0 @@ -1439,62 +962,11 @@ def can_use_pidfd(): return True -class _UnixDefaultEventLoopPolicy(events.BaseDefaultEventLoopPolicy): - """UNIX event loop policy with a watcher for child processes.""" +class _UnixDefaultEventLoopPolicy(events._BaseDefaultEventLoopPolicy): + """UNIX event loop policy""" _loop_factory = _UnixSelectorEventLoop - def __init__(self): - super().__init__() - self._watcher = None - - def _init_watcher(self): - with events._lock: - if self._watcher is None: # pragma: no branch - if can_use_pidfd(): - self._watcher = PidfdChildWatcher() - else: - self._watcher = ThreadedChildWatcher() - - def set_event_loop(self, loop): - """Set the event loop. - - As a side effect, if a child watcher was set before, then calling - .set_event_loop() from the main thread will call .attach_loop(loop) on - the child watcher. - """ - - super().set_event_loop(loop) - - if (self._watcher is not None and - threading.current_thread() is threading.main_thread()): - self._watcher.attach_loop(loop) - - def get_child_watcher(self): - """Get the watcher for child processes. - - If not yet set, a ThreadedChildWatcher object is automatically created. - """ - if self._watcher is None: - self._init_watcher() - - warnings._deprecated("get_child_watcher", - "{name!r} is deprecated as of Python 3.12 and will be " - "removed in Python {remove}.", remove=(3, 14)) - return self._watcher - - def set_child_watcher(self, watcher): - """Set the watcher for child processes.""" - - assert watcher is None or isinstance(watcher, AbstractChildWatcher) - - if self._watcher is not None: - self._watcher.close() - - self._watcher = watcher - warnings._deprecated("set_child_watcher", - "{name!r} is deprecated as of Python 3.12 and will be " - "removed in Python {remove}.", remove=(3, 14)) - SelectorEventLoop = _UnixSelectorEventLoop -DefaultEventLoopPolicy = _UnixDefaultEventLoopPolicy +_DefaultEventLoopPolicy = _UnixDefaultEventLoopPolicy +EventLoop = SelectorEventLoop diff --git a/PythonLib/full/asyncio/windows_events.py b/PythonLib/full/asyncio/windows_events.py index cb613451..5f75b17d 100644 --- a/PythonLib/full/asyncio/windows_events.py +++ b/PythonLib/full/asyncio/windows_events.py @@ -29,8 +29,8 @@ __all__ = ( 'SelectorEventLoop', 'ProactorEventLoop', 'IocpProactor', - 'DefaultEventLoopPolicy', 'WindowsSelectorEventLoopPolicy', - 'WindowsProactorEventLoopPolicy', + '_DefaultEventLoopPolicy', '_WindowsSelectorEventLoopPolicy', + '_WindowsProactorEventLoopPolicy', 'EventLoop', ) @@ -315,24 +315,25 @@ def __init__(self, proactor=None): proactor = IocpProactor() super().__init__(proactor) - def run_forever(self): - try: - assert self._self_reading_future is None - self.call_soon(self._loop_self_reading) - super().run_forever() - finally: - if self._self_reading_future is not None: - ov = self._self_reading_future._ov - self._self_reading_future.cancel() - # self_reading_future always uses IOCP, so even though it's - # been cancelled, we need to make sure that the IOCP message - # is received so that the kernel is not holding on to the - # memory, possibly causing memory corruption later. Only - # unregister it if IO is complete in all respects. Otherwise - # we need another _poll() later to complete the IO. - if ov is not None and not ov.pending: - self._proactor._unregister(ov) - self._self_reading_future = None + def _run_forever_setup(self): + assert self._self_reading_future is None + self.call_soon(self._loop_self_reading) + super()._run_forever_setup() + + def _run_forever_cleanup(self): + super()._run_forever_cleanup() + if self._self_reading_future is not None: + ov = self._self_reading_future._ov + self._self_reading_future.cancel() + # self_reading_future always uses IOCP, so even though it's + # been cancelled, we need to make sure that the IOCP message + # is received so that the kernel is not holding on to the + # memory, possibly causing memory corruption later. Only + # unregister it if IO is complete in all respects. Otherwise + # we need another _poll() later to complete the IO. + if ov is not None and not ov.pending: + self._proactor._unregister(ov) + self._self_reading_future = None async def create_pipe_connection(self, protocol_factory, address): f = self._proactor.connect_pipe(address) @@ -890,12 +891,13 @@ def callback(f): SelectorEventLoop = _WindowsSelectorEventLoop -class WindowsSelectorEventLoopPolicy(events.BaseDefaultEventLoopPolicy): +class _WindowsSelectorEventLoopPolicy(events._BaseDefaultEventLoopPolicy): _loop_factory = SelectorEventLoop -class WindowsProactorEventLoopPolicy(events.BaseDefaultEventLoopPolicy): +class _WindowsProactorEventLoopPolicy(events._BaseDefaultEventLoopPolicy): _loop_factory = ProactorEventLoop -DefaultEventLoopPolicy = WindowsProactorEventLoopPolicy +_DefaultEventLoopPolicy = _WindowsProactorEventLoopPolicy +EventLoop = ProactorEventLoop diff --git a/PythonLib/full/base64.py b/PythonLib/full/base64.py index 846767a3..f95132a4 100644 --- a/PythonLib/full/base64.py +++ b/PythonLib/full/base64.py @@ -1,12 +1,9 @@ -#! /usr/bin/env python3 - """Base16, Base32, Base64 (RFC 3548), Base85 and Ascii85 data encodings""" # Modified 04-Oct-1995 by Jack Jansen to use binascii module # Modified 30-Dec-2003 by Barry Warsaw to add full RFC 3548 support # Modified 22-May-2007 by Guido van Rossum to use bytes everywhere -import re import struct import binascii @@ -18,7 +15,7 @@ 'b64encode', 'b64decode', 'b32encode', 'b32decode', 'b32hexencode', 'b32hexdecode', 'b16encode', 'b16decode', # Base85 and Ascii85 encodings - 'b85encode', 'b85decode', 'a85encode', 'a85decode', + 'b85encode', 'b85decode', 'a85encode', 'a85decode', 'z85encode', 'z85decode', # Standard Base64 encoding 'standard_b64encode', 'standard_b64decode', # Some common Base64 alternatives. As referenced by RFC 3458, see thread @@ -164,7 +161,6 @@ def urlsafe_b64decode(s): _b32rev = {} def _b32encode(alphabet, s): - global _b32tab2 # Delay the initialization of the table to not waste memory # if the function is never called if alphabet not in _b32tab2: @@ -200,7 +196,6 @@ def _b32encode(alphabet, s): return bytes(encoded) def _b32decode(alphabet, s, casefold=False, map01=None): - global _b32rev # Delay the initialization of the table to not waste memory # if the function is never called if alphabet not in _b32rev: @@ -288,7 +283,7 @@ def b16decode(s, casefold=False): s = _bytes_from_decode_data(s) if casefold: s = s.upper() - if re.search(b'[^0-9A-F]', s): + if s.translate(None, delete=b'0123456789ABCDEF'): raise binascii.Error('Non-base16 digit found') return binascii.unhexlify(s) @@ -467,9 +462,12 @@ def b85decode(b): # Delay the initialization of tables to not waste memory # if the function is never called if _b85dec is None: - _b85dec = [None] * 256 + # we don't assign to _b85dec directly to avoid issues when + # multiple threads call this function simultaneously + b85dec_tmp = [None] * 256 for i, c in enumerate(_b85alphabet): - _b85dec[c] = i + b85dec_tmp[c] = i + _b85dec = b85dec_tmp b = _bytes_from_decode_data(b) padding = (-len(b)) % 5 @@ -499,6 +497,33 @@ def b85decode(b): result = result[:-padding] return result +_z85alphabet = (b'0123456789abcdefghijklmnopqrstuvwxyz' + b'ABCDEFGHIJKLMNOPQRSTUVWXYZ.-:+=^!/*?&<>()[]{}@%$#') +# Translating b85 valid but z85 invalid chars to b'\x00' is required +# to prevent them from being decoded as b85 valid chars. +_z85_b85_decode_diff = b';_`|~' +_z85_decode_translation = bytes.maketrans( + _z85alphabet + _z85_b85_decode_diff, + _b85alphabet + b'\x00' * len(_z85_b85_decode_diff) +) +_z85_encode_translation = bytes.maketrans(_b85alphabet, _z85alphabet) + +def z85encode(s): + """Encode bytes-like object b in z85 format and return a bytes object.""" + return b85encode(s).translate(_z85_encode_translation) + +def z85decode(s): + """Decode the z85-encoded bytes-like object or ASCII string b + + The result is returned as a bytes object. + """ + s = _bytes_from_decode_data(s) + s = s.translate(_z85_decode_translation) + try: + return b85decode(s) + except ValueError as e: + raise ValueError(e.args[0].replace('base85', 'z85')) from None + # Legacy interface. This code could be cleaned up since I don't believe # binascii has any line length limitations. It just doesn't seem worth it # though. The files should be opened in binary mode. @@ -579,7 +604,14 @@ def main(): with open(args[0], 'rb') as f: func(f, sys.stdout.buffer) else: - func(sys.stdin.buffer, sys.stdout.buffer) + if sys.stdin.isatty(): + # gh-138775: read terminal input data all at once to detect EOF + import io + data = sys.stdin.buffer.read() + buffer = io.BytesIO(data) + else: + buffer = sys.stdin.buffer + func(buffer, sys.stdout.buffer) if __name__ == '__main__': diff --git a/PythonLib/full/bdb.py b/PythonLib/full/bdb.py index 085c17ce..79da4bab 100644 --- a/PythonLib/full/bdb.py +++ b/PythonLib/full/bdb.py @@ -2,7 +2,9 @@ import fnmatch import sys +import threading import os +import weakref from contextlib import contextmanager from inspect import CO_GENERATOR, CO_COROUTINE, CO_ASYNC_GENERATOR @@ -15,6 +17,166 @@ class BdbQuit(Exception): """Exception to give up completely.""" +E = sys.monitoring.events + +class _MonitoringTracer: + EVENT_CALLBACK_MAP = { + E.PY_START: 'call', + E.PY_RESUME: 'call', + E.PY_THROW: 'call', + E.LINE: 'line', + E.JUMP: 'jump', + E.PY_RETURN: 'return', + E.PY_YIELD: 'return', + E.PY_UNWIND: 'unwind', + E.RAISE: 'exception', + E.STOP_ITERATION: 'exception', + E.INSTRUCTION: 'opcode', + } + + GLOBAL_EVENTS = E.PY_START | E.PY_RESUME | E.PY_THROW | E.PY_UNWIND | E.RAISE + LOCAL_EVENTS = E.LINE | E.JUMP | E.PY_RETURN | E.PY_YIELD | E.STOP_ITERATION + + def __init__(self): + self._tool_id = sys.monitoring.DEBUGGER_ID + self._name = 'bdbtracer' + self._tracefunc = None + self._disable_current_event = False + self._tracing_thread = None + self._enabled = False + + def start_trace(self, tracefunc): + self._tracefunc = tracefunc + self._tracing_thread = threading.current_thread() + curr_tool = sys.monitoring.get_tool(self._tool_id) + if curr_tool is None: + sys.monitoring.use_tool_id(self._tool_id, self._name) + elif curr_tool == self._name: + sys.monitoring.clear_tool_id(self._tool_id) + else: + raise ValueError('Another debugger is using the monitoring tool') + E = sys.monitoring.events + all_events = 0 + for event, cb_name in self.EVENT_CALLBACK_MAP.items(): + callback = self.callback_wrapper(getattr(self, f'{cb_name}_callback'), event) + sys.monitoring.register_callback(self._tool_id, event, callback) + if event != E.INSTRUCTION: + all_events |= event + self.update_local_events() + sys.monitoring.set_events(self._tool_id, self.GLOBAL_EVENTS) + self._enabled = True + + def stop_trace(self): + self._enabled = False + self._tracing_thread = None + curr_tool = sys.monitoring.get_tool(self._tool_id) + if curr_tool != self._name: + return + sys.monitoring.clear_tool_id(self._tool_id) + sys.monitoring.free_tool_id(self._tool_id) + + def disable_current_event(self): + self._disable_current_event = True + + def restart_events(self): + if sys.monitoring.get_tool(self._tool_id) == self._name: + sys.monitoring.restart_events() + + def callback_wrapper(self, func, event): + import functools + + @functools.wraps(func) + def wrapper(*args): + if self._tracing_thread != threading.current_thread(): + return + try: + frame = sys._getframe().f_back + ret = func(frame, *args) + if self._enabled and frame.f_trace: + self.update_local_events() + if ( + self._disable_current_event + and event not in (E.PY_THROW, E.PY_UNWIND, E.RAISE) + ): + return sys.monitoring.DISABLE + else: + return ret + except BaseException: + self.stop_trace() + sys._getframe().f_back.f_trace = None + raise + finally: + self._disable_current_event = False + + return wrapper + + def call_callback(self, frame, code, *args): + local_tracefunc = self._tracefunc(frame, 'call', None) + if local_tracefunc is not None: + frame.f_trace = local_tracefunc + if self._enabled: + sys.monitoring.set_local_events(self._tool_id, code, self.LOCAL_EVENTS) + + def return_callback(self, frame, code, offset, retval): + if frame.f_trace: + frame.f_trace(frame, 'return', retval) + + def unwind_callback(self, frame, code, *args): + if frame.f_trace: + frame.f_trace(frame, 'return', None) + + def line_callback(self, frame, code, *args): + if frame.f_trace and frame.f_trace_lines: + frame.f_trace(frame, 'line', None) + + def jump_callback(self, frame, code, inst_offset, dest_offset): + if dest_offset > inst_offset: + return sys.monitoring.DISABLE + inst_lineno = self._get_lineno(code, inst_offset) + dest_lineno = self._get_lineno(code, dest_offset) + if inst_lineno != dest_lineno: + return sys.monitoring.DISABLE + if frame.f_trace and frame.f_trace_lines: + frame.f_trace(frame, 'line', None) + + def exception_callback(self, frame, code, offset, exc): + if frame.f_trace: + if exc.__traceback__ and hasattr(exc.__traceback__, 'tb_frame'): + tb = exc.__traceback__ + while tb: + if tb.tb_frame.f_locals.get('self') is self: + return + tb = tb.tb_next + frame.f_trace(frame, 'exception', (type(exc), exc, exc.__traceback__)) + + def opcode_callback(self, frame, code, offset): + if frame.f_trace and frame.f_trace_opcodes: + frame.f_trace(frame, 'opcode', None) + + def update_local_events(self, frame=None): + if sys.monitoring.get_tool(self._tool_id) != self._name: + return + if frame is None: + frame = sys._getframe().f_back + while frame is not None: + if frame.f_trace is not None: + if frame.f_trace_opcodes: + events = self.LOCAL_EVENTS | E.INSTRUCTION + else: + events = self.LOCAL_EVENTS + sys.monitoring.set_local_events(self._tool_id, frame.f_code, events) + frame = frame.f_back + + def _get_lineno(self, code, offset): + import dis + last_lineno = None + for start, lineno in dis.findlinestarts(code): + if offset < start: + return last_lineno + last_lineno = lineno + return last_lineno + + class Bdb: """Generic Python debugger base class. @@ -29,12 +191,24 @@ class Bdb: is determined by the __name__ in the frame globals. """ - def __init__(self, skip=None): + def __init__(self, skip=None, backend='settrace'): self.skip = set(skip) if skip else None self.breaks = {} self.fncache = {} + self.frame_trace_lines_opcodes = {} self.frame_returning = None + self.trace_opcodes = False self.enterframe = None + self.cmdframe = None + self.cmdlineno = None + self.code_linenos = weakref.WeakKeyDictionary() + self.backend = backend + if backend == 'monitoring': + self.monitoring_tracer = _MonitoringTracer() + elif backend == 'settrace': + self.monitoring_tracer = None + else: + raise ValueError(f"Invalid backend '{backend}'") self._load_breaks() @@ -55,6 +229,18 @@ def canonic(self, filename): self.fncache[filename] = canonic return canonic + def start_trace(self): + if self.monitoring_tracer: + self.monitoring_tracer.start_trace(self.trace_dispatch) + else: + sys.settrace(self.trace_dispatch) + + def stop_trace(self): + if self.monitoring_tracer: + self.monitoring_tracer.stop_trace() + else: + sys.settrace(None) + def reset(self): """Set values of attributes as ready to start debugging.""" import linecache @@ -110,6 +296,8 @@ def trace_dispatch(self, frame, event, arg): return self.trace_dispatch if event == 'c_return': return self.trace_dispatch + if event == 'opcode': + return self.dispatch_opcode(frame, arg) print('bdb.Bdb.dispatch: unknown debugging event:', repr(event)) return self.trace_dispatch @@ -120,9 +308,17 @@ def dispatch_line(self, frame): self.user_line(). Raise BdbQuit if self.quitting is set. Return self.trace_dispatch to continue tracing in this scope. """ - if self.stop_here(frame) or self.break_here(frame): + # GH-136057 + # For line events, we don't want to stop at the same line where + # the latest next/step command was issued. + if (self.stop_here(frame) or self.break_here(frame)) and not ( + self.cmdframe == frame and self.cmdlineno == frame.f_lineno + ): self.user_line(frame) + self.restart_events() if self.quitting: raise BdbQuit + elif not self.get_break(frame.f_code.co_filename, frame.f_lineno): + self.disable_current_event() return self.trace_dispatch def dispatch_call(self, frame, arg): @@ -138,12 +334,18 @@ def dispatch_call(self, frame, arg): self.botframe = frame.f_back # (CT) Note that this may also be None! return self.trace_dispatch if not (self.stop_here(frame) or self.break_anywhere(frame)): - # No need to trace this function + # We already know there's no breakpoint in this function + # If it's a next/until/return command, we don't need any CALL event + # and we don't need to set the f_trace on any new frame. + # If it's a step command, it must either hit stop_here, or skip the + # whole module. Either way, we don't need the CALL event here. + self.disable_current_event() return # None # Ignore call events in generator except when stepping. if self.stopframe and frame.f_code.co_flags & GENERATOR_AND_COROUTINE_FLAGS: return self.trace_dispatch self.user_call(frame, arg) + self.restart_events() if self.quitting: raise BdbQuit return self.trace_dispatch @@ -157,10 +359,14 @@ def dispatch_return(self, frame, arg): if self.stop_here(frame) or frame == self.returnframe: # Ignore return events in generator except when stepping. if self.stopframe and frame.f_code.co_flags & GENERATOR_AND_COROUTINE_FLAGS: + # It's possible to trigger a StopIteration exception in + # the caller so we must set the trace function in the caller + self._set_caller_tracefunc(frame) return self.trace_dispatch try: self.frame_returning = frame self.user_return(frame, arg) + self.restart_events() finally: self.frame_returning = None if self.quitting: raise BdbQuit @@ -188,6 +394,7 @@ def dispatch_exception(self, frame, arg): if not (frame.f_code.co_flags & GENERATOR_AND_COROUTINE_FLAGS and arg[0] is StopIteration and arg[2] is None): self.user_exception(frame, arg) + self.restart_events() if self.quitting: raise BdbQuit # Stop at the StopIteration or GeneratorExit exception when the user # has set stopframe in a generator by issuing a return command, or a @@ -197,10 +404,26 @@ def dispatch_exception(self, frame, arg): and self.stopframe.f_code.co_flags & GENERATOR_AND_COROUTINE_FLAGS and arg[0] in (StopIteration, GeneratorExit)): self.user_exception(frame, arg) + self.restart_events() if self.quitting: raise BdbQuit return self.trace_dispatch + def dispatch_opcode(self, frame, arg): + """Invoke user function and return trace function for opcode event. + If the debugger stops on the current opcode, invoke + self.user_opcode(). Raise BdbQuit if self.quitting is set. + Return self.trace_dispatch to continue tracing in this scope. + + Opcode event will always trigger the user callback. For now the only + opcode event is from an inline set_trace() and we want to stop there + unconditionally. + """ + self.user_opcode(frame) + self.restart_events() + if self.quitting: raise BdbQuit + return self.trace_dispatch + # Normally derived classes don't override the following # methods, but they may if they want to redefine the # definition of stopping and breakpoints. @@ -264,9 +487,25 @@ def do_clear(self, arg): raise NotImplementedError("subclass of bdb must implement do_clear()") def break_anywhere(self, frame): - """Return True if there is any breakpoint for frame's filename. + """Return True if there is any breakpoint in that frame """ - return self.canonic(frame.f_code.co_filename) in self.breaks + filename = self.canonic(frame.f_code.co_filename) + if filename not in self.breaks: + return False + for lineno in self.breaks[filename]: + if self._lineno_in_frame(lineno, frame): + return True + return False + + def _lineno_in_frame(self, lineno, frame): + """Return True if the line number is in the frame's code object. + """ + code = frame.f_code + if lineno < code.co_firstlineno: + return False + if code not in self.code_linenos: + self.code_linenos[code] = set(lineno for _, _, lineno in code.co_lines()) + return lineno in self.code_linenos[code] # Derived classes should override the user_* methods # to gain control. @@ -287,7 +526,24 @@ def user_exception(self, frame, exc_info): """Called when we stop on an exception.""" pass - def _set_stopinfo(self, stopframe, returnframe, stoplineno=0): + def user_opcode(self, frame): + """Called when we are about to execute an opcode.""" + pass + + def _set_trace_opcodes(self, trace_opcodes): + if trace_opcodes != self.trace_opcodes: + self.trace_opcodes = trace_opcodes + frame = self.enterframe + while frame is not None: + frame.f_trace_opcodes = trace_opcodes + if frame is self.botframe: + break + frame = frame.f_back + if self.monitoring_tracer: + self.monitoring_tracer.update_local_events() + + def _set_stopinfo(self, stopframe, returnframe, stoplineno=0, opcode=False, + cmdframe=None, cmdlineno=None): """Set the attributes for stopping. If stoplineno is greater than or equal to 0, then stop at line @@ -300,6 +556,11 @@ def _set_stopinfo(self, stopframe, returnframe, stoplineno=0): # stoplineno >= 0 means: stop at line >= the stoplineno # stoplineno -1 means: don't stop at all self.stoplineno = stoplineno + # cmdframe/cmdlineno is the frame/line number when the user issued + # step/next commands. + self.cmdframe = cmdframe + self.cmdlineno = cmdlineno + self._set_trace_opcodes(opcode) def _set_caller_tracefunc(self, current_frame): # Issue #13183: pdb skips frames after hitting a breakpoint and running @@ -324,16 +585,22 @@ def set_until(self, frame, lineno=None): def set_step(self): """Stop after one line of code.""" - self._set_stopinfo(None, None) + # set_step() could be called from signal handler so enterframe might be None + self._set_stopinfo(None, None, cmdframe=self.enterframe, + cmdlineno=getattr(self.enterframe, 'f_lineno', None)) + + def set_stepinstr(self): + """Stop before the next instruction.""" + self._set_stopinfo(None, None, opcode=True) def set_next(self, frame): """Stop on the next line in or below the given frame.""" - self._set_stopinfo(frame, None) + self._set_stopinfo(frame, None, cmdframe=frame, cmdlineno=frame.f_lineno) def set_return(self, frame): """Stop when returning from the given frame.""" if frame.f_code.co_flags & GENERATOR_AND_COROUTINE_FLAGS: - self._set_stopinfo(frame, None, -1) + self._set_stopinfo(frame, frame, -1) else: self._set_stopinfo(frame.f_back, frame) @@ -342,7 +609,7 @@ def set_trace(self, frame=None): If frame is not specified, debugging starts from caller's frame. """ - sys.settrace(None) + self.stop_trace() if frame is None: frame = sys._getframe().f_back self.reset() @@ -350,9 +617,13 @@ def set_trace(self, frame=None): while frame: frame.f_trace = self.trace_dispatch self.botframe = frame + self.frame_trace_lines_opcodes[frame] = (frame.f_trace_lines, frame.f_trace_opcodes) + # We need f_trace_lines == True for the debugger to work + frame.f_trace_lines = True frame = frame.f_back - self.set_step() - sys.settrace(self.trace_dispatch) + self.set_stepinstr() + self.enterframe = None + self.start_trace() def set_continue(self): """Stop only at breakpoints or when finished. @@ -363,11 +634,16 @@ def set_continue(self): self._set_stopinfo(self.botframe, None, -1) if not self.breaks: # no breakpoints; run without debugger overhead - sys.settrace(None) + self.stop_trace() frame = sys._getframe().f_back while frame and frame is not self.botframe: del frame.f_trace frame = frame.f_back + for frame, (trace_lines, trace_opcodes) in self.frame_trace_lines_opcodes.items(): + frame.f_trace_lines, frame.f_trace_opcodes = trace_lines, trace_opcodes + if self.backend == 'monitoring': + self.monitoring_tracer.update_local_events() + self.frame_trace_lines_opcodes = {} def set_quit(self): """Set quitting attribute to True. @@ -377,7 +653,7 @@ def set_quit(self): self.stopframe = self.botframe self.returnframe = None self.quitting = True - sys.settrace(None) + self.stop_trace() # Derived classes and clients can call the following methods # to manipulate breakpoints. These methods return an @@ -605,6 +881,16 @@ def format_stack_entry(self, frame_lineno, lprefix=': '): s += f'{lprefix}Warning: lineno is None' return s + def disable_current_event(self): + """Disable the current event.""" + if self.backend == 'monitoring': + self.monitoring_tracer.disable_current_event() + + def restart_events(self): + """Restart all events.""" + if self.backend == 'monitoring': + self.monitoring_tracer.restart_events() + # The following methods can be called by clients to use # a debugger to debug a statement or an expression. # Both can be given as a string, or a code object. @@ -622,14 +908,14 @@ def run(self, cmd, globals=None, locals=None): self.reset() if isinstance(cmd, str): cmd = compile(cmd, "", "exec") - sys.settrace(self.trace_dispatch) + self.start_trace() try: exec(cmd, globals, locals) except BdbQuit: pass finally: self.quitting = True - sys.settrace(None) + self.stop_trace() def runeval(self, expr, globals=None, locals=None): """Debug an expression executed via the eval() function. @@ -642,14 +928,14 @@ def runeval(self, expr, globals=None, locals=None): if locals is None: locals = globals self.reset() - sys.settrace(self.trace_dispatch) + self.start_trace() try: return eval(expr, globals, locals) except BdbQuit: pass finally: self.quitting = True - sys.settrace(None) + self.stop_trace() def runctx(self, cmd, globals, locals): """For backwards-compatibility. Defers to run().""" @@ -664,7 +950,7 @@ def runcall(self, func, /, *args, **kwds): Return the result of the function call. """ self.reset() - sys.settrace(self.trace_dispatch) + self.start_trace() res = None try: res = func(*args, **kwds) @@ -672,7 +958,7 @@ def runcall(self, func, /, *args, **kwds): pass finally: self.quitting = True - sys.settrace(None) + self.stop_trace() return res diff --git a/PythonLib/full/bz2.py b/PythonLib/full/bz2.py index fabe4f73..eb58f4da 100644 --- a/PythonLib/full/bz2.py +++ b/PythonLib/full/bz2.py @@ -10,20 +10,20 @@ __author__ = "Nadeem Vawda " from builtins import open as _builtin_open +from compression._common import _streams import io import os -import _compression from _bz2 import BZ2Compressor, BZ2Decompressor -_MODE_CLOSED = 0 +# Value 0 no longer used _MODE_READ = 1 # Value 2 no longer used _MODE_WRITE = 3 -class BZ2File(_compression.BaseStream): +class BZ2File(_streams.BaseStream): """A file object providing transparent bzip2 (de)compression. @@ -54,7 +54,7 @@ def __init__(self, filename, mode="r", *, compresslevel=9): """ self._fp = None self._closefp = False - self._mode = _MODE_CLOSED + self._mode = None if not (1 <= compresslevel <= 9): raise ValueError("compresslevel must be between 1 and 9") @@ -88,7 +88,7 @@ def __init__(self, filename, mode="r", *, compresslevel=9): raise TypeError("filename must be a str, bytes, file or PathLike object") if self._mode == _MODE_READ: - raw = _compression.DecompressReader(self._fp, + raw = _streams.DecompressReader(self._fp, BZ2Decompressor, trailing_error=OSError) self._buffer = io.BufferedReader(raw) else: @@ -100,7 +100,7 @@ def close(self): May be called more than once without error. Once the file is closed, any other operation on it will raise a ValueError. """ - if self._mode == _MODE_CLOSED: + if self.closed: return try: if self._mode == _MODE_READ: @@ -115,13 +115,21 @@ def close(self): finally: self._fp = None self._closefp = False - self._mode = _MODE_CLOSED self._buffer = None @property def closed(self): """True if this file is closed.""" - return self._mode == _MODE_CLOSED + return self._fp is None + + @property + def name(self): + self._check_not_closed() + return self._fp.name + + @property + def mode(self): + return 'wb' if self._mode == _MODE_WRITE else 'rb' def fileno(self): """Return the file descriptor for the underlying file.""" @@ -240,7 +248,7 @@ def writelines(self, seq): Line separators are not added between the written byte strings. """ - return _compression.BaseStream.writelines(self, seq) + return _streams.BaseStream.writelines(self, seq) def seek(self, offset, whence=io.SEEK_SET): """Change the file position. diff --git a/PythonLib/full/cProfile.py b/PythonLib/full/cProfile.py index 135a12c3..770d26f7 100644 --- a/PythonLib/full/cProfile.py +++ b/PythonLib/full/cProfile.py @@ -1,5 +1,3 @@ -#! /usr/bin/env python3 - """Python interface for the 'lsprof' profiler. Compatible with the 'profile' module. """ @@ -8,6 +6,7 @@ import _lsprof import importlib.machinery +import importlib.util import io import profile as _pyprofile @@ -41,7 +40,9 @@ class Profile(_lsprof.Profiler): def print_stats(self, sort=-1): import pstats - pstats.Stats(self).strip_dirs().sort_stats(sort).print_stats() + if not isinstance(sort, tuple): + sort = (sort,) + pstats.Stats(self).strip_dirs().sort_stats(*sort).print_stats() def dump_stats(self, file): import marshal @@ -173,13 +174,22 @@ def main(): code = compile(fp.read(), progname, 'exec') spec = importlib.machinery.ModuleSpec(name='__main__', loader=None, origin=progname) - globs = { + module = importlib.util.module_from_spec(spec) + # Set __main__ so that importing __main__ in the profiled code will + # return the same namespace that the code is executing under. + sys.modules['__main__'] = module + # Ensure that we're using the same __dict__ instance as the module + # for the global variables so that updates to globals are reflected + # in the module's namespace. + globs = module.__dict__ + globs.update({ '__spec__': spec, '__file__': spec.origin, '__name__': spec.name, '__package__': None, '__cached__': None, - } + }) + try: runctx(code, globs, None, options.outfile, options.sort) except BrokenPipeError as exc: diff --git a/PythonLib/full/calendar.py b/PythonLib/full/calendar.py index 35096484..18f76d52 100644 --- a/PythonLib/full/calendar.py +++ b/PythonLib/full/calendar.py @@ -10,7 +10,6 @@ from enum import IntEnum, global_enum import locale as _locale from itertools import repeat -import warnings __all__ = ["IllegalMonthError", "IllegalWeekdayError", "setfirstweekday", "firstweekday", "isleap", "leapdays", "weekday", "monthrange", @@ -46,6 +45,7 @@ def __str__(self): def __getattr__(name): if name in ('January', 'February'): + import warnings warnings.warn(f"The '{name}' attribute is deprecated, use '{name.upper()}' instead", DeprecationWarning, stacklevel=2) if name == 'January': @@ -428,6 +428,7 @@ def formatyear(self, theyear, w=2, l=1, c=6, m=3): headers = (header for k in months) a(formatstring(headers, colwidth, c).rstrip()) a('\n'*l) + # max number of weeks for this row height = max(len(cal) for cal in row) for j in range(height): @@ -593,8 +594,6 @@ def __enter__(self): _locale.setlocale(_locale.LC_TIME, self.locale) def __exit__(self, *args): - if self.oldlocale is None: - return _locale.setlocale(_locale.LC_TIME, self.oldlocale) @@ -648,6 +647,117 @@ def formatmonthname(self, theyear, themonth, withyear=True): with different_locale(self.locale): return super().formatmonthname(theyear, themonth, withyear) + +class _CLIDemoCalendar(TextCalendar): + def __init__(self, highlight_day=None, *args, **kwargs): + super().__init__(*args, **kwargs) + self.highlight_day = highlight_day + + def formatweek(self, theweek, width, *, highlight_day=None): + """ + Returns a single week in a string (no newline). + """ + if highlight_day: + from _colorize import get_colors + + ansi = get_colors() + highlight = f"{ansi.BLACK}{ansi.BACKGROUND_YELLOW}" + reset = ansi.RESET + else: + highlight = reset = "" + + return ' '.join( + ( + f"{highlight}{self.formatday(d, wd, width)}{reset}" + if d == highlight_day + else self.formatday(d, wd, width) + ) + for (d, wd) in theweek + ) + + def formatmonth(self, theyear, themonth, w=0, l=0): + """ + Return a month's calendar string (multi-line). + """ + if ( + self.highlight_day + and self.highlight_day.year == theyear + and self.highlight_day.month == themonth + ): + highlight_day = self.highlight_day.day + else: + highlight_day = None + w = max(2, w) + l = max(1, l) + s = self.formatmonthname(theyear, themonth, 7 * (w + 1) - 1) + s = s.rstrip() + s += '\n' * l + s += self.formatweekheader(w).rstrip() + s += '\n' * l + for week in self.monthdays2calendar(theyear, themonth): + s += self.formatweek(week, w, highlight_day=highlight_day).rstrip() + s += '\n' * l + return s + + def formatyear(self, theyear, w=2, l=1, c=6, m=3): + """ + Returns a year's calendar as a multi-line string. + """ + w = max(2, w) + l = max(1, l) + c = max(2, c) + colwidth = (w + 1) * 7 - 1 + v = [] + a = v.append + a(repr(theyear).center(colwidth*m+c*(m-1)).rstrip()) + a('\n'*l) + header = self.formatweekheader(w) + for (i, row) in enumerate(self.yeardays2calendar(theyear, m)): + # months in this row + months = range(m*i+1, min(m*(i+1)+1, 13)) + a('\n'*l) + names = (self.formatmonthname(theyear, k, colwidth, False) + for k in months) + a(formatstring(names, colwidth, c).rstrip()) + a('\n'*l) + headers = (header for k in months) + a(formatstring(headers, colwidth, c).rstrip()) + a('\n'*l) + + if ( + self.highlight_day + and self.highlight_day.year == theyear + and self.highlight_day.month in months + ): + month_pos = months.index(self.highlight_day.month) + else: + month_pos = None + + # max number of weeks for this row + height = max(len(cal) for cal in row) + for j in range(height): + weeks = [] + for k, cal in enumerate(row): + if j >= len(cal): + weeks.append('') + else: + day = ( + self.highlight_day.day if k == month_pos else None + ) + weeks.append( + self.formatweek(cal[j], w, highlight_day=day) + ) + a(formatstring(weeks, colwidth, c).rstrip()) + a('\n' * l) + return ''.join(v) + + +class _CLIDemoLocaleCalendar(LocaleTextCalendar, _CLIDemoCalendar): + def __init__(self, highlight_day=None, *args, **kwargs): + super().__init__(*args, **kwargs) + self.highlight_day = highlight_day + + # Support for old module level interface c = TextCalendar() @@ -698,9 +808,9 @@ def timegm(tuple): return seconds -def main(args): +def main(args=None): import argparse - parser = argparse.ArgumentParser() + parser = argparse.ArgumentParser(color=True) textgroup = parser.add_argument_group('text only arguments') htmlgroup = parser.add_argument_group('html only arguments') textgroup.add_argument( @@ -744,6 +854,11 @@ def main(args): choices=("text", "html"), help="output type (text or html)" ) + parser.add_argument( + "-f", "--first-weekday", + type=int, default=0, + help="weekday (0 is Monday, 6 is Sunday) to start each week (default 0)" + ) parser.add_argument( "year", nargs='?', type=int, @@ -755,44 +870,47 @@ def main(args): help="month number (1-12, text only)" ) - options = parser.parse_args(args[1:]) + options = parser.parse_args(args) if options.locale and not options.encoding: parser.error("if --locale is specified --encoding is required") sys.exit(1) locale = options.locale, options.encoding + today = datetime.date.today() if options.type == "html": + if options.month: + parser.error("incorrect number of arguments") + sys.exit(1) if options.locale: cal = LocaleHTMLCalendar(locale=locale) else: cal = HTMLCalendar() + cal.setfirstweekday(options.first_weekday) encoding = options.encoding if encoding is None: encoding = sys.getdefaultencoding() optdict = dict(encoding=encoding, css=options.css) write = sys.stdout.buffer.write if options.year is None: - write(cal.formatyearpage(datetime.date.today().year, **optdict)) - elif options.month is None: - write(cal.formatyearpage(options.year, **optdict)) + write(cal.formatyearpage(today.year, **optdict)) else: - parser.error("incorrect number of arguments") - sys.exit(1) + write(cal.formatyearpage(options.year, **optdict)) else: if options.locale: - cal = LocaleTextCalendar(locale=locale) + cal = _CLIDemoLocaleCalendar(highlight_day=today, locale=locale) else: - cal = TextCalendar() + cal = _CLIDemoCalendar(highlight_day=today) + cal.setfirstweekday(options.first_weekday) optdict = dict(w=options.width, l=options.lines) if options.month is None: optdict["c"] = options.spacing optdict["m"] = options.months - if options.month is not None: + else: _validate_month(options.month) if options.year is None: - result = cal.formatyear(datetime.date.today().year, **optdict) + result = cal.formatyear(today.year, **optdict) elif options.month is None: result = cal.formatyear(options.year, **optdict) else: @@ -805,4 +923,4 @@ def main(args): if __name__ == "__main__": - main(sys.argv) + main() diff --git a/PythonLib/full/cgi.py b/PythonLib/full/cgi.py deleted file mode 100644 index 8787567b..00000000 --- a/PythonLib/full/cgi.py +++ /dev/null @@ -1,1012 +0,0 @@ -#! /usr/local/bin/python - -# NOTE: the above "/usr/local/bin/python" is NOT a mistake. It is -# intentionally NOT "/usr/bin/env python". On many systems -# (e.g. Solaris), /usr/local/bin is not in $PATH as passed to CGI -# scripts, and /usr/local/bin is the default directory where Python is -# installed, so /usr/bin/env would be unable to find python. Granted, -# binary installations by Linux vendors often install Python in -# /usr/bin. So let those vendors patch cgi.py to match their choice -# of installation. - -"""Support module for CGI (Common Gateway Interface) scripts. - -This module defines a number of utilities for use by CGI scripts -written in Python. - -The global variable maxlen can be set to an integer indicating the maximum size -of a POST request. POST requests larger than this size will result in a -ValueError being raised during parsing. The default value of this variable is 0, -meaning the request size is unlimited. -""" - -# History -# ------- -# -# Michael McLay started this module. Steve Majewski changed the -# interface to SvFormContentDict and FormContentDict. The multipart -# parsing was inspired by code submitted by Andreas Paepcke. Guido van -# Rossum rewrote, reformatted and documented the module and is currently -# responsible for its maintenance. -# - -__version__ = "2.6" - - -# Imports -# ======= - -from io import StringIO, BytesIO, TextIOWrapper -from collections.abc import Mapping -import sys -import os -import urllib.parse -from email.parser import FeedParser -from email.message import Message -import html -import locale -import tempfile -import warnings - -__all__ = ["MiniFieldStorage", "FieldStorage", "parse", "parse_multipart", - "parse_header", "test", "print_exception", "print_environ", - "print_form", "print_directory", "print_arguments", - "print_environ_usage"] - - -warnings._deprecated(__name__, remove=(3,13)) - -# Logging support -# =============== - -logfile = "" # Filename to log to, if not empty -logfp = None # File object to log to, if not None - -def initlog(*allargs): - """Write a log message, if there is a log file. - - Even though this function is called initlog(), you should always - use log(); log is a variable that is set either to initlog - (initially), to dolog (once the log file has been opened), or to - nolog (when logging is disabled). - - The first argument is a format string; the remaining arguments (if - any) are arguments to the % operator, so e.g. - log("%s: %s", "a", "b") - will write "a: b" to the log file, followed by a newline. - - If the global logfp is not None, it should be a file object to - which log data is written. - - If the global logfp is None, the global logfile may be a string - giving a filename to open, in append mode. This file should be - world writable!!! If the file can't be opened, logging is - silently disabled (since there is no safe place where we could - send an error message). - - """ - global log, logfile, logfp - warnings.warn("cgi.log() is deprecated as of 3.10. Use logging instead", - DeprecationWarning, stacklevel=2) - if logfile and not logfp: - try: - logfp = open(logfile, "a", encoding="locale") - except OSError: - pass - if not logfp: - log = nolog - else: - log = dolog - log(*allargs) - -def dolog(fmt, *args): - """Write a log message to the log file. See initlog() for docs.""" - logfp.write(fmt%args + "\n") - -def nolog(*allargs): - """Dummy function, assigned to log when logging is disabled.""" - pass - -def closelog(): - """Close the log file.""" - global log, logfile, logfp - logfile = '' - if logfp: - logfp.close() - logfp = None - log = initlog - -log = initlog # The current logging function - - -# Parsing functions -# ================= - -# Maximum input we will accept when REQUEST_METHOD is POST -# 0 ==> unlimited input -maxlen = 0 - -def parse(fp=None, environ=os.environ, keep_blank_values=0, - strict_parsing=0, separator='&'): - """Parse a query in the environment or from a file (default stdin) - - Arguments, all optional: - - fp : file pointer; default: sys.stdin.buffer - - environ : environment dictionary; default: os.environ - - keep_blank_values: flag indicating whether blank values in - percent-encoded forms should be treated as blank strings. - A true value indicates that blanks should be retained as - blank strings. The default false value indicates that - blank values are to be ignored and treated as if they were - not included. - - strict_parsing: flag indicating what to do with parsing errors. - If false (the default), errors are silently ignored. - If true, errors raise a ValueError exception. - - separator: str. The symbol to use for separating the query arguments. - Defaults to &. - """ - if fp is None: - fp = sys.stdin - - # field keys and values (except for files) are returned as strings - # an encoding is required to decode the bytes read from self.fp - if hasattr(fp,'encoding'): - encoding = fp.encoding - else: - encoding = 'latin-1' - - # fp.read() must return bytes - if isinstance(fp, TextIOWrapper): - fp = fp.buffer - - if not 'REQUEST_METHOD' in environ: - environ['REQUEST_METHOD'] = 'GET' # For testing stand-alone - if environ['REQUEST_METHOD'] == 'POST': - ctype, pdict = parse_header(environ['CONTENT_TYPE']) - if ctype == 'multipart/form-data': - return parse_multipart(fp, pdict, separator=separator) - elif ctype == 'application/x-www-form-urlencoded': - clength = int(environ['CONTENT_LENGTH']) - if maxlen and clength > maxlen: - raise ValueError('Maximum content length exceeded') - qs = fp.read(clength).decode(encoding) - else: - qs = '' # Unknown content-type - if 'QUERY_STRING' in environ: - if qs: qs = qs + '&' - qs = qs + environ['QUERY_STRING'] - elif sys.argv[1:]: - if qs: qs = qs + '&' - qs = qs + sys.argv[1] - environ['QUERY_STRING'] = qs # XXX Shouldn't, really - elif 'QUERY_STRING' in environ: - qs = environ['QUERY_STRING'] - else: - if sys.argv[1:]: - qs = sys.argv[1] - else: - qs = "" - environ['QUERY_STRING'] = qs # XXX Shouldn't, really - return urllib.parse.parse_qs(qs, keep_blank_values, strict_parsing, - encoding=encoding, separator=separator) - - -def parse_multipart(fp, pdict, encoding="utf-8", errors="replace", separator='&'): - """Parse multipart input. - - Arguments: - fp : input file - pdict: dictionary containing other parameters of content-type header - encoding, errors: request encoding and error handler, passed to - FieldStorage - - Returns a dictionary just like parse_qs(): keys are the field names, each - value is a list of values for that field. For non-file fields, the value - is a list of strings. - """ - # RFC 2046, Section 5.1 : The "multipart" boundary delimiters are always - # represented as 7bit US-ASCII. - boundary = pdict['boundary'].decode('ascii') - ctype = "multipart/form-data; boundary={}".format(boundary) - headers = Message() - headers.set_type(ctype) - try: - headers['Content-Length'] = pdict['CONTENT-LENGTH'] - except KeyError: - pass - fs = FieldStorage(fp, headers=headers, encoding=encoding, errors=errors, - environ={'REQUEST_METHOD': 'POST'}, separator=separator) - return {k: fs.getlist(k) for k in fs} - -def _parseparam(s): - while s[:1] == ';': - s = s[1:] - end = s.find(';') - while end > 0 and (s.count('"', 0, end) - s.count('\\"', 0, end)) % 2: - end = s.find(';', end + 1) - if end < 0: - end = len(s) - f = s[:end] - yield f.strip() - s = s[end:] - -def parse_header(line): - """Parse a Content-type like header. - - Return the main content-type and a dictionary of options. - - """ - parts = _parseparam(';' + line) - key = parts.__next__() - pdict = {} - for p in parts: - i = p.find('=') - if i >= 0: - name = p[:i].strip().lower() - value = p[i+1:].strip() - if len(value) >= 2 and value[0] == value[-1] == '"': - value = value[1:-1] - value = value.replace('\\\\', '\\').replace('\\"', '"') - pdict[name] = value - return key, pdict - - -# Classes for field storage -# ========================= - -class MiniFieldStorage: - - """Like FieldStorage, for use when no file uploads are possible.""" - - # Dummy attributes - filename = None - list = None - type = None - file = None - type_options = {} - disposition = None - disposition_options = {} - headers = {} - - def __init__(self, name, value): - """Constructor from field name and value.""" - self.name = name - self.value = value - # self.file = StringIO(value) - - def __repr__(self): - """Return printable representation.""" - return "MiniFieldStorage(%r, %r)" % (self.name, self.value) - - -class FieldStorage: - - """Store a sequence of fields, reading multipart/form-data. - - This class provides naming, typing, files stored on disk, and - more. At the top level, it is accessible like a dictionary, whose - keys are the field names. (Note: None can occur as a field name.) - The items are either a Python list (if there's multiple values) or - another FieldStorage or MiniFieldStorage object. If it's a single - object, it has the following attributes: - - name: the field name, if specified; otherwise None - - filename: the filename, if specified; otherwise None; this is the - client side filename, *not* the file name on which it is - stored (that's a temporary file you don't deal with) - - value: the value as a *string*; for file uploads, this - transparently reads the file every time you request the value - and returns *bytes* - - file: the file(-like) object from which you can read the data *as - bytes* ; None if the data is stored a simple string - - type: the content-type, or None if not specified - - type_options: dictionary of options specified on the content-type - line - - disposition: content-disposition, or None if not specified - - disposition_options: dictionary of corresponding options - - headers: a dictionary(-like) object (sometimes email.message.Message or a - subclass thereof) containing *all* headers - - The class is subclassable, mostly for the purpose of overriding - the make_file() method, which is called internally to come up with - a file open for reading and writing. This makes it possible to - override the default choice of storing all files in a temporary - directory and unlinking them as soon as they have been opened. - - """ - def __init__(self, fp=None, headers=None, outerboundary=b'', - environ=os.environ, keep_blank_values=0, strict_parsing=0, - limit=None, encoding='utf-8', errors='replace', - max_num_fields=None, separator='&'): - """Constructor. Read multipart/* until last part. - - Arguments, all optional: - - fp : file pointer; default: sys.stdin.buffer - (not used when the request method is GET) - Can be : - 1. a TextIOWrapper object - 2. an object whose read() and readline() methods return bytes - - headers : header dictionary-like object; default: - taken from environ as per CGI spec - - outerboundary : terminating multipart boundary - (for internal use only) - - environ : environment dictionary; default: os.environ - - keep_blank_values: flag indicating whether blank values in - percent-encoded forms should be treated as blank strings. - A true value indicates that blanks should be retained as - blank strings. The default false value indicates that - blank values are to be ignored and treated as if they were - not included. - - strict_parsing: flag indicating what to do with parsing errors. - If false (the default), errors are silently ignored. - If true, errors raise a ValueError exception. - - limit : used internally to read parts of multipart/form-data forms, - to exit from the reading loop when reached. It is the difference - between the form content-length and the number of bytes already - read - - encoding, errors : the encoding and error handler used to decode the - binary stream to strings. Must be the same as the charset defined - for the page sending the form (content-type : meta http-equiv or - header) - - max_num_fields: int. If set, then __init__ throws a ValueError - if there are more than n fields read by parse_qsl(). - - """ - method = 'GET' - self.keep_blank_values = keep_blank_values - self.strict_parsing = strict_parsing - self.max_num_fields = max_num_fields - self.separator = separator - if 'REQUEST_METHOD' in environ: - method = environ['REQUEST_METHOD'].upper() - self.qs_on_post = None - if method == 'GET' or method == 'HEAD': - if 'QUERY_STRING' in environ: - qs = environ['QUERY_STRING'] - elif sys.argv[1:]: - qs = sys.argv[1] - else: - qs = "" - qs = qs.encode(locale.getpreferredencoding(), 'surrogateescape') - fp = BytesIO(qs) - if headers is None: - headers = {'content-type': - "application/x-www-form-urlencoded"} - if headers is None: - headers = {} - if method == 'POST': - # Set default content-type for POST to what's traditional - headers['content-type'] = "application/x-www-form-urlencoded" - if 'CONTENT_TYPE' in environ: - headers['content-type'] = environ['CONTENT_TYPE'] - if 'QUERY_STRING' in environ: - self.qs_on_post = environ['QUERY_STRING'] - if 'CONTENT_LENGTH' in environ: - headers['content-length'] = environ['CONTENT_LENGTH'] - else: - if not (isinstance(headers, (Mapping, Message))): - raise TypeError("headers must be mapping or an instance of " - "email.message.Message") - self.headers = headers - if fp is None: - self.fp = sys.stdin.buffer - # self.fp.read() must return bytes - elif isinstance(fp, TextIOWrapper): - self.fp = fp.buffer - else: - if not (hasattr(fp, 'read') and hasattr(fp, 'readline')): - raise TypeError("fp must be file pointer") - self.fp = fp - - self.encoding = encoding - self.errors = errors - - if not isinstance(outerboundary, bytes): - raise TypeError('outerboundary must be bytes, not %s' - % type(outerboundary).__name__) - self.outerboundary = outerboundary - - self.bytes_read = 0 - self.limit = limit - - # Process content-disposition header - cdisp, pdict = "", {} - if 'content-disposition' in self.headers: - cdisp, pdict = parse_header(self.headers['content-disposition']) - self.disposition = cdisp - self.disposition_options = pdict - self.name = None - if 'name' in pdict: - self.name = pdict['name'] - self.filename = None - if 'filename' in pdict: - self.filename = pdict['filename'] - self._binary_file = self.filename is not None - - # Process content-type header - # - # Honor any existing content-type header. But if there is no - # content-type header, use some sensible defaults. Assume - # outerboundary is "" at the outer level, but something non-false - # inside a multi-part. The default for an inner part is text/plain, - # but for an outer part it should be urlencoded. This should catch - # bogus clients which erroneously forget to include a content-type - # header. - # - # See below for what we do if there does exist a content-type header, - # but it happens to be something we don't understand. - if 'content-type' in self.headers: - ctype, pdict = parse_header(self.headers['content-type']) - elif self.outerboundary or method != 'POST': - ctype, pdict = "text/plain", {} - else: - ctype, pdict = 'application/x-www-form-urlencoded', {} - self.type = ctype - self.type_options = pdict - if 'boundary' in pdict: - self.innerboundary = pdict['boundary'].encode(self.encoding, - self.errors) - else: - self.innerboundary = b"" - - clen = -1 - if 'content-length' in self.headers: - try: - clen = int(self.headers['content-length']) - except ValueError: - pass - if maxlen and clen > maxlen: - raise ValueError('Maximum content length exceeded') - self.length = clen - if self.limit is None and clen >= 0: - self.limit = clen - - self.list = self.file = None - self.done = 0 - if ctype == 'application/x-www-form-urlencoded': - self.read_urlencoded() - elif ctype[:10] == 'multipart/': - self.read_multi(environ, keep_blank_values, strict_parsing) - else: - self.read_single() - - def __del__(self): - try: - self.file.close() - except AttributeError: - pass - - def __enter__(self): - return self - - def __exit__(self, *args): - self.file.close() - - def __repr__(self): - """Return a printable representation.""" - return "FieldStorage(%r, %r, %r)" % ( - self.name, self.filename, self.value) - - def __iter__(self): - return iter(self.keys()) - - def __getattr__(self, name): - if name != 'value': - raise AttributeError(name) - if self.file: - self.file.seek(0) - value = self.file.read() - self.file.seek(0) - elif self.list is not None: - value = self.list - else: - value = None - return value - - def __getitem__(self, key): - """Dictionary style indexing.""" - if self.list is None: - raise TypeError("not indexable") - found = [] - for item in self.list: - if item.name == key: found.append(item) - if not found: - raise KeyError(key) - if len(found) == 1: - return found[0] - else: - return found - - def getvalue(self, key, default=None): - """Dictionary style get() method, including 'value' lookup.""" - if key in self: - value = self[key] - if isinstance(value, list): - return [x.value for x in value] - else: - return value.value - else: - return default - - def getfirst(self, key, default=None): - """ Return the first value received.""" - if key in self: - value = self[key] - if isinstance(value, list): - return value[0].value - else: - return value.value - else: - return default - - def getlist(self, key): - """ Return list of received values.""" - if key in self: - value = self[key] - if isinstance(value, list): - return [x.value for x in value] - else: - return [value.value] - else: - return [] - - def keys(self): - """Dictionary style keys() method.""" - if self.list is None: - raise TypeError("not indexable") - return list(set(item.name for item in self.list)) - - def __contains__(self, key): - """Dictionary style __contains__ method.""" - if self.list is None: - raise TypeError("not indexable") - return any(item.name == key for item in self.list) - - def __len__(self): - """Dictionary style len(x) support.""" - return len(self.keys()) - - def __bool__(self): - if self.list is None: - raise TypeError("Cannot be converted to bool.") - return bool(self.list) - - def read_urlencoded(self): - """Internal: read data in query string format.""" - qs = self.fp.read(self.length) - if not isinstance(qs, bytes): - raise ValueError("%s should return bytes, got %s" \ - % (self.fp, type(qs).__name__)) - qs = qs.decode(self.encoding, self.errors) - if self.qs_on_post: - qs += '&' + self.qs_on_post - query = urllib.parse.parse_qsl( - qs, self.keep_blank_values, self.strict_parsing, - encoding=self.encoding, errors=self.errors, - max_num_fields=self.max_num_fields, separator=self.separator) - self.list = [MiniFieldStorage(key, value) for key, value in query] - self.skip_lines() - - FieldStorageClass = None - - def read_multi(self, environ, keep_blank_values, strict_parsing): - """Internal: read a part that is itself multipart.""" - ib = self.innerboundary - if not valid_boundary(ib): - raise ValueError('Invalid boundary in multipart form: %r' % (ib,)) - self.list = [] - if self.qs_on_post: - query = urllib.parse.parse_qsl( - self.qs_on_post, self.keep_blank_values, self.strict_parsing, - encoding=self.encoding, errors=self.errors, - max_num_fields=self.max_num_fields, separator=self.separator) - self.list.extend(MiniFieldStorage(key, value) for key, value in query) - - klass = self.FieldStorageClass or self.__class__ - first_line = self.fp.readline() # bytes - if not isinstance(first_line, bytes): - raise ValueError("%s should return bytes, got %s" \ - % (self.fp, type(first_line).__name__)) - self.bytes_read += len(first_line) - - # Ensure that we consume the file until we've hit our inner boundary - while (first_line.strip() != (b"--" + self.innerboundary) and - first_line): - first_line = self.fp.readline() - self.bytes_read += len(first_line) - - # Propagate max_num_fields into the sub class appropriately - max_num_fields = self.max_num_fields - if max_num_fields is not None: - max_num_fields -= len(self.list) - - while True: - parser = FeedParser() - hdr_text = b"" - while True: - data = self.fp.readline() - hdr_text += data - if not data.strip(): - break - if not hdr_text: - break - # parser takes strings, not bytes - self.bytes_read += len(hdr_text) - parser.feed(hdr_text.decode(self.encoding, self.errors)) - headers = parser.close() - - # Some clients add Content-Length for part headers, ignore them - if 'content-length' in headers: - del headers['content-length'] - - limit = None if self.limit is None \ - else self.limit - self.bytes_read - part = klass(self.fp, headers, ib, environ, keep_blank_values, - strict_parsing, limit, - self.encoding, self.errors, max_num_fields, self.separator) - - if max_num_fields is not None: - max_num_fields -= 1 - if part.list: - max_num_fields -= len(part.list) - if max_num_fields < 0: - raise ValueError('Max number of fields exceeded') - - self.bytes_read += part.bytes_read - self.list.append(part) - if part.done or self.bytes_read >= self.length > 0: - break - self.skip_lines() - - def read_single(self): - """Internal: read an atomic part.""" - if self.length >= 0: - self.read_binary() - self.skip_lines() - else: - self.read_lines() - self.file.seek(0) - - bufsize = 8*1024 # I/O buffering size for copy to file - - def read_binary(self): - """Internal: read binary data.""" - self.file = self.make_file() - todo = self.length - if todo >= 0: - while todo > 0: - data = self.fp.read(min(todo, self.bufsize)) # bytes - if not isinstance(data, bytes): - raise ValueError("%s should return bytes, got %s" - % (self.fp, type(data).__name__)) - self.bytes_read += len(data) - if not data: - self.done = -1 - break - self.file.write(data) - todo = todo - len(data) - - def read_lines(self): - """Internal: read lines until EOF or outerboundary.""" - if self._binary_file: - self.file = self.__file = BytesIO() # store data as bytes for files - else: - self.file = self.__file = StringIO() # as strings for other fields - if self.outerboundary: - self.read_lines_to_outerboundary() - else: - self.read_lines_to_eof() - - def __write(self, line): - """line is always bytes, not string""" - if self.__file is not None: - if self.__file.tell() + len(line) > 1000: - self.file = self.make_file() - data = self.__file.getvalue() - self.file.write(data) - self.__file = None - if self._binary_file: - # keep bytes - self.file.write(line) - else: - # decode to string - self.file.write(line.decode(self.encoding, self.errors)) - - def read_lines_to_eof(self): - """Internal: read lines until EOF.""" - while 1: - line = self.fp.readline(1<<16) # bytes - self.bytes_read += len(line) - if not line: - self.done = -1 - break - self.__write(line) - - def read_lines_to_outerboundary(self): - """Internal: read lines until outerboundary. - Data is read as bytes: boundaries and line ends must be converted - to bytes for comparisons. - """ - next_boundary = b"--" + self.outerboundary - last_boundary = next_boundary + b"--" - delim = b"" - last_line_lfend = True - _read = 0 - while 1: - - if self.limit is not None and 0 <= self.limit <= _read: - break - line = self.fp.readline(1<<16) # bytes - self.bytes_read += len(line) - _read += len(line) - if not line: - self.done = -1 - break - if delim == b"\r": - line = delim + line - delim = b"" - if line.startswith(b"--") and last_line_lfend: - strippedline = line.rstrip() - if strippedline == next_boundary: - break - if strippedline == last_boundary: - self.done = 1 - break - odelim = delim - if line.endswith(b"\r\n"): - delim = b"\r\n" - line = line[:-2] - last_line_lfend = True - elif line.endswith(b"\n"): - delim = b"\n" - line = line[:-1] - last_line_lfend = True - elif line.endswith(b"\r"): - # We may interrupt \r\n sequences if they span the 2**16 - # byte boundary - delim = b"\r" - line = line[:-1] - last_line_lfend = False - else: - delim = b"" - last_line_lfend = False - self.__write(odelim + line) - - def skip_lines(self): - """Internal: skip lines until outer boundary if defined.""" - if not self.outerboundary or self.done: - return - next_boundary = b"--" + self.outerboundary - last_boundary = next_boundary + b"--" - last_line_lfend = True - while True: - line = self.fp.readline(1<<16) - self.bytes_read += len(line) - if not line: - self.done = -1 - break - if line.endswith(b"--") and last_line_lfend: - strippedline = line.strip() - if strippedline == next_boundary: - break - if strippedline == last_boundary: - self.done = 1 - break - last_line_lfend = line.endswith(b'\n') - - def make_file(self): - """Overridable: return a readable & writable file. - - The file will be used as follows: - - data is written to it - - seek(0) - - data is read from it - - The file is opened in binary mode for files, in text mode - for other fields - - This version opens a temporary file for reading and writing, - and immediately deletes (unlinks) it. The trick (on Unix!) is - that the file can still be used, but it can't be opened by - another process, and it will automatically be deleted when it - is closed or when the current process terminates. - - If you want a more permanent file, you derive a class which - overrides this method. If you want a visible temporary file - that is nevertheless automatically deleted when the script - terminates, try defining a __del__ method in a derived class - which unlinks the temporary files you have created. - - """ - if self._binary_file: - return tempfile.TemporaryFile("wb+") - else: - return tempfile.TemporaryFile("w+", - encoding=self.encoding, newline = '\n') - - -# Test/debug code -# =============== - -def test(environ=os.environ): - """Robust test CGI script, usable as main program. - - Write minimal HTTP headers and dump all information provided to - the script in HTML form. - - """ - print("Content-type: text/html") - print() - sys.stderr = sys.stdout - try: - form = FieldStorage() # Replace with other classes to test those - print_directory() - print_arguments() - print_form(form) - print_environ(environ) - print_environ_usage() - def f(): - exec("testing print_exception() -- italics?") - def g(f=f): - f() - print("

What follows is a test, not an actual exception:

") - g() - except: - print_exception() - - print("

Second try with a small maxlen...

") - - global maxlen - maxlen = 50 - try: - form = FieldStorage() # Replace with other classes to test those - print_directory() - print_arguments() - print_form(form) - print_environ(environ) - except: - print_exception() - -def print_exception(type=None, value=None, tb=None, limit=None): - if type is None: - type, value, tb = sys.exc_info() - import traceback - print() - print("

Traceback (most recent call last):

") - list = traceback.format_tb(tb, limit) + \ - traceback.format_exception_only(type, value) - print("
%s%s
" % ( - html.escape("".join(list[:-1])), - html.escape(list[-1]), - )) - del tb - -def print_environ(environ=os.environ): - """Dump the shell environment as HTML.""" - keys = sorted(environ.keys()) - print() - print("

Shell Environment:

") - print("
") - for key in keys: - print("
", html.escape(key), "
", html.escape(environ[key])) - print("
") - print() - -def print_form(form): - """Dump the contents of a form as HTML.""" - keys = sorted(form.keys()) - print() - print("

Form Contents:

") - if not keys: - print("

No form fields.") - print("

") - for key in keys: - print("
" + html.escape(key) + ":", end=' ') - value = form[key] - print("" + html.escape(repr(type(value))) + "") - print("
" + html.escape(repr(value))) - print("
") - print() - -def print_directory(): - """Dump the current directory as HTML.""" - print() - print("

Current Working Directory:

") - try: - pwd = os.getcwd() - except OSError as msg: - print("OSError:", html.escape(str(msg))) - else: - print(html.escape(pwd)) - print() - -def print_arguments(): - print() - print("

Command Line Arguments:

") - print() - print(sys.argv) - print() - -def print_environ_usage(): - """Dump a list of environment variables used by CGI as HTML.""" - print(""" -

These environment variables could have been set:

-
    -
  • AUTH_TYPE -
  • CONTENT_LENGTH -
  • CONTENT_TYPE -
  • DATE_GMT -
  • DATE_LOCAL -
  • DOCUMENT_NAME -
  • DOCUMENT_ROOT -
  • DOCUMENT_URI -
  • GATEWAY_INTERFACE -
  • LAST_MODIFIED -
  • PATH -
  • PATH_INFO -
  • PATH_TRANSLATED -
  • QUERY_STRING -
  • REMOTE_ADDR -
  • REMOTE_HOST -
  • REMOTE_IDENT -
  • REMOTE_USER -
  • REQUEST_METHOD -
  • SCRIPT_NAME -
  • SERVER_NAME -
  • SERVER_PORT -
  • SERVER_PROTOCOL -
  • SERVER_ROOT -
  • SERVER_SOFTWARE -
-In addition, HTTP headers sent by the server may be passed in the -environment as well. Here are some common variable names: -
    -
  • HTTP_ACCEPT -
  • HTTP_CONNECTION -
  • HTTP_HOST -
  • HTTP_PRAGMA -
  • HTTP_REFERER -
  • HTTP_USER_AGENT -
-""") - - -# Utilities -# ========= - -def valid_boundary(s): - import re - if isinstance(s, bytes): - _vb_pattern = b"^[ -~]{0,200}[!-~]$" - else: - _vb_pattern = "^[ -~]{0,200}[!-~]$" - return re.match(_vb_pattern, s) - -# Invoke mainline -# =============== - -# Call test() when this file is run as a script (not imported as a module) -if __name__ == '__main__': - test() diff --git a/PythonLib/full/cgitb.py b/PythonLib/full/cgitb.py deleted file mode 100644 index f6b97f25..00000000 --- a/PythonLib/full/cgitb.py +++ /dev/null @@ -1,332 +0,0 @@ -"""More comprehensive traceback formatting for Python scripts. - -To enable this module, do: - - import cgitb; cgitb.enable() - -at the top of your script. The optional arguments to enable() are: - - display - if true, tracebacks are displayed in the web browser - logdir - if set, tracebacks are written to files in this directory - context - number of lines of source code to show for each stack frame - format - 'text' or 'html' controls the output format - -By default, tracebacks are displayed but not saved, the context is 5 lines -and the output format is 'html' (for backwards compatibility with the -original use of this module) - -Alternatively, if you have caught an exception and want cgitb to display it -for you, call cgitb.handler(). The optional argument to handler() is a -3-item tuple (etype, evalue, etb) just like the value of sys.exc_info(). -The default handler displays output as HTML. - -""" -import inspect -import keyword -import linecache -import os -import pydoc -import sys -import tempfile -import time -import tokenize -import traceback -import warnings -from html import escape as html_escape - -warnings._deprecated(__name__, remove=(3, 13)) - - -def reset(): - """Return a string that resets the CGI and browser to a known state.""" - return ''' - --> --> - - ''' - -__UNDEF__ = [] # a special sentinel object -def small(text): - if text: - return '' + text + '' - else: - return '' - -def strong(text): - if text: - return '' + text + '' - else: - return '' - -def grey(text): - if text: - return '' + text + '' - else: - return '' - -def lookup(name, frame, locals): - """Find the value for a given name in the given environment.""" - if name in locals: - return 'local', locals[name] - if name in frame.f_globals: - return 'global', frame.f_globals[name] - if '__builtins__' in frame.f_globals: - builtins = frame.f_globals['__builtins__'] - if isinstance(builtins, dict): - if name in builtins: - return 'builtin', builtins[name] - else: - if hasattr(builtins, name): - return 'builtin', getattr(builtins, name) - return None, __UNDEF__ - -def scanvars(reader, frame, locals): - """Scan one logical line of Python and look up values of variables used.""" - vars, lasttoken, parent, prefix, value = [], None, None, '', __UNDEF__ - for ttype, token, start, end, line in tokenize.generate_tokens(reader): - if ttype == tokenize.NEWLINE: break - if ttype == tokenize.NAME and token not in keyword.kwlist: - if lasttoken == '.': - if parent is not __UNDEF__: - value = getattr(parent, token, __UNDEF__) - vars.append((prefix + token, prefix, value)) - else: - where, value = lookup(token, frame, locals) - vars.append((token, where, value)) - elif token == '.': - prefix += lasttoken + '.' - parent = value - else: - parent, prefix = None, '' - lasttoken = token - return vars - -def html(einfo, context=5): - """Return a nice HTML document describing a given traceback.""" - etype, evalue, etb = einfo - if isinstance(etype, type): - etype = etype.__name__ - pyver = 'Python ' + sys.version.split()[0] + ': ' + sys.executable - date = time.ctime(time.time()) - head = f''' - - - - - -
 
- 
-{html_escape(str(etype))}
-{pyver}
{date}
-

A problem occurred in a Python script. Here is the sequence of -function calls leading up to the error, in the order they occurred.

''' - - indent = '' + small(' ' * 5) + ' ' - frames = [] - records = inspect.getinnerframes(etb, context) - for frame, file, lnum, func, lines, index in records: - if file: - file = os.path.abspath(file) - link = '%s' % (file, pydoc.html.escape(file)) - else: - file = link = '?' - args, varargs, varkw, locals = inspect.getargvalues(frame) - call = '' - if func != '?': - call = 'in ' + strong(pydoc.html.escape(func)) - if func != "": - call += inspect.formatargvalues(args, varargs, varkw, locals, - formatvalue=lambda value: '=' + pydoc.html.repr(value)) - - highlight = {} - def reader(lnum=[lnum]): - highlight[lnum[0]] = 1 - try: return linecache.getline(file, lnum[0]) - finally: lnum[0] += 1 - vars = scanvars(reader, frame, locals) - - rows = ['%s%s %s' % - (' ', link, call)] - if index is not None: - i = lnum - index - for line in lines: - num = small(' ' * (5-len(str(i))) + str(i)) + ' ' - if i in highlight: - line = '=>%s%s' % (num, pydoc.html.preformat(line)) - rows.append('%s' % line) - else: - line = '  %s%s' % (num, pydoc.html.preformat(line)) - rows.append('%s' % grey(line)) - i += 1 - - done, dump = {}, [] - for name, where, value in vars: - if name in done: continue - done[name] = 1 - if value is not __UNDEF__: - if where in ('global', 'builtin'): - name = ('%s ' % where) + strong(name) - elif where == 'local': - name = strong(name) - else: - name = where + strong(name.split('.')[-1]) - dump.append('%s = %s' % (name, pydoc.html.repr(value))) - else: - dump.append(name + ' undefined') - - rows.append('%s' % small(grey(', '.join(dump)))) - frames.append(''' - -%s
''' % '\n'.join(rows)) - - exception = ['

%s: %s' % (strong(pydoc.html.escape(str(etype))), - pydoc.html.escape(str(evalue)))] - for name in dir(evalue): - if name[:1] == '_': continue - value = pydoc.html.repr(getattr(evalue, name)) - exception.append('\n
%s%s =\n%s' % (indent, name, value)) - - return head + ''.join(frames) + ''.join(exception) + ''' - - - -''' % pydoc.html.escape( - ''.join(traceback.format_exception(etype, evalue, etb))) - -def text(einfo, context=5): - """Return a plain text document describing a given traceback.""" - etype, evalue, etb = einfo - if isinstance(etype, type): - etype = etype.__name__ - pyver = 'Python ' + sys.version.split()[0] + ': ' + sys.executable - date = time.ctime(time.time()) - head = "%s\n%s\n%s\n" % (str(etype), pyver, date) + ''' -A problem occurred in a Python script. Here is the sequence of -function calls leading up to the error, in the order they occurred. -''' - - frames = [] - records = inspect.getinnerframes(etb, context) - for frame, file, lnum, func, lines, index in records: - file = file and os.path.abspath(file) or '?' - args, varargs, varkw, locals = inspect.getargvalues(frame) - call = '' - if func != '?': - call = 'in ' + func - if func != "": - call += inspect.formatargvalues(args, varargs, varkw, locals, - formatvalue=lambda value: '=' + pydoc.text.repr(value)) - - highlight = {} - def reader(lnum=[lnum]): - highlight[lnum[0]] = 1 - try: return linecache.getline(file, lnum[0]) - finally: lnum[0] += 1 - vars = scanvars(reader, frame, locals) - - rows = [' %s %s' % (file, call)] - if index is not None: - i = lnum - index - for line in lines: - num = '%5d ' % i - rows.append(num+line.rstrip()) - i += 1 - - done, dump = {}, [] - for name, where, value in vars: - if name in done: continue - done[name] = 1 - if value is not __UNDEF__: - if where == 'global': name = 'global ' + name - elif where != 'local': name = where + name.split('.')[-1] - dump.append('%s = %s' % (name, pydoc.text.repr(value))) - else: - dump.append(name + ' undefined') - - rows.append('\n'.join(dump)) - frames.append('\n%s\n' % '\n'.join(rows)) - - exception = ['%s: %s' % (str(etype), str(evalue))] - for name in dir(evalue): - value = pydoc.text.repr(getattr(evalue, name)) - exception.append('\n%s%s = %s' % (" "*4, name, value)) - - return head + ''.join(frames) + ''.join(exception) + ''' - -The above is a description of an error in a Python program. Here is -the original traceback: - -%s -''' % ''.join(traceback.format_exception(etype, evalue, etb)) - -class Hook: - """A hook to replace sys.excepthook that shows tracebacks in HTML.""" - - def __init__(self, display=1, logdir=None, context=5, file=None, - format="html"): - self.display = display # send tracebacks to browser if true - self.logdir = logdir # log tracebacks to files if not None - self.context = context # number of source code lines per frame - self.file = file or sys.stdout # place to send the output - self.format = format - - def __call__(self, etype, evalue, etb): - self.handle((etype, evalue, etb)) - - def handle(self, info=None): - info = info or sys.exc_info() - if self.format == "html": - self.file.write(reset()) - - formatter = (self.format=="html") and html or text - plain = False - try: - doc = formatter(info, self.context) - except: # just in case something goes wrong - doc = ''.join(traceback.format_exception(*info)) - plain = True - - if self.display: - if plain: - doc = pydoc.html.escape(doc) - self.file.write('

' + doc + '
\n') - else: - self.file.write(doc + '\n') - else: - self.file.write('

A problem occurred in a Python script.\n') - - if self.logdir is not None: - suffix = ['.txt', '.html'][self.format=="html"] - (fd, path) = tempfile.mkstemp(suffix=suffix, dir=self.logdir) - - try: - with os.fdopen(fd, 'w') as file: - file.write(doc) - msg = '%s contains the description of this error.' % path - except: - msg = 'Tried to save traceback to %s, but failed.' % path - - if self.format == 'html': - self.file.write('

%s

\n' % msg) - else: - self.file.write(msg + '\n') - try: - self.file.flush() - except: pass - -handler = Hook().handle -def enable(display=1, logdir=None, context=5, format="html"): - """Install an exception handler that formats tracebacks as HTML. - - The optional argument 'display' can be set to 0 to suppress sending the - traceback to the browser, and 'logdir' can be set to a directory to cause - tracebacks to be written to files there.""" - sys.excepthook = Hook(display=display, logdir=logdir, - context=context, format=format) diff --git a/PythonLib/full/chunk.py b/PythonLib/full/chunk.py deleted file mode 100644 index 618781ef..00000000 --- a/PythonLib/full/chunk.py +++ /dev/null @@ -1,173 +0,0 @@ -"""Simple class to read IFF chunks. - -An IFF chunk (used in formats such as AIFF, TIFF, RMFF (RealMedia File -Format)) has the following structure: - -+----------------+ -| ID (4 bytes) | -+----------------+ -| size (4 bytes) | -+----------------+ -| data | -| ... | -+----------------+ - -The ID is a 4-byte string which identifies the type of chunk. - -The size field (a 32-bit value, encoded using big-endian byte order) -gives the size of the whole chunk, including the 8-byte header. - -Usually an IFF-type file consists of one or more chunks. The proposed -usage of the Chunk class defined here is to instantiate an instance at -the start of each chunk and read from the instance until it reaches -the end, after which a new instance can be instantiated. At the end -of the file, creating a new instance will fail with an EOFError -exception. - -Usage: -while True: - try: - chunk = Chunk(file) - except EOFError: - break - chunktype = chunk.getname() - while True: - data = chunk.read(nbytes) - if not data: - pass - # do something with data - -The interface is file-like. The implemented methods are: -read, close, seek, tell, isatty. -Extra methods are: skip() (called by close, skips to the end of the chunk), -getname() (returns the name (ID) of the chunk) - -The __init__ method has one required argument, a file-like object -(including a chunk instance), and one optional argument, a flag which -specifies whether or not chunks are aligned on 2-byte boundaries. The -default is 1, i.e. aligned. -""" - -import warnings - -warnings._deprecated(__name__, remove=(3, 13)) - -class Chunk: - def __init__(self, file, align=True, bigendian=True, inclheader=False): - import struct - self.closed = False - self.align = align # whether to align to word (2-byte) boundaries - if bigendian: - strflag = '>' - else: - strflag = '<' - self.file = file - self.chunkname = file.read(4) - if len(self.chunkname) < 4: - raise EOFError - try: - self.chunksize = struct.unpack_from(strflag+'L', file.read(4))[0] - except struct.error: - raise EOFError from None - if inclheader: - self.chunksize = self.chunksize - 8 # subtract header - self.size_read = 0 - try: - self.offset = self.file.tell() - except (AttributeError, OSError): - self.seekable = False - else: - self.seekable = True - - def getname(self): - """Return the name (ID) of the current chunk.""" - return self.chunkname - - def getsize(self): - """Return the size of the current chunk.""" - return self.chunksize - - def close(self): - if not self.closed: - try: - self.skip() - finally: - self.closed = True - - def isatty(self): - if self.closed: - raise ValueError("I/O operation on closed file") - return False - - def seek(self, pos, whence=0): - """Seek to specified position into the chunk. - Default position is 0 (start of chunk). - If the file is not seekable, this will result in an error. - """ - - if self.closed: - raise ValueError("I/O operation on closed file") - if not self.seekable: - raise OSError("cannot seek") - if whence == 1: - pos = pos + self.size_read - elif whence == 2: - pos = pos + self.chunksize - if pos < 0 or pos > self.chunksize: - raise RuntimeError - self.file.seek(self.offset + pos, 0) - self.size_read = pos - - def tell(self): - if self.closed: - raise ValueError("I/O operation on closed file") - return self.size_read - - def read(self, size=-1): - """Read at most size bytes from the chunk. - If size is omitted or negative, read until the end - of the chunk. - """ - - if self.closed: - raise ValueError("I/O operation on closed file") - if self.size_read >= self.chunksize: - return b'' - if size < 0: - size = self.chunksize - self.size_read - if size > self.chunksize - self.size_read: - size = self.chunksize - self.size_read - data = self.file.read(size) - self.size_read = self.size_read + len(data) - if self.size_read == self.chunksize and \ - self.align and \ - (self.chunksize & 1): - dummy = self.file.read(1) - self.size_read = self.size_read + len(dummy) - return data - - def skip(self): - """Skip the rest of the chunk. - If you are not interested in the contents of the chunk, - this method should be called so that the file points to - the start of the next chunk. - """ - - if self.closed: - raise ValueError("I/O operation on closed file") - if self.seekable: - try: - n = self.chunksize - self.size_read - # maybe fix alignment - if self.align and (self.chunksize & 1): - n = n + 1 - self.file.seek(n, 1) - self.size_read = self.size_read + n - return - except OSError: - pass - while self.size_read < self.chunksize: - n = min(8192, self.chunksize - self.size_read) - dummy = self.read(n) - if not dummy: - raise EOFError diff --git a/PythonLib/full/cmd.py b/PythonLib/full/cmd.py index 88ee7d3d..51495fb3 100644 --- a/PythonLib/full/cmd.py +++ b/PythonLib/full/cmd.py @@ -5,16 +5,16 @@ 1. End of file on input is processed as the command 'EOF'. 2. A command is parsed out of each line by collecting the prefix composed of characters in the identchars member. -3. A command `foo' is dispatched to a method 'do_foo()'; the do_ method +3. A command 'foo' is dispatched to a method 'do_foo()'; the do_ method is passed a single argument consisting of the remainder of the line. 4. Typing an empty line repeats the last command. (Actually, it calls the - method `emptyline', which may be overridden in a subclass.) -5. There is a predefined `help' method. Given an argument `topic', it - calls the command `help_topic'. With no arguments, it lists all topics + method 'emptyline', which may be overridden in a subclass.) +5. There is a predefined 'help' method. Given an argument 'topic', it + calls the command 'help_topic'. With no arguments, it lists all topics with defined help_ functions, broken into up to three topics; documented commands, miscellaneous help topics, and undocumented commands. -6. The command '?' is a synonym for `help'. The command '!' is a synonym - for `shell', if a do_shell method exists. +6. The command '?' is a synonym for 'help'. The command '!' is a synonym + for 'shell', if a do_shell method exists. 7. If completion is enabled, completing commands will be done automatically, and completing of commands args is done by calling complete_foo() with arguments text, line, begidx, endidx. text is string we are matching @@ -23,31 +23,34 @@ indexes of the text being matched, which could be used to provide different completion depending upon which position the argument is in. -The `default' method may be overridden to intercept commands for which there +The 'default' method may be overridden to intercept commands for which there is no do_ method. -The `completedefault' method may be overridden to intercept completions for +The 'completedefault' method may be overridden to intercept completions for commands that have no complete_ method. -The data member `self.ruler' sets the character used to draw separator lines +The data member 'self.ruler' sets the character used to draw separator lines in the help messages. If empty, no ruler line is drawn. It defaults to "=". -If the value of `self.intro' is nonempty when the cmdloop method is called, +If the value of 'self.intro' is nonempty when the cmdloop method is called, it is printed out on interpreter startup. This value may be overridden via an optional argument to the cmdloop() method. -The data members `self.doc_header', `self.misc_header', and -`self.undoc_header' set the headers used for the help function's +The data members 'self.doc_header', 'self.misc_header', and +'self.undoc_header' set the headers used for the help function's listings of documented functions, miscellaneous topics, and undocumented functions respectively. """ -import string, sys +import sys __all__ = ["Cmd"] PROMPT = '(Cmd) ' -IDENTCHARS = string.ascii_letters + string.digits + '_' +IDENTCHARS = ('ABCDEFGHIJKLMNOPQRSTUVWXYZ' + 'abcdefghijklmnopqrstuvwxyz' + '0123456789' + '_') class Cmd: """A simple framework for writing line-oriented command interpreters. @@ -108,7 +111,15 @@ def cmdloop(self, intro=None): import readline self.old_completer = readline.get_completer() readline.set_completer(self.complete) - readline.parse_and_bind(self.completekey+": complete") + if readline.backend == "editline": + if self.completekey == 'tab': + # libedit uses "^I" instead of "tab" + command_string = "bind ^I rl_complete" + else: + command_string = f"bind {self.completekey} rl_complete" + else: + command_string = f"{self.completekey}: complete" + readline.parse_and_bind(command_string) except ImportError: pass try: @@ -210,9 +221,8 @@ def onecmd(self, line): if cmd == '': return self.default(line) else: - try: - func = getattr(self, 'do_' + cmd) - except AttributeError: + func = getattr(self, 'do_' + cmd, None) + if func is None: return self.default(line) return func(arg) @@ -263,7 +273,7 @@ def complete(self, text, state): endidx = readline.get_endidx() - stripped if begidx>0: cmd, args, foo = self.parseline(line) - if cmd == '': + if not cmd: compfunc = self.completedefault else: try: @@ -296,8 +306,11 @@ def do_help(self, arg): try: func = getattr(self, 'help_' + arg) except AttributeError: + from inspect import cleandoc + try: doc=getattr(self, 'do_' + arg).__doc__ + doc = cleandoc(doc) if doc: self.stdout.write("%s\n"%str(doc)) return diff --git a/PythonLib/full/code.py b/PythonLib/full/code.py index cb7dd44b..b134886d 100644 --- a/PythonLib/full/code.py +++ b/PythonLib/full/code.py @@ -5,6 +5,7 @@ # Inspired by similar code by Jeff Epler and Fredrik Lundh. +import builtins import sys import traceback from codeop import CommandCompiler, compile_command @@ -24,10 +25,10 @@ class InteractiveInterpreter: def __init__(self, locals=None): """Constructor. - The optional 'locals' argument specifies the dictionary in - which code will be executed; it defaults to a newly created - dictionary with key "__name__" set to "__console__" and key - "__doc__" set to None. + The optional 'locals' argument specifies a mapping to use as the + namespace in which code will be executed; it defaults to a newly + created dictionary with key "__name__" set to "__console__" and + key "__doc__" set to None. """ if locals is None: @@ -63,7 +64,7 @@ def runsource(self, source, filename="", symbol="single"): code = self.compile(source, filename, symbol) except (OverflowError, SyntaxError, ValueError): # Case 1 - self.showsyntaxerror(filename) + self.showsyntaxerror(filename, source=source) return False if code is None: @@ -93,7 +94,7 @@ def runcode(self, code): except: self.showtraceback() - def showsyntaxerror(self, filename=None): + def showsyntaxerror(self, filename=None, **kwargs): """Display the syntax error that just occurred. This doesn't display a stack trace because there isn't one. @@ -107,17 +108,10 @@ def showsyntaxerror(self, filename=None): """ try: typ, value, tb = sys.exc_info() - if filename and typ is SyntaxError: - # Work hard to stuff the correct filename in the exception - try: - msg, (dummy_filename, lineno, offset, line) = value.args - except ValueError: - # Not the format we expect; leave it alone - pass - else: - # Stuff in the right filename - value = SyntaxError(msg, (filename, lineno, offset, line)) - self._showtraceback(typ, value, None) + if filename and issubclass(typ, SyntaxError): + value.filename = filename + source = kwargs.pop('source', "") + self._showtraceback(typ, value, None, source) finally: typ = value = tb = None @@ -131,17 +125,23 @@ def showtraceback(self): """ try: typ, value, tb = sys.exc_info() - self._showtraceback(typ, value, tb.tb_next) + self._showtraceback(typ, value, tb.tb_next, "") finally: typ = value = tb = None - def _showtraceback(self, typ, value, tb): + def _showtraceback(self, typ, value, tb, source): sys.last_type = typ sys.last_traceback = tb - sys.last_exc = sys.last_value = value = value.with_traceback(tb) + value = value.with_traceback(tb) + # Set the line of text that the exception refers to + lines = source.splitlines() + if (source and typ is SyntaxError + and not value.text and value.lineno is not None + and len(lines) >= value.lineno): + value.text = lines[value.lineno - 1] + sys.last_exc = sys.last_value = value if sys.excepthook is sys.__excepthook__: - lines = traceback.format_exception(typ, value, tb) - self.write(''.join(lines)) + self._excepthook(typ, value, tb) else: # If someone has set sys.excepthook, we let that take precedence # over self.write @@ -158,6 +158,12 @@ def _showtraceback(self, typ, value, tb): print('Original exception was:', file=sys.stderr) sys.__excepthook__(typ, value, tb) + def _excepthook(self, typ, value, tb): + # This method is being overwritten in + # _pyrepl.console.InteractiveColoredConsole + lines = traceback.format_exception(typ, value, tb) + self.write(''.join(lines)) + def write(self, data): """Write a string. @@ -176,7 +182,7 @@ class InteractiveConsole(InteractiveInterpreter): """ - def __init__(self, locals=None, filename=""): + def __init__(self, locals=None, filename="", *, local_exit=False): """Constructor. The optional locals argument will be passed to the @@ -188,6 +194,7 @@ def __init__(self, locals=None, filename=""): """ InteractiveInterpreter.__init__(self, locals) self.filename = filename + self.local_exit = local_exit self.resetbuffer() def resetbuffer(self): @@ -212,12 +219,17 @@ def interact(self, banner=None, exitmsg=None): """ try: sys.ps1 + delete_ps1_after = False except AttributeError: sys.ps1 = ">>> " + delete_ps1_after = True try: - sys.ps2 + _ps2 = sys.ps2 + delete_ps2_after = False except AttributeError: sys.ps2 = "... " + delete_ps2_after = True + cprt = 'Type "help", "copyright", "credits" or "license" for more information.' if banner is None: self.write("Python %s on %s\n%s\n(%s)\n" % @@ -226,29 +238,72 @@ def interact(self, banner=None, exitmsg=None): elif banner: self.write("%s\n" % str(banner)) more = 0 - while 1: - try: - if more: - prompt = sys.ps2 - else: - prompt = sys.ps1 + + # When the user uses exit() or quit() in their interactive shell + # they probably just want to exit the created shell, not the whole + # process. exit and quit in builtins closes sys.stdin which makes + # it super difficult to restore + # + # When self.local_exit is True, we overwrite the builtins so + # exit() and quit() only raises SystemExit and we can catch that + # to only exit the interactive shell + + _exit = None + _quit = None + + if self.local_exit: + if hasattr(builtins, "exit"): + _exit = builtins.exit + builtins.exit = Quitter("exit") + + if hasattr(builtins, "quit"): + _quit = builtins.quit + builtins.quit = Quitter("quit") + + try: + while True: try: - line = self.raw_input(prompt) - except EOFError: - self.write("\n") - break - else: - more = self.push(line) - except KeyboardInterrupt: - self.write("\nKeyboardInterrupt\n") - self.resetbuffer() - more = 0 - if exitmsg is None: - self.write('now exiting %s...\n' % self.__class__.__name__) - elif exitmsg != '': - self.write('%s\n' % exitmsg) - - def push(self, line): + if more: + prompt = sys.ps2 + else: + prompt = sys.ps1 + try: + line = self.raw_input(prompt) + except EOFError: + self.write("\n") + break + else: + more = self.push(line) + except KeyboardInterrupt: + self.write("\nKeyboardInterrupt\n") + self.resetbuffer() + more = 0 + except SystemExit as e: + if self.local_exit: + self.write("\n") + break + else: + raise e + finally: + # restore exit and quit in builtins if they were modified + if _exit is not None: + builtins.exit = _exit + + if _quit is not None: + builtins.quit = _quit + + if delete_ps1_after: + del sys.ps1 + + if delete_ps2_after: + del sys.ps2 + + if exitmsg is None: + self.write('now exiting %s...\n' % self.__class__.__name__) + elif exitmsg != '': + self.write('%s\n' % exitmsg) + + def push(self, line, filename=None, _symbol="single"): """Push a line to the interpreter. The line should not have a trailing newline; it may have @@ -264,7 +319,9 @@ def push(self, line): """ self.buffer.append(line) source = "\n".join(self.buffer) - more = self.runsource(source, self.filename) + if filename is None: + filename = self.filename + more = self.runsource(source, filename, symbol=_symbol) if not more: self.resetbuffer() return more @@ -283,8 +340,22 @@ def raw_input(self, prompt=""): return input(prompt) +class Quitter: + def __init__(self, name): + self.name = name + if sys.platform == "win32": + self.eof = 'Ctrl-Z plus Return' + else: + self.eof = 'Ctrl-D (i.e. EOF)' + + def __repr__(self): + return f'Use {self.name} or {self.eof} to exit' + + def __call__(self, code=None): + raise SystemExit(code) + -def interact(banner=None, readfunc=None, local=None, exitmsg=None): +def interact(banner=None, readfunc=None, local=None, exitmsg=None, local_exit=False): """Closely emulate the interactive Python interpreter. This is a backwards compatible interface to the InteractiveConsole @@ -297,14 +368,15 @@ def interact(banner=None, readfunc=None, local=None, exitmsg=None): readfunc -- if not None, replaces InteractiveConsole.raw_input() local -- passed to InteractiveInterpreter.__init__() exitmsg -- passed to InteractiveConsole.interact() + local_exit -- passed to InteractiveConsole.__init__() """ - console = InteractiveConsole(local) + console = InteractiveConsole(local, local_exit=local_exit) if readfunc is not None: console.raw_input = readfunc else: try: - import readline + import readline # noqa: F401 except ImportError: pass console.interact(banner, exitmsg) @@ -313,7 +385,7 @@ def interact(banner=None, readfunc=None, local=None, exitmsg=None): if __name__ == "__main__": import argparse - parser = argparse.ArgumentParser() + parser = argparse.ArgumentParser(color=True) parser.add_argument('-q', action='store_true', help="don't print version and copyright messages") args = parser.parse_args() diff --git a/PythonLib/full/codecs.py b/PythonLib/full/codecs.py index 82f23983..e4a8010a 100644 --- a/PythonLib/full/codecs.py +++ b/PythonLib/full/codecs.py @@ -111,6 +111,9 @@ def __repr__(self): (self.__class__.__module__, self.__class__.__qualname__, self.name, id(self)) + def __getnewargs__(self): + return tuple(self) + class Codec: """ Defines the interface for stateless encoders/decoders. @@ -615,7 +618,7 @@ def readlines(self, sizehint=None, keepends=True): method and are included in the list entries. sizehint, if given, is ignored since there is no efficient - way to finding the true end-of-line. + way of finding the true end-of-line. """ data = self.read() @@ -706,13 +709,13 @@ def read(self, size=-1): return self.reader.read(size) - def readline(self, size=None): + def readline(self, size=None, keepends=True): - return self.reader.readline(size) + return self.reader.readline(size, keepends) - def readlines(self, sizehint=None): + def readlines(self, sizehint=None, keepends=True): - return self.reader.readlines(sizehint) + return self.reader.readlines(sizehint, keepends) def __next__(self): @@ -881,7 +884,6 @@ def __reduce_ex__(self, proto): ### Shortcuts def open(filename, mode='r', encoding=None, errors='strict', buffering=-1): - """ Open an encoded file using the given mode and return a wrapped version providing transparent encoding/decoding. @@ -909,8 +911,11 @@ def open(filename, mode='r', encoding=None, errors='strict', buffering=-1): .encoding which allows querying the used encoding. This attribute is only available if an encoding was specified as parameter. - """ + import warnings + warnings.warn("codecs.open() is deprecated. Use open() instead.", + DeprecationWarning, stacklevel=2) + if encoding is not None and \ 'b' not in mode: # Force opening of the file in binary mode @@ -1106,24 +1111,15 @@ def make_encoding_map(decoding_map): ### error handlers -try: - strict_errors = lookup_error("strict") - ignore_errors = lookup_error("ignore") - replace_errors = lookup_error("replace") - xmlcharrefreplace_errors = lookup_error("xmlcharrefreplace") - backslashreplace_errors = lookup_error("backslashreplace") - namereplace_errors = lookup_error("namereplace") -except LookupError: - # In --disable-unicode builds, these error handler are missing - strict_errors = None - ignore_errors = None - replace_errors = None - xmlcharrefreplace_errors = None - backslashreplace_errors = None - namereplace_errors = None +strict_errors = lookup_error("strict") +ignore_errors = lookup_error("ignore") +replace_errors = lookup_error("replace") +xmlcharrefreplace_errors = lookup_error("xmlcharrefreplace") +backslashreplace_errors = lookup_error("backslashreplace") +namereplace_errors = lookup_error("namereplace") # Tell modulefinder that using codecs probably needs the encodings # package _false = 0 if _false: - import encodings + import encodings # noqa: F401 diff --git a/PythonLib/full/codeop.py b/PythonLib/full/codeop.py index 4dd09657..8cac0044 100644 --- a/PythonLib/full/codeop.py +++ b/PythonLib/full/codeop.py @@ -44,9 +44,10 @@ # Caveat emptor: These flags are undocumented on purpose and depending # on their effect outside the standard library is **unsupported**. PyCF_DONT_IMPLY_DEDENT = 0x200 +PyCF_ONLY_AST = 0x400 PyCF_ALLOW_INCOMPLETE_INPUT = 0x4000 -def _maybe_compile(compiler, source, filename, symbol): +def _maybe_compile(compiler, source, filename, symbol, flags): # Check for source consisting of only blank lines and comments. for line in source.split("\n"): line = line.strip() @@ -60,36 +61,26 @@ def _maybe_compile(compiler, source, filename, symbol): with warnings.catch_warnings(): warnings.simplefilter("ignore", (SyntaxWarning, DeprecationWarning)) try: - compiler(source, filename, symbol) + compiler(source, filename, symbol, flags=flags) except SyntaxError: # Let other compile() errors propagate. try: - compiler(source + "\n", filename, symbol) + compiler(source + "\n", filename, symbol, flags=flags) + return None + except _IncompleteInputError as e: return None except SyntaxError as e: - if "incomplete input" in str(e): - return None + pass # fallthrough return compiler(source, filename, symbol, incomplete_input=False) -def _is_syntax_error(err1, err2): - rep1 = repr(err1) - rep2 = repr(err2) - if "was never closed" in rep1 and "was never closed" in rep2: - return False - if rep1 == rep2: - return True - return False - -def _compile(source, filename, symbol, incomplete_input=True): - flags = 0 +def _compile(source, filename, symbol, incomplete_input=True, *, flags=0): if incomplete_input: flags |= PyCF_ALLOW_INCOMPLETE_INPUT flags |= PyCF_DONT_IMPLY_DEDENT return compile(source, filename, symbol, flags) - -def compile_command(source, filename="", symbol="single"): +def compile_command(source, filename="", symbol="single", flags=0): r"""Compile a command and determine whether it is incomplete. Arguments: @@ -108,7 +99,7 @@ def compile_command(source, filename="", symbol="single"): syntax error (OverflowError and ValueError can be produced by malformed literals). """ - return _maybe_compile(_compile, source, filename, symbol) + return _maybe_compile(_compile, source, filename, symbol, flags) class Compile: """Instances of this class behave much like the built-in compile @@ -118,12 +109,14 @@ class Compile: def __init__(self): self.flags = PyCF_DONT_IMPLY_DEDENT | PyCF_ALLOW_INCOMPLETE_INPUT - def __call__(self, source, filename, symbol, **kwargs): - flags = self.flags + def __call__(self, source, filename, symbol, flags=0, **kwargs): + flags |= self.flags if kwargs.get('incomplete_input', True) is False: flags &= ~PyCF_DONT_IMPLY_DEDENT flags &= ~PyCF_ALLOW_INCOMPLETE_INPUT codeob = compile(source, filename, symbol, flags, True) + if flags & PyCF_ONLY_AST: + return codeob # this is an ast.Module in this case for feature in _features: if codeob.co_flags & feature.compiler_flag: self.flags |= feature.compiler_flag @@ -158,4 +151,4 @@ def __call__(self, source, filename="", symbol="single"): syntax error (OverflowError and ValueError can be produced by malformed literals). """ - return _maybe_compile(self.compiler, source, filename, symbol) + return _maybe_compile(self.compiler, source, filename, symbol, flags=self.compiler.flags) diff --git a/PythonLib/full/collections/__init__.py b/PythonLib/full/collections/__init__.py index 2da0cd5b..803de0c6 100644 --- a/PythonLib/full/collections/__init__.py +++ b/PythonLib/full/collections/__init__.py @@ -29,6 +29,9 @@ import _collections_abc import sys as _sys +_sys.modules['collections.abc'] = _collections_abc +abc = _collections_abc + from itertools import chain as _chain from itertools import repeat as _repeat from itertools import starmap as _starmap @@ -46,7 +49,8 @@ _collections_abc.MutableSequence.register(deque) try: - from _collections import _deque_iterator + # Expose _deque_iterator to support pickling deque iterators + from _collections import _deque_iterator # noqa: F401 except ImportError: pass @@ -55,6 +59,8 @@ except ImportError: pass +heapq = None # Lazily imported + ################################################################################ ### OrderedDict @@ -457,7 +463,7 @@ def _make(cls, iterable): def _replace(self, /, **kwds): result = self._make(_map(kwds.pop, field_names, self)) if kwds: - raise ValueError(f'Got unexpected field names: {list(kwds)!r}') + raise TypeError(f'Got unexpected field names: {list(kwds)!r}') return result _replace.__doc__ = (f'Return a new {typename} object replacing specified ' @@ -495,6 +501,7 @@ def __getnewargs__(self): '_field_defaults': field_defaults, '__new__': __new__, '_make': _make, + '__replace__': _replace, '_replace': _replace, '__repr__': __repr__, '_asdict': _asdict, @@ -628,7 +635,10 @@ def most_common(self, n=None): return sorted(self.items(), key=_itemgetter(1), reverse=True) # Lazy import to speedup Python startup time - import heapq + global heapq + if heapq is None: + import heapq + return heapq.nlargest(n, self.items(), key=_itemgetter(1)) def elements(self): @@ -1015,7 +1025,7 @@ def __getitem__(self, key): return self.__missing__(key) # support subclasses that define __missing__ def get(self, key, default=None): - return self[key] if key in self else default + return self[key] if key in self else default # needs to make use of __contains__ def __len__(self): return len(set().union(*self.maps)) # reuses stored hash values if possible @@ -1027,7 +1037,10 @@ def __iter__(self): return iter(d) def __contains__(self, key): - return any(key in m for m in self.maps) + for mapping in self.maps: + if key in mapping: + return True + return False def __bool__(self): return any(self.maps) @@ -1037,9 +1050,9 @@ def __repr__(self): return f'{self.__class__.__name__}({", ".join(map(repr, self.maps))})' @classmethod - def fromkeys(cls, iterable, *args): - 'Create a ChainMap with a single dict created from the iterable.' - return cls(dict.fromkeys(iterable, *args)) + def fromkeys(cls, iterable, value=None, /): + 'Create a new ChainMap with keys from iterable and values set to value.' + return cls(dict.fromkeys(iterable, value)) def copy(self): 'New ChainMap or subclass with a new copy of maps[0] and refs to maps[1:]' @@ -1482,6 +1495,8 @@ def format_map(self, mapping): return self.data.format_map(mapping) def index(self, sub, start=0, end=_sys.maxsize): + if isinstance(sub, UserString): + sub = sub.data return self.data.index(sub, start, end) def isalpha(self): @@ -1550,6 +1565,8 @@ def rfind(self, sub, start=0, end=_sys.maxsize): return self.data.rfind(sub, start, end) def rindex(self, sub, start=0, end=_sys.maxsize): + if isinstance(sub, UserString): + sub = sub.data return self.data.rindex(sub, start, end) def rjust(self, width, *args): diff --git a/PythonLib/full/collections/abc.py b/PythonLib/full/collections/abc.py deleted file mode 100644 index 86ca8b8a..00000000 --- a/PythonLib/full/collections/abc.py +++ /dev/null @@ -1,3 +0,0 @@ -from _collections_abc import * -from _collections_abc import __all__ -from _collections_abc import _CallableGenericAlias diff --git a/PythonLib/full/compileall.py b/PythonLib/full/compileall.py index d394156c..67fe3704 100644 --- a/PythonLib/full/compileall.py +++ b/PythonLib/full/compileall.py @@ -116,7 +116,8 @@ def compile_dir(dir, maxlevels=None, ddir=None, force=False, prependdir=prependdir, limit_sl_dest=limit_sl_dest, hardlink_dupes=hardlink_dupes), - files) + files, + chunksize=4) success = min(results, default=True) else: for file in files: @@ -172,13 +173,13 @@ def compile_file(fullname, ddir=None, force=False, rx=None, quiet=0, if stripdir is not None: fullname_parts = fullname.split(os.path.sep) stripdir_parts = stripdir.split(os.path.sep) - ddir_parts = list(fullname_parts) - for spart, opart in zip(stripdir_parts, fullname_parts): - if spart == opart: - ddir_parts.remove(spart) - - dfile = os.path.join(*ddir_parts) + if stripdir_parts != fullname_parts[:len(stripdir_parts)]: + if quiet < 2: + print("The stripdir path {!r} is not a valid prefix for " + "source path {!r}; ignoring".format(stripdir, fullname)) + else: + dfile = os.path.join(*fullname_parts[len(stripdir_parts):]) if prependdir is not None: if dfile is None: @@ -316,7 +317,9 @@ def main(): import argparse parser = argparse.ArgumentParser( - description='Utilities to support installing Python libraries.') + description='Utilities to support installing Python libraries.', + color=True, + ) parser.add_argument('-l', action='store_const', const=0, default=None, dest='maxlevels', help="don't recurse into subdirectories") diff --git a/PythonLib/full/compression/__init__.py b/PythonLib/full/compression/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/PythonLib/full/compression/_common/__init__.py b/PythonLib/full/compression/_common/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/PythonLib/full/compression/_common/_streams.py b/PythonLib/full/compression/_common/_streams.py new file mode 100644 index 00000000..9f367d4e --- /dev/null +++ b/PythonLib/full/compression/_common/_streams.py @@ -0,0 +1,162 @@ +"""Internal classes used by compression modules""" + +import io +import sys + +BUFFER_SIZE = io.DEFAULT_BUFFER_SIZE # Compressed data read chunk size + + +class BaseStream(io.BufferedIOBase): + """Mode-checking helper functions.""" + + def _check_not_closed(self): + if self.closed: + raise ValueError("I/O operation on closed file") + + def _check_can_read(self): + if not self.readable(): + raise io.UnsupportedOperation("File not open for reading") + + def _check_can_write(self): + if not self.writable(): + raise io.UnsupportedOperation("File not open for writing") + + def _check_can_seek(self): + if not self.readable(): + raise io.UnsupportedOperation("Seeking is only supported " + "on files open for reading") + if not self.seekable(): + raise io.UnsupportedOperation("The underlying file object " + "does not support seeking") + + +class DecompressReader(io.RawIOBase): + """Adapts the decompressor API to a RawIOBase reader API""" + + def readable(self): + return True + + def __init__(self, fp, decomp_factory, trailing_error=(), **decomp_args): + self._fp = fp + self._eof = False + self._pos = 0 # Current offset in decompressed stream + + # Set to size of decompressed stream once it is known, for SEEK_END + self._size = -1 + + # Save the decompressor factory and arguments. + # If the file contains multiple compressed streams, each + # stream will need a separate decompressor object. A new decompressor + # object is also needed when implementing a backwards seek(). + self._decomp_factory = decomp_factory + self._decomp_args = decomp_args + self._decompressor = self._decomp_factory(**self._decomp_args) + + # Exception class to catch from decompressor signifying invalid + # trailing data to ignore + self._trailing_error = trailing_error + + def close(self): + self._decompressor = None + return super().close() + + def seekable(self): + return self._fp.seekable() + + def readinto(self, b): + with memoryview(b) as view, view.cast("B") as byte_view: + data = self.read(len(byte_view)) + byte_view[:len(data)] = data + return len(data) + + def read(self, size=-1): + if size < 0: + return self.readall() + + if not size or self._eof: + return b"" + data = None # Default if EOF is encountered + # Depending on the input data, our call to the decompressor may not + # return any data. In this case, try again after reading another block. + while True: + if self._decompressor.eof: + rawblock = (self._decompressor.unused_data or + self._fp.read(BUFFER_SIZE)) + if not rawblock: + break + # Continue to next stream. + self._decompressor = self._decomp_factory( + **self._decomp_args) + try: + data = self._decompressor.decompress(rawblock, size) + except self._trailing_error: + # Trailing data isn't a valid compressed stream; ignore it. + break + else: + if self._decompressor.needs_input: + rawblock = self._fp.read(BUFFER_SIZE) + if not rawblock: + raise EOFError("Compressed file ended before the " + "end-of-stream marker was reached") + else: + rawblock = b"" + data = self._decompressor.decompress(rawblock, size) + if data: + break + if not data: + self._eof = True + self._size = self._pos + return b"" + self._pos += len(data) + return data + + def readall(self): + chunks = [] + # sys.maxsize means the max length of output buffer is unlimited, + # so that the whole input buffer can be decompressed within one + # .decompress() call. + while data := self.read(sys.maxsize): + chunks.append(data) + + return b"".join(chunks) + + # Rewind the file to the beginning of the data stream. + def _rewind(self): + self._fp.seek(0) + self._eof = False + self._pos = 0 + self._decompressor = self._decomp_factory(**self._decomp_args) + + def seek(self, offset, whence=io.SEEK_SET): + # Recalculate offset as an absolute file position. + if whence == io.SEEK_SET: + pass + elif whence == io.SEEK_CUR: + offset = self._pos + offset + elif whence == io.SEEK_END: + # Seeking relative to EOF - we need to know the file's size. + if self._size < 0: + while self.read(io.DEFAULT_BUFFER_SIZE): + pass + offset = self._size + offset + else: + raise ValueError("Invalid value for whence: {}".format(whence)) + + # Make it so that offset is the number of bytes to skip forward. + if offset < self._pos: + self._rewind() + else: + offset -= self._pos + + # Read and discard data until we reach the desired position. + while offset > 0: + data = self.read(min(io.DEFAULT_BUFFER_SIZE, offset)) + if not data: + break + offset -= len(data) + + return self._pos + + def tell(self): + """Return the current file position.""" + return self._pos diff --git a/PythonLib/full/compression/bz2.py b/PythonLib/full/compression/bz2.py new file mode 100644 index 00000000..16815d6c --- /dev/null +++ b/PythonLib/full/compression/bz2.py @@ -0,0 +1,5 @@ +import bz2 +__doc__ = bz2.__doc__ +del bz2 + +from bz2 import * diff --git a/PythonLib/full/compression/gzip.py b/PythonLib/full/compression/gzip.py new file mode 100644 index 00000000..552f48f9 --- /dev/null +++ b/PythonLib/full/compression/gzip.py @@ -0,0 +1,5 @@ +import gzip +__doc__ = gzip.__doc__ +del gzip + +from gzip import * diff --git a/PythonLib/full/compression/lzma.py b/PythonLib/full/compression/lzma.py new file mode 100644 index 00000000..b4bc7ccb --- /dev/null +++ b/PythonLib/full/compression/lzma.py @@ -0,0 +1,5 @@ +import lzma +__doc__ = lzma.__doc__ +del lzma + +from lzma import * diff --git a/PythonLib/full/compression/zlib.py b/PythonLib/full/compression/zlib.py new file mode 100644 index 00000000..3aa7e2db --- /dev/null +++ b/PythonLib/full/compression/zlib.py @@ -0,0 +1,5 @@ +import zlib +__doc__ = zlib.__doc__ +del zlib + +from zlib import * diff --git a/PythonLib/full/compression/zstd/__init__.py b/PythonLib/full/compression/zstd/__init__.py new file mode 100644 index 00000000..84b25914 --- /dev/null +++ b/PythonLib/full/compression/zstd/__init__.py @@ -0,0 +1,242 @@ +"""Python bindings to the Zstandard (zstd) compression library (RFC-8878).""" + +__all__ = ( + # compression.zstd + 'COMPRESSION_LEVEL_DEFAULT', + 'compress', + 'CompressionParameter', + 'decompress', + 'DecompressionParameter', + 'finalize_dict', + 'get_frame_info', + 'Strategy', + 'train_dict', + + # compression.zstd._zstdfile + 'open', + 'ZstdFile', + + # _zstd + 'get_frame_size', + 'zstd_version', + 'zstd_version_info', + 'ZstdCompressor', + 'ZstdDecompressor', + 'ZstdDict', + 'ZstdError', +) + +import _zstd +import enum +from _zstd import (ZstdCompressor, ZstdDecompressor, ZstdDict, ZstdError, + get_frame_size, zstd_version) +from compression.zstd._zstdfile import ZstdFile, open, _nbytes + +# zstd_version_number is (MAJOR * 100 * 100 + MINOR * 100 + RELEASE) +zstd_version_info = (*divmod(_zstd.zstd_version_number // 100, 100), + _zstd.zstd_version_number % 100) +"""Version number of the runtime zstd library as a tuple of integers.""" + +COMPRESSION_LEVEL_DEFAULT = _zstd.ZSTD_CLEVEL_DEFAULT +"""The default compression level for Zstandard, currently '3'.""" + + +class FrameInfo: + """Information about a Zstandard frame.""" + + __slots__ = 'decompressed_size', 'dictionary_id' + + def __init__(self, decompressed_size, dictionary_id): + super().__setattr__('decompressed_size', decompressed_size) + super().__setattr__('dictionary_id', dictionary_id) + + def __repr__(self): + return (f'FrameInfo(decompressed_size={self.decompressed_size}, ' + f'dictionary_id={self.dictionary_id})') + + def __setattr__(self, name, _): + raise AttributeError(f"can't set attribute {name!r}") + + +def get_frame_info(frame_buffer): + """Get Zstandard frame information from a frame header. + + *frame_buffer* is a bytes-like object. It should start from the beginning + of a frame, and needs to include at least the frame header (6 to 18 bytes). + + The returned FrameInfo object has two attributes. + 'decompressed_size' is the size in bytes of the data in the frame when + decompressed, or None when the decompressed size is unknown. + 'dictionary_id' is an int in the range (0, 2**32). The special value 0 + means that the dictionary ID was not recorded in the frame header, + the frame may or may not need a dictionary to be decoded, + and the ID of such a dictionary is not specified. + """ + return FrameInfo(*_zstd.get_frame_info(frame_buffer)) + + +def train_dict(samples, dict_size): + """Return a ZstdDict representing a trained Zstandard dictionary. + + *samples* is an iterable of samples, where a sample is a bytes-like + object representing a file. + + *dict_size* is the dictionary's maximum size, in bytes. + """ + if not isinstance(dict_size, int): + ds_cls = type(dict_size).__qualname__ + raise TypeError(f'dict_size must be an int object, not {ds_cls!r}.') + + samples = tuple(samples) + chunks = b''.join(samples) + chunk_sizes = tuple(_nbytes(sample) for sample in samples) + if not chunks: + raise ValueError("samples contained no data; can't train dictionary.") + dict_content = _zstd.train_dict(chunks, chunk_sizes, dict_size) + return ZstdDict(dict_content) + + +def finalize_dict(zstd_dict, /, samples, dict_size, level): + """Return a ZstdDict representing a finalized Zstandard dictionary. + + Given a custom content as a basis for dictionary, and a set of samples, + finalize *zstd_dict* by adding headers and statistics according to the + Zstandard dictionary format. + + You may compose an effective dictionary content by hand, which is used as + basis dictionary, and use some samples to finalize a dictionary. The basis + dictionary may be a "raw content" dictionary. See *is_raw* in ZstdDict. + + *samples* is an iterable of samples, where a sample is a bytes-like object + representing a file. + *dict_size* is the dictionary's maximum size, in bytes. + *level* is the expected compression level. The statistics for each + compression level differ, so tuning the dictionary to the compression level + can provide improvements. + """ + + if not isinstance(zstd_dict, ZstdDict): + raise TypeError('zstd_dict argument should be a ZstdDict object.') + if not isinstance(dict_size, int): + raise TypeError('dict_size argument should be an int object.') + if not isinstance(level, int): + raise TypeError('level argument should be an int object.') + + samples = tuple(samples) + chunks = b''.join(samples) + chunk_sizes = tuple(_nbytes(sample) for sample in samples) + if not chunks: + raise ValueError("The samples are empty content, can't finalize the " + "dictionary.") + dict_content = _zstd.finalize_dict(zstd_dict.dict_content, chunks, + chunk_sizes, dict_size, level) + return ZstdDict(dict_content) + + +def compress(data, level=None, options=None, zstd_dict=None): + """Return Zstandard compressed *data* as bytes. + + *level* is an int specifying the compression level to use, defaulting to + COMPRESSION_LEVEL_DEFAULT ('3'). + *options* is a dict object that contains advanced compression + parameters. See CompressionParameter for more on options. + *zstd_dict* is a ZstdDict object, a pre-trained Zstandard dictionary. See + the function train_dict for how to train a ZstdDict on sample data. + + For incremental compression, use a ZstdCompressor instead. + """ + comp = ZstdCompressor(level=level, options=options, zstd_dict=zstd_dict) + return comp.compress(data, mode=ZstdCompressor.FLUSH_FRAME) + + +def decompress(data, zstd_dict=None, options=None): + """Decompress one or more frames of Zstandard compressed *data*. + + *zstd_dict* is a ZstdDict object, a pre-trained Zstandard dictionary. See + the function train_dict for how to train a ZstdDict on sample data. + *options* is a dict object that contains advanced compression + parameters. See DecompressionParameter for more on options. + + For incremental decompression, use a ZstdDecompressor instead. + """ + results = [] + while True: + decomp = ZstdDecompressor(options=options, zstd_dict=zstd_dict) + results.append(decomp.decompress(data)) + if not decomp.eof: + raise ZstdError('Compressed data ended before the ' + 'end-of-stream marker was reached') + data = decomp.unused_data + if not data: + break + return b''.join(results) + + +class CompressionParameter(enum.IntEnum): + """Compression parameters.""" + + compression_level = _zstd.ZSTD_c_compressionLevel + window_log = _zstd.ZSTD_c_windowLog + hash_log = _zstd.ZSTD_c_hashLog + chain_log = _zstd.ZSTD_c_chainLog + search_log = _zstd.ZSTD_c_searchLog + min_match = _zstd.ZSTD_c_minMatch + target_length = _zstd.ZSTD_c_targetLength + strategy = _zstd.ZSTD_c_strategy + + enable_long_distance_matching = _zstd.ZSTD_c_enableLongDistanceMatching + ldm_hash_log = _zstd.ZSTD_c_ldmHashLog + ldm_min_match = _zstd.ZSTD_c_ldmMinMatch + ldm_bucket_size_log = _zstd.ZSTD_c_ldmBucketSizeLog + ldm_hash_rate_log = _zstd.ZSTD_c_ldmHashRateLog + + content_size_flag = _zstd.ZSTD_c_contentSizeFlag + checksum_flag = _zstd.ZSTD_c_checksumFlag + dict_id_flag = _zstd.ZSTD_c_dictIDFlag + + nb_workers = _zstd.ZSTD_c_nbWorkers + job_size = _zstd.ZSTD_c_jobSize + overlap_log = _zstd.ZSTD_c_overlapLog + + def bounds(self): + """Return the (lower, upper) int bounds of a compression parameter. + + Both the lower and upper bounds are inclusive. + """ + return _zstd.get_param_bounds(self.value, is_compress=True) + + +class DecompressionParameter(enum.IntEnum): + """Decompression parameters.""" + + window_log_max = _zstd.ZSTD_d_windowLogMax + + def bounds(self): + """Return the (lower, upper) int bounds of a decompression parameter. + + Both the lower and upper bounds are inclusive. + """ + return _zstd.get_param_bounds(self.value, is_compress=False) + + +class Strategy(enum.IntEnum): + """Compression strategies, listed from fastest to strongest. + + Note that new strategies might be added in the future. + Only the order (from fast to strong) is guaranteed, + the numeric value might change. + """ + + fast = _zstd.ZSTD_fast + dfast = _zstd.ZSTD_dfast + greedy = _zstd.ZSTD_greedy + lazy = _zstd.ZSTD_lazy + lazy2 = _zstd.ZSTD_lazy2 + btlazy2 = _zstd.ZSTD_btlazy2 + btopt = _zstd.ZSTD_btopt + btultra = _zstd.ZSTD_btultra + btultra2 = _zstd.ZSTD_btultra2 + + +# Check validity of the CompressionParameter & DecompressionParameter types +_zstd.set_parameter_types(CompressionParameter, DecompressionParameter) diff --git a/PythonLib/full/compression/zstd/_zstdfile.py b/PythonLib/full/compression/zstd/_zstdfile.py new file mode 100644 index 00000000..d709f5ef --- /dev/null +++ b/PythonLib/full/compression/zstd/_zstdfile.py @@ -0,0 +1,345 @@ +import io +from os import PathLike +from _zstd import ZstdCompressor, ZstdDecompressor, ZSTD_DStreamOutSize +from compression._common import _streams + +__all__ = ('ZstdFile', 'open') + +_MODE_CLOSED = 0 +_MODE_READ = 1 +_MODE_WRITE = 2 + + +def _nbytes(dat, /): + if isinstance(dat, (bytes, bytearray)): + return len(dat) + with memoryview(dat) as mv: + return mv.nbytes + + +class ZstdFile(_streams.BaseStream): + """A file-like object providing transparent Zstandard (de)compression. + + A ZstdFile can act as a wrapper for an existing file object, or refer + directly to a named file on disk. + + ZstdFile provides a *binary* file interface. Data is read and returned as + bytes, and may only be written to objects that support the Buffer Protocol. + """ + + FLUSH_BLOCK = ZstdCompressor.FLUSH_BLOCK + FLUSH_FRAME = ZstdCompressor.FLUSH_FRAME + + def __init__(self, file, /, mode='r', *, + level=None, options=None, zstd_dict=None): + """Open a Zstandard compressed file in binary mode. + + *file* can be either an file-like object, or a file name to open. + + *mode* can be 'r' for reading (default), 'w' for (over)writing, 'x' for + creating exclusively, or 'a' for appending. These can equivalently be + given as 'rb', 'wb', 'xb' and 'ab' respectively. + + *level* is an optional int specifying the compression level to use, + or COMPRESSION_LEVEL_DEFAULT if not given. + + *options* is an optional dict for advanced compression parameters. + See CompressionParameter and DecompressionParameter for the possible + options. + + *zstd_dict* is an optional ZstdDict object, a pre-trained Zstandard + dictionary. See train_dict() to train ZstdDict on sample data. + """ + self._fp = None + self._close_fp = False + self._mode = _MODE_CLOSED + self._buffer = None + + if not isinstance(mode, str): + raise ValueError('mode must be a str') + if options is not None and not isinstance(options, dict): + raise TypeError('options must be a dict or None') + mode = mode.removesuffix('b') # handle rb, wb, xb, ab + if mode == 'r': + if level is not None: + raise TypeError('level is illegal in read mode') + self._mode = _MODE_READ + elif mode in {'w', 'a', 'x'}: + if level is not None and not isinstance(level, int): + raise TypeError('level must be int or None') + self._mode = _MODE_WRITE + self._compressor = ZstdCompressor(level=level, options=options, + zstd_dict=zstd_dict) + self._pos = 0 + else: + raise ValueError(f'Invalid mode: {mode!r}') + + if isinstance(file, (str, bytes, PathLike)): + self._fp = io.open(file, f'{mode}b') + self._close_fp = True + elif ((mode == 'r' and hasattr(file, 'read')) + or (mode != 'r' and hasattr(file, 'write'))): + self._fp = file + else: + raise TypeError('file must be a file-like object ' + 'or a str, bytes, or PathLike object') + + if self._mode == _MODE_READ: + raw = _streams.DecompressReader( + self._fp, + ZstdDecompressor, + zstd_dict=zstd_dict, + options=options, + ) + self._buffer = io.BufferedReader(raw) + + def close(self): + """Flush and close the file. + + May be called multiple times. Once the file has been closed, + any other operation on it will raise ValueError. + """ + if self._fp is None: + return + try: + if self._mode == _MODE_READ: + if getattr(self, '_buffer', None): + self._buffer.close() + self._buffer = None + elif self._mode == _MODE_WRITE: + self.flush(self.FLUSH_FRAME) + self._compressor = None + finally: + self._mode = _MODE_CLOSED + try: + if self._close_fp: + self._fp.close() + finally: + self._fp = None + self._close_fp = False + + def write(self, data, /): + """Write a bytes-like object *data* to the file. + + Returns the number of uncompressed bytes written, which is + always the length of data in bytes. Note that due to buffering, + the file on disk may not reflect the data written until .flush() + or .close() is called. + """ + self._check_can_write() + + length = _nbytes(data) + + compressed = self._compressor.compress(data) + self._fp.write(compressed) + self._pos += length + return length + + def flush(self, mode=FLUSH_BLOCK): + """Flush remaining data to the underlying stream. + + The mode argument can be FLUSH_BLOCK or FLUSH_FRAME. Abuse of this + method will reduce compression ratio, use it only when necessary. + + If the program is interrupted afterwards, all data can be recovered. + To ensure saving to disk, also need to use os.fsync(fd). + + This method does nothing in reading mode. + """ + if self._mode == _MODE_READ: + return + self._check_not_closed() + if mode not in {self.FLUSH_BLOCK, self.FLUSH_FRAME}: + raise ValueError('Invalid mode argument, expected either ' + 'ZstdFile.FLUSH_FRAME or ' + 'ZstdFile.FLUSH_BLOCK') + if self._compressor.last_mode == mode: + return + # Flush zstd block/frame, and write. + data = self._compressor.flush(mode) + self._fp.write(data) + if hasattr(self._fp, 'flush'): + self._fp.flush() + + def read(self, size=-1): + """Read up to size uncompressed bytes from the file. + + If size is negative or omitted, read until EOF is reached. + Returns b'' if the file is already at EOF. + """ + if size is None: + size = -1 + self._check_can_read() + return self._buffer.read(size) + + def read1(self, size=-1): + """Read up to size uncompressed bytes, while trying to avoid + making multiple reads from the underlying stream. Reads up to a + buffer's worth of data if size is negative. + + Returns b'' if the file is at EOF. + """ + self._check_can_read() + if size < 0: + # Note this should *not* be io.DEFAULT_BUFFER_SIZE. + # ZSTD_DStreamOutSize is the minimum amount to read guaranteeing + # a full block is read. + size = ZSTD_DStreamOutSize + return self._buffer.read1(size) + + def readinto(self, b): + """Read bytes into b. + + Returns the number of bytes read (0 for EOF). + """ + self._check_can_read() + return self._buffer.readinto(b) + + def readinto1(self, b): + """Read bytes into b, while trying to avoid making multiple reads + from the underlying stream. + + Returns the number of bytes read (0 for EOF). + """ + self._check_can_read() + return self._buffer.readinto1(b) + + def readline(self, size=-1): + """Read a line of uncompressed bytes from the file. + + The terminating newline (if present) is retained. If size is + non-negative, no more than size bytes will be read (in which + case the line may be incomplete). Returns b'' if already at EOF. + """ + self._check_can_read() + return self._buffer.readline(size) + + def seek(self, offset, whence=io.SEEK_SET): + """Change the file position. + + The new position is specified by offset, relative to the + position indicated by whence. Possible values for whence are: + + 0: start of stream (default): offset must not be negative + 1: current stream position + 2: end of stream; offset must not be positive + + Returns the new file position. + + Note that seeking is emulated, so depending on the arguments, + this operation may be extremely slow. + """ + self._check_can_read() + + # BufferedReader.seek() checks seekable + return self._buffer.seek(offset, whence) + + def peek(self, size=-1): + """Return buffered data without advancing the file position. + + Always returns at least one byte of data, unless at EOF. + The exact number of bytes returned is unspecified. + """ + # Relies on the undocumented fact that BufferedReader.peek() always + # returns at least one byte (except at EOF) + self._check_can_read() + return self._buffer.peek(size) + + def __next__(self): + if ret := self._buffer.readline(): + return ret + raise StopIteration + + def tell(self): + """Return the current file position.""" + self._check_not_closed() + if self._mode == _MODE_READ: + return self._buffer.tell() + elif self._mode == _MODE_WRITE: + return self._pos + + def fileno(self): + """Return the file descriptor for the underlying file.""" + self._check_not_closed() + return self._fp.fileno() + + @property + def name(self): + self._check_not_closed() + return self._fp.name + + @property + def mode(self): + return 'wb' if self._mode == _MODE_WRITE else 'rb' + + @property + def closed(self): + """True if this file is closed.""" + return self._mode == _MODE_CLOSED + + def seekable(self): + """Return whether the file supports seeking.""" + return self.readable() and self._buffer.seekable() + + def readable(self): + """Return whether the file was opened for reading.""" + self._check_not_closed() + return self._mode == _MODE_READ + + def writable(self): + """Return whether the file was opened for writing.""" + self._check_not_closed() + return self._mode == _MODE_WRITE + + +def open(file, /, mode='rb', *, level=None, options=None, zstd_dict=None, + encoding=None, errors=None, newline=None): + """Open a Zstandard compressed file in binary or text mode. + + file can be either a file name (given as a str, bytes, or PathLike object), + in which case the named file is opened, or it can be an existing file object + to read from or write to. + + The mode parameter can be 'r', 'rb' (default), 'w', 'wb', 'x', 'xb', 'a', + 'ab' for binary mode, or 'rt', 'wt', 'xt', 'at' for text mode. + + The level, options, and zstd_dict parameters specify the settings the same + as ZstdFile. + + When using read mode (decompression), the options parameter is a dict + representing advanced decompression options. The level parameter is not + supported in this case. When using write mode (compression), only one of + level, an int representing the compression level, or options, a dict + representing advanced compression options, may be passed. In both modes, + zstd_dict is a ZstdDict instance containing a trained Zstandard dictionary. + + For binary mode, this function is equivalent to the ZstdFile constructor: + ZstdFile(filename, mode, ...). In this case, the encoding, errors and + newline parameters must not be provided. + + For text mode, an ZstdFile object is created, and wrapped in an + io.TextIOWrapper instance with the specified encoding, error handling + behavior, and line ending(s). + """ + + text_mode = 't' in mode + mode = mode.replace('t', '') + + if text_mode: + if 'b' in mode: + raise ValueError(f'Invalid mode: {mode!r}') + else: + if encoding is not None: + raise ValueError('Argument "encoding" not supported in binary mode') + if errors is not None: + raise ValueError('Argument "errors" not supported in binary mode') + if newline is not None: + raise ValueError('Argument "newline" not supported in binary mode') + + binary_file = ZstdFile(file, mode, level=level, options=options, + zstd_dict=zstd_dict) + + if text_mode: + return io.TextIOWrapper(binary_file, encoding, errors, newline) + else: + return binary_file diff --git a/PythonLib/full/concurrent/futures/__init__.py b/PythonLib/full/concurrent/futures/__init__.py index 72de617a..d6ac4b3e 100644 --- a/PythonLib/full/concurrent/futures/__init__.py +++ b/PythonLib/full/concurrent/futures/__init__.py @@ -17,7 +17,7 @@ wait, as_completed) -__all__ = ( +__all__ = [ 'FIRST_COMPLETED', 'FIRST_EXCEPTION', 'ALL_COMPLETED', @@ -31,24 +31,35 @@ 'as_completed', 'ProcessPoolExecutor', 'ThreadPoolExecutor', -) +] + + +try: + import _interpreters +except ImportError: + _interpreters = None + +if _interpreters: + __all__.append('InterpreterPoolExecutor') def __dir__(): - return __all__ + ('__author__', '__doc__') + return __all__ + ['__author__', '__doc__'] def __getattr__(name): - global ProcessPoolExecutor, ThreadPoolExecutor + global ProcessPoolExecutor, ThreadPoolExecutor, InterpreterPoolExecutor if name == 'ProcessPoolExecutor': - from .process import ProcessPoolExecutor as pe - ProcessPoolExecutor = pe - return pe + from .process import ProcessPoolExecutor + return ProcessPoolExecutor if name == 'ThreadPoolExecutor': - from .thread import ThreadPoolExecutor as te - ThreadPoolExecutor = te - return te + from .thread import ThreadPoolExecutor + return ThreadPoolExecutor + + if _interpreters and name == 'InterpreterPoolExecutor': + from .interpreter import InterpreterPoolExecutor + return InterpreterPoolExecutor raise AttributeError(f"module {__name__!r} has no attribute {name!r}") diff --git a/PythonLib/full/concurrent/futures/_base.py b/PythonLib/full/concurrent/futures/_base.py index 6742a077..d98b1ebd 100644 --- a/PythonLib/full/concurrent/futures/_base.py +++ b/PythonLib/full/concurrent/futures/_base.py @@ -8,6 +8,8 @@ import threading import time import types +import weakref +from itertools import islice FIRST_COMPLETED = 'FIRST_COMPLETED' FIRST_EXCEPTION = 'FIRST_EXCEPTION' @@ -23,14 +25,6 @@ CANCELLED_AND_NOTIFIED = 'CANCELLED_AND_NOTIFIED' FINISHED = 'FINISHED' -_FUTURE_STATES = [ - PENDING, - RUNNING, - CANCELLED, - CANCELLED_AND_NOTIFIED, - FINISHED -] - _STATE_TO_DESCRIPTION_MAP = { PENDING: "pending", RUNNING: "running", @@ -396,7 +390,7 @@ def done(self): return self._state in [CANCELLED, CANCELLED_AND_NOTIFIED, FINISHED] def __get_result(self): - if self._exception: + if self._exception is not None: try: raise self._exception finally: @@ -580,7 +574,7 @@ def submit(self, fn, /, *args, **kwargs): """ raise NotImplementedError() - def map(self, fn, *iterables, timeout=None, chunksize=1): + def map(self, fn, *iterables, timeout=None, chunksize=1, buffersize=None): """Returns an iterator equivalent to map(fn, iter). Args: @@ -592,6 +586,11 @@ def map(self, fn, *iterables, timeout=None, chunksize=1): before being passed to a child process. This argument is only used by ProcessPoolExecutor; it is ignored by ThreadPoolExecutor. + buffersize: The number of submitted tasks whose results have not + yet been yielded. If the buffer is full, iteration over the + iterables pauses until a result is yielded from the buffer. + If None, all input elements are eagerly collected, and a task is + submitted for each. Returns: An iterator equivalent to: map(func, *iterables) but the calls may @@ -602,10 +601,25 @@ def map(self, fn, *iterables, timeout=None, chunksize=1): before the given timeout. Exception: If fn(*args) raises for any values. """ + if buffersize is not None and not isinstance(buffersize, int): + raise TypeError("buffersize must be an integer or None") + if buffersize is not None and buffersize < 1: + raise ValueError("buffersize must be None or > 0") + if timeout is not None: end_time = timeout + time.monotonic() - fs = [self.submit(fn, *args) for args in zip(*iterables)] + zipped_iterables = zip(*iterables) + if buffersize: + fs = collections.deque( + self.submit(fn, *args) for args in islice(zipped_iterables, buffersize) + ) + else: + fs = [self.submit(fn, *args) for args in zipped_iterables] + + # Use a weak reference to ensure that the executor can be garbage + # collected independently of the result_iterator closure. + executor_weakref = weakref.ref(self) # Yield must be hidden in closure so that the futures are submitted # before the first iterator value is required. @@ -614,6 +628,12 @@ def result_iterator(): # reverse to keep finishing order fs.reverse() while fs: + if ( + buffersize + and (executor := executor_weakref()) + and (args := next(zipped_iterables, None)) + ): + fs.appendleft(executor.submit(fn, *args)) # Careful not to keep a reference to the popped future if timeout is None: yield _result_or_cancel(fs.pop()) diff --git a/PythonLib/full/concurrent/futures/interpreter.py b/PythonLib/full/concurrent/futures/interpreter.py new file mode 100644 index 00000000..85c1da2c --- /dev/null +++ b/PythonLib/full/concurrent/futures/interpreter.py @@ -0,0 +1,123 @@ +"""Implements InterpreterPoolExecutor.""" + +from concurrent import interpreters +import sys +from . import thread as _thread +import traceback + + +def do_call(results, func, args, kwargs): + try: + return func(*args, **kwargs) + except BaseException as exc: + # Send the captured exception out on the results queue, + # but still leave it unhandled for the interpreter to handle. + try: + results.put(exc) + except interpreters.NotShareableError: + # The exception is not shareable. + print('exception is not shareable:', file=sys.stderr) + traceback.print_exception(exc) + results.put(None) + raise # re-raise + + +class WorkerContext(_thread.WorkerContext): + + @classmethod + def prepare(cls, initializer, initargs): + def resolve_task(fn, args, kwargs): + if isinstance(fn, str): + # XXX Circle back to this later. + raise TypeError('scripts not supported') + else: + task = (fn, args, kwargs) + return task + + if initializer is not None: + try: + initdata = resolve_task(initializer, initargs, {}) + except ValueError: + if isinstance(initializer, str) and initargs: + raise ValueError(f'an initializer script does not take args, got {initargs!r}') + raise # re-raise + else: + initdata = None + def create_context(): + return cls(initdata) + return create_context, resolve_task + + def __init__(self, initdata): + self.initdata = initdata + self.interp = None + self.results = None + + def __del__(self): + if self.interp is not None: + self.finalize() + + def initialize(self): + assert self.interp is None, self.interp + self.interp = interpreters.create() + try: + maxsize = 0 + self.results = interpreters.create_queue(maxsize) + + if self.initdata: + self.run(self.initdata) + except BaseException: + self.finalize() + raise # re-raise + + def finalize(self): + interp = self.interp + results = self.results + self.results = None + self.interp = None + if results is not None: + del results + if interp is not None: + interp.close() + + def run(self, task): + try: + return self.interp.call(do_call, self.results, *task) + except interpreters.ExecutionFailed as wrapper: + # Wait for the exception data to show up. + exc = self.results.get() + if exc is None: + # The exception must have been not shareable. + raise # re-raise + raise exc from wrapper + + +class BrokenInterpreterPool(_thread.BrokenThreadPool): + """ + Raised when a worker thread in an InterpreterPoolExecutor failed initializing. + """ + + +class InterpreterPoolExecutor(_thread.ThreadPoolExecutor): + + BROKEN = BrokenInterpreterPool + + @classmethod + def prepare_context(cls, initializer, initargs): + return WorkerContext.prepare(initializer, initargs) + + def __init__(self, max_workers=None, thread_name_prefix='', + initializer=None, initargs=()): + """Initializes a new InterpreterPoolExecutor instance. + + Args: + max_workers: The maximum number of interpreters that can be used to + execute the given calls. + thread_name_prefix: An optional name prefix to give our threads. + initializer: A callable or script used to initialize + each worker interpreter. + initargs: A tuple of arguments to pass to the initializer. + """ + thread_name_prefix = (thread_name_prefix or + (f"InterpreterPoolExecutor-{self._counter()}")) + super().__init__(max_workers, thread_name_prefix, + initializer, initargs) diff --git a/PythonLib/full/concurrent/futures/process.py b/PythonLib/full/concurrent/futures/process.py index ff7c17ef..a14650bf 100644 --- a/PythonLib/full/concurrent/futures/process.py +++ b/PythonLib/full/concurrent/futures/process.py @@ -191,16 +191,6 @@ def _on_queue_feeder_error(self, e, obj): super()._on_queue_feeder_error(e, obj) -def _get_chunks(*iterables, chunksize): - """ Iterates over zip()ed iterables in chunks. """ - it = zip(*iterables) - while True: - chunk = tuple(itertools.islice(it, chunksize)) - if not chunk: - return - yield chunk - - def _process_chunk(fn, chunk): """ Processes a chunk of an iterable passed to map. @@ -306,8 +296,9 @@ def __init__(self, executor): # will wake up the queue management thread so that it can terminate # if there is no pending work item. def weakref_cb(_, - thread_wakeup=self.thread_wakeup): - mp.util.debug('Executor collected: triggering callback for' + thread_wakeup=self.thread_wakeup, + mp_util_debug=mp.util.debug): + mp_util_debug('Executor collected: triggering callback for' ' QueueManager wakeup') thread_wakeup.wakeup() @@ -445,24 +436,14 @@ def process_result_item(self, result_item): # Process the received a result_item. This can be either the PID of a # worker that exited gracefully or a _ResultItem - if isinstance(result_item, int): - # Clean shutdown of a worker using its PID - # (avoids marking the executor broken) - assert self.is_shutting_down() - p = self.processes.pop(result_item) - p.join() - if not self.processes: - self.join_executor_internals() - return - else: - # Received a _ResultItem so mark the future as completed. - work_item = self.pending_work_items.pop(result_item.work_id, None) - # work_item can be None if another process terminated (see above) - if work_item is not None: - if result_item.exception: - work_item.future.set_exception(result_item.exception) - else: - work_item.future.set_result(result_item.result) + # Received a _ResultItem so mark the future as completed. + work_item = self.pending_work_items.pop(result_item.work_id, None) + # work_item can be None if another process terminated (see above) + if work_item is not None: + if result_item.exception is not None: + work_item.future.set_exception(result_item.exception) + else: + work_item.future.set_result(result_item.result) def is_shutting_down(self): # Check whether we should start shutting down the executor. @@ -602,7 +583,7 @@ def _check_system_limits(): raise NotImplementedError(_system_limited) _system_limits_checked = True try: - import multiprocessing.synchronize + import multiprocessing.synchronize # noqa: F401 except ImportError: _system_limited = ( "This Python build lacks multiprocessing.synchronize, usually due " @@ -645,6 +626,14 @@ class BrokenProcessPool(_base.BrokenExecutor): while a future was in the running state. """ +_TERMINATE = "terminate" +_KILL = "kill" + +_SHUTDOWN_CALLBACK_OPERATION = { + _TERMINATE, + _KILL +} + class ProcessPoolExecutor(_base.Executor): def __init__(self, max_workers=None, mp_context=None, @@ -670,7 +659,7 @@ def __init__(self, max_workers=None, mp_context=None, _check_system_limits() if max_workers is None: - self._max_workers = os.cpu_count() or 1 + self._max_workers = os.process_cpu_count() or 1 if sys.platform == 'win32': self._max_workers = min(_MAX_WINDOWS_WORKERS, self._max_workers) @@ -766,6 +755,11 @@ def _start_executor_manager_thread(self): self._executor_manager_thread_wakeup def _adjust_process_count(self): + # gh-132969: avoid error when state is reset and executor is still running, + # which will happen when shutdown(wait=False) is called. + if self._processes is None: + return + # if there's an idle process, we don't need to spawn a new one. if self._idle_worker_semaphore.acquire(blocking=False): return @@ -824,7 +818,7 @@ def submit(self, fn, /, *args, **kwargs): return f submit.__doc__ = _base.Executor.submit.__doc__ - def map(self, fn, *iterables, timeout=None, chunksize=1): + def map(self, fn, *iterables, timeout=None, chunksize=1, buffersize=None): """Returns an iterator equivalent to map(fn, iter). Args: @@ -835,6 +829,11 @@ def map(self, fn, *iterables, timeout=None, chunksize=1): chunksize: If greater than one, the iterables will be chopped into chunks of size chunksize and submitted to the process pool. If set to one, the items in the list will be sent one at a time. + buffersize: The number of submitted tasks whose results have not + yet been yielded. If the buffer is full, iteration over the + iterables pauses until a result is yielded from the buffer. + If None, all input elements are eagerly collected, and a task is + submitted for each. Returns: An iterator equivalent to: map(func, *iterables) but the calls may @@ -849,8 +848,9 @@ def map(self, fn, *iterables, timeout=None, chunksize=1): raise ValueError("chunksize must be >= 1.") results = super().map(partial(_process_chunk, fn), - _get_chunks(*iterables, chunksize=chunksize), - timeout=timeout) + itertools.batched(zip(*iterables), chunksize), + timeout=timeout, + buffersize=buffersize) return _chain_from_iterable_of_lists(results) def shutdown(self, wait=True, *, cancel_futures=False): @@ -874,3 +874,66 @@ def shutdown(self, wait=True, *, cancel_futures=False): self._executor_manager_thread_wakeup = None shutdown.__doc__ = _base.Executor.shutdown.__doc__ + + def _force_shutdown(self, operation): + """Attempts to terminate or kill the executor's workers based off the + given operation. Iterates through all of the current processes and + performs the relevant task if the process is still alive. + + After terminating workers, the pool will be in a broken state + and no longer usable (for instance, new tasks should not be + submitted). + """ + if operation not in _SHUTDOWN_CALLBACK_OPERATION: + raise ValueError(f"Unsupported operation: {operation!r}") + + processes = {} + if self._processes: + processes = self._processes.copy() + + # shutdown will invalidate ._processes, so we copy it right before + # calling. If we waited here, we would deadlock if a process decides not + # to exit. + self.shutdown(wait=False, cancel_futures=True) + + if not processes: + return + + for proc in processes.values(): + try: + if not proc.is_alive(): + continue + except ValueError: + # The process is already exited/closed out. + continue + + try: + if operation == _TERMINATE: + proc.terminate() + elif operation == _KILL: + proc.kill() + except ProcessLookupError: + # The process just ended before our signal + continue + + def terminate_workers(self): + """Attempts to terminate the executor's workers. + Iterates through all of the current worker processes and terminates + each one that is still alive. + + After terminating workers, the pool will be in a broken state + and no longer usable (for instance, new tasks should not be + submitted). + """ + return self._force_shutdown(operation=_TERMINATE) + + def kill_workers(self): + """Attempts to kill the executor's workers. + Iterates through all of the current worker processes and kills + each one that is still alive. + + After killing workers, the pool will be in a broken state + and no longer usable (for instance, new tasks should not be + submitted). + """ + return self._force_shutdown(operation=_KILL) diff --git a/PythonLib/full/concurrent/futures/thread.py b/PythonLib/full/concurrent/futures/thread.py index 61dbff8a..909359b6 100644 --- a/PythonLib/full/concurrent/futures/thread.py +++ b/PythonLib/full/concurrent/futures/thread.py @@ -44,19 +44,46 @@ def _python_exit(): os.register_at_fork(after_in_child=_threads_queues.clear) +class WorkerContext: + + @classmethod + def prepare(cls, initializer, initargs): + if initializer is not None: + if not callable(initializer): + raise TypeError("initializer must be a callable") + def create_context(): + return cls(initializer, initargs) + def resolve_task(fn, args, kwargs): + return (fn, args, kwargs) + return create_context, resolve_task + + def __init__(self, initializer, initargs): + self.initializer = initializer + self.initargs = initargs + + def initialize(self): + if self.initializer is not None: + self.initializer(*self.initargs) + + def finalize(self): + pass + + def run(self, task): + fn, args, kwargs = task + return fn(*args, **kwargs) + + class _WorkItem: - def __init__(self, future, fn, args, kwargs): + def __init__(self, future, task): self.future = future - self.fn = fn - self.args = args - self.kwargs = kwargs + self.task = task - def run(self): + def run(self, ctx): if not self.future.set_running_or_notify_cancel(): return try: - result = self.fn(*self.args, **self.kwargs) + result = ctx.run(self.task) except BaseException as exc: self.future.set_exception(exc) # Break a reference cycle with the exception 'exc' @@ -67,16 +94,15 @@ def run(self): __class_getitem__ = classmethod(types.GenericAlias) -def _worker(executor_reference, work_queue, initializer, initargs): - if initializer is not None: - try: - initializer(*initargs) - except BaseException: - _base.LOGGER.critical('Exception in initializer:', exc_info=True) - executor = executor_reference() - if executor is not None: - executor._initializer_failed() - return +def _worker(executor_reference, ctx, work_queue): + try: + ctx.initialize() + except BaseException: + _base.LOGGER.critical('Exception in initializer:', exc_info=True) + executor = executor_reference() + if executor is not None: + executor._initializer_failed() + return try: while True: try: @@ -90,7 +116,7 @@ def _worker(executor_reference, work_queue, initializer, initargs): work_item = work_queue.get(block=True) if work_item is not None: - work_item.run() + work_item.run(ctx) # Delete references to object. See GH-60488 del work_item continue @@ -111,6 +137,8 @@ def _worker(executor_reference, work_queue, initializer, initargs): del executor except BaseException: _base.LOGGER.critical('Exception in worker', exc_info=True) + finally: + ctx.finalize() class BrokenThreadPool(_base.BrokenExecutor): @@ -121,11 +149,17 @@ class BrokenThreadPool(_base.BrokenExecutor): class ThreadPoolExecutor(_base.Executor): + BROKEN = BrokenThreadPool + # Used to assign unique thread names when thread_name_prefix is not supplied. _counter = itertools.count().__next__ + @classmethod + def prepare_context(cls, initializer, initargs): + return WorkerContext.prepare(initializer, initargs) + def __init__(self, max_workers=None, thread_name_prefix='', - initializer=None, initargs=()): + initializer=None, initargs=(), **ctxkwargs): """Initializes a new ThreadPoolExecutor instance. Args: @@ -134,21 +168,23 @@ def __init__(self, max_workers=None, thread_name_prefix='', thread_name_prefix: An optional name prefix to give our threads. initializer: A callable used to initialize worker threads. initargs: A tuple of arguments to pass to the initializer. + ctxkwargs: Additional arguments to cls.prepare_context(). """ if max_workers is None: # ThreadPoolExecutor is often used to: # * CPU bound task which releases GIL # * I/O bound task (which releases GIL, of course) # - # We use cpu_count + 4 for both types of tasks. + # We use process_cpu_count + 4 for both types of tasks. # But we limit it to 32 to avoid consuming surprisingly large resource # on many core machine. - max_workers = min(32, (os.cpu_count() or 1) + 4) + max_workers = min(32, (os.process_cpu_count() or 1) + 4) if max_workers <= 0: raise ValueError("max_workers must be greater than 0") - if initializer is not None and not callable(initializer): - raise TypeError("initializer must be a callable") + (self._create_worker_context, + self._resolve_work_item_task, + ) = type(self).prepare_context(initializer, initargs, **ctxkwargs) self._max_workers = max_workers self._work_queue = queue.SimpleQueue() @@ -159,13 +195,11 @@ def __init__(self, max_workers=None, thread_name_prefix='', self._shutdown_lock = threading.Lock() self._thread_name_prefix = (thread_name_prefix or ("ThreadPoolExecutor-%d" % self._counter())) - self._initializer = initializer - self._initargs = initargs def submit(self, fn, /, *args, **kwargs): with self._shutdown_lock, _global_shutdown_lock: if self._broken: - raise BrokenThreadPool(self._broken) + raise self.BROKEN(self._broken) if self._shutdown: raise RuntimeError('cannot schedule new futures after shutdown') @@ -174,7 +208,8 @@ def submit(self, fn, /, *args, **kwargs): 'interpreter shutdown') f = _base.Future() - w = _WorkItem(f, fn, args, kwargs) + task = self._resolve_work_item_task(fn, args, kwargs) + w = _WorkItem(f, task) self._work_queue.put(w) self._adjust_thread_count() @@ -197,9 +232,8 @@ def weakref_cb(_, q=self._work_queue): num_threads) t = threading.Thread(name=thread_name, target=_worker, args=(weakref.ref(self, weakref_cb), - self._work_queue, - self._initializer, - self._initargs)) + self._create_worker_context(), + self._work_queue)) t.start() self._threads.add(t) _threads_queues[t] = self._work_queue @@ -215,7 +249,7 @@ def _initializer_failed(self): except queue.Empty: break if work_item is not None: - work_item.future.set_exception(BrokenThreadPool(self._broken)) + work_item.future.set_exception(self.BROKEN(self._broken)) def shutdown(self, wait=True, *, cancel_futures=False): with self._shutdown_lock: diff --git a/PythonLib/full/concurrent/interpreters/__init__.py b/PythonLib/full/concurrent/interpreters/__init__.py new file mode 100644 index 00000000..ea4147ee --- /dev/null +++ b/PythonLib/full/concurrent/interpreters/__init__.py @@ -0,0 +1,247 @@ +"""Subinterpreters High Level Module.""" + +import threading +import weakref +import _interpreters + +# aliases: +from _interpreters import ( + InterpreterError, InterpreterNotFoundError, NotShareableError, + is_shareable, +) +from ._queues import ( + create as create_queue, + Queue, QueueEmpty, QueueFull, +) + + +__all__ = [ + 'get_current', 'get_main', 'create', 'list_all', 'is_shareable', + 'Interpreter', + 'InterpreterError', 'InterpreterNotFoundError', 'ExecutionFailed', + 'NotShareableError', + 'create_queue', 'Queue', 'QueueEmpty', 'QueueFull', +] + + +_EXEC_FAILURE_STR = """ +{superstr} + +Uncaught in the interpreter: + +{formatted} +""".strip() + +class ExecutionFailed(InterpreterError): + """An unhandled exception happened during execution. + + This is raised from Interpreter.exec() and Interpreter.call(). + """ + + def __init__(self, excinfo): + msg = excinfo.formatted + if not msg: + if excinfo.type and excinfo.msg: + msg = f'{excinfo.type.__name__}: {excinfo.msg}' + else: + msg = excinfo.type.__name__ or excinfo.msg + super().__init__(msg) + self.excinfo = excinfo + + def __str__(self): + try: + formatted = self.excinfo.errdisplay + except Exception: + return super().__str__() + else: + return _EXEC_FAILURE_STR.format( + superstr=super().__str__(), + formatted=formatted, + ) + + +def create(): + """Return a new (idle) Python interpreter.""" + id = _interpreters.create(reqrefs=True) + return Interpreter(id, _ownsref=True) + + +def list_all(): + """Return all existing interpreters.""" + return [Interpreter(id, _whence=whence) + for id, whence in _interpreters.list_all(require_ready=True)] + + +def get_current(): + """Return the currently running interpreter.""" + id, whence = _interpreters.get_current() + return Interpreter(id, _whence=whence) + + +def get_main(): + """Return the main interpreter.""" + id, whence = _interpreters.get_main() + assert whence == _interpreters.WHENCE_RUNTIME, repr(whence) + return Interpreter(id, _whence=whence) + + +_known = weakref.WeakValueDictionary() + +class Interpreter: + """A single Python interpreter. + + Attributes: + + "id" - the unique process-global ID number for the interpreter + "whence" - indicates where the interpreter was created + + If the interpreter wasn't created by this module + then any method that modifies the interpreter will fail, + i.e. .close(), .prepare_main(), .exec(), and .call() + """ + + _WHENCE_TO_STR = { + _interpreters.WHENCE_UNKNOWN: 'unknown', + _interpreters.WHENCE_RUNTIME: 'runtime init', + _interpreters.WHENCE_LEGACY_CAPI: 'legacy C-API', + _interpreters.WHENCE_CAPI: 'C-API', + _interpreters.WHENCE_XI: 'cross-interpreter C-API', + _interpreters.WHENCE_STDLIB: '_interpreters module', + } + + def __new__(cls, id, /, _whence=None, _ownsref=None): + # There is only one instance for any given ID. + if not isinstance(id, int): + raise TypeError(f'id must be an int, got {id!r}') + id = int(id) + if _whence is None: + if _ownsref: + _whence = _interpreters.WHENCE_STDLIB + else: + _whence = _interpreters.whence(id) + assert _whence in cls._WHENCE_TO_STR, repr(_whence) + if _ownsref is None: + _ownsref = (_whence == _interpreters.WHENCE_STDLIB) + try: + self = _known[id] + assert hasattr(self, '_ownsref') + except KeyError: + self = super().__new__(cls) + _known[id] = self + self._id = id + self._whence = _whence + self._ownsref = _ownsref + if _ownsref: + # This may raise InterpreterNotFoundError: + _interpreters.incref(id) + return self + + def __repr__(self): + return f'{type(self).__name__}({self.id})' + + def __hash__(self): + return hash(self._id) + + def __del__(self): + self._decref() + + # for pickling: + def __reduce__(self): + return (type(self), (self._id,)) + + # gh-135729: Globals might be destroyed by the time this is called, so we + # need to keep references ourself + def _decref(self, *, + InterpreterNotFoundError=InterpreterNotFoundError, + _interp_decref=_interpreters.decref, + ): + if not self._ownsref: + return + self._ownsref = False + try: + _interp_decref(self._id) + except InterpreterNotFoundError: + pass + + @property + def id(self): + return self._id + + @property + def whence(self): + return self._WHENCE_TO_STR[self._whence] + + def is_running(self): + """Return whether or not the identified interpreter is running.""" + return _interpreters.is_running(self._id) + + # Everything past here is available only to interpreters created by + # interpreters.create(). + + def close(self): + """Finalize and destroy the interpreter. + + Attempting to destroy the current interpreter results + in an InterpreterError. + """ + return _interpreters.destroy(self._id, restrict=True) + + def prepare_main(self, ns=None, /, **kwargs): + """Bind the given values into the interpreter's __main__. + + The values must be shareable. + """ + ns = dict(ns, **kwargs) if ns is not None else kwargs + _interpreters.set___main___attrs(self._id, ns, restrict=True) + + def exec(self, code, /): + """Run the given source code in the interpreter. + + This is essentially the same as calling the builtin "exec" + with this interpreter, using the __dict__ of its __main__ + module as both globals and locals. + + There is no return value. + + If the code raises an unhandled exception then an ExecutionFailed + exception is raised, which summarizes the unhandled exception. + The actual exception is discarded because objects cannot be + shared between interpreters. + + This blocks the current Python thread until done. During + that time, the previous interpreter is allowed to run + in other threads. + """ + excinfo = _interpreters.exec(self._id, code, restrict=True) + if excinfo is not None: + raise ExecutionFailed(excinfo) + + def _call(self, callable, args, kwargs): + res, excinfo = _interpreters.call(self._id, callable, args, kwargs, restrict=True) + if excinfo is not None: + raise ExecutionFailed(excinfo) + return res + + def call(self, callable, /, *args, **kwargs): + """Call the object in the interpreter with given args/kwargs. + + Nearly all callables, args, kwargs, and return values are + supported. All "shareable" objects are supported, as are + "stateless" functions (meaning non-closures that do not use + any globals). This method will fall back to pickle. + + If the callable raises an exception then the error display + (including full traceback) is sent back between the interpreters + and an ExecutionFailed exception is raised, much like what + happens with Interpreter.exec(). + """ + return self._call(callable, args, kwargs) + + def call_in_thread(self, callable, /, *args, **kwargs): + """Return a new thread that calls the object in the interpreter. + + The return value and any raised exception are discarded. + """ + t = threading.Thread(target=self._call, args=(callable, args, kwargs)) + t.start() + return t diff --git a/PythonLib/full/concurrent/interpreters/_crossinterp.py b/PythonLib/full/concurrent/interpreters/_crossinterp.py new file mode 100644 index 00000000..a5f46b20 --- /dev/null +++ b/PythonLib/full/concurrent/interpreters/_crossinterp.py @@ -0,0 +1,107 @@ +"""Common code between queues and channels.""" + + +class ItemInterpreterDestroyed(Exception): + """Raised when trying to get an item whose interpreter was destroyed.""" + + +class classonly: + """A non-data descriptor that makes a value only visible on the class. + + This is like the "classmethod" builtin, but does not show up on + instances of the class. It may be used as a decorator. + """ + + def __init__(self, value): + self.value = value + self.getter = classmethod(value).__get__ + self.name = None + + def __set_name__(self, cls, name): + if self.name is not None: + raise TypeError('already used') + self.name = name + + def __get__(self, obj, cls): + if obj is not None: + raise AttributeError(self.name) + # called on the class + return self.getter(None, cls) + + +class UnboundItem: + """Represents a cross-interpreter item no longer bound to an interpreter. + + An item is unbound when the interpreter that added it to the + cross-interpreter container is destroyed. + """ + + __slots__ = () + + @classonly + def singleton(cls, kind, module, name='UNBOUND'): + doc = cls.__doc__ + if doc: + doc = doc.replace( + 'cross-interpreter container', kind, + ).replace( + 'cross-interpreter', kind, + ) + subclass = type( + f'Unbound{kind.capitalize()}Item', + (cls,), + { + "_MODULE": module, + "_NAME": name, + "__doc__": doc, + }, + ) + return object.__new__(subclass) + + _MODULE = __name__ + _NAME = 'UNBOUND' + + def __new__(cls): + raise Exception(f'use {cls._MODULE}.{cls._NAME}') + + def __repr__(self): + return f'{self._MODULE}.{self._NAME}' +# return f'interpreters._queues.UNBOUND' + + +UNBOUND = object.__new__(UnboundItem) +UNBOUND_ERROR = object() +UNBOUND_REMOVE = object() + +_UNBOUND_CONSTANT_TO_FLAG = { + UNBOUND_REMOVE: 1, + UNBOUND_ERROR: 2, + UNBOUND: 3, +} +_UNBOUND_FLAG_TO_CONSTANT = {v: k + for k, v in _UNBOUND_CONSTANT_TO_FLAG.items()} + + +def serialize_unbound(unbound): + op = unbound + try: + flag = _UNBOUND_CONSTANT_TO_FLAG[op] + except KeyError: + raise NotImplementedError(f'unsupported unbound replacement op {op!r}') + return flag, + + +def resolve_unbound(flag, exctype_destroyed): + try: + op = _UNBOUND_FLAG_TO_CONSTANT[flag] + except KeyError: + raise NotImplementedError(f'unsupported unbound replacement op {flag!r}') + if op is UNBOUND_REMOVE: + # "remove" not possible here + raise NotImplementedError + elif op is UNBOUND_ERROR: + raise exctype_destroyed("item's original interpreter destroyed") + elif op is UNBOUND: + return UNBOUND + else: + raise NotImplementedError(repr(op)) diff --git a/PythonLib/full/concurrent/interpreters/_queues.py b/PythonLib/full/concurrent/interpreters/_queues.py new file mode 100644 index 00000000..81ea1098 --- /dev/null +++ b/PythonLib/full/concurrent/interpreters/_queues.py @@ -0,0 +1,289 @@ +"""Cross-interpreter Queues High Level Module.""" + +import pickle +import queue +import time +import weakref +import _interpqueues as _queues +from . import _crossinterp + +# aliases: +from _interpqueues import ( + QueueError, QueueNotFoundError, +) +from ._crossinterp import ( + UNBOUND_ERROR, UNBOUND_REMOVE, +) + +__all__ = [ + 'UNBOUND', 'UNBOUND_ERROR', 'UNBOUND_REMOVE', + 'create', 'list_all', + 'Queue', + 'QueueError', 'QueueNotFoundError', 'QueueEmpty', 'QueueFull', + 'ItemInterpreterDestroyed', +] + + +class QueueEmpty(QueueError, queue.Empty): + """Raised from get_nowait() when the queue is empty. + + It is also raised from get() if it times out. + """ + + +class QueueFull(QueueError, queue.Full): + """Raised from put_nowait() when the queue is full. + + It is also raised from put() if it times out. + """ + + +class ItemInterpreterDestroyed(QueueError, + _crossinterp.ItemInterpreterDestroyed): + """Raised from get() and get_nowait().""" + + +_SHARED_ONLY = 0 +_PICKLED = 1 + + +UNBOUND = _crossinterp.UnboundItem.singleton('queue', __name__) + + +def _serialize_unbound(unbound): + if unbound is UNBOUND: + unbound = _crossinterp.UNBOUND + return _crossinterp.serialize_unbound(unbound) + + +def _resolve_unbound(flag): + resolved = _crossinterp.resolve_unbound(flag, ItemInterpreterDestroyed) + if resolved is _crossinterp.UNBOUND: + resolved = UNBOUND + return resolved + + +def create(maxsize=0, *, unbounditems=UNBOUND): + """Return a new cross-interpreter queue. + + The queue may be used to pass data safely between interpreters. + + "unbounditems" sets the default for Queue.put(); see that method for + supported values. The default value is UNBOUND, which replaces + the unbound item. + """ + unbound = _serialize_unbound(unbounditems) + unboundop, = unbound + qid = _queues.create(maxsize, unboundop, -1) + self = Queue(qid) + self._set_unbound(unboundop, unbounditems) + return self + + +def list_all(): + """Return a list of all open queues.""" + queues = [] + for qid, unboundop, _ in _queues.list_all(): + self = Queue(qid) + if not hasattr(self, '_unbound'): + self._set_unbound(unboundop) + else: + assert self._unbound[0] == unboundop + queues.append(self) + return queues + + +_known_queues = weakref.WeakValueDictionary() + +class Queue: + """A cross-interpreter queue.""" + + def __new__(cls, id, /): + # There is only one instance for any given ID. + if isinstance(id, int): + id = int(id) + else: + raise TypeError(f'id must be an int, got {id!r}') + try: + self = _known_queues[id] + except KeyError: + self = super().__new__(cls) + self._id = id + _known_queues[id] = self + _queues.bind(id) + return self + + def __del__(self): + try: + _queues.release(self._id) + except QueueNotFoundError: + pass + try: + del _known_queues[self._id] + except KeyError: + pass + + def __repr__(self): + return f'{type(self).__name__}({self.id})' + + def __hash__(self): + return hash(self._id) + + # for pickling: + def __reduce__(self): + return (type(self), (self._id,)) + + def _set_unbound(self, op, items=None): + assert not hasattr(self, '_unbound') + if items is None: + items = _resolve_unbound(op) + unbound = (op, items) + self._unbound = unbound + return unbound + + @property + def id(self): + return self._id + + @property + def unbounditems(self): + try: + _, items = self._unbound + except AttributeError: + op, _ = _queues.get_queue_defaults(self._id) + _, items = self._set_unbound(op) + return items + + @property + def maxsize(self): + try: + return self._maxsize + except AttributeError: + self._maxsize = _queues.get_maxsize(self._id) + return self._maxsize + + def empty(self): + return self.qsize() == 0 + + def full(self): + return _queues.is_full(self._id) + + def qsize(self): + return _queues.get_count(self._id) + + def put(self, obj, block=True, timeout=None, *, + unbounditems=None, + _delay=10 / 1000, # 10 milliseconds + ): + """Add the object to the queue. + + If "block" is true, this blocks while the queue is full. + + For most objects, the object received through Queue.get() will + be a new one, equivalent to the original and not sharing any + actual underlying data. The notable exceptions include + cross-interpreter types (like Queue) and memoryview, where the + underlying data is actually shared. Furthermore, some types + can be sent through a queue more efficiently than others. This + group includes various immutable types like int, str, bytes, and + tuple (if the items are likewise efficiently shareable). See interpreters.is_shareable(). + + "unbounditems" controls the behavior of Queue.get() for the given + object if the current interpreter (calling put()) is later + destroyed. + + If "unbounditems" is None (the default) then it uses the + queue's default, set with create_queue(), + which is usually UNBOUND. + + If "unbounditems" is UNBOUND_ERROR then get() will raise an + ItemInterpreterDestroyed exception if the original interpreter + has been destroyed. This does not otherwise affect the queue; + the next call to put() will work like normal, returning the next + item in the queue. + + If "unbounditems" is UNBOUND_REMOVE then the item will be removed + from the queue as soon as the original interpreter is destroyed. + Be aware that this will introduce an imbalance between put() + and get() calls. + + If "unbounditems" is UNBOUND then it is returned by get() in place + of the unbound item. + """ + if not block: + return self.put_nowait(obj, unbounditems=unbounditems) + if unbounditems is None: + unboundop = -1 + else: + unboundop, = _serialize_unbound(unbounditems) + if timeout is not None: + timeout = int(timeout) + if timeout < 0: + raise ValueError(f'timeout value must be non-negative') + end = time.time() + timeout + while True: + try: + _queues.put(self._id, obj, unboundop) + except QueueFull as exc: + if timeout is not None and time.time() >= end: + raise # re-raise + time.sleep(_delay) + else: + break + + def put_nowait(self, obj, *, unbounditems=None): + if unbounditems is None: + unboundop = -1 + else: + unboundop, = _serialize_unbound(unbounditems) + _queues.put(self._id, obj, unboundop) + + def get(self, block=True, timeout=None, *, + _delay=10 / 1000, # 10 milliseconds + ): + """Return the next object from the queue. + + If "block" is true, this blocks while the queue is empty. + + If the next item's original interpreter has been destroyed + then the "next object" is determined by the value of the + "unbounditems" argument to put(). + """ + if not block: + return self.get_nowait() + if timeout is not None: + timeout = int(timeout) + if timeout < 0: + raise ValueError(f'timeout value must be non-negative') + end = time.time() + timeout + while True: + try: + obj, unboundop = _queues.get(self._id) + except QueueEmpty as exc: + if timeout is not None and time.time() >= end: + raise # re-raise + time.sleep(_delay) + else: + break + if unboundop is not None: + assert obj is None, repr(obj) + return _resolve_unbound(unboundop) + return obj + + def get_nowait(self): + """Return the next object from the channel. + + If the queue is empty then raise QueueEmpty. Otherwise this + is the same as get(). + """ + try: + obj, unboundop = _queues.get(self._id) + except QueueEmpty as exc: + raise # re-raise + if unboundop is not None: + assert obj is None, repr(obj) + return _resolve_unbound(unboundop) + return obj + + +_queues._register_heap_types(Queue, QueueEmpty, QueueFull) diff --git a/PythonLib/full/configparser.py b/PythonLib/full/configparser.py index f96704eb..d435a5c2 100644 --- a/PythonLib/full/configparser.py +++ b/PythonLib/full/configparser.py @@ -18,8 +18,8 @@ delimiters=('=', ':'), comment_prefixes=('#', ';'), inline_comment_prefixes=None, strict=True, empty_lines_in_values=True, default_section='DEFAULT', - interpolation=, converters=): - + interpolation=, converters=, + allow_unnamed_section=False): Create the parser. When `defaults` is given, it is initialized into the dictionary or intrinsic defaults. The keys must be strings, the values must be appropriate for %()s string interpolation. @@ -68,6 +68,10 @@ converter gets its corresponding get*() method on the parser object and section proxies. + When `allow_unnamed_section` is True (default: False), options + without section are accepted: the section for these is + ``configparser.UNNAMED_SECTION``. + sections() Return all the configuration section names, sans DEFAULT. @@ -139,24 +143,27 @@ between keys and values are surrounded by spaces. """ -from collections.abc import MutableMapping +# Do not import dataclasses; overhead is unacceptable (gh-117703) + +from collections.abc import Iterable, MutableMapping from collections import ChainMap as _ChainMap +import contextlib import functools import io import itertools import os import re import sys -import warnings __all__ = ("NoSectionError", "DuplicateOptionError", "DuplicateSectionError", "NoOptionError", "InterpolationError", "InterpolationDepthError", "InterpolationMissingOptionError", "InterpolationSyntaxError", "ParsingError", "MissingSectionHeaderError", - "ConfigParser", "RawConfigParser", + "MultilineContinuationError", "UnnamedSectionDisabledError", + "InvalidWriteError", "ConfigParser", "RawConfigParser", "Interpolation", "BasicInterpolation", "ExtendedInterpolation", - "LegacyInterpolation", "SectionProxy", "ConverterMapping", - "DEFAULTSECT", "MAX_INTERPOLATION_DEPTH") + "SectionProxy", "ConverterMapping", + "DEFAULTSECT", "MAX_INTERPOLATION_DEPTH", "UNNAMED_SECTION") _default_dict = dict DEFAULTSECT = "DEFAULT" @@ -298,15 +305,33 @@ def __init__(self, option, section, rawval): class ParsingError(Error): """Raised when a configuration file does not follow legal syntax.""" - def __init__(self, source): + def __init__(self, source, *args): super().__init__(f'Source contains parsing errors: {source!r}') self.source = source self.errors = [] self.args = (source, ) + if args: + self.append(*args) def append(self, lineno, line): self.errors.append((lineno, line)) - self.message += '\n\t[line %2d]: %s' % (lineno, line) + self.message += '\n\t[line %2d]: %s' % (lineno, repr(line)) + + def combine(self, others): + for other in others: + for error in other.errors: + self.append(*error) + return self + + @staticmethod + def _raise_all(exceptions: Iterable['ParsingError']): + """ + Combine any number of ParsingErrors into one and raise it. + """ + exceptions = iter(exceptions) + with contextlib.suppress(StopIteration): + raise next(exceptions).combine(exceptions) + class MissingSectionHeaderError(ParsingError): @@ -323,6 +348,44 @@ def __init__(self, filename, lineno, line): self.args = (filename, lineno, line) +class MultilineContinuationError(ParsingError): + """Raised when a key without value is followed by continuation line""" + def __init__(self, filename, lineno, line): + Error.__init__( + self, + "Key without value continued with an indented line.\n" + "file: %r, line: %d\n%r" + %(filename, lineno, line)) + self.source = filename + self.lineno = lineno + self.line = line + self.args = (filename, lineno, line) + + +class UnnamedSectionDisabledError(Error): + """Raised when an attempt to use UNNAMED_SECTION is made with the + feature disabled.""" + def __init__(self): + Error.__init__(self, "Support for UNNAMED_SECTION is disabled.") + + +class _UnnamedSection: + + def __repr__(self): + return "" + +class InvalidWriteError(Error): + """Raised when attempting to write data that the parser would read back differently. + ex: writing a key which begins with the section header pattern would read back as a + new section """ + + def __init__(self, msg=''): + Error.__init__(self, msg) + + +UNNAMED_SECTION = _UnnamedSection() + + # Used in parser getters to indicate the default behaviour when a specific # option is not found it to raise an exception. Created to enable `None` as # a valid fallback value. @@ -478,6 +541,8 @@ def _interpolate_some(self, parser, option, accum, rest, section, map, except (KeyError, NoSectionError, NoOptionError): raise InterpolationMissingOptionError( option, section, rawval, ":".join(path)) from None + if v is None: + continue if "$" in v: self._interpolate_some(parser, opt, accum, v, sect, dict(parser.items(sect, raw=True)), @@ -491,51 +556,51 @@ def _interpolate_some(self, parser, option, accum, rest, section, map, "found: %r" % (rest,)) -class LegacyInterpolation(Interpolation): - """Deprecated interpolation used in old versions of ConfigParser. - Use BasicInterpolation or ExtendedInterpolation instead.""" +class _ReadState: + elements_added : set[str] + cursect : dict[str, str] | None = None + sectname : str | None = None + optname : str | None = None + lineno : int = 0 + indent_level : int = 0 + errors : list[ParsingError] - _KEYCRE = re.compile(r"%\(([^)]*)\)s|.") + def __init__(self): + self.elements_added = set() + self.errors = list() - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) - warnings.warn( - "LegacyInterpolation has been deprecated since Python 3.2 " - "and will be removed from the configparser module in Python 3.13. " - "Use BasicInterpolation or ExtendedInterpolation instead.", - DeprecationWarning, stacklevel=2 - ) - def before_get(self, parser, section, option, value, vars): - rawval = value - depth = MAX_INTERPOLATION_DEPTH - while depth: # Loop through this until it's done - depth -= 1 - if value and "%(" in value: - replace = functools.partial(self._interpolation_replace, - parser=parser) - value = self._KEYCRE.sub(replace, value) - try: - value = value % vars - except KeyError as e: - raise InterpolationMissingOptionError( - option, section, rawval, e.args[0]) from None - else: - break - if value and "%(" in value: - raise InterpolationDepthError(option, section, rawval) - return value +class _Line(str): + __slots__ = 'clean', 'has_comments' - def before_set(self, parser, section, option, value): - return value + def __new__(cls, val, *args, **kwargs): + return super().__new__(cls, val) - @staticmethod - def _interpolation_replace(match, parser): - s = match.group(1) - if s is None: - return match.group() - else: - return "%%(%s)s" % parser.optionxform(s) + def __init__(self, val, comments): + trimmed = val.strip() + self.clean = comments.strip(trimmed) + self.has_comments = trimmed != self.clean + + +class _CommentSpec: + def __init__(self, full_prefixes, inline_prefixes): + full_patterns = ( + # prefix at the beginning of a line + fr'^({re.escape(prefix)}).*' + for prefix in full_prefixes + ) + inline_patterns = ( + # prefix at the beginning of the line or following a space + fr'(^|\s)({re.escape(prefix)}.*)' + for prefix in inline_prefixes + ) + self.pattern = re.compile('|'.join(itertools.chain(full_patterns, inline_patterns))) + + def strip(self, text): + return self.pattern.sub('', text).rstrip() + + def wrap(self, text): + return _Line(text, self) class RawConfigParser(MutableMapping): @@ -584,7 +649,8 @@ def __init__(self, defaults=None, dict_type=_default_dict, comment_prefixes=('#', ';'), inline_comment_prefixes=None, strict=True, empty_lines_in_values=True, default_section=DEFAULTSECT, - interpolation=_UNSET, converters=_UNSET): + interpolation=_UNSET, converters=_UNSET, + allow_unnamed_section=False,): self._dict = dict_type self._sections = self._dict() @@ -603,8 +669,7 @@ def __init__(self, defaults=None, dict_type=_default_dict, else: self._optcre = re.compile(self._OPT_TMPL.format(delim=d), re.VERBOSE) - self._comment_prefixes = tuple(comment_prefixes or ()) - self._inline_comment_prefixes = tuple(inline_comment_prefixes or ()) + self._comments = _CommentSpec(comment_prefixes or (), inline_comment_prefixes or ()) self._strict = strict self._allow_no_value = allow_no_value self._empty_lines_in_values = empty_lines_in_values @@ -623,6 +688,7 @@ def __init__(self, defaults=None, dict_type=_default_dict, self._converters.update(converters) if defaults: self._read_defaults(defaults) + self._allow_unnamed_section = allow_unnamed_section def defaults(self): return self._defaults @@ -641,6 +707,10 @@ def add_section(self, section): if section == self.default_section: raise ValueError('Invalid section name: %r' % section) + if section is UNNAMED_SECTION: + if not self._allow_unnamed_section: + raise UnnamedSectionDisabledError + if section in self._sections: raise DuplicateSectionError(section) self._sections[section] = self._dict() @@ -724,7 +794,8 @@ def read_dict(self, dictionary, source=''): """ elements_added = set() for section, keys in dictionary.items(): - section = str(section) + if section is not UNNAMED_SECTION: + section = str(section) try: self.add_section(section) except (DuplicateSectionError, ValueError): @@ -896,14 +967,21 @@ def write(self, fp, space_around_delimiters=True): if self._defaults: self._write_section(fp, self.default_section, self._defaults.items(), d) + if UNNAMED_SECTION in self._sections and self._sections[UNNAMED_SECTION]: + self._write_section(fp, UNNAMED_SECTION, self._sections[UNNAMED_SECTION].items(), d, unnamed=True) + for section in self._sections: + if section is UNNAMED_SECTION: + continue self._write_section(fp, section, self._sections[section].items(), d) - def _write_section(self, fp, section_name, section_items, delimiter): - """Write a single section to the specified `fp`.""" - fp.write("[{}]\n".format(section_name)) + def _write_section(self, fp, section_name, section_items, delimiter, unnamed=False): + """Write a single section to the specified 'fp'.""" + if not unnamed: + fp.write("[{}]\n".format(section_name)) for key, value in section_items: + self._validate_key_contents(key) value = self._interpolation.before_write(self, section_name, key, value) if value is not None or not self._allow_no_value: @@ -988,112 +1066,111 @@ def _read(self, fp, fpname): in an otherwise empty line or may be entered in lines holding values or section names. Please note that comments get stripped off when reading configuration files. """ - elements_added = set() - cursect = None # None, or a dictionary - sectname = None - optname = None - lineno = 0 - indent_level = 0 - e = None # None, or an exception try: - for lineno, line in enumerate(fp, start=1): - comment_start = sys.maxsize - # strip inline comments - inline_prefixes = {p: -1 for p in self._inline_comment_prefixes} - while comment_start == sys.maxsize and inline_prefixes: - next_prefixes = {} - for prefix, index in inline_prefixes.items(): - index = line.find(prefix, index+1) - if index == -1: - continue - next_prefixes[prefix] = index - if index == 0 or (index > 0 and line[index-1].isspace()): - comment_start = min(comment_start, index) - inline_prefixes = next_prefixes - # strip full line comments - for prefix in self._comment_prefixes: - if line.strip().startswith(prefix): - comment_start = 0 - break - if comment_start == sys.maxsize: - comment_start = None - value = line[:comment_start].strip() - if not value: - if self._empty_lines_in_values: - # add empty line to the value, but only if there was no - # comment on the line - if (comment_start is None and - cursect is not None and - optname and - cursect[optname] is not None): - cursect[optname].append('') # newlines added at join - else: - # empty line marks end of value - indent_level = sys.maxsize - continue - # continuation line? - first_nonspace = self.NONSPACECRE.search(line) - cur_indent_level = first_nonspace.start() if first_nonspace else 0 - if (cursect is not None and optname and - cur_indent_level > indent_level): - cursect[optname].append(value) - # a section header or option header? - else: - indent_level = cur_indent_level - # is it a section header? - mo = self.SECTCRE.match(value) - if mo: - sectname = mo.group('header') - if sectname in self._sections: - if self._strict and sectname in elements_added: - raise DuplicateSectionError(sectname, fpname, - lineno) - cursect = self._sections[sectname] - elements_added.add(sectname) - elif sectname == self.default_section: - cursect = self._defaults - else: - cursect = self._dict() - self._sections[sectname] = cursect - self._proxies[sectname] = SectionProxy(self, sectname) - elements_added.add(sectname) - # So sections can't start with a continuation line - optname = None - # no section header in the file? - elif cursect is None: - raise MissingSectionHeaderError(fpname, lineno, line) - # an option line? - else: - mo = self._optcre.match(value) - if mo: - optname, vi, optval = mo.group('option', 'vi', 'value') - if not optname: - e = self._handle_error(e, fpname, lineno, line) - optname = self.optionxform(optname.rstrip()) - if (self._strict and - (sectname, optname) in elements_added): - raise DuplicateOptionError(sectname, optname, - fpname, lineno) - elements_added.add((sectname, optname)) - # This check is fine because the OPTCRE cannot - # match if it would set optval to None - if optval is not None: - optval = optval.strip() - cursect[optname] = [optval] - else: - # valueless option handling - cursect[optname] = None - else: - # a non-fatal parsing error occurred. set up the - # exception but keep going. the exception will be - # raised at the end of the file and will contain a - # list of all bogus lines - e = self._handle_error(e, fpname, lineno, line) + ParsingError._raise_all(self._read_inner(fp, fpname)) finally: self._join_multiline_values() - # if any parsing errors occurred, raise an exception - if e: - raise e + + def _read_inner(self, fp, fpname): + st = _ReadState() + + for st.lineno, line in enumerate(map(self._comments.wrap, fp), start=1): + if not line.clean: + if self._empty_lines_in_values: + # add empty line to the value, but only if there was no + # comment on the line + if (not line.has_comments and + st.cursect is not None and + st.optname and + st.cursect[st.optname] is not None): + st.cursect[st.optname].append('') # newlines added at join + else: + # empty line marks end of value + st.indent_level = sys.maxsize + continue + + first_nonspace = self.NONSPACECRE.search(line) + st.cur_indent_level = first_nonspace.start() if first_nonspace else 0 + + if self._handle_continuation_line(st, line, fpname): + continue + + self._handle_rest(st, line, fpname) + + return st.errors + + def _handle_continuation_line(self, st, line, fpname): + # continuation line? + is_continue = (st.cursect is not None and st.optname and + st.cur_indent_level > st.indent_level) + if is_continue: + if st.cursect[st.optname] is None: + raise MultilineContinuationError(fpname, st.lineno, line) + st.cursect[st.optname].append(line.clean) + return is_continue + + def _handle_rest(self, st, line, fpname): + # a section header or option header? + if self._allow_unnamed_section and st.cursect is None: + self._handle_header(st, UNNAMED_SECTION, fpname) + + st.indent_level = st.cur_indent_level + # is it a section header? + mo = self.SECTCRE.match(line.clean) + + if not mo and st.cursect is None: + raise MissingSectionHeaderError(fpname, st.lineno, line) + + self._handle_header(st, mo.group('header'), fpname) if mo else self._handle_option(st, line, fpname) + + def _handle_header(self, st, sectname, fpname): + st.sectname = sectname + if st.sectname in self._sections: + if self._strict and st.sectname in st.elements_added: + raise DuplicateSectionError(st.sectname, fpname, + st.lineno) + st.cursect = self._sections[st.sectname] + st.elements_added.add(st.sectname) + elif st.sectname == self.default_section: + st.cursect = self._defaults + else: + st.cursect = self._dict() + self._sections[st.sectname] = st.cursect + self._proxies[st.sectname] = SectionProxy(self, st.sectname) + st.elements_added.add(st.sectname) + # So sections can't start with a continuation line + st.optname = None + + def _handle_option(self, st, line, fpname): + # an option line? + st.indent_level = st.cur_indent_level + + mo = self._optcre.match(line.clean) + if not mo: + # a non-fatal parsing error occurred. set up the + # exception but keep going. the exception will be + # raised at the end of the file and will contain a + # list of all bogus lines + st.errors.append(ParsingError(fpname, st.lineno, line)) + return + + st.optname, vi, optval = mo.group('option', 'vi', 'value') + if not st.optname: + st.errors.append(ParsingError(fpname, st.lineno, line)) + st.optname = self.optionxform(st.optname.rstrip()) + if (self._strict and + (st.sectname, st.optname) in st.elements_added): + raise DuplicateOptionError(st.sectname, st.optname, + fpname, st.lineno) + st.elements_added.add((st.sectname, st.optname)) + # This check is fine because the OPTCRE cannot + # match if it would set optval to None + if optval is not None: + optval = optval.strip() + st.cursect[st.optname] = [optval] + else: + # valueless option handling + st.cursect[st.optname] = None def _join_multiline_values(self): defaults = self.default_section, self._defaults @@ -1113,12 +1190,6 @@ def _read_defaults(self, defaults): for key, value in defaults.items(): self._defaults[self.optionxform(key)] = value - def _handle_error(self, exc, fpname, lineno, line): - if not exc: - exc = ParsingError(fpname) - exc.append(lineno, repr(line)) - return exc - def _unify_values(self, section, vars): """Create a sequence of lookups with 'vars' taking priority over the 'section' which takes priority over the DEFAULTSECT. @@ -1146,21 +1217,32 @@ def _convert_to_boolean(self, value): raise ValueError('Not a boolean: %s' % value) return self.BOOLEAN_STATES[value.lower()] + def _validate_key_contents(self, key): + """Raises an InvalidWriteError for any keys containing + delimiters or that begins with the section header pattern""" + if re.match(self.SECTCRE, key): + raise InvalidWriteError( + f"Cannot write key {key}; begins with section pattern") + for delim in self._delimiters: + if delim in key: + raise InvalidWriteError( + f"Cannot write key {key}; contains delimiter {delim}") + def _validate_value_types(self, *, section="", option="", value=""): - """Raises a TypeError for non-string values. + """Raises a TypeError for illegal non-string values. - The only legal non-string value if we allow valueless - options is None, so we need to check if the value is a - string if: - - we do not allow valueless options, or - - we allow valueless options but the value is not None + Legal non-string values are UNNAMED_SECTION and falsey values if + they are allowed. For compatibility reasons this method is not used in classic set() for RawConfigParsers. It is invoked in every case for mapping protocol access and in ConfigParser.set(). """ - if not isinstance(section, str): - raise TypeError("section names must be strings") + if section is UNNAMED_SECTION: + if not self._allow_unnamed_section: + raise UnnamedSectionDisabledError + elif not isinstance(section, str): + raise TypeError("section names must be strings or UNNAMED_SECTION") if not isinstance(option, str): raise TypeError("option keys must be strings") if not self._allow_no_value or value: diff --git a/PythonLib/full/contextlib.py b/PythonLib/full/contextlib.py index b831d891..5b646fab 100644 --- a/PythonLib/full/contextlib.py +++ b/PythonLib/full/contextlib.py @@ -20,6 +20,8 @@ class AbstractContextManager(abc.ABC): __class_getitem__ = classmethod(GenericAlias) + __slots__ = () + def __enter__(self): """Return `self` upon entering the runtime context.""" return self @@ -42,6 +44,8 @@ class AbstractAsyncContextManager(abc.ABC): __class_getitem__ = classmethod(GenericAlias) + __slots__ = () + async def __aenter__(self): """Return `self` upon entering the runtime context.""" return self @@ -565,11 +569,12 @@ def __enter__(self): return self def __exit__(self, *exc_details): - received_exc = exc_details[0] is not None + exc = exc_details[1] + received_exc = exc is not None # We manipulate the exception state so it behaves as though # we were actually nesting multiple with statements - frame_exc = sys.exc_info()[1] + frame_exc = sys.exception() def _fix_exception_context(new_exc, old_exc): # Context may not be correct, so find the end of the chain while 1: @@ -592,24 +597,28 @@ def _fix_exception_context(new_exc, old_exc): is_sync, cb = self._exit_callbacks.pop() assert is_sync try: + if exc is None: + exc_details = None, None, None + else: + exc_details = type(exc), exc, exc.__traceback__ if cb(*exc_details): suppressed_exc = True pending_raise = False - exc_details = (None, None, None) - except: - new_exc_details = sys.exc_info() + exc = None + except BaseException as new_exc: # simulate the stack of exceptions by setting the context - _fix_exception_context(new_exc_details[1], exc_details[1]) + _fix_exception_context(new_exc, exc) pending_raise = True - exc_details = new_exc_details + exc = new_exc + if pending_raise: try: - # bare "raise exc_details[1]" replaces our carefully + # bare "raise exc" replaces our carefully # set-up context - fixed_ctx = exc_details[1].__context__ - raise exc_details[1] + fixed_ctx = exc.__context__ + raise exc except BaseException: - exc_details[1].__context__ = fixed_ctx + exc.__context__ = fixed_ctx raise return received_exc and suppressed_exc @@ -705,11 +714,12 @@ async def __aenter__(self): return self async def __aexit__(self, *exc_details): - received_exc = exc_details[0] is not None + exc = exc_details[1] + received_exc = exc is not None # We manipulate the exception state so it behaves as though # we were actually nesting multiple with statements - frame_exc = sys.exc_info()[1] + frame_exc = sys.exception() def _fix_exception_context(new_exc, old_exc): # Context may not be correct, so find the end of the chain while 1: @@ -731,6 +741,10 @@ def _fix_exception_context(new_exc, old_exc): while self._exit_callbacks: is_sync, cb = self._exit_callbacks.pop() try: + if exc is None: + exc_details = None, None, None + else: + exc_details = type(exc), exc, exc.__traceback__ if is_sync: cb_suppress = cb(*exc_details) else: @@ -739,21 +753,21 @@ def _fix_exception_context(new_exc, old_exc): if cb_suppress: suppressed_exc = True pending_raise = False - exc_details = (None, None, None) - except: - new_exc_details = sys.exc_info() + exc = None + except BaseException as new_exc: # simulate the stack of exceptions by setting the context - _fix_exception_context(new_exc_details[1], exc_details[1]) + _fix_exception_context(new_exc, exc) pending_raise = True - exc_details = new_exc_details + exc = new_exc + if pending_raise: try: - # bare "raise exc_details[1]" replaces our carefully + # bare "raise exc" replaces our carefully # set-up context - fixed_ctx = exc_details[1].__context__ - raise exc_details[1] + fixed_ctx = exc.__context__ + raise exc except BaseException: - exc_details[1].__context__ = fixed_ctx + exc.__context__ = fixed_ctx raise return received_exc and suppressed_exc diff --git a/PythonLib/full/contextvars.py b/PythonLib/full/contextvars.py index d78c80df..14514f18 100644 --- a/PythonLib/full/contextvars.py +++ b/PythonLib/full/contextvars.py @@ -1,4 +1,8 @@ +import _collections_abc from _contextvars import Context, ContextVar, Token, copy_context __all__ = ('Context', 'ContextVar', 'Token', 'copy_context') + + +_collections_abc.Mapping.register(Context) diff --git a/PythonLib/full/copy.py b/PythonLib/full/copy.py index da2908ef..c64fc076 100644 --- a/PythonLib/full/copy.py +++ b/PythonLib/full/copy.py @@ -4,8 +4,9 @@ import copy - x = copy.copy(y) # make a shallow copy of y - x = copy.deepcopy(y) # make a deep copy of y + x = copy.copy(y) # make a shallow copy of y + x = copy.deepcopy(y) # make a deep copy of y + x = copy.replace(y, a=1, b=2) # new object with fields replaced, as defined by `__replace__` For module specific errors, copy.Error is raised. @@ -56,7 +57,7 @@ class Error(Exception): pass error = Error # backward compatibility -__all__ = ["Error", "copy", "deepcopy"] +__all__ = ["Error", "copy", "deepcopy", "replace"] def copy(x): """Shallow copy operation on arbitrary Python objects. @@ -66,13 +67,15 @@ def copy(x): cls = type(x) - copier = _copy_dispatch.get(cls) - if copier: - return copier(x) + if cls in _copy_atomic_types: + return x + if cls in _copy_builtin_containers: + return cls.copy(x) + if issubclass(cls, type): # treat it as a regular class: - return _copy_immutable(x) + return x copier = getattr(cls, "__copy__", None) if copier is not None: @@ -97,23 +100,12 @@ def copy(x): return _reconstruct(x, None, *rv) -_copy_dispatch = d = {} - -def _copy_immutable(x): - return x -for t in (types.NoneType, int, float, bool, complex, str, tuple, +_copy_atomic_types = {types.NoneType, int, float, bool, complex, str, tuple, bytes, frozenset, type, range, slice, property, types.BuiltinFunctionType, types.EllipsisType, types.NotImplementedType, types.FunctionType, types.CodeType, - weakref.ref): - d[t] = _copy_immutable - -d[list] = list.copy -d[dict] = dict.copy -d[set] = set.copy -d[bytearray] = bytearray.copy - -del d, t + weakref.ref, super} +_copy_builtin_containers = {list, dict, set, bytearray} def deepcopy(x, memo=None, _nil=[]): """Deep copy operation on arbitrary Python objects. @@ -121,22 +113,25 @@ def deepcopy(x, memo=None, _nil=[]): See the module's __doc__ string for more info. """ - if memo is None: - memo = {} + cls = type(x) - d = id(x) - y = memo.get(d, _nil) - if y is not _nil: - return y + if cls in _atomic_types: + return x - cls = type(x) + d = id(x) + if memo is None: + memo = {} + else: + y = memo.get(d, _nil) + if y is not _nil: + return y copier = _deepcopy_dispatch.get(cls) if copier is not None: y = copier(x, memo) else: if issubclass(cls, type): - y = _deepcopy_atomic(x, memo) + y = x # atomic copy else: copier = getattr(x, "__deepcopy__", None) if copier is not None: @@ -167,26 +162,12 @@ def deepcopy(x, memo=None, _nil=[]): _keep_alive(x, memo) # Make sure x lives at least as long as d return y +_atomic_types = {types.NoneType, types.EllipsisType, types.NotImplementedType, + int, float, bool, complex, bytes, str, types.CodeType, type, range, + types.BuiltinFunctionType, types.FunctionType, weakref.ref, property} + _deepcopy_dispatch = d = {} -def _deepcopy_atomic(x, memo): - return x -d[types.NoneType] = _deepcopy_atomic -d[types.EllipsisType] = _deepcopy_atomic -d[types.NotImplementedType] = _deepcopy_atomic -d[int] = _deepcopy_atomic -d[float] = _deepcopy_atomic -d[bool] = _deepcopy_atomic -d[complex] = _deepcopy_atomic -d[bytes] = _deepcopy_atomic -d[str] = _deepcopy_atomic -d[types.CodeType] = _deepcopy_atomic -d[type] = _deepcopy_atomic -d[range] = _deepcopy_atomic -d[types.BuiltinFunctionType] = _deepcopy_atomic -d[types.FunctionType] = _deepcopy_atomic -d[weakref.ref] = _deepcopy_atomic -d[property] = _deepcopy_atomic def _deepcopy_list(x, memo, deepcopy=deepcopy): y = [] @@ -290,3 +271,16 @@ def _reconstruct(x, memo, func, args, return y del types, weakref + + +def replace(obj, /, **changes): + """Return a new object replacing specified fields with new values. + + This is especially useful for immutable objects, like named tuples or + frozen dataclasses. + """ + cls = obj.__class__ + func = getattr(cls, '__replace__', None) + if func is None: + raise TypeError(f"replace() does not support {cls.__name__} objects") + return func(obj, **changes) diff --git a/PythonLib/full/copyreg.py b/PythonLib/full/copyreg.py index 57839240..a5e8add4 100644 --- a/PythonLib/full/copyreg.py +++ b/PythonLib/full/copyreg.py @@ -31,11 +31,16 @@ def pickle_complex(c): pickle(complex, pickle_complex, complex) def pickle_union(obj): - import functools, operator - return functools.reduce, (operator.or_, obj.__args__) + import typing, operator + return operator.getitem, (typing.Union, obj.__args__) pickle(type(int | str), pickle_union) +def pickle_super(obj): + return super, (obj.__thisclass__, obj.__self__) + +pickle(super, pickle_super) + # Support for pickling new-style objects def _reconstructor(cls, base, state): diff --git a/PythonLib/full/crypt.py b/PythonLib/full/crypt.py deleted file mode 100644 index de4a14a3..00000000 --- a/PythonLib/full/crypt.py +++ /dev/null @@ -1,124 +0,0 @@ -"""Wrapper to the POSIX crypt library call and associated functionality.""" - -import sys as _sys - -try: - import _crypt -except ModuleNotFoundError: - if _sys.platform == 'win32': - raise ImportError("The crypt module is not supported on Windows") - else: - raise ImportError("The required _crypt module was not built as part of CPython") - -import errno -import string as _string -import warnings -from random import SystemRandom as _SystemRandom -from collections import namedtuple as _namedtuple - - -warnings._deprecated(__name__, remove=(3, 13)) - - -_saltchars = _string.ascii_letters + _string.digits + './' -_sr = _SystemRandom() - - -class _Method(_namedtuple('_Method', 'name ident salt_chars total_size')): - - """Class representing a salt method per the Modular Crypt Format or the - legacy 2-character crypt method.""" - - def __repr__(self): - return ''.format(self.name) - - -def mksalt(method=None, *, rounds=None): - """Generate a salt for the specified method. - - If not specified, the strongest available method will be used. - - """ - if method is None: - method = methods[0] - if rounds is not None and not isinstance(rounds, int): - raise TypeError(f'{rounds.__class__.__name__} object cannot be ' - f'interpreted as an integer') - if not method.ident: # traditional - s = '' - else: # modular - s = f'${method.ident}$' - - if method.ident and method.ident[0] == '2': # Blowfish variants - if rounds is None: - log_rounds = 12 - else: - log_rounds = int.bit_length(rounds-1) - if rounds != 1 << log_rounds: - raise ValueError('rounds must be a power of 2') - if not 4 <= log_rounds <= 31: - raise ValueError('rounds out of the range 2**4 to 2**31') - s += f'{log_rounds:02d}$' - elif method.ident in ('5', '6'): # SHA-2 - if rounds is not None: - if not 1000 <= rounds <= 999_999_999: - raise ValueError('rounds out of the range 1000 to 999_999_999') - s += f'rounds={rounds}$' - elif rounds is not None: - raise ValueError(f"{method} doesn't support the rounds argument") - - s += ''.join(_sr.choice(_saltchars) for char in range(method.salt_chars)) - return s - - -def crypt(word, salt=None): - """Return a string representing the one-way hash of a password, with a salt - prepended. - - If ``salt`` is not specified or is ``None``, the strongest - available method will be selected and a salt generated. Otherwise, - ``salt`` may be one of the ``crypt.METHOD_*`` values, or a string as - returned by ``crypt.mksalt()``. - - """ - if salt is None or isinstance(salt, _Method): - salt = mksalt(salt) - return _crypt.crypt(word, salt) - - -# available salting/crypto methods -methods = [] - -def _add_method(name, *args, rounds=None): - method = _Method(name, *args) - globals()['METHOD_' + name] = method - salt = mksalt(method, rounds=rounds) - result = None - try: - result = crypt('', salt) - except OSError as e: - # Not all libc libraries support all encryption methods. - if e.errno in {errno.EINVAL, errno.EPERM, errno.ENOSYS}: - return False - raise - if result and len(result) == method.total_size: - methods.append(method) - return True - return False - -_add_method('SHA512', '6', 16, 106) -_add_method('SHA256', '5', 16, 63) - -# Choose the strongest supported version of Blowfish hashing. -# Early versions have flaws. Version 'a' fixes flaws of -# the initial implementation, 'b' fixes flaws of 'a'. -# 'y' is the same as 'b', for compatibility -# with openwall crypt_blowfish. -for _v in 'b', 'y', 'a', '': - if _add_method('BLOWFISH', '2' + _v, 22, 59 + len(_v), rounds=1<<4): - break - -_add_method('MD5', '1', 8, 34) -_add_method('CRYPT', None, 2, 13) - -del _v, _add_method diff --git a/PythonLib/full/csv.py b/PythonLib/full/csv.py index 77f30c8d..0a627ba7 100644 --- a/PythonLib/full/csv.py +++ b/PythonLib/full/csv.py @@ -1,28 +1,89 @@ -""" -csv.py - read/write/investigate CSV files +r""" +CSV parsing and writing. + +This module provides classes that assist in the reading and writing +of Comma Separated Value (CSV) files, and implements the interface +described by PEP 305. Although many CSV files are simple to parse, +the format is not formally defined by a stable specification and +is subtle enough that parsing lines of a CSV file with something +like line.split(",") is bound to fail. The module supports three +basic APIs: reading, writing, and registration of dialects. + + +DIALECT REGISTRATION: + +Readers and writers support a dialect argument, which is a convenient +handle on a group of settings. When the dialect argument is a string, +it identifies one of the dialects previously registered with the module. +If it is a class or instance, the attributes of the argument are used as +the settings for the reader or writer: + + class excel: + delimiter = ',' + quotechar = '"' + escapechar = None + doublequote = True + skipinitialspace = False + lineterminator = '\r\n' + quoting = QUOTE_MINIMAL + +SETTINGS: + + * quotechar - specifies a one-character string to use as the + quoting character. It defaults to '"'. + * delimiter - specifies a one-character string to use as the + field separator. It defaults to ','. + * skipinitialspace - specifies how to interpret spaces which + immediately follow a delimiter. It defaults to False, which + means that spaces immediately following a delimiter is part + of the following field. + * lineterminator - specifies the character sequence which should + terminate rows. + * quoting - controls when quotes should be generated by the writer. + It can take on any of the following module constants: + + csv.QUOTE_MINIMAL means only when required, for example, when a + field contains either the quotechar or the delimiter + csv.QUOTE_ALL means that quotes are always placed around fields. + csv.QUOTE_NONNUMERIC means that quotes are always placed around + fields which do not parse as integers or floating-point + numbers. + csv.QUOTE_STRINGS means that quotes are always placed around + fields which are strings. Note that the Python value None + is not a string. + csv.QUOTE_NOTNULL means that quotes are only placed around fields + that are not the Python value None. + csv.QUOTE_NONE means that quotes are never placed around fields. + * escapechar - specifies a one-character string used to escape + the delimiter when quoting is set to QUOTE_NONE. + * doublequote - controls the handling of quotes inside fields. When + True, two consecutive quotes are interpreted as one during read, + and when writing, each quote character embedded in the data is + written as two quotes """ -import re import types -from _csv import Error, __version__, writer, reader, register_dialect, \ +from _csv import Error, writer, reader, register_dialect, \ unregister_dialect, get_dialect, list_dialects, \ field_size_limit, \ QUOTE_MINIMAL, QUOTE_ALL, QUOTE_NONNUMERIC, QUOTE_NONE, \ - QUOTE_STRINGS, QUOTE_NOTNULL, \ - __doc__ + QUOTE_STRINGS, QUOTE_NOTNULL from _csv import Dialect as _Dialect from io import StringIO __all__ = ["QUOTE_MINIMAL", "QUOTE_ALL", "QUOTE_NONNUMERIC", "QUOTE_NONE", "QUOTE_STRINGS", "QUOTE_NOTNULL", - "Error", "Dialect", "__doc__", "excel", "excel_tab", + "Error", "Dialect", "excel", "excel_tab", "field_size_limit", "reader", "writer", "register_dialect", "get_dialect", "list_dialects", "Sniffer", - "unregister_dialect", "__version__", "DictReader", "DictWriter", + "unregister_dialect", "DictReader", "DictWriter", "unix_dialect"] +__version__ = "1.0" + + class Dialect: """Describe a CSV dialect. @@ -51,8 +112,8 @@ def _validate(self): try: _Dialect(self) except TypeError as e: - # We do this for compatibility with py2.3 - raise Error(str(e)) + # Re-raise to get a traceback showing more user code. + raise Error(str(e)) from None class excel(Dialect): """Describe the usual properties of Excel-generated CSV files.""" @@ -219,6 +280,7 @@ def _guess_quote_and_delimiter(self, data, delimiters): If there is no quotechar the delimiter can't be determined this way. """ + import re matches = [] for restr in (r'(?P[^\w\n"\'])(?P ?)(?P["\']).*?(?P=quote)(?P=delim)', # ,".*?", diff --git a/PythonLib/full/ctypes/__init__.py b/PythonLib/full/ctypes/__init__.py index 6cedee74..04ec0270 100644 --- a/PythonLib/full/ctypes/__init__.py +++ b/PythonLib/full/ctypes/__init__.py @@ -1,6 +1,8 @@ """create and manipulate C data types in Python""" -import os as _os, sys as _sys +import os as _os +import sys as _sys +import sysconfig as _sysconfig import types as _types __version__ = "1.1.0" @@ -12,6 +14,7 @@ from _ctypes import RTLD_LOCAL, RTLD_GLOBAL from _ctypes import ArgumentError from _ctypes import SIZEOF_TIME_T +from _ctypes import CField from struct import calcsize as _calcsize @@ -19,7 +22,7 @@ raise Exception("Version number mismatch", __version__, _ctypes_version) if _os.name == "nt": - from _ctypes import FormatError + from _ctypes import COMError, CopyComPointer, FormatError DEFAULT_MODE = RTLD_LOCAL if _os.name == "posix" and _sys.platform == "darwin": @@ -107,7 +110,7 @@ class CFunctionType(_CFuncPtr): return CFunctionType if _os.name == "nt": - from _ctypes import LoadLibrary as _dlopen + from _ctypes import LoadLibrary as _LoadLibrary from _ctypes import FUNCFLAG_STDCALL as _FUNCFLAG_STDCALL _win_functype_cache = {} @@ -161,6 +164,7 @@ def __repr__(self): return super().__repr__() except ValueError: return "%s()" % type(self).__name__ + __class_getitem__ = classmethod(_types.GenericAlias) _check_size(py_object, "P") class c_short(_SimpleCData): @@ -205,6 +209,18 @@ class c_longdouble(_SimpleCData): if sizeof(c_longdouble) == sizeof(c_double): c_longdouble = c_double +try: + class c_double_complex(_SimpleCData): + _type_ = "D" + _check_size(c_double_complex) + class c_float_complex(_SimpleCData): + _type_ = "F" + _check_size(c_float_complex) + class c_longdouble_complex(_SimpleCData): + _type_ = "G" +except AttributeError: + pass + if _calcsize("l") == _calcsize("q"): # if long and long long have the same size, make c_longlong an alias for c_long c_longlong = c_long @@ -252,7 +268,72 @@ class c_void_p(_SimpleCData): class c_bool(_SimpleCData): _type_ = "?" -from _ctypes import POINTER, pointer, _pointer_type_cache +def POINTER(cls): + """Create and return a new ctypes pointer type. + + Pointer types are cached and reused internally, + so calling this function repeatedly is cheap. + """ + if cls is None: + return c_void_p + try: + return cls.__pointer_type__ + except AttributeError: + pass + if isinstance(cls, str): + # handle old-style incomplete types (see test_ctypes.test_incomplete) + import warnings + warnings._deprecated("ctypes.POINTER with string", remove=(3, 19)) + try: + return _pointer_type_cache_fallback[cls] + except KeyError: + result = type(f'LP_{cls}', (_Pointer,), {}) + _pointer_type_cache_fallback[cls] = result + return result + + # create pointer type and set __pointer_type__ for cls + return type(f'LP_{cls.__name__}', (_Pointer,), {'_type_': cls}) + +def pointer(obj): + """Create a new pointer instance, pointing to 'obj'. + + The returned object is of the type POINTER(type(obj)). Note that if you + just want to pass a pointer to an object to a foreign function call, you + should use byref(obj) which is much faster. + """ + typ = POINTER(type(obj)) + return typ(obj) + +class _PointerTypeCache: + def __setitem__(self, cls, pointer_type): + import warnings + warnings._deprecated("ctypes._pointer_type_cache", remove=(3, 19)) + try: + cls.__pointer_type__ = pointer_type + except AttributeError: + _pointer_type_cache_fallback[cls] = pointer_type + + def __getitem__(self, cls): + import warnings + warnings._deprecated("ctypes._pointer_type_cache", remove=(3, 19)) + try: + return cls.__pointer_type__ + except AttributeError: + return _pointer_type_cache_fallback[cls] + + def get(self, cls, default=None): + import warnings + warnings._deprecated("ctypes._pointer_type_cache", remove=(3, 19)) + try: + return cls.__pointer_type__ + except AttributeError: + return _pointer_type_cache_fallback.get(cls, default) + + def __contains__(self, cls): + return hasattr(cls, '__pointer_type__') + +_pointer_type_cache_fallback = {} +_pointer_type_cache = _PointerTypeCache() class c_wchar_p(_SimpleCData): _type_ = "Z" @@ -263,7 +344,7 @@ class c_wchar(_SimpleCData): _type_ = "u" def _reset_cache(): - _pointer_type_cache.clear() + _pointer_type_cache_fallback.clear() _c_functype_cache.clear() if _os.name == "nt": _win_functype_cache.clear() @@ -271,7 +352,6 @@ def _reset_cache(): POINTER(c_wchar).from_param = c_wchar_p.from_param # _SimpleCData.c_char_p_from_param POINTER(c_char).from_param = c_char_p.from_param - _pointer_type_cache[None] = c_void_p def create_unicode_buffer(init, size=None): """create_unicode_buffer(aString) -> character array @@ -302,17 +382,11 @@ def create_unicode_buffer(init, size=None): raise TypeError(init) -# XXX Deprecated def SetPointerType(pointer, cls): - if _pointer_type_cache.get(cls, None) is not None: - raise RuntimeError("This type already exists in the cache") - if id(pointer) not in _pointer_type_cache: - raise RuntimeError("What's this???") + import warnings + warnings._deprecated("ctypes.SetPointerType", remove=(3, 15)) pointer.set_type(cls) - _pointer_type_cache[cls] = pointer - del _pointer_type_cache[id(pointer)] -# XXX Deprecated def ARRAY(typ, len): return typ * len @@ -344,41 +418,59 @@ def __init__(self, name, mode=DEFAULT_MODE, handle=None, use_errno=False, use_last_error=False, winmode=None): - if name: - name = _os.fspath(name) - self._name = name - flags = self._func_flags_ - if use_errno: - flags |= _FUNCFLAG_USE_ERRNO - if use_last_error: - flags |= _FUNCFLAG_USE_LASTERROR - if _sys.platform.startswith("aix"): - """When the name contains ".a(" and ends with ")", - e.g., "libFOO.a(libFOO.so)" - this is taken to be an - archive(member) syntax for dlopen(), and the mode is adjusted. - Otherwise, name is presented to dlopen() as a file argument. - """ - if name and name.endswith(")") and ".a(" in name: - mode |= ( _os.RTLD_MEMBER | _os.RTLD_NOW ) - if _os.name == "nt": - if winmode is not None: - mode = winmode - else: - import nt - mode = nt._LOAD_LIBRARY_SEARCH_DEFAULT_DIRS - if '/' in name or '\\' in name: - self._name = nt._getfullpathname(self._name) - mode |= nt._LOAD_LIBRARY_SEARCH_DLL_LOAD_DIR - class _FuncPtr(_CFuncPtr): - _flags_ = flags + _flags_ = self._func_flags_ _restype_ = self._func_restype_ + if use_errno: + _flags_ |= _FUNCFLAG_USE_ERRNO + if use_last_error: + _flags_ |= _FUNCFLAG_USE_LASTERROR + self._FuncPtr = _FuncPtr + if name: + name = _os.fspath(name) - if handle is None: - self._handle = _dlopen(self._name, mode) - else: - self._handle = handle + self._handle = self._load_library(name, mode, handle, winmode) + + if _os.name == "nt": + def _load_library(self, name, mode, handle, winmode): + if winmode is None: + import nt as _nt + winmode = _nt._LOAD_LIBRARY_SEARCH_DEFAULT_DIRS + # WINAPI LoadLibrary searches for a DLL if the given name + # is not fully qualified with an explicit drive. For POSIX + # compatibility, and because the DLL search path no longer + # contains the working directory, begin by fully resolving + # any name that contains a path separator. + if name is not None and ('/' in name or '\\' in name): + name = _nt._getfullpathname(name) + winmode |= _nt._LOAD_LIBRARY_SEARCH_DLL_LOAD_DIR + self._name = name + if handle is not None: + return handle + return _LoadLibrary(self._name, winmode) + + else: + def _load_library(self, name, mode, handle, winmode): + # If the filename that has been provided is an iOS/tvOS/watchOS + # .fwork file, dereference the location to the true origin of the + # binary. + if name and name.endswith(".fwork"): + with open(name) as f: + name = _os.path.join( + _os.path.dirname(_sys.executable), + f.read().strip() + ) + if _sys.platform.startswith("aix"): + """When the name contains ".a(" and ends with ")", + e.g., "libFOO.a(libFOO.so)" - this is taken to be an + archive(member) syntax for dlopen(), and the mode is adjusted. + Otherwise, name is presented to dlopen() as a file argument. + """ + if name and name.endswith(")") and ".a(" in name: + mode |= _os.RTLD_MEMBER | _os.RTLD_NOW + self._name = name + return _dlopen(name, mode) def __repr__(self): return "<%s '%s', handle %x at %#x>" % \ @@ -466,8 +558,9 @@ def LoadLibrary(self, name): if _os.name == "nt": pythonapi = PyDLL("python dll", None, _sys.dllhandle) -elif _sys.platform == "cygwin": - pythonapi = PyDLL("libpython%d.%d.dll" % _sys.version_info[:2]) +elif _sys.platform in ["android", "cygwin"]: + # These are Unix-like platforms which use a dynamically-linked libpython. + pythonapi = PyDLL(_sysconfig.get_config_var("LDLIBRARY")) else: pythonapi = PyDLL(None) @@ -499,6 +592,7 @@ def WinError(code=None, descr=None): # functions from _ctypes import _memmove_addr, _memset_addr, _string_at_addr, _cast_addr +from _ctypes import _memoryview_at_addr ## void *memmove(void *, const void *, size_t); memmove = CFUNCTYPE(c_void_p, c_void_p, c_void_p, c_size_t)(_memmove_addr) @@ -524,6 +618,14 @@ def string_at(ptr, size=-1): Return the byte string at void *ptr.""" return _string_at(ptr, size) +_memoryview_at = PYFUNCTYPE( + py_object, c_void_p, c_ssize_t, c_int)(_memoryview_at_addr) +def memoryview_at(ptr, size, readonly=False): + """memoryview_at(ptr, size[, readonly]) -> memoryview + + Return a memoryview representing the memory at void *ptr.""" + return _memoryview_at(ptr, size, bool(readonly)) + try: from _ctypes import _wstring_at_addr except ImportError: diff --git a/PythonLib/full/ctypes/_endian.py b/PythonLib/full/ctypes/_endian.py index 04389008..6382dd22 100644 --- a/PythonLib/full/ctypes/_endian.py +++ b/PythonLib/full/ctypes/_endian.py @@ -1,5 +1,5 @@ import sys -from ctypes import * +from ctypes import Array, Structure, Union _array_type = type(Array) diff --git a/PythonLib/full/ctypes/_layout.py b/PythonLib/full/ctypes/_layout.py new file mode 100644 index 00000000..2048ccb6 --- /dev/null +++ b/PythonLib/full/ctypes/_layout.py @@ -0,0 +1,330 @@ +"""Python implementation of computing the layout of a struct/union + +This code is internal and tightly coupled to the C part. The interface +may change at any time. +""" + +import sys +import warnings + +from _ctypes import CField, buffer_info +import ctypes + +def round_down(n, multiple): + assert n >= 0 + assert multiple > 0 + return (n // multiple) * multiple + +def round_up(n, multiple): + assert n >= 0 + assert multiple > 0 + return ((n + multiple - 1) // multiple) * multiple + +_INT_MAX = (1 << (ctypes.sizeof(ctypes.c_int) * 8) - 1) - 1 + + +class StructUnionLayout: + def __init__(self, fields, size, align, format_spec): + # sequence of CField objects + self.fields = fields + + # total size of the aggregate (rounded up to alignment) + self.size = size + + # total alignment requirement of the aggregate + self.align = align + + # buffer format specification (as a string, UTF-8 but bes + # kept ASCII-only) + self.format_spec = format_spec + + +def get_layout(cls, input_fields, is_struct, base): + """Return a StructUnionLayout for the given class. + + Called by PyCStructUnionType_update_stginfo when _fields_ is assigned + to a class. + """ + # Currently there are two modes, selectable using the '_layout_' attribute: + # + # 'gcc-sysv' mode places fields one after another, bit by bit. + # But "each bit field must fit within a single object of its specified + # type" (GCC manual, section 15.8 "Bit Field Packing"). When it doesn't, + # we insert a few bits of padding to avoid that. + # + # 'ms' mode works similar except for bitfield packing. Adjacent + # bit-fields are packed into the same 1-, 2-, or 4-byte allocation unit + # if the integral types are the same size and if the next bit-field fits + # into the current allocation unit without crossing the boundary imposed + # by the common alignment requirements of the bit-fields. + # + # See https://gcc.gnu.org/onlinedocs/gcc/x86-Options.html#index-mms-bitfields + # for details. + + # We do not support zero length bitfields (we use bitsize != 0 + # elsewhere to indicate a bitfield). Here, non-bitfields have bit_size + # set to size*8. + + # For clarity, variables that count bits have `bit` in their names. + + pack = getattr(cls, '_pack_', None) + + layout = getattr(cls, '_layout_', None) + if layout is None: + if sys.platform == 'win32': + gcc_layout = False + elif pack: + if is_struct: + base_type_name = 'Structure' + else: + base_type_name = 'Union' + warnings._deprecated( + '_pack_ without _layout_', + f"Due to '_pack_', the '{cls.__name__}' {base_type_name} will " + + "use memory layout compatible with MSVC (Windows). " + + "If this is intended, set _layout_ to 'ms'. " + + "The implicit default is deprecated and slated to become " + + "an error in Python {remove}.", + remove=(3, 19), + ) + gcc_layout = False + else: + gcc_layout = True + elif layout == 'ms': + gcc_layout = False + elif layout == 'gcc-sysv': + gcc_layout = True + else: + raise ValueError(f'unknown _layout_: {layout!r}') + + align = getattr(cls, '_align_', 1) + if align < 0: + raise ValueError('_align_ must be a non-negative integer') + elif align == 0: + # Setting `_align_ = 0` amounts to using the default alignment + align = 1 + + if base: + align = max(ctypes.alignment(base), align) + + swapped_bytes = hasattr(cls, '_swappedbytes_') + if swapped_bytes: + big_endian = sys.byteorder == 'little' + else: + big_endian = sys.byteorder == 'big' + + if pack is not None: + try: + pack = int(pack) + except (TypeError, ValueError): + raise ValueError("_pack_ must be an integer") + if pack < 0: + raise ValueError("_pack_ must be a non-negative integer") + if pack > _INT_MAX: + raise ValueError("_pack_ too big") + if gcc_layout: + raise ValueError('_pack_ is not compatible with gcc-sysv layout') + + result_fields = [] + + if is_struct: + format_spec_parts = ["T{"] + else: + format_spec_parts = ["B"] + + last_field_bit_size = 0 # used in MS layout only + + # `8 * next_byte_offset + next_bit_offset` points to where the + # next field would start. + next_bit_offset = 0 + next_byte_offset = 0 + + # size if this was a struct (sum of field sizes, plus padding) + struct_size = 0 + # max of field sizes; only meaningful for unions + union_size = 0 + + if base: + struct_size = ctypes.sizeof(base) + if gcc_layout: + next_bit_offset = struct_size * 8 + else: + next_byte_offset = struct_size + + last_size = struct_size + for i, field in enumerate(input_fields): + if not is_struct: + # Unions start fresh each time + last_field_bit_size = 0 + next_bit_offset = 0 + next_byte_offset = 0 + + # Unpack the field + field = tuple(field) + try: + name, ctype = field + except (ValueError, TypeError): + try: + name, ctype, bit_size = field + except (ValueError, TypeError) as exc: + raise ValueError( + '_fields_ must be a sequence of (name, C type) pairs ' + + 'or (name, C type, bit size) triples') from exc + is_bitfield = True + if bit_size <= 0: + raise ValueError( + f'number of bits invalid for bit field {name!r}') + type_size = ctypes.sizeof(ctype) + if bit_size > type_size * 8: + raise ValueError( + f'number of bits invalid for bit field {name!r}') + else: + is_bitfield = False + type_size = ctypes.sizeof(ctype) + bit_size = type_size * 8 + + type_bit_size = type_size * 8 + type_align = ctypes.alignment(ctype) or 1 + type_bit_align = type_align * 8 + + if gcc_layout: + # We don't use next_byte_offset here + assert pack is None + assert next_byte_offset == 0 + + # Determine whether the bit field, if placed at the next + # free bit, fits within a single object of its specified type. + # That is: determine a "slot", sized & aligned for the + # specified type, which contains the bitfield's beginning: + slot_start_bit = round_down(next_bit_offset, type_bit_align) + slot_end_bit = slot_start_bit + type_bit_size + # And see if it also contains the bitfield's last bit: + field_end_bit = next_bit_offset + bit_size + if field_end_bit > slot_end_bit: + # It doesn't: add padding (bump up to the next + # alignment boundary) + next_bit_offset = round_up(next_bit_offset, type_bit_align) + + offset = round_down(next_bit_offset, type_bit_align) // 8 + if is_bitfield: + bit_offset = next_bit_offset - 8 * offset + assert bit_offset <= type_bit_size + else: + assert offset == next_bit_offset / 8 + + next_bit_offset += bit_size + struct_size = round_up(next_bit_offset, 8) // 8 + else: + if pack: + type_align = min(pack, type_align) + + # next_byte_offset points to end of current bitfield. + # next_bit_offset is generally non-positive, + # and 8 * next_byte_offset + next_bit_offset points just behind + # the end of the last field we placed. + if ( + (0 < next_bit_offset + bit_size) + or (type_bit_size != last_field_bit_size) + ): + # Close the previous bitfield (if any) + # and start a new bitfield + next_byte_offset = round_up(next_byte_offset, type_align) + + next_byte_offset += type_size + + last_field_bit_size = type_bit_size + # Reminder: 8 * (next_byte_offset) + next_bit_offset + # points to where we would start a new field, namely + # just behind where we placed the last field plus an + # allowance for alignment. + next_bit_offset = -last_field_bit_size + + assert type_bit_size == last_field_bit_size + + offset = next_byte_offset - last_field_bit_size // 8 + if is_bitfield: + assert 0 <= (last_field_bit_size + next_bit_offset) + bit_offset = last_field_bit_size + next_bit_offset + if type_bit_size: + assert (last_field_bit_size + next_bit_offset) < type_bit_size + + next_bit_offset += bit_size + struct_size = next_byte_offset + + if is_bitfield and big_endian: + # On big-endian architectures, bit fields are also laid out + # starting with the big end. + bit_offset = type_bit_size - bit_size - bit_offset + + # Add the format spec parts + if is_struct: + padding = offset - last_size + format_spec_parts.append(padding_spec(padding)) + + fieldfmt, bf_ndim, bf_shape = buffer_info(ctype) + + if bf_shape: + format_spec_parts.extend(( + "(", + ','.join(str(n) for n in bf_shape), + ")", + )) + + if fieldfmt is None: + fieldfmt = "B" + if isinstance(name, bytes): + # a bytes name would be rejected later, but we check early + # to avoid a BytesWarning with `python -bb` + raise TypeError( + f"field {name!r}: name must be a string, not bytes") + format_spec_parts.append(f"{fieldfmt}:{name}:") + + result_fields.append(CField( + name=name, + type=ctype, + byte_size=type_size, + byte_offset=offset, + bit_size=bit_size if is_bitfield else None, + bit_offset=bit_offset if is_bitfield else None, + index=i, + + # Do not use CField outside ctypes, yet. + # The constructor is internal API and may change without warning. + _internal_use=True, + )) + if is_bitfield and not gcc_layout: + assert type_bit_size > 0 + + align = max(align, type_align) + last_size = struct_size + if not is_struct: + union_size = max(struct_size, union_size) + + if is_struct: + total_size = struct_size + else: + total_size = union_size + + # Adjust the size according to the alignment requirements + aligned_size = round_up(total_size, align) + + # Finish up the format spec + if is_struct: + padding = aligned_size - total_size + format_spec_parts.append(padding_spec(padding)) + format_spec_parts.append("}") + + return StructUnionLayout( + fields=result_fields, + size=aligned_size, + align=align, + format_spec="".join(format_spec_parts), + ) + + +def padding_spec(padding): + if padding <= 0: + return "" + if padding == 1: + return "x" + return f"{padding}x" diff --git a/PythonLib/full/ctypes/util.py b/PythonLib/full/ctypes/util.py index c550883e..378f1216 100644 --- a/PythonLib/full/ctypes/util.py +++ b/PythonLib/full/ctypes/util.py @@ -67,7 +67,66 @@ def find_library(name): return fname return None -elif os.name == "posix" and sys.platform == "darwin": + # Listing loaded DLLs on Windows relies on the following APIs: + # https://learn.microsoft.com/windows/win32/api/psapi/nf-psapi-enumprocessmodules + # https://learn.microsoft.com/windows/win32/api/libloaderapi/nf-libloaderapi-getmodulefilenamew + import ctypes + from ctypes import wintypes + + _kernel32 = ctypes.WinDLL('kernel32', use_last_error=True) + _get_current_process = _kernel32["GetCurrentProcess"] + _get_current_process.restype = wintypes.HANDLE + + _k32_get_module_file_name = _kernel32["GetModuleFileNameW"] + _k32_get_module_file_name.restype = wintypes.DWORD + _k32_get_module_file_name.argtypes = ( + wintypes.HMODULE, + wintypes.LPWSTR, + wintypes.DWORD, + ) + + _psapi = ctypes.WinDLL('psapi', use_last_error=True) + _enum_process_modules = _psapi["EnumProcessModules"] + _enum_process_modules.restype = wintypes.BOOL + _enum_process_modules.argtypes = ( + wintypes.HANDLE, + ctypes.POINTER(wintypes.HMODULE), + wintypes.DWORD, + wintypes.LPDWORD, + ) + + def _get_module_filename(module: wintypes.HMODULE): + name = (wintypes.WCHAR * 32767)() # UNICODE_STRING_MAX_CHARS + if _k32_get_module_file_name(module, name, len(name)): + return name.value + return None + + + def _get_module_handles(): + process = _get_current_process() + space_needed = wintypes.DWORD() + n = 1024 + while True: + modules = (wintypes.HMODULE * n)() + if not _enum_process_modules(process, + modules, + ctypes.sizeof(modules), + ctypes.byref(space_needed)): + err = ctypes.get_last_error() + msg = ctypes.FormatError(err).strip() + raise ctypes.WinError(err, f"EnumProcessModules failed: {msg}") + n = space_needed.value // ctypes.sizeof(wintypes.HMODULE) + if n <= len(modules): + return modules[:n] + + def dllist(): + """Return a list of loaded shared libraries in the current process.""" + modules = _get_module_handles() + libraries = [name for h in modules + if (name := _get_module_filename(h)) is not None] + return libraries + +elif os.name == "posix" and sys.platform in {"darwin", "ios", "tvos", "watchos"}: from ctypes.macholib.dyld import dyld_find as _dyld_find def find_library(name): possible = ['lib%s.dylib' % name, @@ -80,6 +139,22 @@ def find_library(name): continue return None + # Listing loaded libraries on Apple systems relies on the following API: + # https://developer.apple.com/library/archive/documentation/System/Conceptual/ManPages_iPhoneOS/man3/dyld.3.html + import ctypes + + _libc = ctypes.CDLL(find_library("c")) + _dyld_get_image_name = _libc["_dyld_get_image_name"] + _dyld_get_image_name.restype = ctypes.c_char_p + + def dllist(): + """Return a list of loaded shared libraries in the current process.""" + num_images = _libc._dyld_image_count() + libraries = [os.fsdecode(name) for i in range(num_images) + if (name := _dyld_get_image_name(i)) is not None] + + return libraries + elif sys.platform.startswith("aix"): # AIX has two styles of storing shared libraries # GNU auto_tools refer to these as svr4 and aix @@ -89,6 +164,34 @@ def find_library(name): from ctypes._aix import find_library +elif sys.platform == "android": + def find_library(name): + directory = "/system/lib" + if "64" in os.uname().machine: + directory += "64" + + fname = f"{directory}/lib{name}.so" + return fname if os.path.isfile(fname) else None + +elif sys.platform == "emscripten": + def _is_wasm(filename): + # Return True if the given file is an WASM module + wasm_header = b"\x00asm" + with open(filename, 'br') as thefile: + return thefile.read(4) == wasm_header + + def find_library(name): + candidates = [f"lib{name}.so", f"lib{name}.wasm"] + paths = os.environ.get("LD_LIBRARY_PATH", "") + for libdir in paths.split(":"): + for name in candidates: + libfile = os.path.join(libdir, name) + + if os.path.isfile(libfile) and _is_wasm(libfile): + return libfile + + return None + elif os.name == "posix": # Andreas Degert's find functions, using gcc, /sbin/ldconfig, objdump import re, tempfile @@ -332,6 +435,55 @@ def find_library(name): return _findSoname_ldconfig(name) or \ _get_soname(_findLib_gcc(name)) or _get_soname(_findLib_ld(name)) + +# Listing loaded libraries on other systems will try to use +# functions common to Linux and a few other Unix-like systems. +# See the following for several platforms' documentation of the same API: +# https://man7.org/linux/man-pages/man3/dl_iterate_phdr.3.html +# https://man.freebsd.org/cgi/man.cgi?query=dl_iterate_phdr +# https://man.openbsd.org/dl_iterate_phdr +# https://docs.oracle.com/cd/E88353_01/html/E37843/dl-iterate-phdr-3c.html +if (os.name == "posix" and + sys.platform not in {"darwin", "ios", "tvos", "watchos"}): + import ctypes + if hasattr((_libc := ctypes.CDLL(None)), "dl_iterate_phdr"): + + class _dl_phdr_info(ctypes.Structure): + _fields_ = [ + ("dlpi_addr", ctypes.c_void_p), + ("dlpi_name", ctypes.c_char_p), + ("dlpi_phdr", ctypes.c_void_p), + ("dlpi_phnum", ctypes.c_ushort), + ] + + _dl_phdr_callback = ctypes.CFUNCTYPE( + ctypes.c_int, + ctypes.POINTER(_dl_phdr_info), + ctypes.c_size_t, + ctypes.POINTER(ctypes.py_object), + ) + + @_dl_phdr_callback + def _info_callback(info, _size, data): + libraries = data.contents.value + name = os.fsdecode(info.contents.dlpi_name) + libraries.append(name) + return 0 + + _dl_iterate_phdr = _libc["dl_iterate_phdr"] + _dl_iterate_phdr.argtypes = [ + _dl_phdr_callback, + ctypes.POINTER(ctypes.py_object), + ] + _dl_iterate_phdr.restype = ctypes.c_int + + def dllist(): + """Return a list of loaded shared libraries in the current process.""" + libraries = [] + _dl_iterate_phdr(_info_callback, + ctypes.byref(ctypes.py_object(libraries))) + return libraries + ################################################################ # test code @@ -375,5 +527,12 @@ def test(): print(cdll.LoadLibrary("libcrypt.so")) print(find_library("crypt")) + try: + dllist + except NameError: + print('dllist() not available') + else: + print(dllist()) + if __name__ == "__main__": test() diff --git a/PythonLib/full/ctypes/wintypes.py b/PythonLib/full/ctypes/wintypes.py index 9c4e7214..4beba0d1 100644 --- a/PythonLib/full/ctypes/wintypes.py +++ b/PythonLib/full/ctypes/wintypes.py @@ -63,10 +63,16 @@ def __repr__(self): HBITMAP = HANDLE HBRUSH = HANDLE HCOLORSPACE = HANDLE +HCONV = HANDLE +HCONVLIST = HANDLE +HCURSOR = HANDLE HDC = HANDLE +HDDEDATA = HANDLE HDESK = HANDLE +HDROP = HANDLE HDWP = HANDLE HENHMETAFILE = HANDLE +HFILE = INT HFONT = HANDLE HGDIOBJ = HANDLE HGLOBAL = HANDLE @@ -82,9 +88,11 @@ def __repr__(self): HMONITOR = HANDLE HPALETTE = HANDLE HPEN = HANDLE +HRESULT = LONG HRGN = HANDLE HRSRC = HANDLE HSTR = HANDLE +HSZ = HANDLE HTASK = HANDLE HWINSTA = HANDLE HWND = HANDLE diff --git a/PythonLib/full/curses/__init__.py b/PythonLib/full/curses/__init__.py index 69270bfc..6165fe6c 100644 --- a/PythonLib/full/curses/__init__.py +++ b/PythonLib/full/curses/__init__.py @@ -53,7 +53,7 @@ def start_color(): try: has_key except NameError: - from .has_key import has_key + from .has_key import has_key # noqa: F401 # Wrapper for the entire curses-based application. Runs a function which # should be the rest of your curses-based application. If the application diff --git a/PythonLib/full/dataclasses.py b/PythonLib/full/dataclasses.py index 09a86bd2..c8dbb247 100644 --- a/PythonLib/full/dataclasses.py +++ b/PythonLib/full/dataclasses.py @@ -4,11 +4,10 @@ import types import inspect import keyword -import functools import itertools +import annotationlib import abc -import _thread -from types import FunctionType, GenericAlias +from reprlib import recursive_repr __all__ = ['dataclass', @@ -245,25 +244,10 @@ def __repr__(self): property, }) -# This function's logic is copied from "recursive_repr" function in -# reprlib module to avoid dependency. -def _recursive_repr(user_function): - # Decorator to make a repr function return "..." for a recursive - # call. - repr_running = set() - - @functools.wraps(user_function) - def wrapper(self): - key = id(self), _thread.get_ident() - if key in repr_running: - return '...' - repr_running.add(key) - try: - result = user_function(self) - finally: - repr_running.discard(key) - return result - return wrapper +# Any marker is used in `make_dataclass` to mark unannotated fields as `Any` +# without importing `typing` module. +_ANY_MARKER = object() + class InitVar: __slots__ = ('type', ) @@ -303,11 +287,12 @@ class Field: 'compare', 'metadata', 'kw_only', + 'doc', '_field_type', # Private: not to be used by user code. ) def __init__(self, default, default_factory, init, repr, hash, compare, - metadata, kw_only): + metadata, kw_only, doc): self.name = None self.type = None self.default = default @@ -320,9 +305,10 @@ def __init__(self, default, default_factory, init, repr, hash, compare, if metadata is None else types.MappingProxyType(metadata)) self.kw_only = kw_only + self.doc = doc self._field_type = None - @_recursive_repr + @recursive_repr() def __repr__(self): return ('Field(' f'name={self.name!r},' @@ -335,6 +321,7 @@ def __repr__(self): f'compare={self.compare!r},' f'metadata={self.metadata!r},' f'kw_only={self.kw_only!r},' + f'doc={self.doc!r},' f'_field_type={self._field_type}' ')') @@ -353,7 +340,7 @@ def __set_name__(self, owner, name): # it. func(self.default, owner, name) - __class_getitem__ = classmethod(GenericAlias) + __class_getitem__ = classmethod(types.GenericAlias) class _DataclassParams: @@ -402,7 +389,7 @@ def __repr__(self): # so that a type checker can be told (via overloads) that this is a # function whose type depends on its parameters. def field(*, default=MISSING, default_factory=MISSING, init=True, repr=True, - hash=None, compare=True, metadata=None, kw_only=MISSING): + hash=None, compare=True, metadata=None, kw_only=MISSING, doc=None): """Return an object to identify dataclass fields. default is the default value of the field. default_factory is a @@ -414,7 +401,7 @@ def field(*, default=MISSING, default_factory=MISSING, init=True, repr=True, comparison functions. metadata, if specified, must be a mapping which is stored but not otherwise examined by dataclass. If kw_only is true, the field will become a keyword-only parameter to - __init__(). + __init__(). doc is an optional docstring for this field. It is an error to specify both default and default_factory. """ @@ -422,7 +409,7 @@ def field(*, default=MISSING, default_factory=MISSING, init=True, repr=True, if default is not MISSING and default_factory is not MISSING: raise ValueError('cannot specify both default and default_factory') return Field(default, default_factory, init, repr, hash, compare, - metadata, kw_only) + metadata, kw_only, doc) def _fields_in_init_order(fields): @@ -446,32 +433,147 @@ def _tuple_str(obj_name, fields): return f'({",".join([f"{obj_name}.{f.name}" for f in fields])},)' -def _create_fn(name, args, body, *, globals=None, locals=None, - return_type=MISSING): - # Note that we may mutate locals. Callers beware! - # The only callers are internal to this module, so no - # worries about external callers. - if locals is None: - locals = {} - return_annotation = '' - if return_type is not MISSING: - locals['__dataclass_return_type__'] = return_type - return_annotation = '->__dataclass_return_type__' - args = ','.join(args) - body = '\n'.join(f' {b}' for b in body) - - # Compute the text of the entire function. - txt = f' def {name}({args}){return_annotation}:\n{body}' - - # Free variables in exec are resolved in the global namespace. - # The global namespace we have is user-provided, so we can't modify it for - # our purposes. So we put the things we need into locals and introduce a - # scope to allow the function we're creating to close over them. - local_vars = ', '.join(locals.keys()) - txt = f"def __create_fn__({local_vars}):\n{txt}\n return {name}" - ns = {} - exec(txt, globals, ns) - return ns['__create_fn__'](**locals) +class _FuncBuilder: + def __init__(self, globals): + self.names = [] + self.src = [] + self.globals = globals + self.locals = {} + self.overwrite_errors = {} + self.unconditional_adds = {} + self.method_annotations = {} + + def add_fn(self, name, args, body, *, locals=None, return_type=MISSING, + overwrite_error=False, unconditional_add=False, decorator=None, + annotation_fields=None): + if locals is not None: + self.locals.update(locals) + + # Keep track if this method is allowed to be overwritten if it already + # exists in the class. The error is method-specific, so keep it with + # the name. We'll use this when we generate all of the functions in + # the add_fns_to_class call. overwrite_error is either True, in which + # case we'll raise an error, or it's a string, in which case we'll + # raise an error and append this string. + if overwrite_error: + self.overwrite_errors[name] = overwrite_error + + # Should this function always overwrite anything that's already in the + # class? The default is to not overwrite a function that already + # exists. + if unconditional_add: + self.unconditional_adds[name] = True + + self.names.append(name) + + if annotation_fields is not None: + self.method_annotations[name] = (annotation_fields, return_type) + + args = ','.join(args) + body = '\n'.join(body) + + # Compute the text of the entire function, add it to the text we're generating. + self.src.append(f'{f' {decorator}\n' if decorator else ''} def {name}({args}):\n{body}') + + def add_fns_to_class(self, cls): + # The source to all of the functions we're generating. + fns_src = '\n'.join(self.src) + + # The locals they use. + local_vars = ','.join(self.locals.keys()) + + # The names of all of the functions, used for the return value of the + # outer function. Need to handle the 0-tuple specially. + if len(self.names) == 0: + return_names = '()' + else: + return_names =f'({",".join(self.names)},)' + + # txt is the entire function we're going to execute, including the + # bodies of the functions we're defining. Here's a greatly simplified + # version: + # def __create_fn__(): + # def __init__(self, x, y): + # self.x = x + # self.y = y + # @recursive_repr + # def __repr__(self): + # return f"cls(x={self.x!r},y={self.y!r})" + # return __init__,__repr__ + + txt = f"def __create_fn__({local_vars}):\n{fns_src}\n return {return_names}" + ns = {} + exec(txt, self.globals, ns) + fns = ns['__create_fn__'](**self.locals) + + # Now that we've generated the functions, assign them into cls. + for name, fn in zip(self.names, fns): + fn.__qualname__ = f"{cls.__qualname__}.{fn.__name__}" + + try: + annotation_fields, return_type = self.method_annotations[name] + except KeyError: + pass + else: + annotate_fn = _make_annotate_function(cls, name, annotation_fields, return_type) + fn.__annotate__ = annotate_fn + + if self.unconditional_adds.get(name, False): + setattr(cls, name, fn) + else: + already_exists = _set_new_attribute(cls, name, fn) + + # See if it's an error to overwrite this particular function. + if already_exists and (msg_extra := self.overwrite_errors.get(name)): + error_msg = (f'Cannot overwrite attribute {fn.__name__} ' + f'in class {cls.__name__}') + if not msg_extra is True: + error_msg = f'{error_msg} {msg_extra}' + + raise TypeError(error_msg) + + +def _make_annotate_function(__class__, method_name, annotation_fields, return_type): + # Create an __annotate__ function for a dataclass + # Try to return annotations in the same format as they would be + # from a regular __init__ function + + def __annotate__(format, /): + Format = annotationlib.Format + match format: + case Format.VALUE | Format.FORWARDREF | Format.STRING: + cls_annotations = {} + for base in reversed(__class__.__mro__): + cls_annotations.update( + annotationlib.get_annotations(base, format=format) + ) + + new_annotations = {} + for k in annotation_fields: + # gh-142214: The annotation may be missing in unusual dynamic cases. + # If so, just skip it. + try: + new_annotations[k] = cls_annotations[k] + except KeyError: + pass + + if return_type is not MISSING: + if format == Format.STRING: + new_annotations["return"] = annotationlib.type_repr(return_type) + else: + new_annotations["return"] = return_type + + return new_annotations + + case _: + raise NotImplementedError(format) + + # This is a flag for _add_slots to know it needs to regenerate this method + # In order to remove references to the original class when it is replaced + __annotate__.__generated_by_dataclasses__ = True + __annotate__.__qualname__ = f"{__class__.__qualname__}.{method_name}.__annotate__" + + return __annotate__ def _field_assign(frozen, name, value, self_name): @@ -482,8 +584,8 @@ def _field_assign(frozen, name, value, self_name): # self_name is what "self" is called in this function: don't # hard-code "self", since that might be a field name. if frozen: - return f'__dataclass_builtins_object__.__setattr__({self_name},{name!r},{value})' - return f'{self_name}.{name}={value}' + return f' __dataclass_builtins_object__.__setattr__({self_name},{name!r},{value})' + return f' {self_name}.{name}={value}' def _field_init(f, frozen, globals, self_name, slots): @@ -562,11 +664,11 @@ def _init_param(f): elif f.default_factory is not MISSING: # There's a factory function. Set a marker. default = '=__dataclass_HAS_DEFAULT_FACTORY__' - return f'{f.name}:__dataclass_type_{f.name}__{default}' + return f'{f.name}{default}' def _init_fn(fields, std_fields, kw_only_fields, frozen, has_post_init, - self_name, globals, slots): + self_name, func_builder, slots): # fields contains both real fields and InitVar pseudo-fields. # Make sure we don't have fields without defaults following fields @@ -575,21 +677,20 @@ def _init_fn(fields, std_fields, kw_only_fields, frozen, has_post_init, # message, and future-proofs us in case we build up the function # using ast. - seen_default = False + seen_default = None for f in std_fields: # Only consider the non-kw-only fields in the __init__ call. if f.init: if not (f.default is MISSING and f.default_factory is MISSING): - seen_default = True + seen_default = f elif seen_default: raise TypeError(f'non-default argument {f.name!r} ' - 'follows default argument') + f'follows default argument {seen_default.name!r}') + + annotation_fields = [f.name for f in fields if f.init] - locals = {f'__dataclass_type_{f.name}__': f.type for f in fields} - locals.update({ - '__dataclass_HAS_DEFAULT_FACTORY__': _HAS_DEFAULT_FACTORY, - '__dataclass_builtins_object__': object, - }) + locals = {'__dataclass_HAS_DEFAULT_FACTORY__': _HAS_DEFAULT_FACTORY, + '__dataclass_builtins_object__': object} body_lines = [] for f in fields: @@ -603,89 +704,53 @@ def _init_fn(fields, std_fields, kw_only_fields, frozen, has_post_init, if has_post_init: params_str = ','.join(f.name for f in fields if f._field_type is _FIELD_INITVAR) - body_lines.append(f'{self_name}.{_POST_INIT_NAME}({params_str})') + body_lines.append(f' {self_name}.{_POST_INIT_NAME}({params_str})') # If no body lines, use 'pass'. if not body_lines: - body_lines = ['pass'] + body_lines = [' pass'] _init_params = [_init_param(f) for f in std_fields] if kw_only_fields: # Add the keyword-only args. Because the * can only be added if # there's at least one keyword-only arg, there needs to be a test here - # (instead of just concatenting the lists together). + # (instead of just concatenating the lists together). _init_params += ['*'] _init_params += [_init_param(f) for f in kw_only_fields] - return _create_fn('__init__', - [self_name] + _init_params, - body_lines, - locals=locals, - globals=globals, - return_type=None) - - -def _repr_fn(fields, globals): - fn = _create_fn('__repr__', - ('self',), - ['return self.__class__.__qualname__ + f"(' + - ', '.join([f"{f.name}={{self.{f.name}!r}}" - for f in fields]) + - ')"'], - globals=globals) - return _recursive_repr(fn) - - -def _frozen_get_del_attr(cls, fields, globals): + func_builder.add_fn('__init__', + [self_name] + _init_params, + body_lines, + locals=locals, + return_type=None, + annotation_fields=annotation_fields) + + +def _frozen_get_del_attr(cls, fields, func_builder): locals = {'cls': cls, 'FrozenInstanceError': FrozenInstanceError} condition = 'type(self) is cls' if fields: condition += ' or name in {' + ', '.join(repr(f.name) for f in fields) + '}' - return (_create_fn('__setattr__', - ('self', 'name', 'value'), - (f'if {condition}:', - ' raise FrozenInstanceError(f"cannot assign to field {name!r}")', - f'super(cls, self).__setattr__(name, value)'), - locals=locals, - globals=globals), - _create_fn('__delattr__', - ('self', 'name'), - (f'if {condition}:', - ' raise FrozenInstanceError(f"cannot delete field {name!r}")', - f'super(cls, self).__delattr__(name)'), - locals=locals, - globals=globals), - ) - - -def _cmp_fn(name, op, self_tuple, other_tuple, globals): - # Create a comparison function. If the fields in the object are - # named 'x' and 'y', then self_tuple is the string - # '(self.x,self.y)' and other_tuple is the string - # '(other.x,other.y)'. - return _create_fn(name, - ('self', 'other'), - [ 'if other.__class__ is self.__class__:', - f' return {self_tuple}{op}{other_tuple}', - 'return NotImplemented'], - globals=globals) - - -def _hash_fn(fields, globals): - self_tuple = _tuple_str('self', fields) - return _create_fn('__hash__', - ('self',), - [f'return hash({self_tuple})'], - globals=globals) + func_builder.add_fn('__setattr__', + ('self', 'name', 'value'), + (f' if {condition}:', + ' raise FrozenInstanceError(f"cannot assign to field {name!r}")', + f' super(cls, self).__setattr__(name, value)'), + locals=locals, + overwrite_error=True) + func_builder.add_fn('__delattr__', + ('self', 'name'), + (f' if {condition}:', + ' raise FrozenInstanceError(f"cannot delete field {name!r}")', + f' super(cls, self).__delattr__(name)'), + locals=locals, + overwrite_error=True) def _is_classvar(a_type, typing): - # This test uses a typing internal class, but it's the best way to - # test if this is a ClassVar. return (a_type is typing.ClassVar - or (type(a_type) is typing._GenericAlias - and a_type.__origin__ is typing.ClassVar)) + or (typing.get_origin(a_type) is typing.ClassVar)) def _is_initvar(a_type, dataclasses): @@ -854,19 +919,11 @@ def _get_field(cls, a_name, a_type, default_kw_only): return f -def _set_qualname(cls, value): - # Ensure that the functions returned from _create_fn uses the proper - # __qualname__ (the class they belong to). - if isinstance(value, FunctionType): - value.__qualname__ = f"{cls.__qualname__}.{value.__name__}" - return value - def _set_new_attribute(cls, name, value): # Never overwrites an existing attribute. Returns True if the # attribute already exists. if name in cls.__dict__: return True - _set_qualname(cls, value) setattr(cls, name, value) return False @@ -876,14 +933,22 @@ def _set_new_attribute(cls, name, value): # take. The common case is to do nothing, so instead of providing a # function that is a no-op, use None to signify that. -def _hash_set_none(cls, fields, globals): - return None +def _hash_set_none(cls, fields, func_builder): + # It's sort of a hack that I'm setting this here, instead of at + # func_builder.add_fns_to_class time, but since this is an exceptional case + # (it's not setting an attribute to a function, but to a scalar value), + # just do it directly here. I might come to regret this. + cls.__hash__ = None -def _hash_add(cls, fields, globals): +def _hash_add(cls, fields, func_builder): flds = [f for f in fields if (f.compare if f.hash is None else f.hash)] - return _set_qualname(cls, _hash_fn(flds, globals)) + self_tuple = _tuple_str('self', flds) + func_builder.add_fn('__hash__', + ('self',), + [f' return hash({self_tuple})'], + unconditional_add=True) -def _hash_exception(cls, fields, globals): +def _hash_exception(cls, fields, func_builder): # Raise an exception. raise TypeError(f'Cannot overwrite attribute __hash__ ' f'in class {cls.__name__}') @@ -944,8 +1009,11 @@ def _process_class(cls, init, repr, eq, order, unsafe_hash, frozen, # Find our base classes in reverse MRO order, and exclude # ourselves. In reversed order so that more derived classes # override earlier field definitions in base classes. As long as - # we're iterating over them, see if any are frozen. + # we're iterating over them, see if all or any of them are frozen. any_frozen_base = False + # By default `all_frozen_bases` is `None` to represent a case, + # where some dataclasses does not have any bases with `_FIELDS` + all_frozen_bases = None has_dataclass_bases = False for b in cls.__mro__[-1:0:-1]: # Only process classes that have been processed by our @@ -955,8 +1023,11 @@ def _process_class(cls, init, repr, eq, order, unsafe_hash, frozen, has_dataclass_bases = True for f in base_fields.values(): fields[f.name] = f - if getattr(b, _PARAMS).frozen: - any_frozen_base = True + if all_frozen_bases is None: + all_frozen_bases = True + current_frozen = getattr(b, _PARAMS).frozen + all_frozen_bases = all_frozen_bases and current_frozen + any_frozen_base = any_frozen_base or current_frozen # Annotations defined specifically in this class (not in base classes). # @@ -967,7 +1038,8 @@ def _process_class(cls, init, repr, eq, order, unsafe_hash, frozen, # actual default value. Pseudo-fields ClassVars and InitVars are # included, despite the fact that they're not real fields. That's # dealt with later. - cls_annotations = inspect.get_annotations(cls) + cls_annotations = annotationlib.get_annotations( + cls, format=annotationlib.Format.FORWARDREF) # Now find fields in our class. While doing so, validate some # things, and set the default values (as class attributes) where @@ -1025,7 +1097,7 @@ def _process_class(cls, init, repr, eq, order, unsafe_hash, frozen, 'frozen one') # Raise an exception if we're frozen, but none of our bases are. - if not any_frozen_base and frozen: + if all_frozen_bases is False and frozen: raise TypeError('cannot inherit frozen dataclass from a ' 'non-frozen one') @@ -1036,7 +1108,7 @@ def _process_class(cls, init, repr, eq, order, unsafe_hash, frozen, # Was this class defined with an explicit __hash__? Note that if # __eq__ is defined in this class, then python will automatically # set __hash__ to None. This is a heuristic, as it's possible - # that such a __hash__ == None was not auto-generated, but it + # that such a __hash__ == None was not auto-generated, but it's # close enough. class_hash = cls.__dict__.get('__hash__', MISSING) has_explicit_hash = not (class_hash is MISSING or @@ -1055,24 +1127,27 @@ def _process_class(cls, init, repr, eq, order, unsafe_hash, frozen, (std_init_fields, kw_only_init_fields) = _fields_in_init_order(all_init_fields) + func_builder = _FuncBuilder(globals) + if init: # Does this class have a post-init function? has_post_init = hasattr(cls, _POST_INIT_NAME) - _set_new_attribute(cls, '__init__', - _init_fn(all_init_fields, - std_init_fields, - kw_only_init_fields, - frozen, - has_post_init, - # The name to use for the "self" - # param in __init__. Use "self" - # if possible. - '__dataclass_self__' if 'self' in fields - else 'self', - globals, - slots, - )) + _init_fn(all_init_fields, + std_init_fields, + kw_only_init_fields, + frozen, + has_post_init, + # The name to use for the "self" + # param in __init__. Use "self" + # if possible. + '__dataclass_self__' if 'self' in fields + else 'self', + func_builder, + slots, + ) + + _set_new_attribute(cls, '__replace__', _replace) # Get the fields as a list, and include only real fields. This is # used in all of the following methods. @@ -1080,18 +1155,27 @@ def _process_class(cls, init, repr, eq, order, unsafe_hash, frozen, if repr: flds = [f for f in field_list if f.repr] - _set_new_attribute(cls, '__repr__', _repr_fn(flds, globals)) + func_builder.add_fn('__repr__', + ('self',), + [' return f"{self.__class__.__qualname__}(' + + ', '.join([f"{f.name}={{self.{f.name}!r}}" + for f in flds]) + ')"'], + locals={'__dataclasses_recursive_repr': recursive_repr}, + decorator="@__dataclasses_recursive_repr()") if eq: # Create __eq__ method. There's no need for a __ne__ method, # since python will call __eq__ and negate it. - flds = [f for f in field_list if f.compare] - self_tuple = _tuple_str('self', flds) - other_tuple = _tuple_str('other', flds) - _set_new_attribute(cls, '__eq__', - _cmp_fn('__eq__', '==', - self_tuple, other_tuple, - globals=globals)) + cmp_fields = (field for field in field_list if field.compare) + terms = [f'self.{field.name}==other.{field.name}' for field in cmp_fields] + field_comparisons = ' and '.join(terms) or 'True' + func_builder.add_fn('__eq__', + ('self', 'other'), + [ ' if self is other:', + ' return True', + ' if other.__class__ is self.__class__:', + f' return {field_comparisons}', + ' return NotImplemented']) if order: # Create and set the ordering methods. @@ -1103,18 +1187,19 @@ def _process_class(cls, init, repr, eq, order, unsafe_hash, frozen, ('__gt__', '>'), ('__ge__', '>='), ]: - if _set_new_attribute(cls, name, - _cmp_fn(name, op, self_tuple, other_tuple, - globals=globals)): - raise TypeError(f'Cannot overwrite attribute {name} ' - f'in class {cls.__name__}. Consider using ' - 'functools.total_ordering') + # Create a comparison function. If the fields in the object are + # named 'x' and 'y', then self_tuple is the string + # '(self.x,self.y)' and other_tuple is the string + # '(other.x,other.y)'. + func_builder.add_fn(name, + ('self', 'other'), + [ ' if other.__class__ is self.__class__:', + f' return {self_tuple}{op}{other_tuple}', + ' return NotImplemented'], + overwrite_error='Consider using functools.total_ordering') if frozen: - for fn in _frozen_get_del_attr(cls, field_list, globals): - if _set_new_attribute(cls, fn.__name__, fn): - raise TypeError(f'Cannot overwrite attribute {fn.__name__} ' - f'in class {cls.__name__}') + _frozen_get_del_attr(cls, field_list, func_builder) # Decide if/how we're going to create a hash function. hash_action = _hash_action[bool(unsafe_hash), @@ -1122,22 +1207,28 @@ def _process_class(cls, init, repr, eq, order, unsafe_hash, frozen, bool(frozen), has_explicit_hash] if hash_action: - # No need to call _set_new_attribute here, since by the time - # we're here the overwriting is unconditional. - cls.__hash__ = hash_action(cls, field_list, globals) + cls.__hash__ = hash_action(cls, field_list, func_builder) + + # Generate the methods and add them to the class. This needs to be done + # before the __doc__ logic below, since inspect will look at the __init__ + # signature. + func_builder.add_fns_to_class(cls) if not getattr(cls, '__doc__'): # Create a class doc-string. try: # In some cases fetching a signature is not possible. # But, we surely should not fail in this case. - text_sig = str(inspect.signature(cls)).replace(' -> None', '') + text_sig = str(inspect.signature( + cls, + annotation_format=annotationlib.Format.FORWARDREF, + )).replace(' -> None', '') except (TypeError, ValueError): text_sig = '' cls.__doc__ = (cls.__name__ + text_sig) if match_args: - # I could probably compute this once + # I could probably compute this once. _set_new_attribute(cls, '__match_args__', tuple(f.name for f in std_init_fields)) @@ -1145,7 +1236,7 @@ def _process_class(cls, init, repr, eq, order, unsafe_hash, frozen, if weakref_slot and not slots: raise TypeError('weakref_slot is True but slots is False') if slots: - cls = _add_slots(cls, frozen, weakref_slot) + cls = _add_slots(cls, frozen, weakref_slot, fields) abc.update_abstractmethods(cls) @@ -1189,14 +1280,65 @@ def _get_slots(cls): raise TypeError(f"Slots of '{cls.__name__}' cannot be determined") -def _add_slots(cls, is_frozen, weakref_slot): - # Need to create a new class, since we can't set __slots__ - # after a class has been created. +def _update_func_cell_for__class__(f, oldcls, newcls): + # Returns True if we update a cell, else False. + if f is None: + # f will be None in the case of a property where not all of + # fget, fset, and fdel are used. Nothing to do in that case. + return False + try: + idx = f.__code__.co_freevars.index("__class__") + except ValueError: + # This function doesn't reference __class__, so nothing to do. + return False + # Fix the cell to point to the new class, if it's already pointing + # at the old class. I'm not convinced that the "is oldcls" test + # is needed, but other than performance can't hurt. + closure = f.__closure__[idx] + if closure.cell_contents is oldcls: + closure.cell_contents = newcls + return True + return False + + +def _create_slots(defined_fields, inherited_slots, field_names, weakref_slot): + # The slots for our class. Remove slots from our base classes. Add + # '__weakref__' if weakref_slot was given, unless it is already present. + seen_docs = False + slots = {} + for slot in itertools.filterfalse( + inherited_slots.__contains__, + itertools.chain( + # gh-93521: '__weakref__' also needs to be filtered out if + # already present in inherited_slots + field_names, ('__weakref__',) if weakref_slot else () + ) + ): + doc = getattr(defined_fields.get(slot), 'doc', None) + if doc is not None: + seen_docs = True + slots[slot] = doc + + # We only return dict if there's at least one doc member, + # otherwise we return tuple, which is the old default format. + if seen_docs: + return slots + return tuple(slots) + + +def _add_slots(cls, is_frozen, weakref_slot, defined_fields): + # Need to create a new class, since we can't set __slots__ after a + # class has been created, and the @dataclass decorator is called + # after the class is created. # Make sure __slots__ isn't already set. if '__slots__' in cls.__dict__: raise TypeError(f'{cls.__name__} already specifies __slots__') + # gh-102069: Remove existing __weakref__ descriptor. + # gh-135228: Make sure the original class can be garbage collected. + sys._clear_type_descriptors(cls) + # Create a new dict for our new class. cls_dict = dict(cls.__dict__) field_names = tuple(f.name for f in fields(cls)) @@ -1204,17 +1346,9 @@ def _add_slots(cls, is_frozen, weakref_slot): inherited_slots = set( itertools.chain.from_iterable(map(_get_slots, cls.__mro__[1:-1])) ) - # The slots for our class. Remove slots from our base classes. Add - # '__weakref__' if weakref_slot was given, unless it is already present. - cls_dict["__slots__"] = tuple( - itertools.filterfalse( - inherited_slots.__contains__, - itertools.chain( - # gh-93521: '__weakref__' also needs to be filtered out if - # already present in inherited_slots - field_names, ('__weakref__',) if weakref_slot else () - ) - ), + + cls_dict["__slots__"] = _create_slots( + defined_fields, inherited_slots, field_names, weakref_slot, ) for field_name in field_names: @@ -1222,26 +1356,59 @@ def _add_slots(cls, is_frozen, weakref_slot): # available in _MARKER. cls_dict.pop(field_name, None) - # Remove __dict__ itself. - cls_dict.pop('__dict__', None) - - # Clear existing `__weakref__` descriptor, it belongs to a previous type: - cls_dict.pop('__weakref__', None) # gh-102069 - # And finally create the class. qualname = getattr(cls, '__qualname__', None) - cls = type(cls)(cls.__name__, cls.__bases__, cls_dict) + newcls = type(cls)(cls.__name__, cls.__bases__, cls_dict) if qualname is not None: - cls.__qualname__ = qualname + newcls.__qualname__ = qualname if is_frozen: # Need this for pickling frozen classes with slots. if '__getstate__' not in cls_dict: - cls.__getstate__ = _dataclass_getstate + newcls.__getstate__ = _dataclass_getstate if '__setstate__' not in cls_dict: - cls.__setstate__ = _dataclass_setstate + newcls.__setstate__ = _dataclass_setstate + + # Fix up any closures which reference __class__. This is used to + # fix zero argument super so that it points to the correct class + # (the newly created one, which we're returning) and not the + # original class. We can break out of this loop as soon as we + # make an update, since all closures for a class will share a + # given cell. + for member in newcls.__dict__.values(): + # If this is a wrapped function, unwrap it. + member = inspect.unwrap(member) + + if isinstance(member, types.FunctionType): + if _update_func_cell_for__class__(member, cls, newcls): + break + elif isinstance(member, property): + if (_update_func_cell_for__class__(member.fget, cls, newcls) + or _update_func_cell_for__class__(member.fset, cls, newcls) + or _update_func_cell_for__class__(member.fdel, cls, newcls)): + break + + # Get new annotations to remove references to the original class + # in forward references + newcls_ann = annotationlib.get_annotations( + newcls, format=annotationlib.Format.FORWARDREF) + + # Fix references in dataclass Fields + for f in getattr(newcls, _FIELDS).values(): + try: + ann = newcls_ann[f.name] + except KeyError: + pass + else: + f.type = ann - return cls + # Fix the class reference in the __annotate__ method + init = newcls.__init__ + if init_annotate := getattr(init, "__annotate__", None): + if getattr(init_annotate, "__generated_by_dataclasses__", False): + _update_func_cell_for__class__(init_annotate, cls, newcls) + + return newcls def dataclass(cls=None, /, *, init=True, repr=True, eq=True, order=False, @@ -1330,58 +1497,69 @@ class C: def _asdict_inner(obj, dict_factory): - if type(obj) in _ATOMIC_TYPES: + obj_type = type(obj) + if obj_type in _ATOMIC_TYPES: return obj - elif _is_dataclass_instance(obj): - # fast path for the common case + elif hasattr(obj_type, _FIELDS): + # dataclass instance: fast path for the common case if dict_factory is dict: return { f.name: _asdict_inner(getattr(obj, f.name), dict) for f in fields(obj) } else: - result = [] - for f in fields(obj): - value = _asdict_inner(getattr(obj, f.name), dict_factory) - result.append((f.name, value)) - return dict_factory(result) - elif isinstance(obj, tuple) and hasattr(obj, '_fields'): - # obj is a namedtuple. Recurse into it, but the returned - # object is another namedtuple of the same type. This is - # similar to how other list- or tuple-derived classes are - # treated (see below), but we just need to create them - # differently because a namedtuple's __init__ needs to be - # called differently (see bpo-34363). - - # I'm not using namedtuple's _asdict() - # method, because: - # - it does not recurse in to the namedtuple fields and - # convert them to dicts (using dict_factory). - # - I don't actually want to return a dict here. The main - # use case here is json.dumps, and it handles converting - # namedtuples to lists. Admittedly we're losing some - # information here when we produce a json list instead of a - # dict. Note that if we returned dicts here instead of - # namedtuples, we could no longer call asdict() on a data - # structure where a namedtuple was used as a dict key. - - return type(obj)(*[_asdict_inner(v, dict_factory) for v in obj]) - elif isinstance(obj, (list, tuple)): - # Assume we can create an object of this type by passing in a - # generator (which is not true for namedtuples, handled - # above). - return type(obj)(_asdict_inner(v, dict_factory) for v in obj) - elif isinstance(obj, dict): - if hasattr(type(obj), 'default_factory'): + return dict_factory([ + (f.name, _asdict_inner(getattr(obj, f.name), dict_factory)) + for f in fields(obj) + ]) + # handle the builtin types first for speed; subclasses handled below + elif obj_type is list: + return [_asdict_inner(v, dict_factory) for v in obj] + elif obj_type is dict: + return { + _asdict_inner(k, dict_factory): _asdict_inner(v, dict_factory) + for k, v in obj.items() + } + elif obj_type is tuple: + return tuple([_asdict_inner(v, dict_factory) for v in obj]) + elif issubclass(obj_type, tuple): + if hasattr(obj, '_fields'): + # obj is a namedtuple. Recurse into it, but the returned + # object is another namedtuple of the same type. This is + # similar to how other list- or tuple-derived classes are + # treated (see below), but we just need to create them + # differently because a namedtuple's __init__ needs to be + # called differently (see bpo-34363). + + # I'm not using namedtuple's _asdict() + # method, because: + # - it does not recurse in to the namedtuple fields and + # convert them to dicts (using dict_factory). + # - I don't actually want to return a dict here. The main + # use case here is json.dumps, and it handles converting + # namedtuples to lists. Admittedly we're losing some + # information here when we produce a json list instead of a + # dict. Note that if we returned dicts here instead of + # namedtuples, we could no longer call asdict() on a data + # structure where a namedtuple was used as a dict key. + return obj_type(*[_asdict_inner(v, dict_factory) for v in obj]) + else: + return obj_type(_asdict_inner(v, dict_factory) for v in obj) + elif issubclass(obj_type, dict): + if hasattr(obj_type, 'default_factory'): # obj is a defaultdict, which has a different constructor from # dict as it requires the default_factory as its first arg. - result = type(obj)(getattr(obj, 'default_factory')) + result = obj_type(obj.default_factory) for k, v in obj.items(): result[_asdict_inner(k, dict_factory)] = _asdict_inner(v, dict_factory) return result - return type(obj)((_asdict_inner(k, dict_factory), - _asdict_inner(v, dict_factory)) - for k, v in obj.items()) + return obj_type((_asdict_inner(k, dict_factory), + _asdict_inner(v, dict_factory)) + for k, v in obj.items()) + elif issubclass(obj_type, list): + # Assume we can create an object of this type by passing in a + # generator + return obj_type(_asdict_inner(v, dict_factory) for v in obj) else: return copy.deepcopy(obj) @@ -1414,11 +1592,10 @@ def _astuple_inner(obj, tuple_factory): if type(obj) in _ATOMIC_TYPES: return obj elif _is_dataclass_instance(obj): - result = [] - for f in fields(obj): - value = _astuple_inner(getattr(obj, f.name), tuple_factory) - result.append(value) - return tuple_factory(result) + return tuple_factory([ + _astuple_inner(getattr(obj, f.name), tuple_factory) + for f in fields(obj) + ]) elif isinstance(obj, tuple) and hasattr(obj, '_fields'): # obj is a namedtuple. Recurse into it, but the returned # object is another namedtuple of the same type. This is @@ -1450,7 +1627,7 @@ def _astuple_inner(obj, tuple_factory): def make_dataclass(cls_name, fields, *, bases=(), namespace=None, init=True, repr=True, eq=True, order=False, unsafe_hash=False, frozen=False, match_args=True, kw_only=False, slots=False, - weakref_slot=False, module=None): + weakref_slot=False, module=None, decorator=dataclass): """Return a new dynamically created dataclass. The dataclass name will be 'cls_name'. 'fields' is an iterable @@ -1488,7 +1665,7 @@ class C(Base): for item in fields: if isinstance(item, str): name = item - tp = 'typing.Any' + tp = _ANY_MARKER elif len(item) == 2: name, tp, = item elif len(item) == 3: @@ -1507,15 +1684,49 @@ class C(Base): seen.add(name) annotations[name] = tp + # We initially block the VALUE format, because inside dataclass() we'll + # call get_annotations(), which will try the VALUE format first. If we don't + # block, that means we'd always end up eagerly importing typing here, which + # is what we're trying to avoid. + value_blocked = True + + def annotate_method(format): + def get_any(): + match format: + case annotationlib.Format.STRING: + return 'typing.Any' + case annotationlib.Format.FORWARDREF: + typing = sys.modules.get("typing") + if typing is None: + return annotationlib.ForwardRef("Any", module="typing") + else: + return typing.Any + case annotationlib.Format.VALUE: + if value_blocked: + raise NotImplementedError + from typing import Any + return Any + case _: + raise NotImplementedError + annos = { + ann: get_any() if t is _ANY_MARKER else t + for ann, t in annotations.items() + } + if format == annotationlib.Format.STRING: + return annotationlib.annotations_to_string(annos) + else: + return annos + # Update 'ns' with the user-supplied namespace plus our calculated values. def exec_body_callback(ns): ns.update(namespace) ns.update(defaults) - ns['__annotations__'] = annotations # We use `types.new_class()` instead of simply `type()` to allow dynamic creation # of generic dataclasses. cls = types.new_class(cls_name, bases, {}, exec_body_callback) + # For now, set annotations including the _ANY_MARKER. + cls.__annotate__ = annotate_method # For pickling to work, the __module__ variable needs to be set to the frame # where the dataclass is created. @@ -1530,11 +1741,14 @@ def exec_body_callback(ns): if module is not None: cls.__module__ = module - # Apply the normal decorator. - return dataclass(cls, init=init, repr=repr, eq=eq, order=order, - unsafe_hash=unsafe_hash, frozen=frozen, - match_args=match_args, kw_only=kw_only, slots=slots, - weakref_slot=weakref_slot) + # Apply the normal provided decorator. + cls = decorator(cls, init=init, repr=repr, eq=eq, order=order, + unsafe_hash=unsafe_hash, frozen=frozen, + match_args=match_args, kw_only=kw_only, slots=slots, + weakref_slot=weakref_slot) + # Now that the class is ready, allow the VALUE format. + value_blocked = False + return cls def replace(obj, /, **changes): @@ -1551,17 +1765,19 @@ class C: c1 = replace(c, x=3) assert c1.x == 3 and c1.y == 2 """ - - # We're going to mutate 'changes', but that's okay because it's a - # new dict, even if called with 'replace(obj, **my_changes)'. - if not _is_dataclass_instance(obj): raise TypeError("replace() should be called on dataclass instances") + return _replace(obj, **changes) + + +def _replace(self, /, **changes): + # We're going to mutate 'changes', but that's okay because it's a + # new dict, even if called with 'replace(self, **my_changes)'. # It's an error to have init=False fields in 'changes'. - # If a field is not in 'changes', read its value from the provided obj. + # If a field is not in 'changes', read its value from the provided 'self'. - for f in getattr(obj, _FIELDS).values(): + for f in getattr(self, _FIELDS).values(): # Only consider normal fields or InitVars. if f._field_type is _FIELD_CLASSVAR: continue @@ -1569,20 +1785,20 @@ class C: if not f.init: # Error if this field is specified in changes. if f.name in changes: - raise ValueError(f'field {f.name} is declared with ' - 'init=False, it cannot be specified with ' - 'replace()') + raise TypeError(f'field {f.name} is declared with ' + f'init=False, it cannot be specified with ' + f'replace()') continue if f.name not in changes: if f._field_type is _FIELD_INITVAR and f.default is MISSING: - raise ValueError(f"InitVar {f.name!r} " - 'must be specified with replace()') - changes[f.name] = getattr(obj, f.name) + raise TypeError(f"InitVar {f.name!r} " + f'must be specified with replace()') + changes[f.name] = getattr(self, f.name) # Create the new object, which calls __init__() and # __post_init__() (if defined), using all of the init fields we've # added and/or left in 'changes'. If there are values supplied in # changes that aren't fields, this will correctly raise a # TypeError. - return obj.__class__(**changes) + return self.__class__(**changes) diff --git a/PythonLib/full/datetime.py b/PythonLib/full/datetime.py index a33d2d72..14f30556 100644 --- a/PythonLib/full/datetime.py +++ b/PythonLib/full/datetime.py @@ -1,9 +1,13 @@ +"""Specific date/time and related types. + +See https://data.iana.org/time-zones/tz-link.html for +time zone and DST data sources. +""" + try: from _datetime import * - from _datetime import __doc__ except ImportError: from _pydatetime import * - from _pydatetime import __doc__ __all__ = ("date", "datetime", "time", "timedelta", "timezone", "tzinfo", "MINYEAR", "MAXYEAR", "UTC") diff --git a/PythonLib/full/dbm/__init__.py b/PythonLib/full/dbm/__init__.py index 8055d376..4fdbc54e 100644 --- a/PythonLib/full/dbm/__init__.py +++ b/PythonLib/full/dbm/__init__.py @@ -5,7 +5,7 @@ import dbm d = dbm.open(file, 'w', 0o666) -The returned object is a dbm.gnu, dbm.ndbm or dbm.dumb object, dependent on the +The returned object is a dbm.sqlite3, dbm.gnu, dbm.ndbm or dbm.dumb database object, dependent on the type of database being opened (determined by the whichdb function) in the case of an existing dbm. If the dbm does not exist and the create or new flag ('c' or 'n') was specified, the dbm type will be determined by the availability of @@ -38,7 +38,7 @@ class error(Exception): pass -_names = ['dbm.gnu', 'dbm.ndbm', 'dbm.dumb'] +_names = ['dbm.sqlite3', 'dbm.gnu', 'dbm.ndbm', 'dbm.dumb'] _defaultmod = None _modules = {} @@ -164,6 +164,10 @@ def whichdb(filename): if len(s) != 4: return "" + # Check for SQLite3 header string. + if s16 == b"SQLite format 3\0": + return "dbm.sqlite3" + # Convert to 4-byte int in native byte order -- return "" if impossible try: (magic,) = struct.unpack("=l", s) diff --git a/PythonLib/full/dbm/dumb.py b/PythonLib/full/dbm/dumb.py index 754624cc..def120ff 100644 --- a/PythonLib/full/dbm/dumb.py +++ b/PythonLib/full/dbm/dumb.py @@ -98,7 +98,8 @@ def _update(self, flag): except OSError: if flag not in ('c', 'n'): raise - self._modified = True + with self._io.open(self._dirfile, 'w', encoding="Latin-1") as f: + self._chmod(self._dirfile) else: with f: for line in f: @@ -134,6 +135,7 @@ def _commit(self): # position; UTF-8, though, does care sometimes. entry = "%r, %r\n" % (key.decode('Latin-1'), pos_and_siz_pair) f.write(entry) + self._modified = False sync = _commit diff --git a/PythonLib/full/dbm/sqlite3.py b/PythonLib/full/dbm/sqlite3.py new file mode 100644 index 00000000..d0eed54e --- /dev/null +++ b/PythonLib/full/dbm/sqlite3.py @@ -0,0 +1,144 @@ +import os +import sqlite3 +from pathlib import Path +from contextlib import suppress, closing +from collections.abc import MutableMapping + +BUILD_TABLE = """ + CREATE TABLE IF NOT EXISTS Dict ( + key BLOB UNIQUE NOT NULL, + value BLOB NOT NULL + ) +""" +GET_SIZE = "SELECT COUNT (key) FROM Dict" +LOOKUP_KEY = "SELECT value FROM Dict WHERE key = CAST(? AS BLOB)" +STORE_KV = "REPLACE INTO Dict (key, value) VALUES (CAST(? AS BLOB), CAST(? AS BLOB))" +DELETE_KEY = "DELETE FROM Dict WHERE key = CAST(? AS BLOB)" +ITER_KEYS = "SELECT key FROM Dict" + + +class error(OSError): + pass + + +_ERR_CLOSED = "DBM object has already been closed" +_ERR_REINIT = "DBM object does not support reinitialization" + + +def _normalize_uri(path): + path = Path(path) + uri = path.absolute().as_uri() + while "//" in uri: + uri = uri.replace("//", "/") + return uri + + +class _Database(MutableMapping): + + def __init__(self, path, /, *, flag, mode): + if hasattr(self, "_cx"): + raise error(_ERR_REINIT) + + path = os.fsdecode(path) + match flag: + case "r": + flag = "ro" + case "w": + flag = "rw" + case "c": + flag = "rwc" + Path(path).touch(mode=mode, exist_ok=True) + case "n": + flag = "rwc" + Path(path).unlink(missing_ok=True) + Path(path).touch(mode=mode) + case _: + raise ValueError("Flag must be one of 'r', 'w', 'c', or 'n', " + f"not {flag!r}") + + # We use the URI format when opening the database. + uri = _normalize_uri(path) + uri = f"{uri}?mode={flag}" + if flag == "ro": + # Add immutable=1 to allow read-only SQLite access even if wal/shm missing + uri += "&immutable=1" + + try: + self._cx = sqlite3.connect(uri, autocommit=True, uri=True) + except sqlite3.Error as exc: + raise error(str(exc)) + + if flag != "ro": + # This is an optimization only; it's ok if it fails. + with suppress(sqlite3.OperationalError): + self._cx.execute("PRAGMA journal_mode = wal") + + if flag == "rwc": + self._execute(BUILD_TABLE) + + def _execute(self, *args, **kwargs): + if not self._cx: + raise error(_ERR_CLOSED) + try: + return closing(self._cx.execute(*args, **kwargs)) + except sqlite3.Error as exc: + raise error(str(exc)) + + def __len__(self): + with self._execute(GET_SIZE) as cu: + row = cu.fetchone() + return row[0] + + def __getitem__(self, key): + with self._execute(LOOKUP_KEY, (key,)) as cu: + row = cu.fetchone() + if not row: + raise KeyError(key) + return row[0] + + def __setitem__(self, key, value): + self._execute(STORE_KV, (key, value)) + + def __delitem__(self, key): + with self._execute(DELETE_KEY, (key,)) as cu: + if not cu.rowcount: + raise KeyError(key) + + def __iter__(self): + try: + with self._execute(ITER_KEYS) as cu: + for row in cu: + yield row[0] + except sqlite3.Error as exc: + raise error(str(exc)) + + def close(self): + if self._cx: + self._cx.close() + self._cx = None + + def keys(self): + return list(super().keys()) + + def __enter__(self): + return self + + def __exit__(self, *args): + self.close() + + +def open(filename, /, flag="r", mode=0o666): + """Open a dbm.sqlite3 database and return the dbm object. + + The 'filename' parameter is the name of the database file. + + The optional 'flag' parameter can be one of ...: + 'r' (default): open an existing database for read only access + 'w': open an existing database for read/write access + 'c': create a database if it does not exist; open for read/write access + 'n': always create a new, empty database; open for read/write access + + The optional 'mode' parameter is the Unix file access mode of the database; + only used when creating a new database. Default: 0o666. + """ + return _Database(filename, flag=flag, mode=mode) diff --git a/PythonLib/full/decimal.py b/PythonLib/full/decimal.py index 4d8e15cb..530bdfb3 100644 --- a/PythonLib/full/decimal.py +++ b/PythonLib/full/decimal.py @@ -100,9 +100,10 @@ try: from _decimal import * - from _decimal import __version__ - from _decimal import __libmpdec_version__ + from _decimal import __version__ # noqa: F401 + from _decimal import __libmpdec_version__ # noqa: F401 except ImportError: - from _pydecimal import * - from _pydecimal import __version__ - from _pydecimal import __libmpdec_version__ + import _pydecimal + import sys + _pydecimal.__doc__ = __doc__ + sys.modules[__name__] = _pydecimal diff --git a/PythonLib/full/difflib.py b/PythonLib/full/difflib.py index 33e7e6c1..ac1ba4a6 100644 --- a/PythonLib/full/difflib.py +++ b/PythonLib/full/difflib.py @@ -78,8 +78,8 @@ class SequenceMatcher: sequences. As a rule of thumb, a .ratio() value over 0.6 means the sequences are close matches: - >>> print(round(s.ratio(), 3)) - 0.866 + >>> print(round(s.ratio(), 2)) + 0.87 >>> If you're only interested in where the sequences match, @@ -908,87 +908,85 @@ def _fancy_replace(self, a, alo, ahi, b, blo, bhi): + abcdefGhijkl ? ^ ^ ^ """ - - # don't synch up unless the lines have a similarity score of at - # least cutoff; best_ratio tracks the best score seen so far - best_ratio, cutoff = 0.74, 0.75 + # Don't synch up unless the lines have a similarity score above + # cutoff. Previously only the smallest pair was handled here, + # and if there are many pairs with the best ratio, recursion + # could grow very deep, and runtime cubic. See: + # https://github.com/python/cpython/issues/119105 + # + # Later, more pathological cases prompted removing recursion + # entirely. + cutoff = 0.74999 cruncher = SequenceMatcher(self.charjunk) - eqi, eqj = None, None # 1st indices of equal lines (if any) + crqr = cruncher.real_quick_ratio + cqr = cruncher.quick_ratio + cr = cruncher.ratio - # search for the pair that matches best without being identical - # (identical lines must be junk lines, & we don't want to synch up - # on junk -- unless we have to) + WINDOW = 10 + best_i = best_j = None + dump_i, dump_j = alo, blo # smallest indices not yet resolved for j in range(blo, bhi): - bj = b[j] - cruncher.set_seq2(bj) - for i in range(alo, ahi): - ai = a[i] - if ai == bj: - if eqi is None: - eqi, eqj = i, j - continue - cruncher.set_seq1(ai) - # computing similarity is expensive, so use the quick - # upper bounds first -- have seen this speed up messy - # compares by a factor of 3. - # note that ratio() is only expensive to compute the first - # time it's called on a sequence pair; the expensive part - # of the computation is cached by cruncher - if cruncher.real_quick_ratio() > best_ratio and \ - cruncher.quick_ratio() > best_ratio and \ - cruncher.ratio() > best_ratio: - best_ratio, best_i, best_j = cruncher.ratio(), i, j - if best_ratio < cutoff: - # no non-identical "pretty close" pair - if eqi is None: - # no identical pair either -- treat it as a straight replace - yield from self._plain_replace(a, alo, ahi, b, blo, bhi) - return - # no close pair, but an identical pair -- synch up on that - best_i, best_j, best_ratio = eqi, eqj, 1.0 - else: - # there's a close pair, so forget the identical pair (if any) - eqi = None - - # a[best_i] very similar to b[best_j]; eqi is None iff they're not - # identical - - # pump out diffs from before the synch point - yield from self._fancy_helper(a, alo, best_i, b, blo, best_j) - - # do intraline marking on the synch pair - aelt, belt = a[best_i], b[best_j] - if eqi is None: - # pump out a '-', '?', '+', '?' quad for the synched lines - atags = btags = "" - cruncher.set_seqs(aelt, belt) - for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes(): - la, lb = ai2 - ai1, bj2 - bj1 - if tag == 'replace': - atags += '^' * la - btags += '^' * lb - elif tag == 'delete': - atags += '-' * la - elif tag == 'insert': - btags += '+' * lb - elif tag == 'equal': - atags += ' ' * la - btags += ' ' * lb - else: - raise ValueError('unknown tag %r' % (tag,)) - yield from self._qformat(aelt, belt, atags, btags) - else: - # the synch pair is identical - yield ' ' + aelt + cruncher.set_seq2(b[j]) + # Search the corresponding i's within WINDOW for rhe highest + # ratio greater than `cutoff`. + aequiv = alo + (j - blo) + arange = range(max(aequiv - WINDOW, dump_i), + min(aequiv + WINDOW + 1, ahi)) + if not arange: # likely exit if `a` is shorter than `b` + break + best_ratio = cutoff + for i in arange: + cruncher.set_seq1(a[i]) + # Ordering by cheapest to most expensive ratio is very + # valuable, most often getting out early. + if (crqr() > best_ratio + and cqr() > best_ratio + and cr() > best_ratio): + best_i, best_j, best_ratio = i, j, cr() + + if best_i is None: + # found nothing to synch on yet - move to next j + continue - # pump out diffs from after the synch point - yield from self._fancy_helper(a, best_i+1, ahi, b, best_j+1, bhi) + # pump out straight replace from before this synch pair + yield from self._fancy_helper(a, dump_i, best_i, + b, dump_j, best_j) + # do intraline marking on the synch pair + aelt, belt = a[best_i], b[best_j] + if aelt != belt: + # pump out a '-', '?', '+', '?' quad for the synched lines + atags = btags = "" + cruncher.set_seqs(aelt, belt) + for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes(): + la, lb = ai2 - ai1, bj2 - bj1 + if tag == 'replace': + atags += '^' * la + btags += '^' * lb + elif tag == 'delete': + atags += '-' * la + elif tag == 'insert': + btags += '+' * lb + elif tag == 'equal': + atags += ' ' * la + btags += ' ' * lb + else: + raise ValueError('unknown tag %r' % (tag,)) + yield from self._qformat(aelt, belt, atags, btags) + else: + # the synch pair is identical + yield ' ' + aelt + dump_i, dump_j = best_i + 1, best_j + 1 + best_i = best_j = None + + # pump out straight replace from after the last synch pair + yield from self._fancy_helper(a, dump_i, ahi, + b, dump_j, bhi) def _fancy_helper(self, a, alo, ahi, b, blo, bhi): g = [] if alo < ahi: if blo < bhi: - g = self._fancy_replace(a, alo, ahi, b, blo, bhi) + g = self._plain_replace(a, alo, ahi, b, blo, bhi) else: g = self._dump('-', a, alo, ahi) elif blo < bhi: @@ -1040,11 +1038,9 @@ def _qformat(self, aline, bline, atags, btags): # remaining is that perhaps it was really the case that " volatile" # was inserted after "private". I can live with that . -import re - -def IS_LINE_JUNK(line, pat=re.compile(r"\s*(?:#\s*)?$").match): +def IS_LINE_JUNK(line, pat=None): r""" - Return True for ignorable line: iff `line` is blank or contains a single '#'. + Return True for ignorable line: if `line` is blank or contains a single '#'. Examples: @@ -1056,6 +1052,11 @@ def IS_LINE_JUNK(line, pat=re.compile(r"\s*(?:#\s*)?$").match): False """ + if pat is None: + # Default: match '#' or the empty string + return line.strip() in '#' + # Previous versions used the undocumented parameter 'pat' as a + # match function. Retain this behaviour for compatibility. return pat(line) is not None def IS_CHARACTER_JUNK(ch, ws=" \t"): @@ -1266,6 +1267,12 @@ def _check_types(a, b, *args): if b and not isinstance(b[0], str): raise TypeError('lines to compare must be str, not %s (%r)' % (type(b[0]).__name__, b[0])) + if isinstance(a, str): + raise TypeError('input must be a sequence of strings, not %s' % + type(a).__name__) + if isinstance(b, str): + raise TypeError('input must be a sequence of strings, not %s' % + type(b).__name__) for arg in args: if not isinstance(arg, str): raise TypeError('all arguments must be str, not: %r' % (arg,)) @@ -1628,13 +1635,22 @@ def _line_pair_iterator(): """ _styles = """ + :root {color-scheme: light dark} table.diff {font-family: Menlo, Consolas, Monaco, Liberation Mono, Lucida Console, monospace; border:medium} .diff_header {background-color:#e0e0e0} td.diff_header {text-align:right} .diff_next {background-color:#c0c0c0} - .diff_add {background-color:#aaffaa} + .diff_add {background-color:palegreen} .diff_chg {background-color:#ffff77} - .diff_sub {background-color:#ffaaaa}""" + .diff_sub {background-color:#ffaaaa} + + @media (prefers-color-scheme: dark) { + .diff_header {background-color:#666} + .diff_next {background-color:#393939} + .diff_add {background-color:darkgreen} + .diff_chg {background-color:#847415} + .diff_sub {background-color:darkred} + }""" _table_template = """ '). \ replace('\t',' ') -del re def restore(delta, which): r""" @@ -2047,10 +2062,3 @@ def restore(delta, which): for line in delta: if line[:2] in prefixes: yield line[2:] - -def _test(): - import doctest, difflib - return doctest.testmod(difflib) - -if __name__ == "__main__": - _test() diff --git a/PythonLib/full/dis.py b/PythonLib/full/dis.py index b1069c82..d6d2c138 100644 --- a/PythonLib/full/dis.py +++ b/PythonLib/full/dis.py @@ -11,12 +11,16 @@ _cache_format, _inline_cache_entries, _nb_ops, + _common_constants, _intrinsic_1_descs, _intrinsic_2_descs, + _special_method_names, _specializations, - _specialized_instructions, + _specialized_opmap, ) +from _opcode import get_executor + __all__ = ["code_info", "dis", "disassemble", "distb", "disco", "findlinestarts", "findlabels", "show_code", "get_instructions", "Instruction", "Bytecode"] + _opcodes_all @@ -25,19 +29,14 @@ _have_code = (types.MethodType, types.FunctionType, types.CodeType, classmethod, staticmethod, type) -FORMAT_VALUE = opmap['FORMAT_VALUE'] -FORMAT_VALUE_CONVERTERS = ( - (None, ''), - (str, 'str'), - (repr, 'repr'), - (ascii, 'ascii'), -) -MAKE_FUNCTION = opmap['MAKE_FUNCTION'] -MAKE_FUNCTION_FLAGS = ('defaults', 'kwdefaults', 'annotations', 'closure') +CONVERT_VALUE = opmap['CONVERT_VALUE'] + +SET_FUNCTION_ATTRIBUTE = opmap['SET_FUNCTION_ATTRIBUTE'] +FUNCTION_ATTR_FLAGS = ('defaults', 'kwdefaults', 'annotations', 'closure', 'annotate') -LOAD_CONST = opmap['LOAD_CONST'] -RETURN_CONST = opmap['RETURN_CONST'] +ENTER_EXECUTOR = opmap['ENTER_EXECUTOR'] LOAD_GLOBAL = opmap['LOAD_GLOBAL'] +LOAD_SMALL_INT = opmap['LOAD_SMALL_INT'] BINARY_OP = opmap['BINARY_OP'] JUMP_BACKWARD = opmap['JUMP_BACKWARD'] FOR_ITER = opmap['FOR_ITER'] @@ -46,16 +45,25 @@ LOAD_SUPER_ATTR = opmap['LOAD_SUPER_ATTR'] CALL_INTRINSIC_1 = opmap['CALL_INTRINSIC_1'] CALL_INTRINSIC_2 = opmap['CALL_INTRINSIC_2'] +LOAD_COMMON_CONSTANT = opmap['LOAD_COMMON_CONSTANT'] +LOAD_SPECIAL = opmap['LOAD_SPECIAL'] +LOAD_FAST_LOAD_FAST = opmap['LOAD_FAST_LOAD_FAST'] +LOAD_FAST_BORROW_LOAD_FAST_BORROW = opmap['LOAD_FAST_BORROW_LOAD_FAST_BORROW'] +STORE_FAST_LOAD_FAST = opmap['STORE_FAST_LOAD_FAST'] +STORE_FAST_STORE_FAST = opmap['STORE_FAST_STORE_FAST'] +IS_OP = opmap['IS_OP'] +CONTAINS_OP = opmap['CONTAINS_OP'] +END_ASYNC_FOR = opmap['END_ASYNC_FOR'] CACHE = opmap["CACHE"] _all_opname = list(opname) _all_opmap = dict(opmap) -_empty_slot = [slot for slot, name in enumerate(_all_opname) if name.startswith("<")] -for spec_op, specialized in zip(_empty_slot, _specialized_instructions): +for name, op in _specialized_opmap.items(): # fill opname and opmap - _all_opname[spec_op] = specialized - _all_opmap[specialized] = spec_op + assert op < len(_all_opname) + _all_opname[op] = name + _all_opmap[name] = op deoptmap = { specialized: base for base, family in _specializations.items() for specialized in family @@ -74,7 +82,8 @@ def _try_compile(source, name): pass return compile(source, name, 'exec') -def dis(x=None, *, file=None, depth=None, show_caches=False, adaptive=False): +def dis(x=None, *, file=None, depth=None, show_caches=False, adaptive=False, + show_offsets=False, show_positions=False): """Disassemble classes, methods, functions, and other compiled objects. With no argument, disassemble the last traceback. @@ -84,7 +93,8 @@ def dis(x=None, *, file=None, depth=None, show_caches=False, adaptive=False): in a special attribute. """ if x is None: - distb(file=file, show_caches=show_caches, adaptive=adaptive) + distb(file=file, show_caches=show_caches, adaptive=adaptive, + show_offsets=show_offsets, show_positions=show_positions) return # Extract functions from methods. if hasattr(x, '__func__'): @@ -105,21 +115,28 @@ def dis(x=None, *, file=None, depth=None, show_caches=False, adaptive=False): if isinstance(x1, _have_code): print("Disassembly of %s:" % name, file=file) try: - dis(x1, file=file, depth=depth, show_caches=show_caches, adaptive=adaptive) + dis(x1, file=file, depth=depth, show_caches=show_caches, adaptive=adaptive, show_offsets=show_offsets, show_positions=show_positions) except TypeError as msg: print("Sorry:", msg, file=file) print(file=file) elif hasattr(x, 'co_code'): # Code object - _disassemble_recursive(x, file=file, depth=depth, show_caches=show_caches, adaptive=adaptive) + _disassemble_recursive(x, file=file, depth=depth, show_caches=show_caches, adaptive=adaptive, show_offsets=show_offsets, show_positions=show_positions) elif isinstance(x, (bytes, bytearray)): # Raw bytecode - _disassemble_bytes(x, file=file, show_caches=show_caches) + labels_map = _make_labels_map(x) + label_width = 4 + len(str(len(labels_map))) + formatter = Formatter(file=file, + offset_width=len(str(max(len(x) - 2, 9999))) if show_offsets else 0, + label_width=label_width, + show_caches=show_caches) + arg_resolver = ArgResolver(labels_map=labels_map) + _disassemble_bytes(x, arg_resolver=arg_resolver, formatter=formatter) elif isinstance(x, str): # Source code - _disassemble_str(x, file=file, depth=depth, show_caches=show_caches, adaptive=adaptive) + _disassemble_str(x, file=file, depth=depth, show_caches=show_caches, adaptive=adaptive, show_offsets=show_offsets, show_positions=show_positions) else: raise TypeError("don't know how to disassemble %s objects" % type(x).__name__) -def distb(tb=None, *, file=None, show_caches=False, adaptive=False): +def distb(tb=None, *, file=None, show_caches=False, adaptive=False, show_offsets=False, show_positions=False): """Disassemble a traceback (default: last traceback).""" if tb is None: try: @@ -130,22 +147,24 @@ def distb(tb=None, *, file=None, show_caches=False, adaptive=False): except AttributeError: raise RuntimeError("no last traceback to disassemble") from None while tb.tb_next: tb = tb.tb_next - disassemble(tb.tb_frame.f_code, tb.tb_lasti, file=file, show_caches=show_caches, adaptive=adaptive) + disassemble(tb.tb_frame.f_code, tb.tb_lasti, file=file, show_caches=show_caches, adaptive=adaptive, show_offsets=show_offsets, show_positions=show_positions) # The inspect module interrogates this dictionary to build its # list of CO_* constants. It is also used by pretty_flags to # turn the co_flags field into a human readable list. COMPILER_FLAG_NAMES = { - 1: "OPTIMIZED", - 2: "NEWLOCALS", - 4: "VARARGS", - 8: "VARKEYWORDS", - 16: "NESTED", - 32: "GENERATOR", - 64: "NOFREE", - 128: "COROUTINE", - 256: "ITERABLE_COROUTINE", - 512: "ASYNC_GENERATOR", + 1: "OPTIMIZED", + 2: "NEWLOCALS", + 4: "VARARGS", + 8: "VARKEYWORDS", + 16: "NESTED", + 32: "GENERATOR", + 64: "NOFREE", + 128: "COROUTINE", + 256: "ITERABLE_COROUTINE", + 512: "ASYNC_GENERATOR", + 0x4000000: "HAS_DOCSTRING", + 0x8000000: "METHOD", } def pretty_flags(flags): @@ -197,7 +216,27 @@ def _deoptop(op): return _all_opmap[deoptmap[name]] if name in deoptmap else op def _get_code_array(co, adaptive): - return co._co_code_adaptive if adaptive else co.co_code + if adaptive: + code = co._co_code_adaptive + res = [] + found = False + for i in range(0, len(code), 2): + op, arg = code[i], code[i+1] + if op == ENTER_EXECUTOR: + try: + ex = get_executor(co, i) + except (ValueError, RuntimeError): + ex = None + + if ex: + op, arg = ex.get_opcode(), ex.get_oparg() + found = True + + res.append(op.to_bytes()) + res.append(arg.to_bytes()) + return code if not found else b''.join(res) + else: + return co.co_code def code_info(x): """Formatted details of methods, functions, or code.""" @@ -262,11 +301,14 @@ def show_code(co, *, file=None): 'argval', 'argrepr', 'offset', + 'start_offset', 'starts_line', - 'is_jump_target', - 'positions' + 'line_number', + 'label', + 'positions', + 'cache_info', ], - defaults=[None] + defaults=[None, None, None] ) _Instruction.opname.__doc__ = "Human readable name for operation" @@ -275,18 +317,48 @@ def show_code(co, *, file=None): _Instruction.argval.__doc__ = "Resolved arg value (if known), otherwise same as arg" _Instruction.argrepr.__doc__ = "Human readable description of operation argument" _Instruction.offset.__doc__ = "Start index of operation within bytecode sequence" -_Instruction.starts_line.__doc__ = "Line started by this opcode (if any), otherwise None" -_Instruction.is_jump_target.__doc__ = "True if other code jumps to here, otherwise False" +_Instruction.start_offset.__doc__ = ( + "Start index of operation within bytecode sequence, including extended args if present; " + "otherwise equal to Instruction.offset" +) +_Instruction.starts_line.__doc__ = "True if this opcode starts a source line, otherwise False" +_Instruction.line_number.__doc__ = "source line number associated with this opcode (if any), otherwise None" +_Instruction.label.__doc__ = "A label (int > 0) if this instruction is a jump target, otherwise None" _Instruction.positions.__doc__ = "dis.Positions object holding the span of source code covered by this instruction" +_Instruction.cache_info.__doc__ = "list of (name, size, data), one for each cache entry of the instruction" -_ExceptionTableEntry = collections.namedtuple("_ExceptionTableEntry", +_ExceptionTableEntryBase = collections.namedtuple("_ExceptionTableEntryBase", "start end target depth lasti") +class _ExceptionTableEntry(_ExceptionTableEntryBase): + pass + _OPNAME_WIDTH = 20 _OPARG_WIDTH = 5 +def _get_cache_size(opname): + return _inline_cache_entries.get(opname, 0) + +def _get_jump_target(op, arg, offset): + """Gets the bytecode offset of the jump target if this is a jump instruction. + + Otherwise return None. + """ + deop = _deoptop(op) + caches = _get_cache_size(_all_opname[deop]) + if deop in hasjrel: + if _is_backward_jump(deop): + arg = -arg + target = offset + 2 + arg*2 + target += 2 * caches + elif deop in hasjabs: + target = arg*2 + else: + target = None + return target + class Instruction(_Instruction): - """Details for a bytecode operation + """Details for a bytecode operation. Defined fields: opname - human readable name for operation @@ -295,51 +367,289 @@ class Instruction(_Instruction): argval - resolved arg value (if known), otherwise same as arg argrepr - human readable description of operation argument offset - start index of operation within bytecode sequence - starts_line - line started by this opcode (if any), otherwise None - is_jump_target - True if other code jumps to here, otherwise False + start_offset - start index of operation within bytecode sequence including extended args if present; + otherwise equal to Instruction.offset + starts_line - True if this opcode starts a source line, otherwise False + line_number - source line number associated with this opcode (if any), otherwise None + label - A label if this instruction is a jump target, otherwise None positions - Optional dis.Positions object holding the span of source code covered by this instruction + cache_info - information about the format and content of the instruction's cache + entries (if any) """ - def _disassemble(self, lineno_width=3, mark_as_current=False, offset_width=4): - """Format instruction details for inclusion in disassembly output + @staticmethod + def make( + opname, arg, argval, argrepr, offset, start_offset, starts_line, + line_number, label=None, positions=None, cache_info=None + ): + return Instruction(opname, _all_opmap[opname], arg, argval, argrepr, offset, + start_offset, starts_line, line_number, label, positions, cache_info) + + @property + def oparg(self): + """Alias for Instruction.arg.""" + return self.arg + + @property + def baseopcode(self): + """Numeric code for the base operation if operation is specialized. + + Otherwise equal to Instruction.opcode. + """ + return _deoptop(self.opcode) + + @property + def baseopname(self): + """Human readable name for the base operation if operation is specialized. + + Otherwise equal to Instruction.opname. + """ + return opname[self.baseopcode] + + @property + def cache_offset(self): + """Start index of the cache entries following the operation.""" + return self.offset + 2 + + @property + def end_offset(self): + """End index of the cache entries following the operation.""" + return self.cache_offset + _get_cache_size(_all_opname[self.opcode])*2 + + @property + def jump_target(self): + """Bytecode index of the jump target if this is a jump operation. + + Otherwise return None. + """ + return _get_jump_target(self.opcode, self.arg, self.offset) + + @property + def is_jump_target(self): + """True if other code jumps to here, otherwise False""" + return self.label is not None - *lineno_width* sets the width of the line number field (0 omits it) - *mark_as_current* inserts a '-->' marker arrow as part of the line + def __str__(self): + output = io.StringIO() + formatter = Formatter(file=output) + formatter.print_instruction(self, False) + return output.getvalue() + + +class Formatter: + + def __init__(self, file=None, lineno_width=0, offset_width=0, label_width=0, + line_offset=0, show_caches=False, *, show_positions=False): + """Create a Formatter + + *file* where to write the output + *lineno_width* sets the width of the source location field (0 omits it). + Should be large enough for a line number or full positions (depending + on the value of *show_positions*). *offset_width* sets the width of the instruction offset field + *label_width* sets the width of the label field + *show_caches* is a boolean indicating whether to display cache lines + *show_positions* is a boolean indicating whether full positions should + be reported instead of only the line numbers. """ + self.file = file + self.lineno_width = lineno_width + self.offset_width = offset_width + self.label_width = label_width + self.show_caches = show_caches + self.show_positions = show_positions + + def print_instruction(self, instr, mark_as_current=False): + self.print_instruction_line(instr, mark_as_current) + if self.show_caches and instr.cache_info: + offset = instr.offset + for name, size, data in instr.cache_info: + for i in range(size): + offset += 2 + # Only show the fancy argrepr for a CACHE instruction when it's + # the first entry for a particular cache value: + if i == 0: + argrepr = f"{name}: {int.from_bytes(data, sys.byteorder)}" + else: + argrepr = "" + self.print_instruction_line( + Instruction("CACHE", CACHE, 0, None, argrepr, offset, offset, + False, None, None, instr.positions), + False) + + def print_instruction_line(self, instr, mark_as_current): + """Format instruction details for inclusion in disassembly output.""" + lineno_width = self.lineno_width + offset_width = self.offset_width + label_width = self.label_width + + new_source_line = (lineno_width > 0 and + instr.starts_line and + instr.offset > 0) + if new_source_line: + print(file=self.file) + fields = [] - # Column: Source code line number + # Column: Source code locations information if lineno_width: - if self.starts_line is not None: - lineno_fmt = "%%%dd" % lineno_width - fields.append(lineno_fmt % self.starts_line) + if self.show_positions: + # reporting positions instead of just line numbers + if instr_positions := instr.positions: + if all(p is None for p in instr_positions): + positions_str = _NO_LINENO + else: + ps = tuple('?' if p is None else p for p in instr_positions) + positions_str = f"{ps[0]}:{ps[2]}-{ps[1]}:{ps[3]}" + fields.append(f'{positions_str:{lineno_width}}') + else: + fields.append(' ' * lineno_width) else: - fields.append(' ' * lineno_width) + if instr.starts_line: + lineno_fmt = "%%%dd" if instr.line_number is not None else "%%%ds" + lineno_fmt = lineno_fmt % lineno_width + lineno = _NO_LINENO if instr.line_number is None else instr.line_number + fields.append(lineno_fmt % lineno) + else: + fields.append(' ' * lineno_width) + # Column: Label + if instr.label is not None: + lbl = f"L{instr.label}:" + fields.append(f"{lbl:>{label_width}}") + else: + fields.append(' ' * label_width) + # Column: Instruction offset from start of code sequence + if offset_width > 0: + fields.append(f"{repr(instr.offset):>{offset_width}} ") # Column: Current instruction indicator if mark_as_current: fields.append('-->') else: fields.append(' ') - # Column: Jump target marker - if self.is_jump_target: - fields.append('>>') - else: - fields.append(' ') - # Column: Instruction offset from start of code sequence - fields.append(repr(self.offset).rjust(offset_width)) # Column: Opcode name - fields.append(self.opname.ljust(_OPNAME_WIDTH)) + fields.append(instr.opname.ljust(_OPNAME_WIDTH)) # Column: Opcode argument - if self.arg is not None: - fields.append(repr(self.arg).rjust(_OPARG_WIDTH)) + if instr.arg is not None: + arg = repr(instr.arg) + # If opname is longer than _OPNAME_WIDTH, we allow it to overflow into + # the space reserved for oparg. This results in fewer misaligned opargs + # in the disassembly output. + opname_excess = max(0, len(instr.opname) - _OPNAME_WIDTH) + fields.append(repr(instr.arg).rjust(_OPARG_WIDTH - opname_excess)) # Column: Opcode argument details - if self.argrepr: - fields.append('(' + self.argrepr + ')') - return ' '.join(fields).rstrip() - - -def get_instructions(x, *, first_line=None, show_caches=False, adaptive=False): + if instr.argrepr: + fields.append('(' + instr.argrepr + ')') + print(' '.join(fields).rstrip(), file=self.file) + + def print_exception_table(self, exception_entries): + file = self.file + if exception_entries: + print("ExceptionTable:", file=file) + for entry in exception_entries: + lasti = " lasti" if entry.lasti else "" + start = entry.start_label + end = entry.end_label + target = entry.target_label + print(f" L{start} to L{end} -> L{target} [{entry.depth}]{lasti}", file=file) + + +class ArgResolver: + def __init__(self, co_consts=None, names=None, varname_from_oparg=None, labels_map=None): + self.co_consts = co_consts + self.names = names + self.varname_from_oparg = varname_from_oparg + self.labels_map = labels_map or {} + + def offset_from_jump_arg(self, op, arg, offset): + deop = _deoptop(op) + if deop in hasjabs: + return arg * 2 + elif deop in hasjrel: + signed_arg = -arg if _is_backward_jump(deop) else arg + argval = offset + 2 + signed_arg*2 + caches = _get_cache_size(_all_opname[deop]) + argval += 2 * caches + return argval + return None + + def get_label_for_offset(self, offset): + return self.labels_map.get(offset, None) + + def get_argval_argrepr(self, op, arg, offset): + get_name = None if self.names is None else self.names.__getitem__ + argval = None + argrepr = '' + deop = _deoptop(op) + if arg is not None: + # Set argval to the dereferenced value of the argument when + # available, and argrepr to the string representation of argval. + # _disassemble_bytes needs the string repr of the + # raw name index for LOAD_GLOBAL, LOAD_CONST, etc. + argval = arg + if deop in hasconst: + argval, argrepr = _get_const_info(deop, arg, self.co_consts) + elif deop in hasname: + if deop == LOAD_GLOBAL: + argval, argrepr = _get_name_info(arg//2, get_name) + if (arg & 1) and argrepr: + argrepr = f"{argrepr} + NULL" + elif deop == LOAD_ATTR: + argval, argrepr = _get_name_info(arg//2, get_name) + if (arg & 1) and argrepr: + argrepr = f"{argrepr} + NULL|self" + elif deop == LOAD_SUPER_ATTR: + argval, argrepr = _get_name_info(arg//4, get_name) + if (arg & 1) and argrepr: + argrepr = f"{argrepr} + NULL|self" + else: + argval, argrepr = _get_name_info(arg, get_name) + elif deop in hasjump or deop in hasexc: + argval = self.offset_from_jump_arg(op, arg, offset) + lbl = self.get_label_for_offset(argval) + assert lbl is not None + preposition = "from" if deop == END_ASYNC_FOR else "to" + argrepr = f"{preposition} L{lbl}" + elif deop in (LOAD_FAST_LOAD_FAST, LOAD_FAST_BORROW_LOAD_FAST_BORROW, STORE_FAST_LOAD_FAST, STORE_FAST_STORE_FAST): + arg1 = arg >> 4 + arg2 = arg & 15 + val1, argrepr1 = _get_name_info(arg1, self.varname_from_oparg) + val2, argrepr2 = _get_name_info(arg2, self.varname_from_oparg) + argrepr = argrepr1 + ", " + argrepr2 + argval = val1, val2 + elif deop in haslocal or deop in hasfree: + argval, argrepr = _get_name_info(arg, self.varname_from_oparg) + elif deop in hascompare: + argval = cmp_op[arg >> 5] + argrepr = argval + if arg & 16: + argrepr = f"bool({argrepr})" + elif deop == CONVERT_VALUE: + argval = (None, str, repr, ascii)[arg] + argrepr = ('', 'str', 'repr', 'ascii')[arg] + elif deop == SET_FUNCTION_ATTRIBUTE: + argrepr = ', '.join(s for i, s in enumerate(FUNCTION_ATTR_FLAGS) + if arg & (1<>4] - argrepr = argval - elif deop == FORMAT_VALUE: - argval, argrepr = FORMAT_VALUE_CONVERTERS[arg & 0x3] - argval = (argval, bool(arg & 0x4)) - if argval[1]: - if argrepr: - argrepr += ', ' - argrepr += 'with format' - elif deop == MAKE_FUNCTION: - argrepr = ', '.join(s for i, s in enumerate(MAKE_FUNCTION_FLAGS) - if arg & (1< 0: if depth is not None: depth = depth - 1 @@ -560,53 +842,82 @@ def _disassemble_recursive(co, *, file=None, depth=None, show_caches=False, adap print(file=file) print("Disassembly of %r:" % (x,), file=file) _disassemble_recursive( - x, file=file, depth=depth, show_caches=show_caches, adaptive=adaptive + x, file=file, depth=depth, show_caches=show_caches, + adaptive=adaptive, show_offsets=show_offsets, show_positions=show_positions ) -def _disassemble_bytes(code, lasti=-1, varname_from_oparg=None, - names=None, co_consts=None, linestarts=None, - *, file=None, line_offset=0, exception_entries=(), - co_positions=None, show_caches=False): - # Omit the line number column entirely if we have no line number info - show_lineno = bool(linestarts) - if show_lineno: - maxlineno = max(linestarts.values()) + line_offset - if maxlineno >= 1000: - lineno_width = len(str(maxlineno)) - else: - lineno_width = 3 - else: - lineno_width = 0 - maxoffset = len(code) - 2 - if maxoffset >= 10000: - offset_width = len(str(maxoffset)) - else: - offset_width = 4 - for instr in _get_instructions_bytes(code, varname_from_oparg, names, - co_consts, linestarts, - line_offset=line_offset, - exception_entries=exception_entries, - co_positions=co_positions, - show_caches=show_caches): - new_source_line = (show_lineno and - instr.starts_line is not None and - instr.offset > 0) - if new_source_line: - print(file=file) - if show_caches: - is_current_instr = instr.offset == lasti - else: - # Each CACHE takes 2 bytes - is_current_instr = instr.offset <= lasti \ - <= instr.offset + 2 * _inline_cache_entries[_deoptop(instr.opcode)] - print(instr._disassemble(lineno_width, is_current_instr, offset_width), - file=file) - if exception_entries: - print("ExceptionTable:", file=file) - for entry in exception_entries: - lasti = " lasti" if entry.lasti else "" - end = entry.end-2 - print(f" {entry.start} to {end} -> {entry.target} [{entry.depth}]{lasti}", file=file) + +def _make_labels_map(original_code, exception_entries=()): + jump_targets = set(findlabels(original_code)) + labels = set(jump_targets) + for start, end, target, _, _ in exception_entries: + labels.add(start) + labels.add(end) + labels.add(target) + labels = sorted(labels) + labels_map = {offset: i+1 for (i, offset) in enumerate(sorted(labels))} + for e in exception_entries: + e.start_label = labels_map[e.start] + e.end_label = labels_map[e.end] + e.target_label = labels_map[e.target] + return labels_map + +_NO_LINENO = ' --' + +def _get_lineno_width(linestarts): + if linestarts is None: + return 0 + maxlineno = max(filter(None, linestarts.values()), default=-1) + if maxlineno == -1: + # Omit the line number column entirely if we have no line number info + return 0 + lineno_width = max(3, len(str(maxlineno))) + if lineno_width < len(_NO_LINENO) and None in linestarts.values(): + lineno_width = len(_NO_LINENO) + return lineno_width + +def _get_positions_width(code): + # Positions are formatted as 'LINE:COL-ENDLINE:ENDCOL ' (note trailing space). + # A missing component appears as '?', and when all components are None, we + # render '_NO_LINENO'. thus the minimum width is 1 + len(_NO_LINENO). + # + # If all values are missing, positions are not printed (i.e. positions_width = 0). + has_value = False + values_width = 0 + for positions in code.co_positions(): + has_value |= any(isinstance(p, int) for p in positions) + width = sum(1 if p is None else len(str(p)) for p in positions) + values_width = max(width, values_width) + if has_value: + # 3 = number of separators in a normal format + return 1 + max(len(_NO_LINENO), 3 + values_width) + return 0 + +def _disassemble_bytes(code, lasti=-1, linestarts=None, + *, line_offset=0, exception_entries=(), + co_positions=None, original_code=None, + arg_resolver=None, formatter=None): + + assert formatter is not None + assert arg_resolver is not None + + instrs = _get_instructions_bytes(code, linestarts=linestarts, + line_offset=line_offset, + co_positions=co_positions, + original_code=original_code, + arg_resolver=arg_resolver) + + print_instructions(instrs, exception_entries, formatter, lasti=lasti) + + +def print_instructions(instrs, exception_entries, formatter, lasti=-1): + for instr in instrs: + # Each CACHE takes 2 bytes + is_current_instr = instr.offset <= lasti \ + <= instr.offset + 2 * _get_cache_size(_all_opname[_deoptop(instr.opcode)]) + formatter.print_instruction(instr, is_current_instr) + + formatter.print_exception_table(exception_entries) def _disassemble_str(source, **kwargs): """Compile the source string, then disassemble the code object.""" @@ -622,6 +933,7 @@ def _disassemble_str(source, **kwargs): def _unpack_opargs(code): extended_arg = 0 + extended_args_offset = 0 # Number of EXTENDED_ARG instructions preceding the current instruction caches = 0 for i in range(0, len(code), 2): # Skip inline CACHE entries: @@ -630,7 +942,7 @@ def _unpack_opargs(code): continue op = code[i] deop = _deoptop(op) - caches = _inline_cache_entries[deop] + caches = _get_cache_size(_all_opname[deop]) if deop in hasarg: arg = code[i+1] | extended_arg extended_arg = (arg << 8) if deop == EXTENDED_ARG else 0 @@ -642,7 +954,13 @@ def _unpack_opargs(code): else: arg = None extended_arg = 0 - yield (i, op, arg) + if deop == EXTENDED_ARG: + extended_args_offset += 1 + yield (i, i, op, arg) + else: + start_offset = i - extended_args_offset*2 + yield (i, start_offset, op, arg) + extended_args_offset = 0 def findlabels(code): """Detect all offsets in a byte code which are jump targets. @@ -651,18 +969,10 @@ def findlabels(code): """ labels = [] - for offset, op, arg in _unpack_opargs(code): + for offset, _, op, arg in _unpack_opargs(code): if arg is not None: - deop = _deoptop(op) - caches = _inline_cache_entries[deop] - if deop in hasjrel: - if _is_backward_jump(deop): - arg = -arg - label = offset + 2 + arg*2 - label += 2 * caches - elif deop in hasjabs: - label = arg*2 - else: + label = _get_jump_target(op, arg, offset) + if label is None: continue if label not in labels: labels.append(label) @@ -672,10 +982,12 @@ def findlinestarts(code): """Find the offsets in a byte code which are start of lines in the source. Generate pairs (offset, lineno) + lineno will be an integer or None the offset does not have a source line. """ - lastline = None + + lastline = False # None is a valid line number for start, end, line in code.co_lines(): - if line is not None and line != lastline: + if line is not lastline: lastline = line yield start, line return @@ -691,13 +1003,14 @@ def _find_imports(co): consts = co.co_consts names = co.co_names - opargs = [(op, arg) for _, op, arg in _unpack_opargs(co.co_code) + opargs = [(op, arg) for _, _, op, arg in _unpack_opargs(co.co_code) if op != EXTENDED_ARG] for i, (op, oparg) in enumerate(opargs): if op == IMPORT_NAME and i >= 2: from_op = opargs[i-1] level_op = opargs[i-2] - if (from_op[0] in hasconst and level_op[0] in hasconst): + if (from_op[0] in hasconst and + (level_op[0] in hasconst or level_op[0] == LOAD_SMALL_INT)): level = _get_const_value(level_op[0], level_op[1], consts) fromlist = _get_const_value(from_op[0], from_op[1], consts) yield (names[oparg], level, fromlist) @@ -713,7 +1026,7 @@ def _find_store_names(co): } names = co.co_names - for _, op, arg in _unpack_opargs(co.co_code): + for _, _, op, arg in _unpack_opargs(co.co_code): if op in STORE_OPS: yield names[arg] @@ -726,7 +1039,7 @@ class Bytecode: Iterating over this yields the bytecode operations as Instruction instances. """ - def __init__(self, x, *, first_line=None, current_offset=None, show_caches=False, adaptive=False): + def __init__(self, x, *, first_line=None, current_offset=None, show_caches=False, adaptive=False, show_offsets=False, show_positions=False): self.codeobj = co = _get_code_object(x) if first_line is None: self.first_line = co.co_firstlineno @@ -740,17 +1053,23 @@ def __init__(self, x, *, first_line=None, current_offset=None, show_caches=False self.exception_entries = _parse_exception_table(co) self.show_caches = show_caches self.adaptive = adaptive + self.show_offsets = show_offsets + self.show_positions = show_positions def __iter__(self): co = self.codeobj + original_code = co.co_code + labels_map = _make_labels_map(original_code, self.exception_entries) + arg_resolver = ArgResolver(co_consts=co.co_consts, + names=co.co_names, + varname_from_oparg=co._varname_from_oparg, + labels_map=labels_map) return _get_instructions_bytes(_get_code_array(co, self.adaptive), - co._varname_from_oparg, - co.co_names, co.co_consts, - self._linestarts, + linestarts=self._linestarts, line_offset=self._line_offset, - exception_entries=self.exception_entries, co_positions=co.co_positions(), - show_caches=self.show_caches) + original_code=original_code, + arg_resolver=arg_resolver) def __repr__(self): return "{}({!r})".format(self.__class__.__name__, @@ -777,29 +1096,62 @@ def dis(self): else: offset = -1 with io.StringIO() as output: - _disassemble_bytes(_get_code_array(co, self.adaptive), - varname_from_oparg=co._varname_from_oparg, - names=co.co_names, co_consts=co.co_consts, + code = _get_code_array(co, self.adaptive) + offset_width = len(str(max(len(code) - 2, 9999))) if self.show_offsets else 0 + if self.show_positions: + lineno_width = _get_positions_width(co) + else: + lineno_width = _get_lineno_width(self._linestarts) + labels_map = _make_labels_map(co.co_code, self.exception_entries) + label_width = 4 + len(str(len(labels_map))) + formatter = Formatter(file=output, + lineno_width=lineno_width, + offset_width=offset_width, + label_width=label_width, + line_offset=self._line_offset, + show_caches=self.show_caches, + show_positions=self.show_positions) + + arg_resolver = ArgResolver(co_consts=co.co_consts, + names=co.co_names, + varname_from_oparg=co._varname_from_oparg, + labels_map=labels_map) + _disassemble_bytes(code, linestarts=self._linestarts, line_offset=self._line_offset, - file=output, lasti=offset, exception_entries=self.exception_entries, co_positions=co.co_positions(), - show_caches=self.show_caches) + original_code=co.co_code, + arg_resolver=arg_resolver, + formatter=formatter) return output.getvalue() def main(args=None): import argparse - parser = argparse.ArgumentParser() - parser.add_argument('infile', type=argparse.FileType('rb'), nargs='?', default='-') + parser = argparse.ArgumentParser(color=True) + parser.add_argument('-C', '--show-caches', action='store_true', + help='show inline caches') + parser.add_argument('-O', '--show-offsets', action='store_true', + help='show instruction offsets') + parser.add_argument('-P', '--show-positions', action='store_true', + help='show instruction positions') + parser.add_argument('-S', '--specialized', action='store_true', + help='show specialized bytecode') + parser.add_argument('infile', nargs='?', default='-') args = parser.parse_args(args=args) - with args.infile as infile: - source = infile.read() - code = compile(source, args.infile.name, "exec") - dis(code) + if args.infile == '-': + name = '' + source = sys.stdin.buffer.read() + else: + name = args.infile + with open(args.infile, 'rb') as infile: + source = infile.read() + code = compile(source, name, "exec") + dis(code, show_caches=args.show_caches, adaptive=args.specialized, + show_offsets=args.show_offsets, show_positions=args.show_positions) if __name__ == "__main__": main() diff --git a/PythonLib/full/doctest.py b/PythonLib/full/doctest.py index d617d96a..a66888d8 100644 --- a/PythonLib/full/doctest.py +++ b/PythonLib/full/doctest.py @@ -94,6 +94,7 @@ def _test(): import __future__ import difflib +import functools import inspect import linecache import os @@ -104,8 +105,28 @@ def _test(): import unittest from io import StringIO, IncrementalNewlineDecoder from collections import namedtuple +import _colorize # Used in doctests +from _colorize import ANSIColors, can_colorize + + +__unittest = True + +class TestResults(namedtuple('TestResults', 'failed attempted')): + def __new__(cls, failed, attempted, *, skipped=0): + results = super().__new__(cls, failed, attempted) + results.skipped = skipped + return results + + def __repr__(self): + if self.skipped: + return (f'TestResults(failed={self.failed}, ' + f'attempted={self.attempted}, ' + f'skipped={self.skipped})') + else: + # Leave the repr() unchanged for backward compatibility + # if skipped is zero + return super().__repr__() -TestResults = namedtuple('TestResults', 'failed attempted') # There are 4 basic classes: # - Example: a pair, plus an intra-docstring line number. @@ -371,11 +392,11 @@ def __init__(self, out): # still use input() to get user input self.use_rawinput = 1 - def set_trace(self, frame=None): + def set_trace(self, frame=None, *, commands=None): self.__debugger_used = True if frame is None: frame = sys._getframe().f_back - pdb.Pdb.set_trace(self, frame) + pdb.Pdb.set_trace(self, frame, commands=commands) def set_continue(self): # Calling set_continue unconditionally would break unit test @@ -1122,7 +1143,9 @@ def _find_lineno(self, obj, source_lines): if inspect.ismethod(obj): obj = obj.__func__ if isinstance(obj, property): obj = obj.fget - if inspect.isfunction(obj) and getattr(obj, '__doc__', None): + if isinstance(obj, functools.cached_property): + obj = obj.func + if inspect.isroutine(obj) and getattr(obj, '__doc__', None): # We don't use `docstring` var here, because `obj` can be changed. obj = inspect.unwrap(obj) try: @@ -1161,8 +1184,10 @@ class DocTestRunner: """ A class used to run DocTest test cases, and accumulate statistics. The `run` method is used to process a single DocTest case. It - returns a tuple `(f, t)`, where `t` is the number of test cases - tried, and `f` is the number of test cases that failed. + returns a TestResults instance. + + >>> save_colorize = _colorize.COLORIZE + >>> _colorize.COLORIZE = False >>> tests = DocTestFinder().find(_TestClass) >>> runner = DocTestRunner(verbose=False) @@ -1175,27 +1200,29 @@ class DocTestRunner: _TestClass.square -> TestResults(failed=0, attempted=1) The `summarize` method prints a summary of all the test cases that - have been run by the runner, and returns an aggregated `(f, t)` - tuple: + have been run by the runner, and returns an aggregated TestResults + instance: >>> runner.summarize(verbose=1) 4 items passed all tests: 2 tests in _TestClass 2 tests in _TestClass.__init__ 2 tests in _TestClass.get - 1 tests in _TestClass.square + 1 test in _TestClass.square 7 tests in 4 items. - 7 passed and 0 failed. + 7 passed. Test passed. TestResults(failed=0, attempted=7) - The aggregated number of tried examples and failed examples is - also available via the `tries` and `failures` attributes: + The aggregated number of tried examples and failed examples is also + available via the `tries`, `failures` and `skips` attributes: >>> runner.tries 7 >>> runner.failures 0 + >>> runner.skips + 0 The comparison between expected outputs and actual outputs is done by an `OutputChecker`. This comparison may be customized with a @@ -1205,13 +1232,15 @@ class DocTestRunner: `OutputChecker` to the constructor. The test runner's display output can be controlled in two ways. - First, an output function (`out) can be passed to + First, an output function (`out`) can be passed to `TestRunner.run`; this function will be called with strings that should be displayed. It defaults to `sys.stdout.write`. If capturing the output is not sufficient, then the display output can be also customized by subclassing DocTestRunner, and overriding the methods `report_start`, `report_success`, `report_unexpected_exception`, and `report_failure`. + + >>> _colorize.COLORIZE = save_colorize """ # This divider string is used to separate failure messages, and to # separate sections of the summary. @@ -1244,7 +1273,8 @@ def __init__(self, checker=None, verbose=None, optionflags=0): # Keep track of the examples we've run. self.tries = 0 self.failures = 0 - self._name2ft = {} + self.skips = 0 + self._stats = {} # Create a fake output target for capturing doctest output. self._fakeout = _SpoofOut() @@ -1289,7 +1319,10 @@ def report_unexpected_exception(self, out, test, example, exc_info): 'Exception raised:\n' + _indent(_exception_traceback(exc_info))) def _failure_header(self, test, example): - out = [self.DIVIDER] + red, reset = ( + (ANSIColors.RED, ANSIColors.RESET) if can_colorize() else ("", "") + ) + out = [f"{red}{self.DIVIDER}{reset}"] if test.filename: if test.lineno is not None and example.lineno is not None: lineno = test.lineno + example.lineno + 1 @@ -1313,13 +1346,11 @@ def __run(self, test, compileflags, out): Run the examples in `test`. Write the outcome of each example with one of the `DocTestRunner.report_*` methods, using the writer function `out`. `compileflags` is the set of compiler - flags that should be used to execute examples. Return a tuple - `(f, t)`, where `t` is the number of examples tried, and `f` - is the number of examples that failed. The examples are run - in the namespace `test.globs`. + flags that should be used to execute examples. Return a TestResults + instance. The examples are run in the namespace `test.globs`. """ - # Keep track of the number of failures and tries. - failures = tries = 0 + # Keep track of the number of failed, attempted, skipped examples. + failures = attempted = skips = 0 # Save the option flags (since option directives can be used # to modify them). @@ -1331,6 +1362,7 @@ def __run(self, test, compileflags, out): # Process each example. for examplenum, example in enumerate(test.examples): + attempted += 1 # If REPORT_ONLY_FIRST_FAILURE is set, then suppress # reporting after the first failure. @@ -1348,10 +1380,10 @@ def __run(self, test, compileflags, out): # If 'SKIP' is set, then skip this example. if self.optionflags & SKIP: + skips += 1 continue # Record that we started this example. - tries += 1 if not quiet: self.report_start(out, test, example) @@ -1368,11 +1400,11 @@ def __run(self, test, compileflags, out): exec(compile(example.source, filename, "single", compileflags, True), test.globs) self.debugger.set_continue() # ==== Example Finished ==== - exception = None + exc_info = None except KeyboardInterrupt: raise - except: - exception = sys.exc_info() + except BaseException as exc: + exc_info = type(exc), exc, exc.__traceback__.tb_next self.debugger.set_continue() # ==== Example Finished ==== got = self._fakeout.getvalue() # the actual output @@ -1381,21 +1413,21 @@ def __run(self, test, compileflags, out): # If the example executed without raising any exceptions, # verify its output. - if exception is None: + if exc_info is None: if check(example.want, got, self.optionflags): outcome = SUCCESS # The example raised an exception: check if it was expected. else: - formatted_ex = traceback.format_exception_only(*exception[:2]) - if issubclass(exception[0], SyntaxError): + formatted_ex = traceback.format_exception_only(*exc_info[:2]) + if issubclass(exc_info[0], SyntaxError): # SyntaxError / IndentationError is special: # we don't care about the carets / suggestions / etc # We only care about the error message and notes. # They start with `SyntaxError:` (or any other class name) exception_line_prefixes = ( - f"{exception[0].__qualname__}:", - f"{exception[0].__module__}.{exception[0].__qualname__}:", + f"{exc_info[0].__qualname__}:", + f"{exc_info[0].__module__}.{exc_info[0].__qualname__}:", ) exc_msg_index = next( index @@ -1406,7 +1438,7 @@ def __run(self, test, compileflags, out): exc_msg = "".join(formatted_ex) if not quiet: - got += _exception_traceback(exception) + got += _exception_traceback(exc_info) # If `example.exc_msg` is None, then we weren't expecting # an exception. @@ -1435,7 +1467,7 @@ def __run(self, test, compileflags, out): elif outcome is BOOM: if not quiet: self.report_unexpected_exception(out, test, example, - exception) + exc_info) failures += 1 else: assert False, ("unknown outcome", outcome) @@ -1446,19 +1478,22 @@ def __run(self, test, compileflags, out): # Restore the option flags (in case they were modified) self.optionflags = original_optionflags - # Record and return the number of failures and tries. - self.__record_outcome(test, failures, tries) - return TestResults(failures, tries) + # Record and return the number of failures and attempted. + self.__record_outcome(test, failures, attempted, skips) + return TestResults(failures, attempted, skipped=skips) - def __record_outcome(self, test, f, t): + def __record_outcome(self, test, failures, tries, skips): """ - Record the fact that the given DocTest (`test`) generated `f` - failures out of `t` tried examples. + Record the fact that the given DocTest (`test`) generated `failures` + failures out of `tries` tried examples. """ - f2, t2 = self._name2ft.get(test.name, (0,0)) - self._name2ft[test.name] = (f+f2, t+t2) - self.failures += f - self.tries += t + failures2, tries2, skips2 = self._stats.get(test.name, (0, 0, 0)) + self._stats[test.name] = (failures + failures2, + tries + tries2, + skips + skips2) + self.failures += failures + self.tries += tries + self.skips += skips __LINECACHE_FILENAME_RE = re.compile(r'.+)' @@ -1527,7 +1562,11 @@ def out(s): # Make sure sys.displayhook just prints the value to stdout save_displayhook = sys.displayhook sys.displayhook = sys.__displayhook__ - + saved_can_colorize = _colorize.can_colorize + _colorize.can_colorize = lambda *args, **kwargs: False + color_variables = {"PYTHON_COLORS": None, "FORCE_COLOR": None} + for key in color_variables: + color_variables[key] = os.environ.pop(key, None) try: return self.__run(test, compileflags, out) finally: @@ -1536,6 +1575,10 @@ def out(s): sys.settrace(save_trace) linecache.getlines = self.save_linecache_getlines sys.displayhook = save_displayhook + _colorize.can_colorize = saved_can_colorize + for key, value in color_variables.items(): + if value is not None: + os.environ[key] = value if clear_globs: test.globs.clear() import builtins @@ -1547,9 +1590,7 @@ def out(s): def summarize(self, verbose=None): """ Print a summary of all the test cases that have been run by - this DocTestRunner, and return a tuple `(f, t)`, where `f` is - the total number of failed examples, and `t` is the total - number of tried examples. + this DocTestRunner, and return a TestResults instance. The optional `verbose` argument controls how detailed the summary is. If the verbosity is not specified, then the @@ -1557,66 +1598,98 @@ def summarize(self, verbose=None): """ if verbose is None: verbose = self._verbose - notests = [] - passed = [] - failed = [] - totalt = totalf = 0 - for x in self._name2ft.items(): - name, (f, t) = x - assert f <= t - totalt += t - totalf += f - if t == 0: + + notests, passed, failed = [], [], [] + total_tries = total_failures = total_skips = 0 + + for name, (failures, tries, skips) in self._stats.items(): + assert failures <= tries + total_tries += tries + total_failures += failures + total_skips += skips + + if tries == 0: notests.append(name) - elif f == 0: - passed.append( (name, t) ) + elif failures == 0: + passed.append((name, tries)) else: - failed.append(x) + failed.append((name, (failures, tries, skips))) + + ansi = _colorize.get_colors() + bold_green = ansi.BOLD_GREEN + bold_red = ansi.BOLD_RED + green = ansi.GREEN + red = ansi.RED + reset = ansi.RESET + yellow = ansi.YELLOW + if verbose: if notests: - print(len(notests), "items had no tests:") + print(f"{_n_items(notests)} had no tests:") notests.sort() - for thing in notests: - print(" ", thing) + for name in notests: + print(f" {name}") + if passed: - print(len(passed), "items passed all tests:") - passed.sort() - for thing, count in passed: - print(" %3d tests in %s" % (count, thing)) + print(f"{green}{_n_items(passed)} passed all tests:{reset}") + for name, count in sorted(passed): + s = "" if count == 1 else "s" + print(f" {green}{count:3d} test{s} in {name}{reset}") + if failed: - print(self.DIVIDER) - print(len(failed), "items had failures:") - failed.sort() - for thing, (f, t) in failed: - print(" %3d of %3d in %s" % (f, t, thing)) + print(f"{red}{self.DIVIDER}{reset}") + print(f"{_n_items(failed)} had failures:") + for name, (failures, tries, skips) in sorted(failed): + print(f" {failures:3d} of {tries:3d} in {name}") + if verbose: - print(totalt, "tests in", len(self._name2ft), "items.") - print(totalt - totalf, "passed and", totalf, "failed.") - if totalf: - print("***Test Failed***", totalf, "failures.") + s = "" if total_tries == 1 else "s" + print(f"{total_tries} test{s} in {_n_items(self._stats)}.") + + and_f = ( + f" and {red}{total_failures} failed{reset}" + if total_failures else "" + ) + print(f"{green}{total_tries - total_failures} passed{reset}{and_f}.") + + if total_failures: + s = "" if total_failures == 1 else "s" + msg = f"{bold_red}***Test Failed*** {total_failures} failure{s}{reset}" + if total_skips: + s = "" if total_skips == 1 else "s" + msg = f"{msg} and {yellow}{total_skips} skipped test{s}{reset}" + print(f"{msg}.") elif verbose: - print("Test passed.") - return TestResults(totalf, totalt) + print(f"{bold_green}Test passed.{reset}") + + return TestResults(total_failures, total_tries, skipped=total_skips) #///////////////////////////////////////////////////////////////// # Backward compatibility cruft to maintain doctest.master. #///////////////////////////////////////////////////////////////// def merge(self, other): - d = self._name2ft - for name, (f, t) in other._name2ft.items(): + d = self._stats + for name, (failures, tries, skips) in other._stats.items(): if name in d: - # Don't print here by default, since doing - # so breaks some of the buildbots - #print("*** DocTestRunner.merge: '" + name + "' in both" \ - # " testers; summing outcomes.") - f2, t2 = d[name] - f = f + f2 - t = t + t2 - d[name] = f, t + failures2, tries2, skips2 = d[name] + failures = failures + failures2 + tries = tries + tries2 + skips = skips + skips2 + d[name] = (failures, tries, skips) + + +def _n_items(items: list | dict) -> str: + """ + Helper to pluralise the number of items in a list. + """ + n = len(items) + s = "" if n == 1 else "s" + return f"{n} item{s}" + class OutputChecker: """ - A class used to check the whether the actual output from a doctest + A class used to check whether the actual output from a doctest example matches the expected output. `OutputChecker` defines two methods: `check_output`, which compares a given pair of outputs, and returns true if they match; and `output_difference`, which @@ -1921,8 +1994,8 @@ def testmod(m=None, name=None, globs=None, verbose=None, from module m (or the current module if m is not supplied), starting with m.__doc__. - Also test examples reachable from dict m.__test__ if it exists and is - not None. m.__test__ maps names to functions, classes and strings; + Also test examples reachable from dict m.__test__ if it exists. + m.__test__ maps names to functions, classes and strings; function and class docstrings are tested even if the name is private; strings are tested directly, as if they were docstrings. @@ -2012,7 +2085,8 @@ class doctest.Tester, then merges the results into (or creates) else: master.merge(runner) - return TestResults(runner.failures, runner.tries) + return TestResults(runner.failures, runner.tries, skipped=runner.skips) + def testfile(filename, module_relative=True, name=None, package=None, globs=None, verbose=None, report=True, optionflags=0, @@ -2135,7 +2209,8 @@ class doctest.Tester, then merges the results into (or creates) else: master.merge(runner) - return TestResults(runner.failures, runner.tries) + return TestResults(runner.failures, runner.tries, skipped=runner.skips) + def run_docstring_examples(f, globs, verbose=False, name="NoName", compileflags=None, optionflags=0): @@ -2247,13 +2322,14 @@ def runTest(self): try: runner.DIVIDER = "-"*70 - failures, tries = runner.run( - test, out=new.write, clear_globs=False) + results = runner.run(test, out=new.write, clear_globs=False) + if results.skipped == results.attempted: + raise unittest.SkipTest("all examples were skipped") finally: sys.stdout = old - if failures: - raise self.failureException(self.format_failure(new.getvalue())) + if results.failed: + raise self.failureException(self.format_failure(new.getvalue().rstrip('\n'))) def format_failure(self, err): test = self._dt_test @@ -2663,7 +2739,7 @@ def testsource(module, name): return testsrc def debug_src(src, pm=False, globs=None): - """Debug a single doctest docstring, in argument `src`'""" + """Debug a single doctest docstring, in argument `src`""" testsrc = script_from_examples(src) debug_script(testsrc, pm, globs) @@ -2799,7 +2875,7 @@ def get(self): def _test(): import argparse - parser = argparse.ArgumentParser(description="doctest runner") + parser = argparse.ArgumentParser(description="doctest runner", color=True) parser.add_argument('-v', '--verbose', action='store_true', default=False, help='print very verbose output for all tests') parser.add_argument('-o', '--option', action='append', diff --git a/PythonLib/full/email/__init__.py b/PythonLib/full/email/__init__.py index 9fa47783..6d597006 100644 --- a/PythonLib/full/email/__init__.py +++ b/PythonLib/full/email/__init__.py @@ -1,4 +1,4 @@ -# Copyright (C) 2001-2007 Python Software Foundation +# Copyright (C) 2001 Python Software Foundation # Author: Barry Warsaw # Contact: email-sig@python.org diff --git a/PythonLib/full/email/_encoded_words.py b/PythonLib/full/email/_encoded_words.py index 6795a606..05a34a4c 100644 --- a/PythonLib/full/email/_encoded_words.py +++ b/PythonLib/full/email/_encoded_words.py @@ -219,7 +219,7 @@ def encode(string, charset='utf-8', encoding=None, lang=''): """ if charset == 'unknown-8bit': - bstring = string.encode('ascii', 'surrogateescape') + bstring = string.encode('utf-8', 'surrogateescape') else: bstring = string.encode(charset) if encoding is None: diff --git a/PythonLib/full/email/_header_value_parser.py b/PythonLib/full/email/_header_value_parser.py index 9a51b943..51727688 100644 --- a/PythonLib/full/email/_header_value_parser.py +++ b/PythonLib/full/email/_header_value_parser.py @@ -101,6 +101,12 @@ def make_quoted_pairs(value): return str(value).replace('\\', '\\\\').replace('"', '\\"') +def make_parenthesis_pairs(value): + """Escape parenthesis and backslash for use within a comment.""" + return str(value).replace('\\', '\\\\') \ + .replace('(', '\\(').replace(')', '\\)') + + def quote_string(value): escaped = make_quoted_pairs(value) return f'"{escaped}"' @@ -874,6 +880,12 @@ class MessageID(MsgID): class InvalidMessageID(MessageID): token_type = 'invalid-message-id' +class MessageIDList(TokenList): + token_type = 'message-id-list' + + @property + def message_ids(self): + return [x for x in self if x.token_type=='msg-id'] class Header(TokenList): token_type = 'header' @@ -933,7 +945,7 @@ def value(self): return ' ' def startswith_fws(self): - return True + return self and self[0] in WSP class ValueTerminal(Terminal): @@ -1020,6 +1032,8 @@ def _get_ptext_to_endchars(value, endchars): a flag that is True iff there were any quoted printables decoded. """ + if not value: + return '', '', False fragment, *remainder = _wsp_splitter(value, 1) vchars = [] escape = False @@ -1573,7 +1587,7 @@ def get_dtext(value): def _check_for_early_dl_end(value, domain_literal): if value: return False - domain_literal.append(errors.InvalidHeaderDefect( + domain_literal.defects.append(errors.InvalidHeaderDefect( "end of input inside domain-literal")) domain_literal.append(ValueTerminal(']', 'domain-literal-end')) return True @@ -1592,9 +1606,9 @@ def get_domain_literal(value): raise errors.HeaderParseError("expected '[' at start of domain-literal " "but found '{}'".format(value)) value = value[1:] + domain_literal.append(ValueTerminal('[', 'domain-literal-start')) if _check_for_early_dl_end(value, domain_literal): return domain_literal, value - domain_literal.append(ValueTerminal('[', 'domain-literal-start')) if value[0] in WSP: token, value = get_fws(value) domain_literal.append(token) @@ -2169,6 +2183,32 @@ def parse_message_id(value): return message_id +def parse_message_ids(value): + """in-reply-to = "In-Reply-To:" 1*msg-id CRLF + references = "References:" 1*msg-id CRLF + """ + message_id_list = MessageIDList() + while value: + if value[0] == ',': + # message id list separated with commas - this is invalid, + # but happens rather frequently in the wild + message_id_list.defects.append( + errors.InvalidHeaderDefect("comma in msg-id list")) + message_id_list.append( + WhiteSpaceTerminal(' ', 'invalid-comma-replacement')) + value = value[1:] + continue + try: + token, value = get_msg_id(value) + message_id_list.append(token) + except errors.HeaderParseError as ex: + token = get_unstructured(value) + message_id_list.append(InvalidMessageID(token)) + message_id_list.defects.append( + errors.InvalidHeaderDefect("Invalid msg-id: {!r}".format(ex))) + break + return message_id_list + # # XXX: As I begin to add additional header parsers, I'm realizing we probably # have two level of parser routines: the get_XXX methods that get a token in @@ -2786,6 +2826,9 @@ def _steal_trailing_WSP_if_exists(lines): if lines and lines[-1] and lines[-1][-1] in WSP: wsp = lines[-1][-1] lines[-1] = lines[-1][:-1] + # gh-142006: if the line is now empty, remove it entirely. + if not lines[-1]: + lines.pop() return wsp def _refold_parse_tree(parse_tree, *, policy): @@ -2922,6 +2965,13 @@ def _refold_parse_tree(parse_tree, *, policy): [ValueTerminal(make_quoted_pairs(p), 'ptext') for p in newparts] + [ValueTerminal('"', 'ptext')]) + if part.token_type == 'comment': + newparts = ( + [ValueTerminal('(', 'ptext')] + + [ValueTerminal(make_parenthesis_pairs(p), 'ptext') + if p.token_type == 'ptext' else p + for p in newparts] + + [ValueTerminal(')', 'ptext')]) if not part.as_ew_allowed: wrap_as_ew_blocked += 1 newparts.append(end_ew_not_allowed) diff --git a/PythonLib/full/email/_parseaddr.py b/PythonLib/full/email/_parseaddr.py index febe4113..6a7c5fa0 100644 --- a/PythonLib/full/email/_parseaddr.py +++ b/PythonLib/full/email/_parseaddr.py @@ -1,4 +1,4 @@ -# Copyright (C) 2002-2007 Python Software Foundation +# Copyright (C) 2002 Python Software Foundation # Contact: email-sig@python.org """Email address parsing code. @@ -13,7 +13,7 @@ 'quote', ] -import time, calendar +import time SPACE = ' ' EMPTYSTRING = '' @@ -146,8 +146,9 @@ def _parsedate_tz(data): return None # Check for a yy specified in two-digit format, then convert it to the # appropriate four-digit format, according to the POSIX standard. RFC 822 - # calls for a two-digit yy, but RFC 2822 (which obsoletes RFC 822) - # mandates a 4-digit yy. For more information, see the documentation for + # calls for a two-digit yy, but RFC 2822 (which obsoletes RFC 822) already + # mandated a 4-digit yy, and RFC 5322 (which obsoletes RFC 2822) continues + # this requirement. For more information, see the documentation for # the time module. if yy < 100: # The year is between 1969 and 1999 (inclusive). @@ -194,6 +195,9 @@ def mktime_tz(data): # No zone info, so localtime is better assumption than GMT return time.mktime(data[:8] + (-1,)) else: + # Delay the import, since mktime_tz is rarely used + import calendar + t = calendar.timegm(data) return t - data[9] @@ -221,7 +225,7 @@ class AddrlistClass: def __init__(self, field): """Initialize a new instance. - `field' is an unparsed address header field, containing + 'field' is an unparsed address header field, containing one or more addresses. """ self.specials = '()<>@,:;.\"[]' @@ -230,9 +234,11 @@ def __init__(self, field): self.CR = '\r\n' self.FWS = self.LWS + self.CR self.atomends = self.specials + self.LWS + self.CR - # Note that RFC 2822 now specifies `.' as obs-phrase, meaning that it - # is obsolete syntax. RFC 2822 requires that we recognize obsolete - # syntax, so allow dots in phrases. + # Note that RFC 2822 section 4.1 introduced '.' as obs-phrase to handle + # existing practice (periods in display names), even though it was not + # allowed in RFC 822. RFC 5322 section 4.1 (which obsoletes RFC 2822) + # continues this requirement. We must recognize obsolete syntax, so + # allow dots in phrases. self.phraseends = self.atomends.replace('.', '') self.field = field self.commentlist = [] @@ -420,14 +426,14 @@ def getdomain(self): def getdelimited(self, beginchar, endchars, allowcomments=True): """Parse a header fragment delimited by special characters. - `beginchar' is the start character for the fragment. - If self is not looking at an instance of `beginchar' then + 'beginchar' is the start character for the fragment. + If self is not looking at an instance of 'beginchar' then getdelimited returns the empty string. - `endchars' is a sequence of allowable end-delimiting characters. + 'endchars' is a sequence of allowable end-delimiting characters. Parsing stops when one of these is encountered. - If `allowcomments' is non-zero, embedded RFC 2822 comments are allowed + If 'allowcomments' is non-zero, embedded RFC 2822 comments are allowed within the parsed fragment. """ if self.field[self.pos] != beginchar: @@ -471,7 +477,7 @@ def getatom(self, atomends=None): Optional atomends specifies a different set of end token delimiters (the default is to use self.atomends). This is used e.g. in - getphraselist() since phrase endings must not include the `.' (which + getphraselist() since phrase endings must not include the '.' (which is legal in phrases).""" atomlist = [''] if atomends is None: diff --git a/PythonLib/full/email/_policybase.py b/PythonLib/full/email/_policybase.py index c9f0d743..e23843df 100644 --- a/PythonLib/full/email/_policybase.py +++ b/PythonLib/full/email/_policybase.py @@ -4,6 +4,7 @@ """ import abc +import re from email import header from email import charset as _charset from email.utils import _has_surrogates @@ -14,6 +15,14 @@ 'compat32', ] +# validation regex from RFC 5322, equivalent to pattern re.compile("[!-9;-~]+$") +valid_header_name_re = re.compile("[\041-\071\073-\176]+$") + +def validate_header_name(name): + # Validate header name according to RFC 5322 + if not valid_header_name_re.match(name): + raise ValueError( + f"Header field name contains invalid characters: {name!r}") class _PolicyBase: @@ -150,7 +159,7 @@ class Policy(_PolicyBase, metaclass=abc.ABCMeta): wrapping is done. Default is 78. mangle_from_ -- a flag that, when True escapes From_ lines in the - body of the message by putting a `>' in front of + body of the message by putting a '>' in front of them. This is used when the message is being serialized by a generator. Default: False. @@ -314,6 +323,7 @@ def header_store_parse(self, name, value): """+ The name and value are returned unmodified. """ + validate_header_name(name) return (name, value) def header_fetch_parse(self, name, value): @@ -370,7 +380,7 @@ def _fold(self, name, value, sanitize): h = value if h is not None: # The Header class interprets a value of None for maxlinelen as the - # default value of 78, as recommended by RFC 2822. + # default value of 78, as recommended by RFC 5322 section 2.1.1. maxlinelen = 0 if self.max_line_length is not None: maxlinelen = self.max_line_length diff --git a/PythonLib/full/email/base64mime.py b/PythonLib/full/email/base64mime.py index 4cdf2266..a5a3f737 100644 --- a/PythonLib/full/email/base64mime.py +++ b/PythonLib/full/email/base64mime.py @@ -1,4 +1,4 @@ -# Copyright (C) 2002-2007 Python Software Foundation +# Copyright (C) 2002 Python Software Foundation # Author: Ben Gertzfield # Contact: email-sig@python.org @@ -15,7 +15,7 @@ with Base64 encoding. RFC 2045 defines a method for including character set information in an -`encoded-word' in a header. This method is commonly used for 8-bit real names +'encoded-word' in a header. This method is commonly used for 8-bit real names in To:, From:, Cc:, etc. fields, as well as Subject: lines. This module does not do the line wrapping or end-of-line character conversion diff --git a/PythonLib/full/email/charset.py b/PythonLib/full/email/charset.py index 04380110..5036c3f5 100644 --- a/PythonLib/full/email/charset.py +++ b/PythonLib/full/email/charset.py @@ -1,4 +1,4 @@ -# Copyright (C) 2001-2007 Python Software Foundation +# Copyright (C) 2001 Python Software Foundation # Author: Ben Gertzfield, Barry Warsaw # Contact: email-sig@python.org @@ -175,7 +175,7 @@ class Charset: module expose the following information about a character set: input_charset: The initial character set specified. Common aliases - are converted to their `official' email names (e.g. latin_1 + are converted to their 'official' email names (e.g. latin_1 is converted to iso-8859-1). Defaults to 7-bit us-ascii. header_encoding: If the character set must be encoded before it can be @@ -245,7 +245,7 @@ def __eq__(self, other): def get_body_encoding(self): """Return the content-transfer-encoding used for body encoding. - This is either the string `quoted-printable' or `base64' depending on + This is either the string 'quoted-printable' or 'base64' depending on the encoding used, or it is a function in which case you should call the function with a single argument, the Message object being encoded. The function should then set the Content-Transfer-Encoding diff --git a/PythonLib/full/email/contentmanager.py b/PythonLib/full/email/contentmanager.py index b4f5830b..11d1536d 100644 --- a/PythonLib/full/email/contentmanager.py +++ b/PythonLib/full/email/contentmanager.py @@ -2,6 +2,7 @@ import email.charset import email.message import email.errors +import sys from email import quoprimime class ContentManager: @@ -142,13 +143,15 @@ def _encode_base64(data, max_line_length): def _encode_text(string, charset, cte, policy): + # If max_line_length is 0 or None, there is no limit. + maxlen = policy.max_line_length or sys.maxsize lines = string.encode(charset).splitlines() linesep = policy.linesep.encode('ascii') def embedded_body(lines): return linesep.join(lines) + linesep def normal_body(lines): return b'\n'.join(lines) + b'\n' if cte is None: # Use heuristics to decide on the "best" encoding. - if max((len(x) for x in lines), default=0) <= policy.max_line_length: + if max(map(len, lines), default=0) <= maxlen: try: return '7bit', normal_body(lines).decode('ascii') except UnicodeDecodeError: @@ -156,8 +159,7 @@ def normal_body(lines): return b'\n'.join(lines) + b'\n' if policy.cte_type == '8bit': return '8bit', normal_body(lines).decode('ascii', 'surrogateescape') sniff = embedded_body(lines[:10]) - sniff_qp = quoprimime.body_encode(sniff.decode('latin-1'), - policy.max_line_length) + sniff_qp = quoprimime.body_encode(sniff.decode('latin-1'), maxlen) sniff_base64 = binascii.b2a_base64(sniff) # This is a little unfair to qp; it includes lineseps, base64 doesn't. if len(sniff_qp) > len(sniff_base64): @@ -172,9 +174,9 @@ def normal_body(lines): return b'\n'.join(lines) + b'\n' data = normal_body(lines).decode('ascii', 'surrogateescape') elif cte == 'quoted-printable': data = quoprimime.body_encode(normal_body(lines).decode('latin-1'), - policy.max_line_length) + maxlen) elif cte == 'base64': - data = _encode_base64(embedded_body(lines), policy.max_line_length) + data = _encode_base64(embedded_body(lines), maxlen) else: raise ValueError("Unknown content transfer encoding {}".format(cte)) return cte, data diff --git a/PythonLib/full/email/encoders.py b/PythonLib/full/email/encoders.py index 17bd1ab7..55741a22 100644 --- a/PythonLib/full/email/encoders.py +++ b/PythonLib/full/email/encoders.py @@ -1,4 +1,4 @@ -# Copyright (C) 2001-2006 Python Software Foundation +# Copyright (C) 2001 Python Software Foundation # Author: Barry Warsaw # Contact: email-sig@python.org diff --git a/PythonLib/full/email/errors.py b/PythonLib/full/email/errors.py index 02aa5ece..6bc744bd 100644 --- a/PythonLib/full/email/errors.py +++ b/PythonLib/full/email/errors.py @@ -1,4 +1,4 @@ -# Copyright (C) 2001-2006 Python Software Foundation +# Copyright (C) 2001 Python Software Foundation # Author: Barry Warsaw # Contact: email-sig@python.org diff --git a/PythonLib/full/email/feedparser.py b/PythonLib/full/email/feedparser.py index c2881d9b..ae8ef327 100644 --- a/PythonLib/full/email/feedparser.py +++ b/PythonLib/full/email/feedparser.py @@ -1,4 +1,4 @@ -# Copyright (C) 2004-2006 Python Software Foundation +# Copyright (C) 2004 Python Software Foundation # Authors: Baxter, Wouters and Warsaw # Contact: email-sig@python.org @@ -30,13 +30,15 @@ NLCRE = re.compile(r'\r\n|\r|\n') NLCRE_bol = re.compile(r'(\r\n|\r|\n)') -NLCRE_eol = re.compile(r'(\r\n|\r|\n)\Z') +NLCRE_eol = re.compile(r'(\r\n|\r|\n)\z') NLCRE_crack = re.compile(r'(\r\n|\r|\n)') -# RFC 2822 $3.6.8 Optional fields. ftext is %d33-57 / %d59-126, Any character +# RFC 5322 section 3.6.8 Optional fields. ftext is %d33-57 / %d59-126, Any character # except controls, SP, and ":". headerRE = re.compile(r'^(From |[\041-\071\073-\176]*:|[\t ])') EMPTYSTRING = '' NL = '\n' +boundaryendRE = re.compile( + r'(?P--)?(?P[ \t]*)(?P\r\n|\r|\n)?$') NeedMoreData = object() @@ -292,7 +294,7 @@ def _parsegen(self): return if self._cur.get_content_maintype() == 'message': # The message claims to be a message/* type, then what follows is - # another RFC 2822 message. + # another RFC 5322 message. for retval in self._parsegen(): if retval is NeedMoreData: yield NeedMoreData @@ -327,9 +329,10 @@ def _parsegen(self): # this onto the input stream until we've scanned past the # preamble. separator = '--' + boundary - boundaryre = re.compile( - '(?P' + re.escape(separator) + - r')(?P--)?(?P[ \t]*)(?P\r\n|\r|\n)?$') + def boundarymatch(line): + if not line.startswith(separator): + return None + return boundaryendRE.match(line, len(separator)) capturing_preamble = True preamble = [] linesep = False @@ -341,7 +344,7 @@ def _parsegen(self): continue if line == '': break - mo = boundaryre.match(line) + mo = boundarymatch(line) if mo: # If we're looking at the end boundary, we're done with # this multipart. If there was a newline at the end of @@ -373,13 +376,13 @@ def _parsegen(self): if line is NeedMoreData: yield NeedMoreData continue - mo = boundaryre.match(line) + mo = boundarymatch(line) if not mo: self._input.unreadline(line) break # Recurse to parse this subpart; the input stream points # at the subpart's first line. - self._input.push_eof_matcher(boundaryre.match) + self._input.push_eof_matcher(boundarymatch) for retval in self._parsegen(): if retval is NeedMoreData: yield NeedMoreData @@ -501,10 +504,9 @@ def _parse_headers(self, lines): self._input.unreadline(line) return else: - # Weirdly placed unix-from line. Note this as a defect - # and ignore it. + # Weirdly placed unix-from line. defect = errors.MisplacedEnvelopeHeaderDefect(line) - self._cur.defects.append(defect) + self.policy.handle_defect(self._cur, defect) continue # Split the line on the colon separating field name from value. # There will always be a colon, because if there wasn't the part of @@ -516,7 +518,7 @@ def _parse_headers(self, lines): # message. Track the error but keep going. if i == 0: defect = errors.InvalidHeaderDefect("Missing header name.") - self._cur.defects.append(defect) + self.policy.handle_defect(self._cur, defect) continue assert i>0, "_parse_headers fed line with no : and no leading WS" diff --git a/PythonLib/full/email/generator.py b/PythonLib/full/email/generator.py index 47b9df8f..cebbc416 100644 --- a/PythonLib/full/email/generator.py +++ b/PythonLib/full/email/generator.py @@ -1,4 +1,4 @@ -# Copyright (C) 2001-2010 Python Software Foundation +# Copyright (C) 2001 Python Software Foundation # Author: Barry Warsaw # Contact: email-sig@python.org @@ -22,6 +22,7 @@ NLCRE = re.compile(r'\r\n|\r|\n') fcre = re.compile(r'^From ', re.MULTILINE) NEWLINE_WITHOUT_FWSP = re.compile(r'\r\n[^ \t]|\r[^ \n\t]|\n[^ \t]') +NEWLINE_WITHOUT_FWSP_BYTES = re.compile(br'\r\n[^ \t]|\r[^ \n\t]|\n[^ \t]') class Generator: @@ -43,14 +44,14 @@ def __init__(self, outfp, mangle_from_=None, maxheaderlen=None, *, Optional mangle_from_ is a flag that, when True (the default if policy is not set), escapes From_ lines in the body of the message by putting - a `>' in front of them. + a '>' in front of them. Optional maxheaderlen specifies the longest length for a non-continued header. When a header line is longer (in characters, with tabs expanded to 8 spaces) than maxheaderlen, the header will split as defined in the Header class. Set maxheaderlen to zero to disable header wrapping. The default is 78, as recommended (but not required) - by RFC 2822. + by RFC 5322 section 2.1.1. The policy keyword specifies a policy object that controls a number of aspects of the generator's operation. If no policy is specified, @@ -76,7 +77,7 @@ def flatten(self, msg, unixfrom=False, linesep=None): unixfrom is a flag that forces the printing of a Unix From_ delimiter before the first object in the message tree. If the original message - has no From_ delimiter, a `standard' one is crafted. By default, this + has no From_ delimiter, a 'standard' one is crafted. By default, this is False to inhibit the printing of any From_ delimiter. Note that for subobjects, no From_ line is printed. @@ -227,7 +228,7 @@ def _write_headers(self, msg): folded = self.policy.fold(h, v) if self.policy.verify_generated_headers: linesep = self.policy.linesep - if not folded.endswith(self.policy.linesep): + if not folded.endswith(linesep): raise HeaderWriteError( f'folded header does not end with {linesep!r}: {folded!r}') if NEWLINE_WITHOUT_FWSP.search(folded.removesuffix(linesep)): @@ -429,7 +430,16 @@ def _write_headers(self, msg): # This is almost the same as the string version, except for handling # strings with 8bit bytes. for h, v in msg.raw_items(): - self._fp.write(self.policy.fold_binary(h, v)) + folded = self.policy.fold_binary(h, v) + if self.policy.verify_generated_headers: + linesep = self.policy.linesep.encode() + if not folded.endswith(linesep): + raise HeaderWriteError( + f'folded header does not end with {linesep!r}: {folded!r}') + if NEWLINE_WITHOUT_FWSP_BYTES.search(folded.removesuffix(linesep)): + raise HeaderWriteError( + f'folded header contains newline: {folded!r}') + self._fp.write(folded) # A blank line always separates headers from body self.write(self._NL) @@ -467,7 +477,7 @@ def __init__(self, outfp, mangle_from_=None, maxheaderlen=None, fmt=None, *, argument is allowed. Walks through all subparts of a message. If the subpart is of main - type `text', then it prints the decoded payload of the subpart. + type 'text', then it prints the decoded payload of the subpart. Otherwise, fmt is a format string that is used instead of the message payload. fmt is expanded with the following keywords (in diff --git a/PythonLib/full/email/header.py b/PythonLib/full/email/header.py index 984851a7..220a84a7 100644 --- a/PythonLib/full/email/header.py +++ b/PythonLib/full/email/header.py @@ -1,4 +1,4 @@ -# Copyright (C) 2002-2007 Python Software Foundation +# Copyright (C) 2002 Python Software Foundation # Author: Ben Gertzfield, Barry Warsaw # Contact: email-sig@python.org @@ -59,16 +59,22 @@ def decode_header(header): """Decode a message header value without converting charset. - Returns a list of (string, charset) pairs containing each of the decoded - parts of the header. Charset is None for non-encoded parts of the header, - otherwise a lower-case string containing the name of the character set - specified in the encoded string. + For historical reasons, this function may return either: + + 1. A list of length 1 containing a pair (str, None). + 2. A list of (bytes, charset) pairs containing each of the decoded + parts of the header. Charset is None for non-encoded parts of the header, + otherwise a lower-case string containing the name of the character set + specified in the encoded string. header may be a string that may or may not contain RFC2047 encoded words, or it may be a Header object. An email.errors.HeaderParseError may be raised when certain decoding error occurs (e.g. a base64 decoding exception). + + This function exists for backwards compatibility only. For new code, we + recommend using email.headerregistry.HeaderRegistry instead. """ # If it is a Header object, we can just return the encoded chunks. if hasattr(header, '_chunks'): @@ -161,6 +167,9 @@ def make_header(decoded_seq, maxlinelen=None, header_name=None, This function takes one of those sequence of pairs and returns a Header instance. Optional maxlinelen, header_name, and continuation_ws are as in the Header constructor. + + This function exists for backwards compatibility only, and is not + recommended for use in new code. """ h = Header(maxlinelen=maxlinelen, header_name=header_name, continuation_ws=continuation_ws) @@ -192,7 +201,7 @@ def __init__(self, s=None, charset=None, The maximum line length can be specified explicitly via maxlinelen. For splitting the first line to a shorter value (to account for the field - header which isn't included in s, e.g. `Subject') pass in the name of + header which isn't included in s, e.g. 'Subject') pass in the name of the field in header_name. The default maxlinelen is 78 as recommended by RFC 2822. @@ -276,7 +285,7 @@ def append(self, s, charset=None, errors='strict'): output codec of the charset. If the string cannot be encoded to the output codec, a UnicodeError will be raised. - Optional `errors' is passed as the errors argument to the decode + Optional 'errors' is passed as the errors argument to the decode call if s is a byte string. """ if charset is None: @@ -326,7 +335,7 @@ def encode(self, splitchars=';, \t', maxlinelen=None, linesep='\n'): Optional splitchars is a string containing characters which should be given extra weight by the splitting algorithm during normal header - wrapping. This is in very rough support of RFC 2822's `higher level + wrapping. This is in very rough support of RFC 2822's 'higher level syntactic breaks': split points preceded by a splitchar are preferred during line splitting, with the characters preferred in the order in which they appear in the string. Space and tab may be included in the diff --git a/PythonLib/full/email/headerregistry.py b/PythonLib/full/email/headerregistry.py index 543141dc..0e8698ef 100644 --- a/PythonLib/full/email/headerregistry.py +++ b/PythonLib/full/email/headerregistry.py @@ -534,6 +534,18 @@ def parse(cls, value, kwds): kwds['defects'].extend(parse_tree.all_defects) +class ReferencesHeader: + + max_count = 1 + value_parser = staticmethod(parser.parse_message_ids) + + @classmethod + def parse(cls, value, kwds): + kwds['parse_tree'] = parse_tree = cls.value_parser(value) + kwds['decoded'] = str(parse_tree) + kwds['defects'].extend(parse_tree.all_defects) + + # The header factory # _default_header_map = { @@ -557,6 +569,8 @@ def parse(cls, value, kwds): 'content-disposition': ContentDispositionHeader, 'content-transfer-encoding': ContentTransferEncodingHeader, 'message-id': MessageIDHeader, + 'in-reply-to': ReferencesHeader, + 'references': ReferencesHeader, } class HeaderRegistry: diff --git a/PythonLib/full/email/iterators.py b/PythonLib/full/email/iterators.py index 3410935e..08ede3ec 100644 --- a/PythonLib/full/email/iterators.py +++ b/PythonLib/full/email/iterators.py @@ -1,4 +1,4 @@ -# Copyright (C) 2001-2006 Python Software Foundation +# Copyright (C) 2001 Python Software Foundation # Author: Barry Warsaw # Contact: email-sig@python.org @@ -43,8 +43,8 @@ def body_line_iterator(msg, decode=False): def typed_subpart_iterator(msg, maintype='text', subtype=None): """Iterate over the subparts with a given MIME type. - Use `maintype' as the main MIME type to match against; this defaults to - "text". Optional `subtype' is the MIME subtype to match against; if + Use 'maintype' as the main MIME type to match against; this defaults to + "text". Optional 'subtype' is the MIME subtype to match against; if omitted, only the main type is matched. """ for subpart in msg.walk(): diff --git a/PythonLib/full/email/message.py b/PythonLib/full/email/message.py index 6b7c3a23..641fb2e9 100644 --- a/PythonLib/full/email/message.py +++ b/PythonLib/full/email/message.py @@ -1,4 +1,4 @@ -# Copyright (C) 2001-2007 Python Software Foundation +# Copyright (C) 2001 Python Software Foundation # Author: Barry Warsaw # Contact: email-sig@python.org @@ -21,7 +21,7 @@ SEMISPACE = '; ' -# Regular expression that matches `special' characters in parameters, the +# Regular expression that matches 'special' characters in parameters, the # existence of which force quoting of the parameter value. tspecials = re.compile(r'[ \(\)<>@,;:\\"/\[\]\?=]') @@ -74,19 +74,25 @@ def _parseparam(s): # RDM This might be a Header, so for now stringify it. s = ';' + str(s) plist = [] - while s[:1] == ';': - s = s[1:] - end = s.find(';') - while end > 0 and (s.count('"', 0, end) - s.count('\\"', 0, end)) % 2: - end = s.find(';', end + 1) + start = 0 + while s.find(';', start) == start: + start += 1 + end = s.find(';', start) + ind, diff = start, 0 + while end > 0: + diff += s.count('"', ind, end) - s.count('\\"', ind, end) + if diff % 2 == 0: + break + end, ind = ind, s.find(';', end + 1) if end < 0: end = len(s) - f = s[:end] - if '=' in f: - i = f.index('=') - f = f[:i].strip().lower() + '=' + f[i+1:].strip() + i = s.find('=', start, end) + if i == -1: + f = s[start:end] + else: + f = s[start:i].rstrip().lower() + '=' + s[i+1:end].lstrip() plist.append(f.strip()) - s = s[end:] + start = end return plist @@ -135,13 +141,13 @@ def _decode_uu(encoded): class Message: """Basic message object. - A message object is defined as something that has a bunch of RFC 2822 + A message object is defined as something that has a bunch of RFC 5322 headers and a payload. It may optionally have an envelope header (a.k.a. Unix-From or From_ header). If the message is a container (i.e. a multipart or a message/rfc822), then the payload is a list of Message objects, otherwise it is a string. - Message objects implement part of the `mapping' interface, which assumes + Message objects implement part of the 'mapping' interface, which assumes there is exactly one occurrence of the header per message. Some headers do in fact appear multiple times (e.g. Received) and for those headers, you must use the explicit API to set or get all the headers. Not all of @@ -313,6 +319,8 @@ def get_payload(self, i=None, decode=False): # If it does happen, turn the string into bytes in a way # guaranteed not to fail. bpayload = payload.encode('raw-unicode-escape') + else: + bpayload = payload if cte == 'quoted-printable': return quopri.decodestring(bpayload) elif cte == 'base64': @@ -564,7 +572,7 @@ def add_header(self, _name, _value, **_params): msg.add_header('content-disposition', 'attachment', filename='bud.gif') msg.add_header('content-disposition', 'attachment', - filename=('utf-8', '', Fußballer.ppt')) + filename=('utf-8', '', 'Fußballer.ppt')) msg.add_header('content-disposition', 'attachment', filename='Fußballer.ppt')) """ @@ -601,7 +609,7 @@ def get_content_type(self): """Return the message's content type. The returned string is coerced to lower case of the form - `maintype/subtype'. If there was no Content-Type header in the + 'maintype/subtype'. If there was no Content-Type header in the message, the default type as given by get_default_type() will be returned. Since according to RFC 2045, messages always have a default type this will always return a value. @@ -624,7 +632,7 @@ def get_content_type(self): def get_content_maintype(self): """Return the message's main content type. - This is the `maintype' part of the string returned by + This is the 'maintype' part of the string returned by get_content_type(). """ ctype = self.get_content_type() @@ -633,14 +641,14 @@ def get_content_maintype(self): def get_content_subtype(self): """Returns the message's sub-content type. - This is the `subtype' part of the string returned by + This is the 'subtype' part of the string returned by get_content_type(). """ ctype = self.get_content_type() return ctype.split('/')[1] def get_default_type(self): - """Return the `default' content type. + """Return the 'default' content type. Most messages have a default content type of text/plain, except for messages that are subparts of multipart/digest containers. Such @@ -649,7 +657,7 @@ def get_default_type(self): return self._default_type def set_default_type(self, ctype): - """Set the `default' content type. + """Set the 'default' content type. ctype should be either "text/plain" or "message/rfc822", although this is not enforced. The default content type is not stored in the @@ -682,8 +690,8 @@ def get_params(self, failobj=None, header='content-type', unquote=True): """Return the message's Content-Type parameters, as a list. The elements of the returned list are 2-tuples of key/value pairs, as - split on the `=' sign. The left hand side of the `=' is the key, - while the right hand side is the value. If there is no `=' sign in + split on the '=' sign. The left hand side of the '=' is the key, + while the right hand side is the value. If there is no '=' sign in the parameter the value is the empty string. The value is as described in the get_param() method. @@ -843,9 +851,9 @@ def get_filename(self, failobj=None): """Return the filename associated with the payload if present. The filename is extracted from the Content-Disposition header's - `filename' parameter, and it is unquoted. If that header is missing - the `filename' parameter, this method falls back to looking for the - `name' parameter. + 'filename' parameter, and it is unquoted. If that header is missing + the 'filename' parameter, this method falls back to looking for the + 'name' parameter. """ missing = object() filename = self.get_param('filename', missing, 'content-disposition') @@ -858,7 +866,7 @@ def get_filename(self, failobj=None): def get_boundary(self, failobj=None): """Return the boundary associated with the payload if present. - The boundary is extracted from the Content-Type header's `boundary' + The boundary is extracted from the Content-Type header's 'boundary' parameter, and it is unquoted. """ missing = object() diff --git a/PythonLib/full/email/mime/application.py b/PythonLib/full/email/mime/application.py index f67cbad3..9a9d213d 100644 --- a/PythonLib/full/email/mime/application.py +++ b/PythonLib/full/email/mime/application.py @@ -1,4 +1,4 @@ -# Copyright (C) 2001-2006 Python Software Foundation +# Copyright (C) 2001 Python Software Foundation # Author: Keith Dart # Contact: email-sig@python.org diff --git a/PythonLib/full/email/mime/audio.py b/PythonLib/full/email/mime/audio.py index 065819b2..85f4a955 100644 --- a/PythonLib/full/email/mime/audio.py +++ b/PythonLib/full/email/mime/audio.py @@ -1,4 +1,4 @@ -# Copyright (C) 2001-2007 Python Software Foundation +# Copyright (C) 2001 Python Software Foundation # Author: Anthony Baxter # Contact: email-sig@python.org @@ -6,7 +6,6 @@ __all__ = ['MIMEAudio'] -from io import BytesIO from email import encoders from email.mime.nonmultipart import MIMENonMultipart @@ -59,10 +58,8 @@ def _what(data): # sndhdr.what() had a pretty cruddy interface, unfortunately. This is why # we re-do it here. It would be easier to reverse engineer the Unix 'file' # command and use the standard 'magic' file, as shipped with a modern Unix. - hdr = data[:512] - fakefile = BytesIO(hdr) for testfn in _rules: - if res := testfn(hdr, fakefile): + if res := testfn(data): return res else: return None @@ -74,7 +71,7 @@ def rule(rulefunc): @rule -def _aiff(h, f): +def _aiff(h): if not h.startswith(b'FORM'): return None if h[8:12] in {b'AIFC', b'AIFF'}: @@ -84,7 +81,7 @@ def _aiff(h, f): @rule -def _au(h, f): +def _au(h): if h.startswith(b'.snd'): return 'basic' else: @@ -92,7 +89,7 @@ def _au(h, f): @rule -def _wav(h, f): +def _wav(h): # 'RIFF' 'WAVE' 'fmt ' if not h.startswith(b'RIFF') or h[8:12] != b'WAVE' or h[12:16] != b'fmt ': return None diff --git a/PythonLib/full/email/mime/base.py b/PythonLib/full/email/mime/base.py index f601f621..da4c6e59 100644 --- a/PythonLib/full/email/mime/base.py +++ b/PythonLib/full/email/mime/base.py @@ -1,4 +1,4 @@ -# Copyright (C) 2001-2006 Python Software Foundation +# Copyright (C) 2001 Python Software Foundation # Author: Barry Warsaw # Contact: email-sig@python.org diff --git a/PythonLib/full/email/mime/image.py b/PythonLib/full/email/mime/image.py index 4b7f2f9c..dab96858 100644 --- a/PythonLib/full/email/mime/image.py +++ b/PythonLib/full/email/mime/image.py @@ -1,4 +1,4 @@ -# Copyright (C) 2001-2006 Python Software Foundation +# Copyright (C) 2001 Python Software Foundation # Author: Barry Warsaw # Contact: email-sig@python.org diff --git a/PythonLib/full/email/mime/message.py b/PythonLib/full/email/mime/message.py index 61836b5a..13d9ff59 100644 --- a/PythonLib/full/email/mime/message.py +++ b/PythonLib/full/email/mime/message.py @@ -1,4 +1,4 @@ -# Copyright (C) 2001-2006 Python Software Foundation +# Copyright (C) 2001 Python Software Foundation # Author: Barry Warsaw # Contact: email-sig@python.org diff --git a/PythonLib/full/email/mime/multipart.py b/PythonLib/full/email/mime/multipart.py index 94d81c77..1abb84d5 100644 --- a/PythonLib/full/email/mime/multipart.py +++ b/PythonLib/full/email/mime/multipart.py @@ -1,4 +1,4 @@ -# Copyright (C) 2002-2006 Python Software Foundation +# Copyright (C) 2002 Python Software Foundation # Author: Barry Warsaw # Contact: email-sig@python.org @@ -21,7 +21,7 @@ def __init__(self, _subtype='mixed', boundary=None, _subparts=None, Content-Type and MIME-Version headers. _subtype is the subtype of the multipart content type, defaulting to - `mixed'. + 'mixed'. boundary is the multipart boundary string. By default it is calculated as needed. diff --git a/PythonLib/full/email/mime/nonmultipart.py b/PythonLib/full/email/mime/nonmultipart.py index a41386eb..5beab3a4 100644 --- a/PythonLib/full/email/mime/nonmultipart.py +++ b/PythonLib/full/email/mime/nonmultipart.py @@ -1,4 +1,4 @@ -# Copyright (C) 2002-2006 Python Software Foundation +# Copyright (C) 2002 Python Software Foundation # Author: Barry Warsaw # Contact: email-sig@python.org diff --git a/PythonLib/full/email/mime/text.py b/PythonLib/full/email/mime/text.py index 7672b789..aa4da7f8 100644 --- a/PythonLib/full/email/mime/text.py +++ b/PythonLib/full/email/mime/text.py @@ -1,4 +1,4 @@ -# Copyright (C) 2001-2006 Python Software Foundation +# Copyright (C) 2001 Python Software Foundation # Author: Barry Warsaw # Contact: email-sig@python.org diff --git a/PythonLib/full/email/parser.py b/PythonLib/full/email/parser.py index 06d99b17..c6a51dd8 100644 --- a/PythonLib/full/email/parser.py +++ b/PythonLib/full/email/parser.py @@ -1,8 +1,8 @@ -# Copyright (C) 2001-2007 Python Software Foundation +# Copyright (C) 2001 Python Software Foundation # Author: Barry Warsaw, Thomas Wouters, Anthony Baxter # Contact: email-sig@python.org -"""A parser of RFC 2822 and MIME email messages.""" +"""A parser of RFC 5322 and MIME email messages.""" __all__ = ['Parser', 'HeaderParser', 'BytesParser', 'BytesHeaderParser', 'FeedParser', 'BytesFeedParser'] @@ -15,14 +15,14 @@ class Parser: def __init__(self, _class=None, *, policy=compat32): - """Parser of RFC 2822 and MIME email messages. + """Parser of RFC 5322 and MIME email messages. Creates an in-memory object tree representing the email message, which can then be manipulated and turned over to a Generator to return the textual representation of the message. - The string must be formatted as a block of RFC 2822 headers and header - continuation lines, optionally preceded by a `Unix-from' header. The + The string must be formatted as a block of RFC 5322 headers and header + continuation lines, optionally preceded by a 'Unix-from' header. The header block is terminated either by the end of the string or by a blank line. @@ -75,14 +75,14 @@ def parsestr(self, text, headersonly=True): class BytesParser: def __init__(self, *args, **kw): - """Parser of binary RFC 2822 and MIME email messages. + """Parser of binary RFC 5322 and MIME email messages. Creates an in-memory object tree representing the email message, which can then be manipulated and turned over to a Generator to return the textual representation of the message. - The input must be formatted as a block of RFC 2822 headers and header - continuation lines, optionally preceded by a `Unix-from' header. The + The input must be formatted as a block of RFC 5322 headers and header + continuation lines, optionally preceded by a 'Unix-from' header. The header block is terminated either by the end of the input or by a blank line. diff --git a/PythonLib/full/email/policy.py b/PythonLib/full/email/policy.py index 6e109b65..41691501 100644 --- a/PythonLib/full/email/policy.py +++ b/PythonLib/full/email/policy.py @@ -4,7 +4,13 @@ import re import sys -from email._policybase import Policy, Compat32, compat32, _extend_docstrings +from email._policybase import ( + Compat32, + Policy, + _extend_docstrings, + compat32, + validate_header_name +) from email.utils import _has_surrogates from email.headerregistry import HeaderRegistry as HeaderRegistry from email.contentmanager import raw_data_manager @@ -138,6 +144,7 @@ def header_store_parse(self, name, value): CR or LF characters. """ + validate_header_name(name) if hasattr(value, 'name') and value.name.lower() == name.lower(): return (name, value) if isinstance(value, str) and len(value.splitlines())>1: diff --git a/PythonLib/full/email/quoprimime.py b/PythonLib/full/email/quoprimime.py index 27fcbb5a..27c7ea55 100644 --- a/PythonLib/full/email/quoprimime.py +++ b/PythonLib/full/email/quoprimime.py @@ -1,11 +1,11 @@ -# Copyright (C) 2001-2006 Python Software Foundation +# Copyright (C) 2001 Python Software Foundation # Author: Ben Gertzfield # Contact: email-sig@python.org """Quoted-printable content transfer encoding per RFCs 2045-2047. This module handles the content transfer encoding method defined in RFC 2045 -to encode US ASCII-like 8-bit data called `quoted-printable'. It is used to +to encode US ASCII-like 8-bit data called 'quoted-printable'. It is used to safely encode text that is in a character set similar to the 7-bit US ASCII character set, but that includes some 8-bit characters that are normally not allowed in email bodies or headers. @@ -17,7 +17,7 @@ with quoted-printable encoding. RFC 2045 defines a method for including character set information in an -`encoded-word' in a header. This method is commonly used for 8-bit real names +'encoded-word' in a header. This method is commonly used for 8-bit real names in To:/From:/Cc: etc. fields, as well as Subject: lines. This module does not do the line wrapping or end-of-line character @@ -127,7 +127,7 @@ def quote(c): def header_encode(header_bytes, charset='iso-8859-1'): """Encode a single header line with quoted-printable (like) encoding. - Defined in RFC 2045, this `Q' encoding is similar to quoted-printable, but + Defined in RFC 2045, this 'Q' encoding is similar to quoted-printable, but used specifically for email header fields to allow charsets with mostly 7 bit characters (and some 8 bit) to remain more or less readable in non-RFC 2045 aware mail clients. @@ -290,7 +290,7 @@ def _unquote_match(match): # Header decoding is done a bit differently def header_decode(s): - """Decode a string encoded with RFC 2045 MIME header `Q' encoding. + """Decode a string encoded with RFC 2045 MIME header 'Q' encoding. This function does not parse a full MIME header value encoded with quoted-printable (like =?iso-8859-1?q?Hello_World?=) -- please use diff --git a/PythonLib/full/email/utils.py b/PythonLib/full/email/utils.py index e53abc8b..3de1f0d2 100644 --- a/PythonLib/full/email/utils.py +++ b/PythonLib/full/email/utils.py @@ -1,4 +1,4 @@ -# Copyright (C) 2001-2010 Python Software Foundation +# Copyright (C) 2001 Python Software Foundation # Author: Barry Warsaw # Contact: email-sig@python.org @@ -25,8 +25,6 @@ import os import re import time -import random -import socket import datetime import urllib.parse @@ -36,9 +34,6 @@ from email._parseaddr import parsedate, parsedate_tz, _parsedate_tz -# Intrapackage imports -from email.charset import Charset - COMMASPACE = ', ' EMPTYSTRING = '' UEMPTYSTRING = '' @@ -95,6 +90,8 @@ def formataddr(pair, charset='utf-8'): name.encode('ascii') except UnicodeEncodeError: if isinstance(charset, str): + # lazy import to improve module import time + from email.charset import Charset charset = Charset(charset) encoded_name = charset.header_encode(name) return "%s <%s>" % (encoded_name, address) @@ -297,6 +294,11 @@ def make_msgid(idstring=None, domain=None): portion of the message id after the '@'. It defaults to the locally defined hostname. """ + # Lazy imports to speedup module import time + # (no other functions in email.utils need these modules) + import random + import socket + timeval = int(time.time()*100) pid = os.getpid() randint = random.getrandbits(64) @@ -415,8 +417,14 @@ def decode_params(params): for name, continuations in rfc2231_params.items(): value = [] extended = False - # Sort by number - continuations.sort() + # Sort by number, treating None as 0 if there is no 0, + # and ignore it if there is already a 0. + has_zero = any(x[0] == 0 for x in continuations) + if has_zero: + continuations = [x for x in continuations if x[0] is not None] + else: + continuations = [(x[0] or 0, x[1], x[2]) for x in continuations] + continuations.sort(key=lambda x: x[0]) # And now append all values in numerical order, converting # %-encodings for the encoded segments. If any of the # continuation names ends in a *, then the entire string, after @@ -464,23 +472,15 @@ def collapse_rfc2231_value(value, errors='replace', # better than not having it. # -def localtime(dt=None, isdst=None): +def localtime(dt=None): """Return local time as an aware datetime object. If called without arguments, return current time. Otherwise *dt* argument should be a datetime instance, and it is converted to the local time zone according to the system time zone database. If *dt* is naive (that is, dt.tzinfo is None), it is assumed to be in local time. - The isdst parameter is ignored. """ - if isdst is not None: - import warnings - warnings._deprecated( - "The 'isdst' parameter to 'localtime'", - message='{name} is deprecated and slated for removal in Python {remove}', - remove=(3, 14), - ) if dt is None: dt = datetime.datetime.now() return dt.astimezone() diff --git a/PythonLib/full/encodings/__init__.py b/PythonLib/full/encodings/__init__.py index f9075b8f..298177eb 100644 --- a/PythonLib/full/encodings/__init__.py +++ b/PythonLib/full/encodings/__init__.py @@ -156,19 +156,22 @@ def search_function(encoding): codecs.register(search_function) if sys.platform == 'win32': - # bpo-671666, bpo-46668: If Python does not implement a codec for current - # Windows ANSI code page, use the "mbcs" codec instead: - # WideCharToMultiByte() and MultiByteToWideChar() functions with CP_ACP. - # Python does not support custom code pages. - def _alias_mbcs(encoding): + from ._win_cp_codecs import create_win32_code_page_codec + + def win32_code_page_search_function(encoding): + encoding = encoding.lower() + if not encoding.startswith('cp'): + return None try: - import _winapi - ansi_code_page = "cp%s" % _winapi.GetACP() - if encoding == ansi_code_page: - import encodings.mbcs - return encodings.mbcs.getregentry() - except ImportError: - # Imports may fail while we are shutting down - pass + cp = int(encoding[2:]) + except ValueError: + return None + # Test if the code page is supported + try: + codecs.code_page_encode(cp, 'x') + except (OverflowError, OSError): + return None + + return create_win32_code_page_codec(cp) - codecs.register(_alias_mbcs) + codecs.register(win32_code_page_search_function) diff --git a/PythonLib/full/encodings/_win_cp_codecs.py b/PythonLib/full/encodings/_win_cp_codecs.py new file mode 100644 index 00000000..4f8eb886 --- /dev/null +++ b/PythonLib/full/encodings/_win_cp_codecs.py @@ -0,0 +1,36 @@ +import codecs + +def create_win32_code_page_codec(cp): + from codecs import code_page_encode, code_page_decode + + def encode(input, errors='strict'): + return code_page_encode(cp, input, errors) + + def decode(input, errors='strict'): + return code_page_decode(cp, input, errors, True) + + class IncrementalEncoder(codecs.IncrementalEncoder): + def encode(self, input, final=False): + return code_page_encode(cp, input, self.errors)[0] + + class IncrementalDecoder(codecs.BufferedIncrementalDecoder): + def _buffer_decode(self, input, errors, final): + return code_page_decode(cp, input, errors, final) + + class StreamWriter(codecs.StreamWriter): + def encode(self, input, errors='strict'): + return code_page_encode(cp, input, errors) + + class StreamReader(codecs.StreamReader): + def decode(self, input, errors, final): + return code_page_decode(cp, input, errors, final) + + return codecs.CodecInfo( + name=f'cp{cp}', + encode=encode, + decode=decode, + incrementalencoder=IncrementalEncoder, + incrementaldecoder=IncrementalDecoder, + streamreader=StreamReader, + streamwriter=StreamWriter, + ) diff --git a/PythonLib/full/encodings/aliases.py b/PythonLib/full/encodings/aliases.py index d85afd6d..4ecb6b6e 100644 --- a/PythonLib/full/encodings/aliases.py +++ b/PythonLib/full/encodings/aliases.py @@ -204,11 +204,17 @@ 'csibm869' : 'cp869', 'ibm869' : 'cp869', + # cp874 codec + '874' : 'cp874', + 'ms874' : 'cp874', + 'windows_874' : 'cp874', + # cp932 codec '932' : 'cp932', 'ms932' : 'cp932', 'mskanji' : 'cp932', 'ms_kanji' : 'cp932', + 'windows_31j' : 'cp932', # cp949 codec '949' : 'cp949', @@ -240,6 +246,7 @@ 'ks_c_5601_1987' : 'euc_kr', 'ksx1001' : 'euc_kr', 'ks_x_1001' : 'euc_kr', + 'cseuckr' : 'euc_kr', # gb18030 codec 'gb18030_2000' : 'gb18030', @@ -398,6 +405,8 @@ 'iso_8859_8' : 'iso8859_8', 'iso_8859_8_1988' : 'iso8859_8', 'iso_ir_138' : 'iso8859_8', + 'iso_8859_8_i' : 'iso8859_8', + 'iso_8859_8_e' : 'iso8859_8', # iso8859_9 codec 'csisolatin5' : 'iso8859_9', diff --git a/PythonLib/full/encodings/idna.py b/PythonLib/full/encodings/idna.py index 5396047a..0c90b4c9 100644 --- a/PythonLib/full/encodings/idna.py +++ b/PythonLib/full/encodings/idna.py @@ -11,7 +11,7 @@ sace_prefix = "xn--" # This assumes query strings, so AllowUnassigned is true -def nameprep(label): +def nameprep(label): # type: (str) -> str # Map newlabel = [] for c in label: @@ -25,7 +25,7 @@ def nameprep(label): label = unicodedata.normalize("NFKC", label) # Prohibit - for c in label: + for i, c in enumerate(label): if stringprep.in_table_c12(c) or \ stringprep.in_table_c22(c) or \ stringprep.in_table_c3(c) or \ @@ -35,7 +35,7 @@ def nameprep(label): stringprep.in_table_c7(c) or \ stringprep.in_table_c8(c) or \ stringprep.in_table_c9(c): - raise UnicodeError("Invalid character %r" % c) + raise UnicodeEncodeError("idna", label, i, i+1, f"Invalid character {c!r}") # Check bidi RandAL = [stringprep.in_table_d1(x) for x in label] @@ -46,29 +46,38 @@ def nameprep(label): # This is table C.8, which was already checked # 2) If a string contains any RandALCat character, the string # MUST NOT contain any LCat character. - if any(stringprep.in_table_d2(x) for x in label): - raise UnicodeError("Violation of BIDI requirement 2") + for i, x in enumerate(label): + if stringprep.in_table_d2(x): + raise UnicodeEncodeError("idna", label, i, i+1, + "Violation of BIDI requirement 2") # 3) If a string contains any RandALCat character, a # RandALCat character MUST be the first character of the # string, and a RandALCat character MUST be the last # character of the string. - if not RandAL[0] or not RandAL[-1]: - raise UnicodeError("Violation of BIDI requirement 3") + if not RandAL[0]: + raise UnicodeEncodeError("idna", label, 0, 1, + "Violation of BIDI requirement 3") + if not RandAL[-1]: + raise UnicodeEncodeError("idna", label, len(label)-1, len(label), + "Violation of BIDI requirement 3") return label -def ToASCII(label): +def ToASCII(label): # type: (str) -> bytes try: # Step 1: try ASCII - label = label.encode("ascii") - except UnicodeError: + label_ascii = label.encode("ascii") + except UnicodeEncodeError: pass else: # Skip to step 3: UseSTD3ASCIIRules is false, so # Skip to step 8. - if 0 < len(label) < 64: - return label - raise UnicodeError("label empty or too long") + if 0 < len(label_ascii) < 64: + return label_ascii + if len(label) == 0: + raise UnicodeEncodeError("idna", label, 0, 1, "label empty") + else: + raise UnicodeEncodeError("idna", label, 0, len(label), "label too long") # Step 2: nameprep label = nameprep(label) @@ -76,29 +85,34 @@ def ToASCII(label): # Step 3: UseSTD3ASCIIRules is false # Step 4: try ASCII try: - label = label.encode("ascii") - except UnicodeError: + label_ascii = label.encode("ascii") + except UnicodeEncodeError: pass else: # Skip to step 8. if 0 < len(label) < 64: - return label - raise UnicodeError("label empty or too long") + return label_ascii + if len(label) == 0: + raise UnicodeEncodeError("idna", label, 0, 1, "label empty") + else: + raise UnicodeEncodeError("idna", label, 0, len(label), "label too long") # Step 5: Check ACE prefix - if label.startswith(sace_prefix): - raise UnicodeError("Label starts with ACE prefix") + if label.lower().startswith(sace_prefix): + raise UnicodeEncodeError( + "idna", label, 0, len(sace_prefix), "Label starts with ACE prefix") # Step 6: Encode with PUNYCODE - label = label.encode("punycode") + label_ascii = label.encode("punycode") # Step 7: Prepend ACE prefix - label = ace_prefix + label + label_ascii = ace_prefix + label_ascii # Step 8: Check size - if 0 < len(label) < 64: - return label - raise UnicodeError("label empty or too long") + # do not check for empty as we prepend ace_prefix. + if len(label_ascii) < 64: + return label_ascii + raise UnicodeEncodeError("idna", label, 0, len(label), "label too long") def ToUnicode(label): if len(label) > 1024: @@ -110,7 +124,9 @@ def ToUnicode(label): # per https://www.rfc-editor.org/rfc/rfc3454#section-3.1 while still # preventing us from wasting time decoding a big thing that'll just # hit the actual <= 63 length limit in Step 6. - raise UnicodeError("label way too long") + if isinstance(label, str): + label = label.encode("utf-8", errors="backslashreplace") + raise UnicodeDecodeError("idna", label, 0, len(label), "label way too long") # Step 1: Check for ASCII if isinstance(label, bytes): pure_ascii = True @@ -118,25 +134,32 @@ def ToUnicode(label): try: label = label.encode("ascii") pure_ascii = True - except UnicodeError: + except UnicodeEncodeError: pure_ascii = False if not pure_ascii: + assert isinstance(label, str) # Step 2: Perform nameprep label = nameprep(label) # It doesn't say this, but apparently, it should be ASCII now try: label = label.encode("ascii") - except UnicodeError: - raise UnicodeError("Invalid character in IDN label") + except UnicodeEncodeError as exc: + raise UnicodeEncodeError("idna", label, exc.start, exc.end, + "Invalid character in IDN label") # Step 3: Check for ACE prefix - if not label.startswith(ace_prefix): + assert isinstance(label, bytes) + if not label.lower().startswith(ace_prefix): return str(label, "ascii") # Step 4: Remove ACE prefix label1 = label[len(ace_prefix):] # Step 5: Decode using PUNYCODE - result = label1.decode("punycode") + try: + result = label1.decode("punycode") + except UnicodeDecodeError as exc: + offset = len(ace_prefix) + raise UnicodeDecodeError("idna", label, offset+exc.start, offset+exc.end, exc.reason) # Step 6: Apply ToASCII label2 = ToASCII(result) @@ -144,7 +167,8 @@ def ToUnicode(label): # Step 7: Compare the result of step 6 with the one of step 3 # label2 will already be in lower case. if str(label, "ascii").lower() != str(label2, "ascii"): - raise UnicodeError("IDNA does not round-trip", label, label2) + raise UnicodeDecodeError("idna", label, 0, len(label), + f"IDNA does not round-trip, '{label!r}' != '{label2!r}'") # Step 8: return the result of step 5 return result @@ -156,7 +180,7 @@ def encode(self, input, errors='strict'): if errors != 'strict': # IDNA is quite clear that implementations must be strict - raise UnicodeError("unsupported error handling "+errors) + raise UnicodeError(f"Unsupported error handling: {errors}") if not input: return b'', 0 @@ -168,11 +192,16 @@ def encode(self, input, errors='strict'): else: # ASCII name: fast path labels = result.split(b'.') - for label in labels[:-1]: - if not (0 < len(label) < 64): - raise UnicodeError("label empty or too long") - if len(labels[-1]) >= 64: - raise UnicodeError("label too long") + for i, label in enumerate(labels[:-1]): + if len(label) == 0: + offset = sum(len(l) for l in labels[:i]) + i + raise UnicodeEncodeError("idna", input, offset, offset+1, + "label empty") + for i, label in enumerate(labels): + if len(label) >= 64: + offset = sum(len(l) for l in labels[:i]) + i + raise UnicodeEncodeError("idna", input, offset, offset+len(label), + "label too long") return result, len(input) result = bytearray() @@ -182,17 +211,27 @@ def encode(self, input, errors='strict'): del labels[-1] else: trailing_dot = b'' - for label in labels: + for i, label in enumerate(labels): if result: # Join with U+002E result.extend(b'.') - result.extend(ToASCII(label)) + try: + result.extend(ToASCII(label)) + except (UnicodeEncodeError, UnicodeDecodeError) as exc: + offset = sum(len(l) for l in labels[:i]) + i + raise UnicodeEncodeError( + "idna", + input, + offset + exc.start, + offset + exc.end, + exc.reason, + ) return bytes(result+trailing_dot), len(input) def decode(self, input, errors='strict'): if errors != 'strict': - raise UnicodeError("Unsupported error handling "+errors) + raise UnicodeError(f"Unsupported error handling: {errors}") if not input: return "", 0 @@ -202,7 +241,7 @@ def decode(self, input, errors='strict'): # XXX obviously wrong, see #3232 input = bytes(input) - if ace_prefix not in input: + if ace_prefix not in input.lower(): # Fast path try: return input.decode('ascii'), len(input) @@ -218,8 +257,15 @@ def decode(self, input, errors='strict'): trailing_dot = '' result = [] - for label in labels: - result.append(ToUnicode(label)) + for i, label in enumerate(labels): + try: + u_label = ToUnicode(label) + except (UnicodeEncodeError, UnicodeDecodeError) as exc: + offset = sum(len(x) for x in labels[:i]) + len(labels[:i]) + raise UnicodeDecodeError( + "idna", input, offset+exc.start, offset+exc.end, exc.reason) + else: + result.append(u_label) return ".".join(result)+trailing_dot, len(input) @@ -227,7 +273,7 @@ class IncrementalEncoder(codecs.BufferedIncrementalEncoder): def _buffer_encode(self, input, errors, final): if errors != 'strict': # IDNA is quite clear that implementations must be strict - raise UnicodeError("unsupported error handling "+errors) + raise UnicodeError(f"Unsupported error handling: {errors}") if not input: return (b'', 0) @@ -251,7 +297,16 @@ def _buffer_encode(self, input, errors, final): # Join with U+002E result.extend(b'.') size += 1 - result.extend(ToASCII(label)) + try: + result.extend(ToASCII(label)) + except (UnicodeEncodeError, UnicodeDecodeError) as exc: + raise UnicodeEncodeError( + "idna", + input, + size + exc.start, + size + exc.end, + exc.reason, + ) size += len(label) result += trailing_dot @@ -261,7 +316,7 @@ def _buffer_encode(self, input, errors, final): class IncrementalDecoder(codecs.BufferedIncrementalDecoder): def _buffer_decode(self, input, errors, final): if errors != 'strict': - raise UnicodeError("Unsupported error handling "+errors) + raise UnicodeError(f"Unsupported error handling: {errors}") if not input: return ("", 0) @@ -271,7 +326,11 @@ def _buffer_decode(self, input, errors, final): labels = dots.split(input) else: # Must be ASCII string - input = str(input, "ascii") + try: + input = str(input, "ascii") + except (UnicodeEncodeError, UnicodeDecodeError) as exc: + raise UnicodeDecodeError("idna", input, + exc.start, exc.end, exc.reason) labels = input.split(".") trailing_dot = '' @@ -288,7 +347,18 @@ def _buffer_decode(self, input, errors, final): result = [] size = 0 for label in labels: - result.append(ToUnicode(label)) + try: + u_label = ToUnicode(label) + except (UnicodeEncodeError, UnicodeDecodeError) as exc: + raise UnicodeDecodeError( + "idna", + input.encode("ascii", errors="backslashreplace"), + size + exc.start, + size + exc.end, + exc.reason, + ) + else: + result.append(u_label) if size: size += 1 size += len(label) diff --git a/PythonLib/full/encodings/palmos.py b/PythonLib/full/encodings/palmos.py index c506d654..df164ca5 100644 --- a/PythonLib/full/encodings/palmos.py +++ b/PythonLib/full/encodings/palmos.py @@ -201,7 +201,7 @@ def getregentry(): '\u02dc' # 0x98 -> SMALL TILDE '\u2122' # 0x99 -> TRADE MARK SIGN '\u0161' # 0x9A -> LATIN SMALL LETTER S WITH CARON - '\x9b' # 0x9B -> + '\u203a' # 0x9B -> SINGLE RIGHT-POINTING ANGLE QUOTATION MARK '\u0153' # 0x9C -> LATIN SMALL LIGATURE OE '\x9d' # 0x9D -> '\x9e' # 0x9E -> diff --git a/PythonLib/full/encodings/punycode.py b/PythonLib/full/encodings/punycode.py index 1c572644..4622fc8c 100644 --- a/PythonLib/full/encodings/punycode.py +++ b/PythonLib/full/encodings/punycode.py @@ -1,4 +1,4 @@ -""" Codec for the Punicode encoding, as specified in RFC 3492 +""" Codec for the Punycode encoding, as specified in RFC 3492 Written by Martin v. Löwis. """ @@ -131,10 +131,11 @@ def decode_generalized_number(extended, extpos, bias, errors): j = 0 while 1: try: - char = ord(extended[extpos]) + char = extended[extpos] except IndexError: if errors == "strict": - raise UnicodeError("incomplete punicode string") + raise UnicodeDecodeError("punycode", extended, extpos, extpos+1, + "incomplete punycode string") return extpos + 1, None extpos += 1 if 0x41 <= char <= 0x5A: # A-Z @@ -142,8 +143,8 @@ def decode_generalized_number(extended, extpos, bias, errors): elif 0x30 <= char <= 0x39: digit = char - 22 # 0x30-26 elif errors == "strict": - raise UnicodeError("Invalid extended code point '%s'" - % extended[extpos-1]) + raise UnicodeDecodeError("punycode", extended, extpos-1, extpos, + f"Invalid extended code point '{extended[extpos-1]}'") else: return extpos, None t = T(j, bias) @@ -155,11 +156,14 @@ def decode_generalized_number(extended, extpos, bias, errors): def insertion_sort(base, extended, errors): - """3.2 Insertion unsort coding""" + """3.2 Insertion sort coding""" + # This function raises UnicodeDecodeError with position in the extended. + # Caller should add the offset. char = 0x80 pos = -1 bias = 72 extpos = 0 + while extpos < len(extended): newpos, delta = decode_generalized_number(extended, extpos, bias, errors) @@ -171,7 +175,9 @@ def insertion_sort(base, extended, errors): char += pos // (len(base) + 1) if char > 0x10FFFF: if errors == "strict": - raise UnicodeError("Invalid character U+%x" % char) + raise UnicodeDecodeError( + "punycode", extended, pos-1, pos, + f"Invalid character U+{char:x}") char = ord('?') pos = pos % (len(base) + 1) base = base[:pos] + chr(char) + base[pos:] @@ -187,11 +193,21 @@ def punycode_decode(text, errors): pos = text.rfind(b"-") if pos == -1: base = "" - extended = str(text, "ascii").upper() + extended = text.upper() else: - base = str(text[:pos], "ascii", errors) - extended = str(text[pos+1:], "ascii").upper() - return insertion_sort(base, extended, errors) + try: + base = str(text[:pos], "ascii", errors) + except UnicodeDecodeError as exc: + raise UnicodeDecodeError("ascii", text, exc.start, exc.end, + exc.reason) from None + extended = text[pos+1:].upper() + try: + return insertion_sort(base, extended, errors) + except UnicodeDecodeError as exc: + offset = pos + 1 + raise UnicodeDecodeError("punycode", text, + offset+exc.start, offset+exc.end, + exc.reason) from None ### Codec APIs @@ -203,7 +219,7 @@ def encode(self, input, errors='strict'): def decode(self, input, errors='strict'): if errors not in ('strict', 'replace', 'ignore'): - raise UnicodeError("Unsupported error handling "+errors) + raise UnicodeError(f"Unsupported error handling: {errors}") res = punycode_decode(input, errors) return res, len(input) @@ -214,7 +230,7 @@ def encode(self, input, final=False): class IncrementalDecoder(codecs.IncrementalDecoder): def decode(self, input, final=False): if self.errors not in ('strict', 'replace', 'ignore'): - raise UnicodeError("Unsupported error handling "+self.errors) + raise UnicodeError(f"Unsupported error handling: {self.errors}") return punycode_decode(input, self.errors) class StreamWriter(Codec,codecs.StreamWriter): diff --git a/PythonLib/full/encodings/undefined.py b/PythonLib/full/encodings/undefined.py index 46902883..082771e1 100644 --- a/PythonLib/full/encodings/undefined.py +++ b/PythonLib/full/encodings/undefined.py @@ -1,6 +1,6 @@ """ Python 'undefined' Codec - This codec will always raise a ValueError exception when being + This codec will always raise a UnicodeError exception when being used. It is intended for use by the site.py file to switch off automatic string to Unicode coercion. diff --git a/PythonLib/full/encodings/utf_16.py b/PythonLib/full/encodings/utf_16.py index c6124824..d3b99800 100644 --- a/PythonLib/full/encodings/utf_16.py +++ b/PythonLib/full/encodings/utf_16.py @@ -64,7 +64,7 @@ def _buffer_decode(self, input, errors, final): elif byteorder == 1: self.decoder = codecs.utf_16_be_decode elif consumed >= 2: - raise UnicodeError("UTF-16 stream does not start with BOM") + raise UnicodeDecodeError("utf-16", input, 0, 2, "Stream does not start with BOM") return (output, consumed) return self.decoder(input, self.errors, final) @@ -138,7 +138,7 @@ def decode(self, input, errors='strict'): elif byteorder == 1: self.decode = codecs.utf_16_be_decode elif consumed>=2: - raise UnicodeError("UTF-16 stream does not start with BOM") + raise UnicodeDecodeError("utf-16", input, 0, 2, "Stream does not start with BOM") return (object, consumed) ### encodings module API diff --git a/PythonLib/full/encodings/utf_32.py b/PythonLib/full/encodings/utf_32.py index cdf84d14..1924bedb 100644 --- a/PythonLib/full/encodings/utf_32.py +++ b/PythonLib/full/encodings/utf_32.py @@ -59,7 +59,7 @@ def _buffer_decode(self, input, errors, final): elif byteorder == 1: self.decoder = codecs.utf_32_be_decode elif consumed >= 4: - raise UnicodeError("UTF-32 stream does not start with BOM") + raise UnicodeDecodeError("utf-32", input, 0, 4, "Stream does not start with BOM") return (output, consumed) return self.decoder(input, self.errors, final) @@ -132,8 +132,8 @@ def decode(self, input, errors='strict'): self.decode = codecs.utf_32_le_decode elif byteorder == 1: self.decode = codecs.utf_32_be_decode - elif consumed>=4: - raise UnicodeError("UTF-32 stream does not start with BOM") + elif consumed >= 4: + raise UnicodeDecodeError("utf-32", input, 0, 4, "Stream does not start with BOM") return (object, consumed) ### encodings module API diff --git a/PythonLib/full/ensurepip/__init__.py b/PythonLib/full/ensurepip/__init__.py index f827afa4..21bbfad0 100644 --- a/PythonLib/full/ensurepip/__init__.py +++ b/PythonLib/full/ensurepip/__init__.py @@ -1,78 +1,64 @@ -import collections import os -import os.path import subprocess import sys import sysconfig import tempfile +from contextlib import nullcontext from importlib import resources +from pathlib import Path +from shutil import copy2 __all__ = ["version", "bootstrap"] -_PACKAGE_NAMES = ('pip',) -_PIP_VERSION = "25.0.1" -_PROJECTS = [ - ("pip", _PIP_VERSION, "py3"), -] - -# Packages bundled in ensurepip._bundled have wheel_name set. -# Packages from WHEEL_PKG_DIR have wheel_path set. -_Package = collections.namedtuple('Package', - ('version', 'wheel_name', 'wheel_path')) +_PIP_VERSION = "25.3" # Directory of system wheel packages. Some Linux distribution packaging # policies recommend against bundling dependencies. For example, Fedora # installs wheel packages in the /usr/share/python-wheels/ directory and don't # install the ensurepip._bundled package. -_WHEEL_PKG_DIR = sysconfig.get_config_var('WHEEL_PKG_DIR') +if (_pkg_dir := sysconfig.get_config_var('WHEEL_PKG_DIR')) is not None: + _WHEEL_PKG_DIR = Path(_pkg_dir).resolve() +else: + _WHEEL_PKG_DIR = None + +def _find_wheel_pkg_dir_pip(): + if _WHEEL_PKG_DIR is None: + # NOTE: The compile-time `WHEEL_PKG_DIR` is unset so there is no place + # NOTE: for looking up the wheels. + return None -def _find_packages(path): - packages = {} + dist_matching_wheels = _WHEEL_PKG_DIR.glob('pip-*.whl') try: - filenames = os.listdir(path) - except OSError: - # Ignore: path doesn't exist or permission error - filenames = () - # Make the code deterministic if a directory contains multiple wheel files - # of the same package, but don't attempt to implement correct version - # comparison since this case should not happen. - filenames = sorted(filenames) - for filename in filenames: - # filename is like 'pip-21.2.4-py3-none-any.whl' - if not filename.endswith(".whl"): - continue - for name in _PACKAGE_NAMES: - prefix = name + '-' - if filename.startswith(prefix): - break - else: - continue - - # Extract '21.2.4' from 'pip-21.2.4-py3-none-any.whl' - version = filename.removeprefix(prefix).partition('-')[0] - wheel_path = os.path.join(path, filename) - packages[name] = _Package(version, None, wheel_path) - return packages - - -def _get_packages(): - global _PACKAGES, _WHEEL_PKG_DIR - if _PACKAGES is not None: - return _PACKAGES - - packages = {} - for name, version, py_tag in _PROJECTS: - wheel_name = f"{name}-{version}-{py_tag}-none-any.whl" - packages[name] = _Package(version, wheel_name, None) - if _WHEEL_PKG_DIR: - dir_packages = _find_packages(_WHEEL_PKG_DIR) - # only used the wheel package directory if all packages are found there - if all(name in dir_packages for name in _PACKAGE_NAMES): - packages = dir_packages - _PACKAGES = packages - return packages -_PACKAGES = None + last_matching_dist_wheel = sorted(dist_matching_wheels)[-1] + except IndexError: + # NOTE: `WHEEL_PKG_DIR` does not contain any wheel files for `pip`. + return None + + return nullcontext(last_matching_dist_wheel) + + +def _get_pip_whl_path_ctx(): + # Prefer pip from the wheel package directory, if present. + if (alternative_pip_wheel_path := _find_wheel_pkg_dir_pip()) is not None: + return alternative_pip_wheel_path + + return resources.as_file( + resources.files('ensurepip') + / '_bundled' + / f'pip-{_PIP_VERSION}-py3-none-any.whl' + ) + + +def _get_pip_version(): + with _get_pip_whl_path_ctx() as bundled_wheel_path: + wheel_name = bundled_wheel_path.name + return ( + # Extract '21.2.4' from 'pip-21.2.4-py3-none-any.whl' + wheel_name. + removeprefix('pip-'). + partition('-')[0] + ) def _run_pip(args, additional_paths=None): @@ -105,7 +91,7 @@ def version(): """ Returns a string specifying the bundled version of pip. """ - return _get_packages()['pip'].version + return _get_pip_version() def _disable_pip_configuration_settings(): @@ -167,24 +153,10 @@ def _bootstrap(*, root=None, upgrade=False, user=False, with tempfile.TemporaryDirectory() as tmpdir: # Put our bundled wheels into a temporary directory and construct the # additional paths that need added to sys.path - additional_paths = [] - for name, package in _get_packages().items(): - if package.wheel_name: - # Use bundled wheel package - wheel_name = package.wheel_name - wheel_path = resources.files("ensurepip") / "_bundled" / wheel_name - whl = wheel_path.read_bytes() - else: - # Use the wheel package directory - with open(package.wheel_path, "rb") as fp: - whl = fp.read() - wheel_name = os.path.basename(package.wheel_path) - - filename = os.path.join(tmpdir, wheel_name) - with open(filename, "wb") as fp: - fp.write(whl) - - additional_paths.append(filename) + tmpdir_path = Path(tmpdir) + with _get_pip_whl_path_ctx() as bundled_wheel_path: + tmp_wheel_path = tmpdir_path / bundled_wheel_path.name + copy2(bundled_wheel_path, tmp_wheel_path) # Construct the arguments to be passed to the pip command args = ["install", "--no-cache-dir", "--no-index", "--find-links", tmpdir] @@ -197,7 +169,8 @@ def _bootstrap(*, root=None, upgrade=False, user=False, if verbosity: args += ["-" + "v" * verbosity] - return _run_pip([*args, *_PACKAGE_NAMES], additional_paths) + return _run_pip([*args, "pip"], [os.fsdecode(tmp_wheel_path)]) + def _uninstall_helper(*, verbosity=0): """Helper to support a clean default uninstall process on Windows @@ -227,12 +200,12 @@ def _uninstall_helper(*, verbosity=0): if verbosity: args += ["-" + "v" * verbosity] - return _run_pip([*args, *reversed(_PACKAGE_NAMES)]) + return _run_pip([*args, "pip"]) def _main(argv=None): import argparse - parser = argparse.ArgumentParser(prog="python -m ensurepip") + parser = argparse.ArgumentParser(color=True) parser.add_argument( "--version", action="version", diff --git a/PythonLib/full/ensurepip/_bundled/pip-25.0.1-py3-none-any.whl b/PythonLib/full/ensurepip/_bundled/pip-25.0.1-py3-none-any.whl deleted file mode 100644 index 8d3b0043..00000000 Binary files a/PythonLib/full/ensurepip/_bundled/pip-25.0.1-py3-none-any.whl and /dev/null differ diff --git a/PythonLib/full/ensurepip/_bundled/pip-25.3-py3-none-any.whl b/PythonLib/full/ensurepip/_bundled/pip-25.3-py3-none-any.whl new file mode 100644 index 00000000..755e1aa0 Binary files /dev/null and b/PythonLib/full/ensurepip/_bundled/pip-25.3-py3-none-any.whl differ diff --git a/PythonLib/full/ensurepip/_uninstall.py b/PythonLib/full/ensurepip/_uninstall.py index b2579043..4183c28a 100644 --- a/PythonLib/full/ensurepip/_uninstall.py +++ b/PythonLib/full/ensurepip/_uninstall.py @@ -6,7 +6,7 @@ def _main(argv=None): - parser = argparse.ArgumentParser(prog="python -m ensurepip._uninstall") + parser = argparse.ArgumentParser() parser.add_argument( "--version", action="version", diff --git a/PythonLib/full/enum.py b/PythonLib/full/enum.py index c87aeef7..b4551da1 100644 --- a/PythonLib/full/enum.py +++ b/PythonLib/full/enum.py @@ -1,12 +1,10 @@ import sys import builtins as bltns from types import MappingProxyType, DynamicClassAttribute -from operator import or_ as _or_ -from functools import reduce __all__ = [ - 'EnumType', 'EnumMeta', + 'EnumType', 'EnumMeta', 'EnumDict', 'Enum', 'IntEnum', 'StrEnum', 'Flag', 'IntFlag', 'ReprEnum', 'auto', 'unique', 'property', 'verify', 'member', 'nonmember', 'FlagBoundary', 'STRICT', 'CONFORM', 'EJECT', 'KEEP', @@ -63,8 +61,8 @@ def _is_sunder(name): return ( len(name) > 2 and name[0] == name[-1] == '_' and - name[1:2] != '_' and - name[-2:-1] != '_' + name[1] != '_' and + name[-2] != '_' ) def _is_internal_class(cls_name, obj): @@ -83,7 +81,6 @@ def _is_private(cls_name, name): if ( len(name) > pat_len and name.startswith(pattern) - and name[pat_len:pat_len+1] != ['_'] and (name[-1] != '_' or name[-2] != '_') ): return True @@ -132,7 +129,7 @@ def show_flag_values(value): def bin(num, max_bits=None): """ Like built-in bin(), except negative values are represented in - twos-compliment, and the leading bit always indicates sign + twos-complement, and the leading bit always indicates sign (0=positive, 1=negative). >>> bin(10) @@ -141,6 +138,7 @@ def bin(num, max_bits=None): '0b1 0101' """ + num = num.__index__() ceiling = 2 ** (num).bit_length() if num >= 0: s = bltns.bin(num + ceiling).replace('1', '0', 1) @@ -153,19 +151,6 @@ def bin(num, max_bits=None): digits = (sign[-1] * max_bits + digits)[-max_bits:] return "%s %s" % (sign, digits) -def _dedent(text): - """ - Like textwrap.dedent. Rewritten because we cannot import textwrap. - """ - lines = text.split('\n') - blanks = 0 - for i, ch in enumerate(lines[0]): - if ch != ' ': - break - for j, l in enumerate(lines): - lines[j] = l[i:] - return '\n'.join(lines) - class _not_given: def __repr__(self): return('') @@ -211,7 +196,7 @@ def __get__(self, instance, ownerclass=None): # use previous enum.property return self.fget(instance) elif self._attr_type == 'attr': - # look up previous attibute + # look up previous attribute return getattr(self._cls_type, self.name) elif self._attr_type == 'desc': # use previous descriptor @@ -324,68 +309,35 @@ def __set_name__(self, enum_class, member_name): ): # no other instances found, record this member in _member_names_ enum_class._member_names_.append(member_name) - # if necessary, get redirect in place and then add it to _member_map_ - found_descriptor = None - descriptor_type = None - class_type = None - for base in enum_class.__mro__[1:]: - attr = base.__dict__.get(member_name) - if attr is not None: - if isinstance(attr, (property, DynamicClassAttribute)): - found_descriptor = attr - class_type = base - descriptor_type = 'enum' - break - elif _is_descriptor(attr): - found_descriptor = attr - descriptor_type = descriptor_type or 'desc' - class_type = class_type or base - continue - else: - descriptor_type = 'attr' - class_type = base - if found_descriptor: - redirect = property() - redirect.member = enum_member - redirect.__set_name__(enum_class, member_name) - if descriptor_type in ('enum','desc'): - # earlier descriptor found; copy fget, fset, fdel to this one. - redirect.fget = getattr(found_descriptor, 'fget', None) - redirect._get = getattr(found_descriptor, '__get__', None) - redirect.fset = getattr(found_descriptor, 'fset', None) - redirect._set = getattr(found_descriptor, '__set__', None) - redirect.fdel = getattr(found_descriptor, 'fdel', None) - redirect._del = getattr(found_descriptor, '__delete__', None) - redirect._attr_type = descriptor_type - redirect._cls_type = class_type - setattr(enum_class, member_name, redirect) - else: - setattr(enum_class, member_name, enum_member) - # now add to _member_map_ (even aliases) - enum_class._member_map_[member_name] = enum_member + + enum_class._add_member_(member_name, enum_member) try: # This may fail if value is not hashable. We can't add the value # to the map, and by-value lookups for this value will be # linear. enum_class._value2member_map_.setdefault(value, enum_member) + if value not in enum_class._hashable_values_: + enum_class._hashable_values_.append(value) except TypeError: # keep track of the value in a list so containment checks are quick enum_class._unhashable_values_.append(value) + enum_class._unhashable_values_map_.setdefault(member_name, []).append(value) -class _EnumDict(dict): +class EnumDict(dict): """ Track enum member order and ensure member names are not reused. EnumType will use the names found in self._member_names as the enumeration member names. """ - def __init__(self): + def __init__(self, cls_name=None): super().__init__() - self._member_names = {} # use a dict to keep insertion order + self._member_names = {} # use a dict -- faster look-up than a list, and keeps insertion order since 3.7 self._last_values = [] self._ignore = [] self._auto_called = False + self._cls_name = cls_name def __setitem__(self, key, value): """ @@ -396,23 +348,19 @@ def __setitem__(self, key, value): Single underscore (sunder) names are reserved. """ - if _is_internal_class(self._cls_name, value): - import warnings - warnings.warn( - "In 3.13 classes created inside an enum will not become a member. " - "Use the `member` decorator to keep the current behavior.", - DeprecationWarning, - stacklevel=2, - ) - if _is_private(self._cls_name, key): - # also do nothing, name will be a normal attribute + if self._cls_name is not None and _is_private(self._cls_name, key): + # do nothing, name will be a normal attribute pass elif _is_sunder(key): if key not in ( '_order_', '_generate_next_value_', '_numeric_repr_', '_missing_', '_ignore_', '_iter_member_', '_iter_member_by_value_', '_iter_member_by_def_', - ): + '_add_alias_', '_add_value_alias_', + # While not in use internally, those are common for pretty + # printing and thus excluded from Enum's reservation of + # _sunder_ names + ) and not key.startswith('_repr_'): raise ValueError( '_sunder_ names, such as %r, are reserved for future Enum use' % (key, ) @@ -448,10 +396,9 @@ def __setitem__(self, key, value): value = value.value elif _is_descriptor(value): pass - # TODO: uncomment next three lines in 3.13 - # elif _is_internal_class(self._cls_name, value): - # # do nothing, name will be a normal attribute - # pass + elif self._cls_name is not None and _is_internal_class(self._cls_name, value): + # do nothing, name will be a normal attribute + pass else: if key in self: # enum overwriting a descriptor? @@ -494,6 +441,10 @@ def __setitem__(self, key, value): self._last_values.append(value) super().__setitem__(key, value) + @property + def member_names(self): + return list(self._member_names) + def update(self, members, **more_members): try: for name in members.keys(): @@ -504,6 +455,8 @@ def update(self, members, **more_members): for name, value in more_members.items(): self[name] = value +_EnumDict = EnumDict # keep private name for backwards compatibility + class EnumType(type): """ @@ -515,8 +468,7 @@ def __prepare__(metacls, cls, bases, **kwds): # check that previous enum members do not exist metacls._check_for_existing_members_(cls, bases) # create the namespace dict - enum_dict = _EnumDict() - enum_dict._cls_name = cls + enum_dict = EnumDict(cls) # inherit previous flags and _generate_next_value_ function member_type, first_enum = metacls._get_mixins_(cls, bases) if first_enum is not None: @@ -577,7 +529,9 @@ def __new__(metacls, cls, bases, classdict, *, boundary=None, _simple=False, **k classdict['_member_names_'] = [] classdict['_member_map_'] = {} classdict['_value2member_map_'] = {} - classdict['_unhashable_values_'] = [] + classdict['_hashable_values_'] = [] # for comparing with non-hashable types + classdict['_unhashable_values_'] = [] # e.g. frozenset() with set() + classdict['_unhashable_values_map_'] = {} classdict['_member_type_'] = member_type # now set the __repr__ for the value classdict['_value_repr_'] = metacls._find_data_repr_(cls, bases) @@ -592,7 +546,10 @@ def __new__(metacls, cls, bases, classdict, *, boundary=None, _simple=False, **k classdict['_all_bits_'] = 0 classdict['_inverted_'] = None try: + classdict['_%s__in_progress' % cls] = True enum_class = super().__new__(metacls, cls, bases, classdict, **kwds) + classdict['_%s__in_progress' % cls] = False + delattr(enum_class, '_%s__in_progress' % cls) except Exception as e: # since 3.12 the note "Error calling __set_name__ on '_proto_member' instance ..." # is tacked on to the error instead of raising a RuntimeError, so discard it @@ -775,11 +732,16 @@ def __contains__(cls, value): """ if isinstance(value, cls): return True - try: - cls(value) - return True - except ValueError: - return value in cls._unhashable_values_ + if issubclass(cls, Flag): + try: + result = cls._missing_(value) + return isinstance(result, cls) + except ValueError: + pass + return ( + value in cls._unhashable_values_ # both structures are lists + or value in cls._hashable_values_ + ) def __delattr__(cls, attr): # nicer error message when someone tries to delete an attribute @@ -1075,7 +1037,70 @@ def _find_new_(mcls, classdict, member_type, first_enum): else: use_args = True return __new__, save_new, use_args -EnumMeta = EnumType + + def _add_member_(cls, name, member): + # _value_ structures are not updated + if name in cls._member_map_: + if cls._member_map_[name] is not member: + raise NameError('%r is already bound: %r' % (name, cls._member_map_[name])) + return + # + # if necessary, get redirect in place and then add it to _member_map_ + found_descriptor = None + descriptor_type = None + class_type = None + for base in cls.__mro__[1:]: + attr = base.__dict__.get(name) + if attr is not None: + if isinstance(attr, (property, DynamicClassAttribute)): + found_descriptor = attr + class_type = base + descriptor_type = 'enum' + break + elif _is_descriptor(attr): + found_descriptor = attr + descriptor_type = descriptor_type or 'desc' + class_type = class_type or base + continue + else: + descriptor_type = 'attr' + class_type = base + if found_descriptor: + redirect = property() + redirect.member = member + redirect.__set_name__(cls, name) + if descriptor_type in ('enum', 'desc'): + # earlier descriptor found; copy fget, fset, fdel to this one. + redirect.fget = getattr(found_descriptor, 'fget', None) + redirect._get = getattr(found_descriptor, '__get__', None) + redirect.fset = getattr(found_descriptor, 'fset', None) + redirect._set = getattr(found_descriptor, '__set__', None) + redirect.fdel = getattr(found_descriptor, 'fdel', None) + redirect._del = getattr(found_descriptor, '__delete__', None) + redirect._attr_type = descriptor_type + redirect._cls_type = class_type + setattr(cls, name, redirect) + else: + setattr(cls, name, member) + # now add to _member_map_ (even aliases) + cls._member_map_[name] = member + + @property + def __signature__(cls): + from inspect import Parameter, Signature + if cls._member_names_: + return Signature([Parameter('values', Parameter.VAR_POSITIONAL)]) + else: + return Signature([Parameter('new_class_name', Parameter.POSITIONAL_ONLY), + Parameter('names', Parameter.POSITIONAL_OR_KEYWORD), + Parameter('module', Parameter.KEYWORD_ONLY, default=None), + Parameter('qualname', Parameter.KEYWORD_ONLY, default=None), + Parameter('type', Parameter.KEYWORD_ONLY, default=None), + Parameter('start', Parameter.KEYWORD_ONLY, default=1), + Parameter('boundary', Parameter.KEYWORD_ONLY, default=None)]) + + +EnumMeta = EnumType # keep EnumMeta name for backwards compatibility class Enum(metaclass=EnumType): @@ -1118,13 +1143,6 @@ class Enum(metaclass=EnumType): attributes -- see the documentation for details. """ - @classmethod - def __signature__(cls): - if cls._member_names_: - return '(*values)' - else: - return '(new_class_name, /, names, *, module=None, qualname=None, type=None, start=1, boundary=None)' - def __new__(cls, value): # all enum instances are actually created during class construction # without calling this method; this method is called by the metaclass' @@ -1141,12 +1159,17 @@ def __new__(cls, value): pass except TypeError: # not there, now do long search -- O(n) behavior - for member in cls._member_map_.values(): - if member._value_ == value: - return member + for name, unhashable_values in cls._unhashable_values_map_.items(): + if value in unhashable_values: + return cls[name] + for name, member in cls._member_map_.items(): + if value == member._value_: + return cls[name] # still not found -- verify that members exist, in-case somebody got here mistakenly # (such as via super when trying to override __new__) if not cls._member_map_: + if getattr(cls, '_%s__in_progress' % cls.__name__, False): + raise TypeError('do not use `super().__new__; call the appropriate __new__ directly') from None raise TypeError("%r has no members defined" % cls) # # still not found -- try _missing_ hook @@ -1181,8 +1204,33 @@ def __new__(cls, value): exc = None ve_exc = None - def __init__(self, *args, **kwds): - pass + def _add_alias_(self, name): + self.__class__._add_member_(name, self) + + def _add_value_alias_(self, value): + cls = self.__class__ + try: + if value in cls._value2member_map_: + if cls._value2member_map_[value] is not self: + raise ValueError('%r is already bound: %r' % (value, cls._value2member_map_[value])) + return + except TypeError: + # unhashable value, do long search + for m in cls._member_map_.values(): + if m._value_ == value: + if m is not self: + raise ValueError('%r is already bound: %r' % (value, cls._value2member_map_[value])) + return + try: + # This may fail if value is not hashable. We can't add the value + # to the map, and by-value lookups for this value will be + # linear. + cls._value2member_map_.setdefault(value, self) + cls._hashable_values_.append(value) + except TypeError: + # keep track of the value in a list so containment checks are quick + cls._unhashable_values_.append(value) + cls._unhashable_values_map_.setdefault(self.name, []).append(value) @staticmethod def _generate_next_value_(name, start, count, last_values): @@ -1197,28 +1245,13 @@ def _generate_next_value_(name, start, count, last_values): if not last_values: return start try: - last = last_values[-1] - last_values.sort() - if last == last_values[-1]: - # no difference between old and new methods - return last + 1 - else: - # trigger old method (with warning) - raise TypeError + last_value = sorted(last_values).pop() except TypeError: - import warnings - warnings.warn( - "In 3.13 the default `auto()`/`_generate_next_value_` will require all values to be sortable and support adding +1\n" - "and the value returned will be the largest value in the enum incremented by 1", - DeprecationWarning, - stacklevel=3, - ) - for v in reversed(last_values): - try: - return v + 1 - except TypeError: - pass - return start + raise TypeError('unable to sort non-numeric values') from None + try: + return last_value + 1 + except TypeError: + raise TypeError('unable to increment %r' % (last_value, )) from None @classmethod def _missing_(cls, value): @@ -1693,7 +1726,7 @@ def _simple_enum(etype=Enum, *, boundary=None, use_args=None): Class decorator that converts a normal class into an :class:`Enum`. No safety checks are done, and some advanced behavior (such as :func:`__init_subclass__`) is not available. Enum creation can be faster - using :func:`simple_enum`. + using :func:`_simple_enum`. >>> from enum import Enum, _simple_enum >>> @_simple_enum(Enum) @@ -1724,7 +1757,9 @@ def convert_class(cls): body['_member_names_'] = member_names = [] body['_member_map_'] = member_map = {} body['_value2member_map_'] = value2member_map = {} - body['_unhashable_values_'] = [] + body['_hashable_values_'] = hashable_values = [] + body['_unhashable_values_'] = unhashable_values = [] + body['_unhashable_values_map_'] = {} body['_member_type_'] = member_type = etype._member_type_ body['_value_repr_'] = etype._value_repr_ if issubclass(etype, Flag): @@ -1771,35 +1806,42 @@ def convert_class(cls): for name, value in attrs.items(): if isinstance(value, auto) and auto.value is _auto_null: value = gnv(name, 1, len(member_names), gnv_last_values) - if value in value2member_map: + # create basic member (possibly isolate value for alias check) + if use_args: + if not isinstance(value, tuple): + value = (value, ) + member = new_member(enum_class, *value) + value = value[0] + else: + member = new_member(enum_class) + if __new__ is None: + member._value_ = value + # now check if alias + try: + contained = value2member_map.get(member._value_) + except TypeError: + contained = None + if member._value_ in unhashable_values or member.value in hashable_values: + for m in enum_class: + if m._value_ == member._value_: + contained = m + break + if contained is not None: # an alias to an existing member - member = value2member_map[value] - redirect = property() - redirect.member = member - redirect.__set_name__(enum_class, name) - setattr(enum_class, name, redirect) - member_map[name] = member + contained._add_alias_(name) else: - # create the member - if use_args: - if not isinstance(value, tuple): - value = (value, ) - member = new_member(enum_class, *value) - value = value[0] - else: - member = new_member(enum_class) - if __new__ is None: - member._value_ = value + # finish creating member member._name_ = name member.__objclass__ = enum_class member.__init__(value) - redirect = property() - redirect.member = member - redirect.__set_name__(enum_class, name) - setattr(enum_class, name, redirect) - member_map[name] = member member._sort_order_ = len(member_names) + if name not in ('name', 'value'): + setattr(enum_class, name, member) + member_map[name] = member + else: + enum_class._add_member_(name, member) value2member_map[value] = member + hashable_values.append(value) if _is_single_bit(value): # not a multi-bit alias, record in _member_names_ and _flag_mask_ member_names.append(name) @@ -1821,37 +1863,53 @@ def convert_class(cls): if value.value is _auto_null: value.value = gnv(name, 1, len(member_names), gnv_last_values) value = value.value - if value in value2member_map: + # create basic member (possibly isolate value for alias check) + if use_args: + if not isinstance(value, tuple): + value = (value, ) + member = new_member(enum_class, *value) + value = value[0] + else: + member = new_member(enum_class) + if __new__ is None: + member._value_ = value + # now check if alias + try: + contained = value2member_map.get(member._value_) + except TypeError: + contained = None + if member._value_ in unhashable_values or member._value_ in hashable_values: + for m in enum_class: + if m._value_ == member._value_: + contained = m + break + if contained is not None: # an alias to an existing member - member = value2member_map[value] - redirect = property() - redirect.member = member - redirect.__set_name__(enum_class, name) - setattr(enum_class, name, redirect) - member_map[name] = member + contained._add_alias_(name) else: - # create the member - if use_args: - if not isinstance(value, tuple): - value = (value, ) - member = new_member(enum_class, *value) - value = value[0] - else: - member = new_member(enum_class) - if __new__ is None: - member._value_ = value + # finish creating member member._name_ = name member.__objclass__ = enum_class member.__init__(value) member._sort_order_ = len(member_names) - redirect = property() - redirect.member = member - redirect.__set_name__(enum_class, name) - setattr(enum_class, name, redirect) - member_map[name] = member - value2member_map[value] = member + if name not in ('name', 'value'): + setattr(enum_class, name, member) + member_map[name] = member + else: + enum_class._add_member_(name, member) member_names.append(name) gnv_last_values.append(value) + try: + # This may fail if value is not hashable. We can't add the value + # to the map, and by-value lookups for this value will be + # linear. + enum_class._value2member_map_.setdefault(value, member) + if value not in hashable_values: + hashable_values.append(value) + except TypeError: + # keep track of the value in a list so containment checks are quick + enum_class._unhashable_values_.append(value) + enum_class._unhashable_values_map_.setdefault(name, []).append(value) if '__new__' in body: enum_class.__new_member__ = enum_class.__new__ enum_class.__new__ = Enum.__new__ @@ -1908,7 +1966,7 @@ def __call__(self, enumeration): if 2**i not in values: missing.append(2**i) elif enum_type == 'enum': - # check for powers of one + # check for missing consecutive integers for i in range(low+1, high): if i not in values: missing.append(i) @@ -1936,7 +1994,8 @@ def __call__(self, enumeration): missed = [v for v in values if v not in member_values] if missed: missing_names.append(name) - missing_value |= reduce(_or_, missed) + for val in missed: + missing_value |= val if missing_names: if len(missing_names) == 1: alias = 'alias %s is missing' % missing_names[0] @@ -1984,7 +2043,8 @@ def _test_simple_enum(checked_enum, simple_enum): + list(simple_enum._member_map_.keys()) ) for key in set(checked_keys + simple_keys): - if key in ('__module__', '_member_map_', '_value2member_map_', '__doc__'): + if key in ('__module__', '_member_map_', '_value2member_map_', '__doc__', + '__static_attributes__', '__firstlineno__'): # keys known to be different, or very long continue elif key in member_names: diff --git a/PythonLib/full/filecmp.py b/PythonLib/full/filecmp.py index 6fdb48f7..c5b8d854 100644 --- a/PythonLib/full/filecmp.py +++ b/PythonLib/full/filecmp.py @@ -88,12 +88,15 @@ def _do_cmp(f1, f2): class dircmp: """A class that manages the comparison of 2 directories. - dircmp(a, b, ignore=None, hide=None) + dircmp(a, b, ignore=None, hide=None, *, shallow=True) A and B are directories. IGNORE is a list of names to ignore, defaults to DEFAULT_IGNORES. HIDE is a list of names to hide, defaults to [os.curdir, os.pardir]. + SHALLOW specifies whether to just check the stat signature (do not read + the files). + defaults to True. High level usage: x = dircmp(dir1, dir2) @@ -121,7 +124,7 @@ class dircmp: in common_dirs. """ - def __init__(self, a, b, ignore=None, hide=None): # Initialize + def __init__(self, a, b, ignore=None, hide=None, *, shallow=True): # Initialize self.left = a self.right = b if hide is None: @@ -132,6 +135,7 @@ def __init__(self, a, b, ignore=None, hide=None): # Initialize self.ignore = DEFAULT_IGNORES else: self.ignore = ignore + self.shallow = shallow def phase0(self): # Compare everything except common subdirectories self.left_list = _filter(os.listdir(self.left), @@ -186,7 +190,7 @@ def phase2(self): # Distinguish files, directories, funnies self.common_funny.append(x) def phase3(self): # Find out differences between common files - xx = cmpfiles(self.left, self.right, self.common_files) + xx = cmpfiles(self.left, self.right, self.common_files, self.shallow) self.same_files, self.diff_files, self.funny_files = xx def phase4(self): # Find out differences between common subdirectories @@ -198,7 +202,8 @@ def phase4(self): # Find out differences between common subdirectories for x in self.common_dirs: a_x = os.path.join(self.left, x) b_x = os.path.join(self.right, x) - self.subdirs[x] = self.__class__(a_x, b_x, self.ignore, self.hide) + self.subdirs[x] = self.__class__(a_x, b_x, self.ignore, self.hide, + shallow=self.shallow) def phase4_closure(self): # Recursively call phase4() on subdirectories self.phase4() diff --git a/PythonLib/full/fileinput.py b/PythonLib/full/fileinput.py index 1b25f28f..3dba3d2f 100644 --- a/PythonLib/full/fileinput.py +++ b/PythonLib/full/fileinput.py @@ -53,7 +53,7 @@ sequence must be accessed in strictly sequential order; sequence access and readline() cannot be mixed. -Optional in-place filtering: if the keyword argument inplace=1 is +Optional in-place filtering: if the keyword argument inplace=True is passed to input() or to the FileInput constructor, the file is moved to a backup file and standard output is directed to the input file. This makes it possible to write a filter that rewrites its input file diff --git a/PythonLib/full/fnmatch.py b/PythonLib/full/fnmatch.py index d5e296f7..10e1c936 100644 --- a/PythonLib/full/fnmatch.py +++ b/PythonLib/full/fnmatch.py @@ -9,12 +9,15 @@ The function translate(PATTERN) returns a regular expression corresponding to PATTERN. (It does not compile it.) """ + +import functools +import itertools import os import posixpath import re -import functools -__all__ = ["filter", "fnmatch", "fnmatchcase", "translate"] +__all__ = ["filter", "filterfalse", "fnmatch", "fnmatchcase", "translate"] + def fnmatch(name, pat): """Test whether FILENAME matches PATTERN. @@ -35,6 +38,7 @@ def fnmatch(name, pat): pat = os.path.normcase(pat) return fnmatchcase(name, pat) + @functools.lru_cache(maxsize=32768, typed=True) def _compile_pattern(pat): if isinstance(pat, bytes): @@ -45,6 +49,7 @@ def _compile_pattern(pat): res = translate(pat) return re.compile(res).match + def filter(names, pat): """Construct a list from those elements of the iterable NAMES that match PAT.""" result = [] @@ -61,6 +66,22 @@ def filter(names, pat): result.append(name) return result + +def filterfalse(names, pat): + """Construct a list from those elements of the iterable NAMES that do not match PAT.""" + pat = os.path.normcase(pat) + match = _compile_pattern(pat) + if os.path is posixpath: + # normcase on posix is NOP. Optimize it away from the loop. + return list(itertools.filterfalse(match, names)) + + result = [] + for name in names: + if match(os.path.normcase(name)) is None: + result.append(name) + return result + + def fnmatchcase(name, pat): """Test whether FILENAME matches PATTERN, including case. @@ -77,19 +98,32 @@ def translate(pat): There is no way to quote meta-characters. """ - STAR = object() + parts, star_indices = _translate(pat, '*', '.') + return _join_translated_parts(parts, star_indices) + + +_re_setops_sub = re.compile(r'([&~|])').sub +_re_escape = functools.lru_cache(maxsize=512)(re.escape) + + +def _translate(pat, star, question_mark): res = [] add = res.append + star_indices = [] + i, n = 0, len(pat) while i < n: c = pat[i] i = i+1 if c == '*': + # store the position of the wildcard + star_indices.append(len(res)) + add(star) # compress consecutive `*` into one - if (not res) or res[-1] is not STAR: - add(STAR) + while i < n and pat[i] == '*': + i += 1 elif c == '?': - add('.') + add(question_mark) elif c == '[': j = i if j < n and pat[j] == '!': @@ -128,8 +162,6 @@ def translate(pat): # Hyphens that create ranges shouldn't be escaped. stuff = '-'.join(s.replace('\\', r'\\').replace('-', r'\-') for s in chunks) - # Escape set operations (&&, ~~ and ||). - stuff = re.sub(r'([&~|])', r'\\\1', stuff) i = j+1 if not stuff: # Empty range: never match. @@ -138,48 +170,40 @@ def translate(pat): # Negated empty range: match any character. add('.') else: + # Escape set operations (&&, ~~ and ||). + stuff = _re_setops_sub(r'\\\1', stuff) if stuff[0] == '!': stuff = '^' + stuff[1:] elif stuff[0] in ('^', '['): stuff = '\\' + stuff add(f'[{stuff}]') else: - add(re.escape(c)) - assert i == n - - # Deal with STARs. - inp = res - res = [] - add = res.append - i, n = 0, len(inp) - # Fixed pieces at the start? - while i < n and inp[i] is not STAR: - add(inp[i]) - i += 1 - # Now deal with STAR fixed STAR fixed ... - # For an interior `STAR fixed` pairing, we want to do a minimal - # .*? match followed by `fixed`, with no possibility of backtracking. - # Atomic groups ("(?>...)") allow us to spell that directly. - # Note: people rely on the undocumented ability to join multiple - # translate() results together via "|" to build large regexps matching - # "one of many" shell patterns. - while i < n: - assert inp[i] is STAR - i += 1 - if i == n: - add(".*") - break - assert inp[i] is not STAR - fixed = [] - while i < n and inp[i] is not STAR: - fixed.append(inp[i]) - i += 1 - fixed = "".join(fixed) - if i == n: - add(".*") - add(fixed) - else: - add(f"(?>.*?{fixed})") + add(_re_escape(c)) assert i == n - res = "".join(res) - return fr'(?s:{res})\Z' + return res, star_indices + + +def _join_translated_parts(parts, star_indices): + if not star_indices: + return fr'(?s:{"".join(parts)})\z' + iter_star_indices = iter(star_indices) + j = next(iter_star_indices) + buffer = parts[:j] # fixed pieces at the start + append, extend = buffer.append, buffer.extend + i = j + 1 + for j in iter_star_indices: + # Now deal with STAR fixed STAR fixed ... + # For an interior `STAR fixed` pairing, we want to do a minimal + # .*? match followed by `fixed`, with no possibility of backtracking. + # Atomic groups ("(?>...)") allow us to spell that directly. + # Note: people rely on the undocumented ability to join multiple + # translate() results together via "|" to build large regexps matching + # "one of many" shell patterns. + append('(?>.*?') + extend(parts[i:j]) + append(')') + i = j + 1 + append('.*') + extend(parts[i:]) + res = ''.join(buffer) + return fr'(?s:{res})\z' diff --git a/PythonLib/full/fractions.py b/PythonLib/full/fractions.py index e3a8bbcf..a497ee19 100644 --- a/PythonLib/full/fractions.py +++ b/PythonLib/full/fractions.py @@ -3,7 +3,6 @@ """Fraction, infinite-precision, rational numbers.""" -from decimal import Decimal import functools import math import numbers @@ -65,7 +64,7 @@ def _hash_algorithm(numerator, denominator): (?:\.(?P\d*|\d+(_\d+)*))? # an optional fractional part (?:E(?P[-+]?\d+(_\d+)*))? # and optional exponent ) - \s*\Z # and optional whitespace to finish + \s*\z # and optional whitespace to finish """, re.VERBOSE | re.IGNORECASE) @@ -139,6 +138,23 @@ def _round_to_figures(n, d, figures): return sign, significand, exponent +# Pattern for matching non-float-style format specifications. +_GENERAL_FORMAT_SPECIFICATION_MATCHER = re.compile(r""" + (?: + (?P.)? + (?P[<>=^]) + )? + (?P[-+ ]?) + # Alt flag forces a slash and denominator in the output, even for + # integer-valued Fraction objects. + (?P\#)? + # We don't implement the zeropad flag since there's no single obvious way + # to interpret it. + (?P0|[1-9][0-9]*)? + (?P[,_])? +""", re.DOTALL | re.VERBOSE).fullmatch + + # Pattern for matching float-style format specifications; # supports 'e', 'E', 'f', 'F', 'g', 'G' and '%' presentation types. _FLOAT_FORMAT_SPECIFICATION_MATCHER = re.compile(r""" @@ -152,9 +168,13 @@ def _round_to_figures(n, d, figures): # A '0' that's *not* followed by another digit is parsed as a minimum width # rather than a zeropad flag. (?P0(?=[0-9]))? - (?P0|[1-9][0-9]*)? + (?P[0-9]+)? (?P[,_])? - (?:\.(?P0|[1-9][0-9]*))? + (?:\. + (?=[,_0-9]) # lookahead for digit or separator + (?P[0-9]+)? + (?P[,_])? + )? (?P[eEfFgG%]) """, re.DOTALL | re.VERBOSE).fullmatch @@ -227,7 +247,9 @@ def __new__(cls, numerator=0, denominator=None): self._denominator = numerator.denominator return self - elif isinstance(numerator, (float, Decimal)): + elif (isinstance(numerator, float) or + (not isinstance(numerator, type) and + hasattr(numerator, 'as_integer_ratio'))): # Exact conversion self._numerator, self._denominator = numerator.as_integer_ratio() return self @@ -261,8 +283,8 @@ def __new__(cls, numerator=0, denominator=None): numerator = -numerator else: - raise TypeError("argument should be a string " - "or a Rational instance") + raise TypeError("argument should be a string or a Rational " + "instance or have the as_integer_ratio() method") elif type(numerator) is int is type(denominator): pass # *very* normal case @@ -288,6 +310,28 @@ def __new__(cls, numerator=0, denominator=None): self._denominator = denominator return self + @classmethod + def from_number(cls, number): + """Converts a finite real number to a rational number, exactly. + + Beware that Fraction.from_number(0.3) != Fraction(3, 10). + + """ + if type(number) is int: + return cls._from_coprime_ints(number, 1) + + elif isinstance(number, numbers.Rational): + return cls._from_coprime_ints(number.numerator, number.denominator) + + elif (isinstance(number, float) or + (not isinstance(number, type) and + hasattr(number, 'as_integer_ratio'))): + return cls._from_coprime_ints(*number.as_integer_ratio()) + + else: + raise TypeError("argument should be a Rational instance or " + "have the as_integer_ratio() method") + @classmethod def from_float(cls, f): """Converts a finite float to a rational number, exactly. @@ -414,27 +458,42 @@ def __str__(self): else: return '%s/%s' % (self._numerator, self._denominator) - def __format__(self, format_spec, /): - """Format this fraction according to the given format specification.""" - - # Backwards compatiblility with existing formatting. - if not format_spec: - return str(self) + def _format_general(self, match): + """Helper method for __format__. + Handles fill, alignment, signs, and thousands separators in the + case of no presentation type. + """ # Validate and parse the format specifier. - match = _FLOAT_FORMAT_SPECIFICATION_MATCHER(format_spec) - if match is None: - raise ValueError( - f"Invalid format specifier {format_spec!r} " - f"for object of type {type(self).__name__!r}" - ) - elif match["align"] is not None and match["zeropad"] is not None: - # Avoid the temptation to guess. - raise ValueError( - f"Invalid format specifier {format_spec!r} " - f"for object of type {type(self).__name__!r}; " - "can't use explicit alignment when zero-padding" - ) + fill = match["fill"] or " " + align = match["align"] or ">" + pos_sign = "" if match["sign"] == "-" else match["sign"] + alternate_form = bool(match["alt"]) + minimumwidth = int(match["minimumwidth"] or "0") + thousands_sep = match["thousands_sep"] or '' + + # Determine the body and sign representation. + n, d = self._numerator, self._denominator + if d > 1 or alternate_form: + body = f"{abs(n):{thousands_sep}}/{d:{thousands_sep}}" + else: + body = f"{abs(n):{thousands_sep}}" + sign = '-' if n < 0 else pos_sign + + # Pad with fill character if necessary and return. + padding = fill * (minimumwidth - len(sign) - len(body)) + if align == ">": + return padding + sign + body + elif align == "<": + return sign + body + padding + elif align == "^": + half = len(padding) // 2 + return padding[:half] + sign + body + padding[half:] + else: # align == "=" + return sign + padding + body + + def _format_float_style(self, match): + """Helper method for __format__; handles float presentation types.""" fill = match["fill"] or " " align = match["align"] or ">" pos_sign = "" if match["sign"] == "-" else match["sign"] @@ -444,11 +503,15 @@ def __format__(self, format_spec, /): minimumwidth = int(match["minimumwidth"] or "0") thousands_sep = match["thousands_sep"] precision = int(match["precision"] or "6") + frac_sep = match["frac_separators"] or "" presentation_type = match["presentation_type"] trim_zeros = presentation_type in "gG" and not alternate_form trim_point = not alternate_form exponent_indicator = "E" if presentation_type in "EFG" else "e" + if align == '=' and fill == '0': + zeropad = True + # Round to get the digits we need, figure out where to place the point, # and decide whether to use scientific notation. 'point_pos' is the # relative to the _end_ of the digit string: that is, it's the number @@ -497,6 +560,9 @@ def __format__(self, format_spec, /): if trim_zeros: frac_part = frac_part.rstrip("0") separator = "" if trim_point and not frac_part else "." + if frac_sep: + frac_part = frac_sep.join(frac_part[pos:pos + 3] + for pos in range(0, len(frac_part), 3)) trailing = separator + frac_part + suffix # Do zero padding if required. @@ -530,7 +596,25 @@ def __format__(self, format_spec, /): else: # align == "=" return sign + padding + body - def _operator_fallbacks(monomorphic_operator, fallback_operator): + def __format__(self, format_spec, /): + """Format this fraction according to the given format specification.""" + + if match := _GENERAL_FORMAT_SPECIFICATION_MATCHER(format_spec): + return self._format_general(match) + + if match := _FLOAT_FORMAT_SPECIFICATION_MATCHER(format_spec): + # Refuse the temptation to guess if both alignment _and_ + # zero padding are specified. + if match["align"] is None or match["zeropad"] is None: + return self._format_float_style(match) + + raise ValueError( + f"Invalid format specifier {format_spec!r} " + f"for object of type {type(self).__name__!r}" + ) + + def _operator_fallbacks(monomorphic_operator, fallback_operator, + handle_complex=True): """Generates forward and reverse operators given a purely-rational operator and a function from the operator module. @@ -617,8 +701,8 @@ def forward(a, b): return monomorphic_operator(a, Fraction(b)) elif isinstance(b, float): return fallback_operator(float(a), b) - elif isinstance(b, complex): - return fallback_operator(complex(a), b) + elif handle_complex and isinstance(b, complex): + return fallback_operator(float(a), b) else: return NotImplemented forward.__name__ = '__' + fallback_operator.__name__ + '__' @@ -630,8 +714,8 @@ def reverse(b, a): return monomorphic_operator(Fraction(a), b) elif isinstance(a, numbers.Real): return fallback_operator(float(a), float(b)) - elif isinstance(a, numbers.Complex): - return fallback_operator(complex(a), complex(b)) + elif handle_complex and isinstance(a, numbers.Complex): + return fallback_operator(complex(a), float(b)) else: return NotImplemented reverse.__name__ = '__r' + fallback_operator.__name__ + '__' @@ -781,7 +865,7 @@ def _floordiv(a, b): """a // b""" return (a.numerator * b.denominator) // (a.denominator * b.numerator) - __floordiv__, __rfloordiv__ = _operator_fallbacks(_floordiv, operator.floordiv) + __floordiv__, __rfloordiv__ = _operator_fallbacks(_floordiv, operator.floordiv, False) def _divmod(a, b): """(a // b, a % b)""" @@ -789,16 +873,16 @@ def _divmod(a, b): div, n_mod = divmod(a.numerator * db, da * b.numerator) return div, Fraction(n_mod, da * db) - __divmod__, __rdivmod__ = _operator_fallbacks(_divmod, divmod) + __divmod__, __rdivmod__ = _operator_fallbacks(_divmod, divmod, False) def _mod(a, b): """a % b""" da, db = a.denominator, b.denominator return Fraction((a.numerator * db) % (b.numerator * da), da * db) - __mod__, __rmod__ = _operator_fallbacks(_mod, operator.mod) + __mod__, __rmod__ = _operator_fallbacks(_mod, operator.mod, False) - def __pow__(a, b): + def __pow__(a, b, modulo=None): """a ** b If b is not an integer, the result will be a float or complex @@ -806,6 +890,8 @@ def __pow__(a, b): result will be rational. """ + if modulo is not None: + return NotImplemented if isinstance(b, numbers.Rational): if b.denominator == 1: power = b.numerator @@ -830,8 +916,10 @@ def __pow__(a, b): else: return NotImplemented - def __rpow__(b, a): + def __rpow__(b, a, modulo=None): """a ** b""" + if modulo is not None: + return NotImplemented if b._denominator == 1 and b._numerator >= 0: # If a is an int, keep it that way if possible. return a ** b._numerator diff --git a/PythonLib/full/ftplib.py b/PythonLib/full/ftplib.py index 10c5d1ea..50771e8c 100644 --- a/PythonLib/full/ftplib.py +++ b/PythonLib/full/ftplib.py @@ -343,7 +343,7 @@ def ntransfercmd(self, cmd, rest=None): connection and the expected size of the transfer. The expected size may be None if it could not be determined. - Optional `rest' argument can be a string that is sent as the + Optional 'rest' argument can be a string that is sent as the argument to a REST command. This is essentially a server marker used to tell the server to skip over any data up to the given marker. diff --git a/PythonLib/full/functools.py b/PythonLib/full/functools.py index 6025032b..df4660ee 100644 --- a/PythonLib/full/functools.py +++ b/PythonLib/full/functools.py @@ -6,21 +6,21 @@ # Written by Nick Coghlan , # Raymond Hettinger , # and Łukasz Langa . -# Copyright (C) 2006-2013 Python Software Foundation. +# Copyright (C) 2006 Python Software Foundation. # See C source code for _functools credits/copyright __all__ = ['update_wrapper', 'wraps', 'WRAPPER_ASSIGNMENTS', 'WRAPPER_UPDATES', 'total_ordering', 'cache', 'cmp_to_key', 'lru_cache', 'reduce', 'partial', 'partialmethod', 'singledispatch', 'singledispatchmethod', - 'cached_property'] + 'cached_property', 'Placeholder'] from abc import get_cache_token from collections import namedtuple -# import types, weakref # Deferred to single_dispatch() +# import weakref # Deferred to single_dispatch() +from operator import itemgetter from reprlib import recursive_repr +from types import GenericAlias, MethodType, MappingProxyType, UnionType from _thread import RLock -from types import GenericAlias - ################################################################################ ### update_wrapper() and wraps() decorator @@ -30,7 +30,7 @@ # wrapper functions that can handle naive introspection WRAPPER_ASSIGNMENTS = ('__module__', '__name__', '__qualname__', '__doc__', - '__annotations__', '__type_params__') + '__annotate__', '__type_params__') WRAPPER_UPDATES = ('__dict__',) def update_wrapper(wrapper, wrapped, @@ -236,7 +236,7 @@ def __ge__(self, other): def reduce(function, sequence, initial=_initial_missing): """ - reduce(function, iterable[, initial]) -> value + reduce(function, iterable, /[, initial]) -> value Apply a function of two arguments cumulatively to the items of an iterable, from left to right. @@ -264,53 +264,138 @@ def reduce(function, sequence, initial=_initial_missing): return value -try: - from _functools import reduce -except ImportError: - pass - ################################################################################ ### partial() argument application ################################################################################ -# Purely functional, no descriptor behaviour -class partial: - """New function with partial application of the given arguments - and keywords. + +class _PlaceholderType: + """The type of the Placeholder singleton. + + Used as a placeholder for partial arguments. """ + __instance = None + __slots__ = () - __slots__ = "func", "args", "keywords", "__dict__", "__weakref__" + def __init_subclass__(cls, *args, **kwargs): + raise TypeError(f"type '{cls.__name__}' is not an acceptable base type") - def __new__(cls, func, /, *args, **keywords): + def __new__(cls): + if cls.__instance is None: + cls.__instance = object.__new__(cls) + return cls.__instance + + def __repr__(self): + return 'Placeholder' + + def __reduce__(self): + return 'Placeholder' + +Placeholder = _PlaceholderType() + +def _partial_prepare_merger(args): + if not args: + return 0, None + nargs = len(args) + order = [] + j = nargs + for i, a in enumerate(args): + if a is Placeholder: + order.append(j) + j += 1 + else: + order.append(i) + phcount = j - nargs + merger = itemgetter(*order) if phcount else None + return phcount, merger + +def _partial_new(cls, func, /, *args, **keywords): + if issubclass(cls, partial): + base_cls = partial if not callable(func): raise TypeError("the first argument must be callable") + else: + base_cls = partialmethod + # func could be a descriptor like classmethod which isn't callable + if not callable(func) and not hasattr(func, "__get__"): + raise TypeError(f"the first argument {func!r} must be a callable " + "or a descriptor") + if args and args[-1] is Placeholder: + raise TypeError("trailing Placeholders are not allowed") + for value in keywords.values(): + if value is Placeholder: + raise TypeError("Placeholder cannot be passed as a keyword argument") + if isinstance(func, base_cls): + pto_phcount = func._phcount + tot_args = func.args + if args: + tot_args += args + if pto_phcount: + # merge args with args of `func` which is `partial` + nargs = len(args) + if nargs < pto_phcount: + tot_args += (Placeholder,) * (pto_phcount - nargs) + tot_args = func._merger(tot_args) + if nargs > pto_phcount: + tot_args += args[pto_phcount:] + phcount, merger = _partial_prepare_merger(tot_args) + else: # works for both pto_phcount == 0 and != 0 + phcount, merger = pto_phcount, func._merger + keywords = {**func.keywords, **keywords} + func = func.func + else: + tot_args = args + phcount, merger = _partial_prepare_merger(tot_args) + + self = object.__new__(cls) + self.func = func + self.args = tot_args + self.keywords = keywords + self._phcount = phcount + self._merger = merger + return self + +def _partial_repr(self): + cls = type(self) + module = cls.__module__ + qualname = cls.__qualname__ + args = [repr(self.func)] + args.extend(map(repr, self.args)) + args.extend(f"{k}={v!r}" for k, v in self.keywords.items()) + return f"{module}.{qualname}({', '.join(args)})" - if hasattr(func, "func"): - args = func.args + args - keywords = {**func.keywords, **keywords} - func = func.func +# Purely functional, no descriptor behaviour +class partial: + """New function with partial application of the given arguments + and keywords. + """ - self = super(partial, cls).__new__(cls) + __slots__ = ("func", "args", "keywords", "_phcount", "_merger", + "__dict__", "__weakref__") - self.func = func - self.args = args - self.keywords = keywords - return self + __new__ = _partial_new + __repr__ = recursive_repr()(_partial_repr) def __call__(self, /, *args, **keywords): + phcount = self._phcount + if phcount: + try: + pto_args = self._merger(self.args + args) + args = args[phcount:] + except IndexError: + raise TypeError("missing positional arguments " + "in 'partial' call; expected " + f"at least {phcount}, got {len(args)}") + else: + pto_args = self.args keywords = {**self.keywords, **keywords} - return self.func(*self.args, *args, **keywords) + return self.func(*pto_args, *args, **keywords) - @recursive_repr() - def __repr__(self): - qualname = type(self).__qualname__ - args = [repr(self.func)] - args.extend(repr(x) for x in self.args) - args.extend(f"{k}={v!r}" for (k, v) in self.keywords.items()) - if type(self).__module__ == "functools": - return f"functools.{qualname}({', '.join(args)})" - return f"{qualname}({', '.join(args)})" + def __get__(self, obj, objtype=None): + if obj is None: + return self + return MethodType(self, obj) def __reduce__(self): return type(self), (self.func,), (self.func, self.args, @@ -327,6 +412,10 @@ def __setstate__(self, state): (namespace is not None and not isinstance(namespace, dict))): raise TypeError("invalid partial state") + if args and args[-1] is Placeholder: + raise TypeError("trailing Placeholders are not allowed") + phcount, merger = _partial_prepare_merger(args) + args = tuple(args) # just in case it's a subclass if kwds is None: kwds = {} @@ -339,58 +428,45 @@ def __setstate__(self, state): self.func = func self.args = args self.keywords = kwds + self._phcount = phcount + self._merger = merger __class_getitem__ = classmethod(GenericAlias) try: - from _functools import partial + from _functools import partial, Placeholder, _PlaceholderType except ImportError: pass # Descriptor version -class partialmethod(object): +class partialmethod: """Method descriptor with partial application of the given arguments and keywords. Supports wrapping existing descriptors and handles non-descriptor callables as instance methods. """ - - def __init__(self, func, /, *args, **keywords): - if not callable(func) and not hasattr(func, "__get__"): - raise TypeError("{!r} is not callable or a descriptor" - .format(func)) - - # func could be a descriptor like classmethod which isn't callable, - # so we can't inherit from partial (it verifies func is callable) - if isinstance(func, partialmethod): - # flattening is mandatory in order to place cls/self before all - # other arguments - # it's also more efficient since only one function will be called - self.func = func.func - self.args = func.args + args - self.keywords = {**func.keywords, **keywords} - else: - self.func = func - self.args = args - self.keywords = keywords - - def __repr__(self): - cls = type(self) - module = cls.__module__ - qualname = cls.__qualname__ - args = [repr(self.func)] - args.extend(map(repr, self.args)) - args.extend(f"{k}={v!r}" for k, v in self.keywords.items()) - return f"{module}.{qualname}({', '.join(args)})" + __new__ = _partial_new + __repr__ = _partial_repr def _make_unbound_method(self): def _method(cls_or_self, /, *args, **keywords): + phcount = self._phcount + if phcount: + try: + pto_args = self._merger(self.args + args) + args = args[phcount:] + except IndexError: + raise TypeError("missing positional arguments " + "in 'partialmethod' call; expected " + f"at least {phcount}, got {len(args)}") + else: + pto_args = self.args keywords = {**self.keywords, **keywords} - return self.func(cls_or_self, *self.args, *args, **keywords) + return self.func(cls_or_self, *pto_args, *args, **keywords) _method.__isabstractmethod__ = self.__isabstractmethod__ - _method._partialmethod = self + _method.__partialmethod__ = self return _method def __get__(self, obj, cls=None): @@ -426,28 +502,23 @@ def _unwrap_partial(func): func = func.func return func +def _unwrap_partialmethod(func): + prev = None + while func is not prev: + prev = func + while isinstance(getattr(func, "__partialmethod__", None), partialmethod): + func = func.__partialmethod__ + while isinstance(func, partialmethod): + func = getattr(func, 'func') + func = _unwrap_partial(func) + return func + ################################################################################ ### LRU Cache function decorator ################################################################################ _CacheInfo = namedtuple("CacheInfo", ["hits", "misses", "maxsize", "currsize"]) -class _HashedSeq(list): - """ This class guarantees that hash() will be called no more than once - per element. This is important because the lru_cache() will hash - the key multiple times on a cache miss. - - """ - - __slots__ = 'hashvalue' - - def __init__(self, tup, hash=hash): - self[:] = tup - self.hashvalue = hash(tup) - - def __hash__(self): - return self.hashvalue - def _make_key(args, kwds, typed, kwd_mark = (object(),), fasttypes = {int, str}, @@ -477,7 +548,7 @@ def _make_key(args, kwds, typed, key += tuple(type(v) for v in kwds.values()) elif len(key) == 1 and type(key[0]) in fasttypes: return key[0] - return _HashedSeq(key) + return key def lru_cache(maxsize=128, typed=False): """Least-recently-used cache decorator. @@ -486,8 +557,9 @@ def lru_cache(maxsize=128, typed=False): can grow without bound. If *typed* is True, arguments of different types will be cached separately. - For example, f(3.0) and f(3) will be treated as distinct calls with - distinct results. + For example, f(decimal.Decimal("3.0")) and f(3.0) will be treated as + distinct calls with distinct results. Some types such as str and int may + be cached separately even when typed is false. Arguments to the cached function must be hashable. @@ -812,7 +884,7 @@ def singledispatch(func): # There are many programs that use functools without singledispatch, so we # trade-off making singledispatch marginally slower for the benefit of # making start-up of such applications slightly faster. - import types, weakref + import weakref registry = {} dispatch_cache = weakref.WeakKeyDictionary() @@ -841,16 +913,11 @@ def dispatch(cls): dispatch_cache[cls] = impl return impl - def _is_union_type(cls): - from typing import get_origin, Union - return get_origin(cls) in {Union, types.UnionType} - def _is_valid_dispatch_type(cls): if isinstance(cls, type): return True - from typing import get_args - return (_is_union_type(cls) and - all(isinstance(arg, type) for arg in get_args(cls))) + return (isinstance(cls, UnionType) and + all(isinstance(arg, type) for arg in cls.__args__)) def register(cls, func=None): """generic_func.register(cls, func) -> func @@ -868,8 +935,8 @@ def register(cls, func=None): f"Invalid first argument to `register()`. " f"{cls!r} is not a class or union type." ) - ann = getattr(cls, '__annotations__', {}) - if not ann: + ann = getattr(cls, '__annotate__', None) + if ann is None: raise TypeError( f"Invalid first argument to `register()`: {cls!r}. " f"Use either `@register(some_class)` or plain `@register` " @@ -879,23 +946,27 @@ def register(cls, func=None): # only import typing if annotation parsing is necessary from typing import get_type_hints - argname, cls = next(iter(get_type_hints(func).items())) + from annotationlib import Format, ForwardRef + argname, cls = next(iter(get_type_hints(func, format=Format.FORWARDREF).items())) if not _is_valid_dispatch_type(cls): - if _is_union_type(cls): + if isinstance(cls, UnionType): raise TypeError( f"Invalid annotation for {argname!r}. " f"{cls!r} not all arguments are classes." ) + elif isinstance(cls, ForwardRef): + raise TypeError( + f"Invalid annotation for {argname!r}. " + f"{cls!r} is an unresolved forward reference." + ) else: raise TypeError( f"Invalid annotation for {argname!r}. " f"{cls!r} is not a class." ) - if _is_union_type(cls): - from typing import get_args - - for arg in get_args(cls): + if isinstance(cls, UnionType): + for arg in cls.__args__: registry[arg] = func else: registry[cls] = func @@ -908,14 +979,13 @@ def wrapper(*args, **kw): if not args: raise TypeError(f'{funcname} requires at least ' '1 positional argument') - return dispatch(args[0].__class__)(*args, **kw) funcname = getattr(func, '__name__', 'singledispatch function') registry[object] = func wrapper.register = register wrapper.dispatch = dispatch - wrapper.registry = types.MappingProxyType(registry) + wrapper.registry = MappingProxyType(registry) wrapper._clear_cache = dispatch_cache.clear update_wrapper(wrapper, func) return wrapper @@ -925,8 +995,7 @@ def wrapper(*args, **kw): class singledispatchmethod: """Single-dispatch generic method descriptor. - Supports wrapping existing descriptors and handles non-descriptor - callables as instance methods. + Supports wrapping existing descriptors. """ def __init__(self, func): @@ -944,19 +1013,77 @@ def register(self, cls, method=None): return self.dispatcher.register(cls, func=method) def __get__(self, obj, cls=None): - def _method(*args, **kwargs): - method = self.dispatcher.dispatch(args[0].__class__) - return method.__get__(obj, cls)(*args, **kwargs) - - _method.__isabstractmethod__ = self.__isabstractmethod__ - _method.register = self.register - update_wrapper(_method, self.func) - return _method + return _singledispatchmethod_get(self, obj, cls) @property def __isabstractmethod__(self): return getattr(self.func, '__isabstractmethod__', False) + def __repr__(self): + try: + name = self.func.__qualname__ + except AttributeError: + try: + name = self.func.__name__ + except AttributeError: + name = '?' + return f'' + +class _singledispatchmethod_get: + def __init__(self, unbound, obj, cls): + self._unbound = unbound + self._dispatch = unbound.dispatcher.dispatch + self._obj = obj + self._cls = cls + # Set instance attributes which cannot be handled in __getattr__() + # because they conflict with type descriptors. + func = unbound.func + try: + self.__module__ = func.__module__ + except AttributeError: + pass + try: + self.__doc__ = func.__doc__ + except AttributeError: + pass + + def __repr__(self): + try: + name = self.__qualname__ + except AttributeError: + try: + name = self.__name__ + except AttributeError: + name = '?' + if self._obj is not None: + return f'' + else: + return f'' + + def __call__(self, /, *args, **kwargs): + if not args: + funcname = getattr(self._unbound.func, '__name__', + 'singledispatchmethod method') + raise TypeError(f'{funcname} requires at least ' + '1 positional argument') + return self._dispatch(args[0].__class__).__get__(self._obj, self._cls)(*args, **kwargs) + + def __getattr__(self, name): + # Resolve these attributes lazily to speed up creation of + # the _singledispatchmethod_get instance. + if name not in {'__name__', '__qualname__', '__isabstractmethod__', + '__annotations__', '__type_params__'}: + raise AttributeError + return getattr(self._unbound.func, name) + + @property + def __wrapped__(self): + return self._unbound.func + + @property + def register(self): + return self._unbound.register + ################################################################################ ### cached_property() - property result cached as instance attribute @@ -969,6 +1096,7 @@ def __init__(self, func): self.func = func self.attrname = None self.__doc__ = func.__doc__ + self.__module__ = func.__module__ def __set_name__(self, owner, name): if self.attrname is None: @@ -1007,3 +1135,31 @@ def __get__(self, instance, owner=None): return val __class_getitem__ = classmethod(GenericAlias) + +def _warn_python_reduce_kwargs(py_reduce): + @wraps(py_reduce) + def wrapper(*args, **kwargs): + if 'function' in kwargs or 'sequence' in kwargs: + import os + import warnings + warnings.warn( + 'Calling functools.reduce with keyword arguments ' + '"function" or "sequence" ' + 'is deprecated in Python 3.14 and will be ' + 'forbidden in Python 3.16.', + DeprecationWarning, + skip_file_prefixes=(os.path.dirname(__file__),)) + return py_reduce(*args, **kwargs) + return wrapper + +reduce = _warn_python_reduce_kwargs(reduce) +del _warn_python_reduce_kwargs + +# The import of the C accelerated version of reduce() has been moved +# here due to gh-121676. In Python 3.16, _warn_python_reduce_kwargs() +# should be removed and the import block should be moved back right +# after the definition of reduce(). +try: + from _functools import reduce +except ImportError: + pass diff --git a/PythonLib/full/genericpath.py b/PythonLib/full/genericpath.py index 1bd5b389..9363f564 100644 --- a/PythonLib/full/genericpath.py +++ b/PythonLib/full/genericpath.py @@ -7,8 +7,8 @@ import stat __all__ = ['commonprefix', 'exists', 'getatime', 'getctime', 'getmtime', - 'getsize', 'isdir', 'isfile', 'islink', 'samefile', 'sameopenfile', - 'samestat'] + 'getsize', 'isdevdrive', 'isdir', 'isfile', 'isjunction', 'islink', + 'lexists', 'samefile', 'sameopenfile', 'samestat', 'ALLOW_MISSING'] # Does a path exist? @@ -22,6 +22,15 @@ def exists(path): return True +# Being true for dangling symbolic links is also useful. +def lexists(path): + """Test whether a path exists. Returns True for broken symbolic links""" + try: + os.lstat(path) + except (OSError, ValueError): + return False + return True + # This follows symbolic links, so both islink() and isdir() can be true # for the same path on systems that support symlinks def isfile(path): @@ -57,6 +66,21 @@ def islink(path): return stat.S_ISLNK(st.st_mode) +# Is a path a junction? +def isjunction(path): + """Test whether a path is a junction + Junctions are not supported on the current platform""" + os.fspath(path) + return False + + +def isdevdrive(path): + """Determines whether the specified path is on a Windows Dev Drive. + Dev Drives are not supported on the current platform""" + os.fspath(path) + return False + + def getsize(filename): """Return the size of a file, reported by os.stat().""" return os.stat(filename).st_size @@ -165,3 +189,12 @@ def _check_arg_types(funcname, *args): f'os.PathLike object, not {s.__class__.__name__!r}') from None if hasstr and hasbytes: raise TypeError("Can't mix strings and bytes in path components") from None + +# A singleton with a true boolean value. +@object.__new__ +class ALLOW_MISSING: + """Special value for use in realpath().""" + def __repr__(self): + return 'os.path.ALLOW_MISSING' + def __reduce__(self): + return self.__class__.__name__ diff --git a/PythonLib/full/getopt.py b/PythonLib/full/getopt.py index 5419d77f..25f3e243 100644 --- a/PythonLib/full/getopt.py +++ b/PythonLib/full/getopt.py @@ -2,8 +2,8 @@ This module helps scripts to parse the command line arguments in sys.argv. It supports the same conventions as the Unix getopt() -function (including the special meanings of arguments of the form `-' -and `--'). Long options similar to those supported by GNU software +function (including the special meanings of arguments of the form '-' +and '--'). Long options similar to those supported by GNU software may be used as well via an optional third argument. This module provides two functions and an exception: @@ -24,21 +24,14 @@ # TODO for gnu_getopt(): # # - GNU getopt_long_only mechanism -# - allow the caller to specify ordering -# - RETURN_IN_ORDER option -# - GNU extension with '-' as first character of option string -# - optional arguments, specified by double colons # - an option string with a W followed by semicolon should # treat "-W foo" as "--foo" __all__ = ["GetoptError","error","getopt","gnu_getopt"] import os -try: - from gettext import gettext as _ -except ImportError: - # Bootstrapping Python: gettext's dependencies not built yet - def _(s): return s +from gettext import gettext as _ + class GetoptError(Exception): opt = '' @@ -61,12 +54,14 @@ def getopt(args, shortopts, longopts = []): running program. Typically, this means "sys.argv[1:]". shortopts is the string of option letters that the script wants to recognize, with options that require an argument followed by a - colon (i.e., the same format that Unix getopt() uses). If + colon and options that accept an optional argument followed by + two colons (i.e., the same format that Unix getopt() uses). If specified, longopts is a list of strings with the names of the long options which should be supported. The leading '--' characters should not be included in the option name. Options which require an argument should be followed by an equal sign - ('='). + ('='). Options which accept an optional argument should be + followed by an equal sign and question mark ('=?'). The return value consists of two elements: the first is a list of (option, value) pairs; the second is the list of program arguments @@ -105,7 +100,7 @@ def gnu_getopt(args, shortopts, longopts = []): processing options as soon as a non-option argument is encountered. - If the first character of the option string is `+', or if the + If the first character of the option string is '+', or if the environment variable POSIXLY_CORRECT is set, then option processing stops as soon as a non-option argument is encountered. @@ -118,8 +113,13 @@ def gnu_getopt(args, shortopts, longopts = []): else: longopts = list(longopts) + return_in_order = False + if shortopts.startswith('-'): + shortopts = shortopts[1:] + all_options_first = False + return_in_order = True # Allow options after non-option arguments? - if shortopts.startswith('+'): + elif shortopts.startswith('+'): shortopts = shortopts[1:] all_options_first = True elif os.environ.get("POSIXLY_CORRECT"): @@ -133,8 +133,14 @@ def gnu_getopt(args, shortopts, longopts = []): break if args[0][:2] == '--': + if return_in_order and prog_args: + opts.append((None, prog_args)) + prog_args = [] opts, args = do_longs(opts, args[0][2:], longopts, args[1:]) elif args[0][:1] == '-' and args[0] != '-': + if return_in_order and prog_args: + opts.append((None, prog_args)) + prog_args = [] opts, args = do_shorts(opts, args[0][1:], shortopts, args[1:]) else: if all_options_first: @@ -156,7 +162,7 @@ def do_longs(opts, opt, longopts, args): has_arg, opt = long_has_args(opt, longopts) if has_arg: - if optarg is None: + if optarg is None and has_arg != '?': if not args: raise GetoptError(_('option --%s requires argument') % opt, opt) optarg, args = args[0], args[1:] @@ -177,13 +183,19 @@ def long_has_args(opt, longopts): return False, opt elif opt + '=' in possibilities: return True, opt - # No exact match, so better be unique. + elif opt + '=?' in possibilities: + return '?', opt + # Possibilities must be unique to be accepted if len(possibilities) > 1: - # XXX since possibilities contains all valid continuations, might be - # nice to work them into the error msg - raise GetoptError(_('option --%s not a unique prefix') % opt, opt) + raise GetoptError( + _("option --%s not a unique prefix; possible options: %s") + % (opt, ", ".join(possibilities)), + opt, + ) assert len(possibilities) == 1 unique_match = possibilities[0] + if unique_match.endswith('=?'): + return '?', unique_match[:-2] has_arg = unique_match.endswith('=') if has_arg: unique_match = unique_match[:-1] @@ -192,8 +204,9 @@ def long_has_args(opt, longopts): def do_shorts(opts, optstring, shortopts, args): while optstring != '': opt, optstring = optstring[0], optstring[1:] - if short_has_arg(opt, shortopts): - if optstring == '': + has_arg = short_has_arg(opt, shortopts) + if has_arg: + if optstring == '' and has_arg != '?': if not args: raise GetoptError(_('option -%s requires argument') % opt, opt) @@ -207,7 +220,11 @@ def do_shorts(opts, optstring, shortopts, args): def short_has_arg(opt, shortopts): for i in range(len(shortopts)): if opt == shortopts[i] != ':': - return shortopts.startswith(':', i+1) + if not shortopts.startswith(':', i+1): + return False + if shortopts.startswith('::', i+1): + return '?' + return True raise GetoptError(_('option -%s not recognized') % opt, opt) if __name__ == '__main__': diff --git a/PythonLib/full/getpass.py b/PythonLib/full/getpass.py index 6970d8ad..3d9bb1f0 100644 --- a/PythonLib/full/getpass.py +++ b/PythonLib/full/getpass.py @@ -1,6 +1,7 @@ """Utilities to get a password and/or the current user name. -getpass(prompt[, stream]) - Prompt for a password, with echo turned off. +getpass(prompt[, stream[, echo_char]]) - Prompt for a password, with echo +turned off and optional keyboard feedback. getuser() - Get the user name from the environment or password database. GetPassWarning - This UserWarning is issued when getpass() cannot prevent @@ -18,7 +19,6 @@ import io import os import sys -import warnings __all__ = ["getpass","getuser","GetPassWarning"] @@ -26,13 +26,15 @@ class GetPassWarning(UserWarning): pass -def unix_getpass(prompt='Password: ', stream=None): +def unix_getpass(prompt='Password: ', stream=None, *, echo_char=None): """Prompt for a password, with echo turned off. Args: prompt: Written on stream to ask for the input. Default: 'Password: ' stream: A writable file object to display the prompt. Defaults to the tty. If no tty is available defaults to sys.stderr. + echo_char: A single ASCII character to mask input (e.g., '*'). + If None, input is hidden. Returns: The seKr3t input. Raises: @@ -41,6 +43,8 @@ def unix_getpass(prompt='Password: ', stream=None): Always restores terminal settings before returning. """ + _check_echo_char(echo_char) + passwd = None with contextlib.ExitStack() as stack: try: @@ -69,12 +73,16 @@ def unix_getpass(prompt='Password: ', stream=None): old = termios.tcgetattr(fd) # a copy to save new = old[:] new[3] &= ~termios.ECHO # 3 == 'lflags' + if echo_char: + new[3] &= ~termios.ICANON tcsetattr_flags = termios.TCSAFLUSH if hasattr(termios, 'TCSASOFT'): tcsetattr_flags |= termios.TCSASOFT try: termios.tcsetattr(fd, tcsetattr_flags, new) - passwd = _raw_input(prompt, stream, input=input) + passwd = _raw_input(prompt, stream, input=input, + echo_char=echo_char) + finally: termios.tcsetattr(fd, tcsetattr_flags, old) stream.flush() # issue7208 @@ -94,10 +102,11 @@ def unix_getpass(prompt='Password: ', stream=None): return passwd -def win_getpass(prompt='Password: ', stream=None): +def win_getpass(prompt='Password: ', stream=None, *, echo_char=None): """Prompt for password with echo off, using Windows getwch().""" if sys.stdin is not sys.__stdin__: return fallback_getpass(prompt, stream) + _check_echo_char(echo_char) for c in prompt: msvcrt.putwch(c) @@ -109,24 +118,48 @@ def win_getpass(prompt='Password: ', stream=None): if c == '\003': raise KeyboardInterrupt if c == '\b': + if echo_char and pw: + msvcrt.putwch('\b') + msvcrt.putwch(' ') + msvcrt.putwch('\b') pw = pw[:-1] else: pw = pw + c + if echo_char: + msvcrt.putwch(echo_char) msvcrt.putwch('\r') msvcrt.putwch('\n') return pw -def fallback_getpass(prompt='Password: ', stream=None): +def fallback_getpass(prompt='Password: ', stream=None, *, echo_char=None): + _check_echo_char(echo_char) + import warnings warnings.warn("Can not control echo on the terminal.", GetPassWarning, stacklevel=2) if not stream: stream = sys.stderr print("Warning: Password input may be echoed.", file=stream) - return _raw_input(prompt, stream) + return _raw_input(prompt, stream, echo_char=echo_char) + +def _check_echo_char(echo_char): + # Single-character ASCII excluding control characters + if echo_char is None: + return + if not isinstance(echo_char, str): + raise TypeError("'echo_char' must be a str or None, not " + f"{type(echo_char).__name__}") + if not ( + len(echo_char) == 1 + and echo_char.isprintable() + and echo_char.isascii() + ): + raise ValueError("'echo_char' must be a single printable ASCII " + f"character, got: {echo_char!r}") -def _raw_input(prompt="", stream=None, input=None): + +def _raw_input(prompt="", stream=None, input=None, echo_char=None): # This doesn't save the string in the GNU readline history. if not stream: stream = sys.stderr @@ -143,6 +176,8 @@ def _raw_input(prompt="", stream=None, input=None): stream.write(prompt) stream.flush() # NOTE: The Python C API calls flockfile() (and unlock) during readline. + if echo_char: + return _readline_with_echo_char(stream, input, echo_char) line = input.readline() if not line: raise EOFError @@ -151,12 +186,45 @@ def _raw_input(prompt="", stream=None, input=None): return line +def _readline_with_echo_char(stream, input, echo_char): + passwd = "" + eof_pressed = False + while True: + char = input.read(1) + if char == '\n' or char == '\r': + break + elif char == '\x03': + raise KeyboardInterrupt + elif char == '\x7f' or char == '\b': + if passwd: + stream.write("\b \b") + stream.flush() + passwd = passwd[:-1] + elif char == '\x04': + if eof_pressed: + break + else: + eof_pressed = True + elif char == '\x00': + continue + else: + passwd += char + stream.write(echo_char) + stream.flush() + eof_pressed = False + return passwd + + def getuser(): """Get the username from the environment or password database. First try various environment variables, then the password database. This works on Windows as long as USERNAME is set. + Any failure to find a username raises OSError. + .. versionchanged:: 3.13 + Previously, various exceptions beyond just :exc:`OSError` + were raised. """ for name in ('LOGNAME', 'USER', 'LNAME', 'USERNAME'): @@ -164,9 +232,12 @@ def getuser(): if user: return user - # If this fails, the exception will "explain" why - import pwd - return pwd.getpwuid(os.getuid())[0] + try: + import pwd + return pwd.getpwuid(os.getuid())[0] + except (ImportError, KeyError) as e: + raise OSError('No username set in the environment') from e + # Bind the name getpass to the appropriate function try: diff --git a/PythonLib/full/gettext.py b/PythonLib/full/gettext.py index e84765bf..6c11ab2b 100644 --- a/PythonLib/full/gettext.py +++ b/PythonLib/full/gettext.py @@ -41,14 +41,10 @@ # to do binary searches and lazy initializations. Or you might want to use # the undocumented double-hash algorithm for .mo files with hash tables, but # you'll need to study the GNU gettext code to do this. -# -# - Support Solaris .mo file formats. Unfortunately, we've been unable to -# find this format documented anywhere. import operator import os -import re import sys @@ -70,22 +66,26 @@ # https://www.gnu.org/software/gettext/manual/gettext.html#Plural-forms # http://git.savannah.gnu.org/cgit/gettext.git/tree/gettext-runtime/intl/plural.y -_token_pattern = re.compile(r""" - (?P[ \t]+) | # spaces and horizontal tabs - (?P[0-9]+\b) | # decimal integer - (?Pn\b) | # only n is allowed - (?P[()]) | - (?P[-*/%+?:]|[>, - # <=, >=, ==, !=, &&, ||, - # ? : - # unary and bitwise ops - # not allowed - (?P\w+|.) # invalid token - """, re.VERBOSE|re.DOTALL) - +_token_pattern = None def _tokenize(plural): - for mo in re.finditer(_token_pattern, plural): + global _token_pattern + if _token_pattern is None: + import re + _token_pattern = re.compile(r""" + (?P[ \t]+) | # spaces and horizontal tabs + (?P[0-9]+\b) | # decimal integer + (?Pn\b) | # only n is allowed + (?P[()]) | + (?P[-*/%+?:]|[>, + # <=, >=, ==, !=, &&, ||, + # ? : + # unary and bitwise ops + # not allowed + (?P\w+|.) # invalid token + """, re.VERBOSE|re.DOTALL) + + for mo in _token_pattern.finditer(plural): kind = mo.lastgroup if kind == 'WHITESPACES': continue @@ -171,6 +171,13 @@ def _as_int(n): except TypeError: raise TypeError('Plural value must be an integer, got %s' % (n.__class__.__name__,)) from None + return _as_int2(n) + +def _as_int2(n): + try: + return operator.index(n) + except TypeError: + pass import warnings frame = sys._getframe(1) @@ -288,6 +295,7 @@ def gettext(self, message): def ngettext(self, msgid1, msgid2, n): if self._fallback: return self._fallback.ngettext(msgid1, msgid2, n) + n = _as_int2(n) if n == 1: return msgid1 else: @@ -301,6 +309,7 @@ def pgettext(self, context, message): def npgettext(self, context, msgid1, msgid2, n): if self._fallback: return self._fallback.npgettext(context, msgid1, msgid2, n) + n = _as_int2(n) if n == 1: return msgid1 else: @@ -587,6 +596,7 @@ def dngettext(domain, msgid1, msgid2, n): try: t = translation(domain, _localedirs.get(domain, None)) except OSError: + n = _as_int2(n) if n == 1: return msgid1 else: @@ -606,6 +616,7 @@ def dnpgettext(domain, context, msgid1, msgid2, n): try: t = translation(domain, _localedirs.get(domain, None)) except OSError: + n = _as_int2(n) if n == 1: return msgid1 else: @@ -637,7 +648,7 @@ def npgettext(context, msgid1, msgid2, n): # import gettext # cat = gettext.Catalog(PACKAGE, localedir=LOCALEDIR) # _ = cat.gettext -# print _('Hello World') +# print(_('Hello World')) # The resulting catalog object currently don't support access through a # dictionary API, which was supported (but apparently unused) in GNOME diff --git a/PythonLib/full/glob.py b/PythonLib/full/glob.py index 50beef37..f1a87c82 100644 --- a/PythonLib/full/glob.py +++ b/PythonLib/full/glob.py @@ -4,11 +4,14 @@ import os import re import fnmatch +import functools import itertools +import operator import stat import sys -__all__ = ["glob", "iglob", "escape"] + +__all__ = ["glob", "iglob", "escape", "translate"] def glob(pathname, *, root_dir=None, dir_fd=None, recursive=False, include_hidden=False): @@ -19,6 +22,9 @@ def glob(pathname, *, root_dir=None, dir_fd=None, recursive=False, dot are special cases that are not matched by '*' and '?' patterns by default. + The order of the returned list is undefined. Sort it if you need a + particular order. + If `include_hidden` is true, the patterns '*', '?', '**' will match hidden directories. @@ -37,6 +43,9 @@ def iglob(pathname, *, root_dir=None, dir_fd=None, recursive=False, dot are special cases that are not matched by '*' and '?' patterns. + The order of the returned paths is undefined. Sort them if you need a + particular order. + If recursive is true, the pattern '**' will match any files and zero or more directories and subdirectories. """ @@ -104,8 +113,8 @@ def _iglob(pathname, root_dir, dir_fd, recursive, dironly, def _glob1(dirname, pattern, dir_fd, dironly, include_hidden=False): names = _listdir(dirname, dir_fd, dironly) - if include_hidden or not _ishidden(pattern): - names = (x for x in names if include_hidden or not _ishidden(x)) + if not (include_hidden or _ishidden(pattern)): + names = (x for x in names if not _ishidden(x)) return fnmatch.filter(names, pattern) def _glob0(dirname, basename, dir_fd, dironly, include_hidden=False): @@ -119,12 +128,19 @@ def _glob0(dirname, basename, dir_fd, dironly, include_hidden=False): return [basename] return [] -# Following functions are not public but can be used by third-party code. +_deprecated_function_message = ( + "{name} is deprecated and will be removed in Python {remove}. Use " + "glob.glob and pass a directory to its root_dir argument instead." +) def glob0(dirname, pattern): + import warnings + warnings._deprecated("glob.glob0", _deprecated_function_message, remove=(3, 15)) return _glob0(dirname, pattern, None, False) def glob1(dirname, pattern): + import warnings + warnings._deprecated("glob.glob1", _deprecated_function_message, remove=(3, 15)) return _glob1(dirname, pattern, None, False) # This helper function recursively yields relative pathnames inside a literal @@ -249,4 +265,289 @@ def escape(pathname): return drive + pathname +_special_parts = ('', '.', '..') _dir_open_flags = os.O_RDONLY | getattr(os, 'O_DIRECTORY', 0) +_no_recurse_symlinks = object() + + +def translate(pat, *, recursive=False, include_hidden=False, seps=None): + """Translate a pathname with shell wildcards to a regular expression. + + If `recursive` is true, the pattern segment '**' will match any number of + path segments. + + If `include_hidden` is true, wildcards can match path segments beginning + with a dot ('.'). + + If a sequence of separator characters is given to `seps`, they will be + used to split the pattern into segments and match path separators. If not + given, os.path.sep and os.path.altsep (where available) are used. + """ + if not seps: + if os.path.altsep: + seps = (os.path.sep, os.path.altsep) + else: + seps = os.path.sep + escaped_seps = ''.join(map(re.escape, seps)) + any_sep = f'[{escaped_seps}]' if len(seps) > 1 else escaped_seps + not_sep = f'[^{escaped_seps}]' + if include_hidden: + one_last_segment = f'{not_sep}+' + one_segment = f'{one_last_segment}{any_sep}' + any_segments = f'(?:.+{any_sep})?' + any_last_segments = '.*' + else: + one_last_segment = f'[^{escaped_seps}.]{not_sep}*' + one_segment = f'{one_last_segment}{any_sep}' + any_segments = f'(?:{one_segment})*' + any_last_segments = f'{any_segments}(?:{one_last_segment})?' + + results = [] + parts = re.split(any_sep, pat) + last_part_idx = len(parts) - 1 + for idx, part in enumerate(parts): + if part == '*': + results.append(one_segment if idx < last_part_idx else one_last_segment) + elif recursive and part == '**': + if idx < last_part_idx: + if parts[idx + 1] != '**': + results.append(any_segments) + else: + results.append(any_last_segments) + else: + if part: + if not include_hidden and part[0] in '*?': + results.append(r'(?!\.)') + results.extend(fnmatch._translate(part, f'{not_sep}*', not_sep)[0]) + if idx < last_part_idx: + results.append(any_sep) + res = ''.join(results) + return fr'(?s:{res})\z' + + +@functools.lru_cache(maxsize=512) +def _compile_pattern(pat, seps, case_sensitive, recursive=True): + """Compile given glob pattern to a re.Pattern object (observing case + sensitivity).""" + flags = re.NOFLAG if case_sensitive else re.IGNORECASE + regex = translate(pat, recursive=recursive, include_hidden=True, seps=seps) + return re.compile(regex, flags=flags).match + + +class _GlobberBase: + """Abstract class providing shell-style pattern matching and globbing. + """ + + def __init__(self, sep, case_sensitive, case_pedantic=False, recursive=False): + self.sep = sep + self.case_sensitive = case_sensitive + self.case_pedantic = case_pedantic + self.recursive = recursive + + # Abstract methods + + @staticmethod + def lexists(path): + """Implements os.path.lexists(). + """ + raise NotImplementedError + + @staticmethod + def scandir(path): + """Like os.scandir(), but generates (entry, name, path) tuples. + """ + raise NotImplementedError + + @staticmethod + def concat_path(path, text): + """Implements path concatenation. + """ + raise NotImplementedError + + # High-level methods + + def compile(self, pat, altsep=None): + seps = (self.sep, altsep) if altsep else self.sep + return _compile_pattern(pat, seps, self.case_sensitive, self.recursive) + + def selector(self, parts): + """Returns a function that selects from a given path, walking and + filtering according to the glob-style pattern parts in *parts*. + """ + if not parts: + return self.select_exists + part = parts.pop() + if self.recursive and part == '**': + selector = self.recursive_selector + elif part in _special_parts: + selector = self.special_selector + elif not self.case_pedantic and magic_check.search(part) is None: + selector = self.literal_selector + else: + selector = self.wildcard_selector + return selector(part, parts) + + def special_selector(self, part, parts): + """Returns a function that selects special children of the given path. + """ + if parts: + part += self.sep + select_next = self.selector(parts) + + def select_special(path, exists=False): + path = self.concat_path(path, part) + return select_next(path, exists) + return select_special + + def literal_selector(self, part, parts): + """Returns a function that selects a literal descendant of a path. + """ + + # Optimization: consume and join any subsequent literal parts here, + # rather than leaving them for the next selector. This reduces the + # number of string concatenation operations. + while parts and magic_check.search(parts[-1]) is None: + part += self.sep + parts.pop() + if parts: + part += self.sep + + select_next = self.selector(parts) + + def select_literal(path, exists=False): + path = self.concat_path(path, part) + return select_next(path, exists=False) + return select_literal + + def wildcard_selector(self, part, parts): + """Returns a function that selects direct children of a given path, + filtering by pattern. + """ + + match = None if part == '*' else self.compile(part) + dir_only = bool(parts) + if dir_only: + select_next = self.selector(parts) + + def select_wildcard(path, exists=False): + try: + entries = self.scandir(path) + except OSError: + pass + else: + for entry, entry_name, entry_path in entries: + if match is None or match(entry_name): + if dir_only: + try: + if not entry.is_dir(): + continue + except OSError: + continue + entry_path = self.concat_path(entry_path, self.sep) + yield from select_next(entry_path, exists=True) + else: + yield entry_path + return select_wildcard + + def recursive_selector(self, part, parts): + """Returns a function that selects a given path and all its children, + recursively, filtering by pattern. + """ + # Optimization: consume following '**' parts, which have no effect. + while parts and parts[-1] == '**': + parts.pop() + + # Optimization: consume and join any following non-special parts here, + # rather than leaving them for the next selector. They're used to + # build a regular expression, which we use to filter the results of + # the recursive walk. As a result, non-special pattern segments + # following a '**' wildcard don't require additional filesystem access + # to expand. + follow_symlinks = self.recursive is not _no_recurse_symlinks + if follow_symlinks: + while parts and parts[-1] not in _special_parts: + part += self.sep + parts.pop() + + match = None if part == '**' else self.compile(part) + dir_only = bool(parts) + select_next = self.selector(parts) + + def select_recursive(path, exists=False): + match_pos = len(str(path)) + if match is None or match(str(path), match_pos): + yield from select_next(path, exists) + stack = [path] + while stack: + yield from select_recursive_step(stack, match_pos) + + def select_recursive_step(stack, match_pos): + path = stack.pop() + try: + entries = self.scandir(path) + except OSError: + pass + else: + for entry, _entry_name, entry_path in entries: + is_dir = False + try: + if entry.is_dir(follow_symlinks=follow_symlinks): + is_dir = True + except OSError: + pass + + if is_dir or not dir_only: + entry_path_str = str(entry_path) + if dir_only: + entry_path = self.concat_path(entry_path, self.sep) + if match is None or match(entry_path_str, match_pos): + if dir_only: + yield from select_next(entry_path, exists=True) + else: + # Optimization: directly yield the path if this is + # last pattern part. + yield entry_path + if is_dir: + stack.append(entry_path) + + return select_recursive + + def select_exists(self, path, exists=False): + """Yields the given path, if it exists. + """ + if exists: + # Optimization: this path is already known to exist, e.g. because + # it was returned from os.scandir(), so we skip calling lstat(). + yield path + elif self.lexists(path): + yield path + + +class _StringGlobber(_GlobberBase): + """Provides shell-style pattern matching and globbing for string paths. + """ + lexists = staticmethod(os.path.lexists) + concat_path = operator.add + + @staticmethod + def scandir(path): + # We must close the scandir() object before proceeding to + # avoid exhausting file descriptors when globbing deep trees. + with os.scandir(path) as scandir_it: + entries = list(scandir_it) + return ((entry, entry.name, entry.path) for entry in entries) + + +class _PathGlobber(_GlobberBase): + """Provides shell-style pattern matching and globbing for pathlib paths. + """ + + @staticmethod + def lexists(path): + return path.info.exists(follow_symlinks=False) + + @staticmethod + def scandir(path): + return ((child.info, child.name, child) for child in path.iterdir()) + + @staticmethod + def concat_path(path, text): + return path.with_segments(str(path) + text) diff --git a/PythonLib/full/graphlib.py b/PythonLib/full/graphlib.py index 9512865a..7961c9c5 100644 --- a/PythonLib/full/graphlib.py +++ b/PythonLib/full/graphlib.py @@ -90,20 +90,24 @@ def prepare(self): still be used to obtain as many nodes as possible until cycles block more progress. After a call to this function, the graph cannot be modified and therefore no more nodes can be added using "add". + + Raise ValueError if nodes have already been passed out of the sorter. + """ - if self._ready_nodes is not None: - raise ValueError("cannot prepare() more than once") + if self._npassedout > 0: + raise ValueError("cannot prepare() after starting sort") - self._ready_nodes = [ - i.node for i in self._node2info.values() if i.npredecessors == 0 - ] + if self._ready_nodes is None: + self._ready_nodes = [ + i.node for i in self._node2info.values() if i.npredecessors == 0 + ] # ready_nodes is set before we look for cycles on purpose: # if the user wants to catch the CycleError, that's fine, # they can continue using the instance to grab as many # nodes as possible before cycles block more progress cycle = self._find_cycle() if cycle: - raise CycleError(f"nodes are in a cycle", cycle) + raise CycleError("nodes are in a cycle", cycle) def get_ready(self): """Return a tuple of all the nodes that are ready. diff --git a/PythonLib/full/gzip.py b/PythonLib/full/gzip.py index ecfe3a9c..c00f5185 100644 --- a/PythonLib/full/gzip.py +++ b/PythonLib/full/gzip.py @@ -5,7 +5,6 @@ # based on Andrew Kuchling's minigzip.py distributed with the zlib module -import _compression import builtins import io import os @@ -14,12 +13,14 @@ import time import weakref import zlib +from compression._common import _streams __all__ = ["BadGzipFile", "GzipFile", "open", "compress", "decompress"] FTEXT, FHCRC, FEXTRA, FNAME, FCOMMENT = 1, 2, 4, 8, 16 -READ, WRITE = 1, 2 +READ = 'rb' +WRITE = 'wb' _COMPRESS_LEVEL_FAST = 1 _COMPRESS_LEVEL_TRADEOFF = 6 @@ -143,7 +144,7 @@ def writable(self): return True -class GzipFile(_compression.BaseStream): +class GzipFile(_streams.BaseStream): """The GzipFile class simulates most of the methods of a file object with the exception of the truncate() method. @@ -185,12 +186,18 @@ def __init__(self, filename=None, mode=None, and 9 is slowest and produces the most compression. 0 is no compression at all. The default is 9. - The mtime argument is an optional numeric timestamp to be written - to the last modification time field in the stream when compressing. - If omitted or None, the current time is used. + The optional mtime argument is the timestamp requested by gzip. The time + is in Unix format, i.e., seconds since 00:00:00 UTC, January 1, 1970. + If mtime is omitted or None, the current time is used. Use mtime = 0 + to generate a compressed stream that does not depend on creation time. """ + # Ensure attributes exist at __del__ + self.mode = None + self.fileobj = None + self._buffer = None + if mode and ('t' in mode or 'U' in mode): raise ValueError("Invalid mode: {!r}".format(mode)) if mode and 'b' not in mode: @@ -330,11 +337,15 @@ def _write_raw(self, data): return length - def read(self, size=-1): - self._check_not_closed() + def _check_read(self, caller): if self.mode != READ: import errno - raise OSError(errno.EBADF, "read() on write-only GzipFile object") + msg = f"{caller}() on write-only GzipFile object" + raise OSError(errno.EBADF, msg) + + def read(self, size=-1): + self._check_not_closed() + self._check_read("read") return self._buffer.read(size) def read1(self, size=-1): @@ -342,19 +353,25 @@ def read1(self, size=-1): Reads up to a buffer's worth of data if size is negative.""" self._check_not_closed() - if self.mode != READ: - import errno - raise OSError(errno.EBADF, "read1() on write-only GzipFile object") + self._check_read("read1") if size < 0: size = io.DEFAULT_BUFFER_SIZE return self._buffer.read1(size) + def readinto(self, b): + self._check_not_closed() + self._check_read("readinto") + return self._buffer.readinto(b) + + def readinto1(self, b): + self._check_not_closed() + self._check_read("readinto1") + return self._buffer.readinto1(b) + def peek(self, n): self._check_not_closed() - if self.mode != READ: - import errno - raise OSError(errno.EBADF, "peek() on write-only GzipFile object") + self._check_read("peek") return self._buffer.peek(n) @property @@ -365,6 +382,8 @@ def close(self): fileobj = self.fileobj if fileobj is None: return + if self._buffer is None or self._buffer.closed: + return try: if self.mode == WRITE: self._buffer.flush() @@ -443,6 +462,13 @@ def readline(self, size=-1): self._check_not_closed() return self._buffer.readline(size) + def __del__(self): + if self.mode == WRITE and not self.closed: + import warnings + warnings.warn("unclosed GzipFile", + ResourceWarning, source=self, stacklevel=2) + + super().__del__() def _read_exact(fp, n): '''Read exactly *n* bytes from `fp` @@ -497,7 +523,7 @@ def _read_gzip_header(fp): return last_mtime -class _GzipReader(_compression.DecompressReader): +class _GzipReader(_streams.DecompressReader): def __init__(self, fp): super().__init__(_PaddedFile(fp), zlib._ZlibDecompressor, wbits=-zlib.MAX_WBITS) @@ -595,43 +621,21 @@ def _rewind(self): self._new_member = True -def _create_simple_gzip_header(compresslevel: int, - mtime = None) -> bytes: - """ - Write a simple gzip header with no extra fields. - :param compresslevel: Compresslevel used to determine the xfl bytes. - :param mtime: The mtime (must support conversion to a 32-bit integer). - :return: A bytes object representing the gzip header. - """ - if mtime is None: - mtime = time.time() - if compresslevel == _COMPRESS_LEVEL_BEST: - xfl = 2 - elif compresslevel == _COMPRESS_LEVEL_FAST: - xfl = 4 - else: - xfl = 0 - # Pack ID1 and ID2 magic bytes, method (8=deflate), header flags (no extra - # fields added to header), mtime, xfl and os (255 for unknown OS). - return struct.pack("= startpos, except possibly for pos. pos # is the index of a leaf with a possibly out-of-order value. Restore the # heap invariant. @@ -335,9 +349,9 @@ def merge(*iterables, key=None, reverse=False): h_append = h.append if reverse: - _heapify = _heapify_max - _heappop = _heappop_max - _heapreplace = _heapreplace_max + _heapify = heapify_max + _heappop = heappop_max + _heapreplace = heapreplace_max direction = -1 else: _heapify = heapify @@ -490,10 +504,10 @@ def nsmallest(n, iterable, key=None): result = [(elem, i) for i, elem in zip(range(n), it)] if not result: return result - _heapify_max(result) + heapify_max(result) top = result[0][0] order = n - _heapreplace = _heapreplace_max + _heapreplace = heapreplace_max for elem in it: if elem < top: _heapreplace(result, (elem, order)) @@ -507,10 +521,10 @@ def nsmallest(n, iterable, key=None): result = [(key(elem), i, elem) for i, elem in zip(range(n), it)] if not result: return result - _heapify_max(result) + heapify_max(result) top = result[0][0] order = n - _heapreplace = _heapreplace_max + _heapreplace = heapreplace_max for elem in it: k = key(elem) if k < top: @@ -583,19 +597,13 @@ def nlargest(n, iterable, key=None): from _heapq import * except ImportError: pass -try: - from _heapq import _heapreplace_max -except ImportError: - pass -try: - from _heapq import _heapify_max -except ImportError: - pass -try: - from _heapq import _heappop_max -except ImportError: - pass +# For backwards compatibility +_heappop_max = heappop_max +_heapreplace_max = heapreplace_max +_heappush_max = heappush_max +_heappushpop_max = heappushpop_max +_heapify_max = heapify_max if __name__ == "__main__": diff --git a/PythonLib/full/hmac.py b/PythonLib/full/hmac.py index 8b4eb2fe..2d6016cd 100644 --- a/PythonLib/full/hmac.py +++ b/PythonLib/full/hmac.py @@ -3,7 +3,6 @@ Implements the HMAC algorithm as described by RFC 2104. """ -import warnings as _warnings try: import _hashlib as _hashopenssl except ImportError: @@ -14,7 +13,10 @@ compare_digest = _hashopenssl.compare_digest _functype = type(_hashopenssl.openssl_sha256) # builtin type -import hashlib as _hashlib +try: + import _hmac +except ImportError: + _hmac = None trans_5C = bytes((x ^ 0x5C) for x in range(256)) trans_36 = bytes((x ^ 0x36) for x in range(256)) @@ -24,11 +26,27 @@ digest_size = None +def _get_digest_constructor(digest_like): + if callable(digest_like): + return digest_like + if isinstance(digest_like, str): + def digest_wrapper(d=b''): + import hashlib + return hashlib.new(digest_like, d) + else: + def digest_wrapper(d=b''): + return digest_like.new(d) + return digest_wrapper + + class HMAC: """RFC 2104 HMAC class. Also complies with RFC 4231. This supports the API for Cryptographic Hash Functions (PEP 247). """ + + # Note: self.blocksize is the default blocksize; self.block_size + # is effective block size as well as the public API attribute. blocksize = 64 # 512-bit HMAC; can be changed in subclasses. __slots__ = ( @@ -50,31 +68,47 @@ def __init__(self, key, msg=None, digestmod=''): """ if not isinstance(key, (bytes, bytearray)): - raise TypeError("key: expected bytes or bytearray, but got %r" % type(key).__name__) + raise TypeError(f"key: expected bytes or bytearray, " + f"but got {type(key).__name__!r}") if not digestmod: raise TypeError("Missing required argument 'digestmod'.") + self.__init(key, msg, digestmod) + + def __init(self, key, msg, digestmod): if _hashopenssl and isinstance(digestmod, (str, _functype)): try: - self._init_hmac(key, msg, digestmod) - except _hashopenssl.UnsupportedDigestmodError: - self._init_old(key, msg, digestmod) - else: - self._init_old(key, msg, digestmod) + self._init_openssl_hmac(key, msg, digestmod) + return + except _hashopenssl.UnsupportedDigestmodError: # pragma: no cover + pass + if _hmac and isinstance(digestmod, str): + try: + self._init_builtin_hmac(key, msg, digestmod) + return + except _hmac.UnknownHashError: # pragma: no cover + pass + self._init_old(key, msg, digestmod) - def _init_hmac(self, key, msg, digestmod): + def _init_openssl_hmac(self, key, msg, digestmod): self._hmac = _hashopenssl.hmac_new(key, msg, digestmod=digestmod) + self._inner = self._outer = None # because the slots are defined + self.digest_size = self._hmac.digest_size + self.block_size = self._hmac.block_size + + _init_hmac = _init_openssl_hmac # for backward compatibility (if any) + + def _init_builtin_hmac(self, key, msg, digestmod): + self._hmac = _hmac.new(key, msg, digestmod=digestmod) + self._inner = self._outer = None # because the slots are defined self.digest_size = self._hmac.digest_size self.block_size = self._hmac.block_size def _init_old(self, key, msg, digestmod): - if callable(digestmod): - digest_cons = digestmod - elif isinstance(digestmod, str): - digest_cons = lambda d=b'': _hashlib.new(digestmod, d) - else: - digest_cons = lambda d=b'': digestmod.new(d) + import warnings + + digest_cons = _get_digest_constructor(digestmod) self._hmac = None self._outer = digest_cons() @@ -84,21 +118,19 @@ def _init_old(self, key, msg, digestmod): if hasattr(self._inner, 'block_size'): blocksize = self._inner.block_size if blocksize < 16: - _warnings.warn('block_size of %d seems too small; using our ' - 'default of %d.' % (blocksize, self.blocksize), - RuntimeWarning, 2) - blocksize = self.blocksize + warnings.warn(f"block_size of {blocksize} seems too small; " + f"using our default of {self.blocksize}.", + RuntimeWarning, 2) + blocksize = self.blocksize # pragma: no cover else: - _warnings.warn('No block_size attribute on given digest object; ' - 'Assuming %d.' % (self.blocksize), - RuntimeWarning, 2) - blocksize = self.blocksize + warnings.warn("No block_size attribute on given digest object; " + f"Assuming {self.blocksize}.", + RuntimeWarning, 2) + blocksize = self.blocksize # pragma: no cover if len(key) > blocksize: key = digest_cons(key).digest() - # self.blocksize is the default blocksize. self.block_size is - # effective block size as well as the public API attribute. self.block_size = blocksize key = key.ljust(blocksize, b'\0') @@ -127,6 +159,7 @@ def copy(self): # Call __new__ directly to avoid the expensive __init__. other = self.__class__.__new__(self.__class__) other.digest_size = self.digest_size + other.block_size = self.block_size if self._hmac: other._hmac = self._hmac.copy() other._inner = other._outer = None @@ -164,6 +197,7 @@ def hexdigest(self): h = self._current() return h.hexdigest() + def new(key, msg=None, digestmod=''): """Create a new hashing object and return it. @@ -193,25 +227,41 @@ def digest(key, msg, digest): A hashlib constructor returning a new hash object. *OR* A module supporting PEP 247. """ - if _hashopenssl is not None and isinstance(digest, (str, _functype)): + if _hashopenssl and isinstance(digest, (str, _functype)): try: return _hashopenssl.hmac_digest(key, msg, digest) + except OverflowError: + # OpenSSL's HMAC limits the size of the key to INT_MAX. + # Instead of falling back to HACL* implementation which + # may still not be supported due to a too large key, we + # directly switch to the pure Python fallback instead + # even if we could have used streaming HMAC for small keys + # but large messages. + return _compute_digest_fallback(key, msg, digest) except _hashopenssl.UnsupportedDigestmodError: pass - if callable(digest): - digest_cons = digest - elif isinstance(digest, str): - digest_cons = lambda d=b'': _hashlib.new(digest, d) - else: - digest_cons = lambda d=b'': digest.new(d) + if _hmac and isinstance(digest, str): + try: + return _hmac.compute_digest(key, msg, digest) + except (OverflowError, _hmac.UnknownHashError): + # HACL* HMAC limits the size of the key to UINT32_MAX + # so we fallback to the pure Python implementation even + # if streaming HMAC may have been used for small keys + # and large messages. + pass + + return _compute_digest_fallback(key, msg, digest) + +def _compute_digest_fallback(key, msg, digest): + digest_cons = _get_digest_constructor(digest) inner = digest_cons() outer = digest_cons() blocksize = getattr(inner, 'block_size', 64) if len(key) > blocksize: key = digest_cons(key).digest() - key = key + b'\x00' * (blocksize - len(key)) + key = key.ljust(blocksize, b'\0') inner.update(key.translate(trans_36)) outer.update(key.translate(trans_5C)) inner.update(msg) diff --git a/PythonLib/full/html/parser.py b/PythonLib/full/html/parser.py index 13c95c34..80fb8c3f 100644 --- a/PythonLib/full/html/parser.py +++ b/PythonLib/full/html/parser.py @@ -12,6 +12,7 @@ import _markupbase from html import unescape +from html.entities import html5 as html5_entities __all__ = ['HTMLParser'] @@ -23,20 +24,52 @@ entityref = re.compile('&([a-zA-Z][-.a-zA-Z0-9]*)[^a-zA-Z0-9]') charref = re.compile('&#(?:[0-9]+|[xX][0-9a-fA-F]+)[^0-9a-fA-F]') +incomplete_charref = re.compile('&#(?:[0-9]|[xX][0-9a-fA-F])') +attr_charref = re.compile(r'&(#[0-9]+|#[xX][0-9a-fA-F]+|[a-zA-Z][a-zA-Z0-9]*)[;=]?') starttagopen = re.compile('<[a-zA-Z]') +endtagopen = re.compile('') -commentclose = re.compile(r'--\s*>') +commentclose = re.compile(r'--!?>') +commentabruptclose = re.compile(r'-?>') # Note: -# 1) if you change tagfind/attrfind remember to update locatestarttagend too; -# 2) if you change tagfind/attrfind and/or locatestarttagend the parser will +# 1) if you change tagfind/attrfind remember to update locatetagend too; +# 2) if you change tagfind/attrfind and/or locatetagend the parser will # explode, so don't do it. -# see http://www.w3.org/TR/html5/tokenization.html#tag-open-state -# and http://www.w3.org/TR/html5/tokenization.html#tag-name-state -tagfind_tolerant = re.compile(r'([a-zA-Z][^\t\n\r\f />\x00]*)(?:\s|/(?!>))*') -attrfind_tolerant = re.compile( - r'((?<=[\'"\s/])[^\s/>][^\s/=>]*)(\s*=+\s*' - r'(\'[^\']*\'|"[^"]*"|(?![\'"])[^>\s]*))?(?:\s|/(?!>))*') +# see the HTML5 specs section "13.2.5.6 Tag open state", +# "13.2.5.8 Tag name state" and "13.2.5.33 Attribute name state". +# https://html.spec.whatwg.org/multipage/parsing.html#tag-open-state +# https://html.spec.whatwg.org/multipage/parsing.html#tag-name-state +# https://html.spec.whatwg.org/multipage/parsing.html#attribute-name-state +tagfind_tolerant = re.compile(r'([a-zA-Z][^\t\n\r\f />]*)(?:[\t\n\r\f ]|/(?!>))*') +attrfind_tolerant = re.compile(r""" + ( + (?<=['"\t\n\r\f /])[^\t\n\r\f />][^\t\n\r\f /=>]* # attribute name + ) + ([\t\n\r\f ]*=[\t\n\r\f ]* # value indicator + ('[^']*' # LITA-enclosed value + |"[^"]*" # LIT-enclosed value + |(?!['"])[^>\t\n\r\f ]* # bare value + ) + )? + (?:[\t\n\r\f ]|/(?!>))* # possibly followed by a space +""", re.VERBOSE) +locatetagend = re.compile(r""" + [a-zA-Z][^\t\n\r\f />]* # tag name + [\t\n\r\f /]* # optional whitespace before attribute name + (?:(?<=['"\t\n\r\f /])[^\t\n\r\f />][^\t\n\r\f /=>]* # attribute name + (?:[\t\n\r\f ]*=[\t\n\r\f ]* # value indicator + (?:'[^']*' # LITA-enclosed value + |"[^"]*" # LIT-enclosed value + |(?!['"])[^>\t\n\r\f ]* # bare value + ) + )? + [\t\n\r\f /]* # possibly followed by a space + )* + >? +""", re.VERBOSE) +# The following variables are not used, but are temporarily left for +# backward compatibility. locatestarttagend_tolerant = re.compile(r""" <[a-zA-Z][^\t\n\r\f />\x00]* # tag name (?:[\s/]* # optional whitespace before attribute name @@ -53,10 +86,24 @@ \s* # trailing whitespace """, re.VERBOSE) endendtag = re.compile('>') -# the HTML 5 spec, section 8.1.2.2, doesn't allow spaces between -# ') +# Character reference processing logic specific to attribute values +# See: https://html.spec.whatwg.org/multipage/parsing.html#named-character-reference-state +def _replace_attr_charref(match): + ref = match.group(0) + # Numeric / hex char refs must always be unescaped + if ref.startswith('&#'): + return unescape(ref) + # Named character / entity references must only be unescaped + # if they are an exact match, and they are not followed by an equals sign + if not ref.endswith('=') and ref[1:] in html5_entities: + return unescape(ref) + # Otherwise do not unescape + return ref + +def _unescape_attrvalue(s): + return attr_charref.sub(_replace_attr_charref, s) class HTMLParser(_markupbase.ParserBase): @@ -81,16 +128,25 @@ class HTMLParser(_markupbase.ParserBase): argument. """ - CDATA_CONTENT_ELEMENTS = ("script", "style") + # See the HTML5 specs section "13.4 Parsing HTML fragments". + # https://html.spec.whatwg.org/multipage/parsing.html#parsing-html-fragments + # CDATA_CONTENT_ELEMENTS are parsed in RAWTEXT mode + CDATA_CONTENT_ELEMENTS = ("script", "style", "xmp", "iframe", "noembed", "noframes") + RCDATA_CONTENT_ELEMENTS = ("textarea", "title") - def __init__(self, *, convert_charrefs=True): + def __init__(self, *, convert_charrefs=True, scripting=False): """Initialize and reset this instance. - If convert_charrefs is True (the default), all character references + If convert_charrefs is true (the default), all character references are automatically converted to the corresponding Unicode characters. + + If *scripting* is false (the default), the content of the + ``noscript`` element is parsed normally; if it's true, + it's returned as is without being parsed. """ super().__init__() self.convert_charrefs = convert_charrefs + self.scripting = scripting self.reset() def reset(self): @@ -99,6 +155,8 @@ def reset(self): self.lasttag = '???' self.interesting = interesting_normal self.cdata_elem = None + self._support_cdata = True + self._escapable = True super().reset() def feed(self, data): @@ -120,13 +178,35 @@ def get_starttag_text(self): """Return full source of start tag: '<...>'.""" return self.__starttag_text - def set_cdata_mode(self, elem): + def set_cdata_mode(self, elem, *, escapable=False): self.cdata_elem = elem.lower() - self.interesting = re.compile(r'' % self.cdata_elem, re.I) + self._escapable = escapable + if self.cdata_elem == 'plaintext': + self.interesting = re.compile(r'\z') + elif escapable and not self.convert_charrefs: + self.interesting = re.compile(r'&|])' % self.cdata_elem, + re.IGNORECASE|re.ASCII) + else: + self.interesting = re.compile(r'])' % self.cdata_elem, + re.IGNORECASE|re.ASCII) def clear_cdata_mode(self): self.interesting = interesting_normal self.cdata_elem = None + self._escapable = True + + def _set_support_cdata(self, flag=True): + """Enable or disable support of the CDATA sections. + If enabled, "<[CDATA[" starts a CDATA section which ends with "]]>". + If disabled, "<[CDATA[" starts a bogus comments which ends with ">". + + This method is not called by default. Its purpose is to be called + in custom handle_starttag() and handle_endtag() methods, with + value that depends on the adjusted current node. + See https://html.spec.whatwg.org/multipage/parsing.html#markup-declaration-open-state + for details. + """ + self._support_cdata = flag # Internal -- handle data as far as reasonable. May leave state # and data to be processed by a subsequent call. If 'end' is @@ -147,7 +227,7 @@ def goahead(self, end): # & near the end and see if it's followed by a space or ;. amppos = rawdata.rfind('&', max(i, n-34)) if (amppos >= 0 and - not re.compile(r'[\s;]').search(rawdata, amppos)): + not re.compile(r'[\t\n\r\f ;]').search(rawdata, amppos)): break # wait till we get all the text j = n else: @@ -159,7 +239,7 @@ def goahead(self, end): break j = n if i < j: - if self.convert_charrefs and not self.cdata_elem: + if self.convert_charrefs and self._escapable: self.handle_data(unescape(rawdata[i:j])) else: self.handle_data(rawdata[i:j]) @@ -177,7 +257,7 @@ def goahead(self, end): k = self.parse_pi(i) elif startswith("', i + 1) - if k < 0: - k = rawdata.find('<', i + 1) - if k < 0: - k = i + 1 - else: - k += 1 - if self.convert_charrefs and not self.cdata_elem: - self.handle_data(unescape(rawdata[i:k])) + if starttagopen.match(rawdata, i): # < + letter + pass + elif startswith("', i+9) + if j < 0: + return -1 + self.unknown_decl(rawdata[i+3: j]) + return j + 3 elif rawdata[i:i+9].lower() == ' gtpos = rawdata.find('>', i+9) @@ -272,12 +382,27 @@ def parse_html_declaration(self, i): else: return self.parse_bogus_comment(i) + # Internal -- parse comment, return length or -1 if not terminated + # see https://html.spec.whatwg.org/multipage/parsing.html#comment-start-state + def parse_comment(self, i, report=True): + rawdata = self.rawdata + assert rawdata.startswith(' (map|filter|zip) and - itertools.ifilterfalse --> itertools.filterfalse (bugs 2360-2363) - - imports from itertools are fixed in fix_itertools_import.py - - If itertools is imported as something else (ie: import itertools as it; - it.izip(spam, eggs)) method calls will not get fixed. - """ - -# Local imports -from .. import fixer_base -from ..fixer_util import Name - -class FixItertools(fixer_base.BaseFix): - BM_compatible = True - it_funcs = "('imap'|'ifilter'|'izip'|'izip_longest'|'ifilterfalse')" - PATTERN = """ - power< it='itertools' - trailer< - dot='.' func=%(it_funcs)s > trailer< '(' [any] ')' > > - | - power< func=%(it_funcs)s trailer< '(' [any] ')' > > - """ %(locals()) - - # Needs to be run after fix_(map|zip|filter) - run_order = 6 - - def transform(self, node, results): - prefix = None - func = results['func'][0] - if ('it' in results and - func.value not in ('ifilterfalse', 'izip_longest')): - dot, it = (results['dot'], results['it']) - # Remove the 'itertools' - prefix = it.prefix - it.remove() - # Replace the node which contains ('.', 'function') with the - # function (to be consistent with the second part of the pattern) - dot.remove() - func.parent.replace(func) - - prefix = prefix or func.prefix - func.replace(Name(func.value[1:], prefix=prefix)) diff --git a/PythonLib/full/lib2to3/fixes/fix_itertools_imports.py b/PythonLib/full/lib2to3/fixes/fix_itertools_imports.py deleted file mode 100644 index 0ddbc7b8..00000000 --- a/PythonLib/full/lib2to3/fixes/fix_itertools_imports.py +++ /dev/null @@ -1,57 +0,0 @@ -""" Fixer for imports of itertools.(imap|ifilter|izip|ifilterfalse) """ - -# Local imports -from lib2to3 import fixer_base -from lib2to3.fixer_util import BlankLine, syms, token - - -class FixItertoolsImports(fixer_base.BaseFix): - BM_compatible = True - PATTERN = """ - import_from< 'from' 'itertools' 'import' imports=any > - """ %(locals()) - - def transform(self, node, results): - imports = results['imports'] - if imports.type == syms.import_as_name or not imports.children: - children = [imports] - else: - children = imports.children - for child in children[::2]: - if child.type == token.NAME: - member = child.value - name_node = child - elif child.type == token.STAR: - # Just leave the import as is. - return - else: - assert child.type == syms.import_as_name - name_node = child.children[0] - member_name = name_node.value - if member_name in ('imap', 'izip', 'ifilter'): - child.value = None - child.remove() - elif member_name in ('ifilterfalse', 'izip_longest'): - node.changed() - name_node.value = ('filterfalse' if member_name[1] == 'f' - else 'zip_longest') - - # Make sure the import statement is still sane - children = imports.children[:] or [imports] - remove_comma = True - for child in children: - if remove_comma and child.type == token.COMMA: - child.remove() - else: - remove_comma ^= True - - while children and children[-1].type == token.COMMA: - children.pop().remove() - - # If there are no imports left, just get rid of the entire statement - if (not (imports.children or getattr(imports, 'value', None)) or - imports.parent is None): - p = node.prefix - node = BlankLine() - node.prefix = p - return node diff --git a/PythonLib/full/lib2to3/fixes/fix_long.py b/PythonLib/full/lib2to3/fixes/fix_long.py deleted file mode 100644 index f227c9f4..00000000 --- a/PythonLib/full/lib2to3/fixes/fix_long.py +++ /dev/null @@ -1,19 +0,0 @@ -# Copyright 2006 Google, Inc. All Rights Reserved. -# Licensed to PSF under a Contributor Agreement. - -"""Fixer that turns 'long' into 'int' everywhere. -""" - -# Local imports -from lib2to3 import fixer_base -from lib2to3.fixer_util import is_probably_builtin - - -class FixLong(fixer_base.BaseFix): - BM_compatible = True - PATTERN = "'long'" - - def transform(self, node, results): - if is_probably_builtin(node): - node.value = "int" - node.changed() diff --git a/PythonLib/full/lib2to3/fixes/fix_map.py b/PythonLib/full/lib2to3/fixes/fix_map.py deleted file mode 100644 index 78cf81c6..00000000 --- a/PythonLib/full/lib2to3/fixes/fix_map.py +++ /dev/null @@ -1,110 +0,0 @@ -# Copyright 2007 Google, Inc. All Rights Reserved. -# Licensed to PSF under a Contributor Agreement. - -"""Fixer that changes map(F, ...) into list(map(F, ...)) unless there -exists a 'from future_builtins import map' statement in the top-level -namespace. - -As a special case, map(None, X) is changed into list(X). (This is -necessary because the semantics are changed in this case -- the new -map(None, X) is equivalent to [(x,) for x in X].) - -We avoid the transformation (except for the special case mentioned -above) if the map() call is directly contained in iter(<>), list(<>), -tuple(<>), sorted(<>), ...join(<>), or for V in <>:. - -NOTE: This is still not correct if the original code was depending on -map(F, X, Y, ...) to go on until the longest argument is exhausted, -substituting None for missing values -- like zip(), it now stops as -soon as the shortest argument is exhausted. -""" - -# Local imports -from ..pgen2 import token -from .. import fixer_base -from ..fixer_util import Name, ArgList, Call, ListComp, in_special_context -from ..pygram import python_symbols as syms -from ..pytree import Node - - -class FixMap(fixer_base.ConditionalFix): - BM_compatible = True - - PATTERN = """ - map_none=power< - 'map' - trailer< '(' arglist< 'None' ',' arg=any [','] > ')' > - [extra_trailers=trailer*] - > - | - map_lambda=power< - 'map' - trailer< - '(' - arglist< - lambdef< 'lambda' - (fp=NAME | vfpdef< '(' fp=NAME ')'> ) ':' xp=any - > - ',' - it=any - > - ')' - > - [extra_trailers=trailer*] - > - | - power< - 'map' args=trailer< '(' [any] ')' > - [extra_trailers=trailer*] - > - """ - - skip_on = 'future_builtins.map' - - def transform(self, node, results): - if self.should_skip(node): - return - - trailers = [] - if 'extra_trailers' in results: - for t in results['extra_trailers']: - trailers.append(t.clone()) - - if node.parent.type == syms.simple_stmt: - self.warning(node, "You should use a for loop here") - new = node.clone() - new.prefix = "" - new = Call(Name("list"), [new]) - elif "map_lambda" in results: - new = ListComp(results["xp"].clone(), - results["fp"].clone(), - results["it"].clone()) - new = Node(syms.power, [new] + trailers, prefix="") - - else: - if "map_none" in results: - new = results["arg"].clone() - new.prefix = "" - else: - if "args" in results: - args = results["args"] - if args.type == syms.trailer and \ - args.children[1].type == syms.arglist and \ - args.children[1].children[0].type == token.NAME and \ - args.children[1].children[0].value == "None": - self.warning(node, "cannot convert map(None, ...) " - "with multiple arguments because map() " - "now truncates to the shortest sequence") - return - - new = Node(syms.power, [Name("map"), args.clone()]) - new.prefix = "" - - if in_special_context(node): - return None - - new = Node(syms.power, [Name("list"), ArgList([new])] + trailers) - new.prefix = "" - - new.prefix = node.prefix - return new diff --git a/PythonLib/full/lib2to3/fixes/fix_metaclass.py b/PythonLib/full/lib2to3/fixes/fix_metaclass.py deleted file mode 100644 index fe547b22..00000000 --- a/PythonLib/full/lib2to3/fixes/fix_metaclass.py +++ /dev/null @@ -1,228 +0,0 @@ -"""Fixer for __metaclass__ = X -> (metaclass=X) methods. - - The various forms of classef (inherits nothing, inherits once, inherits - many) don't parse the same in the CST so we look at ALL classes for - a __metaclass__ and if we find one normalize the inherits to all be - an arglist. - - For one-liner classes ('class X: pass') there is no indent/dedent so - we normalize those into having a suite. - - Moving the __metaclass__ into the classdef can also cause the class - body to be empty so there is some special casing for that as well. - - This fixer also tries very hard to keep original indenting and spacing - in all those corner cases. - -""" -# Author: Jack Diederich - -# Local imports -from .. import fixer_base -from ..pygram import token -from ..fixer_util import syms, Node, Leaf - - -def has_metaclass(parent): - """ we have to check the cls_node without changing it. - There are two possibilities: - 1) clsdef => suite => simple_stmt => expr_stmt => Leaf('__meta') - 2) clsdef => simple_stmt => expr_stmt => Leaf('__meta') - """ - for node in parent.children: - if node.type == syms.suite: - return has_metaclass(node) - elif node.type == syms.simple_stmt and node.children: - expr_node = node.children[0] - if expr_node.type == syms.expr_stmt and expr_node.children: - left_side = expr_node.children[0] - if isinstance(left_side, Leaf) and \ - left_side.value == '__metaclass__': - return True - return False - - -def fixup_parse_tree(cls_node): - """ one-line classes don't get a suite in the parse tree so we add - one to normalize the tree - """ - for node in cls_node.children: - if node.type == syms.suite: - # already in the preferred format, do nothing - return - - # !%@#! one-liners have no suite node, we have to fake one up - for i, node in enumerate(cls_node.children): - if node.type == token.COLON: - break - else: - raise ValueError("No class suite and no ':'!") - - # move everything into a suite node - suite = Node(syms.suite, []) - while cls_node.children[i+1:]: - move_node = cls_node.children[i+1] - suite.append_child(move_node.clone()) - move_node.remove() - cls_node.append_child(suite) - node = suite - - -def fixup_simple_stmt(parent, i, stmt_node): - """ if there is a semi-colon all the parts count as part of the same - simple_stmt. We just want the __metaclass__ part so we move - everything after the semi-colon into its own simple_stmt node - """ - for semi_ind, node in enumerate(stmt_node.children): - if node.type == token.SEMI: # *sigh* - break - else: - return - - node.remove() # kill the semicolon - new_expr = Node(syms.expr_stmt, []) - new_stmt = Node(syms.simple_stmt, [new_expr]) - while stmt_node.children[semi_ind:]: - move_node = stmt_node.children[semi_ind] - new_expr.append_child(move_node.clone()) - move_node.remove() - parent.insert_child(i, new_stmt) - new_leaf1 = new_stmt.children[0].children[0] - old_leaf1 = stmt_node.children[0].children[0] - new_leaf1.prefix = old_leaf1.prefix - - -def remove_trailing_newline(node): - if node.children and node.children[-1].type == token.NEWLINE: - node.children[-1].remove() - - -def find_metas(cls_node): - # find the suite node (Mmm, sweet nodes) - for node in cls_node.children: - if node.type == syms.suite: - break - else: - raise ValueError("No class suite!") - - # look for simple_stmt[ expr_stmt[ Leaf('__metaclass__') ] ] - for i, simple_node in list(enumerate(node.children)): - if simple_node.type == syms.simple_stmt and simple_node.children: - expr_node = simple_node.children[0] - if expr_node.type == syms.expr_stmt and expr_node.children: - # Check if the expr_node is a simple assignment. - left_node = expr_node.children[0] - if isinstance(left_node, Leaf) and \ - left_node.value == '__metaclass__': - # We found an assignment to __metaclass__. - fixup_simple_stmt(node, i, simple_node) - remove_trailing_newline(simple_node) - yield (node, i, simple_node) - - -def fixup_indent(suite): - """ If an INDENT is followed by a thing with a prefix then nuke the prefix - Otherwise we get in trouble when removing __metaclass__ at suite start - """ - kids = suite.children[::-1] - # find the first indent - while kids: - node = kids.pop() - if node.type == token.INDENT: - break - - # find the first Leaf - while kids: - node = kids.pop() - if isinstance(node, Leaf) and node.type != token.DEDENT: - if node.prefix: - node.prefix = '' - return - else: - kids.extend(node.children[::-1]) - - -class FixMetaclass(fixer_base.BaseFix): - BM_compatible = True - - PATTERN = """ - classdef - """ - - def transform(self, node, results): - if not has_metaclass(node): - return - - fixup_parse_tree(node) - - # find metaclasses, keep the last one - last_metaclass = None - for suite, i, stmt in find_metas(node): - last_metaclass = stmt - stmt.remove() - - text_type = node.children[0].type # always Leaf(nnn, 'class') - - # figure out what kind of classdef we have - if len(node.children) == 7: - # Node(classdef, ['class', 'name', '(', arglist, ')', ':', suite]) - # 0 1 2 3 4 5 6 - if node.children[3].type == syms.arglist: - arglist = node.children[3] - # Node(classdef, ['class', 'name', '(', 'Parent', ')', ':', suite]) - else: - parent = node.children[3].clone() - arglist = Node(syms.arglist, [parent]) - node.set_child(3, arglist) - elif len(node.children) == 6: - # Node(classdef, ['class', 'name', '(', ')', ':', suite]) - # 0 1 2 3 4 5 - arglist = Node(syms.arglist, []) - node.insert_child(3, arglist) - elif len(node.children) == 4: - # Node(classdef, ['class', 'name', ':', suite]) - # 0 1 2 3 - arglist = Node(syms.arglist, []) - node.insert_child(2, Leaf(token.RPAR, ')')) - node.insert_child(2, arglist) - node.insert_child(2, Leaf(token.LPAR, '(')) - else: - raise ValueError("Unexpected class definition") - - # now stick the metaclass in the arglist - meta_txt = last_metaclass.children[0].children[0] - meta_txt.value = 'metaclass' - orig_meta_prefix = meta_txt.prefix - - if arglist.children: - arglist.append_child(Leaf(token.COMMA, ',')) - meta_txt.prefix = ' ' - else: - meta_txt.prefix = '' - - # compact the expression "metaclass = Meta" -> "metaclass=Meta" - expr_stmt = last_metaclass.children[0] - assert expr_stmt.type == syms.expr_stmt - expr_stmt.children[1].prefix = '' - expr_stmt.children[2].prefix = '' - - arglist.append_child(last_metaclass) - - fixup_indent(suite) - - # check for empty suite - if not suite.children: - # one-liner that was just __metaclass_ - suite.remove() - pass_leaf = Leaf(text_type, 'pass') - pass_leaf.prefix = orig_meta_prefix - node.append_child(pass_leaf) - node.append_child(Leaf(token.NEWLINE, '\n')) - - elif len(suite.children) > 1 and \ - (suite.children[-2].type == token.INDENT and - suite.children[-1].type == token.DEDENT): - # there was only one line in the class body and it was __metaclass__ - pass_leaf = Leaf(text_type, 'pass') - suite.insert_child(-1, pass_leaf) - suite.insert_child(-1, Leaf(token.NEWLINE, '\n')) diff --git a/PythonLib/full/lib2to3/fixes/fix_methodattrs.py b/PythonLib/full/lib2to3/fixes/fix_methodattrs.py deleted file mode 100644 index 7f9004f0..00000000 --- a/PythonLib/full/lib2to3/fixes/fix_methodattrs.py +++ /dev/null @@ -1,24 +0,0 @@ -"""Fix bound method attributes (method.im_? -> method.__?__). -""" -# Author: Christian Heimes - -# Local imports -from .. import fixer_base -from ..fixer_util import Name - -MAP = { - "im_func" : "__func__", - "im_self" : "__self__", - "im_class" : "__self__.__class__" - } - -class FixMethodattrs(fixer_base.BaseFix): - BM_compatible = True - PATTERN = """ - power< any+ trailer< '.' attr=('im_func' | 'im_self' | 'im_class') > any* > - """ - - def transform(self, node, results): - attr = results["attr"][0] - new = MAP[attr.value] - attr.replace(Name(new, prefix=attr.prefix)) diff --git a/PythonLib/full/lib2to3/fixes/fix_ne.py b/PythonLib/full/lib2to3/fixes/fix_ne.py deleted file mode 100644 index e3ee10f4..00000000 --- a/PythonLib/full/lib2to3/fixes/fix_ne.py +++ /dev/null @@ -1,23 +0,0 @@ -# Copyright 2006 Google, Inc. All Rights Reserved. -# Licensed to PSF under a Contributor Agreement. - -"""Fixer that turns <> into !=.""" - -# Local imports -from .. import pytree -from ..pgen2 import token -from .. import fixer_base - - -class FixNe(fixer_base.BaseFix): - # This is so simple that we don't need the pattern compiler. - - _accept_type = token.NOTEQUAL - - def match(self, node): - # Override - return node.value == "<>" - - def transform(self, node, results): - new = pytree.Leaf(token.NOTEQUAL, "!=", prefix=node.prefix) - return new diff --git a/PythonLib/full/lib2to3/fixes/fix_next.py b/PythonLib/full/lib2to3/fixes/fix_next.py deleted file mode 100644 index 9f6305e1..00000000 --- a/PythonLib/full/lib2to3/fixes/fix_next.py +++ /dev/null @@ -1,103 +0,0 @@ -"""Fixer for it.next() -> next(it), per PEP 3114.""" -# Author: Collin Winter - -# Things that currently aren't covered: -# - listcomp "next" names aren't warned -# - "with" statement targets aren't checked - -# Local imports -from ..pgen2 import token -from ..pygram import python_symbols as syms -from .. import fixer_base -from ..fixer_util import Name, Call, find_binding - -bind_warning = "Calls to builtin next() possibly shadowed by global binding" - - -class FixNext(fixer_base.BaseFix): - BM_compatible = True - PATTERN = """ - power< base=any+ trailer< '.' attr='next' > trailer< '(' ')' > > - | - power< head=any+ trailer< '.' attr='next' > not trailer< '(' ')' > > - | - classdef< 'class' any+ ':' - suite< any* - funcdef< 'def' - name='next' - parameters< '(' NAME ')' > any+ > - any* > > - | - global=global_stmt< 'global' any* 'next' any* > - """ - - order = "pre" # Pre-order tree traversal - - def start_tree(self, tree, filename): - super(FixNext, self).start_tree(tree, filename) - - n = find_binding('next', tree) - if n: - self.warning(n, bind_warning) - self.shadowed_next = True - else: - self.shadowed_next = False - - def transform(self, node, results): - assert results - - base = results.get("base") - attr = results.get("attr") - name = results.get("name") - - if base: - if self.shadowed_next: - attr.replace(Name("__next__", prefix=attr.prefix)) - else: - base = [n.clone() for n in base] - base[0].prefix = "" - node.replace(Call(Name("next", prefix=node.prefix), base)) - elif name: - n = Name("__next__", prefix=name.prefix) - name.replace(n) - elif attr: - # We don't do this transformation if we're assigning to "x.next". - # Unfortunately, it doesn't seem possible to do this in PATTERN, - # so it's being done here. - if is_assign_target(node): - head = results["head"] - if "".join([str(n) for n in head]).strip() == '__builtin__': - self.warning(node, bind_warning) - return - attr.replace(Name("__next__")) - elif "global" in results: - self.warning(node, bind_warning) - self.shadowed_next = True - - -### The following functions help test if node is part of an assignment -### target. - -def is_assign_target(node): - assign = find_assign(node) - if assign is None: - return False - - for child in assign.children: - if child.type == token.EQUAL: - return False - elif is_subtree(child, node): - return True - return False - -def find_assign(node): - if node.type == syms.expr_stmt: - return node - if node.type == syms.simple_stmt or node.parent is None: - return None - return find_assign(node.parent) - -def is_subtree(root, node): - if root == node: - return True - return any(is_subtree(c, node) for c in root.children) diff --git a/PythonLib/full/lib2to3/fixes/fix_nonzero.py b/PythonLib/full/lib2to3/fixes/fix_nonzero.py deleted file mode 100644 index c2295969..00000000 --- a/PythonLib/full/lib2to3/fixes/fix_nonzero.py +++ /dev/null @@ -1,21 +0,0 @@ -"""Fixer for __nonzero__ -> __bool__ methods.""" -# Author: Collin Winter - -# Local imports -from .. import fixer_base -from ..fixer_util import Name - -class FixNonzero(fixer_base.BaseFix): - BM_compatible = True - PATTERN = """ - classdef< 'class' any+ ':' - suite< any* - funcdef< 'def' name='__nonzero__' - parameters< '(' NAME ')' > any+ > - any* > > - """ - - def transform(self, node, results): - name = results["name"] - new = Name("__bool__", prefix=name.prefix) - name.replace(new) diff --git a/PythonLib/full/lib2to3/fixes/fix_numliterals.py b/PythonLib/full/lib2to3/fixes/fix_numliterals.py deleted file mode 100644 index 79207d4a..00000000 --- a/PythonLib/full/lib2to3/fixes/fix_numliterals.py +++ /dev/null @@ -1,28 +0,0 @@ -"""Fixer that turns 1L into 1, 0755 into 0o755. -""" -# Copyright 2007 Georg Brandl. -# Licensed to PSF under a Contributor Agreement. - -# Local imports -from ..pgen2 import token -from .. import fixer_base -from ..fixer_util import Number - - -class FixNumliterals(fixer_base.BaseFix): - # This is so simple that we don't need the pattern compiler. - - _accept_type = token.NUMBER - - def match(self, node): - # Override - return (node.value.startswith("0") or node.value[-1] in "Ll") - - def transform(self, node, results): - val = node.value - if val[-1] in 'Ll': - val = val[:-1] - elif val.startswith('0') and val.isdigit() and len(set(val)) > 1: - val = "0o" + val[1:] - - return Number(val, prefix=node.prefix) diff --git a/PythonLib/full/lib2to3/fixes/fix_operator.py b/PythonLib/full/lib2to3/fixes/fix_operator.py deleted file mode 100644 index d303cd20..00000000 --- a/PythonLib/full/lib2to3/fixes/fix_operator.py +++ /dev/null @@ -1,97 +0,0 @@ -"""Fixer for operator functions. - -operator.isCallable(obj) -> callable(obj) -operator.sequenceIncludes(obj) -> operator.contains(obj) -operator.isSequenceType(obj) -> isinstance(obj, collections.abc.Sequence) -operator.isMappingType(obj) -> isinstance(obj, collections.abc.Mapping) -operator.isNumberType(obj) -> isinstance(obj, numbers.Number) -operator.repeat(obj, n) -> operator.mul(obj, n) -operator.irepeat(obj, n) -> operator.imul(obj, n) -""" - -import collections.abc - -# Local imports -from lib2to3 import fixer_base -from lib2to3.fixer_util import Call, Name, String, touch_import - - -def invocation(s): - def dec(f): - f.invocation = s - return f - return dec - - -class FixOperator(fixer_base.BaseFix): - BM_compatible = True - order = "pre" - - methods = """ - method=('isCallable'|'sequenceIncludes' - |'isSequenceType'|'isMappingType'|'isNumberType' - |'repeat'|'irepeat') - """ - obj = "'(' obj=any ')'" - PATTERN = """ - power< module='operator' - trailer< '.' %(methods)s > trailer< %(obj)s > > - | - power< %(methods)s trailer< %(obj)s > > - """ % dict(methods=methods, obj=obj) - - def transform(self, node, results): - method = self._check_method(node, results) - if method is not None: - return method(node, results) - - @invocation("operator.contains(%s)") - def _sequenceIncludes(self, node, results): - return self._handle_rename(node, results, "contains") - - @invocation("callable(%s)") - def _isCallable(self, node, results): - obj = results["obj"] - return Call(Name("callable"), [obj.clone()], prefix=node.prefix) - - @invocation("operator.mul(%s)") - def _repeat(self, node, results): - return self._handle_rename(node, results, "mul") - - @invocation("operator.imul(%s)") - def _irepeat(self, node, results): - return self._handle_rename(node, results, "imul") - - @invocation("isinstance(%s, collections.abc.Sequence)") - def _isSequenceType(self, node, results): - return self._handle_type2abc(node, results, "collections.abc", "Sequence") - - @invocation("isinstance(%s, collections.abc.Mapping)") - def _isMappingType(self, node, results): - return self._handle_type2abc(node, results, "collections.abc", "Mapping") - - @invocation("isinstance(%s, numbers.Number)") - def _isNumberType(self, node, results): - return self._handle_type2abc(node, results, "numbers", "Number") - - def _handle_rename(self, node, results, name): - method = results["method"][0] - method.value = name - method.changed() - - def _handle_type2abc(self, node, results, module, abc): - touch_import(None, module, node) - obj = results["obj"] - args = [obj.clone(), String(", " + ".".join([module, abc]))] - return Call(Name("isinstance"), args, prefix=node.prefix) - - def _check_method(self, node, results): - method = getattr(self, "_" + results["method"][0].value) - if isinstance(method, collections.abc.Callable): - if "module" in results: - return method - else: - sub = (str(results["obj"]),) - invocation_str = method.invocation % sub - self.warning(node, "You should use '%s' here." % invocation_str) - return None diff --git a/PythonLib/full/lib2to3/fixes/fix_paren.py b/PythonLib/full/lib2to3/fixes/fix_paren.py deleted file mode 100644 index df3da5f5..00000000 --- a/PythonLib/full/lib2to3/fixes/fix_paren.py +++ /dev/null @@ -1,44 +0,0 @@ -"""Fixer that adds parentheses where they are required - -This converts ``[x for x in 1, 2]`` to ``[x for x in (1, 2)]``.""" - -# By Taek Joo Kim and Benjamin Peterson - -# Local imports -from .. import fixer_base -from ..fixer_util import LParen, RParen - -# XXX This doesn't support nested for loops like [x for x in 1, 2 for x in 1, 2] -class FixParen(fixer_base.BaseFix): - BM_compatible = True - - PATTERN = """ - atom< ('[' | '(') - (listmaker< any - comp_for< - 'for' NAME 'in' - target=testlist_safe< any (',' any)+ [','] - > - [any] - > - > - | - testlist_gexp< any - comp_for< - 'for' NAME 'in' - target=testlist_safe< any (',' any)+ [','] - > - [any] - > - >) - (']' | ')') > - """ - - def transform(self, node, results): - target = results["target"] - - lparen = LParen() - lparen.prefix = target.prefix - target.prefix = "" # Make it hug the parentheses - target.insert_child(0, lparen) - target.append_child(RParen()) diff --git a/PythonLib/full/lib2to3/fixes/fix_print.py b/PythonLib/full/lib2to3/fixes/fix_print.py deleted file mode 100644 index 87803222..00000000 --- a/PythonLib/full/lib2to3/fixes/fix_print.py +++ /dev/null @@ -1,87 +0,0 @@ -# Copyright 2006 Google, Inc. All Rights Reserved. -# Licensed to PSF under a Contributor Agreement. - -"""Fixer for print. - -Change: - 'print' into 'print()' - 'print ...' into 'print(...)' - 'print ... ,' into 'print(..., end=" ")' - 'print >>x, ...' into 'print(..., file=x)' - -No changes are applied if print_function is imported from __future__ - -""" - -# Local imports -from .. import patcomp -from .. import pytree -from ..pgen2 import token -from .. import fixer_base -from ..fixer_util import Name, Call, Comma, String - - -parend_expr = patcomp.compile_pattern( - """atom< '(' [atom|STRING|NAME] ')' >""" - ) - - -class FixPrint(fixer_base.BaseFix): - - BM_compatible = True - - PATTERN = """ - simple_stmt< any* bare='print' any* > | print_stmt - """ - - def transform(self, node, results): - assert results - - bare_print = results.get("bare") - - if bare_print: - # Special-case print all by itself - bare_print.replace(Call(Name("print"), [], - prefix=bare_print.prefix)) - return - assert node.children[0] == Name("print") - args = node.children[1:] - if len(args) == 1 and parend_expr.match(args[0]): - # We don't want to keep sticking parens around an - # already-parenthesised expression. - return - - sep = end = file = None - if args and args[-1] == Comma(): - args = args[:-1] - end = " " - if args and args[0] == pytree.Leaf(token.RIGHTSHIFT, ">>"): - assert len(args) >= 2 - file = args[1].clone() - args = args[3:] # Strip a possible comma after the file expression - # Now synthesize a print(args, sep=..., end=..., file=...) node. - l_args = [arg.clone() for arg in args] - if l_args: - l_args[0].prefix = "" - if sep is not None or end is not None or file is not None: - if sep is not None: - self.add_kwarg(l_args, "sep", String(repr(sep))) - if end is not None: - self.add_kwarg(l_args, "end", String(repr(end))) - if file is not None: - self.add_kwarg(l_args, "file", file) - n_stmt = Call(Name("print"), l_args) - n_stmt.prefix = node.prefix - return n_stmt - - def add_kwarg(self, l_nodes, s_kwd, n_expr): - # XXX All this prefix-setting may lose comments (though rarely) - n_expr.prefix = "" - n_argument = pytree.Node(self.syms.argument, - (Name(s_kwd), - pytree.Leaf(token.EQUAL, "="), - n_expr)) - if l_nodes: - l_nodes.append(Comma()) - n_argument.prefix = " " - l_nodes.append(n_argument) diff --git a/PythonLib/full/lib2to3/fixes/fix_raise.py b/PythonLib/full/lib2to3/fixes/fix_raise.py deleted file mode 100644 index 05aa21e7..00000000 --- a/PythonLib/full/lib2to3/fixes/fix_raise.py +++ /dev/null @@ -1,90 +0,0 @@ -"""Fixer for 'raise E, V, T' - -raise -> raise -raise E -> raise E -raise E, V -> raise E(V) -raise E, V, T -> raise E(V).with_traceback(T) -raise E, None, T -> raise E.with_traceback(T) - -raise (((E, E'), E''), E'''), V -> raise E(V) -raise "foo", V, T -> warns about string exceptions - - -CAVEATS: -1) "raise E, V" will be incorrectly translated if V is an exception - instance. The correct Python 3 idiom is - - raise E from V - - but since we can't detect instance-hood by syntax alone and since - any client code would have to be changed as well, we don't automate - this. -""" -# Author: Collin Winter - -# Local imports -from .. import pytree -from ..pgen2 import token -from .. import fixer_base -from ..fixer_util import Name, Call, Attr, ArgList, is_tuple - -class FixRaise(fixer_base.BaseFix): - - BM_compatible = True - PATTERN = """ - raise_stmt< 'raise' exc=any [',' val=any [',' tb=any]] > - """ - - def transform(self, node, results): - syms = self.syms - - exc = results["exc"].clone() - if exc.type == token.STRING: - msg = "Python 3 does not support string exceptions" - self.cannot_convert(node, msg) - return - - # Python 2 supports - # raise ((((E1, E2), E3), E4), E5), V - # as a synonym for - # raise E1, V - # Since Python 3 will not support this, we recurse down any tuple - # literals, always taking the first element. - if is_tuple(exc): - while is_tuple(exc): - # exc.children[1:-1] is the unparenthesized tuple - # exc.children[1].children[0] is the first element of the tuple - exc = exc.children[1].children[0].clone() - exc.prefix = " " - - if "val" not in results: - # One-argument raise - new = pytree.Node(syms.raise_stmt, [Name("raise"), exc]) - new.prefix = node.prefix - return new - - val = results["val"].clone() - if is_tuple(val): - args = [c.clone() for c in val.children[1:-1]] - else: - val.prefix = "" - args = [val] - - if "tb" in results: - tb = results["tb"].clone() - tb.prefix = "" - - e = exc - # If there's a traceback and None is passed as the value, then don't - # add a call, since the user probably just wants to add a - # traceback. See issue #9661. - if val.type != token.NAME or val.value != "None": - e = Call(exc, args) - with_tb = Attr(e, Name('with_traceback')) + [ArgList([tb])] - new = pytree.Node(syms.simple_stmt, [Name("raise")] + with_tb) - new.prefix = node.prefix - return new - else: - return pytree.Node(syms.raise_stmt, - [Name("raise"), Call(exc, args)], - prefix=node.prefix) diff --git a/PythonLib/full/lib2to3/fixes/fix_raw_input.py b/PythonLib/full/lib2to3/fixes/fix_raw_input.py deleted file mode 100644 index a51bb694..00000000 --- a/PythonLib/full/lib2to3/fixes/fix_raw_input.py +++ /dev/null @@ -1,17 +0,0 @@ -"""Fixer that changes raw_input(...) into input(...).""" -# Author: Andre Roberge - -# Local imports -from .. import fixer_base -from ..fixer_util import Name - -class FixRawInput(fixer_base.BaseFix): - - BM_compatible = True - PATTERN = """ - power< name='raw_input' trailer< '(' [any] ')' > any* > - """ - - def transform(self, node, results): - name = results["name"] - name.replace(Name("input", prefix=name.prefix)) diff --git a/PythonLib/full/lib2to3/fixes/fix_reduce.py b/PythonLib/full/lib2to3/fixes/fix_reduce.py deleted file mode 100644 index 00e5aa1c..00000000 --- a/PythonLib/full/lib2to3/fixes/fix_reduce.py +++ /dev/null @@ -1,35 +0,0 @@ -# Copyright 2008 Armin Ronacher. -# Licensed to PSF under a Contributor Agreement. - -"""Fixer for reduce(). - -Makes sure reduce() is imported from the functools module if reduce is -used in that module. -""" - -from lib2to3 import fixer_base -from lib2to3.fixer_util import touch_import - - - -class FixReduce(fixer_base.BaseFix): - - BM_compatible = True - order = "pre" - - PATTERN = """ - power< 'reduce' - trailer< '(' - arglist< ( - (not(argument) any ',' - not(argument - > - """ - - def transform(self, node, results): - touch_import('functools', 'reduce', node) diff --git a/PythonLib/full/lib2to3/fixes/fix_reload.py b/PythonLib/full/lib2to3/fixes/fix_reload.py deleted file mode 100644 index b3084113..00000000 --- a/PythonLib/full/lib2to3/fixes/fix_reload.py +++ /dev/null @@ -1,36 +0,0 @@ -"""Fixer for reload(). - -reload(s) -> importlib.reload(s)""" - -# Local imports -from .. import fixer_base -from ..fixer_util import ImportAndCall, touch_import - - -class FixReload(fixer_base.BaseFix): - BM_compatible = True - order = "pre" - - PATTERN = """ - power< 'reload' - trailer< lpar='(' - ( not(arglist | argument) any ','> ) - rpar=')' > - after=any* - > - """ - - def transform(self, node, results): - if results: - # I feel like we should be able to express this logic in the - # PATTERN above but I don't know how to do it so... - obj = results['obj'] - if obj: - if (obj.type == self.syms.argument and - obj.children[0].value in {'**', '*'}): - return # Make no change. - names = ('importlib', 'reload') - new = ImportAndCall(node, results, names) - touch_import(None, 'importlib', node) - return new diff --git a/PythonLib/full/lib2to3/fixes/fix_renames.py b/PythonLib/full/lib2to3/fixes/fix_renames.py deleted file mode 100644 index c0e3705a..00000000 --- a/PythonLib/full/lib2to3/fixes/fix_renames.py +++ /dev/null @@ -1,70 +0,0 @@ -"""Fix incompatible renames - -Fixes: - * sys.maxint -> sys.maxsize -""" -# Author: Christian Heimes -# based on Collin Winter's fix_import - -# Local imports -from .. import fixer_base -from ..fixer_util import Name, attr_chain - -MAPPING = {"sys": {"maxint" : "maxsize"}, - } -LOOKUP = {} - -def alternates(members): - return "(" + "|".join(map(repr, members)) + ")" - - -def build_pattern(): - #bare = set() - for module, replace in list(MAPPING.items()): - for old_attr, new_attr in list(replace.items()): - LOOKUP[(module, old_attr)] = new_attr - #bare.add(module) - #bare.add(old_attr) - #yield """ - # import_name< 'import' (module=%r - # | dotted_as_names< any* module=%r any* >) > - # """ % (module, module) - yield """ - import_from< 'from' module_name=%r 'import' - ( attr_name=%r | import_as_name< attr_name=%r 'as' any >) > - """ % (module, old_attr, old_attr) - yield """ - power< module_name=%r trailer< '.' attr_name=%r > any* > - """ % (module, old_attr) - #yield """bare_name=%s""" % alternates(bare) - - -class FixRenames(fixer_base.BaseFix): - BM_compatible = True - PATTERN = "|".join(build_pattern()) - - order = "pre" # Pre-order tree traversal - - # Don't match the node if it's within another match - def match(self, node): - match = super(FixRenames, self).match - results = match(node) - if results: - if any(match(obj) for obj in attr_chain(node, "parent")): - return False - return results - return False - - #def start_tree(self, tree, filename): - # super(FixRenames, self).start_tree(tree, filename) - # self.replace = {} - - def transform(self, node, results): - mod_name = results.get("module_name") - attr_name = results.get("attr_name") - #bare_name = results.get("bare_name") - #import_mod = results.get("module") - - if mod_name and attr_name: - new_attr = LOOKUP[(mod_name.value, attr_name.value)] - attr_name.replace(Name(new_attr, prefix=attr_name.prefix)) diff --git a/PythonLib/full/lib2to3/fixes/fix_repr.py b/PythonLib/full/lib2to3/fixes/fix_repr.py deleted file mode 100644 index 1150bb8b..00000000 --- a/PythonLib/full/lib2to3/fixes/fix_repr.py +++ /dev/null @@ -1,23 +0,0 @@ -# Copyright 2006 Google, Inc. All Rights Reserved. -# Licensed to PSF under a Contributor Agreement. - -"""Fixer that transforms `xyzzy` into repr(xyzzy).""" - -# Local imports -from .. import fixer_base -from ..fixer_util import Call, Name, parenthesize - - -class FixRepr(fixer_base.BaseFix): - - BM_compatible = True - PATTERN = """ - atom < '`' expr=any '`' > - """ - - def transform(self, node, results): - expr = results["expr"].clone() - - if expr.type == self.syms.testlist1: - expr = parenthesize(expr) - return Call(Name("repr"), [expr], prefix=node.prefix) diff --git a/PythonLib/full/lib2to3/fixes/fix_set_literal.py b/PythonLib/full/lib2to3/fixes/fix_set_literal.py deleted file mode 100644 index 762550cf..00000000 --- a/PythonLib/full/lib2to3/fixes/fix_set_literal.py +++ /dev/null @@ -1,53 +0,0 @@ -""" -Optional fixer to transform set() calls to set literals. -""" - -# Author: Benjamin Peterson - -from lib2to3 import fixer_base, pytree -from lib2to3.fixer_util import token, syms - - - -class FixSetLiteral(fixer_base.BaseFix): - - BM_compatible = True - explicit = True - - PATTERN = """power< 'set' trailer< '(' - (atom=atom< '[' (items=listmaker< any ((',' any)* [',']) > - | - single=any) ']' > - | - atom< '(' items=testlist_gexp< any ((',' any)* [',']) > ')' > - ) - ')' > > - """ - - def transform(self, node, results): - single = results.get("single") - if single: - # Make a fake listmaker - fake = pytree.Node(syms.listmaker, [single.clone()]) - single.replace(fake) - items = fake - else: - items = results["items"] - - # Build the contents of the literal - literal = [pytree.Leaf(token.LBRACE, "{")] - literal.extend(n.clone() for n in items.children) - literal.append(pytree.Leaf(token.RBRACE, "}")) - # Set the prefix of the right brace to that of the ')' or ']' - literal[-1].prefix = items.next_sibling.prefix - maker = pytree.Node(syms.dictsetmaker, literal) - maker.prefix = node.prefix - - # If the original was a one tuple, we need to remove the extra comma. - if len(maker.children) == 4: - n = maker.children[2] - n.remove() - maker.children[-1].prefix = n.prefix - - # Finally, replace the set call with our shiny new literal. - return maker diff --git a/PythonLib/full/lib2to3/fixes/fix_standarderror.py b/PythonLib/full/lib2to3/fixes/fix_standarderror.py deleted file mode 100644 index dc742167..00000000 --- a/PythonLib/full/lib2to3/fixes/fix_standarderror.py +++ /dev/null @@ -1,18 +0,0 @@ -# Copyright 2007 Google, Inc. All Rights Reserved. -# Licensed to PSF under a Contributor Agreement. - -"""Fixer for StandardError -> Exception.""" - -# Local imports -from .. import fixer_base -from ..fixer_util import Name - - -class FixStandarderror(fixer_base.BaseFix): - BM_compatible = True - PATTERN = """ - 'StandardError' - """ - - def transform(self, node, results): - return Name("Exception", prefix=node.prefix) diff --git a/PythonLib/full/lib2to3/fixes/fix_sys_exc.py b/PythonLib/full/lib2to3/fixes/fix_sys_exc.py deleted file mode 100644 index f6039690..00000000 --- a/PythonLib/full/lib2to3/fixes/fix_sys_exc.py +++ /dev/null @@ -1,30 +0,0 @@ -"""Fixer for sys.exc_{type, value, traceback} - -sys.exc_type -> sys.exc_info()[0] -sys.exc_value -> sys.exc_info()[1] -sys.exc_traceback -> sys.exc_info()[2] -""" - -# By Jeff Balogh and Benjamin Peterson - -# Local imports -from .. import fixer_base -from ..fixer_util import Attr, Call, Name, Number, Subscript, Node, syms - -class FixSysExc(fixer_base.BaseFix): - # This order matches the ordering of sys.exc_info(). - exc_info = ["exc_type", "exc_value", "exc_traceback"] - BM_compatible = True - PATTERN = """ - power< 'sys' trailer< dot='.' attribute=(%s) > > - """ % '|'.join("'%s'" % e for e in exc_info) - - def transform(self, node, results): - sys_attr = results["attribute"][0] - index = Number(self.exc_info.index(sys_attr.value)) - - call = Call(Name("exc_info"), prefix=sys_attr.prefix) - attr = Attr(Name("sys"), call) - attr[1].children[0].prefix = results["dot"].prefix - attr.append(Subscript(index)) - return Node(syms.power, attr, prefix=node.prefix) diff --git a/PythonLib/full/lib2to3/fixes/fix_throw.py b/PythonLib/full/lib2to3/fixes/fix_throw.py deleted file mode 100644 index aac29169..00000000 --- a/PythonLib/full/lib2to3/fixes/fix_throw.py +++ /dev/null @@ -1,56 +0,0 @@ -"""Fixer for generator.throw(E, V, T). - -g.throw(E) -> g.throw(E) -g.throw(E, V) -> g.throw(E(V)) -g.throw(E, V, T) -> g.throw(E(V).with_traceback(T)) - -g.throw("foo"[, V[, T]]) will warn about string exceptions.""" -# Author: Collin Winter - -# Local imports -from .. import pytree -from ..pgen2 import token -from .. import fixer_base -from ..fixer_util import Name, Call, ArgList, Attr, is_tuple - -class FixThrow(fixer_base.BaseFix): - BM_compatible = True - PATTERN = """ - power< any trailer< '.' 'throw' > - trailer< '(' args=arglist< exc=any ',' val=any [',' tb=any] > ')' > - > - | - power< any trailer< '.' 'throw' > trailer< '(' exc=any ')' > > - """ - - def transform(self, node, results): - syms = self.syms - - exc = results["exc"].clone() - if exc.type is token.STRING: - self.cannot_convert(node, "Python 3 does not support string exceptions") - return - - # Leave "g.throw(E)" alone - val = results.get("val") - if val is None: - return - - val = val.clone() - if is_tuple(val): - args = [c.clone() for c in val.children[1:-1]] - else: - val.prefix = "" - args = [val] - - throw_args = results["args"] - - if "tb" in results: - tb = results["tb"].clone() - tb.prefix = "" - - e = Call(exc, args) - with_tb = Attr(e, Name('with_traceback')) + [ArgList([tb])] - throw_args.replace(pytree.Node(syms.power, with_tb)) - else: - throw_args.replace(Call(exc, args)) diff --git a/PythonLib/full/lib2to3/fixes/fix_tuple_params.py b/PythonLib/full/lib2to3/fixes/fix_tuple_params.py deleted file mode 100644 index cad755ff..00000000 --- a/PythonLib/full/lib2to3/fixes/fix_tuple_params.py +++ /dev/null @@ -1,175 +0,0 @@ -"""Fixer for function definitions with tuple parameters. - -def func(((a, b), c), d): - ... - - -> - -def func(x, d): - ((a, b), c) = x - ... - -It will also support lambdas: - - lambda (x, y): x + y -> lambda t: t[0] + t[1] - - # The parens are a syntax error in Python 3 - lambda (x): x + y -> lambda x: x + y -""" -# Author: Collin Winter - -# Local imports -from .. import pytree -from ..pgen2 import token -from .. import fixer_base -from ..fixer_util import Assign, Name, Newline, Number, Subscript, syms - -def is_docstring(stmt): - return isinstance(stmt, pytree.Node) and \ - stmt.children[0].type == token.STRING - -class FixTupleParams(fixer_base.BaseFix): - run_order = 4 #use a lower order since lambda is part of other - #patterns - BM_compatible = True - - PATTERN = """ - funcdef< 'def' any parameters< '(' args=any ')' > - ['->' any] ':' suite=any+ > - | - lambda= - lambdef< 'lambda' args=vfpdef< '(' inner=any ')' > - ':' body=any - > - """ - - def transform(self, node, results): - if "lambda" in results: - return self.transform_lambda(node, results) - - new_lines = [] - suite = results["suite"] - args = results["args"] - # This crap is so "def foo(...): x = 5; y = 7" is handled correctly. - # TODO(cwinter): suite-cleanup - if suite[0].children[1].type == token.INDENT: - start = 2 - indent = suite[0].children[1].value - end = Newline() - else: - start = 0 - indent = "; " - end = pytree.Leaf(token.INDENT, "") - - # We need access to self for new_name(), and making this a method - # doesn't feel right. Closing over self and new_lines makes the - # code below cleaner. - def handle_tuple(tuple_arg, add_prefix=False): - n = Name(self.new_name()) - arg = tuple_arg.clone() - arg.prefix = "" - stmt = Assign(arg, n.clone()) - if add_prefix: - n.prefix = " " - tuple_arg.replace(n) - new_lines.append(pytree.Node(syms.simple_stmt, - [stmt, end.clone()])) - - if args.type == syms.tfpdef: - handle_tuple(args) - elif args.type == syms.typedargslist: - for i, arg in enumerate(args.children): - if arg.type == syms.tfpdef: - # Without add_prefix, the emitted code is correct, - # just ugly. - handle_tuple(arg, add_prefix=(i > 0)) - - if not new_lines: - return - - # This isn't strictly necessary, but it plays nicely with other fixers. - # TODO(cwinter) get rid of this when children becomes a smart list - for line in new_lines: - line.parent = suite[0] - - # TODO(cwinter) suite-cleanup - after = start - if start == 0: - new_lines[0].prefix = " " - elif is_docstring(suite[0].children[start]): - new_lines[0].prefix = indent - after = start + 1 - - for line in new_lines: - line.parent = suite[0] - suite[0].children[after:after] = new_lines - for i in range(after+1, after+len(new_lines)+1): - suite[0].children[i].prefix = indent - suite[0].changed() - - def transform_lambda(self, node, results): - args = results["args"] - body = results["body"] - inner = simplify_args(results["inner"]) - - # Replace lambda ((((x)))): x with lambda x: x - if inner.type == token.NAME: - inner = inner.clone() - inner.prefix = " " - args.replace(inner) - return - - params = find_params(args) - to_index = map_to_index(params) - tup_name = self.new_name(tuple_name(params)) - - new_param = Name(tup_name, prefix=" ") - args.replace(new_param.clone()) - for n in body.post_order(): - if n.type == token.NAME and n.value in to_index: - subscripts = [c.clone() for c in to_index[n.value]] - new = pytree.Node(syms.power, - [new_param.clone()] + subscripts) - new.prefix = n.prefix - n.replace(new) - - -### Helper functions for transform_lambda() - -def simplify_args(node): - if node.type in (syms.vfplist, token.NAME): - return node - elif node.type == syms.vfpdef: - # These look like vfpdef< '(' x ')' > where x is NAME - # or another vfpdef instance (leading to recursion). - while node.type == syms.vfpdef: - node = node.children[1] - return node - raise RuntimeError("Received unexpected node %s" % node) - -def find_params(node): - if node.type == syms.vfpdef: - return find_params(node.children[1]) - elif node.type == token.NAME: - return node.value - return [find_params(c) for c in node.children if c.type != token.COMMA] - -def map_to_index(param_list, prefix=[], d=None): - if d is None: - d = {} - for i, obj in enumerate(param_list): - trailer = [Subscript(Number(str(i)))] - if isinstance(obj, list): - map_to_index(obj, trailer, d=d) - else: - d[obj] = prefix + trailer - return d - -def tuple_name(param_list): - l = [] - for obj in param_list: - if isinstance(obj, list): - l.append(tuple_name(obj)) - else: - l.append(obj) - return "_".join(l) diff --git a/PythonLib/full/lib2to3/fixes/fix_types.py b/PythonLib/full/lib2to3/fixes/fix_types.py deleted file mode 100644 index 67bf51f2..00000000 --- a/PythonLib/full/lib2to3/fixes/fix_types.py +++ /dev/null @@ -1,61 +0,0 @@ -# Copyright 2007 Google, Inc. All Rights Reserved. -# Licensed to PSF under a Contributor Agreement. - -"""Fixer for removing uses of the types module. - -These work for only the known names in the types module. The forms above -can include types. or not. ie, It is assumed the module is imported either as: - - import types - from types import ... # either * or specific types - -The import statements are not modified. - -There should be another fixer that handles at least the following constants: - - type([]) -> list - type(()) -> tuple - type('') -> str - -""" - -# Local imports -from .. import fixer_base -from ..fixer_util import Name - -_TYPE_MAPPING = { - 'BooleanType' : 'bool', - 'BufferType' : 'memoryview', - 'ClassType' : 'type', - 'ComplexType' : 'complex', - 'DictType': 'dict', - 'DictionaryType' : 'dict', - 'EllipsisType' : 'type(Ellipsis)', - #'FileType' : 'io.IOBase', - 'FloatType': 'float', - 'IntType': 'int', - 'ListType': 'list', - 'LongType': 'int', - 'ObjectType' : 'object', - 'NoneType': 'type(None)', - 'NotImplementedType' : 'type(NotImplemented)', - 'SliceType' : 'slice', - 'StringType': 'bytes', # XXX ? - 'StringTypes' : '(str,)', # XXX ? - 'TupleType': 'tuple', - 'TypeType' : 'type', - 'UnicodeType': 'str', - 'XRangeType' : 'range', - } - -_pats = ["power< 'types' trailer< '.' name='%s' > >" % t for t in _TYPE_MAPPING] - -class FixTypes(fixer_base.BaseFix): - BM_compatible = True - PATTERN = '|'.join(_pats) - - def transform(self, node, results): - new_value = _TYPE_MAPPING.get(results["name"].value) - if new_value: - return Name(new_value, prefix=node.prefix) - return None diff --git a/PythonLib/full/lib2to3/fixes/fix_unicode.py b/PythonLib/full/lib2to3/fixes/fix_unicode.py deleted file mode 100644 index c7982c2b..00000000 --- a/PythonLib/full/lib2to3/fixes/fix_unicode.py +++ /dev/null @@ -1,42 +0,0 @@ -r"""Fixer for unicode. - -* Changes unicode to str and unichr to chr. - -* If "...\u..." is not unicode literal change it into "...\\u...". - -* Change u"..." into "...". - -""" - -from ..pgen2 import token -from .. import fixer_base - -_mapping = {"unichr" : "chr", "unicode" : "str"} - -class FixUnicode(fixer_base.BaseFix): - BM_compatible = True - PATTERN = "STRING | 'unicode' | 'unichr'" - - def start_tree(self, tree, filename): - super(FixUnicode, self).start_tree(tree, filename) - self.unicode_literals = 'unicode_literals' in tree.future_features - - def transform(self, node, results): - if node.type == token.NAME: - new = node.clone() - new.value = _mapping[node.value] - return new - elif node.type == token.STRING: - val = node.value - if not self.unicode_literals and val[0] in '\'"' and '\\' in val: - val = r'\\'.join([ - v.replace('\\u', r'\\u').replace('\\U', r'\\U') - for v in val.split(r'\\') - ]) - if val[0] in 'uU': - val = val[1:] - if val == node.value: - return node - new = node.clone() - new.value = val - return new diff --git a/PythonLib/full/lib2to3/fixes/fix_urllib.py b/PythonLib/full/lib2to3/fixes/fix_urllib.py deleted file mode 100644 index ab892bc5..00000000 --- a/PythonLib/full/lib2to3/fixes/fix_urllib.py +++ /dev/null @@ -1,196 +0,0 @@ -"""Fix changes imports of urllib which are now incompatible. - This is rather similar to fix_imports, but because of the more - complex nature of the fixing for urllib, it has its own fixer. -""" -# Author: Nick Edds - -# Local imports -from lib2to3.fixes.fix_imports import alternates, FixImports -from lib2to3.fixer_util import (Name, Comma, FromImport, Newline, - find_indentation, Node, syms) - -MAPPING = {"urllib": [ - ("urllib.request", - ["URLopener", "FancyURLopener", "urlretrieve", - "_urlopener", "urlopen", "urlcleanup", - "pathname2url", "url2pathname", "getproxies"]), - ("urllib.parse", - ["quote", "quote_plus", "unquote", "unquote_plus", - "urlencode", "splitattr", "splithost", "splitnport", - "splitpasswd", "splitport", "splitquery", "splittag", - "splittype", "splituser", "splitvalue", ]), - ("urllib.error", - ["ContentTooShortError"])], - "urllib2" : [ - ("urllib.request", - ["urlopen", "install_opener", "build_opener", - "Request", "OpenerDirector", "BaseHandler", - "HTTPDefaultErrorHandler", "HTTPRedirectHandler", - "HTTPCookieProcessor", "ProxyHandler", - "HTTPPasswordMgr", - "HTTPPasswordMgrWithDefaultRealm", - "AbstractBasicAuthHandler", - "HTTPBasicAuthHandler", "ProxyBasicAuthHandler", - "AbstractDigestAuthHandler", - "HTTPDigestAuthHandler", "ProxyDigestAuthHandler", - "HTTPHandler", "HTTPSHandler", "FileHandler", - "FTPHandler", "CacheFTPHandler", - "UnknownHandler"]), - ("urllib.error", - ["URLError", "HTTPError"]), - ] -} - -# Duplicate the url parsing functions for urllib2. -MAPPING["urllib2"].append(MAPPING["urllib"][1]) - - -def build_pattern(): - bare = set() - for old_module, changes in MAPPING.items(): - for change in changes: - new_module, members = change - members = alternates(members) - yield """import_name< 'import' (module=%r - | dotted_as_names< any* module=%r any* >) > - """ % (old_module, old_module) - yield """import_from< 'from' mod_member=%r 'import' - ( member=%s | import_as_name< member=%s 'as' any > | - import_as_names< members=any* >) > - """ % (old_module, members, members) - yield """import_from< 'from' module_star=%r 'import' star='*' > - """ % old_module - yield """import_name< 'import' - dotted_as_name< module_as=%r 'as' any > > - """ % old_module - # bare_with_attr has a special significance for FixImports.match(). - yield """power< bare_with_attr=%r trailer< '.' member=%s > any* > - """ % (old_module, members) - - -class FixUrllib(FixImports): - - def build_pattern(self): - return "|".join(build_pattern()) - - def transform_import(self, node, results): - """Transform for the basic import case. Replaces the old - import name with a comma separated list of its - replacements. - """ - import_mod = results.get("module") - pref = import_mod.prefix - - names = [] - - # create a Node list of the replacement modules - for name in MAPPING[import_mod.value][:-1]: - names.extend([Name(name[0], prefix=pref), Comma()]) - names.append(Name(MAPPING[import_mod.value][-1][0], prefix=pref)) - import_mod.replace(names) - - def transform_member(self, node, results): - """Transform for imports of specific module elements. Replaces - the module to be imported from with the appropriate new - module. - """ - mod_member = results.get("mod_member") - pref = mod_member.prefix - member = results.get("member") - - # Simple case with only a single member being imported - if member: - # this may be a list of length one, or just a node - if isinstance(member, list): - member = member[0] - new_name = None - for change in MAPPING[mod_member.value]: - if member.value in change[1]: - new_name = change[0] - break - if new_name: - mod_member.replace(Name(new_name, prefix=pref)) - else: - self.cannot_convert(node, "This is an invalid module element") - - # Multiple members being imported - else: - # a dictionary for replacements, order matters - modules = [] - mod_dict = {} - members = results["members"] - for member in members: - # we only care about the actual members - if member.type == syms.import_as_name: - as_name = member.children[2].value - member_name = member.children[0].value - else: - member_name = member.value - as_name = None - if member_name != ",": - for change in MAPPING[mod_member.value]: - if member_name in change[1]: - if change[0] not in mod_dict: - modules.append(change[0]) - mod_dict.setdefault(change[0], []).append(member) - - new_nodes = [] - indentation = find_indentation(node) - first = True - def handle_name(name, prefix): - if name.type == syms.import_as_name: - kids = [Name(name.children[0].value, prefix=prefix), - name.children[1].clone(), - name.children[2].clone()] - return [Node(syms.import_as_name, kids)] - return [Name(name.value, prefix=prefix)] - for module in modules: - elts = mod_dict[module] - names = [] - for elt in elts[:-1]: - names.extend(handle_name(elt, pref)) - names.append(Comma()) - names.extend(handle_name(elts[-1], pref)) - new = FromImport(module, names) - if not first or node.parent.prefix.endswith(indentation): - new.prefix = indentation - new_nodes.append(new) - first = False - if new_nodes: - nodes = [] - for new_node in new_nodes[:-1]: - nodes.extend([new_node, Newline()]) - nodes.append(new_nodes[-1]) - node.replace(nodes) - else: - self.cannot_convert(node, "All module elements are invalid") - - def transform_dot(self, node, results): - """Transform for calls to module members in code.""" - module_dot = results.get("bare_with_attr") - member = results.get("member") - new_name = None - if isinstance(member, list): - member = member[0] - for change in MAPPING[module_dot.value]: - if member.value in change[1]: - new_name = change[0] - break - if new_name: - module_dot.replace(Name(new_name, - prefix=module_dot.prefix)) - else: - self.cannot_convert(node, "This is an invalid module element") - - def transform(self, node, results): - if results.get("module"): - self.transform_import(node, results) - elif results.get("mod_member"): - self.transform_member(node, results) - elif results.get("bare_with_attr"): - self.transform_dot(node, results) - # Renaming and star imports are not supported for these modules. - elif results.get("module_star"): - self.cannot_convert(node, "Cannot handle star imports.") - elif results.get("module_as"): - self.cannot_convert(node, "This module is now multiple modules") diff --git a/PythonLib/full/lib2to3/fixes/fix_ws_comma.py b/PythonLib/full/lib2to3/fixes/fix_ws_comma.py deleted file mode 100644 index a54a376c..00000000 --- a/PythonLib/full/lib2to3/fixes/fix_ws_comma.py +++ /dev/null @@ -1,39 +0,0 @@ -"""Fixer that changes 'a ,b' into 'a, b'. - -This also changes '{a :b}' into '{a: b}', but does not touch other -uses of colons. It does not touch other uses of whitespace. - -""" - -from .. import pytree -from ..pgen2 import token -from .. import fixer_base - -class FixWsComma(fixer_base.BaseFix): - - explicit = True # The user must ask for this fixers - - PATTERN = """ - any<(not(',') any)+ ',' ((not(',') any)+ ',')* [not(',') any]> - """ - - COMMA = pytree.Leaf(token.COMMA, ",") - COLON = pytree.Leaf(token.COLON, ":") - SEPS = (COMMA, COLON) - - def transform(self, node, results): - new = node.clone() - comma = False - for child in new.children: - if child in self.SEPS: - prefix = child.prefix - if prefix.isspace() and "\n" not in prefix: - child.prefix = "" - comma = True - else: - if comma: - prefix = child.prefix - if not prefix: - child.prefix = " " - comma = False - return new diff --git a/PythonLib/full/lib2to3/fixes/fix_xrange.py b/PythonLib/full/lib2to3/fixes/fix_xrange.py deleted file mode 100644 index 1e491e16..00000000 --- a/PythonLib/full/lib2to3/fixes/fix_xrange.py +++ /dev/null @@ -1,73 +0,0 @@ -# Copyright 2007 Google, Inc. All Rights Reserved. -# Licensed to PSF under a Contributor Agreement. - -"""Fixer that changes xrange(...) into range(...).""" - -# Local imports -from .. import fixer_base -from ..fixer_util import Name, Call, consuming_calls -from .. import patcomp - - -class FixXrange(fixer_base.BaseFix): - BM_compatible = True - PATTERN = """ - power< - (name='range'|name='xrange') trailer< '(' args=any ')' > - rest=any* > - """ - - def start_tree(self, tree, filename): - super(FixXrange, self).start_tree(tree, filename) - self.transformed_xranges = set() - - def finish_tree(self, tree, filename): - self.transformed_xranges = None - - def transform(self, node, results): - name = results["name"] - if name.value == "xrange": - return self.transform_xrange(node, results) - elif name.value == "range": - return self.transform_range(node, results) - else: - raise ValueError(repr(name)) - - def transform_xrange(self, node, results): - name = results["name"] - name.replace(Name("range", prefix=name.prefix)) - # This prevents the new range call from being wrapped in a list later. - self.transformed_xranges.add(id(node)) - - def transform_range(self, node, results): - if (id(node) not in self.transformed_xranges and - not self.in_special_context(node)): - range_call = Call(Name("range"), [results["args"].clone()]) - # Encase the range call in list(). - list_call = Call(Name("list"), [range_call], - prefix=node.prefix) - # Put things that were after the range() call after the list call. - for n in results["rest"]: - list_call.append_child(n) - return list_call - - P1 = "power< func=NAME trailer< '(' node=any ')' > any* >" - p1 = patcomp.compile_pattern(P1) - - P2 = """for_stmt< 'for' any 'in' node=any ':' any* > - | comp_for< 'for' any 'in' node=any any* > - | comparison< any 'in' node=any any*> - """ - p2 = patcomp.compile_pattern(P2) - - def in_special_context(self, node): - if node.parent is None: - return False - results = {} - if (node.parent.parent is not None and - self.p1.match(node.parent.parent, results) and - results["node"] is node): - # list(d.keys()) -> list(d.keys()), etc. - return results["func"].value in consuming_calls - # for ... in d.iterkeys() -> for ... in d.keys(), etc. - return self.p2.match(node.parent, results) and results["node"] is node diff --git a/PythonLib/full/lib2to3/fixes/fix_xreadlines.py b/PythonLib/full/lib2to3/fixes/fix_xreadlines.py deleted file mode 100644 index 3e3f71ab..00000000 --- a/PythonLib/full/lib2to3/fixes/fix_xreadlines.py +++ /dev/null @@ -1,25 +0,0 @@ -"""Fix "for x in f.xreadlines()" -> "for x in f". - -This fixer will also convert g(f.xreadlines) into g(f.__iter__).""" -# Author: Collin Winter - -# Local imports -from .. import fixer_base -from ..fixer_util import Name - - -class FixXreadlines(fixer_base.BaseFix): - BM_compatible = True - PATTERN = """ - power< call=any+ trailer< '.' 'xreadlines' > trailer< '(' ')' > > - | - power< any+ trailer< '.' no_call='xreadlines' > > - """ - - def transform(self, node, results): - no_call = results.get("no_call") - - if no_call: - no_call.replace(Name("__iter__", prefix=no_call.prefix)) - else: - node.replace([x.clone() for x in results["call"]]) diff --git a/PythonLib/full/lib2to3/fixes/fix_zip.py b/PythonLib/full/lib2to3/fixes/fix_zip.py deleted file mode 100644 index 52c28df6..00000000 --- a/PythonLib/full/lib2to3/fixes/fix_zip.py +++ /dev/null @@ -1,46 +0,0 @@ -""" -Fixer that changes zip(seq0, seq1, ...) into list(zip(seq0, seq1, ...) -unless there exists a 'from future_builtins import zip' statement in the -top-level namespace. - -We avoid the transformation if the zip() call is directly contained in -iter(<>), list(<>), tuple(<>), sorted(<>), ...join(<>), or for V in <>:. -""" - -# Local imports -from .. import fixer_base -from ..pytree import Node -from ..pygram import python_symbols as syms -from ..fixer_util import Name, ArgList, in_special_context - - -class FixZip(fixer_base.ConditionalFix): - - BM_compatible = True - PATTERN = """ - power< 'zip' args=trailer< '(' [any] ')' > [trailers=trailer*] - > - """ - - skip_on = "future_builtins.zip" - - def transform(self, node, results): - if self.should_skip(node): - return - - if in_special_context(node): - return None - - args = results['args'].clone() - args.prefix = "" - - trailers = [] - if 'trailers' in results: - trailers = [n.clone() for n in results['trailers']] - for n in trailers: - n.prefix = "" - - new = Node(syms.power, [Name("zip"), args], prefix="") - new = Node(syms.power, [Name("list"), ArgList([new])] + trailers) - new.prefix = node.prefix - return new diff --git a/PythonLib/full/lib2to3/main.py b/PythonLib/full/lib2to3/main.py deleted file mode 100644 index f2849fd6..00000000 --- a/PythonLib/full/lib2to3/main.py +++ /dev/null @@ -1,273 +0,0 @@ -""" -Main program for 2to3. -""" - -from __future__ import with_statement, print_function - -import sys -import os -import difflib -import logging -import shutil -import optparse - -from . import refactor - - -def diff_texts(a, b, filename): - """Return a unified diff of two strings.""" - a = a.splitlines() - b = b.splitlines() - return difflib.unified_diff(a, b, filename, filename, - "(original)", "(refactored)", - lineterm="") - - -class StdoutRefactoringTool(refactor.MultiprocessRefactoringTool): - """ - A refactoring tool that can avoid overwriting its input files. - Prints output to stdout. - - Output files can optionally be written to a different directory and or - have an extra file suffix appended to their name for use in situations - where you do not want to replace the input files. - """ - - def __init__(self, fixers, options, explicit, nobackups, show_diffs, - input_base_dir='', output_dir='', append_suffix=''): - """ - Args: - fixers: A list of fixers to import. - options: A dict with RefactoringTool configuration. - explicit: A list of fixers to run even if they are explicit. - nobackups: If true no backup '.bak' files will be created for those - files that are being refactored. - show_diffs: Should diffs of the refactoring be printed to stdout? - input_base_dir: The base directory for all input files. This class - will strip this path prefix off of filenames before substituting - it with output_dir. Only meaningful if output_dir is supplied. - All files processed by refactor() must start with this path. - output_dir: If supplied, all converted files will be written into - this directory tree instead of input_base_dir. - append_suffix: If supplied, all files output by this tool will have - this appended to their filename. Useful for changing .py to - .py3 for example by passing append_suffix='3'. - """ - self.nobackups = nobackups - self.show_diffs = show_diffs - if input_base_dir and not input_base_dir.endswith(os.sep): - input_base_dir += os.sep - self._input_base_dir = input_base_dir - self._output_dir = output_dir - self._append_suffix = append_suffix - super(StdoutRefactoringTool, self).__init__(fixers, options, explicit) - - def log_error(self, msg, *args, **kwargs): - self.errors.append((msg, args, kwargs)) - self.logger.error(msg, *args, **kwargs) - - def write_file(self, new_text, filename, old_text, encoding): - orig_filename = filename - if self._output_dir: - if filename.startswith(self._input_base_dir): - filename = os.path.join(self._output_dir, - filename[len(self._input_base_dir):]) - else: - raise ValueError('filename %s does not start with the ' - 'input_base_dir %s' % ( - filename, self._input_base_dir)) - if self._append_suffix: - filename += self._append_suffix - if orig_filename != filename: - output_dir = os.path.dirname(filename) - if not os.path.isdir(output_dir) and output_dir: - os.makedirs(output_dir) - self.log_message('Writing converted %s to %s.', orig_filename, - filename) - if not self.nobackups: - # Make backup - backup = filename + ".bak" - if os.path.lexists(backup): - try: - os.remove(backup) - except OSError: - self.log_message("Can't remove backup %s", backup) - try: - os.rename(filename, backup) - except OSError: - self.log_message("Can't rename %s to %s", filename, backup) - # Actually write the new file - write = super(StdoutRefactoringTool, self).write_file - write(new_text, filename, old_text, encoding) - if not self.nobackups: - shutil.copymode(backup, filename) - if orig_filename != filename: - # Preserve the file mode in the new output directory. - shutil.copymode(orig_filename, filename) - - def print_output(self, old, new, filename, equal): - if equal: - self.log_message("No changes to %s", filename) - else: - self.log_message("Refactored %s", filename) - if self.show_diffs: - diff_lines = diff_texts(old, new, filename) - try: - if self.output_lock is not None: - with self.output_lock: - for line in diff_lines: - print(line) - sys.stdout.flush() - else: - for line in diff_lines: - print(line) - except UnicodeEncodeError: - warn("couldn't encode %s's diff for your terminal" % - (filename,)) - return - -def warn(msg): - print("WARNING: %s" % (msg,), file=sys.stderr) - - -def main(fixer_pkg, args=None): - """Main program. - - Args: - fixer_pkg: the name of a package where the fixers are located. - args: optional; a list of command line arguments. If omitted, - sys.argv[1:] is used. - - Returns a suggested exit status (0, 1, 2). - """ - # Set up option parser - parser = optparse.OptionParser(usage="2to3 [options] file|dir ...") - parser.add_option("-d", "--doctests_only", action="store_true", - help="Fix up doctests only") - parser.add_option("-f", "--fix", action="append", default=[], - help="Each FIX specifies a transformation; default: all") - parser.add_option("-j", "--processes", action="store", default=1, - type="int", help="Run 2to3 concurrently") - parser.add_option("-x", "--nofix", action="append", default=[], - help="Prevent a transformation from being run") - parser.add_option("-l", "--list-fixes", action="store_true", - help="List available transformations") - parser.add_option("-p", "--print-function", action="store_true", - help="Modify the grammar so that print() is a function") - parser.add_option("-e", "--exec-function", action="store_true", - help="Modify the grammar so that exec() is a function") - parser.add_option("-v", "--verbose", action="store_true", - help="More verbose logging") - parser.add_option("--no-diffs", action="store_true", - help="Don't show diffs of the refactoring") - parser.add_option("-w", "--write", action="store_true", - help="Write back modified files") - parser.add_option("-n", "--nobackups", action="store_true", default=False, - help="Don't write backups for modified files") - parser.add_option("-o", "--output-dir", action="store", type="str", - default="", help="Put output files in this directory " - "instead of overwriting the input files. Requires -n.") - parser.add_option("-W", "--write-unchanged-files", action="store_true", - help="Also write files even if no changes were required" - " (useful with --output-dir); implies -w.") - parser.add_option("--add-suffix", action="store", type="str", default="", - help="Append this string to all output filenames." - " Requires -n if non-empty. " - "ex: --add-suffix='3' will generate .py3 files.") - - # Parse command line arguments - refactor_stdin = False - flags = {} - options, args = parser.parse_args(args) - if options.write_unchanged_files: - flags["write_unchanged_files"] = True - if not options.write: - warn("--write-unchanged-files/-W implies -w.") - options.write = True - # If we allowed these, the original files would be renamed to backup names - # but not replaced. - if options.output_dir and not options.nobackups: - parser.error("Can't use --output-dir/-o without -n.") - if options.add_suffix and not options.nobackups: - parser.error("Can't use --add-suffix without -n.") - - if not options.write and options.no_diffs: - warn("not writing files and not printing diffs; that's not very useful") - if not options.write and options.nobackups: - parser.error("Can't use -n without -w") - if options.list_fixes: - print("Available transformations for the -f/--fix option:") - for fixname in refactor.get_all_fix_names(fixer_pkg): - print(fixname) - if not args: - return 0 - if not args: - print("At least one file or directory argument required.", file=sys.stderr) - print("Use --help to show usage.", file=sys.stderr) - return 2 - if "-" in args: - refactor_stdin = True - if options.write: - print("Can't write to stdin.", file=sys.stderr) - return 2 - if options.print_function: - flags["print_function"] = True - - if options.exec_function: - flags["exec_function"] = True - - # Set up logging handler - level = logging.DEBUG if options.verbose else logging.INFO - logging.basicConfig(format='%(name)s: %(message)s', level=level) - logger = logging.getLogger('lib2to3.main') - - # Initialize the refactoring tool - avail_fixes = set(refactor.get_fixers_from_package(fixer_pkg)) - unwanted_fixes = set(fixer_pkg + ".fix_" + fix for fix in options.nofix) - explicit = set() - if options.fix: - all_present = False - for fix in options.fix: - if fix == "all": - all_present = True - else: - explicit.add(fixer_pkg + ".fix_" + fix) - requested = avail_fixes.union(explicit) if all_present else explicit - else: - requested = avail_fixes.union(explicit) - fixer_names = requested.difference(unwanted_fixes) - input_base_dir = os.path.commonprefix(args) - if (input_base_dir and not input_base_dir.endswith(os.sep) - and not os.path.isdir(input_base_dir)): - # One or more similar names were passed, their directory is the base. - # os.path.commonprefix() is ignorant of path elements, this corrects - # for that weird API. - input_base_dir = os.path.dirname(input_base_dir) - if options.output_dir: - input_base_dir = input_base_dir.rstrip(os.sep) - logger.info('Output in %r will mirror the input directory %r layout.', - options.output_dir, input_base_dir) - rt = StdoutRefactoringTool( - sorted(fixer_names), flags, sorted(explicit), - options.nobackups, not options.no_diffs, - input_base_dir=input_base_dir, - output_dir=options.output_dir, - append_suffix=options.add_suffix) - - # Refactor all files and directories passed as arguments - if not rt.errors: - if refactor_stdin: - rt.refactor_stdin() - else: - try: - rt.refactor(args, options.write, options.doctests_only, - options.processes) - except refactor.MultiprocessingUnsupported: - assert options.processes > 1 - print("Sorry, -j isn't supported on this platform.", - file=sys.stderr) - return 1 - rt.summarize() - - # Return error status (0 if rt.errors is zero) - return int(bool(rt.errors)) diff --git a/PythonLib/full/lib2to3/patcomp.py b/PythonLib/full/lib2to3/patcomp.py deleted file mode 100644 index f57f4954..00000000 --- a/PythonLib/full/lib2to3/patcomp.py +++ /dev/null @@ -1,204 +0,0 @@ -# Copyright 2006 Google, Inc. All Rights Reserved. -# Licensed to PSF under a Contributor Agreement. - -"""Pattern compiler. - -The grammar is taken from PatternGrammar.txt. - -The compiler compiles a pattern to a pytree.*Pattern instance. -""" - -__author__ = "Guido van Rossum " - -# Python imports -import io - -# Fairly local imports -from .pgen2 import driver, literals, token, tokenize, parse, grammar - -# Really local imports -from . import pytree -from . import pygram - - -class PatternSyntaxError(Exception): - pass - - -def tokenize_wrapper(input): - """Tokenizes a string suppressing significant whitespace.""" - skip = {token.NEWLINE, token.INDENT, token.DEDENT} - tokens = tokenize.generate_tokens(io.StringIO(input).readline) - for quintuple in tokens: - type, value, start, end, line_text = quintuple - if type not in skip: - yield quintuple - - -class PatternCompiler(object): - - def __init__(self, grammar_file=None): - """Initializer. - - Takes an optional alternative filename for the pattern grammar. - """ - if grammar_file is None: - self.grammar = pygram.pattern_grammar - self.syms = pygram.pattern_symbols - else: - self.grammar = driver.load_grammar(grammar_file) - self.syms = pygram.Symbols(self.grammar) - self.pygrammar = pygram.python_grammar - self.pysyms = pygram.python_symbols - self.driver = driver.Driver(self.grammar, convert=pattern_convert) - - def compile_pattern(self, input, debug=False, with_tree=False): - """Compiles a pattern string to a nested pytree.*Pattern object.""" - tokens = tokenize_wrapper(input) - try: - root = self.driver.parse_tokens(tokens, debug=debug) - except parse.ParseError as e: - raise PatternSyntaxError(str(e)) from None - if with_tree: - return self.compile_node(root), root - else: - return self.compile_node(root) - - def compile_node(self, node): - """Compiles a node, recursively. - - This is one big switch on the node type. - """ - # XXX Optimize certain Wildcard-containing-Wildcard patterns - # that can be merged - if node.type == self.syms.Matcher: - node = node.children[0] # Avoid unneeded recursion - - if node.type == self.syms.Alternatives: - # Skip the odd children since they are just '|' tokens - alts = [self.compile_node(ch) for ch in node.children[::2]] - if len(alts) == 1: - return alts[0] - p = pytree.WildcardPattern([[a] for a in alts], min=1, max=1) - return p.optimize() - - if node.type == self.syms.Alternative: - units = [self.compile_node(ch) for ch in node.children] - if len(units) == 1: - return units[0] - p = pytree.WildcardPattern([units], min=1, max=1) - return p.optimize() - - if node.type == self.syms.NegatedUnit: - pattern = self.compile_basic(node.children[1:]) - p = pytree.NegatedPattern(pattern) - return p.optimize() - - assert node.type == self.syms.Unit - - name = None - nodes = node.children - if len(nodes) >= 3 and nodes[1].type == token.EQUAL: - name = nodes[0].value - nodes = nodes[2:] - repeat = None - if len(nodes) >= 2 and nodes[-1].type == self.syms.Repeater: - repeat = nodes[-1] - nodes = nodes[:-1] - - # Now we've reduced it to: STRING | NAME [Details] | (...) | [...] - pattern = self.compile_basic(nodes, repeat) - - if repeat is not None: - assert repeat.type == self.syms.Repeater - children = repeat.children - child = children[0] - if child.type == token.STAR: - min = 0 - max = pytree.HUGE - elif child.type == token.PLUS: - min = 1 - max = pytree.HUGE - elif child.type == token.LBRACE: - assert children[-1].type == token.RBRACE - assert len(children) in (3, 5) - min = max = self.get_int(children[1]) - if len(children) == 5: - max = self.get_int(children[3]) - else: - assert False - if min != 1 or max != 1: - pattern = pattern.optimize() - pattern = pytree.WildcardPattern([[pattern]], min=min, max=max) - - if name is not None: - pattern.name = name - return pattern.optimize() - - def compile_basic(self, nodes, repeat=None): - # Compile STRING | NAME [Details] | (...) | [...] - assert len(nodes) >= 1 - node = nodes[0] - if node.type == token.STRING: - value = str(literals.evalString(node.value)) - return pytree.LeafPattern(_type_of_literal(value), value) - elif node.type == token.NAME: - value = node.value - if value.isupper(): - if value not in TOKEN_MAP: - raise PatternSyntaxError("Invalid token: %r" % value) - if nodes[1:]: - raise PatternSyntaxError("Can't have details for token") - return pytree.LeafPattern(TOKEN_MAP[value]) - else: - if value == "any": - type = None - elif not value.startswith("_"): - type = getattr(self.pysyms, value, None) - if type is None: - raise PatternSyntaxError("Invalid symbol: %r" % value) - if nodes[1:]: # Details present - content = [self.compile_node(nodes[1].children[1])] - else: - content = None - return pytree.NodePattern(type, content) - elif node.value == "(": - return self.compile_node(nodes[1]) - elif node.value == "[": - assert repeat is None - subpattern = self.compile_node(nodes[1]) - return pytree.WildcardPattern([[subpattern]], min=0, max=1) - assert False, node - - def get_int(self, node): - assert node.type == token.NUMBER - return int(node.value) - - -# Map named tokens to the type value for a LeafPattern -TOKEN_MAP = {"NAME": token.NAME, - "STRING": token.STRING, - "NUMBER": token.NUMBER, - "TOKEN": None} - - -def _type_of_literal(value): - if value[0].isalpha(): - return token.NAME - elif value in grammar.opmap: - return grammar.opmap[value] - else: - return None - - -def pattern_convert(grammar, raw_node_info): - """Converts raw node information to a Node or Leaf instance.""" - type, value, context, children = raw_node_info - if children or type in grammar.number2symbol: - return pytree.Node(type, children, context=context) - else: - return pytree.Leaf(type, value, context=context) - - -def compile_pattern(pattern): - return PatternCompiler().compile_pattern(pattern) diff --git a/PythonLib/full/lib2to3/pgen2/__init__.py b/PythonLib/full/lib2to3/pgen2/__init__.py deleted file mode 100644 index af390484..00000000 --- a/PythonLib/full/lib2to3/pgen2/__init__.py +++ /dev/null @@ -1,4 +0,0 @@ -# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved. -# Licensed to PSF under a Contributor Agreement. - -"""The pgen2 package.""" diff --git a/PythonLib/full/lib2to3/pgen2/conv.py b/PythonLib/full/lib2to3/pgen2/conv.py deleted file mode 100644 index ed0cac53..00000000 --- a/PythonLib/full/lib2to3/pgen2/conv.py +++ /dev/null @@ -1,257 +0,0 @@ -# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved. -# Licensed to PSF under a Contributor Agreement. - -"""Convert graminit.[ch] spit out by pgen to Python code. - -Pgen is the Python parser generator. It is useful to quickly create a -parser from a grammar file in Python's grammar notation. But I don't -want my parsers to be written in C (yet), so I'm translating the -parsing tables to Python data structures and writing a Python parse -engine. - -Note that the token numbers are constants determined by the standard -Python tokenizer. The standard token module defines these numbers and -their names (the names are not used much). The token numbers are -hardcoded into the Python tokenizer and into pgen. A Python -implementation of the Python tokenizer is also available, in the -standard tokenize module. - -On the other hand, symbol numbers (representing the grammar's -non-terminals) are assigned by pgen based on the actual grammar -input. - -Note: this module is pretty much obsolete; the pgen module generates -equivalent grammar tables directly from the Grammar.txt input file -without having to invoke the Python pgen C program. - -""" - -# Python imports -import re - -# Local imports -from pgen2 import grammar, token - - -class Converter(grammar.Grammar): - """Grammar subclass that reads classic pgen output files. - - The run() method reads the tables as produced by the pgen parser - generator, typically contained in two C files, graminit.h and - graminit.c. The other methods are for internal use only. - - See the base class for more documentation. - - """ - - def run(self, graminit_h, graminit_c): - """Load the grammar tables from the text files written by pgen.""" - self.parse_graminit_h(graminit_h) - self.parse_graminit_c(graminit_c) - self.finish_off() - - def parse_graminit_h(self, filename): - """Parse the .h file written by pgen. (Internal) - - This file is a sequence of #define statements defining the - nonterminals of the grammar as numbers. We build two tables - mapping the numbers to names and back. - - """ - try: - f = open(filename) - except OSError as err: - print("Can't open %s: %s" % (filename, err)) - return False - self.symbol2number = {} - self.number2symbol = {} - lineno = 0 - for line in f: - lineno += 1 - mo = re.match(r"^#define\s+(\w+)\s+(\d+)$", line) - if not mo and line.strip(): - print("%s(%s): can't parse %s" % (filename, lineno, - line.strip())) - else: - symbol, number = mo.groups() - number = int(number) - assert symbol not in self.symbol2number - assert number not in self.number2symbol - self.symbol2number[symbol] = number - self.number2symbol[number] = symbol - return True - - def parse_graminit_c(self, filename): - """Parse the .c file written by pgen. (Internal) - - The file looks as follows. The first two lines are always this: - - #include "pgenheaders.h" - #include "grammar.h" - - After that come four blocks: - - 1) one or more state definitions - 2) a table defining dfas - 3) a table defining labels - 4) a struct defining the grammar - - A state definition has the following form: - - one or more arc arrays, each of the form: - static arc arcs__[] = { - {, }, - ... - }; - - followed by a state array, of the form: - static state states_[] = { - {, arcs__}, - ... - }; - - """ - try: - f = open(filename) - except OSError as err: - print("Can't open %s: %s" % (filename, err)) - return False - # The code below essentially uses f's iterator-ness! - lineno = 0 - - # Expect the two #include lines - lineno, line = lineno+1, next(f) - assert line == '#include "pgenheaders.h"\n', (lineno, line) - lineno, line = lineno+1, next(f) - assert line == '#include "grammar.h"\n', (lineno, line) - - # Parse the state definitions - lineno, line = lineno+1, next(f) - allarcs = {} - states = [] - while line.startswith("static arc "): - while line.startswith("static arc "): - mo = re.match(r"static arc arcs_(\d+)_(\d+)\[(\d+)\] = {$", - line) - assert mo, (lineno, line) - n, m, k = list(map(int, mo.groups())) - arcs = [] - for _ in range(k): - lineno, line = lineno+1, next(f) - mo = re.match(r"\s+{(\d+), (\d+)},$", line) - assert mo, (lineno, line) - i, j = list(map(int, mo.groups())) - arcs.append((i, j)) - lineno, line = lineno+1, next(f) - assert line == "};\n", (lineno, line) - allarcs[(n, m)] = arcs - lineno, line = lineno+1, next(f) - mo = re.match(r"static state states_(\d+)\[(\d+)\] = {$", line) - assert mo, (lineno, line) - s, t = list(map(int, mo.groups())) - assert s == len(states), (lineno, line) - state = [] - for _ in range(t): - lineno, line = lineno+1, next(f) - mo = re.match(r"\s+{(\d+), arcs_(\d+)_(\d+)},$", line) - assert mo, (lineno, line) - k, n, m = list(map(int, mo.groups())) - arcs = allarcs[n, m] - assert k == len(arcs), (lineno, line) - state.append(arcs) - states.append(state) - lineno, line = lineno+1, next(f) - assert line == "};\n", (lineno, line) - lineno, line = lineno+1, next(f) - self.states = states - - # Parse the dfas - dfas = {} - mo = re.match(r"static dfa dfas\[(\d+)\] = {$", line) - assert mo, (lineno, line) - ndfas = int(mo.group(1)) - for i in range(ndfas): - lineno, line = lineno+1, next(f) - mo = re.match(r'\s+{(\d+), "(\w+)", (\d+), (\d+), states_(\d+),$', - line) - assert mo, (lineno, line) - symbol = mo.group(2) - number, x, y, z = list(map(int, mo.group(1, 3, 4, 5))) - assert self.symbol2number[symbol] == number, (lineno, line) - assert self.number2symbol[number] == symbol, (lineno, line) - assert x == 0, (lineno, line) - state = states[z] - assert y == len(state), (lineno, line) - lineno, line = lineno+1, next(f) - mo = re.match(r'\s+("(?:\\\d\d\d)*")},$', line) - assert mo, (lineno, line) - first = {} - rawbitset = eval(mo.group(1)) - for i, c in enumerate(rawbitset): - byte = ord(c) - for j in range(8): - if byte & (1<= os.path.getmtime(b) - - -def load_packaged_grammar(package, grammar_source): - """Normally, loads a pickled grammar by doing - pkgutil.get_data(package, pickled_grammar) - where *pickled_grammar* is computed from *grammar_source* by adding the - Python version and using a ``.pickle`` extension. - - However, if *grammar_source* is an extant file, load_grammar(grammar_source) - is called instead. This facilitates using a packaged grammar file when needed - but preserves load_grammar's automatic regeneration behavior when possible. - - """ - if os.path.isfile(grammar_source): - return load_grammar(grammar_source) - pickled_name = _generate_pickle_name(os.path.basename(grammar_source)) - data = pkgutil.get_data(package, pickled_name) - g = grammar.Grammar() - g.loads(data) - return g - - -def main(*args): - """Main program, when run as a script: produce grammar pickle files. - - Calls load_grammar for each argument, a path to a grammar text file. - """ - if not args: - args = sys.argv[1:] - logging.basicConfig(level=logging.INFO, stream=sys.stdout, - format='%(message)s') - for gt in args: - load_grammar(gt, save=True, force=True) - return True - -if __name__ == "__main__": - sys.exit(int(not main())) diff --git a/PythonLib/full/lib2to3/pgen2/grammar.py b/PythonLib/full/lib2to3/pgen2/grammar.py deleted file mode 100644 index 5d550aeb..00000000 --- a/PythonLib/full/lib2to3/pgen2/grammar.py +++ /dev/null @@ -1,189 +0,0 @@ -# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved. -# Licensed to PSF under a Contributor Agreement. - -"""This module defines the data structures used to represent a grammar. - -These are a bit arcane because they are derived from the data -structures used by Python's 'pgen' parser generator. - -There's also a table here mapping operators to their names in the -token module; the Python tokenize module reports all operators as the -fallback token code OP, but the parser needs the actual token code. - -""" - -# Python imports -import pickle - -# Local imports -from . import token - - -class Grammar(object): - """Pgen parsing tables conversion class. - - Once initialized, this class supplies the grammar tables for the - parsing engine implemented by parse.py. The parsing engine - accesses the instance variables directly. The class here does not - provide initialization of the tables; several subclasses exist to - do this (see the conv and pgen modules). - - The load() method reads the tables from a pickle file, which is - much faster than the other ways offered by subclasses. The pickle - file is written by calling dump() (after loading the grammar - tables using a subclass). The report() method prints a readable - representation of the tables to stdout, for debugging. - - The instance variables are as follows: - - symbol2number -- a dict mapping symbol names to numbers. Symbol - numbers are always 256 or higher, to distinguish - them from token numbers, which are between 0 and - 255 (inclusive). - - number2symbol -- a dict mapping numbers to symbol names; - these two are each other's inverse. - - states -- a list of DFAs, where each DFA is a list of - states, each state is a list of arcs, and each - arc is a (i, j) pair where i is a label and j is - a state number. The DFA number is the index into - this list. (This name is slightly confusing.) - Final states are represented by a special arc of - the form (0, j) where j is its own state number. - - dfas -- a dict mapping symbol numbers to (DFA, first) - pairs, where DFA is an item from the states list - above, and first is a set of tokens that can - begin this grammar rule (represented by a dict - whose values are always 1). - - labels -- a list of (x, y) pairs where x is either a token - number or a symbol number, and y is either None - or a string; the strings are keywords. The label - number is the index in this list; label numbers - are used to mark state transitions (arcs) in the - DFAs. - - start -- the number of the grammar's start symbol. - - keywords -- a dict mapping keyword strings to arc labels. - - tokens -- a dict mapping token numbers to arc labels. - - """ - - def __init__(self): - self.symbol2number = {} - self.number2symbol = {} - self.states = [] - self.dfas = {} - self.labels = [(0, "EMPTY")] - self.keywords = {} - self.tokens = {} - self.symbol2label = {} - self.start = 256 - - def dump(self, filename): - """Dump the grammar tables to a pickle file.""" - with open(filename, "wb") as f: - pickle.dump(self.__dict__, f, pickle.HIGHEST_PROTOCOL) - - def load(self, filename): - """Load the grammar tables from a pickle file.""" - with open(filename, "rb") as f: - d = pickle.load(f) - self.__dict__.update(d) - - def loads(self, pkl): - """Load the grammar tables from a pickle bytes object.""" - self.__dict__.update(pickle.loads(pkl)) - - def copy(self): - """ - Copy the grammar. - """ - new = self.__class__() - for dict_attr in ("symbol2number", "number2symbol", "dfas", "keywords", - "tokens", "symbol2label"): - setattr(new, dict_attr, getattr(self, dict_attr).copy()) - new.labels = self.labels[:] - new.states = self.states[:] - new.start = self.start - return new - - def report(self): - """Dump the grammar tables to standard output, for debugging.""" - from pprint import pprint - print("s2n") - pprint(self.symbol2number) - print("n2s") - pprint(self.number2symbol) - print("states") - pprint(self.states) - print("dfas") - pprint(self.dfas) - print("labels") - pprint(self.labels) - print("start", self.start) - - -# Map from operator to number (since tokenize doesn't do this) - -opmap_raw = """ -( LPAR -) RPAR -[ LSQB -] RSQB -: COLON -, COMMA -; SEMI -+ PLUS -- MINUS -* STAR -/ SLASH -| VBAR -& AMPER -< LESS -> GREATER -= EQUAL -. DOT -% PERCENT -` BACKQUOTE -{ LBRACE -} RBRACE -@ AT -@= ATEQUAL -== EQEQUAL -!= NOTEQUAL -<> NOTEQUAL -<= LESSEQUAL ->= GREATEREQUAL -~ TILDE -^ CIRCUMFLEX -<< LEFTSHIFT ->> RIGHTSHIFT -** DOUBLESTAR -+= PLUSEQUAL --= MINEQUAL -*= STAREQUAL -/= SLASHEQUAL -%= PERCENTEQUAL -&= AMPEREQUAL -|= VBAREQUAL -^= CIRCUMFLEXEQUAL -<<= LEFTSHIFTEQUAL ->>= RIGHTSHIFTEQUAL -**= DOUBLESTAREQUAL -// DOUBLESLASH -//= DOUBLESLASHEQUAL --> RARROW -:= COLONEQUAL -""" - -opmap = {} -for line in opmap_raw.splitlines(): - if line: - op, name = line.split() - opmap[op] = getattr(token, name) -del line, op, name diff --git a/PythonLib/full/lib2to3/pgen2/literals.py b/PythonLib/full/lib2to3/pgen2/literals.py deleted file mode 100644 index b9b63e6e..00000000 --- a/PythonLib/full/lib2to3/pgen2/literals.py +++ /dev/null @@ -1,60 +0,0 @@ -# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved. -# Licensed to PSF under a Contributor Agreement. - -"""Safely evaluate Python string literals without using eval().""" - -import re - -simple_escapes = {"a": "\a", - "b": "\b", - "f": "\f", - "n": "\n", - "r": "\r", - "t": "\t", - "v": "\v", - "'": "'", - '"': '"', - "\\": "\\"} - -def escape(m): - all, tail = m.group(0, 1) - assert all.startswith("\\") - esc = simple_escapes.get(tail) - if esc is not None: - return esc - if tail.startswith("x"): - hexes = tail[1:] - if len(hexes) < 2: - raise ValueError("invalid hex string escape ('\\%s')" % tail) - try: - i = int(hexes, 16) - except ValueError: - raise ValueError("invalid hex string escape ('\\%s')" % tail) from None - else: - try: - i = int(tail, 8) - except ValueError: - raise ValueError("invalid octal string escape ('\\%s')" % tail) from None - return chr(i) - -def evalString(s): - assert s.startswith("'") or s.startswith('"'), repr(s[:1]) - q = s[0] - if s[:3] == q*3: - q = q*3 - assert s.endswith(q), repr(s[-len(q):]) - assert len(s) >= 2*len(q) - s = s[len(q):-len(q)] - return re.sub(r"\\(\'|\"|\\|[abfnrtv]|x.{0,2}|[0-7]{1,3})", escape, s) - -def test(): - for i in range(256): - c = chr(i) - s = repr(c) - e = evalString(s) - if e != c: - print(i, c, s, e) - - -if __name__ == "__main__": - test() diff --git a/PythonLib/full/lib2to3/pgen2/parse.py b/PythonLib/full/lib2to3/pgen2/parse.py deleted file mode 100644 index cf3fcf7e..00000000 --- a/PythonLib/full/lib2to3/pgen2/parse.py +++ /dev/null @@ -1,204 +0,0 @@ -# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved. -# Licensed to PSF under a Contributor Agreement. - -"""Parser engine for the grammar tables generated by pgen. - -The grammar table must be loaded first. - -See Parser/parser.c in the Python distribution for additional info on -how this parsing engine works. - -""" - -# Local imports -from . import token - -class ParseError(Exception): - """Exception to signal the parser is stuck.""" - - def __init__(self, msg, type, value, context): - Exception.__init__(self, "%s: type=%r, value=%r, context=%r" % - (msg, type, value, context)) - self.msg = msg - self.type = type - self.value = value - self.context = context - - def __reduce__(self): - return type(self), (self.msg, self.type, self.value, self.context) - -class Parser(object): - """Parser engine. - - The proper usage sequence is: - - p = Parser(grammar, [converter]) # create instance - p.setup([start]) # prepare for parsing - : - if p.addtoken(...): # parse a token; may raise ParseError - break - root = p.rootnode # root of abstract syntax tree - - A Parser instance may be reused by calling setup() repeatedly. - - A Parser instance contains state pertaining to the current token - sequence, and should not be used concurrently by different threads - to parse separate token sequences. - - See driver.py for how to get input tokens by tokenizing a file or - string. - - Parsing is complete when addtoken() returns True; the root of the - abstract syntax tree can then be retrieved from the rootnode - instance variable. When a syntax error occurs, addtoken() raises - the ParseError exception. There is no error recovery; the parser - cannot be used after a syntax error was reported (but it can be - reinitialized by calling setup()). - - """ - - def __init__(self, grammar, convert=None): - """Constructor. - - The grammar argument is a grammar.Grammar instance; see the - grammar module for more information. - - The parser is not ready yet for parsing; you must call the - setup() method to get it started. - - The optional convert argument is a function mapping concrete - syntax tree nodes to abstract syntax tree nodes. If not - given, no conversion is done and the syntax tree produced is - the concrete syntax tree. If given, it must be a function of - two arguments, the first being the grammar (a grammar.Grammar - instance), and the second being the concrete syntax tree node - to be converted. The syntax tree is converted from the bottom - up. - - A concrete syntax tree node is a (type, value, context, nodes) - tuple, where type is the node type (a token or symbol number), - value is None for symbols and a string for tokens, context is - None or an opaque value used for error reporting (typically a - (lineno, offset) pair), and nodes is a list of children for - symbols, and None for tokens. - - An abstract syntax tree node may be anything; this is entirely - up to the converter function. - - """ - self.grammar = grammar - self.convert = convert or (lambda grammar, node: node) - - def setup(self, start=None): - """Prepare for parsing. - - This *must* be called before starting to parse. - - The optional argument is an alternative start symbol; it - defaults to the grammar's start symbol. - - You can use a Parser instance to parse any number of programs; - each time you call setup() the parser is reset to an initial - state determined by the (implicit or explicit) start symbol. - - """ - if start is None: - start = self.grammar.start - # Each stack entry is a tuple: (dfa, state, node). - # A node is a tuple: (type, value, context, children), - # where children is a list of nodes or None, and context may be None. - newnode = (start, None, None, []) - stackentry = (self.grammar.dfas[start], 0, newnode) - self.stack = [stackentry] - self.rootnode = None - self.used_names = set() # Aliased to self.rootnode.used_names in pop() - - def addtoken(self, type, value, context): - """Add a token; return True iff this is the end of the program.""" - # Map from token to label - ilabel = self.classify(type, value, context) - # Loop until the token is shifted; may raise exceptions - while True: - dfa, state, node = self.stack[-1] - states, first = dfa - arcs = states[state] - # Look for a state with this label - for i, newstate in arcs: - t, v = self.grammar.labels[i] - if ilabel == i: - # Look it up in the list of labels - assert t < 256 - # Shift a token; we're done with it - self.shift(type, value, newstate, context) - # Pop while we are in an accept-only state - state = newstate - while states[state] == [(0, state)]: - self.pop() - if not self.stack: - # Done parsing! - return True - dfa, state, node = self.stack[-1] - states, first = dfa - # Done with this token - return False - elif t >= 256: - # See if it's a symbol and if we're in its first set - itsdfa = self.grammar.dfas[t] - itsstates, itsfirst = itsdfa - if ilabel in itsfirst: - # Push a symbol - self.push(t, self.grammar.dfas[t], newstate, context) - break # To continue the outer while loop - else: - if (0, state) in arcs: - # An accepting state, pop it and try something else - self.pop() - if not self.stack: - # Done parsing, but another token is input - raise ParseError("too much input", - type, value, context) - else: - # No success finding a transition - raise ParseError("bad input", type, value, context) - - def classify(self, type, value, context): - """Turn a token into a label. (Internal)""" - if type == token.NAME: - # Keep a listing of all used names - self.used_names.add(value) - # Check for reserved words - ilabel = self.grammar.keywords.get(value) - if ilabel is not None: - return ilabel - ilabel = self.grammar.tokens.get(type) - if ilabel is None: - raise ParseError("bad token", type, value, context) - return ilabel - - def shift(self, type, value, newstate, context): - """Shift a token. (Internal)""" - dfa, state, node = self.stack[-1] - newnode = (type, value, context, None) - newnode = self.convert(self.grammar, newnode) - if newnode is not None: - node[-1].append(newnode) - self.stack[-1] = (dfa, newstate, node) - - def push(self, type, newdfa, newstate, context): - """Push a nonterminal. (Internal)""" - dfa, state, node = self.stack[-1] - newnode = (type, None, context, []) - self.stack[-1] = (dfa, newstate, node) - self.stack.append((newdfa, 0, newnode)) - - def pop(self): - """Pop a nonterminal. (Internal)""" - popdfa, popstate, popnode = self.stack.pop() - newnode = self.convert(self.grammar, popnode) - if newnode is not None: - if self.stack: - dfa, state, node = self.stack[-1] - node[-1].append(newnode) - else: - self.rootnode = newnode - self.rootnode.used_names = self.used_names diff --git a/PythonLib/full/lib2to3/pgen2/pgen.py b/PythonLib/full/lib2to3/pgen2/pgen.py deleted file mode 100644 index 7abd5cef..00000000 --- a/PythonLib/full/lib2to3/pgen2/pgen.py +++ /dev/null @@ -1,386 +0,0 @@ -# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved. -# Licensed to PSF under a Contributor Agreement. - -# Pgen imports -from . import grammar, token, tokenize - -class PgenGrammar(grammar.Grammar): - pass - -class ParserGenerator(object): - - def __init__(self, filename, stream=None): - close_stream = None - if stream is None: - stream = open(filename, encoding="utf-8") - close_stream = stream.close - self.filename = filename - self.stream = stream - self.generator = tokenize.generate_tokens(stream.readline) - self.gettoken() # Initialize lookahead - self.dfas, self.startsymbol = self.parse() - if close_stream is not None: - close_stream() - self.first = {} # map from symbol name to set of tokens - self.addfirstsets() - - def make_grammar(self): - c = PgenGrammar() - names = list(self.dfas.keys()) - names.sort() - names.remove(self.startsymbol) - names.insert(0, self.startsymbol) - for name in names: - i = 256 + len(c.symbol2number) - c.symbol2number[name] = i - c.number2symbol[i] = name - for name in names: - dfa = self.dfas[name] - states = [] - for state in dfa: - arcs = [] - for label, next in sorted(state.arcs.items()): - arcs.append((self.make_label(c, label), dfa.index(next))) - if state.isfinal: - arcs.append((0, dfa.index(state))) - states.append(arcs) - c.states.append(states) - c.dfas[c.symbol2number[name]] = (states, self.make_first(c, name)) - c.start = c.symbol2number[self.startsymbol] - return c - - def make_first(self, c, name): - rawfirst = self.first[name] - first = {} - for label in sorted(rawfirst): - ilabel = self.make_label(c, label) - ##assert ilabel not in first # XXX failed on <> ... != - first[ilabel] = 1 - return first - - def make_label(self, c, label): - # XXX Maybe this should be a method on a subclass of converter? - ilabel = len(c.labels) - if label[0].isalpha(): - # Either a symbol name or a named token - if label in c.symbol2number: - # A symbol name (a non-terminal) - if label in c.symbol2label: - return c.symbol2label[label] - else: - c.labels.append((c.symbol2number[label], None)) - c.symbol2label[label] = ilabel - return ilabel - else: - # A named token (NAME, NUMBER, STRING) - itoken = getattr(token, label, None) - assert isinstance(itoken, int), label - assert itoken in token.tok_name, label - if itoken in c.tokens: - return c.tokens[itoken] - else: - c.labels.append((itoken, None)) - c.tokens[itoken] = ilabel - return ilabel - else: - # Either a keyword or an operator - assert label[0] in ('"', "'"), label - value = eval(label) - if value[0].isalpha(): - # A keyword - if value in c.keywords: - return c.keywords[value] - else: - c.labels.append((token.NAME, value)) - c.keywords[value] = ilabel - return ilabel - else: - # An operator (any non-numeric token) - itoken = grammar.opmap[value] # Fails if unknown token - if itoken in c.tokens: - return c.tokens[itoken] - else: - c.labels.append((itoken, None)) - c.tokens[itoken] = ilabel - return ilabel - - def addfirstsets(self): - names = list(self.dfas.keys()) - names.sort() - for name in names: - if name not in self.first: - self.calcfirst(name) - #print name, self.first[name].keys() - - def calcfirst(self, name): - dfa = self.dfas[name] - self.first[name] = None # dummy to detect left recursion - state = dfa[0] - totalset = {} - overlapcheck = {} - for label, next in state.arcs.items(): - if label in self.dfas: - if label in self.first: - fset = self.first[label] - if fset is None: - raise ValueError("recursion for rule %r" % name) - else: - self.calcfirst(label) - fset = self.first[label] - totalset.update(fset) - overlapcheck[label] = fset - else: - totalset[label] = 1 - overlapcheck[label] = {label: 1} - inverse = {} - for label, itsfirst in overlapcheck.items(): - for symbol in itsfirst: - if symbol in inverse: - raise ValueError("rule %s is ambiguous; %s is in the" - " first sets of %s as well as %s" % - (name, symbol, label, inverse[symbol])) - inverse[symbol] = label - self.first[name] = totalset - - def parse(self): - dfas = {} - startsymbol = None - # MSTART: (NEWLINE | RULE)* ENDMARKER - while self.type != token.ENDMARKER: - while self.type == token.NEWLINE: - self.gettoken() - # RULE: NAME ':' RHS NEWLINE - name = self.expect(token.NAME) - self.expect(token.OP, ":") - a, z = self.parse_rhs() - self.expect(token.NEWLINE) - #self.dump_nfa(name, a, z) - dfa = self.make_dfa(a, z) - #self.dump_dfa(name, dfa) - oldlen = len(dfa) - self.simplify_dfa(dfa) - newlen = len(dfa) - dfas[name] = dfa - #print name, oldlen, newlen - if startsymbol is None: - startsymbol = name - return dfas, startsymbol - - def make_dfa(self, start, finish): - # To turn an NFA into a DFA, we define the states of the DFA - # to correspond to *sets* of states of the NFA. Then do some - # state reduction. Let's represent sets as dicts with 1 for - # values. - assert isinstance(start, NFAState) - assert isinstance(finish, NFAState) - def closure(state): - base = {} - addclosure(state, base) - return base - def addclosure(state, base): - assert isinstance(state, NFAState) - if state in base: - return - base[state] = 1 - for label, next in state.arcs: - if label is None: - addclosure(next, base) - states = [DFAState(closure(start), finish)] - for state in states: # NB states grows while we're iterating - arcs = {} - for nfastate in state.nfaset: - for label, next in nfastate.arcs: - if label is not None: - addclosure(next, arcs.setdefault(label, {})) - for label, nfaset in sorted(arcs.items()): - for st in states: - if st.nfaset == nfaset: - break - else: - st = DFAState(nfaset, finish) - states.append(st) - state.addarc(st, label) - return states # List of DFAState instances; first one is start - - def dump_nfa(self, name, start, finish): - print("Dump of NFA for", name) - todo = [start] - for i, state in enumerate(todo): - print(" State", i, state is finish and "(final)" or "") - for label, next in state.arcs: - if next in todo: - j = todo.index(next) - else: - j = len(todo) - todo.append(next) - if label is None: - print(" -> %d" % j) - else: - print(" %s -> %d" % (label, j)) - - def dump_dfa(self, name, dfa): - print("Dump of DFA for", name) - for i, state in enumerate(dfa): - print(" State", i, state.isfinal and "(final)" or "") - for label, next in sorted(state.arcs.items()): - print(" %s -> %d" % (label, dfa.index(next))) - - def simplify_dfa(self, dfa): - # This is not theoretically optimal, but works well enough. - # Algorithm: repeatedly look for two states that have the same - # set of arcs (same labels pointing to the same nodes) and - # unify them, until things stop changing. - - # dfa is a list of DFAState instances - changes = True - while changes: - changes = False - for i, state_i in enumerate(dfa): - for j in range(i+1, len(dfa)): - state_j = dfa[j] - if state_i == state_j: - #print " unify", i, j - del dfa[j] - for state in dfa: - state.unifystate(state_j, state_i) - changes = True - break - - def parse_rhs(self): - # RHS: ALT ('|' ALT)* - a, z = self.parse_alt() - if self.value != "|": - return a, z - else: - aa = NFAState() - zz = NFAState() - aa.addarc(a) - z.addarc(zz) - while self.value == "|": - self.gettoken() - a, z = self.parse_alt() - aa.addarc(a) - z.addarc(zz) - return aa, zz - - def parse_alt(self): - # ALT: ITEM+ - a, b = self.parse_item() - while (self.value in ("(", "[") or - self.type in (token.NAME, token.STRING)): - c, d = self.parse_item() - b.addarc(c) - b = d - return a, b - - def parse_item(self): - # ITEM: '[' RHS ']' | ATOM ['+' | '*'] - if self.value == "[": - self.gettoken() - a, z = self.parse_rhs() - self.expect(token.OP, "]") - a.addarc(z) - return a, z - else: - a, z = self.parse_atom() - value = self.value - if value not in ("+", "*"): - return a, z - self.gettoken() - z.addarc(a) - if value == "+": - return a, z - else: - return a, a - - def parse_atom(self): - # ATOM: '(' RHS ')' | NAME | STRING - if self.value == "(": - self.gettoken() - a, z = self.parse_rhs() - self.expect(token.OP, ")") - return a, z - elif self.type in (token.NAME, token.STRING): - a = NFAState() - z = NFAState() - a.addarc(z, self.value) - self.gettoken() - return a, z - else: - self.raise_error("expected (...) or NAME or STRING, got %s/%s", - self.type, self.value) - - def expect(self, type, value=None): - if self.type != type or (value is not None and self.value != value): - self.raise_error("expected %s/%s, got %s/%s", - type, value, self.type, self.value) - value = self.value - self.gettoken() - return value - - def gettoken(self): - tup = next(self.generator) - while tup[0] in (tokenize.COMMENT, tokenize.NL): - tup = next(self.generator) - self.type, self.value, self.begin, self.end, self.line = tup - #print token.tok_name[self.type], repr(self.value) - - def raise_error(self, msg, *args): - if args: - try: - msg = msg % args - except: - msg = " ".join([msg] + list(map(str, args))) - raise SyntaxError(msg, (self.filename, self.end[0], - self.end[1], self.line)) - -class NFAState(object): - - def __init__(self): - self.arcs = [] # list of (label, NFAState) pairs - - def addarc(self, next, label=None): - assert label is None or isinstance(label, str) - assert isinstance(next, NFAState) - self.arcs.append((label, next)) - -class DFAState(object): - - def __init__(self, nfaset, final): - assert isinstance(nfaset, dict) - assert isinstance(next(iter(nfaset)), NFAState) - assert isinstance(final, NFAState) - self.nfaset = nfaset - self.isfinal = final in nfaset - self.arcs = {} # map from label to DFAState - - def addarc(self, next, label): - assert isinstance(label, str) - assert label not in self.arcs - assert isinstance(next, DFAState) - self.arcs[label] = next - - def unifystate(self, old, new): - for label, next in self.arcs.items(): - if next is old: - self.arcs[label] = new - - def __eq__(self, other): - # Equality test -- ignore the nfaset instance variable - assert isinstance(other, DFAState) - if self.isfinal != other.isfinal: - return False - # Can't just return self.arcs == other.arcs, because that - # would invoke this method recursively, with cycles... - if len(self.arcs) != len(other.arcs): - return False - for label, next in self.arcs.items(): - if next is not other.arcs.get(label): - return False - return True - - __hash__ = None # For Py3 compatibility. - -def generate_grammar(filename="Grammar.txt"): - p = ParserGenerator(filename) - return p.make_grammar() diff --git a/PythonLib/full/lib2to3/pgen2/token.py b/PythonLib/full/lib2to3/pgen2/token.py deleted file mode 100644 index 2a55138e..00000000 --- a/PythonLib/full/lib2to3/pgen2/token.py +++ /dev/null @@ -1,86 +0,0 @@ -#! /usr/bin/env python3 - -"""Token constants (from "token.h").""" - -# Taken from Python (r53757) and modified to include some tokens -# originally monkeypatched in by pgen2.tokenize - -#--start constants-- -ENDMARKER = 0 -NAME = 1 -NUMBER = 2 -STRING = 3 -NEWLINE = 4 -INDENT = 5 -DEDENT = 6 -LPAR = 7 -RPAR = 8 -LSQB = 9 -RSQB = 10 -COLON = 11 -COMMA = 12 -SEMI = 13 -PLUS = 14 -MINUS = 15 -STAR = 16 -SLASH = 17 -VBAR = 18 -AMPER = 19 -LESS = 20 -GREATER = 21 -EQUAL = 22 -DOT = 23 -PERCENT = 24 -BACKQUOTE = 25 -LBRACE = 26 -RBRACE = 27 -EQEQUAL = 28 -NOTEQUAL = 29 -LESSEQUAL = 30 -GREATEREQUAL = 31 -TILDE = 32 -CIRCUMFLEX = 33 -LEFTSHIFT = 34 -RIGHTSHIFT = 35 -DOUBLESTAR = 36 -PLUSEQUAL = 37 -MINEQUAL = 38 -STAREQUAL = 39 -SLASHEQUAL = 40 -PERCENTEQUAL = 41 -AMPEREQUAL = 42 -VBAREQUAL = 43 -CIRCUMFLEXEQUAL = 44 -LEFTSHIFTEQUAL = 45 -RIGHTSHIFTEQUAL = 46 -DOUBLESTAREQUAL = 47 -DOUBLESLASH = 48 -DOUBLESLASHEQUAL = 49 -AT = 50 -ATEQUAL = 51 -OP = 52 -COMMENT = 53 -NL = 54 -RARROW = 55 -AWAIT = 56 -ASYNC = 57 -ERRORTOKEN = 58 -COLONEQUAL = 59 -N_TOKENS = 60 -NT_OFFSET = 256 -#--end constants-- - -tok_name = {} -for _name, _value in list(globals().items()): - if isinstance(_value, int): - tok_name[_value] = _name - - -def ISTERMINAL(x): - return x < NT_OFFSET - -def ISNONTERMINAL(x): - return x >= NT_OFFSET - -def ISEOF(x): - return x == ENDMARKER diff --git a/PythonLib/full/lib2to3/pgen2/tokenize.py b/PythonLib/full/lib2to3/pgen2/tokenize.py deleted file mode 100644 index 099dfa77..00000000 --- a/PythonLib/full/lib2to3/pgen2/tokenize.py +++ /dev/null @@ -1,564 +0,0 @@ -# Copyright (c) 2001, 2002, 2003, 2004, 2005, 2006 Python Software Foundation. -# All rights reserved. - -"""Tokenization help for Python programs. - -generate_tokens(readline) is a generator that breaks a stream of -text into Python tokens. It accepts a readline-like method which is called -repeatedly to get the next line of input (or "" for EOF). It generates -5-tuples with these members: - - the token type (see token.py) - the token (a string) - the starting (row, column) indices of the token (a 2-tuple of ints) - the ending (row, column) indices of the token (a 2-tuple of ints) - the original line (string) - -It is designed to match the working of the Python tokenizer exactly, except -that it produces COMMENT tokens for comments and gives type OP for all -operators - -Older entry points - tokenize_loop(readline, tokeneater) - tokenize(readline, tokeneater=printtoken) -are the same, except instead of generating tokens, tokeneater is a callback -function to which the 5 fields described above are passed as 5 arguments, -each time a new token is found.""" - -__author__ = 'Ka-Ping Yee ' -__credits__ = \ - 'GvR, ESR, Tim Peters, Thomas Wouters, Fred Drake, Skip Montanaro' - -import string, re -from codecs import BOM_UTF8, lookup -from lib2to3.pgen2.token import * - -from . import token -__all__ = [x for x in dir(token) if x[0] != '_'] + ["tokenize", - "generate_tokens", "untokenize"] -del token - -try: - bytes -except NameError: - # Support bytes type in Python <= 2.5, so 2to3 turns itself into - # valid Python 3 code. - bytes = str - -def group(*choices): return '(' + '|'.join(choices) + ')' -def any(*choices): return group(*choices) + '*' -def maybe(*choices): return group(*choices) + '?' -def _combinations(*l): - return set( - x + y for x in l for y in l + ("",) if x.casefold() != y.casefold() - ) - -Whitespace = r'[ \f\t]*' -Comment = r'#[^\r\n]*' -Ignore = Whitespace + any(r'\\\r?\n' + Whitespace) + maybe(Comment) -Name = r'\w+' - -Binnumber = r'0[bB]_?[01]+(?:_[01]+)*' -Hexnumber = r'0[xX]_?[\da-fA-F]+(?:_[\da-fA-F]+)*[lL]?' -Octnumber = r'0[oO]?_?[0-7]+(?:_[0-7]+)*[lL]?' -Decnumber = group(r'[1-9]\d*(?:_\d+)*[lL]?', '0[lL]?') -Intnumber = group(Binnumber, Hexnumber, Octnumber, Decnumber) -Exponent = r'[eE][-+]?\d+(?:_\d+)*' -Pointfloat = group(r'\d+(?:_\d+)*\.(?:\d+(?:_\d+)*)?', r'\.\d+(?:_\d+)*') + maybe(Exponent) -Expfloat = r'\d+(?:_\d+)*' + Exponent -Floatnumber = group(Pointfloat, Expfloat) -Imagnumber = group(r'\d+(?:_\d+)*[jJ]', Floatnumber + r'[jJ]') -Number = group(Imagnumber, Floatnumber, Intnumber) - -# Tail end of ' string. -Single = r"[^'\\]*(?:\\.[^'\\]*)*'" -# Tail end of " string. -Double = r'[^"\\]*(?:\\.[^"\\]*)*"' -# Tail end of ''' string. -Single3 = r"[^'\\]*(?:(?:\\.|'(?!''))[^'\\]*)*'''" -# Tail end of """ string. -Double3 = r'[^"\\]*(?:(?:\\.|"(?!""))[^"\\]*)*"""' -_litprefix = r"(?:[uUrRbBfF]|[rR][fFbB]|[fFbBuU][rR])?" -Triple = group(_litprefix + "'''", _litprefix + '"""') -# Single-line ' or " string. -String = group(_litprefix + r"'[^\n'\\]*(?:\\.[^\n'\\]*)*'", - _litprefix + r'"[^\n"\\]*(?:\\.[^\n"\\]*)*"') - -# Because of leftmost-then-longest match semantics, be sure to put the -# longest operators first (e.g., if = came before ==, == would get -# recognized as two instances of =). -Operator = group(r"\*\*=?", r">>=?", r"<<=?", r"<>", r"!=", - r"//=?", r"->", - r"[+\-*/%&@|^=<>]=?", - r"~") - -Bracket = '[][(){}]' -Special = group(r'\r?\n', r':=', r'[:;.,`@]') -Funny = group(Operator, Bracket, Special) - -PlainToken = group(Number, Funny, String, Name) -Token = Ignore + PlainToken - -# First (or only) line of ' or " string. -ContStr = group(_litprefix + r"'[^\n'\\]*(?:\\.[^\n'\\]*)*" + - group("'", r'\\\r?\n'), - _litprefix + r'"[^\n"\\]*(?:\\.[^\n"\\]*)*' + - group('"', r'\\\r?\n')) -PseudoExtras = group(r'\\\r?\n', Comment, Triple) -PseudoToken = Whitespace + group(PseudoExtras, Number, Funny, ContStr, Name) - -tokenprog, pseudoprog, single3prog, double3prog = map( - re.compile, (Token, PseudoToken, Single3, Double3)) - -_strprefixes = ( - _combinations('r', 'R', 'f', 'F') | - _combinations('r', 'R', 'b', 'B') | - {'u', 'U', 'ur', 'uR', 'Ur', 'UR'} -) - -endprogs = {"'": re.compile(Single), '"': re.compile(Double), - "'''": single3prog, '"""': double3prog, - **{f"{prefix}'''": single3prog for prefix in _strprefixes}, - **{f'{prefix}"""': double3prog for prefix in _strprefixes}, - **{prefix: None for prefix in _strprefixes}} - -triple_quoted = ( - {"'''", '"""'} | - {f"{prefix}'''" for prefix in _strprefixes} | - {f'{prefix}"""' for prefix in _strprefixes} -) -single_quoted = ( - {"'", '"'} | - {f"{prefix}'" for prefix in _strprefixes} | - {f'{prefix}"' for prefix in _strprefixes} -) - -tabsize = 8 - -class TokenError(Exception): pass - -class StopTokenizing(Exception): pass - -def printtoken(type, token, xxx_todo_changeme, xxx_todo_changeme1, line): # for testing - (srow, scol) = xxx_todo_changeme - (erow, ecol) = xxx_todo_changeme1 - print("%d,%d-%d,%d:\t%s\t%s" % \ - (srow, scol, erow, ecol, tok_name[type], repr(token))) - -def tokenize(readline, tokeneater=printtoken): - """ - The tokenize() function accepts two parameters: one representing the - input stream, and one providing an output mechanism for tokenize(). - - The first parameter, readline, must be a callable object which provides - the same interface as the readline() method of built-in file objects. - Each call to the function should return one line of input as a string. - - The second parameter, tokeneater, must also be a callable object. It is - called once for each token, with five arguments, corresponding to the - tuples generated by generate_tokens(). - """ - try: - tokenize_loop(readline, tokeneater) - except StopTokenizing: - pass - -# backwards compatible interface -def tokenize_loop(readline, tokeneater): - for token_info in generate_tokens(readline): - tokeneater(*token_info) - -class Untokenizer: - - def __init__(self): - self.tokens = [] - self.prev_row = 1 - self.prev_col = 0 - - def add_whitespace(self, start): - row, col = start - assert row <= self.prev_row - col_offset = col - self.prev_col - if col_offset: - self.tokens.append(" " * col_offset) - - def untokenize(self, iterable): - for t in iterable: - if len(t) == 2: - self.compat(t, iterable) - break - tok_type, token, start, end, line = t - self.add_whitespace(start) - self.tokens.append(token) - self.prev_row, self.prev_col = end - if tok_type in (NEWLINE, NL): - self.prev_row += 1 - self.prev_col = 0 - return "".join(self.tokens) - - def compat(self, token, iterable): - startline = False - indents = [] - toks_append = self.tokens.append - toknum, tokval = token - if toknum in (NAME, NUMBER): - tokval += ' ' - if toknum in (NEWLINE, NL): - startline = True - for tok in iterable: - toknum, tokval = tok[:2] - - if toknum in (NAME, NUMBER, ASYNC, AWAIT): - tokval += ' ' - - if toknum == INDENT: - indents.append(tokval) - continue - elif toknum == DEDENT: - indents.pop() - continue - elif toknum in (NEWLINE, NL): - startline = True - elif startline and indents: - toks_append(indents[-1]) - startline = False - toks_append(tokval) - -cookie_re = re.compile(r'^[ \t\f]*#.*?coding[:=][ \t]*([-\w.]+)', re.ASCII) -blank_re = re.compile(br'^[ \t\f]*(?:[#\r\n]|$)', re.ASCII) - -def _get_normal_name(orig_enc): - """Imitates get_normal_name in tokenizer.c.""" - # Only care about the first 12 characters. - enc = orig_enc[:12].lower().replace("_", "-") - if enc == "utf-8" or enc.startswith("utf-8-"): - return "utf-8" - if enc in ("latin-1", "iso-8859-1", "iso-latin-1") or \ - enc.startswith(("latin-1-", "iso-8859-1-", "iso-latin-1-")): - return "iso-8859-1" - return orig_enc - -def detect_encoding(readline): - """ - The detect_encoding() function is used to detect the encoding that should - be used to decode a Python source file. It requires one argument, readline, - in the same way as the tokenize() generator. - - It will call readline a maximum of twice, and return the encoding used - (as a string) and a list of any lines (left as bytes) it has read - in. - - It detects the encoding from the presence of a utf-8 bom or an encoding - cookie as specified in pep-0263. If both a bom and a cookie are present, but - disagree, a SyntaxError will be raised. If the encoding cookie is an invalid - charset, raise a SyntaxError. Note that if a utf-8 bom is found, - 'utf-8-sig' is returned. - - If no encoding is specified, then the default of 'utf-8' will be returned. - """ - bom_found = False - encoding = None - default = 'utf-8' - def read_or_stop(): - try: - return readline() - except StopIteration: - return bytes() - - def find_cookie(line): - try: - line_string = line.decode('ascii') - except UnicodeDecodeError: - return None - match = cookie_re.match(line_string) - if not match: - return None - encoding = _get_normal_name(match.group(1)) - try: - codec = lookup(encoding) - except LookupError: - # This behaviour mimics the Python interpreter - raise SyntaxError("unknown encoding: " + encoding) - - if bom_found: - if codec.name != 'utf-8': - # This behaviour mimics the Python interpreter - raise SyntaxError('encoding problem: utf-8') - encoding += '-sig' - return encoding - - first = read_or_stop() - if first.startswith(BOM_UTF8): - bom_found = True - first = first[3:] - default = 'utf-8-sig' - if not first: - return default, [] - - encoding = find_cookie(first) - if encoding: - return encoding, [first] - if not blank_re.match(first): - return default, [first] - - second = read_or_stop() - if not second: - return default, [first] - - encoding = find_cookie(second) - if encoding: - return encoding, [first, second] - - return default, [first, second] - -def untokenize(iterable): - """Transform tokens back into Python source code. - - Each element returned by the iterable must be a token sequence - with at least two elements, a token number and token value. If - only two tokens are passed, the resulting output is poor. - - Round-trip invariant for full input: - Untokenized source will match input source exactly - - Round-trip invariant for limited input: - # Output text will tokenize the back to the input - t1 = [tok[:2] for tok in generate_tokens(f.readline)] - newcode = untokenize(t1) - readline = iter(newcode.splitlines(1)).next - t2 = [tok[:2] for tokin generate_tokens(readline)] - assert t1 == t2 - """ - ut = Untokenizer() - return ut.untokenize(iterable) - -def generate_tokens(readline): - """ - The generate_tokens() generator requires one argument, readline, which - must be a callable object which provides the same interface as the - readline() method of built-in file objects. Each call to the function - should return one line of input as a string. Alternately, readline - can be a callable function terminating with StopIteration: - readline = open(myfile).next # Example of alternate readline - - The generator produces 5-tuples with these members: the token type; the - token string; a 2-tuple (srow, scol) of ints specifying the row and - column where the token begins in the source; a 2-tuple (erow, ecol) of - ints specifying the row and column where the token ends in the source; - and the line on which the token was found. The line passed is the - physical line. - """ - lnum = parenlev = continued = 0 - contstr, needcont = '', 0 - contline = None - indents = [0] - - # 'stashed' and 'async_*' are used for async/await parsing - stashed = None - async_def = False - async_def_indent = 0 - async_def_nl = False - - while 1: # loop over lines in stream - try: - line = readline() - except StopIteration: - line = '' - lnum = lnum + 1 - pos, max = 0, len(line) - - if contstr: # continued string - if not line: - raise TokenError("EOF in multi-line string", strstart) - endmatch = endprog.match(line) - if endmatch: - pos = end = endmatch.end(0) - yield (STRING, contstr + line[:end], - strstart, (lnum, end), contline + line) - contstr, needcont = '', 0 - contline = None - elif needcont and line[-2:] != '\\\n' and line[-3:] != '\\\r\n': - yield (ERRORTOKEN, contstr + line, - strstart, (lnum, len(line)), contline) - contstr = '' - contline = None - continue - else: - contstr = contstr + line - contline = contline + line - continue - - elif parenlev == 0 and not continued: # new statement - if not line: break - column = 0 - while pos < max: # measure leading whitespace - if line[pos] == ' ': column = column + 1 - elif line[pos] == '\t': column = (column//tabsize + 1)*tabsize - elif line[pos] == '\f': column = 0 - else: break - pos = pos + 1 - if pos == max: break - - if stashed: - yield stashed - stashed = None - - if line[pos] in '#\r\n': # skip comments or blank lines - if line[pos] == '#': - comment_token = line[pos:].rstrip('\r\n') - nl_pos = pos + len(comment_token) - yield (COMMENT, comment_token, - (lnum, pos), (lnum, pos + len(comment_token)), line) - yield (NL, line[nl_pos:], - (lnum, nl_pos), (lnum, len(line)), line) - else: - yield ((NL, COMMENT)[line[pos] == '#'], line[pos:], - (lnum, pos), (lnum, len(line)), line) - continue - - if column > indents[-1]: # count indents or dedents - indents.append(column) - yield (INDENT, line[:pos], (lnum, 0), (lnum, pos), line) - while column < indents[-1]: - if column not in indents: - raise IndentationError( - "unindent does not match any outer indentation level", - ("", lnum, pos, line)) - indents = indents[:-1] - - if async_def and async_def_indent >= indents[-1]: - async_def = False - async_def_nl = False - async_def_indent = 0 - - yield (DEDENT, '', (lnum, pos), (lnum, pos), line) - - if async_def and async_def_nl and async_def_indent >= indents[-1]: - async_def = False - async_def_nl = False - async_def_indent = 0 - - else: # continued statement - if not line: - raise TokenError("EOF in multi-line statement", (lnum, 0)) - continued = 0 - - while pos < max: - pseudomatch = pseudoprog.match(line, pos) - if pseudomatch: # scan for tokens - start, end = pseudomatch.span(1) - spos, epos, pos = (lnum, start), (lnum, end), end - token, initial = line[start:end], line[start] - - if initial in string.digits or \ - (initial == '.' and token != '.'): # ordinary number - yield (NUMBER, token, spos, epos, line) - elif initial in '\r\n': - newline = NEWLINE - if parenlev > 0: - newline = NL - elif async_def: - async_def_nl = True - if stashed: - yield stashed - stashed = None - yield (newline, token, spos, epos, line) - - elif initial == '#': - assert not token.endswith("\n") - if stashed: - yield stashed - stashed = None - yield (COMMENT, token, spos, epos, line) - elif token in triple_quoted: - endprog = endprogs[token] - endmatch = endprog.match(line, pos) - if endmatch: # all on one line - pos = endmatch.end(0) - token = line[start:pos] - if stashed: - yield stashed - stashed = None - yield (STRING, token, spos, (lnum, pos), line) - else: - strstart = (lnum, start) # multiple lines - contstr = line[start:] - contline = line - break - elif initial in single_quoted or \ - token[:2] in single_quoted or \ - token[:3] in single_quoted: - if token[-1] == '\n': # continued string - strstart = (lnum, start) - endprog = (endprogs[initial] or endprogs[token[1]] or - endprogs[token[2]]) - contstr, needcont = line[start:], 1 - contline = line - break - else: # ordinary string - if stashed: - yield stashed - stashed = None - yield (STRING, token, spos, epos, line) - elif initial.isidentifier(): # ordinary name - if token in ('async', 'await'): - if async_def: - yield (ASYNC if token == 'async' else AWAIT, - token, spos, epos, line) - continue - - tok = (NAME, token, spos, epos, line) - if token == 'async' and not stashed: - stashed = tok - continue - - if token in ('def', 'for'): - if (stashed - and stashed[0] == NAME - and stashed[1] == 'async'): - - if token == 'def': - async_def = True - async_def_indent = indents[-1] - - yield (ASYNC, stashed[1], - stashed[2], stashed[3], - stashed[4]) - stashed = None - - if stashed: - yield stashed - stashed = None - - yield tok - elif initial == '\\': # continued stmt - # This yield is new; needed for better idempotency: - if stashed: - yield stashed - stashed = None - yield (NL, token, spos, (lnum, pos), line) - continued = 1 - else: - if initial in '([{': parenlev = parenlev + 1 - elif initial in ')]}': parenlev = parenlev - 1 - if stashed: - yield stashed - stashed = None - yield (OP, token, spos, epos, line) - else: - yield (ERRORTOKEN, line[pos], - (lnum, pos), (lnum, pos+1), line) - pos = pos + 1 - - if stashed: - yield stashed - stashed = None - - for indent in indents[1:]: # pop remaining indent levels - yield (DEDENT, '', (lnum, 0), (lnum, 0), '') - yield (ENDMARKER, '', (lnum, 0), (lnum, 0), '') - -if __name__ == '__main__': # testing - import sys - if len(sys.argv) > 1: tokenize(open(sys.argv[1]).readline) - else: tokenize(sys.stdin.readline) diff --git a/PythonLib/full/lib2to3/pygram.py b/PythonLib/full/lib2to3/pygram.py deleted file mode 100644 index 24d9db92..00000000 --- a/PythonLib/full/lib2to3/pygram.py +++ /dev/null @@ -1,43 +0,0 @@ -# Copyright 2006 Google, Inc. All Rights Reserved. -# Licensed to PSF under a Contributor Agreement. - -"""Export the Python grammar and symbols.""" - -# Python imports -import os - -# Local imports -from .pgen2 import token -from .pgen2 import driver -from . import pytree - -# The grammar file -_GRAMMAR_FILE = os.path.join(os.path.dirname(__file__), "Grammar.txt") -_PATTERN_GRAMMAR_FILE = os.path.join(os.path.dirname(__file__), - "PatternGrammar.txt") - - -class Symbols(object): - - def __init__(self, grammar): - """Initializer. - - Creates an attribute for each grammar symbol (nonterminal), - whose value is the symbol's type (an int >= 256). - """ - for name, symbol in grammar.symbol2number.items(): - setattr(self, name, symbol) - - -python_grammar = driver.load_packaged_grammar("lib2to3", _GRAMMAR_FILE) - -python_symbols = Symbols(python_grammar) - -python_grammar_no_print_statement = python_grammar.copy() -del python_grammar_no_print_statement.keywords["print"] - -python_grammar_no_print_and_exec_statement = python_grammar_no_print_statement.copy() -del python_grammar_no_print_and_exec_statement.keywords["exec"] - -pattern_grammar = driver.load_packaged_grammar("lib2to3", _PATTERN_GRAMMAR_FILE) -pattern_symbols = Symbols(pattern_grammar) diff --git a/PythonLib/full/lib2to3/pytree.py b/PythonLib/full/lib2to3/pytree.py deleted file mode 100644 index 729023df..00000000 --- a/PythonLib/full/lib2to3/pytree.py +++ /dev/null @@ -1,853 +0,0 @@ -# Copyright 2006 Google, Inc. All Rights Reserved. -# Licensed to PSF under a Contributor Agreement. - -""" -Python parse tree definitions. - -This is a very concrete parse tree; we need to keep every token and -even the comments and whitespace between tokens. - -There's also a pattern matching implementation here. -""" - -__author__ = "Guido van Rossum " - -import sys -from io import StringIO - -HUGE = 0x7FFFFFFF # maximum repeat count, default max - -_type_reprs = {} -def type_repr(type_num): - global _type_reprs - if not _type_reprs: - from .pygram import python_symbols - # printing tokens is possible but not as useful - # from .pgen2 import token // token.__dict__.items(): - for name, val in python_symbols.__dict__.items(): - if type(val) == int: _type_reprs[val] = name - return _type_reprs.setdefault(type_num, type_num) - -class Base(object): - - """ - Abstract base class for Node and Leaf. - - This provides some default functionality and boilerplate using the - template pattern. - - A node may be a subnode of at most one parent. - """ - - # Default values for instance variables - type = None # int: token number (< 256) or symbol number (>= 256) - parent = None # Parent node pointer, or None - children = () # Tuple of subnodes - was_changed = False - was_checked = False - - def __new__(cls, *args, **kwds): - """Constructor that prevents Base from being instantiated.""" - assert cls is not Base, "Cannot instantiate Base" - return object.__new__(cls) - - def __eq__(self, other): - """ - Compare two nodes for equality. - - This calls the method _eq(). - """ - if self.__class__ is not other.__class__: - return NotImplemented - return self._eq(other) - - __hash__ = None # For Py3 compatibility. - - def _eq(self, other): - """ - Compare two nodes for equality. - - This is called by __eq__ and __ne__. It is only called if the two nodes - have the same type. This must be implemented by the concrete subclass. - Nodes should be considered equal if they have the same structure, - ignoring the prefix string and other context information. - """ - raise NotImplementedError - - def clone(self): - """ - Return a cloned (deep) copy of self. - - This must be implemented by the concrete subclass. - """ - raise NotImplementedError - - def post_order(self): - """ - Return a post-order iterator for the tree. - - This must be implemented by the concrete subclass. - """ - raise NotImplementedError - - def pre_order(self): - """ - Return a pre-order iterator for the tree. - - This must be implemented by the concrete subclass. - """ - raise NotImplementedError - - def replace(self, new): - """Replace this node with a new one in the parent.""" - assert self.parent is not None, str(self) - assert new is not None - if not isinstance(new, list): - new = [new] - l_children = [] - found = False - for ch in self.parent.children: - if ch is self: - assert not found, (self.parent.children, self, new) - if new is not None: - l_children.extend(new) - found = True - else: - l_children.append(ch) - assert found, (self.children, self, new) - self.parent.changed() - self.parent.children = l_children - for x in new: - x.parent = self.parent - self.parent = None - - def get_lineno(self): - """Return the line number which generated the invocant node.""" - node = self - while not isinstance(node, Leaf): - if not node.children: - return - node = node.children[0] - return node.lineno - - def changed(self): - if self.parent: - self.parent.changed() - self.was_changed = True - - def remove(self): - """ - Remove the node from the tree. Returns the position of the node in its - parent's children before it was removed. - """ - if self.parent: - for i, node in enumerate(self.parent.children): - if node is self: - self.parent.changed() - del self.parent.children[i] - self.parent = None - return i - - @property - def next_sibling(self): - """ - The node immediately following the invocant in their parent's children - list. If the invocant does not have a next sibling, it is None - """ - if self.parent is None: - return None - - # Can't use index(); we need to test by identity - for i, child in enumerate(self.parent.children): - if child is self: - try: - return self.parent.children[i+1] - except IndexError: - return None - - @property - def prev_sibling(self): - """ - The node immediately preceding the invocant in their parent's children - list. If the invocant does not have a previous sibling, it is None. - """ - if self.parent is None: - return None - - # Can't use index(); we need to test by identity - for i, child in enumerate(self.parent.children): - if child is self: - if i == 0: - return None - return self.parent.children[i-1] - - def leaves(self): - for child in self.children: - yield from child.leaves() - - def depth(self): - if self.parent is None: - return 0 - return 1 + self.parent.depth() - - def get_suffix(self): - """ - Return the string immediately following the invocant node. This is - effectively equivalent to node.next_sibling.prefix - """ - next_sib = self.next_sibling - if next_sib is None: - return "" - return next_sib.prefix - - if sys.version_info < (3, 0): - def __str__(self): - return str(self).encode("ascii") - -class Node(Base): - - """Concrete implementation for interior nodes.""" - - def __init__(self,type, children, - context=None, - prefix=None, - fixers_applied=None): - """ - Initializer. - - Takes a type constant (a symbol number >= 256), a sequence of - child nodes, and an optional context keyword argument. - - As a side effect, the parent pointers of the children are updated. - """ - assert type >= 256, type - self.type = type - self.children = list(children) - for ch in self.children: - assert ch.parent is None, repr(ch) - ch.parent = self - if prefix is not None: - self.prefix = prefix - if fixers_applied: - self.fixers_applied = fixers_applied[:] - else: - self.fixers_applied = None - - def __repr__(self): - """Return a canonical string representation.""" - return "%s(%s, %r)" % (self.__class__.__name__, - type_repr(self.type), - self.children) - - def __unicode__(self): - """ - Return a pretty string representation. - - This reproduces the input source exactly. - """ - return "".join(map(str, self.children)) - - if sys.version_info > (3, 0): - __str__ = __unicode__ - - def _eq(self, other): - """Compare two nodes for equality.""" - return (self.type, self.children) == (other.type, other.children) - - def clone(self): - """Return a cloned (deep) copy of self.""" - return Node(self.type, [ch.clone() for ch in self.children], - fixers_applied=self.fixers_applied) - - def post_order(self): - """Return a post-order iterator for the tree.""" - for child in self.children: - yield from child.post_order() - yield self - - def pre_order(self): - """Return a pre-order iterator for the tree.""" - yield self - for child in self.children: - yield from child.pre_order() - - @property - def prefix(self): - """ - The whitespace and comments preceding this node in the input. - """ - if not self.children: - return "" - return self.children[0].prefix - - @prefix.setter - def prefix(self, prefix): - if self.children: - self.children[0].prefix = prefix - - def set_child(self, i, child): - """ - Equivalent to 'node.children[i] = child'. This method also sets the - child's parent attribute appropriately. - """ - child.parent = self - self.children[i].parent = None - self.children[i] = child - self.changed() - - def insert_child(self, i, child): - """ - Equivalent to 'node.children.insert(i, child)'. This method also sets - the child's parent attribute appropriately. - """ - child.parent = self - self.children.insert(i, child) - self.changed() - - def append_child(self, child): - """ - Equivalent to 'node.children.append(child)'. This method also sets the - child's parent attribute appropriately. - """ - child.parent = self - self.children.append(child) - self.changed() - - -class Leaf(Base): - - """Concrete implementation for leaf nodes.""" - - # Default values for instance variables - _prefix = "" # Whitespace and comments preceding this token in the input - lineno = 0 # Line where this token starts in the input - column = 0 # Column where this token tarts in the input - - def __init__(self, type, value, - context=None, - prefix=None, - fixers_applied=[]): - """ - Initializer. - - Takes a type constant (a token number < 256), a string value, and an - optional context keyword argument. - """ - assert 0 <= type < 256, type - if context is not None: - self._prefix, (self.lineno, self.column) = context - self.type = type - self.value = value - if prefix is not None: - self._prefix = prefix - self.fixers_applied = fixers_applied[:] - - def __repr__(self): - """Return a canonical string representation.""" - return "%s(%r, %r)" % (self.__class__.__name__, - self.type, - self.value) - - def __unicode__(self): - """ - Return a pretty string representation. - - This reproduces the input source exactly. - """ - return self.prefix + str(self.value) - - if sys.version_info > (3, 0): - __str__ = __unicode__ - - def _eq(self, other): - """Compare two nodes for equality.""" - return (self.type, self.value) == (other.type, other.value) - - def clone(self): - """Return a cloned (deep) copy of self.""" - return Leaf(self.type, self.value, - (self.prefix, (self.lineno, self.column)), - fixers_applied=self.fixers_applied) - - def leaves(self): - yield self - - def post_order(self): - """Return a post-order iterator for the tree.""" - yield self - - def pre_order(self): - """Return a pre-order iterator for the tree.""" - yield self - - @property - def prefix(self): - """ - The whitespace and comments preceding this token in the input. - """ - return self._prefix - - @prefix.setter - def prefix(self, prefix): - self.changed() - self._prefix = prefix - -def convert(gr, raw_node): - """ - Convert raw node information to a Node or Leaf instance. - - This is passed to the parser driver which calls it whenever a reduction of a - grammar rule produces a new complete node, so that the tree is build - strictly bottom-up. - """ - type, value, context, children = raw_node - if children or type in gr.number2symbol: - # If there's exactly one child, return that child instead of - # creating a new node. - if len(children) == 1: - return children[0] - return Node(type, children, context=context) - else: - return Leaf(type, value, context=context) - - -class BasePattern(object): - - """ - A pattern is a tree matching pattern. - - It looks for a specific node type (token or symbol), and - optionally for a specific content. - - This is an abstract base class. There are three concrete - subclasses: - - - LeafPattern matches a single leaf node; - - NodePattern matches a single node (usually non-leaf); - - WildcardPattern matches a sequence of nodes of variable length. - """ - - # Defaults for instance variables - type = None # Node type (token if < 256, symbol if >= 256) - content = None # Optional content matching pattern - name = None # Optional name used to store match in results dict - - def __new__(cls, *args, **kwds): - """Constructor that prevents BasePattern from being instantiated.""" - assert cls is not BasePattern, "Cannot instantiate BasePattern" - return object.__new__(cls) - - def __repr__(self): - args = [type_repr(self.type), self.content, self.name] - while args and args[-1] is None: - del args[-1] - return "%s(%s)" % (self.__class__.__name__, ", ".join(map(repr, args))) - - def optimize(self): - """ - A subclass can define this as a hook for optimizations. - - Returns either self or another node with the same effect. - """ - return self - - def match(self, node, results=None): - """ - Does this pattern exactly match a node? - - Returns True if it matches, False if not. - - If results is not None, it must be a dict which will be - updated with the nodes matching named subpatterns. - - Default implementation for non-wildcard patterns. - """ - if self.type is not None and node.type != self.type: - return False - if self.content is not None: - r = None - if results is not None: - r = {} - if not self._submatch(node, r): - return False - if r: - results.update(r) - if results is not None and self.name: - results[self.name] = node - return True - - def match_seq(self, nodes, results=None): - """ - Does this pattern exactly match a sequence of nodes? - - Default implementation for non-wildcard patterns. - """ - if len(nodes) != 1: - return False - return self.match(nodes[0], results) - - def generate_matches(self, nodes): - """ - Generator yielding all matches for this pattern. - - Default implementation for non-wildcard patterns. - """ - r = {} - if nodes and self.match(nodes[0], r): - yield 1, r - - -class LeafPattern(BasePattern): - - def __init__(self, type=None, content=None, name=None): - """ - Initializer. Takes optional type, content, and name. - - The type, if given must be a token type (< 256). If not given, - this matches any *leaf* node; the content may still be required. - - The content, if given, must be a string. - - If a name is given, the matching node is stored in the results - dict under that key. - """ - if type is not None: - assert 0 <= type < 256, type - if content is not None: - assert isinstance(content, str), repr(content) - self.type = type - self.content = content - self.name = name - - def match(self, node, results=None): - """Override match() to insist on a leaf node.""" - if not isinstance(node, Leaf): - return False - return BasePattern.match(self, node, results) - - def _submatch(self, node, results=None): - """ - Match the pattern's content to the node's children. - - This assumes the node type matches and self.content is not None. - - Returns True if it matches, False if not. - - If results is not None, it must be a dict which will be - updated with the nodes matching named subpatterns. - - When returning False, the results dict may still be updated. - """ - return self.content == node.value - - -class NodePattern(BasePattern): - - wildcards = False - - def __init__(self, type=None, content=None, name=None): - """ - Initializer. Takes optional type, content, and name. - - The type, if given, must be a symbol type (>= 256). If the - type is None this matches *any* single node (leaf or not), - except if content is not None, in which it only matches - non-leaf nodes that also match the content pattern. - - The content, if not None, must be a sequence of Patterns that - must match the node's children exactly. If the content is - given, the type must not be None. - - If a name is given, the matching node is stored in the results - dict under that key. - """ - if type is not None: - assert type >= 256, type - if content is not None: - assert not isinstance(content, str), repr(content) - content = list(content) - for i, item in enumerate(content): - assert isinstance(item, BasePattern), (i, item) - if isinstance(item, WildcardPattern): - self.wildcards = True - self.type = type - self.content = content - self.name = name - - def _submatch(self, node, results=None): - """ - Match the pattern's content to the node's children. - - This assumes the node type matches and self.content is not None. - - Returns True if it matches, False if not. - - If results is not None, it must be a dict which will be - updated with the nodes matching named subpatterns. - - When returning False, the results dict may still be updated. - """ - if self.wildcards: - for c, r in generate_matches(self.content, node.children): - if c == len(node.children): - if results is not None: - results.update(r) - return True - return False - if len(self.content) != len(node.children): - return False - for subpattern, child in zip(self.content, node.children): - if not subpattern.match(child, results): - return False - return True - - -class WildcardPattern(BasePattern): - - """ - A wildcard pattern can match zero or more nodes. - - This has all the flexibility needed to implement patterns like: - - .* .+ .? .{m,n} - (a b c | d e | f) - (...)* (...)+ (...)? (...){m,n} - - except it always uses non-greedy matching. - """ - - def __init__(self, content=None, min=0, max=HUGE, name=None): - """ - Initializer. - - Args: - content: optional sequence of subsequences of patterns; - if absent, matches one node; - if present, each subsequence is an alternative [*] - min: optional minimum number of times to match, default 0 - max: optional maximum number of times to match, default HUGE - name: optional name assigned to this match - - [*] Thus, if content is [[a, b, c], [d, e], [f, g, h]] this is - equivalent to (a b c | d e | f g h); if content is None, - this is equivalent to '.' in regular expression terms. - The min and max parameters work as follows: - min=0, max=maxint: .* - min=1, max=maxint: .+ - min=0, max=1: .? - min=1, max=1: . - If content is not None, replace the dot with the parenthesized - list of alternatives, e.g. (a b c | d e | f g h)* - """ - assert 0 <= min <= max <= HUGE, (min, max) - if content is not None: - content = tuple(map(tuple, content)) # Protect against alterations - # Check sanity of alternatives - assert len(content), repr(content) # Can't have zero alternatives - for alt in content: - assert len(alt), repr(alt) # Can have empty alternatives - self.content = content - self.min = min - self.max = max - self.name = name - - def optimize(self): - """Optimize certain stacked wildcard patterns.""" - subpattern = None - if (self.content is not None and - len(self.content) == 1 and len(self.content[0]) == 1): - subpattern = self.content[0][0] - if self.min == 1 and self.max == 1: - if self.content is None: - return NodePattern(name=self.name) - if subpattern is not None and self.name == subpattern.name: - return subpattern.optimize() - if (self.min <= 1 and isinstance(subpattern, WildcardPattern) and - subpattern.min <= 1 and self.name == subpattern.name): - return WildcardPattern(subpattern.content, - self.min*subpattern.min, - self.max*subpattern.max, - subpattern.name) - return self - - def match(self, node, results=None): - """Does this pattern exactly match a node?""" - return self.match_seq([node], results) - - def match_seq(self, nodes, results=None): - """Does this pattern exactly match a sequence of nodes?""" - for c, r in self.generate_matches(nodes): - if c == len(nodes): - if results is not None: - results.update(r) - if self.name: - results[self.name] = list(nodes) - return True - return False - - def generate_matches(self, nodes): - """ - Generator yielding matches for a sequence of nodes. - - Args: - nodes: sequence of nodes - - Yields: - (count, results) tuples where: - count: the match comprises nodes[:count]; - results: dict containing named submatches. - """ - if self.content is None: - # Shortcut for special case (see __init__.__doc__) - for count in range(self.min, 1 + min(len(nodes), self.max)): - r = {} - if self.name: - r[self.name] = nodes[:count] - yield count, r - elif self.name == "bare_name": - yield self._bare_name_matches(nodes) - else: - # The reason for this is that hitting the recursion limit usually - # results in some ugly messages about how RuntimeErrors are being - # ignored. We only have to do this on CPython, though, because other - # implementations don't have this nasty bug in the first place. - if hasattr(sys, "getrefcount"): - save_stderr = sys.stderr - sys.stderr = StringIO() - try: - for count, r in self._recursive_matches(nodes, 0): - if self.name: - r[self.name] = nodes[:count] - yield count, r - except RuntimeError: - # Fall back to the iterative pattern matching scheme if the - # recursive scheme hits the recursion limit (RecursionError). - for count, r in self._iterative_matches(nodes): - if self.name: - r[self.name] = nodes[:count] - yield count, r - finally: - if hasattr(sys, "getrefcount"): - sys.stderr = save_stderr - - def _iterative_matches(self, nodes): - """Helper to iteratively yield the matches.""" - nodelen = len(nodes) - if 0 >= self.min: - yield 0, {} - - results = [] - # generate matches that use just one alt from self.content - for alt in self.content: - for c, r in generate_matches(alt, nodes): - yield c, r - results.append((c, r)) - - # for each match, iterate down the nodes - while results: - new_results = [] - for c0, r0 in results: - # stop if the entire set of nodes has been matched - if c0 < nodelen and c0 <= self.max: - for alt in self.content: - for c1, r1 in generate_matches(alt, nodes[c0:]): - if c1 > 0: - r = {} - r.update(r0) - r.update(r1) - yield c0 + c1, r - new_results.append((c0 + c1, r)) - results = new_results - - def _bare_name_matches(self, nodes): - """Special optimized matcher for bare_name.""" - count = 0 - r = {} - done = False - max = len(nodes) - while not done and count < max: - done = True - for leaf in self.content: - if leaf[0].match(nodes[count], r): - count += 1 - done = False - break - r[self.name] = nodes[:count] - return count, r - - def _recursive_matches(self, nodes, count): - """Helper to recursively yield the matches.""" - assert self.content is not None - if count >= self.min: - yield 0, {} - if count < self.max: - for alt in self.content: - for c0, r0 in generate_matches(alt, nodes): - for c1, r1 in self._recursive_matches(nodes[c0:], count+1): - r = {} - r.update(r0) - r.update(r1) - yield c0 + c1, r - - -class NegatedPattern(BasePattern): - - def __init__(self, content=None): - """ - Initializer. - - The argument is either a pattern or None. If it is None, this - only matches an empty sequence (effectively '$' in regex - lingo). If it is not None, this matches whenever the argument - pattern doesn't have any matches. - """ - if content is not None: - assert isinstance(content, BasePattern), repr(content) - self.content = content - - def match(self, node): - # We never match a node in its entirety - return False - - def match_seq(self, nodes): - # We only match an empty sequence of nodes in its entirety - return len(nodes) == 0 - - def generate_matches(self, nodes): - if self.content is None: - # Return a match if there is an empty sequence - if len(nodes) == 0: - yield 0, {} - else: - # Return a match if the argument pattern has no matches - for c, r in self.content.generate_matches(nodes): - return - yield 0, {} - - -def generate_matches(patterns, nodes): - """ - Generator yielding matches for a sequence of patterns and nodes. - - Args: - patterns: a sequence of patterns - nodes: a sequence of nodes - - Yields: - (count, results) tuples where: - count: the entire sequence of patterns matches nodes[:count]; - results: dict containing named submatches. - """ - if not patterns: - yield 0, {} - else: - p, rest = patterns[0], patterns[1:] - for c0, r0 in p.generate_matches(nodes): - if not rest: - yield c0, r0 - else: - for c1, r1 in generate_matches(rest, nodes[c0:]): - r = {} - r.update(r0) - r.update(r1) - yield c0 + c1, r diff --git a/PythonLib/full/lib2to3/refactor.py b/PythonLib/full/lib2to3/refactor.py deleted file mode 100644 index 3a5aafff..00000000 --- a/PythonLib/full/lib2to3/refactor.py +++ /dev/null @@ -1,732 +0,0 @@ -# Copyright 2006 Google, Inc. All Rights Reserved. -# Licensed to PSF under a Contributor Agreement. - -"""Refactoring framework. - -Used as a main program, this can refactor any number of files and/or -recursively descend down directories. Imported as a module, this -provides infrastructure to write your own refactoring tool. -""" - -__author__ = "Guido van Rossum " - - -# Python imports -import io -import os -import pkgutil -import sys -import logging -import operator -import collections -from itertools import chain - -# Local imports -from .pgen2 import driver, tokenize, token -from .fixer_util import find_root -from . import pytree, pygram -from . import btm_matcher as bm - - -def get_all_fix_names(fixer_pkg, remove_prefix=True): - """Return a sorted list of all available fix names in the given package.""" - pkg = __import__(fixer_pkg, [], [], ["*"]) - fix_names = [] - for finder, name, ispkg in pkgutil.iter_modules(pkg.__path__): - if name.startswith("fix_"): - if remove_prefix: - name = name[4:] - fix_names.append(name) - return fix_names - - -class _EveryNode(Exception): - pass - - -def _get_head_types(pat): - """ Accepts a pytree Pattern Node and returns a set - of the pattern types which will match first. """ - - if isinstance(pat, (pytree.NodePattern, pytree.LeafPattern)): - # NodePatters must either have no type and no content - # or a type and content -- so they don't get any farther - # Always return leafs - if pat.type is None: - raise _EveryNode - return {pat.type} - - if isinstance(pat, pytree.NegatedPattern): - if pat.content: - return _get_head_types(pat.content) - raise _EveryNode # Negated Patterns don't have a type - - if isinstance(pat, pytree.WildcardPattern): - # Recurse on each node in content - r = set() - for p in pat.content: - for x in p: - r.update(_get_head_types(x)) - return r - - raise Exception("Oh no! I don't understand pattern %s" %(pat)) - - -def _get_headnode_dict(fixer_list): - """ Accepts a list of fixers and returns a dictionary - of head node type --> fixer list. """ - head_nodes = collections.defaultdict(list) - every = [] - for fixer in fixer_list: - if fixer.pattern: - try: - heads = _get_head_types(fixer.pattern) - except _EveryNode: - every.append(fixer) - else: - for node_type in heads: - head_nodes[node_type].append(fixer) - else: - if fixer._accept_type is not None: - head_nodes[fixer._accept_type].append(fixer) - else: - every.append(fixer) - for node_type in chain(pygram.python_grammar.symbol2number.values(), - pygram.python_grammar.tokens): - head_nodes[node_type].extend(every) - return dict(head_nodes) - - -def get_fixers_from_package(pkg_name): - """ - Return the fully qualified names for fixers in the package pkg_name. - """ - return [pkg_name + "." + fix_name - for fix_name in get_all_fix_names(pkg_name, False)] - -def _identity(obj): - return obj - - -def _detect_future_features(source): - have_docstring = False - gen = tokenize.generate_tokens(io.StringIO(source).readline) - def advance(): - tok = next(gen) - return tok[0], tok[1] - ignore = frozenset({token.NEWLINE, tokenize.NL, token.COMMENT}) - features = set() - try: - while True: - tp, value = advance() - if tp in ignore: - continue - elif tp == token.STRING: - if have_docstring: - break - have_docstring = True - elif tp == token.NAME and value == "from": - tp, value = advance() - if tp != token.NAME or value != "__future__": - break - tp, value = advance() - if tp != token.NAME or value != "import": - break - tp, value = advance() - if tp == token.OP and value == "(": - tp, value = advance() - while tp == token.NAME: - features.add(value) - tp, value = advance() - if tp != token.OP or value != ",": - break - tp, value = advance() - else: - break - except StopIteration: - pass - return frozenset(features) - - -class FixerError(Exception): - """A fixer could not be loaded.""" - - -class RefactoringTool(object): - - _default_options = {"print_function" : False, - "exec_function": False, - "write_unchanged_files" : False} - - CLASS_PREFIX = "Fix" # The prefix for fixer classes - FILE_PREFIX = "fix_" # The prefix for modules with a fixer within - - def __init__(self, fixer_names, options=None, explicit=None): - """Initializer. - - Args: - fixer_names: a list of fixers to import - options: a dict with configuration. - explicit: a list of fixers to run even if they are explicit. - """ - self.fixers = fixer_names - self.explicit = explicit or [] - self.options = self._default_options.copy() - if options is not None: - self.options.update(options) - self.grammar = pygram.python_grammar.copy() - - if self.options['print_function']: - del self.grammar.keywords["print"] - elif self.options['exec_function']: - del self.grammar.keywords["exec"] - - # When this is True, the refactor*() methods will call write_file() for - # files processed even if they were not changed during refactoring. If - # and only if the refactor method's write parameter was True. - self.write_unchanged_files = self.options.get("write_unchanged_files") - self.errors = [] - self.logger = logging.getLogger("RefactoringTool") - self.fixer_log = [] - self.wrote = False - self.driver = driver.Driver(self.grammar, - convert=pytree.convert, - logger=self.logger) - self.pre_order, self.post_order = self.get_fixers() - - - self.files = [] # List of files that were or should be modified - - self.BM = bm.BottomMatcher() - self.bmi_pre_order = [] # Bottom Matcher incompatible fixers - self.bmi_post_order = [] - - for fixer in chain(self.post_order, self.pre_order): - if fixer.BM_compatible: - self.BM.add_fixer(fixer) - # remove fixers that will be handled by the bottom-up - # matcher - elif fixer in self.pre_order: - self.bmi_pre_order.append(fixer) - elif fixer in self.post_order: - self.bmi_post_order.append(fixer) - - self.bmi_pre_order_heads = _get_headnode_dict(self.bmi_pre_order) - self.bmi_post_order_heads = _get_headnode_dict(self.bmi_post_order) - - - - def get_fixers(self): - """Inspects the options to load the requested patterns and handlers. - - Returns: - (pre_order, post_order), where pre_order is the list of fixers that - want a pre-order AST traversal, and post_order is the list that want - post-order traversal. - """ - pre_order_fixers = [] - post_order_fixers = [] - for fix_mod_path in self.fixers: - mod = __import__(fix_mod_path, {}, {}, ["*"]) - fix_name = fix_mod_path.rsplit(".", 1)[-1] - if fix_name.startswith(self.FILE_PREFIX): - fix_name = fix_name[len(self.FILE_PREFIX):] - parts = fix_name.split("_") - class_name = self.CLASS_PREFIX + "".join([p.title() for p in parts]) - try: - fix_class = getattr(mod, class_name) - except AttributeError: - raise FixerError("Can't find %s.%s" % (fix_name, class_name)) from None - fixer = fix_class(self.options, self.fixer_log) - if fixer.explicit and self.explicit is not True and \ - fix_mod_path not in self.explicit: - self.log_message("Skipping optional fixer: %s", fix_name) - continue - - self.log_debug("Adding transformation: %s", fix_name) - if fixer.order == "pre": - pre_order_fixers.append(fixer) - elif fixer.order == "post": - post_order_fixers.append(fixer) - else: - raise FixerError("Illegal fixer order: %r" % fixer.order) - - key_func = operator.attrgetter("run_order") - pre_order_fixers.sort(key=key_func) - post_order_fixers.sort(key=key_func) - return (pre_order_fixers, post_order_fixers) - - def log_error(self, msg, *args, **kwds): - """Called when an error occurs.""" - raise - - def log_message(self, msg, *args): - """Hook to log a message.""" - if args: - msg = msg % args - self.logger.info(msg) - - def log_debug(self, msg, *args): - if args: - msg = msg % args - self.logger.debug(msg) - - def print_output(self, old_text, new_text, filename, equal): - """Called with the old version, new version, and filename of a - refactored file.""" - pass - - def refactor(self, items, write=False, doctests_only=False): - """Refactor a list of files and directories.""" - - for dir_or_file in items: - if os.path.isdir(dir_or_file): - self.refactor_dir(dir_or_file, write, doctests_only) - else: - self.refactor_file(dir_or_file, write, doctests_only) - - def refactor_dir(self, dir_name, write=False, doctests_only=False): - """Descends down a directory and refactor every Python file found. - - Python files are assumed to have a .py extension. - - Files and subdirectories starting with '.' are skipped. - """ - py_ext = os.extsep + "py" - for dirpath, dirnames, filenames in os.walk(dir_name): - self.log_debug("Descending into %s", dirpath) - dirnames.sort() - filenames.sort() - for name in filenames: - if (not name.startswith(".") and - os.path.splitext(name)[1] == py_ext): - fullname = os.path.join(dirpath, name) - self.refactor_file(fullname, write, doctests_only) - # Modify dirnames in-place to remove subdirs with leading dots - dirnames[:] = [dn for dn in dirnames if not dn.startswith(".")] - - def _read_python_source(self, filename): - """ - Do our best to decode a Python source file correctly. - """ - try: - f = open(filename, "rb") - except OSError as err: - self.log_error("Can't open %s: %s", filename, err) - return None, None - try: - encoding = tokenize.detect_encoding(f.readline)[0] - finally: - f.close() - with io.open(filename, "r", encoding=encoding, newline='') as f: - return f.read(), encoding - - def refactor_file(self, filename, write=False, doctests_only=False): - """Refactors a file.""" - input, encoding = self._read_python_source(filename) - if input is None: - # Reading the file failed. - return - input += "\n" # Silence certain parse errors - if doctests_only: - self.log_debug("Refactoring doctests in %s", filename) - output = self.refactor_docstring(input, filename) - if self.write_unchanged_files or output != input: - self.processed_file(output, filename, input, write, encoding) - else: - self.log_debug("No doctest changes in %s", filename) - else: - tree = self.refactor_string(input, filename) - if self.write_unchanged_files or (tree and tree.was_changed): - # The [:-1] is to take off the \n we added earlier - self.processed_file(str(tree)[:-1], filename, - write=write, encoding=encoding) - else: - self.log_debug("No changes in %s", filename) - - def refactor_string(self, data, name): - """Refactor a given input string. - - Args: - data: a string holding the code to be refactored. - name: a human-readable name for use in error/log messages. - - Returns: - An AST corresponding to the refactored input stream; None if - there were errors during the parse. - """ - features = _detect_future_features(data) - if "print_function" in features: - self.driver.grammar = pygram.python_grammar_no_print_statement - try: - tree = self.driver.parse_string(data) - except Exception as err: - self.log_error("Can't parse %s: %s: %s", - name, err.__class__.__name__, err) - return - finally: - self.driver.grammar = self.grammar - tree.future_features = features - self.log_debug("Refactoring %s", name) - self.refactor_tree(tree, name) - return tree - - def refactor_stdin(self, doctests_only=False): - input = sys.stdin.read() - if doctests_only: - self.log_debug("Refactoring doctests in stdin") - output = self.refactor_docstring(input, "") - if self.write_unchanged_files or output != input: - self.processed_file(output, "", input) - else: - self.log_debug("No doctest changes in stdin") - else: - tree = self.refactor_string(input, "") - if self.write_unchanged_files or (tree and tree.was_changed): - self.processed_file(str(tree), "", input) - else: - self.log_debug("No changes in stdin") - - def refactor_tree(self, tree, name): - """Refactors a parse tree (modifying the tree in place). - - For compatible patterns the bottom matcher module is - used. Otherwise the tree is traversed node-to-node for - matches. - - Args: - tree: a pytree.Node instance representing the root of the tree - to be refactored. - name: a human-readable name for this tree. - - Returns: - True if the tree was modified, False otherwise. - """ - - for fixer in chain(self.pre_order, self.post_order): - fixer.start_tree(tree, name) - - #use traditional matching for the incompatible fixers - self.traverse_by(self.bmi_pre_order_heads, tree.pre_order()) - self.traverse_by(self.bmi_post_order_heads, tree.post_order()) - - # obtain a set of candidate nodes - match_set = self.BM.run(tree.leaves()) - - while any(match_set.values()): - for fixer in self.BM.fixers: - if fixer in match_set and match_set[fixer]: - #sort by depth; apply fixers from bottom(of the AST) to top - match_set[fixer].sort(key=pytree.Base.depth, reverse=True) - - if fixer.keep_line_order: - #some fixers(eg fix_imports) must be applied - #with the original file's line order - match_set[fixer].sort(key=pytree.Base.get_lineno) - - for node in list(match_set[fixer]): - if node in match_set[fixer]: - match_set[fixer].remove(node) - - try: - find_root(node) - except ValueError: - # this node has been cut off from a - # previous transformation ; skip - continue - - if node.fixers_applied and fixer in node.fixers_applied: - # do not apply the same fixer again - continue - - results = fixer.match(node) - - if results: - new = fixer.transform(node, results) - if new is not None: - node.replace(new) - #new.fixers_applied.append(fixer) - for node in new.post_order(): - # do not apply the fixer again to - # this or any subnode - if not node.fixers_applied: - node.fixers_applied = [] - node.fixers_applied.append(fixer) - - # update the original match set for - # the added code - new_matches = self.BM.run(new.leaves()) - for fxr in new_matches: - if not fxr in match_set: - match_set[fxr]=[] - - match_set[fxr].extend(new_matches[fxr]) - - for fixer in chain(self.pre_order, self.post_order): - fixer.finish_tree(tree, name) - return tree.was_changed - - def traverse_by(self, fixers, traversal): - """Traverse an AST, applying a set of fixers to each node. - - This is a helper method for refactor_tree(). - - Args: - fixers: a list of fixer instances. - traversal: a generator that yields AST nodes. - - Returns: - None - """ - if not fixers: - return - for node in traversal: - for fixer in fixers[node.type]: - results = fixer.match(node) - if results: - new = fixer.transform(node, results) - if new is not None: - node.replace(new) - node = new - - def processed_file(self, new_text, filename, old_text=None, write=False, - encoding=None): - """ - Called when a file has been refactored and there may be changes. - """ - self.files.append(filename) - if old_text is None: - old_text = self._read_python_source(filename)[0] - if old_text is None: - return - equal = old_text == new_text - self.print_output(old_text, new_text, filename, equal) - if equal: - self.log_debug("No changes to %s", filename) - if not self.write_unchanged_files: - return - if write: - self.write_file(new_text, filename, old_text, encoding) - else: - self.log_debug("Not writing changes to %s", filename) - - def write_file(self, new_text, filename, old_text, encoding=None): - """Writes a string to a file. - - It first shows a unified diff between the old text and the new text, and - then rewrites the file; the latter is only done if the write option is - set. - """ - try: - fp = io.open(filename, "w", encoding=encoding, newline='') - except OSError as err: - self.log_error("Can't create %s: %s", filename, err) - return - - with fp: - try: - fp.write(new_text) - except OSError as err: - self.log_error("Can't write %s: %s", filename, err) - self.log_debug("Wrote changes to %s", filename) - self.wrote = True - - PS1 = ">>> " - PS2 = "... " - - def refactor_docstring(self, input, filename): - """Refactors a docstring, looking for doctests. - - This returns a modified version of the input string. It looks - for doctests, which start with a ">>>" prompt, and may be - continued with "..." prompts, as long as the "..." is indented - the same as the ">>>". - - (Unfortunately we can't use the doctest module's parser, - since, like most parsers, it is not geared towards preserving - the original source.) - """ - result = [] - block = None - block_lineno = None - indent = None - lineno = 0 - for line in input.splitlines(keepends=True): - lineno += 1 - if line.lstrip().startswith(self.PS1): - if block is not None: - result.extend(self.refactor_doctest(block, block_lineno, - indent, filename)) - block_lineno = lineno - block = [line] - i = line.find(self.PS1) - indent = line[:i] - elif (indent is not None and - (line.startswith(indent + self.PS2) or - line == indent + self.PS2.rstrip() + "\n")): - block.append(line) - else: - if block is not None: - result.extend(self.refactor_doctest(block, block_lineno, - indent, filename)) - block = None - indent = None - result.append(line) - if block is not None: - result.extend(self.refactor_doctest(block, block_lineno, - indent, filename)) - return "".join(result) - - def refactor_doctest(self, block, lineno, indent, filename): - """Refactors one doctest. - - A doctest is given as a block of lines, the first of which starts - with ">>>" (possibly indented), while the remaining lines start - with "..." (identically indented). - - """ - try: - tree = self.parse_block(block, lineno, indent) - except Exception as err: - if self.logger.isEnabledFor(logging.DEBUG): - for line in block: - self.log_debug("Source: %s", line.rstrip("\n")) - self.log_error("Can't parse docstring in %s line %s: %s: %s", - filename, lineno, err.__class__.__name__, err) - return block - if self.refactor_tree(tree, filename): - new = str(tree).splitlines(keepends=True) - # Undo the adjustment of the line numbers in wrap_toks() below. - clipped, new = new[:lineno-1], new[lineno-1:] - assert clipped == ["\n"] * (lineno-1), clipped - if not new[-1].endswith("\n"): - new[-1] += "\n" - block = [indent + self.PS1 + new.pop(0)] - if new: - block += [indent + self.PS2 + line for line in new] - return block - - def summarize(self): - if self.wrote: - were = "were" - else: - were = "need to be" - if not self.files: - self.log_message("No files %s modified.", were) - else: - self.log_message("Files that %s modified:", were) - for file in self.files: - self.log_message(file) - if self.fixer_log: - self.log_message("Warnings/messages while refactoring:") - for message in self.fixer_log: - self.log_message(message) - if self.errors: - if len(self.errors) == 1: - self.log_message("There was 1 error:") - else: - self.log_message("There were %d errors:", len(self.errors)) - for msg, args, kwds in self.errors: - self.log_message(msg, *args, **kwds) - - def parse_block(self, block, lineno, indent): - """Parses a block into a tree. - - This is necessary to get correct line number / offset information - in the parser diagnostics and embedded into the parse tree. - """ - tree = self.driver.parse_tokens(self.wrap_toks(block, lineno, indent)) - tree.future_features = frozenset() - return tree - - def wrap_toks(self, block, lineno, indent): - """Wraps a tokenize stream to systematically modify start/end.""" - tokens = tokenize.generate_tokens(self.gen_lines(block, indent).__next__) - for type, value, (line0, col0), (line1, col1), line_text in tokens: - line0 += lineno - 1 - line1 += lineno - 1 - # Don't bother updating the columns; this is too complicated - # since line_text would also have to be updated and it would - # still break for tokens spanning lines. Let the user guess - # that the column numbers for doctests are relative to the - # end of the prompt string (PS1 or PS2). - yield type, value, (line0, col0), (line1, col1), line_text - - - def gen_lines(self, block, indent): - """Generates lines as expected by tokenize from a list of lines. - - This strips the first len(indent + self.PS1) characters off each line. - """ - prefix1 = indent + self.PS1 - prefix2 = indent + self.PS2 - prefix = prefix1 - for line in block: - if line.startswith(prefix): - yield line[len(prefix):] - elif line == prefix.rstrip() + "\n": - yield "\n" - else: - raise AssertionError("line=%r, prefix=%r" % (line, prefix)) - prefix = prefix2 - while True: - yield "" - - -class MultiprocessingUnsupported(Exception): - pass - - -class MultiprocessRefactoringTool(RefactoringTool): - - def __init__(self, *args, **kwargs): - super(MultiprocessRefactoringTool, self).__init__(*args, **kwargs) - self.queue = None - self.output_lock = None - - def refactor(self, items, write=False, doctests_only=False, - num_processes=1): - if num_processes == 1: - return super(MultiprocessRefactoringTool, self).refactor( - items, write, doctests_only) - try: - import multiprocessing - except ImportError: - raise MultiprocessingUnsupported - if self.queue is not None: - raise RuntimeError("already doing multiple processes") - self.queue = multiprocessing.JoinableQueue() - self.output_lock = multiprocessing.Lock() - processes = [multiprocessing.Process(target=self._child) - for i in range(num_processes)] - try: - for p in processes: - p.start() - super(MultiprocessRefactoringTool, self).refactor(items, write, - doctests_only) - finally: - self.queue.join() - for i in range(num_processes): - self.queue.put(None) - for p in processes: - if p.is_alive(): - p.join() - self.queue = None - - def _child(self): - task = self.queue.get() - while task is not None: - args, kwargs = task - try: - super(MultiprocessRefactoringTool, self).refactor_file( - *args, **kwargs) - finally: - self.queue.task_done() - task = self.queue.get() - - def refactor_file(self, *args, **kwargs): - if self.queue is not None: - self.queue.put((args, kwargs)) - else: - return super(MultiprocessRefactoringTool, self).refactor_file( - *args, **kwargs) diff --git a/PythonLib/full/linecache.py b/PythonLib/full/linecache.py index b6453fe2..ef3b2d91 100644 --- a/PythonLib/full/linecache.py +++ b/PythonLib/full/linecache.py @@ -5,17 +5,13 @@ that name. """ -import functools -import sys -import os -import tokenize - __all__ = ["getline", "clearcache", "checkcache", "lazycache"] # The cache. Maps filenames to either a thunk which will provide source code, # or a tuple (size, mtime, lines, fullname) once loaded. cache = {} +_interactive_cache = {} def clearcache(): @@ -37,10 +33,9 @@ def getlines(filename, module_globals=None): """Get the lines for a Python source file from the cache. Update the cache if it doesn't contain an entry for this file already.""" - if filename in cache: - entry = cache[filename] - if len(entry) != 1: - return cache[filename][2] + entry = cache.get(filename, None) + if entry is not None and len(entry) != 1: + return entry[2] try: return updatecache(filename, module_globals) @@ -49,6 +44,33 @@ def getlines(filename, module_globals=None): return [] +def _getline_from_code(filename, lineno): + lines = _getlines_from_code(filename) + if 1 <= lineno <= len(lines): + return lines[lineno - 1] + return '' + +def _make_key(code): + return (code.co_filename, code.co_qualname, code.co_firstlineno) + +def _getlines_from_code(code): + code_id = _make_key(code) + entry = _interactive_cache.get(code_id, None) + if entry is not None and len(entry) != 1: + return entry[2] + return [] + + +def _source_unavailable(filename): + """Return True if the source code is unavailable for such file name.""" + return ( + not filename + or (filename.startswith('<') + and filename.endswith('>') + and not filename.startswith('')): + # These imports are not at top level because linecache is in the critical + # path of the interpreter startup and importing os and sys take a lot of time + # and slows down the startup sequence. + try: + import os + import sys + import tokenize + except ImportError: + # These import can fail if the interpreter is shutting down return [] - fullname = filename + entry = cache.pop(filename, None) + if _source_unavailable(filename): + return [] + + if filename.startswith('')): - return False + return None # Try for a __loader__, if available if module_globals and '__name__' in module_globals: spec = module_globals.get('__spec__') @@ -179,7 +234,23 @@ def lazycache(filename, module_globals): get_source = getattr(loader, 'get_source', None) if name and get_source: - get_lines = functools.partial(get_source, name) - cache[filename] = (get_lines,) - return True - return False + def get_lines(name=name, *args, **kwargs): + return get_source(name, *args, **kwargs) + return (get_lines,) + return None + + + +def _register_code(code, string, name): + entry = (len(string), + None, + [line + '\n' for line in string.splitlines()], + name) + stack = [code] + while stack: + code = stack.pop() + for const in code.co_consts: + if isinstance(const, type(code)): + stack.append(const) + key = _make_key(code) + _interactive_cache[key] = entry diff --git a/PythonLib/full/locale.py b/PythonLib/full/locale.py index 1fb39454..dfedc638 100644 --- a/PythonLib/full/locale.py +++ b/PythonLib/full/locale.py @@ -13,7 +13,6 @@ import sys import encodings import encodings.aliases -import re import _collections_abc from builtins import str as _builtin_str import functools @@ -25,7 +24,7 @@ # Yuck: LC_MESSAGES is non-standard: can't tell whether it exists before # trying the import. So __all__ is also fiddled at the end of the file. __all__ = ["getlocale", "getdefaultlocale", "getpreferredencoding", "Error", - "setlocale", "resetlocale", "localeconv", "strcoll", "strxfrm", + "setlocale", "localeconv", "strcoll", "strxfrm", "str", "atof", "atoi", "format_string", "currency", "normalize", "LC_CTYPE", "LC_COLLATE", "LC_TIME", "LC_MONETARY", "LC_NUMERIC", "LC_ALL", "CHAR_MAX", "getencoding"] @@ -177,8 +176,7 @@ def _strip_padding(s, amount): amount -= 1 return s[lpos:rpos+1] -_percent_re = re.compile(r'%(?:\((?P.*?)\))?' - r'(?P[-#0-9 +*.hlL]*?)[eEfFgGdiouxXcrs%]') +_percent_re = None def _format(percent, value, grouping=False, monetary=False, *additional): if additional: @@ -217,6 +215,13 @@ def format_string(f, val, grouping=False, monetary=False): Grouping is applied if the third parameter is true. Conversion uses monetary thousands separator and grouping strings if forth parameter monetary is true.""" + global _percent_re + if _percent_re is None: + import re + + _percent_re = re.compile(r'%(?:\((?P.*?)\))?(?P[-#0-9 +*.hlL]*?)[eEfFgGdiouxXcrs%]') + percents = list(_percent_re.finditer(f)) new_f = _percent_re.sub('%s', f) @@ -614,40 +619,15 @@ def setlocale(category, locale=None): locale = normalize(_build_localename(locale)) return _setlocale(category, locale) -def resetlocale(category=LC_ALL): - - """ Sets the locale for category to the default setting. - - The default setting is determined by calling - getdefaultlocale(). category defaults to LC_ALL. - - """ - import warnings - warnings.warn( - 'Use locale.setlocale(locale.LC_ALL, "") instead', - DeprecationWarning, stacklevel=2 - ) - - with warnings.catch_warnings(): - warnings.simplefilter('ignore', category=DeprecationWarning) - loc = getdefaultlocale() - - _setlocale(category, _build_localename(loc)) - try: from _locale import getencoding except ImportError: + # When _locale.getencoding() is missing, locale.getencoding() uses the + # Python filesystem encoding. def getencoding(): - if hasattr(sys, 'getandroidapilevel'): - # On Android langinfo.h and CODESET are missing, and UTF-8 is - # always used in mbstowcs() and wcstombs(). - return 'utf-8' - encoding = _getdefaultlocale()[1] - if encoding is None: - # LANG not set, default to UTF-8 - encoding = 'utf-8' - return encoding + return sys.getfilesystemencoding() + try: CODESET @@ -885,6 +865,28 @@ def getpreferredencoding(do_setlocale=True): # updated 'ca_es@valencia' -> 'ca_ES.ISO8859-15@valencia' to 'ca_ES.UTF-8@valencia' # updated 'kk_kz' -> 'kk_KZ.RK1048' to 'kk_KZ.ptcp154' # updated 'russian' -> 'ru_RU.ISO8859-5' to 'ru_RU.KOI8-R' +# +# SS 2025-02-04: +# Updated alias mapping with glibc 2.41 supported locales and the latest +# X lib alias mapping. +# +# These are the differences compared to the old mapping (Python 3.13.1 +# and older): +# +# updated 'c.utf8' -> 'C.UTF-8' to 'en_US.UTF-8' +# updated 'de_it' -> 'de_IT.ISO8859-1' to 'de_IT.UTF-8' +# removed 'de_li.utf8' +# updated 'en_il' -> 'en_IL.UTF-8' to 'en_IL.ISO8859-1' +# removed 'english.iso88591' +# updated 'es_cu' -> 'es_CU.UTF-8' to 'es_CU.ISO8859-1' +# updated 'russian' -> 'ru_RU.KOI8-R' to 'ru_RU.ISO8859-5' +# updated 'sr@latn' -> 'sr_CS.UTF-8@latin' to 'sr_RS.UTF-8@latin' +# removed 'univ' +# removed 'universal' +# +# SS 2025-06-10: +# Remove 'c.utf8' -> 'en_US.UTF-8' because 'en_US.UTF-8' does not exist +# on all platforms. locale_alias = { 'a3': 'az_AZ.KOI8-C', @@ -964,7 +966,6 @@ def getpreferredencoding(do_setlocale=True): 'c.ascii': 'C', 'c.en': 'C', 'c.iso88591': 'en_US.ISO8859-1', - 'c.utf8': 'C.UTF-8', 'c_c': 'C', 'c_c.c': 'C', 'ca': 'ca_ES.ISO8859-1', @@ -981,6 +982,7 @@ def getpreferredencoding(do_setlocale=True): 'chr_us': 'chr_US.UTF-8', 'ckb_iq': 'ckb_IQ.UTF-8', 'cmn_tw': 'cmn_TW.UTF-8', + 'crh_ru': 'crh_RU.UTF-8', 'crh_ua': 'crh_UA.UTF-8', 'croatian': 'hr_HR.ISO8859-2', 'cs': 'cs_CZ.ISO8859-2', @@ -1002,11 +1004,12 @@ def getpreferredencoding(do_setlocale=True): 'de_be': 'de_BE.ISO8859-1', 'de_ch': 'de_CH.ISO8859-1', 'de_de': 'de_DE.ISO8859-1', - 'de_it': 'de_IT.ISO8859-1', - 'de_li.utf8': 'de_LI.UTF-8', + 'de_it': 'de_IT.UTF-8', + 'de_li': 'de_LI.ISO8859-1', 'de_lu': 'de_LU.ISO8859-1', 'deutsch': 'de_DE.ISO8859-1', 'doi_in': 'doi_IN.UTF-8', + 'dsb_de': 'dsb_DE.UTF-8', 'dutch': 'nl_NL.ISO8859-1', 'dutch.iso88591': 'nl_BE.ISO8859-1', 'dv_mv': 'dv_MV.UTF-8', @@ -1029,7 +1032,7 @@ def getpreferredencoding(do_setlocale=True): 'en_gb': 'en_GB.ISO8859-1', 'en_hk': 'en_HK.ISO8859-1', 'en_ie': 'en_IE.ISO8859-1', - 'en_il': 'en_IL.UTF-8', + 'en_il': 'en_IL.ISO8859-1', 'en_in': 'en_IN.ISO8859-1', 'en_ng': 'en_NG.UTF-8', 'en_nz': 'en_NZ.ISO8859-1', @@ -1045,7 +1048,6 @@ def getpreferredencoding(do_setlocale=True): 'en_zw.utf8': 'en_ZS.UTF-8', 'eng_gb': 'en_GB.ISO8859-1', 'english': 'en_EN.ISO8859-1', - 'english.iso88591': 'en_US.ISO8859-1', 'english_uk': 'en_GB.ISO8859-1', 'english_united-states': 'en_US.ISO8859-1', 'english_united-states.437': 'C', @@ -1061,7 +1063,7 @@ def getpreferredencoding(do_setlocale=True): 'es_cl': 'es_CL.ISO8859-1', 'es_co': 'es_CO.ISO8859-1', 'es_cr': 'es_CR.ISO8859-1', - 'es_cu': 'es_CU.UTF-8', + 'es_cu': 'es_CU.ISO8859-1', 'es_do': 'es_DO.ISO8859-1', 'es_ec': 'es_EC.ISO8859-1', 'es_es': 'es_ES.ISO8859-1', @@ -1111,6 +1113,7 @@ def getpreferredencoding(do_setlocale=True): 'ga_ie': 'ga_IE.ISO8859-1', 'galego': 'gl_ES.ISO8859-1', 'galician': 'gl_ES.ISO8859-1', + 'gbm_in': 'gbm_IN.UTF-8', 'gd': 'gd_GB.ISO8859-1', 'gd_gb': 'gd_GB.ISO8859-1', 'ger_de': 'de_DE.ISO8859-1', @@ -1151,6 +1154,7 @@ def getpreferredencoding(do_setlocale=True): 'icelandic': 'is_IS.ISO8859-1', 'id': 'id_ID.ISO8859-1', 'id_id': 'id_ID.ISO8859-1', + 'ie': 'ie.UTF-8', 'ig_ng': 'ig_NG.UTF-8', 'ik_ca': 'ik_CA.UTF-8', 'in': 'id_ID.ISO8859-1', @@ -1205,6 +1209,7 @@ def getpreferredencoding(do_setlocale=True): 'ks_in': 'ks_IN.UTF-8', 'ks_in@devanagari.utf8': 'ks_IN.UTF-8@devanagari', 'ku_tr': 'ku_TR.ISO8859-9', + 'kv_ru': 'kv_RU.UTF-8', 'kw': 'kw_GB.ISO8859-1', 'kw_gb': 'kw_GB.ISO8859-1', 'ky': 'ky_KG.UTF-8', @@ -1223,6 +1228,7 @@ def getpreferredencoding(do_setlocale=True): 'lo_la.mulelao1': 'lo_LA.MULELAO-1', 'lt': 'lt_LT.ISO8859-13', 'lt_lt': 'lt_LT.ISO8859-13', + 'ltg_lv.utf8': 'ltg_LV.UTF-8', 'lv': 'lv_LV.ISO8859-13', 'lv_lv': 'lv_LV.ISO8859-13', 'lzh_tw': 'lzh_TW.UTF-8', @@ -1230,6 +1236,7 @@ def getpreferredencoding(do_setlocale=True): 'mai': 'mai_IN.UTF-8', 'mai_in': 'mai_IN.UTF-8', 'mai_np': 'mai_NP.UTF-8', + 'mdf_ru': 'mdf_RU.UTF-8', 'mfe_mu': 'mfe_MU.UTF-8', 'mg_mg': 'mg_MG.ISO8859-15', 'mhr_ru': 'mhr_RU.UTF-8', @@ -1243,6 +1250,7 @@ def getpreferredencoding(do_setlocale=True): 'ml_in': 'ml_IN.UTF-8', 'mn_mn': 'mn_MN.UTF-8', 'mni_in': 'mni_IN.UTF-8', + 'mnw_mm': 'mnw_MM.UTF-8', 'mr': 'mr_IN.UTF-8', 'mr_in': 'mr_IN.UTF-8', 'ms': 'ms_MY.ISO8859-1', @@ -1311,6 +1319,7 @@ def getpreferredencoding(do_setlocale=True): 'pt_pt': 'pt_PT.ISO8859-1', 'quz_pe': 'quz_PE.UTF-8', 'raj_in': 'raj_IN.UTF-8', + 'rif_ma': 'rif_MA.UTF-8', 'ro': 'ro_RO.ISO8859-2', 'ro_ro': 'ro_RO.ISO8859-2', 'romanian': 'ro_RO.ISO8859-2', @@ -1318,12 +1327,14 @@ def getpreferredencoding(do_setlocale=True): 'ru_ru': 'ru_RU.UTF-8', 'ru_ua': 'ru_UA.KOI8-U', 'rumanian': 'ro_RO.ISO8859-2', - 'russian': 'ru_RU.KOI8-R', + 'russian': 'ru_RU.ISO8859-5', 'rw': 'rw_RW.ISO8859-1', 'rw_rw': 'rw_RW.ISO8859-1', 'sa_in': 'sa_IN.UTF-8', + 'sah_ru': 'sah_RU.UTF-8', 'sat_in': 'sat_IN.UTF-8', 'sc_it': 'sc_IT.UTF-8', + 'scn_it': 'scn_IT.UTF-8', 'sd': 'sd_IN.UTF-8', 'sd_in': 'sd_IN.UTF-8', 'sd_in@devanagari.utf8': 'sd_IN.UTF-8@devanagari', @@ -1365,7 +1376,7 @@ def getpreferredencoding(do_setlocale=True): 'sq_mk': 'sq_MK.UTF-8', 'sr': 'sr_RS.UTF-8', 'sr@cyrillic': 'sr_RS.UTF-8', - 'sr@latn': 'sr_CS.UTF-8@latin', + 'sr@latn': 'sr_RS.UTF-8@latin', 'sr_cs': 'sr_CS.UTF-8', 'sr_cs.iso88592@latn': 'sr_CS.ISO8859-2', 'sr_cs@latn': 'sr_CS.UTF-8@latin', @@ -1384,14 +1395,17 @@ def getpreferredencoding(do_setlocale=True): 'sr_yu@cyrillic': 'sr_RS.UTF-8', 'ss': 'ss_ZA.ISO8859-1', 'ss_za': 'ss_ZA.ISO8859-1', + 'ssy_er': 'ssy_ER.UTF-8', 'st': 'st_ZA.ISO8859-1', 'st_za': 'st_ZA.ISO8859-1', + 'su_id': 'su_ID.UTF-8', 'sv': 'sv_SE.ISO8859-1', 'sv_fi': 'sv_FI.ISO8859-1', 'sv_se': 'sv_SE.ISO8859-1', 'sw_ke': 'sw_KE.UTF-8', 'sw_tz': 'sw_TZ.UTF-8', 'swedish': 'sv_SE.ISO8859-1', + 'syr': 'syr.UTF-8', 'szl_pl': 'szl_PL.UTF-8', 'ta': 'ta_IN.TSCII-0', 'ta_in': 'ta_IN.TSCII-0', @@ -1418,6 +1432,7 @@ def getpreferredencoding(do_setlocale=True): 'tn': 'tn_ZA.ISO8859-15', 'tn_za': 'tn_ZA.ISO8859-15', 'to_to': 'to_TO.UTF-8', + 'tok': 'tok.UTF-8', 'tpi_pg': 'tpi_PG.UTF-8', 'tr': 'tr_TR.ISO8859-9', 'tr_cy': 'tr_CY.ISO8859-9', @@ -1432,8 +1447,7 @@ def getpreferredencoding(do_setlocale=True): 'ug_cn': 'ug_CN.UTF-8', 'uk': 'uk_UA.KOI8-U', 'uk_ua': 'uk_UA.KOI8-U', - 'univ': 'en_US.utf', - 'universal': 'en_US.utf', + 'univ.utf8': 'en_US.UTF-8', 'universal.utf8@ucs4': 'en_US.UTF-8', 'unm_us': 'unm_US.UTF-8', 'ur': 'ur_PK.CP1256', @@ -1462,6 +1476,7 @@ def getpreferredencoding(do_setlocale=True): 'yo_ng': 'yo_NG.UTF-8', 'yue_hk': 'yue_HK.UTF-8', 'yuw_pg': 'yuw_PG.UTF-8', + 'zgh_ma': 'zgh_MA.UTF-8', 'zh': 'zh_CN.eucCN', 'zh_cn': 'zh_CN.gb2312', 'zh_cn.big5': 'zh_TW.big5', @@ -1732,17 +1747,6 @@ def _init_categories(categories=categories): print(' Encoding: ', enc or '(undefined)') print() - print() - print('Locale settings after calling resetlocale():') - print('-'*72) - resetlocale() - for name,category in categories.items(): - print(name, '...') - lang, enc = getlocale(category) - print(' Language: ', lang or '(undefined)') - print(' Encoding: ', enc or '(undefined)') - print() - try: setlocale(LC_ALL, "") except: diff --git a/PythonLib/full/logging/__init__.py b/PythonLib/full/logging/__init__.py index 22d31983..9005f1ef 100644 --- a/PythonLib/full/logging/__init__.py +++ b/PythonLib/full/logging/__init__.py @@ -56,7 +56,7 @@ # #_startTime is used as the base when calculating the relative time of events # -_startTime = time.time() +_startTime = time.time_ns() # #raiseExceptions is used to see if exceptions during handling should be @@ -159,12 +159,9 @@ def addLevelName(level, levelName): This is used when converting levels to text during message formatting. """ - _acquireLock() - try: #unlikely to cause an exception, but you never know... + with _lock: _levelToName[level] = levelName _nameToLevel[levelName] = level - finally: - _releaseLock() if hasattr(sys, "_getframe"): currentframe = lambda: sys._getframe(1) @@ -231,21 +228,27 @@ def _checkLevel(level): # _lock = threading.RLock() -def _acquireLock(): +def _prepareFork(): """ - Acquire the module-level lock for serializing access to shared data. + Prepare to fork a new child process by acquiring the module-level lock. - This should be released with _releaseLock(). + This should be used in conjunction with _afterFork(). """ - if _lock: + # Wrap the lock acquisition in a try-except to prevent the lock from being + # abandoned in the event of an asynchronous exception. See gh-106238. + try: _lock.acquire() + except BaseException: + _lock.release() + raise -def _releaseLock(): +def _afterFork(): """ - Release the module-level lock acquired by calling _acquireLock(). + After a new child process has been forked, release the module-level lock. + + This should be used in conjunction with _prepareFork(). """ - if _lock: - _lock.release() + _lock.release() # Prevent a held logging lock from blocking a child from logging. @@ -260,23 +263,20 @@ def _register_at_fork_reinit_lock(instance): _at_fork_reinit_lock_weakset = weakref.WeakSet() def _register_at_fork_reinit_lock(instance): - _acquireLock() - try: + with _lock: _at_fork_reinit_lock_weakset.add(instance) - finally: - _releaseLock() def _after_at_fork_child_reinit_locks(): for handler in _at_fork_reinit_lock_weakset: handler._at_fork_reinit() - # _acquireLock() was called in the parent before forking. + # _prepareFork() was called in the parent before forking. # The lock is reinitialized to unlocked state. _lock._at_fork_reinit() - os.register_at_fork(before=_acquireLock, + os.register_at_fork(before=_prepareFork, after_in_child=_after_at_fork_child_reinit_locks, - after_in_parent=_releaseLock) + after_in_parent=_afterFork) #--------------------------------------------------------------------------- @@ -300,7 +300,7 @@ def __init__(self, name, level, pathname, lineno, """ Initialize a logging record with interesting information. """ - ct = time.time() + ct = time.time_ns() self.name = name self.msg = msg # @@ -339,9 +339,17 @@ def __init__(self, name, level, pathname, lineno, self.stack_info = sinfo self.lineno = lineno self.funcName = func - self.created = ct - self.msecs = int((ct - int(ct)) * 1000) + 0.0 # see gh-89047 - self.relativeCreated = (self.created - _startTime) * 1000 + self.created = ct / 1e9 # ns to float seconds + # Get the number of whole milliseconds (0-999) in the fractional part of seconds. + # Eg: 1_677_903_920_999_998_503 ns --> 999_998_503 ns--> 999 ms + # Convert to float by adding 0.0 for historical reasons. See gh-89047 + self.msecs = (ct % 1_000_000_000) // 1_000_000 + 0.0 + if self.msecs == 999.0 and int(self.created) != ct // 1_000_000_000: + # ns -> sec conversion can round up, e.g: + # 1_677_903_920_999_999_900 ns --> 1_677_903_921.0 sec + self.msecs = 0.0 + + self.relativeCreated = (ct - _startTime) / 1e6 if logThreads: self.thread = threading.get_ident() self.threadName = threading.current_thread().name @@ -572,7 +580,7 @@ class Formatter(object): %(lineno)d Source line number where the logging call was issued (if available) %(funcName)s Function name - %(created)f Time when the LogRecord was created (time.time() + %(created)f Time when the LogRecord was created (time.time_ns() / 1e9 return value) %(asctime)s Textual time when the LogRecord was created %(msecs)d Millisecond portion of the creation time @@ -583,6 +591,7 @@ class Formatter(object): %(threadName)s Thread name (if available) %(taskName)s Task name (if available) %(process)d Process ID (if available) + %(processName)s Process name (if available) %(message)s The result of record.getMessage(), computed just as the record is emitted """ @@ -658,7 +667,7 @@ def formatException(self, ei): # See issues #9427, #1553375. Commented out for now. #if getattr(self, 'fullstack', False): # traceback.print_stack(tb.tb_frame.f_back, file=sio) - traceback.print_exception(ei[0], ei[1], tb, None, sio) + traceback.print_exception(ei[0], ei[1], tb, limit=None, file=sio) s = sio.getvalue() sio.close() if s[-1:] == "\n": @@ -879,25 +888,20 @@ def _removeHandlerRef(wr): # set to None. It can also be called from another thread. So we need to # pre-emptively grab the necessary globals and check if they're None, # to prevent race conditions and failures during interpreter shutdown. - acquire, release, handlers = _acquireLock, _releaseLock, _handlerList - if acquire and release and handlers: - acquire() - try: - handlers.remove(wr) - except ValueError: - pass - finally: - release() + handlers, lock = _handlerList, _lock + if lock and handlers: + with lock: + try: + handlers.remove(wr) + except ValueError: + pass def _addHandlerRef(handler): """ Add a handler to the internal cleanup list using a weak reference. """ - _acquireLock() - try: + with _lock: _handlerList.append(weakref.ref(handler, _removeHandlerRef)) - finally: - _releaseLock() def getHandlerByName(name): @@ -912,8 +916,7 @@ def getHandlerNames(): """ Return all known handler names as an immutable set. """ - result = set(_handlers.keys()) - return frozenset(result) + return frozenset(_handlers) class Handler(Filterer): @@ -943,15 +946,12 @@ def get_name(self): return self._name def set_name(self, name): - _acquireLock() - try: + with _lock: if self._name in _handlers: del _handlers[self._name] self._name = name if name: _handlers[name] = self - finally: - _releaseLock() name = property(get_name, set_name) @@ -1023,11 +1023,8 @@ def handle(self, record): if isinstance(rv, LogRecord): record = rv if rv: - self.acquire() - try: + with self.lock: self.emit(record) - finally: - self.release() return rv def setFormatter(self, fmt): @@ -1055,13 +1052,10 @@ def close(self): methods. """ #get the module data lock, as we're updating a shared structure. - _acquireLock() - try: #unlikely to raise an exception, but you never know... + with _lock: self._closed = True if self._name and self._name in _handlers: del _handlers[self._name] - finally: - _releaseLock() def handleError(self, record): """ @@ -1076,14 +1070,14 @@ def handleError(self, record): The record which was being processed is passed in to this method. """ if raiseExceptions and sys.stderr: # see issue 13807 - t, v, tb = sys.exc_info() + exc = sys.exception() try: sys.stderr.write('--- Logging error ---\n') - traceback.print_exception(t, v, tb, None, sys.stderr) + traceback.print_exception(exc, limit=None, file=sys.stderr) sys.stderr.write('Call stack:\n') # Walk the stack frame up until we're out of logging, # so as to print the calling context. - frame = tb.tb_frame + frame = exc.__traceback__.tb_frame while (frame and os.path.dirname(frame.f_code.co_filename) == __path__[0]): frame = frame.f_back @@ -1108,7 +1102,7 @@ def handleError(self, record): except OSError: #pragma: no cover pass # see issue 5971 finally: - del t, v, tb + del exc def __repr__(self): level = getLevelName(self.level) @@ -1138,12 +1132,9 @@ def flush(self): """ Flushes the stream. """ - self.acquire() - try: + with self.lock: if self.stream and hasattr(self.stream, "flush"): self.stream.flush() - finally: - self.release() def emit(self, record): """ @@ -1179,12 +1170,9 @@ def setStream(self, stream): result = None else: result = self.stream - self.acquire() - try: + with self.lock: self.flush() self.stream = stream - finally: - self.release() return result def __repr__(self): @@ -1234,8 +1222,7 @@ def close(self): """ Closes the stream. """ - self.acquire() - try: + with self.lock: try: if self.stream: try: @@ -1251,8 +1238,6 @@ def close(self): # Also see Issue #42378: we also rely on # self._closed being set to True there StreamHandler.close(self) - finally: - self.release() def _open(self): """ @@ -1388,8 +1373,7 @@ def getLogger(self, name): rv = None if not isinstance(name, str): raise TypeError('A logger name must be a string') - _acquireLock() - try: + with _lock: if name in self.loggerDict: rv = self.loggerDict[name] if isinstance(rv, PlaceHolder): @@ -1404,8 +1388,6 @@ def getLogger(self, name): rv.manager = self self.loggerDict[name] = rv self._fixupParents(rv) - finally: - _releaseLock() return rv def setLoggerClass(self, klass): @@ -1468,12 +1450,11 @@ def _clear_cache(self): Called when level changes are made """ - _acquireLock() - for logger in self.loggerDict.values(): - if isinstance(logger, Logger): - logger._cache.clear() - self.root._cache.clear() - _releaseLock() + with _lock: + for logger in self.loggerDict.values(): + if isinstance(logger, Logger): + logger._cache.clear() + self.root._cache.clear() #--------------------------------------------------------------------------- # Logger classes and functions @@ -1703,23 +1684,17 @@ def addHandler(self, hdlr): """ Add the specified handler to this logger. """ - _acquireLock() - try: + with _lock: if not (hdlr in self.handlers): self.handlers.append(hdlr) - finally: - _releaseLock() def removeHandler(self, hdlr): """ Remove the specified handler from this logger. """ - _acquireLock() - try: + with _lock: if hdlr in self.handlers: self.handlers.remove(hdlr) - finally: - _releaseLock() def hasHandlers(self): """ @@ -1797,16 +1772,13 @@ def isEnabledFor(self, level): try: return self._cache[level] except KeyError: - _acquireLock() - try: + with _lock: if self.manager.disable >= level: is_enabled = self._cache[level] = False else: is_enabled = self._cache[level] = ( level >= self.getEffectiveLevel() ) - finally: - _releaseLock() return is_enabled def getChild(self, suffix): @@ -1836,16 +1808,13 @@ def _hierlevel(logger): return 1 + logger.name.count('.') d = self.manager.loggerDict - _acquireLock() - try: + with _lock: # exclude PlaceHolders - the last check is to ensure that lower-level # descendants aren't returned - if there are placeholders, a logger's # parent field might point to a grandparent or ancestor thereof. return set(item for item in d.values() if isinstance(item, Logger) and item.parent is self and _hierlevel(item) == 1 + _hierlevel(item.parent)) - finally: - _releaseLock() def __repr__(self): level = getLevelName(self.getEffectiveLevel()) @@ -1881,19 +1850,30 @@ class LoggerAdapter(object): information in logging output. """ - def __init__(self, logger, extra=None): + def __init__(self, logger, extra=None, merge_extra=False): """ - Initialize the adapter with a logger and a dict-like object which - provides contextual information. This constructor signature allows - easy stacking of LoggerAdapters, if so desired. + Initialize the adapter with a logger and an optional dict-like object + which provides contextual information. This constructor signature + allows easy stacking of LoggerAdapters, if so desired. You can effectively pass keyword arguments as shown in the following example: adapter = LoggerAdapter(someLogger, dict(p1=v1, p2="v2")) + + By default, LoggerAdapter objects will drop the "extra" argument + passed on the individual log calls to use its own instead. + + Initializing it with merge_extra=True will instead merge both + maps when logging, the individual call extra taking precedence + over the LoggerAdapter instance extra + + .. versionchanged:: 3.13 + The *merge_extra* argument was added. """ self.logger = logger self.extra = extra + self.merge_extra = merge_extra def process(self, msg, kwargs): """ @@ -1905,7 +1885,11 @@ def process(self, msg, kwargs): Normally, you'll only need to override this one method in a LoggerAdapter subclass for your specific needs. """ - kwargs["extra"] = self.extra + if self.merge_extra and kwargs.get("extra") is not None: + if self.extra is not None: + kwargs["extra"] = {**self.extra, **kwargs["extra"]} + else: + kwargs["extra"] = self.extra return msg, kwargs # @@ -2088,8 +2072,7 @@ def basicConfig(**kwargs): """ # Add thread safety in case someone mistakenly calls # basicConfig() from multiple threads - _acquireLock() - try: + with _lock: force = kwargs.pop('force', False) encoding = kwargs.pop('encoding', None) errors = kwargs.pop('errors', 'backslashreplace') @@ -2138,8 +2121,6 @@ def basicConfig(**kwargs): if kwargs: keys = ', '.join(kwargs.keys()) raise ValueError('Unrecognised argument(s): %s' % keys) - finally: - _releaseLock() #--------------------------------------------------------------------------- # Utility functions at module level. diff --git a/PythonLib/full/logging/config.py b/PythonLib/full/logging/config.py index c128bc7a..3d9aa00f 100644 --- a/PythonLib/full/logging/config.py +++ b/PythonLib/full/logging/config.py @@ -83,15 +83,12 @@ def fileConfig(fname, defaults=None, disable_existing_loggers=True, encoding=Non formatters = _create_formatters(cp) # critical section - logging._acquireLock() - try: + with logging._lock: _clearExistingHandlers() # Handlers add themselves to logging._handlers handlers = _install_handlers(cp, formatters) _install_loggers(cp, handlers, disable_existing_loggers) - finally: - logging._releaseLock() def _resolve(name): @@ -378,7 +375,7 @@ class BaseConfigurator(object): WORD_PATTERN = re.compile(r'^\s*(\w+)\s*') DOT_PATTERN = re.compile(r'^\.\s*(\w+)\s*') - INDEX_PATTERN = re.compile(r'^\[\s*(\w+)\s*\]\s*') + INDEX_PATTERN = re.compile(r'^\[([^\[\]]*)\]\s*') DIGIT_PATTERN = re.compile(r'^\d+$') value_converters = { @@ -543,8 +540,7 @@ def configure(self): raise ValueError("Unsupported version: %s" % config['version']) incremental = config.pop('incremental', False) EMPTY_DICT = {} - logging._acquireLock() - try: + with logging._lock: if incremental: handlers = config.get('handlers', EMPTY_DICT) for name in handlers: @@ -688,8 +684,6 @@ def configure(self): except Exception as e: raise ValueError('Unable to configure root ' 'logger') from e - finally: - logging._releaseLock() def configure_formatter(self, config): """Configure a formatter from a dictionary.""" @@ -700,10 +694,9 @@ def configure_formatter(self, config): except TypeError as te: if "'format'" not in str(te): raise - #Name of parameter changed from fmt to format. - #Retry with old name. - #This is so that code can be used with older Python versions - #(e.g. by Django) + # logging.Formatter and its subclasses expect the `fmt` + # parameter instead of `format`. Retry passing configuration + # with `fmt`. config['fmt'] = config.pop('format') config['()'] = factory result = self.configure_custom(config) @@ -872,6 +865,8 @@ def configure_handler(self, config): else: factory = klass kwargs = {k: config[k] for k in config if (k != '.' and valid_ident(k))} + # When deprecation ends for using the 'strm' parameter, remove the + # "except TypeError ..." try: result = factory(**kwargs) except TypeError as te: @@ -883,6 +878,15 @@ def configure_handler(self, config): #(e.g. by Django) kwargs['strm'] = kwargs.pop('stream') result = factory(**kwargs) + + import warnings + warnings.warn( + "Support for custom logging handlers with the 'strm' argument " + "is deprecated and scheduled for removal in Python 3.16. " + "Define handlers with the 'stream' argument instead.", + DeprecationWarning, + stacklevel=2, + ) if formatter: result.setFormatter(formatter) if level is not None: @@ -1013,14 +1017,14 @@ class ConfigSocketReceiver(ThreadingTCPServer): A simple TCP socket-based logging config receiver. """ - allow_reuse_address = 1 + allow_reuse_address = True + allow_reuse_port = False def __init__(self, host='localhost', port=DEFAULT_LOGGING_CONFIG_PORT, handler=None, ready=None, verify=None): ThreadingTCPServer.__init__(self, (host, port), handler) - logging._acquireLock() - self.abort = 0 - logging._releaseLock() + with logging._lock: + self.abort = 0 self.timeout = 1 self.ready = ready self.verify = verify @@ -1034,9 +1038,8 @@ def serve_until_stopped(self): self.timeout) if rd: self.handle_request() - logging._acquireLock() - abort = self.abort - logging._releaseLock() + with logging._lock: + abort = self.abort self.server_close() class Server(threading.Thread): @@ -1057,9 +1060,8 @@ def run(self): self.port = server.server_address[1] self.ready.set() global _listener - logging._acquireLock() - _listener = server - logging._releaseLock() + with logging._lock: + _listener = server server.serve_until_stopped() return Server(ConfigSocketReceiver, ConfigStreamHandler, port, verify) @@ -1069,10 +1071,7 @@ def stopListening(): Stop the listening server which was created with a call to listen(). """ global _listener - logging._acquireLock() - try: + with logging._lock: if _listener: _listener.abort = 1 _listener = None - finally: - logging._releaseLock() diff --git a/PythonLib/full/logging/handlers.py b/PythonLib/full/logging/handlers.py index 0aa7b15e..4a07258f 100644 --- a/PythonLib/full/logging/handlers.py +++ b/PythonLib/full/logging/handlers.py @@ -23,11 +23,17 @@ To use, simply 'import logging.handlers' and log away! """ -import io, logging, socket, os, pickle, struct, time, re -from stat import ST_DEV, ST_INO, ST_MTIME +import copy +import io +import logging +import os +import pickle import queue +import re +import socket +import struct import threading -import copy +import time # # Some constants... @@ -190,7 +196,11 @@ def shouldRollover(self, record): if self.stream is None: # delay was set... self.stream = self._open() if self.maxBytes > 0: # are we rolling over? - pos = self.stream.tell() + try: + pos = self.stream.tell() + except io.UnsupportedOperation: + # gh-143237: Never rollover a named pipe. + return False if not pos: # gh-116263: Never rollover an empty file return False @@ -272,7 +282,7 @@ def __init__(self, filename, when='h', interval=1, backupCount=0, # path object (see Issue #27493), but self.baseFilename will be a string filename = self.baseFilename if os.path.exists(filename): - t = os.stat(filename)[ST_MTIME] + t = int(os.stat(filename).st_mtime) else: t = int(time.time()) self.rolloverAt = self.computeRollover(t) @@ -465,8 +475,7 @@ class WatchedFileHandler(logging.FileHandler): This handler is not appropriate for use under Windows, because under Windows open files cannot be moved or renamed - logging opens the files with exclusive locks - and so there is no need - for such a handler. Furthermore, ST_INO is not supported under - Windows; stat always returns zero for this value. + for such a handler. This handler is based on a suggestion and patch by Chad J. Schroeder. @@ -482,9 +491,11 @@ def __init__(self, filename, mode='a', encoding=None, delay=False, self._statstream() def _statstream(self): - if self.stream: - sres = os.fstat(self.stream.fileno()) - self.dev, self.ino = sres[ST_DEV], sres[ST_INO] + if self.stream is None: + return + sres = os.fstat(self.stream.fileno()) + self.dev = sres.st_dev + self.ino = sres.st_ino def reopenIfNeeded(self): """ @@ -494,6 +505,9 @@ def reopenIfNeeded(self): has, close the old stream and reopen the file to get the current stream. """ + if self.stream is None: + return + # Reduce the chance of race conditions by stat'ing by path only # once and then fstat'ing our new fd if we opened a new log stream. # See issue #14632: Thanks to John Mulligan for the problem report @@ -501,18 +515,23 @@ def reopenIfNeeded(self): try: # stat the file by path, checking for existence sres = os.stat(self.baseFilename) + + # compare file system stat with that of our stream file handle + reopen = (sres.st_dev != self.dev or sres.st_ino != self.ino) except FileNotFoundError: - sres = None - # compare file system stat with that of our stream file handle - if not sres or sres[ST_DEV] != self.dev or sres[ST_INO] != self.ino: - if self.stream is not None: - # we have an open file handle, clean it up - self.stream.flush() - self.stream.close() - self.stream = None # See Issue #21742: _open () might fail. - # open a new file handle and get new stat info from that fd - self.stream = self._open() - self._statstream() + reopen = True + + if not reopen: + return + + # we have an open file handle, clean it up + self.stream.flush() + self.stream.close() + self.stream = None # See Issue #21742: _open () might fail. + + # open a new file handle and get new stat info from that fd + self.stream = self._open() + self._statstream() def emit(self, record): """ @@ -682,15 +701,12 @@ def close(self): """ Closes the socket. """ - self.acquire() - try: + with self.lock: sock = self.sock if sock: self.sock = None sock.close() logging.Handler.close(self) - finally: - self.release() class DatagramHandler(SocketHandler): """ @@ -843,7 +859,7 @@ class SysLogHandler(logging.Handler): } def __init__(self, address=('localhost', SYSLOG_UDP_PORT), - facility=LOG_USER, socktype=None): + facility=LOG_USER, socktype=None, timeout=None): """ Initialize a handler. @@ -860,6 +876,7 @@ def __init__(self, address=('localhost', SYSLOG_UDP_PORT), self.address = address self.facility = facility self.socktype = socktype + self.timeout = timeout self.socket = None self.createSocket() @@ -921,6 +938,8 @@ def createSocket(self): err = sock = None try: sock = socket.socket(af, socktype, proto) + if self.timeout: + sock.settimeout(self.timeout) if socktype == socket.SOCK_STREAM: sock.connect(sa) break @@ -950,15 +969,12 @@ def close(self): """ Closes the socket. """ - self.acquire() - try: + with self.lock: sock = self.socket if sock: self.socket = None sock.close() logging.Handler.close(self) - finally: - self.release() def mapPriority(self, levelName): """ @@ -1346,11 +1362,8 @@ def flush(self): This version just zaps the buffer to empty. """ - self.acquire() - try: + with self.lock: self.buffer.clear() - finally: - self.release() def close(self): """ @@ -1400,11 +1413,8 @@ def setTarget(self, target): """ Set the target handler for this handler. """ - self.acquire() - try: + with self.lock: self.target = target - finally: - self.release() def flush(self): """ @@ -1414,14 +1424,11 @@ def flush(self): The record buffer is only cleared if a target has been set. """ - self.acquire() - try: + with self.lock: if self.target: for record in self.buffer: self.target.handle(record) self.buffer.clear() - finally: - self.release() def close(self): """ @@ -1432,12 +1439,9 @@ def close(self): if self.flushOnClose: self.flush() finally: - self.acquire() - try: + with self.lock: self.target = None BufferingHandler.close(self) - finally: - self.release() class QueueHandler(logging.Handler): @@ -1532,6 +1536,19 @@ def __init__(self, queue, *handlers, respect_handler_level=False): self._thread = None self.respect_handler_level = respect_handler_level + def __enter__(self): + """ + For use as a context manager. Starts the listener. + """ + self.start() + return self + + def __exit__(self, *args): + """ + For use as a context manager. Stops the listener. + """ + self.stop() + def dequeue(self, block): """ Dequeue a record and return it, optionally blocking. @@ -1548,6 +1565,9 @@ def start(self): This starts up a background thread to monitor the queue for LogRecords to process. """ + if self._thread is not None: + raise RuntimeError("Listener already started") + self._thread = t = threading.Thread(target=self._monitor) t.daemon = True t.start() @@ -1619,6 +1639,7 @@ def stop(self): Note that if you don't call this before your application exits, there may be some records still left on the queue, which won't be processed. """ - self.enqueue_sentinel() - self._thread.join() - self._thread = None + if self._thread: # see gh-114706 - allow calling this more than once + self.enqueue_sentinel() + self._thread.join() + self._thread = None diff --git a/PythonLib/full/lzma.py b/PythonLib/full/lzma.py index 800f5219..316066d0 100644 --- a/PythonLib/full/lzma.py +++ b/PythonLib/full/lzma.py @@ -24,18 +24,18 @@ import builtins import io import os +from compression._common import _streams from _lzma import * -from _lzma import _encode_filter_properties, _decode_filter_properties -import _compression +from _lzma import _encode_filter_properties, _decode_filter_properties # noqa: F401 -_MODE_CLOSED = 0 +# Value 0 no longer used _MODE_READ = 1 # Value 2 no longer used _MODE_WRITE = 3 -class LZMAFile(_compression.BaseStream): +class LZMAFile(_streams.BaseStream): """A file object providing transparent LZMA (de)compression. @@ -92,7 +92,7 @@ def __init__(self, filename=None, mode="r", *, """ self._fp = None self._closefp = False - self._mode = _MODE_CLOSED + self._mode = None if mode in ("r", "rb"): if check != -1: @@ -127,7 +127,7 @@ def __init__(self, filename=None, mode="r", *, raise TypeError("filename must be a str, bytes, file or PathLike object") if self._mode == _MODE_READ: - raw = _compression.DecompressReader(self._fp, LZMADecompressor, + raw = _streams.DecompressReader(self._fp, LZMADecompressor, trailing_error=LZMAError, format=format, filters=filters) self._buffer = io.BufferedReader(raw) @@ -137,7 +137,7 @@ def close(self): May be called more than once without error. Once the file is closed, any other operation on it will raise a ValueError. """ - if self._mode == _MODE_CLOSED: + if self.closed: return try: if self._mode == _MODE_READ: @@ -153,12 +153,20 @@ def close(self): finally: self._fp = None self._closefp = False - self._mode = _MODE_CLOSED @property def closed(self): """True if this file is closed.""" - return self._mode == _MODE_CLOSED + return self._fp is None + + @property + def name(self): + self._check_not_closed() + return self._fp.name + + @property + def mode(self): + return 'wb' if self._mode == _MODE_WRITE else 'rb' def fileno(self): """Return the file descriptor for the underlying file.""" diff --git a/PythonLib/full/mailbox.py b/PythonLib/full/mailbox.py index c7f060ce..364af6bb 100644 --- a/PythonLib/full/mailbox.py +++ b/PythonLib/full/mailbox.py @@ -395,6 +395,56 @@ def get_file(self, key): f = open(os.path.join(self._path, self._lookup(key)), 'rb') return _ProxyFile(f) + def get_info(self, key): + """Get the keyed message's "info" as a string.""" + subpath = self._lookup(key) + if self.colon in subpath: + return subpath.split(self.colon)[-1] + return '' + + def set_info(self, key, info: str): + """Set the keyed message's "info" string.""" + if not isinstance(info, str): + raise TypeError(f'info must be a string: {type(info)}') + old_subpath = self._lookup(key) + new_subpath = old_subpath.split(self.colon)[0] + if info: + new_subpath += self.colon + info + if new_subpath == old_subpath: + return + old_path = os.path.join(self._path, old_subpath) + new_path = os.path.join(self._path, new_subpath) + os.rename(old_path, new_path) + self._toc[key] = new_subpath + + def get_flags(self, key): + """Return as a string the standard flags that are set on the keyed message.""" + info = self.get_info(key) + if info.startswith('2,'): + return info[2:] + return '' + + def set_flags(self, key, flags: str): + """Set the given flags and unset all others on the keyed message.""" + if not isinstance(flags, str): + raise TypeError(f'flags must be a string: {type(flags)}') + # TODO: check if flags are valid standard flag characters? + self.set_info(key, '2,' + ''.join(sorted(set(flags)))) + + def add_flag(self, key, flag: str): + """Set the given flag(s) without changing others on the keyed message.""" + if not isinstance(flag, str): + raise TypeError(f'flag must be a string: {type(flag)}') + # TODO: check that flag is a valid standard flag character? + self.set_flags(key, ''.join(set(self.get_flags(key)) | set(flag))) + + def remove_flag(self, key, flag: str): + """Unset the given string flag(s) without changing others on the keyed message.""" + if not isinstance(flag, str): + raise TypeError(f'flag must be a string: {type(flag)}') + if self.get_flags(key): + self.set_flags(key, ''.join(set(self.get_flags(key)) - set(flag))) + def iterkeys(self): """Return an iterator over keys.""" self._refresh() @@ -540,6 +590,8 @@ def _refresh(self): for subdir in self._toc_mtimes: path = self._paths[subdir] for entry in os.listdir(path): + if entry.startswith('.'): + continue p = os.path.join(path, entry) if os.path.isdir(p): continue @@ -1094,10 +1146,24 @@ def __len__(self): """Return a count of messages in the mailbox.""" return len(list(self.iterkeys())) + def _open_mh_sequences_file(self, text): + mode = '' if text else 'b' + kwargs = {'encoding': 'ASCII'} if text else {} + path = os.path.join(self._path, '.mh_sequences') + while True: + try: + return open(path, 'r+' + mode, **kwargs) + except FileNotFoundError: + pass + try: + return open(path, 'x+' + mode, **kwargs) + except FileExistsError: + pass + def lock(self): """Lock the mailbox.""" if not self._locked: - self._file = open(os.path.join(self._path, '.mh_sequences'), 'rb+') + self._file = self._open_mh_sequences_file(text=False) _lock_file(self._file) self._locked = True @@ -1151,7 +1217,11 @@ def remove_folder(self, folder): def get_sequences(self): """Return a name-to-key-list dictionary to define each sequence.""" results = {} - with open(os.path.join(self._path, '.mh_sequences'), 'r', encoding='ASCII') as f: + try: + f = open(os.path.join(self._path, '.mh_sequences'), 'r', encoding='ASCII') + except FileNotFoundError: + return results + with f: all_keys = set(self.keys()) for line in f: try: @@ -1174,7 +1244,7 @@ def get_sequences(self): def set_sequences(self, sequences): """Set sequences using the given name-to-key-list dictionary.""" - f = open(os.path.join(self._path, '.mh_sequences'), 'r+', encoding='ASCII') + f = self._open_mh_sequences_file(text=True) try: os.close(os.open(f.name, os.O_WRONLY | os.O_TRUNC)) for name, keys in sequences.items(): @@ -2113,11 +2183,7 @@ def _unlock_file(f): def _create_carefully(path): """Create a file if it doesn't exist and open for reading and writing.""" - fd = os.open(path, os.O_CREAT | os.O_EXCL | os.O_RDWR, 0o666) - try: - return open(path, 'rb+') - finally: - os.close(fd) + return open(path, 'xb+') def _create_temporary(path): """Create a temp file based on path and open for reading and writing.""" diff --git a/PythonLib/full/mailcap.py b/PythonLib/full/mailcap.py deleted file mode 100644 index 2f4656e8..00000000 --- a/PythonLib/full/mailcap.py +++ /dev/null @@ -1,302 +0,0 @@ -"""Mailcap file handling. See RFC 1524.""" - -import os -import warnings -import re - -__all__ = ["getcaps","findmatch"] - - -_DEPRECATION_MSG = ('The {name} module is deprecated and will be removed in ' - 'Python {remove}. See the mimetypes module for an ' - 'alternative.') -warnings._deprecated(__name__, _DEPRECATION_MSG, remove=(3, 13)) - - -def lineno_sort_key(entry): - # Sort in ascending order, with unspecified entries at the end - if 'lineno' in entry: - return 0, entry['lineno'] - else: - return 1, 0 - -_find_unsafe = re.compile(r'[^\xa1-\U0010FFFF\w@+=:,./-]').search - -class UnsafeMailcapInput(Warning): - """Warning raised when refusing unsafe input""" - - -# Part 1: top-level interface. - -def getcaps(): - """Return a dictionary containing the mailcap database. - - The dictionary maps a MIME type (in all lowercase, e.g. 'text/plain') - to a list of dictionaries corresponding to mailcap entries. The list - collects all the entries for that MIME type from all available mailcap - files. Each dictionary contains key-value pairs for that MIME type, - where the viewing command is stored with the key "view". - - """ - caps = {} - lineno = 0 - for mailcap in listmailcapfiles(): - try: - fp = open(mailcap, 'r') - except OSError: - continue - with fp: - morecaps, lineno = _readmailcapfile(fp, lineno) - for key, value in morecaps.items(): - if not key in caps: - caps[key] = value - else: - caps[key] = caps[key] + value - return caps - -def listmailcapfiles(): - """Return a list of all mailcap files found on the system.""" - # This is mostly a Unix thing, but we use the OS path separator anyway - if 'MAILCAPS' in os.environ: - pathstr = os.environ['MAILCAPS'] - mailcaps = pathstr.split(os.pathsep) - else: - if 'HOME' in os.environ: - home = os.environ['HOME'] - else: - # Don't bother with getpwuid() - home = '.' # Last resort - mailcaps = [home + '/.mailcap', '/etc/mailcap', - '/usr/etc/mailcap', '/usr/local/etc/mailcap'] - return mailcaps - - -# Part 2: the parser. -def readmailcapfile(fp): - """Read a mailcap file and return a dictionary keyed by MIME type.""" - warnings.warn('readmailcapfile is deprecated, use getcaps instead', - DeprecationWarning, 2) - caps, _ = _readmailcapfile(fp, None) - return caps - - -def _readmailcapfile(fp, lineno): - """Read a mailcap file and return a dictionary keyed by MIME type. - - Each MIME type is mapped to an entry consisting of a list of - dictionaries; the list will contain more than one such dictionary - if a given MIME type appears more than once in the mailcap file. - Each dictionary contains key-value pairs for that MIME type, where - the viewing command is stored with the key "view". - """ - caps = {} - while line := fp.readline(): - # Ignore comments and blank lines - if line[0] == '#' or line.strip() == '': - continue - nextline = line - # Join continuation lines - while nextline[-2:] == '\\\n': - nextline = fp.readline() - if not nextline: nextline = '\n' - line = line[:-2] + nextline - # Parse the line - key, fields = parseline(line) - if not (key and fields): - continue - if lineno is not None: - fields['lineno'] = lineno - lineno += 1 - # Normalize the key - types = key.split('/') - for j in range(len(types)): - types[j] = types[j].strip() - key = '/'.join(types).lower() - # Update the database - if key in caps: - caps[key].append(fields) - else: - caps[key] = [fields] - return caps, lineno - -def parseline(line): - """Parse one entry in a mailcap file and return a dictionary. - - The viewing command is stored as the value with the key "view", - and the rest of the fields produce key-value pairs in the dict. - """ - fields = [] - i, n = 0, len(line) - while i < n: - field, i = parsefield(line, i, n) - fields.append(field) - i = i+1 # Skip semicolon - if len(fields) < 2: - return None, None - key, view, rest = fields[0], fields[1], fields[2:] - fields = {'view': view} - for field in rest: - i = field.find('=') - if i < 0: - fkey = field - fvalue = "" - else: - fkey = field[:i].strip() - fvalue = field[i+1:].strip() - if fkey in fields: - # Ignore it - pass - else: - fields[fkey] = fvalue - return key, fields - -def parsefield(line, i, n): - """Separate one key-value pair in a mailcap entry.""" - start = i - while i < n: - c = line[i] - if c == ';': - break - elif c == '\\': - i = i+2 - else: - i = i+1 - return line[start:i].strip(), i - - -# Part 3: using the database. - -def findmatch(caps, MIMEtype, key='view', filename="/dev/null", plist=[]): - """Find a match for a mailcap entry. - - Return a tuple containing the command line, and the mailcap entry - used; (None, None) if no match is found. This may invoke the - 'test' command of several matching entries before deciding which - entry to use. - - """ - if _find_unsafe(filename): - msg = "Refusing to use mailcap with filename %r. Use a safe temporary filename." % (filename,) - warnings.warn(msg, UnsafeMailcapInput) - return None, None - entries = lookup(caps, MIMEtype, key) - # XXX This code should somehow check for the needsterminal flag. - for e in entries: - if 'test' in e: - test = subst(e['test'], filename, plist) - if test is None: - continue - if test and os.system(test) != 0: - continue - command = subst(e[key], MIMEtype, filename, plist) - if command is not None: - return command, e - return None, None - -def lookup(caps, MIMEtype, key=None): - entries = [] - if MIMEtype in caps: - entries = entries + caps[MIMEtype] - MIMEtypes = MIMEtype.split('/') - MIMEtype = MIMEtypes[0] + '/*' - if MIMEtype in caps: - entries = entries + caps[MIMEtype] - if key is not None: - entries = [e for e in entries if key in e] - entries = sorted(entries, key=lineno_sort_key) - return entries - -def subst(field, MIMEtype, filename, plist=[]): - # XXX Actually, this is Unix-specific - res = '' - i, n = 0, len(field) - while i < n: - c = field[i]; i = i+1 - if c != '%': - if c == '\\': - c = field[i:i+1]; i = i+1 - res = res + c - else: - c = field[i]; i = i+1 - if c == '%': - res = res + c - elif c == 's': - res = res + filename - elif c == 't': - if _find_unsafe(MIMEtype): - msg = "Refusing to substitute MIME type %r into a shell command." % (MIMEtype,) - warnings.warn(msg, UnsafeMailcapInput) - return None - res = res + MIMEtype - elif c == '{': - start = i - while i < n and field[i] != '}': - i = i+1 - name = field[start:i] - i = i+1 - param = findparam(name, plist) - if _find_unsafe(param): - msg = "Refusing to substitute parameter %r (%s) into a shell command" % (param, name) - warnings.warn(msg, UnsafeMailcapInput) - return None - res = res + param - # XXX To do: - # %n == number of parts if type is multipart/* - # %F == list of alternating type and filename for parts - else: - res = res + '%' + c - return res - -def findparam(name, plist): - name = name.lower() + '=' - n = len(name) - for p in plist: - if p[:n].lower() == name: - return p[n:] - return '' - - -# Part 4: test program. - -def test(): - import sys - caps = getcaps() - if not sys.argv[1:]: - show(caps) - return - for i in range(1, len(sys.argv), 2): - args = sys.argv[i:i+2] - if len(args) < 2: - print("usage: mailcap [MIMEtype file] ...") - return - MIMEtype = args[0] - file = args[1] - command, e = findmatch(caps, MIMEtype, 'view', file) - if not command: - print("No viewer found for", type) - else: - print("Executing:", command) - sts = os.system(command) - sts = os.waitstatus_to_exitcode(sts) - if sts: - print("Exit status:", sts) - -def show(caps): - print("Mailcap files:") - for fn in listmailcapfiles(): print("\t" + fn) - print() - if not caps: caps = getcaps() - print("Mailcap entries:") - print() - ckeys = sorted(caps) - for type in ckeys: - print(type) - entries = caps[type] - for e in entries: - keys = sorted(e) - for k in keys: - print(" %-15s" % k, e[k]) - print() - -if __name__ == '__main__': - test() diff --git a/PythonLib/full/mimetypes.py b/PythonLib/full/mimetypes.py index 814b3f9c..7d0f4c1f 100644 --- a/PythonLib/full/mimetypes.py +++ b/PythonLib/full/mimetypes.py @@ -23,11 +23,6 @@ read_mime_types(file) -- parse one file, return a dictionary or None """ -import os -import sys -import posixpath -import urllib.parse - try: from _winapi import _mimetypes_read_windows_registry except ImportError: @@ -40,7 +35,7 @@ __all__ = [ "knownfiles", "inited", "MimeTypes", - "guess_type", "guess_all_extensions", "guess_extension", + "guess_type", "guess_file_type", "guess_all_extensions", "guess_extension", "add_type", "init", "read_mime_types", "suffix_map", "encodings_map", "types_map", "common_types" ] @@ -94,7 +89,19 @@ def add_type(self, type, ext, strict=True): If strict is true, information will be added to list of standard types, else to the list of non-standard types. + + Valid extensions are empty or start with a '.'. """ + if ext and not ext.startswith('.'): + from warnings import _deprecated + + _deprecated( + "Undotted extensions", + "Using undotted extensions is deprecated and " + "will raise a ValueError in Python {remove}", + remove=(3, 16), + ) + if not type: return self.types_map[strict][ext] = type @@ -118,17 +125,21 @@ def guess_type(self, url, strict=True): mapped to '.tar.gz'. (This is table-driven too, using the dictionary suffix_map.) - Optional `strict' argument when False adds a bunch of commonly found, + Optional 'strict' argument when False adds a bunch of commonly found, but non-standard types. """ + # Lazy import to improve module import time + import os + import urllib.parse + + # TODO: Deprecate accepting file paths (in particular path-like objects). url = os.fspath(url) p = urllib.parse.urlparse(url) if p.scheme and len(p.scheme) > 1: scheme = p.scheme url = p.path else: - scheme = None - url = os.path.splitdrive(url)[1] + return self.guess_file_type(url, strict=strict) if scheme == 'data': # syntax of data URLs: # dataurl := "data:" [ mediatype ] [ ";base64" ] "," data @@ -148,13 +159,32 @@ def guess_type(self, url, strict=True): if '=' in type or '/' not in type: type = 'text/plain' return type, None # never compressed, so encoding is None - base, ext = posixpath.splitext(url) + + # Lazy import to improve module import time + import posixpath + + return self._guess_file_type(url, strict, posixpath.splitext) + + def guess_file_type(self, path, *, strict=True): + """Guess the type of a file based on its path. + + Similar to guess_type(), but takes file path instead of URL. + """ + # Lazy import to improve module import time + import os + + path = os.fsdecode(path) + path = os.path.splitdrive(path)[1] + return self._guess_file_type(path, strict, os.path.splitext) + + def _guess_file_type(self, path, strict, splitext): + base, ext = splitext(path) while (ext_lower := ext.lower()) in self.suffix_map: - base, ext = posixpath.splitext(base + self.suffix_map[ext_lower]) + base, ext = splitext(base + self.suffix_map[ext_lower]) # encodings_map is case sensitive if ext in self.encodings_map: encoding = self.encodings_map[ext] - base, ext = posixpath.splitext(base) + base, ext = splitext(base) else: encoding = None ext = ext.lower() @@ -175,9 +205,9 @@ def guess_all_extensions(self, type, strict=True): Return value is a list of strings giving the possible filename extensions, including the leading dot ('.'). The extension is not guaranteed to have been associated with any particular data stream, - but would be mapped to the MIME type `type' by guess_type(). + but would be mapped to the MIME type 'type' by guess_type(). - Optional `strict' argument when false adds a bunch of commonly found, + Optional 'strict' argument when false adds a bunch of commonly found, but non-standard types. """ type = type.lower() @@ -194,11 +224,11 @@ def guess_extension(self, type, strict=True): Return value is a string giving a filename extension, including the leading dot ('.'). The extension is not guaranteed to have been associated with any particular data - stream, but would be mapped to the MIME type `type' by - guess_type(). If no extension can be guessed for `type', None + stream, but would be mapped to the MIME type 'type' by + guess_type(). If no extension can be guessed for 'type', None is returned. - Optional `strict' argument when false adds a bunch of commonly found, + Optional 'strict' argument when false adds a bunch of commonly found, but non-standard types. """ extensions = self.guess_all_extensions(type, strict) @@ -304,7 +334,7 @@ def guess_type(url, strict=True): to ".tar.gz". (This is table-driven too, using the dictionary suffix_map). - Optional `strict' argument when false adds a bunch of commonly found, but + Optional 'strict' argument when false adds a bunch of commonly found, but non-standard types. """ if _db is None: @@ -312,17 +342,27 @@ def guess_type(url, strict=True): return _db.guess_type(url, strict) +def guess_file_type(path, *, strict=True): + """Guess the type of a file based on its path. + + Similar to guess_type(), but takes file path instead of URL. + """ + if _db is None: + init() + return _db.guess_file_type(path, strict=strict) + + def guess_all_extensions(type, strict=True): """Guess the extensions for a file based on its MIME type. Return value is a list of strings giving the possible filename extensions, including the leading dot ('.'). The extension is not guaranteed to have been associated with any particular data - stream, but would be mapped to the MIME type `type' by - guess_type(). If no extension can be guessed for `type', None + stream, but would be mapped to the MIME type 'type' by + guess_type(). If no extension can be guessed for 'type', None is returned. - Optional `strict' argument when false adds a bunch of commonly found, + Optional 'strict' argument when false adds a bunch of commonly found, but non-standard types. """ if _db is None: @@ -335,10 +375,10 @@ def guess_extension(type, strict=True): Return value is a string giving a filename extension, including the leading dot ('.'). The extension is not guaranteed to have been associated with any particular data stream, but would be mapped to the - MIME type `type' by guess_type(). If no extension can be guessed for - `type', None is returned. + MIME type 'type' by guess_type(). If no extension can be guessed for + 'type', None is returned. - Optional `strict' argument when false adds a bunch of commonly found, + Optional 'strict' argument when false adds a bunch of commonly found, but non-standard types. """ if _db is None: @@ -379,6 +419,9 @@ def init(files=None): else: db = _db + # Lazy import to improve module import time + import os + for file in files: if os.path.isfile(file): db.read(file) @@ -425,7 +468,7 @@ def _default_mime_types(): } # Before adding new types, make sure they are either registered with IANA, - # at http://www.iana.org/assignments/media-types + # at https://www.iana.org/assignments/media-types/media-types.xhtml # or extensions, i.e. using the x- prefix # If you add to these, please keep them sorted by mime type. @@ -434,6 +477,8 @@ def _default_mime_types(): types_map = _types_map_default = { '.js' : 'text/javascript', '.mjs' : 'text/javascript', + '.epub' : 'application/epub+zip', + '.gz' : 'application/gzip', '.json' : 'application/json', '.webmanifest': 'application/manifest+json', '.doc' : 'application/msword', @@ -449,6 +494,7 @@ def _default_mime_types(): '.obj' : 'application/octet-stream', '.so' : 'application/octet-stream', '.oda' : 'application/oda', + '.ogx' : 'application/ogg', '.pdf' : 'application/pdf', '.p7c' : 'application/pkcs7-mime', '.ps' : 'application/postscript', @@ -459,15 +505,26 @@ def _default_mime_types(): '.m3u8' : 'application/vnd.apple.mpegurl', '.xls' : 'application/vnd.ms-excel', '.xlb' : 'application/vnd.ms-excel', + '.eot' : 'application/vnd.ms-fontobject', '.ppt' : 'application/vnd.ms-powerpoint', '.pot' : 'application/vnd.ms-powerpoint', '.ppa' : 'application/vnd.ms-powerpoint', '.pps' : 'application/vnd.ms-powerpoint', '.pwz' : 'application/vnd.ms-powerpoint', + '.odg' : 'application/vnd.oasis.opendocument.graphics', + '.odp' : 'application/vnd.oasis.opendocument.presentation', + '.ods' : 'application/vnd.oasis.opendocument.spreadsheet', + '.odt' : 'application/vnd.oasis.opendocument.text', + '.pptx' : 'application/vnd.openxmlformats-officedocument.presentationml.presentation', + '.xlsx' : 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet', + '.docx' : 'application/vnd.openxmlformats-officedocument.wordprocessingml.document', + '.rar' : 'application/vnd.rar', '.wasm' : 'application/wasm', + '.7z' : 'application/x-7z-compressed', '.bcpio' : 'application/x-bcpio', '.cpio' : 'application/x-cpio', '.csh' : 'application/x-csh', + '.deb' : 'application/x-debian-package', '.dvi' : 'application/x-dvi', '.gtar' : 'application/x-gtar', '.hdf' : 'application/x-hdf', @@ -477,10 +534,12 @@ def _default_mime_types(): '.cdf' : 'application/x-netcdf', '.nc' : 'application/x-netcdf', '.p12' : 'application/x-pkcs12', + '.php' : 'application/x-httpd-php', '.pfx' : 'application/x-pkcs12', '.ram' : 'application/x-pn-realaudio', '.pyc' : 'application/x-python-code', '.pyo' : 'application/x-python-code', + '.rpm' : 'application/x-rpm', '.sh' : 'application/x-sh', '.shar' : 'application/x-shar', '.swf' : 'application/x-shockwave-flash', @@ -503,6 +562,8 @@ def _default_mime_types(): '.rdf' : 'application/xml', '.wsdl' : 'application/xml', '.xpdl' : 'application/xml', + '.yaml' : 'application/yaml', + '.yml' : 'application/yaml', '.zip' : 'application/zip', '.3gp' : 'audio/3gpp', '.3gpp' : 'audio/3gpp', @@ -514,28 +575,47 @@ def _default_mime_types(): '.ass' : 'audio/aac', '.au' : 'audio/basic', '.snd' : 'audio/basic', + '.flac' : 'audio/flac', + '.mka' : 'audio/matroska', + '.m4a' : 'audio/mp4', '.mp3' : 'audio/mpeg', '.mp2' : 'audio/mpeg', + '.ogg' : 'audio/ogg', '.opus' : 'audio/opus', '.aif' : 'audio/x-aiff', '.aifc' : 'audio/x-aiff', '.aiff' : 'audio/x-aiff', '.ra' : 'audio/x-pn-realaudio', - '.wav' : 'audio/x-wav', + '.wav' : 'audio/vnd.wave', + '.otf' : 'font/otf', + '.ttf' : 'font/ttf', + '.weba' : 'audio/webm', + '.woff' : 'font/woff', + '.woff2' : 'font/woff2', '.avif' : 'image/avif', '.bmp' : 'image/bmp', + '.emf' : 'image/emf', + '.fits' : 'image/fits', + '.g3' : 'image/g3fax', '.gif' : 'image/gif', '.ief' : 'image/ief', + '.jp2' : 'image/jp2', '.jpg' : 'image/jpeg', '.jpe' : 'image/jpeg', '.jpeg' : 'image/jpeg', + '.jpm' : 'image/jpm', + '.jpx' : 'image/jpx', '.heic' : 'image/heic', '.heif' : 'image/heif', '.png' : 'image/png', '.svg' : 'image/svg+xml', + '.t38' : 'image/t38', '.tiff' : 'image/tiff', '.tif' : 'image/tiff', + '.tfx' : 'image/tiff-fx', '.ico' : 'image/vnd.microsoft.icon', + '.webp' : 'image/webp', + '.wmf' : 'image/wmf', '.ras' : 'image/x-cmu-raster', '.pnm' : 'image/x-portable-anymap', '.pbm' : 'image/x-portable-bitmap', @@ -549,6 +629,9 @@ def _default_mime_types(): '.mht' : 'message/rfc822', '.mhtml' : 'message/rfc822', '.nws' : 'message/rfc822', + '.gltf' : 'model/gltf+json', + '.glb' : 'model/gltf-binary', + '.stl' : 'model/stl', '.css' : 'text/css', '.csv' : 'text/csv', '.html' : 'text/html', @@ -564,6 +647,7 @@ def _default_mime_types(): '.pl' : 'text/plain', '.srt' : 'text/plain', '.rtx' : 'text/richtext', + '.rtf' : 'text/rtf', '.tsv' : 'text/tab-separated-values', '.vtt' : 'text/vtt', '.py' : 'text/x-python', @@ -573,16 +657,21 @@ def _default_mime_types(): '.sgml' : 'text/x-sgml', '.vcf' : 'text/x-vcard', '.xml' : 'text/xml', + '.mkv' : 'video/matroska', + '.mk3d' : 'video/matroska-3d', '.mp4' : 'video/mp4', '.mpeg' : 'video/mpeg', '.m1v' : 'video/mpeg', '.mpa' : 'video/mpeg', '.mpe' : 'video/mpeg', '.mpg' : 'video/mpeg', + '.ogv' : 'video/ogg', '.mov' : 'video/quicktime', '.qt' : 'video/quicktime', '.webm' : 'video/webm', - '.avi' : 'video/x-msvideo', + '.avi' : 'video/vnd.avi', + '.m4v' : 'video/x-m4v', + '.wmv' : 'video/x-ms-wmv', '.movie' : 'video/x-sgi-movie', } @@ -592,13 +681,13 @@ def _default_mime_types(): # Please sort these too common_types = _common_types_default = { '.rtf' : 'application/rtf', + '.apk' : 'application/vnd.android.package-archive', '.midi': 'audio/midi', '.mid' : 'audio/midi', '.jpg' : 'image/jpg', '.pict': 'image/pict', '.pct' : 'image/pict', '.pic' : 'image/pict', - '.webp': 'image/webp', '.xul' : 'text/xul', } @@ -606,51 +695,53 @@ def _default_mime_types(): _default_mime_types() -def _main(): - import getopt - - USAGE = """\ -Usage: mimetypes.py [options] type - -Options: - --help / -h -- print this message and exit - --lenient / -l -- additionally search of some common, but non-standard - types. - --extension / -e -- guess extension instead of type - -More than one type argument may be given. -""" - - def usage(code, msg=''): - print(USAGE) - if msg: print(msg) - sys.exit(code) - - try: - opts, args = getopt.getopt(sys.argv[1:], 'hle', - ['help', 'lenient', 'extension']) - except getopt.error as msg: - usage(1, msg) - - strict = 1 - extension = 0 - for opt, arg in opts: - if opt in ('-h', '--help'): - usage(0) - elif opt in ('-l', '--lenient'): - strict = 0 - elif opt in ('-e', '--extension'): - extension = 1 - for gtype in args: - if extension: - guess = guess_extension(gtype, strict) - if not guess: print("I don't know anything about type", gtype) - else: print(guess) - else: - guess, encoding = guess_type(gtype, strict) - if not guess: print("I don't know anything about type", gtype) - else: print('type:', guess, 'encoding:', encoding) +def _parse_args(args): + from argparse import ArgumentParser + + parser = ArgumentParser( + description='map filename extensions to MIME types', color=True + ) + parser.add_argument( + '-e', '--extension', + action='store_true', + help='guess extension instead of type' + ) + parser.add_argument( + '-l', '--lenient', + action='store_true', + help='additionally search for common but non-standard types' + ) + parser.add_argument('type', nargs='+', help='a type to search') + args = parser.parse_args(args) + return args, parser.format_help() + + +def _main(args=None): + """Run the mimetypes command-line interface and return a text to print.""" + args, help_text = _parse_args(args) + + results = [] + if args.extension: + for gtype in args.type: + guess = guess_extension(gtype, not args.lenient) + if guess: + results.append(str(guess)) + else: + results.append(f"error: unknown type {gtype}") + return results + else: + for gtype in args.type: + guess, encoding = guess_type(gtype, not args.lenient) + if guess: + results.append(f"type: {guess} encoding: {encoding}") + else: + results.append(f"error: media type unknown for {gtype}") + return results if __name__ == '__main__': - _main() + import sys + + results = _main() + print("\n".join(results)) + sys.exit(any(result.startswith("error: ") for result in results)) diff --git a/PythonLib/full/modulefinder.py b/PythonLib/full/modulefinder.py index a0a020f9..ac478ee7 100644 --- a/PythonLib/full/modulefinder.py +++ b/PythonLib/full/modulefinder.py @@ -72,7 +72,12 @@ def _find_module(name, path=None): if isinstance(spec.loader, importlib.machinery.SourceFileLoader): kind = _PY_SOURCE - elif isinstance(spec.loader, importlib.machinery.ExtensionFileLoader): + elif isinstance( + spec.loader, ( + importlib.machinery.ExtensionFileLoader, + importlib.machinery.AppleFrameworkLoader, + ) + ): kind = _C_EXTENSION elif isinstance(spec.loader, importlib.machinery.SourcelessFileLoader): diff --git a/PythonLib/full/msilib/__init__.py b/PythonLib/full/msilib/__init__.py deleted file mode 100644 index 565bf631..00000000 --- a/PythonLib/full/msilib/__init__.py +++ /dev/null @@ -1,484 +0,0 @@ -# Copyright (C) 2005 Martin v. Löwis -# Licensed to PSF under a Contributor Agreement. -from _msi import * -import fnmatch -import os -import re -import string -import sys -import warnings - -warnings._deprecated(__name__, remove=(3, 13)) - -AMD64 = "AMD64" in sys.version -# Keep msilib.Win64 around to preserve backwards compatibility. -Win64 = AMD64 - -# Partially taken from Wine -datasizemask= 0x00ff -type_valid= 0x0100 -type_localizable= 0x0200 - -typemask= 0x0c00 -type_long= 0x0000 -type_short= 0x0400 -type_string= 0x0c00 -type_binary= 0x0800 - -type_nullable= 0x1000 -type_key= 0x2000 -# XXX temporary, localizable? -knownbits = datasizemask | type_valid | type_localizable | \ - typemask | type_nullable | type_key - -class Table: - def __init__(self, name): - self.name = name - self.fields = [] - - def add_field(self, index, name, type): - self.fields.append((index,name,type)) - - def sql(self): - fields = [] - keys = [] - self.fields.sort() - fields = [None]*len(self.fields) - for index, name, type in self.fields: - index -= 1 - unk = type & ~knownbits - if unk: - print("%s.%s unknown bits %x" % (self.name, name, unk)) - size = type & datasizemask - dtype = type & typemask - if dtype == type_string: - if size: - tname="CHAR(%d)" % size - else: - tname="CHAR" - elif dtype == type_short: - assert size==2 - tname = "SHORT" - elif dtype == type_long: - assert size==4 - tname="LONG" - elif dtype == type_binary: - assert size==0 - tname="OBJECT" - else: - tname="unknown" - print("%s.%sunknown integer type %d" % (self.name, name, size)) - if type & type_nullable: - flags = "" - else: - flags = " NOT NULL" - if type & type_localizable: - flags += " LOCALIZABLE" - fields[index] = "`%s` %s%s" % (name, tname, flags) - if type & type_key: - keys.append("`%s`" % name) - fields = ", ".join(fields) - keys = ", ".join(keys) - return "CREATE TABLE %s (%s PRIMARY KEY %s)" % (self.name, fields, keys) - - def create(self, db): - v = db.OpenView(self.sql()) - v.Execute(None) - v.Close() - -class _Unspecified:pass -def change_sequence(seq, action, seqno=_Unspecified, cond = _Unspecified): - "Change the sequence number of an action in a sequence list" - for i in range(len(seq)): - if seq[i][0] == action: - if cond is _Unspecified: - cond = seq[i][1] - if seqno is _Unspecified: - seqno = seq[i][2] - seq[i] = (action, cond, seqno) - return - raise ValueError("Action not found in sequence") - -def add_data(db, table, values): - v = db.OpenView("SELECT * FROM `%s`" % table) - count = v.GetColumnInfo(MSICOLINFO_NAMES).GetFieldCount() - r = CreateRecord(count) - for value in values: - assert len(value) == count, value - for i in range(count): - field = value[i] - if isinstance(field, int): - r.SetInteger(i+1,field) - elif isinstance(field, str): - r.SetString(i+1,field) - elif field is None: - pass - elif isinstance(field, Binary): - r.SetStream(i+1, field.name) - else: - raise TypeError("Unsupported type %s" % field.__class__.__name__) - try: - v.Modify(MSIMODIFY_INSERT, r) - except Exception: - raise MSIError("Could not insert "+repr(values)+" into "+table) - - r.ClearData() - v.Close() - - -def add_stream(db, name, path): - v = db.OpenView("INSERT INTO _Streams (Name, Data) VALUES ('%s', ?)" % name) - r = CreateRecord(1) - r.SetStream(1, path) - v.Execute(r) - v.Close() - -def init_database(name, schema, - ProductName, ProductCode, ProductVersion, - Manufacturer): - try: - os.unlink(name) - except OSError: - pass - ProductCode = ProductCode.upper() - # Create the database - db = OpenDatabase(name, MSIDBOPEN_CREATE) - # Create the tables - for t in schema.tables: - t.create(db) - # Fill the validation table - add_data(db, "_Validation", schema._Validation_records) - # Initialize the summary information, allowing atmost 20 properties - si = db.GetSummaryInformation(20) - si.SetProperty(PID_TITLE, "Installation Database") - si.SetProperty(PID_SUBJECT, ProductName) - si.SetProperty(PID_AUTHOR, Manufacturer) - if AMD64: - si.SetProperty(PID_TEMPLATE, "x64;1033") - else: - si.SetProperty(PID_TEMPLATE, "Intel;1033") - si.SetProperty(PID_REVNUMBER, gen_uuid()) - si.SetProperty(PID_WORDCOUNT, 2) # long file names, compressed, original media - si.SetProperty(PID_PAGECOUNT, 200) - si.SetProperty(PID_APPNAME, "Python MSI Library") - # XXX more properties - si.Persist() - add_data(db, "Property", [ - ("ProductName", ProductName), - ("ProductCode", ProductCode), - ("ProductVersion", ProductVersion), - ("Manufacturer", Manufacturer), - ("ProductLanguage", "1033")]) - db.Commit() - return db - -def add_tables(db, module): - for table in module.tables: - add_data(db, table, getattr(module, table)) - -def make_id(str): - identifier_chars = string.ascii_letters + string.digits + "._" - str = "".join([c if c in identifier_chars else "_" for c in str]) - if str[0] in (string.digits + "."): - str = "_" + str - assert re.match("^[A-Za-z_][A-Za-z0-9_.]*$", str), "FILE"+str - return str - -def gen_uuid(): - return "{"+UuidCreate().upper()+"}" - -class CAB: - def __init__(self, name): - self.name = name - self.files = [] - self.filenames = set() - self.index = 0 - - def gen_id(self, file): - logical = _logical = make_id(file) - pos = 1 - while logical in self.filenames: - logical = "%s.%d" % (_logical, pos) - pos += 1 - self.filenames.add(logical) - return logical - - def append(self, full, file, logical): - if os.path.isdir(full): - return - if not logical: - logical = self.gen_id(file) - self.index += 1 - self.files.append((full, logical)) - return self.index, logical - - def commit(self, db): - from tempfile import mktemp - filename = mktemp() - FCICreate(filename, self.files) - add_data(db, "Media", - [(1, self.index, None, "#"+self.name, None, None)]) - add_stream(db, self.name, filename) - os.unlink(filename) - db.Commit() - -_directories = set() -class Directory: - def __init__(self, db, cab, basedir, physical, _logical, default, componentflags=None): - """Create a new directory in the Directory table. There is a current component - at each point in time for the directory, which is either explicitly created - through start_component, or implicitly when files are added for the first - time. Files are added into the current component, and into the cab file. - To create a directory, a base directory object needs to be specified (can be - None), the path to the physical directory, and a logical directory name. - Default specifies the DefaultDir slot in the directory table. componentflags - specifies the default flags that new components get.""" - index = 1 - _logical = make_id(_logical) - logical = _logical - while logical in _directories: - logical = "%s%d" % (_logical, index) - index += 1 - _directories.add(logical) - self.db = db - self.cab = cab - self.basedir = basedir - self.physical = physical - self.logical = logical - self.component = None - self.short_names = set() - self.ids = set() - self.keyfiles = {} - self.componentflags = componentflags - if basedir: - self.absolute = os.path.join(basedir.absolute, physical) - blogical = basedir.logical - else: - self.absolute = physical - blogical = None - add_data(db, "Directory", [(logical, blogical, default)]) - - def start_component(self, component = None, feature = None, flags = None, keyfile = None, uuid=None): - """Add an entry to the Component table, and make this component the current for this - directory. If no component name is given, the directory name is used. If no feature - is given, the current feature is used. If no flags are given, the directory's default - flags are used. If no keyfile is given, the KeyPath is left null in the Component - table.""" - if flags is None: - flags = self.componentflags - if uuid is None: - uuid = gen_uuid() - else: - uuid = uuid.upper() - if component is None: - component = self.logical - self.component = component - if AMD64: - flags |= 256 - if keyfile: - keyid = self.cab.gen_id(keyfile) - self.keyfiles[keyfile] = keyid - else: - keyid = None - add_data(self.db, "Component", - [(component, uuid, self.logical, flags, None, keyid)]) - if feature is None: - feature = current_feature - add_data(self.db, "FeatureComponents", - [(feature.id, component)]) - - def make_short(self, file): - oldfile = file - file = file.replace('+', '_') - file = ''.join(c for c in file if not c in r' "/\[]:;=,') - parts = file.split(".") - if len(parts) > 1: - prefix = "".join(parts[:-1]).upper() - suffix = parts[-1].upper() - if not prefix: - prefix = suffix - suffix = None - else: - prefix = file.upper() - suffix = None - if len(parts) < 3 and len(prefix) <= 8 and file == oldfile and ( - not suffix or len(suffix) <= 3): - if suffix: - file = prefix+"."+suffix - else: - file = prefix - else: - file = None - if file is None or file in self.short_names: - prefix = prefix[:6] - if suffix: - suffix = suffix[:3] - pos = 1 - while 1: - if suffix: - file = "%s~%d.%s" % (prefix, pos, suffix) - else: - file = "%s~%d" % (prefix, pos) - if file not in self.short_names: break - pos += 1 - assert pos < 10000 - if pos in (10, 100, 1000): - prefix = prefix[:-1] - self.short_names.add(file) - assert not re.search(r'[\?|><:/*"+,;=\[\]]', file) # restrictions on short names - return file - - def add_file(self, file, src=None, version=None, language=None): - """Add a file to the current component of the directory, starting a new one - if there is no current component. By default, the file name in the source - and the file table will be identical. If the src file is specified, it is - interpreted relative to the current directory. Optionally, a version and a - language can be specified for the entry in the File table.""" - if not self.component: - self.start_component(self.logical, current_feature, 0) - if not src: - # Allow relative paths for file if src is not specified - src = file - file = os.path.basename(file) - absolute = os.path.join(self.absolute, src) - assert not re.search(r'[\?|><:/*]"', file) # restrictions on long names - if file in self.keyfiles: - logical = self.keyfiles[file] - else: - logical = None - sequence, logical = self.cab.append(absolute, file, logical) - assert logical not in self.ids - self.ids.add(logical) - short = self.make_short(file) - full = "%s|%s" % (short, file) - filesize = os.stat(absolute).st_size - # constants.msidbFileAttributesVital - # Compressed omitted, since it is the database default - # could add r/o, system, hidden - attributes = 512 - add_data(self.db, "File", - [(logical, self.component, full, filesize, version, - language, attributes, sequence)]) - #if not version: - # # Add hash if the file is not versioned - # filehash = FileHash(absolute, 0) - # add_data(self.db, "MsiFileHash", - # [(logical, 0, filehash.IntegerData(1), - # filehash.IntegerData(2), filehash.IntegerData(3), - # filehash.IntegerData(4))]) - # Automatically remove .pyc files on uninstall (2) - # XXX: adding so many RemoveFile entries makes installer unbelievably - # slow. So instead, we have to use wildcard remove entries - if file.endswith(".py"): - add_data(self.db, "RemoveFile", - [(logical+"c", self.component, "%sC|%sc" % (short, file), - self.logical, 2), - (logical+"o", self.component, "%sO|%so" % (short, file), - self.logical, 2)]) - return logical - - def glob(self, pattern, exclude = None): - """Add a list of files to the current component as specified in the - glob pattern. Individual files can be excluded in the exclude list.""" - try: - files = os.listdir(self.absolute) - except OSError: - return [] - if pattern[:1] != '.': - files = (f for f in files if f[0] != '.') - files = fnmatch.filter(files, pattern) - for f in files: - if exclude and f in exclude: continue - self.add_file(f) - return files - - def remove_pyc(self): - "Remove .pyc files on uninstall" - add_data(self.db, "RemoveFile", - [(self.component+"c", self.component, "*.pyc", self.logical, 2)]) - -class Binary: - def __init__(self, fname): - self.name = fname - def __repr__(self): - return 'msilib.Binary(os.path.join(dirname,"%s"))' % self.name - -class Feature: - def __init__(self, db, id, title, desc, display, level = 1, - parent=None, directory = None, attributes=0): - self.id = id - if parent: - parent = parent.id - add_data(db, "Feature", - [(id, parent, title, desc, display, - level, directory, attributes)]) - def set_current(self): - global current_feature - current_feature = self - -class Control: - def __init__(self, dlg, name): - self.dlg = dlg - self.name = name - - def event(self, event, argument, condition = "1", ordering = None): - add_data(self.dlg.db, "ControlEvent", - [(self.dlg.name, self.name, event, argument, - condition, ordering)]) - - def mapping(self, event, attribute): - add_data(self.dlg.db, "EventMapping", - [(self.dlg.name, self.name, event, attribute)]) - - def condition(self, action, condition): - add_data(self.dlg.db, "ControlCondition", - [(self.dlg.name, self.name, action, condition)]) - -class RadioButtonGroup(Control): - def __init__(self, dlg, name, property): - self.dlg = dlg - self.name = name - self.property = property - self.index = 1 - - def add(self, name, x, y, w, h, text, value = None): - if value is None: - value = name - add_data(self.dlg.db, "RadioButton", - [(self.property, self.index, value, - x, y, w, h, text, None)]) - self.index += 1 - -class Dialog: - def __init__(self, db, name, x, y, w, h, attr, title, first, default, cancel): - self.db = db - self.name = name - self.x, self.y, self.w, self.h = x,y,w,h - add_data(db, "Dialog", [(name, x,y,w,h,attr,title,first,default,cancel)]) - - def control(self, name, type, x, y, w, h, attr, prop, text, next, help): - add_data(self.db, "Control", - [(self.name, name, type, x, y, w, h, attr, prop, text, next, help)]) - return Control(self, name) - - def text(self, name, x, y, w, h, attr, text): - return self.control(name, "Text", x, y, w, h, attr, None, - text, None, None) - - def bitmap(self, name, x, y, w, h, text): - return self.control(name, "Bitmap", x, y, w, h, 1, None, text, None, None) - - def line(self, name, x, y, w, h): - return self.control(name, "Line", x, y, w, h, 1, None, None, None, None) - - def pushbutton(self, name, x, y, w, h, attr, text, next): - return self.control(name, "PushButton", x, y, w, h, attr, None, text, next, None) - - def radiogroup(self, name, x, y, w, h, attr, prop, text, next): - add_data(self.db, "Control", - [(self.name, name, "RadioButtonGroup", - x, y, w, h, attr, prop, text, next, None)]) - return RadioButtonGroup(self, name, prop) - - def checkbox(self, name, x, y, w, h, attr, prop, text, next): - return self.control(name, "CheckBox", x, y, w, h, attr, prop, text, next, None) diff --git a/PythonLib/full/msilib/schema.py b/PythonLib/full/msilib/schema.py deleted file mode 100644 index 9f5745cf..00000000 --- a/PythonLib/full/msilib/schema.py +++ /dev/null @@ -1,1007 +0,0 @@ -from . import Table - -_Validation = Table('_Validation') -_Validation.add_field(1,'Table',11552) -_Validation.add_field(2,'Column',11552) -_Validation.add_field(3,'Nullable',3332) -_Validation.add_field(4,'MinValue',4356) -_Validation.add_field(5,'MaxValue',4356) -_Validation.add_field(6,'KeyTable',7679) -_Validation.add_field(7,'KeyColumn',5378) -_Validation.add_field(8,'Category',7456) -_Validation.add_field(9,'Set',7679) -_Validation.add_field(10,'Description',7679) - -ActionText = Table('ActionText') -ActionText.add_field(1,'Action',11592) -ActionText.add_field(2,'Description',7936) -ActionText.add_field(3,'Template',7936) - -AdminExecuteSequence = Table('AdminExecuteSequence') -AdminExecuteSequence.add_field(1,'Action',11592) -AdminExecuteSequence.add_field(2,'Condition',7679) -AdminExecuteSequence.add_field(3,'Sequence',5378) - -Condition = Table('Condition') -Condition.add_field(1,'Feature_',11558) -Condition.add_field(2,'Level',9474) -Condition.add_field(3,'Condition',7679) - -AdminUISequence = Table('AdminUISequence') -AdminUISequence.add_field(1,'Action',11592) -AdminUISequence.add_field(2,'Condition',7679) -AdminUISequence.add_field(3,'Sequence',5378) - -AdvtExecuteSequence = Table('AdvtExecuteSequence') -AdvtExecuteSequence.add_field(1,'Action',11592) -AdvtExecuteSequence.add_field(2,'Condition',7679) -AdvtExecuteSequence.add_field(3,'Sequence',5378) - -AdvtUISequence = Table('AdvtUISequence') -AdvtUISequence.add_field(1,'Action',11592) -AdvtUISequence.add_field(2,'Condition',7679) -AdvtUISequence.add_field(3,'Sequence',5378) - -AppId = Table('AppId') -AppId.add_field(1,'AppId',11558) -AppId.add_field(2,'RemoteServerName',7679) -AppId.add_field(3,'LocalService',7679) -AppId.add_field(4,'ServiceParameters',7679) -AppId.add_field(5,'DllSurrogate',7679) -AppId.add_field(6,'ActivateAtStorage',5378) -AppId.add_field(7,'RunAsInteractiveUser',5378) - -AppSearch = Table('AppSearch') -AppSearch.add_field(1,'Property',11592) -AppSearch.add_field(2,'Signature_',11592) - -Property = Table('Property') -Property.add_field(1,'Property',11592) -Property.add_field(2,'Value',3840) - -BBControl = Table('BBControl') -BBControl.add_field(1,'Billboard_',11570) -BBControl.add_field(2,'BBControl',11570) -BBControl.add_field(3,'Type',3378) -BBControl.add_field(4,'X',1282) -BBControl.add_field(5,'Y',1282) -BBControl.add_field(6,'Width',1282) -BBControl.add_field(7,'Height',1282) -BBControl.add_field(8,'Attributes',4356) -BBControl.add_field(9,'Text',7986) - -Billboard = Table('Billboard') -Billboard.add_field(1,'Billboard',11570) -Billboard.add_field(2,'Feature_',3366) -Billboard.add_field(3,'Action',7474) -Billboard.add_field(4,'Ordering',5378) - -Feature = Table('Feature') -Feature.add_field(1,'Feature',11558) -Feature.add_field(2,'Feature_Parent',7462) -Feature.add_field(3,'Title',8000) -Feature.add_field(4,'Description',8191) -Feature.add_field(5,'Display',5378) -Feature.add_field(6,'Level',1282) -Feature.add_field(7,'Directory_',7496) -Feature.add_field(8,'Attributes',1282) - -Binary = Table('Binary') -Binary.add_field(1,'Name',11592) -Binary.add_field(2,'Data',2304) - -BindImage = Table('BindImage') -BindImage.add_field(1,'File_',11592) -BindImage.add_field(2,'Path',7679) - -File = Table('File') -File.add_field(1,'File',11592) -File.add_field(2,'Component_',3400) -File.add_field(3,'FileName',4095) -File.add_field(4,'FileSize',260) -File.add_field(5,'Version',7496) -File.add_field(6,'Language',7444) -File.add_field(7,'Attributes',5378) -File.add_field(8,'Sequence',1282) - -CCPSearch = Table('CCPSearch') -CCPSearch.add_field(1,'Signature_',11592) - -CheckBox = Table('CheckBox') -CheckBox.add_field(1,'Property',11592) -CheckBox.add_field(2,'Value',7488) - -Class = Table('Class') -Class.add_field(1,'CLSID',11558) -Class.add_field(2,'Context',11552) -Class.add_field(3,'Component_',11592) -Class.add_field(4,'ProgId_Default',7679) -Class.add_field(5,'Description',8191) -Class.add_field(6,'AppId_',7462) -Class.add_field(7,'FileTypeMask',7679) -Class.add_field(8,'Icon_',7496) -Class.add_field(9,'IconIndex',5378) -Class.add_field(10,'DefInprocHandler',7456) -Class.add_field(11,'Argument',7679) -Class.add_field(12,'Feature_',3366) -Class.add_field(13,'Attributes',5378) - -Component = Table('Component') -Component.add_field(1,'Component',11592) -Component.add_field(2,'ComponentId',7462) -Component.add_field(3,'Directory_',3400) -Component.add_field(4,'Attributes',1282) -Component.add_field(5,'Condition',7679) -Component.add_field(6,'KeyPath',7496) - -Icon = Table('Icon') -Icon.add_field(1,'Name',11592) -Icon.add_field(2,'Data',2304) - -ProgId = Table('ProgId') -ProgId.add_field(1,'ProgId',11775) -ProgId.add_field(2,'ProgId_Parent',7679) -ProgId.add_field(3,'Class_',7462) -ProgId.add_field(4,'Description',8191) -ProgId.add_field(5,'Icon_',7496) -ProgId.add_field(6,'IconIndex',5378) - -ComboBox = Table('ComboBox') -ComboBox.add_field(1,'Property',11592) -ComboBox.add_field(2,'Order',9474) -ComboBox.add_field(3,'Value',3392) -ComboBox.add_field(4,'Text',8000) - -CompLocator = Table('CompLocator') -CompLocator.add_field(1,'Signature_',11592) -CompLocator.add_field(2,'ComponentId',3366) -CompLocator.add_field(3,'Type',5378) - -Complus = Table('Complus') -Complus.add_field(1,'Component_',11592) -Complus.add_field(2,'ExpType',13570) - -Directory = Table('Directory') -Directory.add_field(1,'Directory',11592) -Directory.add_field(2,'Directory_Parent',7496) -Directory.add_field(3,'DefaultDir',4095) - -Control = Table('Control') -Control.add_field(1,'Dialog_',11592) -Control.add_field(2,'Control',11570) -Control.add_field(3,'Type',3348) -Control.add_field(4,'X',1282) -Control.add_field(5,'Y',1282) -Control.add_field(6,'Width',1282) -Control.add_field(7,'Height',1282) -Control.add_field(8,'Attributes',4356) -Control.add_field(9,'Property',7474) -Control.add_field(10,'Text',7936) -Control.add_field(11,'Control_Next',7474) -Control.add_field(12,'Help',7986) - -Dialog = Table('Dialog') -Dialog.add_field(1,'Dialog',11592) -Dialog.add_field(2,'HCentering',1282) -Dialog.add_field(3,'VCentering',1282) -Dialog.add_field(4,'Width',1282) -Dialog.add_field(5,'Height',1282) -Dialog.add_field(6,'Attributes',4356) -Dialog.add_field(7,'Title',8064) -Dialog.add_field(8,'Control_First',3378) -Dialog.add_field(9,'Control_Default',7474) -Dialog.add_field(10,'Control_Cancel',7474) - -ControlCondition = Table('ControlCondition') -ControlCondition.add_field(1,'Dialog_',11592) -ControlCondition.add_field(2,'Control_',11570) -ControlCondition.add_field(3,'Action',11570) -ControlCondition.add_field(4,'Condition',11775) - -ControlEvent = Table('ControlEvent') -ControlEvent.add_field(1,'Dialog_',11592) -ControlEvent.add_field(2,'Control_',11570) -ControlEvent.add_field(3,'Event',11570) -ControlEvent.add_field(4,'Argument',11775) -ControlEvent.add_field(5,'Condition',15871) -ControlEvent.add_field(6,'Ordering',5378) - -CreateFolder = Table('CreateFolder') -CreateFolder.add_field(1,'Directory_',11592) -CreateFolder.add_field(2,'Component_',11592) - -CustomAction = Table('CustomAction') -CustomAction.add_field(1,'Action',11592) -CustomAction.add_field(2,'Type',1282) -CustomAction.add_field(3,'Source',7496) -CustomAction.add_field(4,'Target',7679) - -DrLocator = Table('DrLocator') -DrLocator.add_field(1,'Signature_',11592) -DrLocator.add_field(2,'Parent',15688) -DrLocator.add_field(3,'Path',15871) -DrLocator.add_field(4,'Depth',5378) - -DuplicateFile = Table('DuplicateFile') -DuplicateFile.add_field(1,'FileKey',11592) -DuplicateFile.add_field(2,'Component_',3400) -DuplicateFile.add_field(3,'File_',3400) -DuplicateFile.add_field(4,'DestName',8191) -DuplicateFile.add_field(5,'DestFolder',7496) - -Environment = Table('Environment') -Environment.add_field(1,'Environment',11592) -Environment.add_field(2,'Name',4095) -Environment.add_field(3,'Value',8191) -Environment.add_field(4,'Component_',3400) - -Error = Table('Error') -Error.add_field(1,'Error',9474) -Error.add_field(2,'Message',7936) - -EventMapping = Table('EventMapping') -EventMapping.add_field(1,'Dialog_',11592) -EventMapping.add_field(2,'Control_',11570) -EventMapping.add_field(3,'Event',11570) -EventMapping.add_field(4,'Attribute',3378) - -Extension = Table('Extension') -Extension.add_field(1,'Extension',11775) -Extension.add_field(2,'Component_',11592) -Extension.add_field(3,'ProgId_',7679) -Extension.add_field(4,'MIME_',7488) -Extension.add_field(5,'Feature_',3366) - -MIME = Table('MIME') -MIME.add_field(1,'ContentType',11584) -MIME.add_field(2,'Extension_',3583) -MIME.add_field(3,'CLSID',7462) - -FeatureComponents = Table('FeatureComponents') -FeatureComponents.add_field(1,'Feature_',11558) -FeatureComponents.add_field(2,'Component_',11592) - -FileSFPCatalog = Table('FileSFPCatalog') -FileSFPCatalog.add_field(1,'File_',11592) -FileSFPCatalog.add_field(2,'SFPCatalog_',11775) - -SFPCatalog = Table('SFPCatalog') -SFPCatalog.add_field(1,'SFPCatalog',11775) -SFPCatalog.add_field(2,'Catalog',2304) -SFPCatalog.add_field(3,'Dependency',7424) - -Font = Table('Font') -Font.add_field(1,'File_',11592) -Font.add_field(2,'FontTitle',7552) - -IniFile = Table('IniFile') -IniFile.add_field(1,'IniFile',11592) -IniFile.add_field(2,'FileName',4095) -IniFile.add_field(3,'DirProperty',7496) -IniFile.add_field(4,'Section',3936) -IniFile.add_field(5,'Key',3968) -IniFile.add_field(6,'Value',4095) -IniFile.add_field(7,'Action',1282) -IniFile.add_field(8,'Component_',3400) - -IniLocator = Table('IniLocator') -IniLocator.add_field(1,'Signature_',11592) -IniLocator.add_field(2,'FileName',3583) -IniLocator.add_field(3,'Section',3424) -IniLocator.add_field(4,'Key',3456) -IniLocator.add_field(5,'Field',5378) -IniLocator.add_field(6,'Type',5378) - -InstallExecuteSequence = Table('InstallExecuteSequence') -InstallExecuteSequence.add_field(1,'Action',11592) -InstallExecuteSequence.add_field(2,'Condition',7679) -InstallExecuteSequence.add_field(3,'Sequence',5378) - -InstallUISequence = Table('InstallUISequence') -InstallUISequence.add_field(1,'Action',11592) -InstallUISequence.add_field(2,'Condition',7679) -InstallUISequence.add_field(3,'Sequence',5378) - -IsolatedComponent = Table('IsolatedComponent') -IsolatedComponent.add_field(1,'Component_Shared',11592) -IsolatedComponent.add_field(2,'Component_Application',11592) - -LaunchCondition = Table('LaunchCondition') -LaunchCondition.add_field(1,'Condition',11775) -LaunchCondition.add_field(2,'Description',4095) - -ListBox = Table('ListBox') -ListBox.add_field(1,'Property',11592) -ListBox.add_field(2,'Order',9474) -ListBox.add_field(3,'Value',3392) -ListBox.add_field(4,'Text',8000) - -ListView = Table('ListView') -ListView.add_field(1,'Property',11592) -ListView.add_field(2,'Order',9474) -ListView.add_field(3,'Value',3392) -ListView.add_field(4,'Text',8000) -ListView.add_field(5,'Binary_',7496) - -LockPermissions = Table('LockPermissions') -LockPermissions.add_field(1,'LockObject',11592) -LockPermissions.add_field(2,'Table',11552) -LockPermissions.add_field(3,'Domain',15871) -LockPermissions.add_field(4,'User',11775) -LockPermissions.add_field(5,'Permission',4356) - -Media = Table('Media') -Media.add_field(1,'DiskId',9474) -Media.add_field(2,'LastSequence',1282) -Media.add_field(3,'DiskPrompt',8000) -Media.add_field(4,'Cabinet',7679) -Media.add_field(5,'VolumeLabel',7456) -Media.add_field(6,'Source',7496) - -MoveFile = Table('MoveFile') -MoveFile.add_field(1,'FileKey',11592) -MoveFile.add_field(2,'Component_',3400) -MoveFile.add_field(3,'SourceName',8191) -MoveFile.add_field(4,'DestName',8191) -MoveFile.add_field(5,'SourceFolder',7496) -MoveFile.add_field(6,'DestFolder',3400) -MoveFile.add_field(7,'Options',1282) - -MsiAssembly = Table('MsiAssembly') -MsiAssembly.add_field(1,'Component_',11592) -MsiAssembly.add_field(2,'Feature_',3366) -MsiAssembly.add_field(3,'File_Manifest',7496) -MsiAssembly.add_field(4,'File_Application',7496) -MsiAssembly.add_field(5,'Attributes',5378) - -MsiAssemblyName = Table('MsiAssemblyName') -MsiAssemblyName.add_field(1,'Component_',11592) -MsiAssemblyName.add_field(2,'Name',11775) -MsiAssemblyName.add_field(3,'Value',3583) - -MsiDigitalCertificate = Table('MsiDigitalCertificate') -MsiDigitalCertificate.add_field(1,'DigitalCertificate',11592) -MsiDigitalCertificate.add_field(2,'CertData',2304) - -MsiDigitalSignature = Table('MsiDigitalSignature') -MsiDigitalSignature.add_field(1,'Table',11552) -MsiDigitalSignature.add_field(2,'SignObject',11592) -MsiDigitalSignature.add_field(3,'DigitalCertificate_',3400) -MsiDigitalSignature.add_field(4,'Hash',6400) - -MsiFileHash = Table('MsiFileHash') -MsiFileHash.add_field(1,'File_',11592) -MsiFileHash.add_field(2,'Options',1282) -MsiFileHash.add_field(3,'HashPart1',260) -MsiFileHash.add_field(4,'HashPart2',260) -MsiFileHash.add_field(5,'HashPart3',260) -MsiFileHash.add_field(6,'HashPart4',260) - -MsiPatchHeaders = Table('MsiPatchHeaders') -MsiPatchHeaders.add_field(1,'StreamRef',11558) -MsiPatchHeaders.add_field(2,'Header',2304) - -ODBCAttribute = Table('ODBCAttribute') -ODBCAttribute.add_field(1,'Driver_',11592) -ODBCAttribute.add_field(2,'Attribute',11560) -ODBCAttribute.add_field(3,'Value',8191) - -ODBCDriver = Table('ODBCDriver') -ODBCDriver.add_field(1,'Driver',11592) -ODBCDriver.add_field(2,'Component_',3400) -ODBCDriver.add_field(3,'Description',3583) -ODBCDriver.add_field(4,'File_',3400) -ODBCDriver.add_field(5,'File_Setup',7496) - -ODBCDataSource = Table('ODBCDataSource') -ODBCDataSource.add_field(1,'DataSource',11592) -ODBCDataSource.add_field(2,'Component_',3400) -ODBCDataSource.add_field(3,'Description',3583) -ODBCDataSource.add_field(4,'DriverDescription',3583) -ODBCDataSource.add_field(5,'Registration',1282) - -ODBCSourceAttribute = Table('ODBCSourceAttribute') -ODBCSourceAttribute.add_field(1,'DataSource_',11592) -ODBCSourceAttribute.add_field(2,'Attribute',11552) -ODBCSourceAttribute.add_field(3,'Value',8191) - -ODBCTranslator = Table('ODBCTranslator') -ODBCTranslator.add_field(1,'Translator',11592) -ODBCTranslator.add_field(2,'Component_',3400) -ODBCTranslator.add_field(3,'Description',3583) -ODBCTranslator.add_field(4,'File_',3400) -ODBCTranslator.add_field(5,'File_Setup',7496) - -Patch = Table('Patch') -Patch.add_field(1,'File_',11592) -Patch.add_field(2,'Sequence',9474) -Patch.add_field(3,'PatchSize',260) -Patch.add_field(4,'Attributes',1282) -Patch.add_field(5,'Header',6400) -Patch.add_field(6,'StreamRef_',7462) - -PatchPackage = Table('PatchPackage') -PatchPackage.add_field(1,'PatchId',11558) -PatchPackage.add_field(2,'Media_',1282) - -PublishComponent = Table('PublishComponent') -PublishComponent.add_field(1,'ComponentId',11558) -PublishComponent.add_field(2,'Qualifier',11775) -PublishComponent.add_field(3,'Component_',11592) -PublishComponent.add_field(4,'AppData',8191) -PublishComponent.add_field(5,'Feature_',3366) - -RadioButton = Table('RadioButton') -RadioButton.add_field(1,'Property',11592) -RadioButton.add_field(2,'Order',9474) -RadioButton.add_field(3,'Value',3392) -RadioButton.add_field(4,'X',1282) -RadioButton.add_field(5,'Y',1282) -RadioButton.add_field(6,'Width',1282) -RadioButton.add_field(7,'Height',1282) -RadioButton.add_field(8,'Text',8000) -RadioButton.add_field(9,'Help',7986) - -Registry = Table('Registry') -Registry.add_field(1,'Registry',11592) -Registry.add_field(2,'Root',1282) -Registry.add_field(3,'Key',4095) -Registry.add_field(4,'Name',8191) -Registry.add_field(5,'Value',7936) -Registry.add_field(6,'Component_',3400) - -RegLocator = Table('RegLocator') -RegLocator.add_field(1,'Signature_',11592) -RegLocator.add_field(2,'Root',1282) -RegLocator.add_field(3,'Key',3583) -RegLocator.add_field(4,'Name',7679) -RegLocator.add_field(5,'Type',5378) - -RemoveFile = Table('RemoveFile') -RemoveFile.add_field(1,'FileKey',11592) -RemoveFile.add_field(2,'Component_',3400) -RemoveFile.add_field(3,'FileName',8191) -RemoveFile.add_field(4,'DirProperty',3400) -RemoveFile.add_field(5,'InstallMode',1282) - -RemoveIniFile = Table('RemoveIniFile') -RemoveIniFile.add_field(1,'RemoveIniFile',11592) -RemoveIniFile.add_field(2,'FileName',4095) -RemoveIniFile.add_field(3,'DirProperty',7496) -RemoveIniFile.add_field(4,'Section',3936) -RemoveIniFile.add_field(5,'Key',3968) -RemoveIniFile.add_field(6,'Value',8191) -RemoveIniFile.add_field(7,'Action',1282) -RemoveIniFile.add_field(8,'Component_',3400) - -RemoveRegistry = Table('RemoveRegistry') -RemoveRegistry.add_field(1,'RemoveRegistry',11592) -RemoveRegistry.add_field(2,'Root',1282) -RemoveRegistry.add_field(3,'Key',4095) -RemoveRegistry.add_field(4,'Name',8191) -RemoveRegistry.add_field(5,'Component_',3400) - -ReserveCost = Table('ReserveCost') -ReserveCost.add_field(1,'ReserveKey',11592) -ReserveCost.add_field(2,'Component_',3400) -ReserveCost.add_field(3,'ReserveFolder',7496) -ReserveCost.add_field(4,'ReserveLocal',260) -ReserveCost.add_field(5,'ReserveSource',260) - -SelfReg = Table('SelfReg') -SelfReg.add_field(1,'File_',11592) -SelfReg.add_field(2,'Cost',5378) - -ServiceControl = Table('ServiceControl') -ServiceControl.add_field(1,'ServiceControl',11592) -ServiceControl.add_field(2,'Name',4095) -ServiceControl.add_field(3,'Event',1282) -ServiceControl.add_field(4,'Arguments',8191) -ServiceControl.add_field(5,'Wait',5378) -ServiceControl.add_field(6,'Component_',3400) - -ServiceInstall = Table('ServiceInstall') -ServiceInstall.add_field(1,'ServiceInstall',11592) -ServiceInstall.add_field(2,'Name',3583) -ServiceInstall.add_field(3,'DisplayName',8191) -ServiceInstall.add_field(4,'ServiceType',260) -ServiceInstall.add_field(5,'StartType',260) -ServiceInstall.add_field(6,'ErrorControl',260) -ServiceInstall.add_field(7,'LoadOrderGroup',7679) -ServiceInstall.add_field(8,'Dependencies',7679) -ServiceInstall.add_field(9,'StartName',7679) -ServiceInstall.add_field(10,'Password',7679) -ServiceInstall.add_field(11,'Arguments',7679) -ServiceInstall.add_field(12,'Component_',3400) -ServiceInstall.add_field(13,'Description',8191) - -Shortcut = Table('Shortcut') -Shortcut.add_field(1,'Shortcut',11592) -Shortcut.add_field(2,'Directory_',3400) -Shortcut.add_field(3,'Name',3968) -Shortcut.add_field(4,'Component_',3400) -Shortcut.add_field(5,'Target',3400) -Shortcut.add_field(6,'Arguments',7679) -Shortcut.add_field(7,'Description',8191) -Shortcut.add_field(8,'Hotkey',5378) -Shortcut.add_field(9,'Icon_',7496) -Shortcut.add_field(10,'IconIndex',5378) -Shortcut.add_field(11,'ShowCmd',5378) -Shortcut.add_field(12,'WkDir',7496) - -Signature = Table('Signature') -Signature.add_field(1,'Signature',11592) -Signature.add_field(2,'FileName',3583) -Signature.add_field(3,'MinVersion',7444) -Signature.add_field(4,'MaxVersion',7444) -Signature.add_field(5,'MinSize',4356) -Signature.add_field(6,'MaxSize',4356) -Signature.add_field(7,'MinDate',4356) -Signature.add_field(8,'MaxDate',4356) -Signature.add_field(9,'Languages',7679) - -TextStyle = Table('TextStyle') -TextStyle.add_field(1,'TextStyle',11592) -TextStyle.add_field(2,'FaceName',3360) -TextStyle.add_field(3,'Size',1282) -TextStyle.add_field(4,'Color',4356) -TextStyle.add_field(5,'StyleBits',5378) - -TypeLib = Table('TypeLib') -TypeLib.add_field(1,'LibID',11558) -TypeLib.add_field(2,'Language',9474) -TypeLib.add_field(3,'Component_',11592) -TypeLib.add_field(4,'Version',4356) -TypeLib.add_field(5,'Description',8064) -TypeLib.add_field(6,'Directory_',7496) -TypeLib.add_field(7,'Feature_',3366) -TypeLib.add_field(8,'Cost',4356) - -UIText = Table('UIText') -UIText.add_field(1,'Key',11592) -UIText.add_field(2,'Text',8191) - -Upgrade = Table('Upgrade') -Upgrade.add_field(1,'UpgradeCode',11558) -Upgrade.add_field(2,'VersionMin',15636) -Upgrade.add_field(3,'VersionMax',15636) -Upgrade.add_field(4,'Language',15871) -Upgrade.add_field(5,'Attributes',8452) -Upgrade.add_field(6,'Remove',7679) -Upgrade.add_field(7,'ActionProperty',3400) - -Verb = Table('Verb') -Verb.add_field(1,'Extension_',11775) -Verb.add_field(2,'Verb',11552) -Verb.add_field(3,'Sequence',5378) -Verb.add_field(4,'Command',8191) -Verb.add_field(5,'Argument',8191) - -tables=[_Validation, ActionText, AdminExecuteSequence, Condition, AdminUISequence, AdvtExecuteSequence, AdvtUISequence, AppId, AppSearch, Property, BBControl, Billboard, Feature, Binary, BindImage, File, CCPSearch, CheckBox, Class, Component, Icon, ProgId, ComboBox, CompLocator, Complus, Directory, Control, Dialog, ControlCondition, ControlEvent, CreateFolder, CustomAction, DrLocator, DuplicateFile, Environment, Error, EventMapping, Extension, MIME, FeatureComponents, FileSFPCatalog, SFPCatalog, Font, IniFile, IniLocator, InstallExecuteSequence, InstallUISequence, IsolatedComponent, LaunchCondition, ListBox, ListView, LockPermissions, Media, MoveFile, MsiAssembly, MsiAssemblyName, MsiDigitalCertificate, MsiDigitalSignature, MsiFileHash, MsiPatchHeaders, ODBCAttribute, ODBCDriver, ODBCDataSource, ODBCSourceAttribute, ODBCTranslator, Patch, PatchPackage, PublishComponent, RadioButton, Registry, RegLocator, RemoveFile, RemoveIniFile, RemoveRegistry, ReserveCost, SelfReg, ServiceControl, ServiceInstall, Shortcut, Signature, TextStyle, TypeLib, UIText, Upgrade, Verb] - -_Validation_records = [ -('_Validation','Table','N',None, None, None, None, 'Identifier',None, 'Name of table',), -('_Validation','Column','N',None, None, None, None, 'Identifier',None, 'Name of column',), -('_Validation','Description','Y',None, None, None, None, 'Text',None, 'Description of column',), -('_Validation','Set','Y',None, None, None, None, 'Text',None, 'Set of values that are permitted',), -('_Validation','Category','Y',None, None, None, None, None, 'Text;Formatted;Template;Condition;Guid;Path;Version;Language;Identifier;Binary;UpperCase;LowerCase;Filename;Paths;AnyPath;WildCardFilename;RegPath;KeyFormatted;CustomSource;Property;Cabinet;Shortcut;URL','String category',), -('_Validation','KeyColumn','Y',1,32,None, None, None, None, 'Column to which foreign key connects',), -('_Validation','KeyTable','Y',None, None, None, None, 'Identifier',None, 'For foreign key, Name of table to which data must link',), -('_Validation','MaxValue','Y',-2147483647,2147483647,None, None, None, None, 'Maximum value allowed',), -('_Validation','MinValue','Y',-2147483647,2147483647,None, None, None, None, 'Minimum value allowed',), -('_Validation','Nullable','N',None, None, None, None, None, 'Y;N;@','Whether the column is nullable',), -('ActionText','Description','Y',None, None, None, None, 'Text',None, 'Localized description displayed in progress dialog and log when action is executing.',), -('ActionText','Action','N',None, None, None, None, 'Identifier',None, 'Name of action to be described.',), -('ActionText','Template','Y',None, None, None, None, 'Template',None, 'Optional localized format template used to format action data records for display during action execution.',), -('AdminExecuteSequence','Action','N',None, None, None, None, 'Identifier',None, 'Name of action to invoke, either in the engine or the handler DLL.',), -('AdminExecuteSequence','Condition','Y',None, None, None, None, 'Condition',None, 'Optional expression which skips the action if evaluates to expFalse.If the expression syntax is invalid, the engine will terminate, returning iesBadActionData.',), -('AdminExecuteSequence','Sequence','Y',-4,32767,None, None, None, None, 'Number that determines the sort order in which the actions are to be executed. Leave blank to suppress action.',), -('Condition','Condition','Y',None, None, None, None, 'Condition',None, 'Expression evaluated to determine if Level in the Feature table is to change.',), -('Condition','Feature_','N',None, None, 'Feature',1,'Identifier',None, 'Reference to a Feature entry in Feature table.',), -('Condition','Level','N',0,32767,None, None, None, None, 'New selection Level to set in Feature table if Condition evaluates to TRUE.',), -('AdminUISequence','Action','N',None, None, None, None, 'Identifier',None, 'Name of action to invoke, either in the engine or the handler DLL.',), -('AdminUISequence','Condition','Y',None, None, None, None, 'Condition',None, 'Optional expression which skips the action if evaluates to expFalse.If the expression syntax is invalid, the engine will terminate, returning iesBadActionData.',), -('AdminUISequence','Sequence','Y',-4,32767,None, None, None, None, 'Number that determines the sort order in which the actions are to be executed. Leave blank to suppress action.',), -('AdvtExecuteSequence','Action','N',None, None, None, None, 'Identifier',None, 'Name of action to invoke, either in the engine or the handler DLL.',), -('AdvtExecuteSequence','Condition','Y',None, None, None, None, 'Condition',None, 'Optional expression which skips the action if evaluates to expFalse.If the expression syntax is invalid, the engine will terminate, returning iesBadActionData.',), -('AdvtExecuteSequence','Sequence','Y',-4,32767,None, None, None, None, 'Number that determines the sort order in which the actions are to be executed. Leave blank to suppress action.',), -('AdvtUISequence','Action','N',None, None, None, None, 'Identifier',None, 'Name of action to invoke, either in the engine or the handler DLL.',), -('AdvtUISequence','Condition','Y',None, None, None, None, 'Condition',None, 'Optional expression which skips the action if evaluates to expFalse.If the expression syntax is invalid, the engine will terminate, returning iesBadActionData.',), -('AdvtUISequence','Sequence','Y',-4,32767,None, None, None, None, 'Number that determines the sort order in which the actions are to be executed. Leave blank to suppress action.',), -('AppId','AppId','N',None, None, None, None, 'Guid',None, None, ), -('AppId','ActivateAtStorage','Y',0,1,None, None, None, None, None, ), -('AppId','DllSurrogate','Y',None, None, None, None, 'Text',None, None, ), -('AppId','LocalService','Y',None, None, None, None, 'Text',None, None, ), -('AppId','RemoteServerName','Y',None, None, None, None, 'Formatted',None, None, ), -('AppId','RunAsInteractiveUser','Y',0,1,None, None, None, None, None, ), -('AppId','ServiceParameters','Y',None, None, None, None, 'Text',None, None, ), -('AppSearch','Property','N',None, None, None, None, 'Identifier',None, 'The property associated with a Signature',), -('AppSearch','Signature_','N',None, None, 'Signature;RegLocator;IniLocator;DrLocator;CompLocator',1,'Identifier',None, 'The Signature_ represents a unique file signature and is also the foreign key in the Signature, RegLocator, IniLocator, CompLocator and the DrLocator tables.',), -('Property','Property','N',None, None, None, None, 'Identifier',None, 'Name of property, uppercase if settable by launcher or loader.',), -('Property','Value','N',None, None, None, None, 'Text',None, 'String value for property. Never null or empty.',), -('BBControl','Type','N',None, None, None, None, 'Identifier',None, 'The type of the control.',), -('BBControl','Y','N',0,32767,None, None, None, None, 'Vertical coordinate of the upper left corner of the bounding rectangle of the control.',), -('BBControl','Text','Y',None, None, None, None, 'Text',None, 'A string used to set the initial text contained within a control (if appropriate).',), -('BBControl','BBControl','N',None, None, None, None, 'Identifier',None, 'Name of the control. This name must be unique within a billboard, but can repeat on different billboard.',), -('BBControl','Attributes','Y',0,2147483647,None, None, None, None, 'A 32-bit word that specifies the attribute flags to be applied to this control.',), -('BBControl','Billboard_','N',None, None, 'Billboard',1,'Identifier',None, 'External key to the Billboard table, name of the billboard.',), -('BBControl','Height','N',0,32767,None, None, None, None, 'Height of the bounding rectangle of the control.',), -('BBControl','Width','N',0,32767,None, None, None, None, 'Width of the bounding rectangle of the control.',), -('BBControl','X','N',0,32767,None, None, None, None, 'Horizontal coordinate of the upper left corner of the bounding rectangle of the control.',), -('Billboard','Action','Y',None, None, None, None, 'Identifier',None, 'The name of an action. The billboard is displayed during the progress messages received from this action.',), -('Billboard','Billboard','N',None, None, None, None, 'Identifier',None, 'Name of the billboard.',), -('Billboard','Feature_','N',None, None, 'Feature',1,'Identifier',None, 'An external key to the Feature Table. The billboard is shown only if this feature is being installed.',), -('Billboard','Ordering','Y',0,32767,None, None, None, None, 'A positive integer. If there is more than one billboard corresponding to an action they will be shown in the order defined by this column.',), -('Feature','Description','Y',None, None, None, None, 'Text',None, 'Longer descriptive text describing a visible feature item.',), -('Feature','Attributes','N',None, None, None, None, None, '0;1;2;4;5;6;8;9;10;16;17;18;20;21;22;24;25;26;32;33;34;36;37;38;48;49;50;52;53;54','Feature attributes',), -('Feature','Feature','N',None, None, None, None, 'Identifier',None, 'Primary key used to identify a particular feature record.',), -('Feature','Directory_','Y',None, None, 'Directory',1,'UpperCase',None, 'The name of the Directory that can be configured by the UI. A non-null value will enable the browse button.',), -('Feature','Level','N',0,32767,None, None, None, None, 'The install level at which record will be initially selected. An install level of 0 will disable an item and prevent its display.',), -('Feature','Title','Y',None, None, None, None, 'Text',None, 'Short text identifying a visible feature item.',), -('Feature','Display','Y',0,32767,None, None, None, None, 'Numeric sort order, used to force a specific display ordering.',), -('Feature','Feature_Parent','Y',None, None, 'Feature',1,'Identifier',None, 'Optional key of a parent record in the same table. If the parent is not selected, then the record will not be installed. Null indicates a root item.',), -('Binary','Name','N',None, None, None, None, 'Identifier',None, 'Unique key identifying the binary data.',), -('Binary','Data','N',None, None, None, None, 'Binary',None, 'The unformatted binary data.',), -('BindImage','File_','N',None, None, 'File',1,'Identifier',None, 'The index into the File table. This must be an executable file.',), -('BindImage','Path','Y',None, None, None, None, 'Paths',None, 'A list of ; delimited paths that represent the paths to be searched for the import DLLS. The list is usually a list of properties each enclosed within square brackets [] .',), -('File','Sequence','N',1,32767,None, None, None, None, 'Sequence with respect to the media images; order must track cabinet order.',), -('File','Attributes','Y',0,32767,None, None, None, None, 'Integer containing bit flags representing file attributes (with the decimal value of each bit position in parentheses)',), -('File','File','N',None, None, None, None, 'Identifier',None, 'Primary key, non-localized token, must match identifier in cabinet. For uncompressed files, this field is ignored.',), -('File','Component_','N',None, None, 'Component',1,'Identifier',None, 'Foreign key referencing Component that controls the file.',), -('File','FileName','N',None, None, None, None, 'Filename',None, 'File name used for installation, may be localized. This may contain a "short name|long name" pair.',), -('File','FileSize','N',0,2147483647,None, None, None, None, 'Size of file in bytes (integer).',), -('File','Language','Y',None, None, None, None, 'Language',None, 'List of decimal language Ids, comma-separated if more than one.',), -('File','Version','Y',None, None, 'File',1,'Version',None, 'Version string for versioned files; Blank for unversioned files.',), -('CCPSearch','Signature_','N',None, None, 'Signature;RegLocator;IniLocator;DrLocator;CompLocator',1,'Identifier',None, 'The Signature_ represents a unique file signature and is also the foreign key in the Signature, RegLocator, IniLocator, CompLocator and the DrLocator tables.',), -('CheckBox','Property','N',None, None, None, None, 'Identifier',None, 'A named property to be tied to the item.',), -('CheckBox','Value','Y',None, None, None, None, 'Formatted',None, 'The value string associated with the item.',), -('Class','Description','Y',None, None, None, None, 'Text',None, 'Localized description for the Class.',), -('Class','Attributes','Y',None, 32767,None, None, None, None, 'Class registration attributes.',), -('Class','Feature_','N',None, None, 'Feature',1,'Identifier',None, 'Required foreign key into the Feature Table, specifying the feature to validate or install in order for the CLSID factory to be operational.',), -('Class','AppId_','Y',None, None, 'AppId',1,'Guid',None, 'Optional AppID containing DCOM information for associated application (string GUID).',), -('Class','Argument','Y',None, None, None, None, 'Formatted',None, 'optional argument for LocalServers.',), -('Class','CLSID','N',None, None, None, None, 'Guid',None, 'The CLSID of an OLE factory.',), -('Class','Component_','N',None, None, 'Component',1,'Identifier',None, 'Required foreign key into the Component Table, specifying the component for which to return a path when called through LocateComponent.',), -('Class','Context','N',None, None, None, None, 'Identifier',None, 'The numeric server context for this server. CLSCTX_xxxx',), -('Class','DefInprocHandler','Y',None, None, None, None, 'Filename','1;2;3','Optional default inproc handler. Only optionally provided if Context=CLSCTX_LOCAL_SERVER. Typically "ole32.dll" or "mapi32.dll"',), -('Class','FileTypeMask','Y',None, None, None, None, 'Text',None, 'Optional string containing information for the HKCRthis CLSID key. If multiple patterns exist, they must be delimited by a semicolon, and numeric subkeys will be generated: 0,1,2...',), -('Class','Icon_','Y',None, None, 'Icon',1,'Identifier',None, 'Optional foreign key into the Icon Table, specifying the icon file associated with this CLSID. Will be written under the DefaultIcon key.',), -('Class','IconIndex','Y',-32767,32767,None, None, None, None, 'Optional icon index.',), -('Class','ProgId_Default','Y',None, None, 'ProgId',1,'Text',None, 'Optional ProgId associated with this CLSID.',), -('Component','Condition','Y',None, None, None, None, 'Condition',None, "A conditional statement that will disable this component if the specified condition evaluates to the 'True' state. If a component is disabled, it will not be installed, regardless of the 'Action' state associated with the component.",), -('Component','Attributes','N',None, None, None, None, None, None, 'Remote execution option, one of irsEnum',), -('Component','Component','N',None, None, None, None, 'Identifier',None, 'Primary key used to identify a particular component record.',), -('Component','ComponentId','Y',None, None, None, None, 'Guid',None, 'A string GUID unique to this component, version, and language.',), -('Component','Directory_','N',None, None, 'Directory',1,'Identifier',None, 'Required key of a Directory table record. This is actually a property name whose value contains the actual path, set either by the AppSearch action or with the default setting obtained from the Directory table.',), -('Component','KeyPath','Y',None, None, 'File;Registry;ODBCDataSource',1,'Identifier',None, 'Either the primary key into the File table, Registry table, or ODBCDataSource table. This extract path is stored when the component is installed, and is used to detect the presence of the component and to return the path to it.',), -('Icon','Name','N',None, None, None, None, 'Identifier',None, 'Primary key. Name of the icon file.',), -('Icon','Data','N',None, None, None, None, 'Binary',None, 'Binary stream. The binary icon data in PE (.DLL or .EXE) or icon (.ICO) format.',), -('ProgId','Description','Y',None, None, None, None, 'Text',None, 'Localized description for the Program identifier.',), -('ProgId','Icon_','Y',None, None, 'Icon',1,'Identifier',None, 'Optional foreign key into the Icon Table, specifying the icon file associated with this ProgId. Will be written under the DefaultIcon key.',), -('ProgId','IconIndex','Y',-32767,32767,None, None, None, None, 'Optional icon index.',), -('ProgId','ProgId','N',None, None, None, None, 'Text',None, 'The Program Identifier. Primary key.',), -('ProgId','Class_','Y',None, None, 'Class',1,'Guid',None, 'The CLSID of an OLE factory corresponding to the ProgId.',), -('ProgId','ProgId_Parent','Y',None, None, 'ProgId',1,'Text',None, 'The Parent Program Identifier. If specified, the ProgId column becomes a version independent prog id.',), -('ComboBox','Text','Y',None, None, None, None, 'Formatted',None, 'The visible text to be assigned to the item. Optional. If this entry or the entire column is missing, the text is the same as the value.',), -('ComboBox','Property','N',None, None, None, None, 'Identifier',None, 'A named property to be tied to this item. All the items tied to the same property become part of the same combobox.',), -('ComboBox','Value','N',None, None, None, None, 'Formatted',None, 'The value string associated with this item. Selecting the line will set the associated property to this value.',), -('ComboBox','Order','N',1,32767,None, None, None, None, 'A positive integer used to determine the ordering of the items within one list.\tThe integers do not have to be consecutive.',), -('CompLocator','Type','Y',0,1,None, None, None, None, 'A boolean value that determines if the registry value is a filename or a directory location.',), -('CompLocator','Signature_','N',None, None, None, None, 'Identifier',None, 'The table key. The Signature_ represents a unique file signature and is also the foreign key in the Signature table.',), -('CompLocator','ComponentId','N',None, None, None, None, 'Guid',None, 'A string GUID unique to this component, version, and language.',), -('Complus','Component_','N',None, None, 'Component',1,'Identifier',None, 'Foreign key referencing Component that controls the ComPlus component.',), -('Complus','ExpType','Y',0,32767,None, None, None, None, 'ComPlus component attributes.',), -('Directory','Directory','N',None, None, None, None, 'Identifier',None, 'Unique identifier for directory entry, primary key. If a property by this name is defined, it contains the full path to the directory.',), -('Directory','DefaultDir','N',None, None, None, None, 'DefaultDir',None, "The default sub-path under parent's path.",), -('Directory','Directory_Parent','Y',None, None, 'Directory',1,'Identifier',None, 'Reference to the entry in this table specifying the default parent directory. A record parented to itself or with a Null parent represents a root of the install tree.',), -('Control','Type','N',None, None, None, None, 'Identifier',None, 'The type of the control.',), -('Control','Y','N',0,32767,None, None, None, None, 'Vertical coordinate of the upper left corner of the bounding rectangle of the control.',), -('Control','Text','Y',None, None, None, None, 'Formatted',None, 'A string used to set the initial text contained within a control (if appropriate).',), -('Control','Property','Y',None, None, None, None, 'Identifier',None, 'The name of a defined property to be linked to this control. ',), -('Control','Attributes','Y',0,2147483647,None, None, None, None, 'A 32-bit word that specifies the attribute flags to be applied to this control.',), -('Control','Height','N',0,32767,None, None, None, None, 'Height of the bounding rectangle of the control.',), -('Control','Width','N',0,32767,None, None, None, None, 'Width of the bounding rectangle of the control.',), -('Control','X','N',0,32767,None, None, None, None, 'Horizontal coordinate of the upper left corner of the bounding rectangle of the control.',), -('Control','Control','N',None, None, None, None, 'Identifier',None, 'Name of the control. This name must be unique within a dialog, but can repeat on different dialogs. ',), -('Control','Control_Next','Y',None, None, 'Control',2,'Identifier',None, 'The name of an other control on the same dialog. This link defines the tab order of the controls. The links have to form one or more cycles!',), -('Control','Dialog_','N',None, None, 'Dialog',1,'Identifier',None, 'External key to the Dialog table, name of the dialog.',), -('Control','Help','Y',None, None, None, None, 'Text',None, 'The help strings used with the button. The text is optional. ',), -('Dialog','Attributes','Y',0,2147483647,None, None, None, None, 'A 32-bit word that specifies the attribute flags to be applied to this dialog.',), -('Dialog','Height','N',0,32767,None, None, None, None, 'Height of the bounding rectangle of the dialog.',), -('Dialog','Width','N',0,32767,None, None, None, None, 'Width of the bounding rectangle of the dialog.',), -('Dialog','Dialog','N',None, None, None, None, 'Identifier',None, 'Name of the dialog.',), -('Dialog','Control_Cancel','Y',None, None, 'Control',2,'Identifier',None, 'Defines the cancel control. Hitting escape or clicking on the close icon on the dialog is equivalent to pushing this button.',), -('Dialog','Control_Default','Y',None, None, 'Control',2,'Identifier',None, 'Defines the default control. Hitting return is equivalent to pushing this button.',), -('Dialog','Control_First','N',None, None, 'Control',2,'Identifier',None, 'Defines the control that has the focus when the dialog is created.',), -('Dialog','HCentering','N',0,100,None, None, None, None, 'Horizontal position of the dialog on a 0-100 scale. 0 means left end, 100 means right end of the screen, 50 center.',), -('Dialog','Title','Y',None, None, None, None, 'Formatted',None, "A text string specifying the title to be displayed in the title bar of the dialog's window.",), -('Dialog','VCentering','N',0,100,None, None, None, None, 'Vertical position of the dialog on a 0-100 scale. 0 means top end, 100 means bottom end of the screen, 50 center.',), -('ControlCondition','Action','N',None, None, None, None, None, 'Default;Disable;Enable;Hide;Show','The desired action to be taken on the specified control.',), -('ControlCondition','Condition','N',None, None, None, None, 'Condition',None, 'A standard conditional statement that specifies under which conditions the action should be triggered.',), -('ControlCondition','Dialog_','N',None, None, 'Dialog',1,'Identifier',None, 'A foreign key to the Dialog table, name of the dialog.',), -('ControlCondition','Control_','N',None, None, 'Control',2,'Identifier',None, 'A foreign key to the Control table, name of the control.',), -('ControlEvent','Condition','Y',None, None, None, None, 'Condition',None, 'A standard conditional statement that specifies under which conditions an event should be triggered.',), -('ControlEvent','Ordering','Y',0,2147483647,None, None, None, None, 'An integer used to order several events tied to the same control. Can be left blank.',), -('ControlEvent','Argument','N',None, None, None, None, 'Formatted',None, 'A value to be used as a modifier when triggering a particular event.',), -('ControlEvent','Dialog_','N',None, None, 'Dialog',1,'Identifier',None, 'A foreign key to the Dialog table, name of the dialog.',), -('ControlEvent','Control_','N',None, None, 'Control',2,'Identifier',None, 'A foreign key to the Control table, name of the control',), -('ControlEvent','Event','N',None, None, None, None, 'Formatted',None, 'An identifier that specifies the type of the event that should take place when the user interacts with control specified by the first two entries.',), -('CreateFolder','Component_','N',None, None, 'Component',1,'Identifier',None, 'Foreign key into the Component table.',), -('CreateFolder','Directory_','N',None, None, 'Directory',1,'Identifier',None, 'Primary key, could be foreign key into the Directory table.',), -('CustomAction','Type','N',1,16383,None, None, None, None, 'The numeric custom action type, consisting of source location, code type, entry, option flags.',), -('CustomAction','Action','N',None, None, None, None, 'Identifier',None, 'Primary key, name of action, normally appears in sequence table unless private use.',), -('CustomAction','Source','Y',None, None, None, None, 'CustomSource',None, 'The table reference of the source of the code.',), -('CustomAction','Target','Y',None, None, None, None, 'Formatted',None, 'Execution parameter, depends on the type of custom action',), -('DrLocator','Signature_','N',None, None, None, None, 'Identifier',None, 'The Signature_ represents a unique file signature and is also the foreign key in the Signature table.',), -('DrLocator','Path','Y',None, None, None, None, 'AnyPath',None, 'The path on the user system. This is either a subpath below the value of the Parent or a full path. The path may contain properties enclosed within [ ] that will be expanded.',), -('DrLocator','Depth','Y',0,32767,None, None, None, None, 'The depth below the path to which the Signature_ is recursively searched. If absent, the depth is assumed to be 0.',), -('DrLocator','Parent','Y',None, None, None, None, 'Identifier',None, 'The parent file signature. It is also a foreign key in the Signature table. If null and the Path column does not expand to a full path, then all the fixed drives of the user system are searched using the Path.',), -('DuplicateFile','File_','N',None, None, 'File',1,'Identifier',None, 'Foreign key referencing the source file to be duplicated.',), -('DuplicateFile','Component_','N',None, None, 'Component',1,'Identifier',None, 'Foreign key referencing Component that controls the duplicate file.',), -('DuplicateFile','DestFolder','Y',None, None, None, None, 'Identifier',None, 'Name of a property whose value is assumed to resolve to the full pathname to a destination folder.',), -('DuplicateFile','DestName','Y',None, None, None, None, 'Filename',None, 'Filename to be given to the duplicate file.',), -('DuplicateFile','FileKey','N',None, None, None, None, 'Identifier',None, 'Primary key used to identify a particular file entry',), -('Environment','Name','N',None, None, None, None, 'Text',None, 'The name of the environmental value.',), -('Environment','Value','Y',None, None, None, None, 'Formatted',None, 'The value to set in the environmental settings.',), -('Environment','Component_','N',None, None, 'Component',1,'Identifier',None, 'Foreign key into the Component table referencing component that controls the installing of the environmental value.',), -('Environment','Environment','N',None, None, None, None, 'Identifier',None, 'Unique identifier for the environmental variable setting',), -('Error','Error','N',0,32767,None, None, None, None, 'Integer error number, obtained from header file IError(...) macros.',), -('Error','Message','Y',None, None, None, None, 'Template',None, 'Error formatting template, obtained from user ed. or localizers.',), -('EventMapping','Dialog_','N',None, None, 'Dialog',1,'Identifier',None, 'A foreign key to the Dialog table, name of the Dialog.',), -('EventMapping','Control_','N',None, None, 'Control',2,'Identifier',None, 'A foreign key to the Control table, name of the control.',), -('EventMapping','Event','N',None, None, None, None, 'Identifier',None, 'An identifier that specifies the type of the event that the control subscribes to.',), -('EventMapping','Attribute','N',None, None, None, None, 'Identifier',None, 'The name of the control attribute, that is set when this event is received.',), -('Extension','Feature_','N',None, None, 'Feature',1,'Identifier',None, 'Required foreign key into the Feature Table, specifying the feature to validate or install in order for the CLSID factory to be operational.',), -('Extension','Component_','N',None, None, 'Component',1,'Identifier',None, 'Required foreign key into the Component Table, specifying the component for which to return a path when called through LocateComponent.',), -('Extension','Extension','N',None, None, None, None, 'Text',None, 'The extension associated with the table row.',), -('Extension','MIME_','Y',None, None, 'MIME',1,'Text',None, 'Optional Context identifier, typically "type/format" associated with the extension',), -('Extension','ProgId_','Y',None, None, 'ProgId',1,'Text',None, 'Optional ProgId associated with this extension.',), -('MIME','CLSID','Y',None, None, None, None, 'Guid',None, 'Optional associated CLSID.',), -('MIME','ContentType','N',None, None, None, None, 'Text',None, 'Primary key. Context identifier, typically "type/format".',), -('MIME','Extension_','N',None, None, 'Extension',1,'Text',None, 'Optional associated extension (without dot)',), -('FeatureComponents','Feature_','N',None, None, 'Feature',1,'Identifier',None, 'Foreign key into Feature table.',), -('FeatureComponents','Component_','N',None, None, 'Component',1,'Identifier',None, 'Foreign key into Component table.',), -('FileSFPCatalog','File_','N',None, None, 'File',1,'Identifier',None, 'File associated with the catalog',), -('FileSFPCatalog','SFPCatalog_','N',None, None, 'SFPCatalog',1,'Filename',None, 'Catalog associated with the file',), -('SFPCatalog','SFPCatalog','N',None, None, None, None, 'Filename',None, 'File name for the catalog.',), -('SFPCatalog','Catalog','N',None, None, None, None, 'Binary',None, 'SFP Catalog',), -('SFPCatalog','Dependency','Y',None, None, None, None, 'Formatted',None, 'Parent catalog - only used by SFP',), -('Font','File_','N',None, None, 'File',1,'Identifier',None, 'Primary key, foreign key into File table referencing font file.',), -('Font','FontTitle','Y',None, None, None, None, 'Text',None, 'Font name.',), -('IniFile','Action','N',None, None, None, None, None, '0;1;3','The type of modification to be made, one of iifEnum',), -('IniFile','Value','N',None, None, None, None, 'Formatted',None, 'The value to be written.',), -('IniFile','Component_','N',None, None, 'Component',1,'Identifier',None, 'Foreign key into the Component table referencing component that controls the installing of the .INI value.',), -('IniFile','FileName','N',None, None, None, None, 'Filename',None, 'The .INI file name in which to write the information',), -('IniFile','IniFile','N',None, None, None, None, 'Identifier',None, 'Primary key, non-localized token.',), -('IniFile','DirProperty','Y',None, None, None, None, 'Identifier',None, 'Foreign key into the Directory table denoting the directory where the .INI file is.',), -('IniFile','Key','N',None, None, None, None, 'Formatted',None, 'The .INI file key below Section.',), -('IniFile','Section','N',None, None, None, None, 'Formatted',None, 'The .INI file Section.',), -('IniLocator','Type','Y',0,2,None, None, None, None, 'An integer value that determines if the .INI value read is a filename or a directory location or to be used as is w/o interpretation.',), -('IniLocator','Signature_','N',None, None, None, None, 'Identifier',None, 'The table key. The Signature_ represents a unique file signature and is also the foreign key in the Signature table.',), -('IniLocator','FileName','N',None, None, None, None, 'Filename',None, 'The .INI file name.',), -('IniLocator','Key','N',None, None, None, None, 'Text',None, 'Key value (followed by an equals sign in INI file).',), -('IniLocator','Section','N',None, None, None, None, 'Text',None, 'Section name within in file (within square brackets in INI file).',), -('IniLocator','Field','Y',0,32767,None, None, None, None, 'The field in the .INI line. If Field is null or 0 the entire line is read.',), -('InstallExecuteSequence','Action','N',None, None, None, None, 'Identifier',None, 'Name of action to invoke, either in the engine or the handler DLL.',), -('InstallExecuteSequence','Condition','Y',None, None, None, None, 'Condition',None, 'Optional expression which skips the action if evaluates to expFalse.If the expression syntax is invalid, the engine will terminate, returning iesBadActionData.',), -('InstallExecuteSequence','Sequence','Y',-4,32767,None, None, None, None, 'Number that determines the sort order in which the actions are to be executed. Leave blank to suppress action.',), -('InstallUISequence','Action','N',None, None, None, None, 'Identifier',None, 'Name of action to invoke, either in the engine or the handler DLL.',), -('InstallUISequence','Condition','Y',None, None, None, None, 'Condition',None, 'Optional expression which skips the action if evaluates to expFalse.If the expression syntax is invalid, the engine will terminate, returning iesBadActionData.',), -('InstallUISequence','Sequence','Y',-4,32767,None, None, None, None, 'Number that determines the sort order in which the actions are to be executed. Leave blank to suppress action.',), -('IsolatedComponent','Component_Application','N',None, None, 'Component',1,'Identifier',None, 'Key to Component table item for application',), -('IsolatedComponent','Component_Shared','N',None, None, 'Component',1,'Identifier',None, 'Key to Component table item to be isolated',), -('LaunchCondition','Description','N',None, None, None, None, 'Formatted',None, 'Localizable text to display when condition fails and install must abort.',), -('LaunchCondition','Condition','N',None, None, None, None, 'Condition',None, 'Expression which must evaluate to TRUE in order for install to commence.',), -('ListBox','Text','Y',None, None, None, None, 'Text',None, 'The visible text to be assigned to the item. Optional. If this entry or the entire column is missing, the text is the same as the value.',), -('ListBox','Property','N',None, None, None, None, 'Identifier',None, 'A named property to be tied to this item. All the items tied to the same property become part of the same listbox.',), -('ListBox','Value','N',None, None, None, None, 'Formatted',None, 'The value string associated with this item. Selecting the line will set the associated property to this value.',), -('ListBox','Order','N',1,32767,None, None, None, None, 'A positive integer used to determine the ordering of the items within one list..The integers do not have to be consecutive.',), -('ListView','Text','Y',None, None, None, None, 'Text',None, 'The visible text to be assigned to the item. Optional. If this entry or the entire column is missing, the text is the same as the value.',), -('ListView','Property','N',None, None, None, None, 'Identifier',None, 'A named property to be tied to this item. All the items tied to the same property become part of the same listview.',), -('ListView','Value','N',None, None, None, None, 'Identifier',None, 'The value string associated with this item. Selecting the line will set the associated property to this value.',), -('ListView','Order','N',1,32767,None, None, None, None, 'A positive integer used to determine the ordering of the items within one list..The integers do not have to be consecutive.',), -('ListView','Binary_','Y',None, None, 'Binary',1,'Identifier',None, 'The name of the icon to be displayed with the icon. The binary information is looked up from the Binary Table.',), -('LockPermissions','Table','N',None, None, None, None, 'Identifier','Directory;File;Registry','Reference to another table name',), -('LockPermissions','Domain','Y',None, None, None, None, 'Formatted',None, 'Domain name for user whose permissions are being set. (usually a property)',), -('LockPermissions','LockObject','N',None, None, None, None, 'Identifier',None, 'Foreign key into Registry or File table',), -('LockPermissions','Permission','Y',-2147483647,2147483647,None, None, None, None, 'Permission Access mask. Full Control = 268435456 (GENERIC_ALL = 0x10000000)',), -('LockPermissions','User','N',None, None, None, None, 'Formatted',None, 'User for permissions to be set. (usually a property)',), -('Media','Source','Y',None, None, None, None, 'Property',None, 'The property defining the location of the cabinet file.',), -('Media','Cabinet','Y',None, None, None, None, 'Cabinet',None, 'If some or all of the files stored on the media are compressed in a cabinet, the name of that cabinet.',), -('Media','DiskId','N',1,32767,None, None, None, None, 'Primary key, integer to determine sort order for table.',), -('Media','DiskPrompt','Y',None, None, None, None, 'Text',None, 'Disk name: the visible text actually printed on the disk. This will be used to prompt the user when this disk needs to be inserted.',), -('Media','LastSequence','N',0,32767,None, None, None, None, 'File sequence number for the last file for this media.',), -('Media','VolumeLabel','Y',None, None, None, None, 'Text',None, 'The label attributed to the volume.',), -('ModuleComponents','Component','N',None, None, 'Component',1,'Identifier',None, 'Component contained in the module.',), -('ModuleComponents','Language','N',None, None, 'ModuleSignature',2,None, None, 'Default language ID for module (may be changed by transform).',), -('ModuleComponents','ModuleID','N',None, None, 'ModuleSignature',1,'Identifier',None, 'Module containing the component.',), -('ModuleSignature','Language','N',None, None, None, None, None, None, 'Default decimal language of module.',), -('ModuleSignature','Version','N',None, None, None, None, 'Version',None, 'Version of the module.',), -('ModuleSignature','ModuleID','N',None, None, None, None, 'Identifier',None, 'Module identifier (String.GUID).',), -('ModuleDependency','ModuleID','N',None, None, 'ModuleSignature',1,'Identifier',None, 'Module requiring the dependency.',), -('ModuleDependency','ModuleLanguage','N',None, None, 'ModuleSignature',2,None, None, 'Language of module requiring the dependency.',), -('ModuleDependency','RequiredID','N',None, None, None, None, None, None, 'String.GUID of required module.',), -('ModuleDependency','RequiredLanguage','N',None, None, None, None, None, None, 'LanguageID of the required module.',), -('ModuleDependency','RequiredVersion','Y',None, None, None, None, 'Version',None, 'Version of the required version.',), -('ModuleExclusion','ModuleID','N',None, None, 'ModuleSignature',1,'Identifier',None, 'String.GUID of module with exclusion requirement.',), -('ModuleExclusion','ModuleLanguage','N',None, None, 'ModuleSignature',2,None, None, 'LanguageID of module with exclusion requirement.',), -('ModuleExclusion','ExcludedID','N',None, None, None, None, None, None, 'String.GUID of excluded module.',), -('ModuleExclusion','ExcludedLanguage','N',None, None, None, None, None, None, 'Language of excluded module.',), -('ModuleExclusion','ExcludedMaxVersion','Y',None, None, None, None, 'Version',None, 'Maximum version of excluded module.',), -('ModuleExclusion','ExcludedMinVersion','Y',None, None, None, None, 'Version',None, 'Minimum version of excluded module.',), -('MoveFile','Component_','N',None, None, 'Component',1,'Identifier',None, 'If this component is not "selected" for installation or removal, no action will be taken on the associated MoveFile entry',), -('MoveFile','DestFolder','N',None, None, None, None, 'Identifier',None, 'Name of a property whose value is assumed to resolve to the full path to the destination directory',), -('MoveFile','DestName','Y',None, None, None, None, 'Filename',None, 'Name to be given to the original file after it is moved or copied. If blank, the destination file will be given the same name as the source file',), -('MoveFile','FileKey','N',None, None, None, None, 'Identifier',None, 'Primary key that uniquely identifies a particular MoveFile record',), -('MoveFile','Options','N',0,1,None, None, None, None, 'Integer value specifying the MoveFile operating mode, one of imfoEnum',), -('MoveFile','SourceFolder','Y',None, None, None, None, 'Identifier',None, 'Name of a property whose value is assumed to resolve to the full path to the source directory',), -('MoveFile','SourceName','Y',None, None, None, None, 'Text',None, "Name of the source file(s) to be moved or copied. Can contain the '*' or '?' wildcards.",), -('MsiAssembly','Attributes','Y',None, None, None, None, None, None, 'Assembly attributes',), -('MsiAssembly','Feature_','N',None, None, 'Feature',1,'Identifier',None, 'Foreign key into Feature table.',), -('MsiAssembly','Component_','N',None, None, 'Component',1,'Identifier',None, 'Foreign key into Component table.',), -('MsiAssembly','File_Application','Y',None, None, 'File',1,'Identifier',None, 'Foreign key into File table, denoting the application context for private assemblies. Null for global assemblies.',), -('MsiAssembly','File_Manifest','Y',None, None, 'File',1,'Identifier',None, 'Foreign key into the File table denoting the manifest file for the assembly.',), -('MsiAssemblyName','Name','N',None, None, None, None, 'Text',None, 'The name part of the name-value pairs for the assembly name.',), -('MsiAssemblyName','Value','N',None, None, None, None, 'Text',None, 'The value part of the name-value pairs for the assembly name.',), -('MsiAssemblyName','Component_','N',None, None, 'Component',1,'Identifier',None, 'Foreign key into Component table.',), -('MsiDigitalCertificate','CertData','N',None, None, None, None, 'Binary',None, 'A certificate context blob for a signer certificate',), -('MsiDigitalCertificate','DigitalCertificate','N',None, None, None, None, 'Identifier',None, 'A unique identifier for the row',), -('MsiDigitalSignature','Table','N',None, None, None, None, None, 'Media','Reference to another table name (only Media table is supported)',), -('MsiDigitalSignature','DigitalCertificate_','N',None, None, 'MsiDigitalCertificate',1,'Identifier',None, 'Foreign key to MsiDigitalCertificate table identifying the signer certificate',), -('MsiDigitalSignature','Hash','Y',None, None, None, None, 'Binary',None, 'The encoded hash blob from the digital signature',), -('MsiDigitalSignature','SignObject','N',None, None, None, None, 'Text',None, 'Foreign key to Media table',), -('MsiFileHash','File_','N',None, None, 'File',1,'Identifier',None, 'Primary key, foreign key into File table referencing file with this hash',), -('MsiFileHash','Options','N',0,32767,None, None, None, None, 'Various options and attributes for this hash.',), -('MsiFileHash','HashPart1','N',None, None, None, None, None, None, 'Size of file in bytes (integer).',), -('MsiFileHash','HashPart2','N',None, None, None, None, None, None, 'Size of file in bytes (integer).',), -('MsiFileHash','HashPart3','N',None, None, None, None, None, None, 'Size of file in bytes (integer).',), -('MsiFileHash','HashPart4','N',None, None, None, None, None, None, 'Size of file in bytes (integer).',), -('MsiPatchHeaders','StreamRef','N',None, None, None, None, 'Identifier',None, 'Primary key. A unique identifier for the row.',), -('MsiPatchHeaders','Header','N',None, None, None, None, 'Binary',None, 'Binary stream. The patch header, used for patch validation.',), -('ODBCAttribute','Value','Y',None, None, None, None, 'Text',None, 'Value for ODBC driver attribute',), -('ODBCAttribute','Attribute','N',None, None, None, None, 'Text',None, 'Name of ODBC driver attribute',), -('ODBCAttribute','Driver_','N',None, None, 'ODBCDriver',1,'Identifier',None, 'Reference to ODBC driver in ODBCDriver table',), -('ODBCDriver','Description','N',None, None, None, None, 'Text',None, 'Text used as registered name for driver, non-localized',), -('ODBCDriver','File_','N',None, None, 'File',1,'Identifier',None, 'Reference to key driver file',), -('ODBCDriver','Component_','N',None, None, 'Component',1,'Identifier',None, 'Reference to associated component',), -('ODBCDriver','Driver','N',None, None, None, None, 'Identifier',None, 'Primary key, non-localized.internal token for driver',), -('ODBCDriver','File_Setup','Y',None, None, 'File',1,'Identifier',None, 'Optional reference to key driver setup DLL',), -('ODBCDataSource','Description','N',None, None, None, None, 'Text',None, 'Text used as registered name for data source',), -('ODBCDataSource','Component_','N',None, None, 'Component',1,'Identifier',None, 'Reference to associated component',), -('ODBCDataSource','DataSource','N',None, None, None, None, 'Identifier',None, 'Primary key, non-localized.internal token for data source',), -('ODBCDataSource','DriverDescription','N',None, None, None, None, 'Text',None, 'Reference to driver description, may be existing driver',), -('ODBCDataSource','Registration','N',0,1,None, None, None, None, 'Registration option: 0=machine, 1=user, others t.b.d.',), -('ODBCSourceAttribute','Value','Y',None, None, None, None, 'Text',None, 'Value for ODBC data source attribute',), -('ODBCSourceAttribute','Attribute','N',None, None, None, None, 'Text',None, 'Name of ODBC data source attribute',), -('ODBCSourceAttribute','DataSource_','N',None, None, 'ODBCDataSource',1,'Identifier',None, 'Reference to ODBC data source in ODBCDataSource table',), -('ODBCTranslator','Description','N',None, None, None, None, 'Text',None, 'Text used as registered name for translator',), -('ODBCTranslator','File_','N',None, None, 'File',1,'Identifier',None, 'Reference to key translator file',), -('ODBCTranslator','Component_','N',None, None, 'Component',1,'Identifier',None, 'Reference to associated component',), -('ODBCTranslator','File_Setup','Y',None, None, 'File',1,'Identifier',None, 'Optional reference to key translator setup DLL',), -('ODBCTranslator','Translator','N',None, None, None, None, 'Identifier',None, 'Primary key, non-localized.internal token for translator',), -('Patch','Sequence','N',0,32767,None, None, None, None, 'Primary key, sequence with respect to the media images; order must track cabinet order.',), -('Patch','Attributes','N',0,32767,None, None, None, None, 'Integer containing bit flags representing patch attributes',), -('Patch','File_','N',None, None, None, None, 'Identifier',None, 'Primary key, non-localized token, foreign key to File table, must match identifier in cabinet.',), -('Patch','Header','Y',None, None, None, None, 'Binary',None, 'Binary stream. The patch header, used for patch validation.',), -('Patch','PatchSize','N',0,2147483647,None, None, None, None, 'Size of patch in bytes (integer).',), -('Patch','StreamRef_','Y',None, None, None, None, 'Identifier',None, 'Identifier. Foreign key to the StreamRef column of the MsiPatchHeaders table.',), -('PatchPackage','Media_','N',0,32767,None, None, None, None, 'Foreign key to DiskId column of Media table. Indicates the disk containing the patch package.',), -('PatchPackage','PatchId','N',None, None, None, None, 'Guid',None, 'A unique string GUID representing this patch.',), -('PublishComponent','Feature_','N',None, None, 'Feature',1,'Identifier',None, 'Foreign key into the Feature table.',), -('PublishComponent','Component_','N',None, None, 'Component',1,'Identifier',None, 'Foreign key into the Component table.',), -('PublishComponent','ComponentId','N',None, None, None, None, 'Guid',None, 'A string GUID that represents the component id that will be requested by the alien product.',), -('PublishComponent','AppData','Y',None, None, None, None, 'Text',None, 'This is localisable Application specific data that can be associated with a Qualified Component.',), -('PublishComponent','Qualifier','N',None, None, None, None, 'Text',None, 'This is defined only when the ComponentId column is an Qualified Component Id. This is the Qualifier for ProvideComponentIndirect.',), -('RadioButton','Y','N',0,32767,None, None, None, None, 'The vertical coordinate of the upper left corner of the bounding rectangle of the radio button.',), -('RadioButton','Text','Y',None, None, None, None, 'Text',None, 'The visible title to be assigned to the radio button.',), -('RadioButton','Property','N',None, None, None, None, 'Identifier',None, 'A named property to be tied to this radio button. All the buttons tied to the same property become part of the same group.',), -('RadioButton','Height','N',0,32767,None, None, None, None, 'The height of the button.',), -('RadioButton','Width','N',0,32767,None, None, None, None, 'The width of the button.',), -('RadioButton','X','N',0,32767,None, None, None, None, 'The horizontal coordinate of the upper left corner of the bounding rectangle of the radio button.',), -('RadioButton','Value','N',None, None, None, None, 'Formatted',None, 'The value string associated with this button. Selecting the button will set the associated property to this value.',), -('RadioButton','Order','N',1,32767,None, None, None, None, 'A positive integer used to determine the ordering of the items within one list..The integers do not have to be consecutive.',), -('RadioButton','Help','Y',None, None, None, None, 'Text',None, 'The help strings used with the button. The text is optional.',), -('Registry','Name','Y',None, None, None, None, 'Formatted',None, 'The registry value name.',), -('Registry','Value','Y',None, None, None, None, 'Formatted',None, 'The registry value.',), -('Registry','Component_','N',None, None, 'Component',1,'Identifier',None, 'Foreign key into the Component table referencing component that controls the installing of the registry value.',), -('Registry','Key','N',None, None, None, None, 'RegPath',None, 'The key for the registry value.',), -('Registry','Registry','N',None, None, None, None, 'Identifier',None, 'Primary key, non-localized token.',), -('Registry','Root','N',-1,3,None, None, None, None, 'The predefined root key for the registry value, one of rrkEnum.',), -('RegLocator','Name','Y',None, None, None, None, 'Formatted',None, 'The registry value name.',), -('RegLocator','Type','Y',0,18,None, None, None, None, 'An integer value that determines if the registry value is a filename or a directory location or to be used as is w/o interpretation.',), -('RegLocator','Signature_','N',None, None, None, None, 'Identifier',None, 'The table key. The Signature_ represents a unique file signature and is also the foreign key in the Signature table. If the type is 0, the registry values refers a directory, and _Signature is not a foreign key.',), -('RegLocator','Key','N',None, None, None, None, 'RegPath',None, 'The key for the registry value.',), -('RegLocator','Root','N',0,3,None, None, None, None, 'The predefined root key for the registry value, one of rrkEnum.',), -('RemoveFile','Component_','N',None, None, 'Component',1,'Identifier',None, 'Foreign key referencing Component that controls the file to be removed.',), -('RemoveFile','FileKey','N',None, None, None, None, 'Identifier',None, 'Primary key used to identify a particular file entry',), -('RemoveFile','FileName','Y',None, None, None, None, 'WildCardFilename',None, 'Name of the file to be removed.',), -('RemoveFile','DirProperty','N',None, None, None, None, 'Identifier',None, 'Name of a property whose value is assumed to resolve to the full pathname to the folder of the file to be removed.',), -('RemoveFile','InstallMode','N',None, None, None, None, None, '1;2;3','Installation option, one of iimEnum.',), -('RemoveIniFile','Action','N',None, None, None, None, None, '2;4','The type of modification to be made, one of iifEnum.',), -('RemoveIniFile','Value','Y',None, None, None, None, 'Formatted',None, 'The value to be deleted. The value is required when Action is iifIniRemoveTag',), -('RemoveIniFile','Component_','N',None, None, 'Component',1,'Identifier',None, 'Foreign key into the Component table referencing component that controls the deletion of the .INI value.',), -('RemoveIniFile','FileName','N',None, None, None, None, 'Filename',None, 'The .INI file name in which to delete the information',), -('RemoveIniFile','DirProperty','Y',None, None, None, None, 'Identifier',None, 'Foreign key into the Directory table denoting the directory where the .INI file is.',), -('RemoveIniFile','Key','N',None, None, None, None, 'Formatted',None, 'The .INI file key below Section.',), -('RemoveIniFile','Section','N',None, None, None, None, 'Formatted',None, 'The .INI file Section.',), -('RemoveIniFile','RemoveIniFile','N',None, None, None, None, 'Identifier',None, 'Primary key, non-localized token.',), -('RemoveRegistry','Name','Y',None, None, None, None, 'Formatted',None, 'The registry value name.',), -('RemoveRegistry','Component_','N',None, None, 'Component',1,'Identifier',None, 'Foreign key into the Component table referencing component that controls the deletion of the registry value.',), -('RemoveRegistry','Key','N',None, None, None, None, 'RegPath',None, 'The key for the registry value.',), -('RemoveRegistry','Root','N',-1,3,None, None, None, None, 'The predefined root key for the registry value, one of rrkEnum',), -('RemoveRegistry','RemoveRegistry','N',None, None, None, None, 'Identifier',None, 'Primary key, non-localized token.',), -('ReserveCost','Component_','N',None, None, 'Component',1,'Identifier',None, 'Reserve a specified amount of space if this component is to be installed.',), -('ReserveCost','ReserveFolder','Y',None, None, None, None, 'Identifier',None, 'Name of a property whose value is assumed to resolve to the full path to the destination directory',), -('ReserveCost','ReserveKey','N',None, None, None, None, 'Identifier',None, 'Primary key that uniquely identifies a particular ReserveCost record',), -('ReserveCost','ReserveLocal','N',0,2147483647,None, None, None, None, 'Disk space to reserve if linked component is installed locally.',), -('ReserveCost','ReserveSource','N',0,2147483647,None, None, None, None, 'Disk space to reserve if linked component is installed to run from the source location.',), -('SelfReg','File_','N',None, None, 'File',1,'Identifier',None, 'Foreign key into the File table denoting the module that needs to be registered.',), -('SelfReg','Cost','Y',0,32767,None, None, None, None, 'The cost of registering the module.',), -('ServiceControl','Name','N',None, None, None, None, 'Formatted',None, 'Name of a service. /, \\, comma and space are invalid',), -('ServiceControl','Component_','N',None, None, 'Component',1,'Identifier',None, 'Required foreign key into the Component Table that controls the startup of the service',), -('ServiceControl','Event','N',0,187,None, None, None, None, 'Bit field: Install: 0x1 = Start, 0x2 = Stop, 0x8 = Delete, Uninstall: 0x10 = Start, 0x20 = Stop, 0x80 = Delete',), -('ServiceControl','ServiceControl','N',None, None, None, None, 'Identifier',None, 'Primary key, non-localized token.',), -('ServiceControl','Arguments','Y',None, None, None, None, 'Formatted',None, 'Arguments for the service. Separate by [~].',), -('ServiceControl','Wait','Y',0,1,None, None, None, None, 'Boolean for whether to wait for the service to fully start',), -('ServiceInstall','Name','N',None, None, None, None, 'Formatted',None, 'Internal Name of the Service',), -('ServiceInstall','Description','Y',None, None, None, None, 'Text',None, 'Description of service.',), -('ServiceInstall','Component_','N',None, None, 'Component',1,'Identifier',None, 'Required foreign key into the Component Table that controls the startup of the service',), -('ServiceInstall','Arguments','Y',None, None, None, None, 'Formatted',None, 'Arguments to include in every start of the service, passed to WinMain',), -('ServiceInstall','ServiceInstall','N',None, None, None, None, 'Identifier',None, 'Primary key, non-localized token.',), -('ServiceInstall','Dependencies','Y',None, None, None, None, 'Formatted',None, 'Other services this depends on to start. Separate by [~], and end with [~][~]',), -('ServiceInstall','DisplayName','Y',None, None, None, None, 'Formatted',None, 'External Name of the Service',), -('ServiceInstall','ErrorControl','N',-2147483647,2147483647,None, None, None, None, 'Severity of error if service fails to start',), -('ServiceInstall','LoadOrderGroup','Y',None, None, None, None, 'Formatted',None, 'LoadOrderGroup',), -('ServiceInstall','Password','Y',None, None, None, None, 'Formatted',None, 'password to run service with. (with StartName)',), -('ServiceInstall','ServiceType','N',-2147483647,2147483647,None, None, None, None, 'Type of the service',), -('ServiceInstall','StartName','Y',None, None, None, None, 'Formatted',None, 'User or object name to run service as',), -('ServiceInstall','StartType','N',0,4,None, None, None, None, 'Type of the service',), -('Shortcut','Name','N',None, None, None, None, 'Filename',None, 'The name of the shortcut to be created.',), -('Shortcut','Description','Y',None, None, None, None, 'Text',None, 'The description for the shortcut.',), -('Shortcut','Component_','N',None, None, 'Component',1,'Identifier',None, 'Foreign key into the Component table denoting the component whose selection gates the shortcut creation/deletion.',), -('Shortcut','Icon_','Y',None, None, 'Icon',1,'Identifier',None, 'Foreign key into the File table denoting the external icon file for the shortcut.',), -('Shortcut','IconIndex','Y',-32767,32767,None, None, None, None, 'The icon index for the shortcut.',), -('Shortcut','Directory_','N',None, None, 'Directory',1,'Identifier',None, 'Foreign key into the Directory table denoting the directory where the shortcut file is created.',), -('Shortcut','Target','N',None, None, None, None, 'Shortcut',None, 'The shortcut target. This is usually a property that is expanded to a file or a folder that the shortcut points to.',), -('Shortcut','Arguments','Y',None, None, None, None, 'Formatted',None, 'The command-line arguments for the shortcut.',), -('Shortcut','Shortcut','N',None, None, None, None, 'Identifier',None, 'Primary key, non-localized token.',), -('Shortcut','Hotkey','Y',0,32767,None, None, None, None, 'The hotkey for the shortcut. It has the virtual-key code for the key in the low-order byte, and the modifier flags in the high-order byte. ',), -('Shortcut','ShowCmd','Y',None, None, None, None, None, '1;3;7','The show command for the application window.The following values may be used.',), -('Shortcut','WkDir','Y',None, None, None, None, 'Identifier',None, 'Name of property defining location of working directory.',), -('Signature','FileName','N',None, None, None, None, 'Filename',None, 'The name of the file. This may contain a "short name|long name" pair.',), -('Signature','Signature','N',None, None, None, None, 'Identifier',None, 'The table key. The Signature represents a unique file signature.',), -('Signature','Languages','Y',None, None, None, None, 'Language',None, 'The languages supported by the file.',), -('Signature','MaxDate','Y',0,2147483647,None, None, None, None, 'The maximum creation date of the file.',), -('Signature','MaxSize','Y',0,2147483647,None, None, None, None, 'The maximum size of the file. ',), -('Signature','MaxVersion','Y',None, None, None, None, 'Text',None, 'The maximum version of the file.',), -('Signature','MinDate','Y',0,2147483647,None, None, None, None, 'The minimum creation date of the file.',), -('Signature','MinSize','Y',0,2147483647,None, None, None, None, 'The minimum size of the file.',), -('Signature','MinVersion','Y',None, None, None, None, 'Text',None, 'The minimum version of the file.',), -('TextStyle','TextStyle','N',None, None, None, None, 'Identifier',None, 'Name of the style. The primary key of this table. This name is embedded in the texts to indicate a style change.',), -('TextStyle','Color','Y',0,16777215,None, None, None, None, 'An integer indicating the color of the string in the RGB format (Red, Green, Blue each 0-255, RGB = R + 256*G + 256^2*B).',), -('TextStyle','FaceName','N',None, None, None, None, 'Text',None, 'A string indicating the name of the font used. Required. The string must be at most 31 characters long.',), -('TextStyle','Size','N',0,32767,None, None, None, None, 'The size of the font used. This size is given in our units (1/12 of the system font height). Assuming that the system font is set to 12 point size, this is equivalent to the point size.',), -('TextStyle','StyleBits','Y',0,15,None, None, None, None, 'A combination of style bits.',), -('TypeLib','Description','Y',None, None, None, None, 'Text',None, None, ), -('TypeLib','Feature_','N',None, None, 'Feature',1,'Identifier',None, 'Required foreign key into the Feature Table, specifying the feature to validate or install in order for the type library to be operational.',), -('TypeLib','Component_','N',None, None, 'Component',1,'Identifier',None, 'Required foreign key into the Component Table, specifying the component for which to return a path when called through LocateComponent.',), -('TypeLib','Directory_','Y',None, None, 'Directory',1,'Identifier',None, 'Optional. The foreign key into the Directory table denoting the path to the help file for the type library.',), -('TypeLib','Language','N',0,32767,None, None, None, None, 'The language of the library.',), -('TypeLib','Version','Y',0,16777215,None, None, None, None, 'The version of the library. The minor version is in the lower 8 bits of the integer. The major version is in the next 16 bits. ',), -('TypeLib','Cost','Y',0,2147483647,None, None, None, None, 'The cost associated with the registration of the typelib. This column is currently optional.',), -('TypeLib','LibID','N',None, None, None, None, 'Guid',None, 'The GUID that represents the library.',), -('UIText','Text','Y',None, None, None, None, 'Text',None, 'The localized version of the string.',), -('UIText','Key','N',None, None, None, None, 'Identifier',None, 'A unique key that identifies the particular string.',), -('Upgrade','Attributes','N',0,2147483647,None, None, None, None, 'The attributes of this product set.',), -('Upgrade','Language','Y',None, None, None, None, 'Language',None, 'A comma-separated list of languages for either products in this set or products not in this set.',), -('Upgrade','ActionProperty','N',None, None, None, None, 'UpperCase',None, 'The property to set when a product in this set is found.',), -('Upgrade','Remove','Y',None, None, None, None, 'Formatted',None, 'The list of features to remove when uninstalling a product from this set. The default is "ALL".',), -('Upgrade','UpgradeCode','N',None, None, None, None, 'Guid',None, 'The UpgradeCode GUID belonging to the products in this set.',), -('Upgrade','VersionMax','Y',None, None, None, None, 'Text',None, 'The maximum ProductVersion of the products in this set. The set may or may not include products with this particular version.',), -('Upgrade','VersionMin','Y',None, None, None, None, 'Text',None, 'The minimum ProductVersion of the products in this set. The set may or may not include products with this particular version.',), -('Verb','Sequence','Y',0,32767,None, None, None, None, 'Order within the verbs for a particular extension. Also used simply to specify the default verb.',), -('Verb','Argument','Y',None, None, None, None, 'Formatted',None, 'Optional value for the command arguments.',), -('Verb','Extension_','N',None, None, 'Extension',1,'Text',None, 'The extension associated with the table row.',), -('Verb','Verb','N',None, None, None, None, 'Text',None, 'The verb for the command.',), -('Verb','Command','Y',None, None, None, None, 'Formatted',None, 'The command text.',), -] diff --git a/PythonLib/full/msilib/sequence.py b/PythonLib/full/msilib/sequence.py deleted file mode 100644 index 7012a21f..00000000 --- a/PythonLib/full/msilib/sequence.py +++ /dev/null @@ -1,126 +0,0 @@ -AdminExecuteSequence = [ -('InstallInitialize', None, 1500), -('InstallFinalize', None, 6600), -('InstallFiles', None, 4000), -('InstallAdminPackage', None, 3900), -('FileCost', None, 900), -('CostInitialize', None, 800), -('CostFinalize', None, 1000), -('InstallValidate', None, 1400), -] - -AdminUISequence = [ -('FileCost', None, 900), -('CostInitialize', None, 800), -('CostFinalize', None, 1000), -('ExecuteAction', None, 1300), -('ExitDialog', None, -1), -('FatalError', None, -3), -('UserExit', None, -2), -] - -AdvtExecuteSequence = [ -('InstallInitialize', None, 1500), -('InstallFinalize', None, 6600), -('CostInitialize', None, 800), -('CostFinalize', None, 1000), -('InstallValidate', None, 1400), -('CreateShortcuts', None, 4500), -('MsiPublishAssemblies', None, 6250), -('PublishComponents', None, 6200), -('PublishFeatures', None, 6300), -('PublishProduct', None, 6400), -('RegisterClassInfo', None, 4600), -('RegisterExtensionInfo', None, 4700), -('RegisterMIMEInfo', None, 4900), -('RegisterProgIdInfo', None, 4800), -] - -InstallExecuteSequence = [ -('InstallInitialize', None, 1500), -('InstallFinalize', None, 6600), -('InstallFiles', None, 4000), -('FileCost', None, 900), -('CostInitialize', None, 800), -('CostFinalize', None, 1000), -('InstallValidate', None, 1400), -('CreateShortcuts', None, 4500), -('MsiPublishAssemblies', None, 6250), -('PublishComponents', None, 6200), -('PublishFeatures', None, 6300), -('PublishProduct', None, 6400), -('RegisterClassInfo', None, 4600), -('RegisterExtensionInfo', None, 4700), -('RegisterMIMEInfo', None, 4900), -('RegisterProgIdInfo', None, 4800), -('AllocateRegistrySpace', 'NOT Installed', 1550), -('AppSearch', None, 400), -('BindImage', None, 4300), -('CCPSearch', 'NOT Installed', 500), -('CreateFolders', None, 3700), -('DeleteServices', 'VersionNT', 2000), -('DuplicateFiles', None, 4210), -('FindRelatedProducts', None, 200), -('InstallODBC', None, 5400), -('InstallServices', 'VersionNT', 5800), -('IsolateComponents', None, 950), -('LaunchConditions', None, 100), -('MigrateFeatureStates', None, 1200), -('MoveFiles', None, 3800), -('PatchFiles', None, 4090), -('ProcessComponents', None, 1600), -('RegisterComPlus', None, 5700), -('RegisterFonts', None, 5300), -('RegisterProduct', None, 6100), -('RegisterTypeLibraries', None, 5500), -('RegisterUser', None, 6000), -('RemoveDuplicateFiles', None, 3400), -('RemoveEnvironmentStrings', None, 3300), -('RemoveExistingProducts', None, 6700), -('RemoveFiles', None, 3500), -('RemoveFolders', None, 3600), -('RemoveIniValues', None, 3100), -('RemoveODBC', None, 2400), -('RemoveRegistryValues', None, 2600), -('RemoveShortcuts', None, 3200), -('RMCCPSearch', 'NOT Installed', 600), -('SelfRegModules', None, 5600), -('SelfUnregModules', None, 2200), -('SetODBCFolders', None, 1100), -('StartServices', 'VersionNT', 5900), -('StopServices', 'VersionNT', 1900), -('MsiUnpublishAssemblies', None, 1750), -('UnpublishComponents', None, 1700), -('UnpublishFeatures', None, 1800), -('UnregisterClassInfo', None, 2700), -('UnregisterComPlus', None, 2100), -('UnregisterExtensionInfo', None, 2800), -('UnregisterFonts', None, 2500), -('UnregisterMIMEInfo', None, 3000), -('UnregisterProgIdInfo', None, 2900), -('UnregisterTypeLibraries', None, 2300), -('ValidateProductID', None, 700), -('WriteEnvironmentStrings', None, 5200), -('WriteIniValues', None, 5100), -('WriteRegistryValues', None, 5000), -] - -InstallUISequence = [ -('FileCost', None, 900), -('CostInitialize', None, 800), -('CostFinalize', None, 1000), -('ExecuteAction', None, 1300), -('ExitDialog', None, -1), -('FatalError', None, -3), -('UserExit', None, -2), -('AppSearch', None, 400), -('CCPSearch', 'NOT Installed', 500), -('FindRelatedProducts', None, 200), -('IsolateComponents', None, 950), -('LaunchConditions', None, 100), -('MigrateFeatureStates', None, 1200), -('RMCCPSearch', 'NOT Installed', 600), -('ValidateProductID', None, 700), -] - -tables=['AdminExecuteSequence', 'AdminUISequence', 'AdvtExecuteSequence', 'InstallExecuteSequence', 'InstallUISequence'] diff --git a/PythonLib/full/msilib/text.py b/PythonLib/full/msilib/text.py deleted file mode 100644 index c4b9e869..00000000 --- a/PythonLib/full/msilib/text.py +++ /dev/null @@ -1,129 +0,0 @@ -import msilib,os;dirname=os.path.dirname(__file__) - -ActionText = [ -('InstallValidate', 'Validating install', None), -('InstallFiles', 'Copying new files', 'File: [1], Directory: [9], Size: [6]'), -('InstallAdminPackage', 'Copying network install files', 'File: [1], Directory: [9], Size: [6]'), -('FileCost', 'Computing space requirements', None), -('CostInitialize', 'Computing space requirements', None), -('CostFinalize', 'Computing space requirements', None), -('CreateShortcuts', 'Creating shortcuts', 'Shortcut: [1]'), -('PublishComponents', 'Publishing Qualified Components', 'Component ID: [1], Qualifier: [2]'), -('PublishFeatures', 'Publishing Product Features', 'Feature: [1]'), -('PublishProduct', 'Publishing product information', None), -('RegisterClassInfo', 'Registering Class servers', 'Class Id: [1]'), -('RegisterExtensionInfo', 'Registering extension servers', 'Extension: [1]'), -('RegisterMIMEInfo', 'Registering MIME info', 'MIME Content Type: [1], Extension: [2]'), -('RegisterProgIdInfo', 'Registering program identifiers', 'ProgId: [1]'), -('AllocateRegistrySpace', 'Allocating registry space', 'Free space: [1]'), -('AppSearch', 'Searching for installed applications', 'Property: [1], Signature: [2]'), -('BindImage', 'Binding executables', 'File: [1]'), -('CCPSearch', 'Searching for qualifying products', None), -('CreateFolders', 'Creating folders', 'Folder: [1]'), -('DeleteServices', 'Deleting services', 'Service: [1]'), -('DuplicateFiles', 'Creating duplicate files', 'File: [1], Directory: [9], Size: [6]'), -('FindRelatedProducts', 'Searching for related applications', 'Found application: [1]'), -('InstallODBC', 'Installing ODBC components', None), -('InstallServices', 'Installing new services', 'Service: [2]'), -('LaunchConditions', 'Evaluating launch conditions', None), -('MigrateFeatureStates', 'Migrating feature states from related applications', 'Application: [1]'), -('MoveFiles', 'Moving files', 'File: [1], Directory: [9], Size: [6]'), -('PatchFiles', 'Patching files', 'File: [1], Directory: [2], Size: [3]'), -('ProcessComponents', 'Updating component registration', None), -('RegisterComPlus', 'Registering COM+ Applications and Components', 'AppId: [1]{{, AppType: [2], Users: [3], RSN: [4]}}'), -('RegisterFonts', 'Registering fonts', 'Font: [1]'), -('RegisterProduct', 'Registering product', '[1]'), -('RegisterTypeLibraries', 'Registering type libraries', 'LibID: [1]'), -('RegisterUser', 'Registering user', '[1]'), -('RemoveDuplicateFiles', 'Removing duplicated files', 'File: [1], Directory: [9]'), -('RemoveEnvironmentStrings', 'Updating environment strings', 'Name: [1], Value: [2], Action [3]'), -('RemoveExistingProducts', 'Removing applications', 'Application: [1], Command line: [2]'), -('RemoveFiles', 'Removing files', 'File: [1], Directory: [9]'), -('RemoveFolders', 'Removing folders', 'Folder: [1]'), -('RemoveIniValues', 'Removing INI files entries', 'File: [1], Section: [2], Key: [3], Value: [4]'), -('RemoveODBC', 'Removing ODBC components', None), -('RemoveRegistryValues', 'Removing system registry values', 'Key: [1], Name: [2]'), -('RemoveShortcuts', 'Removing shortcuts', 'Shortcut: [1]'), -('RMCCPSearch', 'Searching for qualifying products', None), -('SelfRegModules', 'Registering modules', 'File: [1], Folder: [2]'), -('SelfUnregModules', 'Unregistering modules', 'File: [1], Folder: [2]'), -('SetODBCFolders', 'Initializing ODBC directories', None), -('StartServices', 'Starting services', 'Service: [1]'), -('StopServices', 'Stopping services', 'Service: [1]'), -('UnpublishComponents', 'Unpublishing Qualified Components', 'Component ID: [1], Qualifier: [2]'), -('UnpublishFeatures', 'Unpublishing Product Features', 'Feature: [1]'), -('UnregisterClassInfo', 'Unregister Class servers', 'Class Id: [1]'), -('UnregisterComPlus', 'Unregistering COM+ Applications and Components', 'AppId: [1]{{, AppType: [2]}}'), -('UnregisterExtensionInfo', 'Unregistering extension servers', 'Extension: [1]'), -('UnregisterFonts', 'Unregistering fonts', 'Font: [1]'), -('UnregisterMIMEInfo', 'Unregistering MIME info', 'MIME Content Type: [1], Extension: [2]'), -('UnregisterProgIdInfo', 'Unregistering program identifiers', 'ProgId: [1]'), -('UnregisterTypeLibraries', 'Unregistering type libraries', 'LibID: [1]'), -('WriteEnvironmentStrings', 'Updating environment strings', 'Name: [1], Value: [2], Action [3]'), -('WriteIniValues', 'Writing INI files values', 'File: [1], Section: [2], Key: [3], Value: [4]'), -('WriteRegistryValues', 'Writing system registry values', 'Key: [1], Name: [2], Value: [3]'), -('Advertise', 'Advertising application', None), -('GenerateScript', 'Generating script operations for action:', '[1]'), -('InstallSFPCatalogFile', 'Installing system catalog', 'File: [1], Dependencies: [2]'), -('MsiPublishAssemblies', 'Publishing assembly information', 'Application Context:[1], Assembly Name:[2]'), -('MsiUnpublishAssemblies', 'Unpublishing assembly information', 'Application Context:[1], Assembly Name:[2]'), -('Rollback', 'Rolling back action:', '[1]'), -('RollbackCleanup', 'Removing backup files', 'File: [1]'), -('UnmoveFiles', 'Removing moved files', 'File: [1], Directory: [9]'), -('UnpublishProduct', 'Unpublishing product information', None), -] - -UIText = [ -('AbsentPath', None), -('bytes', 'bytes'), -('GB', 'GB'), -('KB', 'KB'), -('MB', 'MB'), -('MenuAbsent', 'Entire feature will be unavailable'), -('MenuAdvertise', 'Feature will be installed when required'), -('MenuAllCD', 'Entire feature will be installed to run from CD'), -('MenuAllLocal', 'Entire feature will be installed on local hard drive'), -('MenuAllNetwork', 'Entire feature will be installed to run from network'), -('MenuCD', 'Will be installed to run from CD'), -('MenuLocal', 'Will be installed on local hard drive'), -('MenuNetwork', 'Will be installed to run from network'), -('ScriptInProgress', 'Gathering required information...'), -('SelAbsentAbsent', 'This feature will remain uninstalled'), -('SelAbsentAdvertise', 'This feature will be set to be installed when required'), -('SelAbsentCD', 'This feature will be installed to run from CD'), -('SelAbsentLocal', 'This feature will be installed on the local hard drive'), -('SelAbsentNetwork', 'This feature will be installed to run from the network'), -('SelAdvertiseAbsent', 'This feature will become unavailable'), -('SelAdvertiseAdvertise', 'Will be installed when required'), -('SelAdvertiseCD', 'This feature will be available to run from CD'), -('SelAdvertiseLocal', 'This feature will be installed on your local hard drive'), -('SelAdvertiseNetwork', 'This feature will be available to run from the network'), -('SelCDAbsent', "This feature will be uninstalled completely, you won't be able to run it from CD"), -('SelCDAdvertise', 'This feature will change from run from CD state to set to be installed when required'), -('SelCDCD', 'This feature will remain to be run from CD'), -('SelCDLocal', 'This feature will change from run from CD state to be installed on the local hard drive'), -('SelChildCostNeg', 'This feature frees up [1] on your hard drive.'), -('SelChildCostPos', 'This feature requires [1] on your hard drive.'), -('SelCostPending', 'Compiling cost for this feature...'), -('SelLocalAbsent', 'This feature will be completely removed'), -('SelLocalAdvertise', 'This feature will be removed from your local hard drive, but will be set to be installed when required'), -('SelLocalCD', 'This feature will be removed from your local hard drive, but will be still available to run from CD'), -('SelLocalLocal', 'This feature will remain on you local hard drive'), -('SelLocalNetwork', 'This feature will be removed from your local hard drive, but will be still available to run from the network'), -('SelNetworkAbsent', "This feature will be uninstalled completely, you won't be able to run it from the network"), -('SelNetworkAdvertise', 'This feature will change from run from network state to set to be installed when required'), -('SelNetworkLocal', 'This feature will change from run from network state to be installed on the local hard drive'), -('SelNetworkNetwork', 'This feature will remain to be run from the network'), -('SelParentCostNegNeg', 'This feature frees up [1] on your hard drive. It has [2] of [3] subfeatures selected. The subfeatures free up [4] on your hard drive.'), -('SelParentCostNegPos', 'This feature frees up [1] on your hard drive. It has [2] of [3] subfeatures selected. The subfeatures require [4] on your hard drive.'), -('SelParentCostPosNeg', 'This feature requires [1] on your hard drive. It has [2] of [3] subfeatures selected. The subfeatures free up [4] on your hard drive.'), -('SelParentCostPosPos', 'This feature requires [1] on your hard drive. It has [2] of [3] subfeatures selected. The subfeatures require [4] on your hard drive.'), -('TimeRemaining', 'Time remaining: {[1] minutes }{[2] seconds}'), -('VolumeCostAvailable', 'Available'), -('VolumeCostDifference', 'Difference'), -('VolumeCostRequired', 'Required'), -('VolumeCostSize', 'Disk Size'), -('VolumeCostVolume', 'Volume'), -] - -tables=['ActionText', 'UIText'] diff --git a/PythonLib/full/multiprocessing/connection.py b/PythonLib/full/multiprocessing/connection.py index 81ed2ae5..fc00d286 100644 --- a/PythonLib/full/multiprocessing/connection.py +++ b/PythonLib/full/multiprocessing/connection.py @@ -11,15 +11,14 @@ import errno import io +import itertools import os import sys import socket import struct -import time import tempfile -import itertools +import time -import _multiprocessing from . import util @@ -28,6 +27,7 @@ _ForkingPickler = reduction.ForkingPickler try: + import _multiprocessing import _winapi from _winapi import WAIT_OBJECT_0, WAIT_ABANDONED_0, WAIT_TIMEOUT, INFINITE except ImportError: @@ -39,7 +39,9 @@ # # -BUFSIZE = 8192 +# 64 KiB is the default PIPE buffer size of most POSIX platforms. +BUFSIZE = 64 * 1024 + # A very generous timeout when it comes to local connections... CONNECTION_TIMEOUT = 20. @@ -74,7 +76,7 @@ def arbitrary_address(family): if family == 'AF_INET': return ('localhost', 0) elif family == 'AF_UNIX': - return tempfile.mktemp(prefix='listener-', dir=util.get_temp_dir()) + return tempfile.mktemp(prefix='sock-', dir=util.get_temp_dir()) elif family == 'AF_PIPE': return tempfile.mktemp(prefix=r'\\.\pipe\pyc-%d-%d-' % (os.getpid(), next(_mmap_counter)), dir="") @@ -179,6 +181,10 @@ def close(self): finally: self._handle = None + def _detach(self): + """Stop managing the underlying file descriptor or handle.""" + self._handle = None + def send_bytes(self, buf, offset=0, size=None): """Send the bytes data from a bytes-like object""" self._check_closed() @@ -316,22 +322,32 @@ def _recv_bytes(self, maxsize=None): try: ov, err = _winapi.ReadFile(self._handle, bsize, overlapped=True) + + sentinel = object() + return_value = sentinel try: - if err == _winapi.ERROR_IO_PENDING: - waitres = _winapi.WaitForMultipleObjects( - [ov.event], False, INFINITE) - assert waitres == WAIT_OBJECT_0 + try: + if err == _winapi.ERROR_IO_PENDING: + waitres = _winapi.WaitForMultipleObjects( + [ov.event], False, INFINITE) + assert waitres == WAIT_OBJECT_0 + except: + ov.cancel() + raise + finally: + nread, err = ov.GetOverlappedResult(True) + if err == 0: + f = io.BytesIO() + f.write(ov.getbuffer()) + return_value = f + elif err == _winapi.ERROR_MORE_DATA: + return_value = self._get_more_data(ov, maxsize) except: - ov.cancel() - raise - finally: - nread, err = ov.GetOverlappedResult(True) - if err == 0: - f = io.BytesIO() - f.write(ov.getbuffer()) - return f - elif err == _winapi.ERROR_MORE_DATA: - return self._get_more_data(ov, maxsize) + if return_value is sentinel: + raise + + if return_value is not sentinel: + return return_value except OSError as e: if e.winerror == _winapi.ERROR_BROKEN_PIPE: raise EOFError @@ -392,7 +408,8 @@ def _recv(self, size, read=_read): handle = self._handle remaining = size while remaining > 0: - chunk = read(handle, remaining) + to_read = min(BUFSIZE, remaining) + chunk = read(handle, to_read) n = len(chunk) if n == 0: if remaining == size: @@ -1012,8 +1029,20 @@ def _exhaustive_wait(handles, timeout): # returning the first signalled might create starvation issues.) L = list(handles) ready = [] + # Windows limits WaitForMultipleObjects at 64 handles, and we use a + # few for synchronisation, so we switch to batched waits at 60. + if len(L) > 60: + try: + res = _winapi.BatchedWaitForMultipleObjects(L, False, timeout) + except TimeoutError: + return [] + ready.extend(L[i] for i in res) + if res: + L = [h for i, h in enumerate(L) if i > res[0] & i not in res] + timeout = 0 while L: - res = _winapi.WaitForMultipleObjects(L, False, timeout) + short_L = L[:60] if len(L) > 60 else L + res = _winapi.WaitForMultipleObjects(short_L, False, timeout) if res == WAIT_TIMEOUT: break elif WAIT_OBJECT_0 <= res < WAIT_OBJECT_0 + len(L): diff --git a/PythonLib/full/multiprocessing/context.py b/PythonLib/full/multiprocessing/context.py index de8a2648..051d567d 100644 --- a/PythonLib/full/multiprocessing/context.py +++ b/PythonLib/full/multiprocessing/context.py @@ -145,7 +145,7 @@ def freeze_support(self): '''Check whether this is a fake forked process in a frozen executable. If so then run code specified by commandline and exit. ''' - if sys.platform == 'win32' and getattr(sys, 'frozen', False): + if self.get_start_method() == 'spawn' and getattr(sys, 'frozen', False): from .spawn import freeze_support freeze_support() @@ -167,7 +167,7 @@ def allow_connection_pickling(self): ''' # This is undocumented. In previous versions of multiprocessing # its only effect was to make socket objects inheritable on Windows. - from . import connection + from . import connection # noqa: F401 def set_executable(self, executable): '''Sets the path to a python.exe or pythonw.exe binary used to run @@ -259,13 +259,12 @@ def get_start_method(self, allow_none=False): def get_all_start_methods(self): """Returns a list of the supported start methods, default first.""" - if sys.platform == 'win32': - return ['spawn'] - else: - methods = ['spawn', 'fork'] if sys.platform == 'darwin' else ['fork', 'spawn'] - if reduction.HAVE_SEND_HANDLE: - methods.append('forkserver') - return methods + default = self._default_context.get_start_method() + start_method_names = [default] + start_method_names.extend( + name for name in _concrete_contexts if name != default + ) + return start_method_names # @@ -320,14 +319,15 @@ def _check_available(self): 'spawn': SpawnContext(), 'forkserver': ForkServerContext(), } - if sys.platform == 'darwin': - # bpo-33725: running arbitrary code after fork() is no longer reliable - # on macOS since macOS 10.14 (Mojave). Use spawn by default instead. - _default_context = DefaultContext(_concrete_contexts['spawn']) + # bpo-33725: running arbitrary code after fork() is no longer reliable + # on macOS since macOS 10.14 (Mojave). Use spawn by default instead. + # gh-84559: We changed everyones default to a thread safeish one in 3.14. + if reduction.HAVE_SEND_HANDLE and sys.platform != 'darwin': + _default_context = DefaultContext(_concrete_contexts['forkserver']) else: - _default_context = DefaultContext(_concrete_contexts['fork']) + _default_context = DefaultContext(_concrete_contexts['spawn']) -else: +else: # Windows class SpawnProcess(process.BaseProcess): _start_method = 'spawn' diff --git a/PythonLib/full/multiprocessing/dummy/__init__.py b/PythonLib/full/multiprocessing/dummy/__init__.py index 6a146860..7dc5d1c8 100644 --- a/PythonLib/full/multiprocessing/dummy/__init__.py +++ b/PythonLib/full/multiprocessing/dummy/__init__.py @@ -33,7 +33,7 @@ class DummyProcess(threading.Thread): - def __init__(self, group=None, target=None, name=None, args=(), kwargs={}): + def __init__(self, group=None, target=None, name=None, args=(), kwargs=None): threading.Thread.__init__(self, group, target, name, args, kwargs) self._pid = None self._children = weakref.WeakKeyDictionary() diff --git a/PythonLib/full/multiprocessing/forkserver.py b/PythonLib/full/multiprocessing/forkserver.py index 9fb56327..15c455a5 100644 --- a/PythonLib/full/multiprocessing/forkserver.py +++ b/PythonLib/full/multiprocessing/forkserver.py @@ -1,3 +1,4 @@ +import atexit import errno import os import selectors @@ -8,6 +9,7 @@ import threading import warnings +from . import AuthenticationError from . import connection from . import process from .context import reduction @@ -24,6 +26,7 @@ MAXFDS_TO_SEND = 256 SIGNED_STRUCT = struct.Struct('q') # large enough for pid_t +_AUTHKEY_LEN = 32 # <= PIPEBUF so it fits a single write to an empty pipe. # # Forkserver class @@ -32,6 +35,7 @@ class ForkServer(object): def __init__(self): + self._forkserver_authkey = None self._forkserver_address = None self._forkserver_alive_fd = None self._forkserver_pid = None @@ -58,6 +62,7 @@ def _stop_unlocked(self): if not util.is_abstract_socket_namespace(self._forkserver_address): os.unlink(self._forkserver_address) self._forkserver_address = None + self._forkserver_authkey = None def set_forkserver_preload(self, modules_names): '''Set list of module names to try to load in forkserver process.''' @@ -82,6 +87,7 @@ def connect_to_new_process(self, fds): process data. ''' self.ensure_running() + assert self._forkserver_authkey if len(fds) + 4 >= MAXFDS_TO_SEND: raise ValueError('too many fds') with socket.socket(socket.AF_UNIX) as client: @@ -92,6 +98,18 @@ def connect_to_new_process(self, fds): resource_tracker.getfd()] allfds += fds try: + client.setblocking(True) + wrapped_client = connection.Connection(client.fileno()) + # The other side of this exchange happens in the child as + # implemented in main(). + try: + connection.answer_challenge( + wrapped_client, self._forkserver_authkey) + connection.deliver_challenge( + wrapped_client, self._forkserver_authkey) + finally: + wrapped_client._detach() + del wrapped_client reduction.sendfds(client, allfds) return parent_r, parent_w except: @@ -119,6 +137,7 @@ def ensure_running(self): return # dead, launch it again os.close(self._forkserver_alive_fd) + self._forkserver_authkey = None self._forkserver_address = None self._forkserver_alive_fd = None self._forkserver_pid = None @@ -126,12 +145,15 @@ def ensure_running(self): cmd = ('from multiprocessing.forkserver import main; ' + 'main(%d, %d, %r, **%r)') + main_kws = {} if self._preload_modules: - desired_keys = {'main_path', 'sys_path'} data = spawn.get_preparation_data('ignore') - data = {x: y for x, y in data.items() if x in desired_keys} - else: - data = {} + if 'sys_path' in data: + main_kws['sys_path'] = data['sys_path'] + if 'init_main_from_path' in data: + main_kws['main_path'] = data['init_main_from_path'] + if 'sys_argv' in data: + main_kws['sys_argv'] = data['sys_argv'] with socket.socket(socket.AF_UNIX) as listener: address = connection.arbitrary_address('AF_UNIX') @@ -143,19 +165,31 @@ def ensure_running(self): # all client processes own the write end of the "alive" pipe; # when they all terminate the read end becomes ready. alive_r, alive_w = os.pipe() + # A short lived pipe to initialize the forkserver authkey. + authkey_r, authkey_w = os.pipe() try: - fds_to_pass = [listener.fileno(), alive_r] + fds_to_pass = [listener.fileno(), alive_r, authkey_r] + main_kws['authkey_r'] = authkey_r cmd %= (listener.fileno(), alive_r, self._preload_modules, - data) + main_kws) exe = spawn.get_executable() args = [exe] + util._args_from_interpreter_flags() args += ['-c', cmd] pid = util.spawnv_passfds(exe, args, fds_to_pass) except: os.close(alive_w) + os.close(authkey_w) raise finally: os.close(alive_r) + os.close(authkey_r) + # Authenticate our control socket to prevent access from + # processes we have not shared this key with. + try: + self._forkserver_authkey = os.urandom(_AUTHKEY_LEN) + os.write(authkey_w, self._forkserver_authkey) + finally: + os.close(authkey_w) self._forkserver_address = address self._forkserver_alive_fd = alive_w self._forkserver_pid = pid @@ -164,9 +198,21 @@ def ensure_running(self): # # -def main(listener_fd, alive_r, preload, main_path=None, sys_path=None): - '''Run forkserver.''' +def main(listener_fd, alive_r, preload, main_path=None, sys_path=None, + *, sys_argv=None, authkey_r=None): + """Run forkserver.""" + if authkey_r is not None: + try: + authkey = os.read(authkey_r, _AUTHKEY_LEN) + assert len(authkey) == _AUTHKEY_LEN, f'{len(authkey)} < {_AUTHKEY_LEN}' + finally: + os.close(authkey_r) + else: + authkey = b'' + if preload: + if sys_argv is not None: + sys.argv[:] = sys_argv if sys_path is not None: sys.path[:] = sys_path if '__main__' in preload and main_path is not None: @@ -181,6 +227,10 @@ def main(listener_fd, alive_r, preload, main_path=None, sys_path=None): except ImportError: pass + # gh-135335: flush stdout/stderr in case any of the preloaded modules + # wrote to them, otherwise children might inherit buffered data + util._flush_std_streams() + util._close_stdin() sig_r, sig_w = os.pipe() @@ -256,8 +306,24 @@ def sigchld_handler(*_unused): if listener in rfds: # Incoming fork request with listener.accept()[0] as s: - # Receive fds from client - fds = reduction.recvfds(s, MAXFDS_TO_SEND + 1) + try: + if authkey: + wrapped_s = connection.Connection(s.fileno()) + # The other side of this exchange happens in + # in connect_to_new_process(). + try: + connection.deliver_challenge( + wrapped_s, authkey) + connection.answer_challenge( + wrapped_s, authkey) + finally: + wrapped_s._detach() + del wrapped_s + # Receive fds from client + fds = reduction.recvfds(s, MAXFDS_TO_SEND + 1) + except (EOFError, BrokenPipeError, AuthenticationError): + s.close() + continue if len(fds) > MAXFDS_TO_SEND: raise RuntimeError( "Too many ({0:n}) fds to send".format( @@ -273,6 +339,8 @@ def sigchld_handler(*_unused): selector.close() unused_fds = [alive_r, child_w, sig_r, sig_w] unused_fds.extend(pid_to_fd.values()) + atexit._clear() + atexit.register(util._exit_function) code = _serve_one(child_r, fds, unused_fds, old_handlers) @@ -280,6 +348,7 @@ def sigchld_handler(*_unused): sys.excepthook(*sys.exc_info()) sys.stderr.flush() finally: + atexit._run_exitfuncs() os._exit(code) else: # Send pid to client process @@ -322,13 +391,14 @@ def _serve_one(child_r, fds, unused_fds, handlers): # def read_signed(fd): - data = b'' - length = SIGNED_STRUCT.size - while len(data) < length: - s = os.read(fd, length - len(data)) - if not s: + data = bytearray(SIGNED_STRUCT.size) + unread = memoryview(data) + while unread: + count = os.readinto(fd, unread) + if count == 0: raise EOFError('unexpected EOF') - data += s + unread = unread[count:] + return SIGNED_STRUCT.unpack(data)[0] def write_signed(fd, n): diff --git a/PythonLib/full/multiprocessing/managers.py b/PythonLib/full/multiprocessing/managers.py index 010d16e1..91bcf243 100644 --- a/PythonLib/full/multiprocessing/managers.py +++ b/PythonLib/full/multiprocessing/managers.py @@ -18,6 +18,7 @@ import threading import signal import array +import collections.abc import queue import time import types @@ -90,7 +91,10 @@ def dispatch(c, id, methodname, args=(), kwds={}): kind, result = c.recv() if kind == '#RETURN': return result - raise convert_to_error(kind, result) + try: + raise convert_to_error(kind, result) + finally: + del result # break reference cycle def convert_to_error(kind, result): if kind == '#ERROR': @@ -840,7 +844,10 @@ def _callmethod(self, methodname, args=(), kwds={}): conn = self._Client(token.address, authkey=self._authkey) dispatch(conn, None, 'decref', (token.id,)) return proxy - raise convert_to_error(kind, result) + try: + raise convert_to_error(kind, result) + finally: + del result # break reference cycle def _getvalue(self): ''' @@ -1052,12 +1059,14 @@ def close(self, *args): class AcquirerProxy(BaseProxy): - _exposed_ = ('acquire', 'release') + _exposed_ = ('acquire', 'release', 'locked') def acquire(self, blocking=True, timeout=None): args = (blocking,) if timeout is None else (blocking, timeout) return self._callmethod('acquire', args) def release(self): return self._callmethod('release') + def locked(self): + return self._callmethod('locked') def __enter__(self): return self._callmethod('acquire') def __exit__(self, exc_type, exc_val, exc_tb): @@ -1065,7 +1074,7 @@ def __exit__(self, exc_type, exc_val, exc_tb): class ConditionProxy(AcquirerProxy): - _exposed_ = ('acquire', 'release', 'wait', 'notify', 'notify_all') + _exposed_ = ('acquire', 'release', 'locked', 'wait', 'notify', 'notify_all') def wait(self, timeout=None): return self._callmethod('wait', (timeout,)) def notify(self, n=1): @@ -1153,10 +1162,10 @@ def set(self, value): BaseListProxy = MakeProxyType('BaseListProxy', ( - '__add__', '__contains__', '__delitem__', '__getitem__', '__len__', - '__mul__', '__reversed__', '__rmul__', '__setitem__', - 'append', 'count', 'extend', 'index', 'insert', 'pop', 'remove', - 'reverse', 'sort', '__imul__' + '__add__', '__contains__', '__delitem__', '__getitem__', '__imul__', + '__len__', '__mul__', '__reversed__', '__rmul__', '__setitem__', + 'append', 'clear', 'copy', 'count', 'extend', 'index', 'insert', 'pop', + 'remove', 'reverse', 'sort', )) class ListProxy(BaseListProxy): def __iadd__(self, value): @@ -1166,15 +1175,56 @@ def __imul__(self, value): self._callmethod('__imul__', (value,)) return self + __class_getitem__ = classmethod(types.GenericAlias) + +collections.abc.MutableSequence.register(BaseListProxy) -DictProxy = MakeProxyType('DictProxy', ( - '__contains__', '__delitem__', '__getitem__', '__iter__', '__len__', - '__setitem__', 'clear', 'copy', 'get', 'items', +_BaseDictProxy = MakeProxyType('_BaseDictProxy', ( + '__contains__', '__delitem__', '__getitem__', '__ior__', '__iter__', + '__len__', '__or__', '__reversed__', '__ror__', + '__setitem__', 'clear', 'copy', 'fromkeys', 'get', 'items', 'keys', 'pop', 'popitem', 'setdefault', 'update', 'values' )) -DictProxy._method_to_typeid_ = { +_BaseDictProxy._method_to_typeid_ = { '__iter__': 'Iterator', } +class DictProxy(_BaseDictProxy): + def __ior__(self, value): + self._callmethod('__ior__', (value,)) + return self + + __class_getitem__ = classmethod(types.GenericAlias) + +collections.abc.MutableMapping.register(_BaseDictProxy) + +_BaseSetProxy = MakeProxyType("_BaseSetProxy", ( + '__and__', '__class_getitem__', '__contains__', '__iand__', '__ior__', + '__isub__', '__iter__', '__ixor__', '__len__', '__or__', '__rand__', + '__ror__', '__rsub__', '__rxor__', '__sub__', '__xor__', + '__ge__', '__gt__', '__le__', '__lt__', + 'add', 'clear', 'copy', 'difference', 'difference_update', 'discard', + 'intersection', 'intersection_update', 'isdisjoint', 'issubset', + 'issuperset', 'pop', 'remove', 'symmetric_difference', + 'symmetric_difference_update', 'union', 'update', +)) + +class SetProxy(_BaseSetProxy): + def __ior__(self, value): + self._callmethod('__ior__', (value,)) + return self + def __iand__(self, value): + self._callmethod('__iand__', (value,)) + return self + def __ixor__(self, value): + self._callmethod('__ixor__', (value,)) + return self + def __isub__(self, value): + self._callmethod('__isub__', (value,)) + return self + + __class_getitem__ = classmethod(types.GenericAlias) + +collections.abc.MutableMapping.register(_BaseSetProxy) ArrayProxy = MakeProxyType('ArrayProxy', ( @@ -1227,6 +1277,7 @@ class SyncManager(BaseManager): SyncManager.register('Pool', pool.Pool, PoolProxy) SyncManager.register('list', list, ListProxy) SyncManager.register('dict', dict, DictProxy) +SyncManager.register('set', set, SetProxy) SyncManager.register('Value', Value, ValueProxy) SyncManager.register('Array', Array, ArrayProxy) SyncManager.register('Namespace', Namespace, NamespaceProxy) diff --git a/PythonLib/full/multiprocessing/pool.py b/PythonLib/full/multiprocessing/pool.py index 4f5d88cb..f9798901 100644 --- a/PythonLib/full/multiprocessing/pool.py +++ b/PythonLib/full/multiprocessing/pool.py @@ -200,7 +200,7 @@ def __init__(self, processes=None, initializer=None, initargs=(), self._initargs = initargs if processes is None: - processes = os.cpu_count() or 1 + processes = os.process_cpu_count() or 1 if processes < 1: raise ValueError("Number of processes must be at least 1") if maxtasksperchild is not None: diff --git a/PythonLib/full/multiprocessing/popen_fork.py b/PythonLib/full/multiprocessing/popen_fork.py index 625981cf..7affa1b9 100644 --- a/PythonLib/full/multiprocessing/popen_fork.py +++ b/PythonLib/full/multiprocessing/popen_fork.py @@ -1,3 +1,4 @@ +import atexit import os import signal @@ -53,6 +54,9 @@ def _send_signal(self, sig): if self.wait(timeout=0.1) is None: raise + def interrupt(self): + self._send_signal(signal.SIGINT) + def terminate(self): self._send_signal(signal.SIGTERM) @@ -66,10 +70,13 @@ def _launch(self, process_obj): self.pid = os.fork() if self.pid == 0: try: + atexit._clear() + atexit.register(util._exit_function) os.close(parent_r) os.close(parent_w) code = process_obj._bootstrap(parent_sentinel=child_r) finally: + atexit._run_exitfuncs() os._exit(code) else: os.close(child_w) diff --git a/PythonLib/full/multiprocessing/popen_spawn_posix.py b/PythonLib/full/multiprocessing/popen_spawn_posix.py index 24b86345..cccd659a 100644 --- a/PythonLib/full/multiprocessing/popen_spawn_posix.py +++ b/PythonLib/full/multiprocessing/popen_spawn_posix.py @@ -57,6 +57,10 @@ def _launch(self, process_obj): self._fds.extend([child_r, child_w]) self.pid = util.spawnv_passfds(spawn.get_executable(), cmd, self._fds) + os.close(child_r) + child_r = None + os.close(child_w) + child_w = None self.sentinel = parent_r with open(parent_w, 'wb', closefd=False) as f: f.write(fp.getbuffer()) diff --git a/PythonLib/full/multiprocessing/popen_spawn_win32.py b/PythonLib/full/multiprocessing/popen_spawn_win32.py index 49d4c7ee..62fb0ddb 100644 --- a/PythonLib/full/multiprocessing/popen_spawn_win32.py +++ b/PythonLib/full/multiprocessing/popen_spawn_win32.py @@ -3,6 +3,7 @@ import signal import sys import _winapi +from subprocess import STARTUPINFO, STARTF_FORCEOFFFEEDBACK from .context import reduction, get_spawning_popen, set_spawning_popen from . import spawn @@ -74,7 +75,8 @@ def __init__(self, process_obj): try: hp, ht, pid, tid = _winapi.CreateProcess( python_exe, cmd, - None, None, False, 0, env, None, None) + None, None, False, 0, env, None, + STARTUPINFO(dwFlags=STARTF_FORCEOFFFEEDBACK)) _winapi.CloseHandle(ht) except: _winapi.CloseHandle(rhandle) diff --git a/PythonLib/full/multiprocessing/process.py b/PythonLib/full/multiprocessing/process.py index 271ba3fd..262513f2 100644 --- a/PythonLib/full/multiprocessing/process.py +++ b/PythonLib/full/multiprocessing/process.py @@ -77,7 +77,7 @@ class BaseProcess(object): def _Popen(self): raise NotImplementedError - def __init__(self, group=None, target=None, name=None, args=(), kwargs={}, + def __init__(self, group=None, target=None, name=None, args=(), kwargs=None, *, daemon=None): assert group is None, 'group argument must be None for now' count = next(_process_counter) @@ -89,7 +89,7 @@ def __init__(self, group=None, target=None, name=None, args=(), kwargs={}, self._closed = False self._target = target self._args = tuple(args) - self._kwargs = dict(kwargs) + self._kwargs = dict(kwargs) if kwargs else {} self._name = name or type(self).__name__ + '-' + \ ':'.join(str(i) for i in self._identity) if daemon is not None: @@ -125,6 +125,13 @@ def start(self): del self._target, self._args, self._kwargs _children.add(self) + def interrupt(self): + ''' + Terminate process; sends SIGINT signal + ''' + self._check_closed() + self._popen.interrupt() + def terminate(self): ''' Terminate process; sends SIGTERM signal or uses TerminateProcess() @@ -310,11 +317,8 @@ def _bootstrap(self, parent_sentinel=None): # _run_after_forkers() is executed del old_process util.info('child process calling self.run()') - try: - self.run() - exitcode = 0 - finally: - util._exit_function() + self.run() + exitcode = 0 except SystemExit as e: if e.code is None: exitcode = 0 diff --git a/PythonLib/full/multiprocessing/queues.py b/PythonLib/full/multiprocessing/queues.py index 852ae87b..981599ac 100644 --- a/PythonLib/full/multiprocessing/queues.py +++ b/PythonLib/full/multiprocessing/queues.py @@ -20,8 +20,6 @@ from queue import Empty, Full -import _multiprocessing - from . import connection from . import context _ForkingPickler = context.reduction.ForkingPickler @@ -123,7 +121,7 @@ def get(self, block=True, timeout=None): def qsize(self): # Raises NotImplementedError on Mac OSX because of broken sem_getvalue() - return self._maxsize - self._sem._semlock._get_value() + return self._maxsize - self._sem.get_value() def empty(self): return not self._poll() diff --git a/PythonLib/full/multiprocessing/reduction.py b/PythonLib/full/multiprocessing/reduction.py index 5593f068..fcccd3ee 100644 --- a/PythonLib/full/multiprocessing/reduction.py +++ b/PythonLib/full/multiprocessing/reduction.py @@ -139,15 +139,12 @@ def detach(self): __all__ += ['DupFd', 'sendfds', 'recvfds'] import array - # On MacOSX we should acknowledge receipt of fds -- see Issue14669 - ACKNOWLEDGE = sys.platform == 'darwin' - def sendfds(sock, fds): '''Send an array of fds over an AF_UNIX socket.''' fds = array.array('i', fds) msg = bytes([len(fds) % 256]) sock.sendmsg([msg], [(socket.SOL_SOCKET, socket.SCM_RIGHTS, fds)]) - if ACKNOWLEDGE and sock.recv(1) != b'A': + if sock.recv(1) != b'A': raise RuntimeError('did not receive acknowledgement of fd') def recvfds(sock, size): @@ -158,8 +155,11 @@ def recvfds(sock, size): if not msg and not ancdata: raise EOFError try: - if ACKNOWLEDGE: - sock.send(b'A') + # We send/recv an Ack byte after the fds to work around an old + # macOS bug; it isn't clear if this is still required but it + # makes unit testing fd sending easier. + # See: https://github.com/python/cpython/issues/58874 + sock.send(b'A') # Acknowledge if len(ancdata) != 1: raise RuntimeError('received %d items of ancdata' % len(ancdata)) diff --git a/PythonLib/full/multiprocessing/resource_tracker.py b/PythonLib/full/multiprocessing/resource_tracker.py index e4da1293..2ef2d1ec 100644 --- a/PythonLib/full/multiprocessing/resource_tracker.py +++ b/PythonLib/full/multiprocessing/resource_tracker.py @@ -15,11 +15,15 @@ # this resource tracker process, "killall python" would probably leave unlinked # resources. +import base64 import os import signal import sys import threading import warnings +from collections import deque + +import json from . import spawn from . import util @@ -29,8 +33,12 @@ _HAVE_SIGMASK = hasattr(signal, 'pthread_sigmask') _IGNORED_SIGNALS = (signal.SIGINT, signal.SIGTERM) +def cleanup_noop(name): + raise RuntimeError('noop should never be registered or cleaned up') + _CLEANUP_FUNCS = { - 'noop': lambda: None, + 'noop': cleanup_noop, + 'dummy': lambda name: None, # Dummy resource used in tests } if os.name == 'posix': @@ -43,12 +51,8 @@ # absence of POSIX named semaphores. In that case, no named semaphores were # ever opened, so no cleanup would be necessary. if hasattr(_multiprocessing, 'sem_unlink'): - _CLEANUP_FUNCS.update({ - 'semaphore': _multiprocessing.sem_unlink, - }) - _CLEANUP_FUNCS.update({ - 'shared_memory': _posixshmem.shm_unlink, - }) + _CLEANUP_FUNCS['semaphore'] = _multiprocessing.sem_unlink + _CLEANUP_FUNCS['shared_memory'] = _posixshmem.shm_unlink class ReentrantCallError(RuntimeError): @@ -61,6 +65,15 @@ def __init__(self): self._lock = threading.RLock() self._fd = None self._pid = None + self._exitcode = None + self._reentrant_messages = deque() + + # True to use colon-separated lines, rather than JSON lines, + # for internal communication. (Mainly for testing). + # Filenames not supported by the simple format will always be sent + # using JSON. + # The reader should understand all formats. + self._use_simple_format = True def _reentrant_call_error(self): # gh-109629: this happens if an explicit call to the ResourceTracker @@ -97,7 +110,7 @@ def _stop_locked( # This shouldn't happen (it might when called by a finalizer) # so we check for it anyway. if self._lock._recursion_count() > 1: - return self._reentrant_call_error() + raise self._reentrant_call_error() if self._fd is None: # not running return @@ -108,9 +121,21 @@ def _stop_locked( close(self._fd) self._fd = None - waitpid(self._pid, 0) + try: + _, status = waitpid(self._pid, 0) + except ChildProcessError: + self._pid = None + self._exitcode = None + return + self._pid = None + try: + self._exitcode = waitstatus_to_exitcode(status) + except ValueError: + # os.waitstatus_to_exitcode may raise an exception for invalid values + self._exitcode = None + def getfd(self): self.ensure_running() return self._fd @@ -120,75 +145,119 @@ def ensure_running(self): This can be run from any process. Usually a child process will use the resource created by its parent.''' + return self._ensure_running_and_write() + + def _teardown_dead_process(self): + os.close(self._fd) + + # Clean-up to avoid dangling processes. + try: + # _pid can be None if this process is a child from another + # python process, which has started the resource_tracker. + if self._pid is not None: + os.waitpid(self._pid, 0) + except ChildProcessError: + # The resource_tracker has already been terminated. + pass + self._fd = None + self._pid = None + self._exitcode = None + + warnings.warn('resource_tracker: process died unexpectedly, ' + 'relaunching. Some resources might leak.') + + def _launch(self): + fds_to_pass = [] + try: + fds_to_pass.append(sys.stderr.fileno()) + except Exception: + pass + r, w = os.pipe() + try: + fds_to_pass.append(r) + # process will out live us, so no need to wait on pid + exe = spawn.get_executable() + args = [ + exe, + *util._args_from_interpreter_flags(), + '-c', + f'from multiprocessing.resource_tracker import main;main({r})', + ] + # bpo-33613: Register a signal mask that will block the signals. + # This signal mask will be inherited by the child that is going + # to be spawned and will protect the child from a race condition + # that can make the child die before it registers signal handlers + # for SIGINT and SIGTERM. The mask is unregistered after spawning + # the child. + prev_sigmask = None + try: + if _HAVE_SIGMASK: + prev_sigmask = signal.pthread_sigmask(signal.SIG_BLOCK, _IGNORED_SIGNALS) + pid = util.spawnv_passfds(exe, args, fds_to_pass) + finally: + if prev_sigmask is not None: + signal.pthread_sigmask(signal.SIG_SETMASK, prev_sigmask) + except: + os.close(w) + raise + else: + self._fd = w + self._pid = pid + finally: + os.close(r) + + def _make_probe_message(self): + """Return a probe message.""" + if self._use_simple_format: + return b'PROBE:0:noop\n' + return ( + json.dumps( + {"cmd": "PROBE", "rtype": "noop"}, + ensure_ascii=True, + separators=(",", ":"), + ) + + "\n" + ).encode("ascii") + + def _ensure_running_and_write(self, msg=None): with self._lock: if self._lock._recursion_count() > 1: # The code below is certainly not reentrant-safe, so bail out - return self._reentrant_call_error() + if msg is None: + raise self._reentrant_call_error() + return self._reentrant_messages.append(msg) + if self._fd is not None: # resource tracker was launched before, is it still running? - if self._check_alive(): - # => still alive - return - # => dead, launch it again - os.close(self._fd) - - # Clean-up to avoid dangling processes. + if msg is None: + to_send = self._make_probe_message() + else: + to_send = msg try: - # _pid can be None if this process is a child from another - # python process, which has started the resource_tracker. - if self._pid is not None: - os.waitpid(self._pid, 0) - except ChildProcessError: - # The resource_tracker has already been terminated. - pass - self._fd = None - self._pid = None + self._write(to_send) + except OSError: + self._teardown_dead_process() + self._launch() - warnings.warn('resource_tracker: process died unexpectedly, ' - 'relaunching. Some resources might leak.') + msg = None # message was sent in probe + else: + self._launch() - fds_to_pass = [] + while True: try: - fds_to_pass.append(sys.stderr.fileno()) - except Exception: - pass - cmd = 'from multiprocessing.resource_tracker import main;main(%d)' - r, w = os.pipe() - try: - fds_to_pass.append(r) - # process will out live us, so no need to wait on pid - exe = spawn.get_executable() - args = [exe] + util._args_from_interpreter_flags() - args += ['-c', cmd % r] - # bpo-33613: Register a signal mask that will block the signals. - # This signal mask will be inherited by the child that is going - # to be spawned and will protect the child from a race condition - # that can make the child die before it registers signal handlers - # for SIGINT and SIGTERM. The mask is unregistered after spawning - # the child. - prev_sigmask = None - try: - if _HAVE_SIGMASK: - prev_sigmask = signal.pthread_sigmask(signal.SIG_BLOCK, _IGNORED_SIGNALS) - pid = util.spawnv_passfds(exe, args, fds_to_pass) - finally: - if prev_sigmask is not None: - signal.pthread_sigmask(signal.SIG_SETMASK, prev_sigmask) - except: - os.close(w) - raise - else: - self._fd = w - self._pid = pid - finally: - os.close(r) + reentrant_msg = self._reentrant_messages.popleft() + except IndexError: + break + self._write(reentrant_msg) + if msg is not None: + self._write(msg) def _check_alive(self): '''Check that the pipe has not been closed by sending a probe.''' try: # We cannot use send here as it calls ensure_running, creating # a cycle. - os.write(self._fd, b'PROBE:0:noop\n') + os.write(self._fd, self._make_probe_message()) except OSError: return False else: @@ -202,27 +271,42 @@ def unregister(self, name, rtype): '''Unregister name of resource with resource tracker.''' self._send('UNREGISTER', name, rtype) - def _send(self, cmd, name, rtype): - try: - self.ensure_running() - except ReentrantCallError: - # The code below might or might not work, depending on whether - # the resource tracker was already running and still alive. - # Better warn the user. - # (XXX is warnings.warn itself reentrant-safe? :-) - warnings.warn( - f"ResourceTracker called reentrantly for resource cleanup, " - f"which is unsupported. " - f"The {rtype} object {name!r} might leak.") - msg = '{0}:{1}:{2}\n'.format(cmd, name, rtype).encode('ascii') - if len(msg) > 512: - # posix guarantees that writes to a pipe of less than PIPE_BUF - # bytes are atomic, and that PIPE_BUF >= 512 - raise ValueError('msg too long') + def _write(self, msg): nbytes = os.write(self._fd, msg) - assert nbytes == len(msg), "nbytes {0:n} but len(msg) {1:n}".format( - nbytes, len(msg)) + assert nbytes == len(msg), f"{nbytes=} != {len(msg)=}" + + def _send(self, cmd, name, rtype): + if self._use_simple_format and '\n' not in name: + msg = f"{cmd}:{name}:{rtype}\n".encode("ascii") + if len(msg) > 512: + # posix guarantees that writes to a pipe of less than PIPE_BUF + # bytes are atomic, and that PIPE_BUF >= 512 + raise ValueError('msg too long') + self._ensure_running_and_write(msg) + return + # POSIX guarantees that writes to a pipe of less than PIPE_BUF (512 on Linux) + # bytes are atomic. Therefore, we want the message to be shorter than 512 bytes. + # POSIX shm_open() and sem_open() require the name, including its leading slash, + # to be at most NAME_MAX bytes (255 on Linux) + # With json.dump(..., ensure_ascii=True) every non-ASCII byte becomes a 6-char + # escape like \uDC80. + # As we want the overall message to be kept atomic and therefore smaller than 512, + # we encode encode the raw name bytes with URL-safe Base64 - so a 255 long name + # will not exceed 340 bytes. + b = name.encode('utf-8', 'surrogateescape') + if len(b) > 255: + raise ValueError('shared memory name too long (max 255 bytes)') + b64 = base64.urlsafe_b64encode(b).decode('ascii') + + payload = {"cmd": cmd, "rtype": rtype, "base64_name": b64} + msg = (json.dumps(payload, ensure_ascii=True, separators=(",", ":")) + "\n").encode("ascii") + + # The entire JSON message is guaranteed < PIPE_BUF (512 bytes) by construction. + assert len(msg) <= 512, f"internal error: message too long ({len(msg)} bytes)" + assert msg.startswith(b'{') + + self._ensure_running_and_write(msg) _resource_tracker = ResourceTracker() ensure_running = _resource_tracker.ensure_running @@ -231,6 +315,30 @@ def _send(self, cmd, name, rtype): getfd = _resource_tracker.getfd +def _decode_message(line): + if line.startswith(b'{'): + try: + obj = json.loads(line.decode('ascii')) + except Exception as e: + raise ValueError("malformed resource_tracker message: %r" % (line,)) from e + + cmd = obj["cmd"] + rtype = obj["rtype"] + b64 = obj.get("base64_name", "") + + if not isinstance(cmd, str) or not isinstance(rtype, str) or not isinstance(b64, str): + raise ValueError("malformed resource_tracker fields: %r" % (obj,)) + + try: + name = base64.urlsafe_b64decode(b64).decode('utf-8', 'surrogateescape') + except ValueError as e: + raise ValueError("malformed resource_tracker base64_name: %r" % (b64,)) from e + else: + cmd, rest = line.strip().decode('ascii').split(':', maxsplit=1) + name, rtype = rest.rsplit(':', maxsplit=1) + return cmd, rtype, name + + def main(fd): '''Run resource tracker.''' # protect the process from ^C and "killall python" etc @@ -246,12 +354,14 @@ def main(fd): pass cache = {rtype: set() for rtype in _CLEANUP_FUNCS.keys()} + exit_code = 0 + try: # keep track of registered/unregistered resources with open(fd, 'rb') as f: for line in f: try: - cmd, name, rtype = line.strip().decode('ascii').split(':') + cmd, rtype, name = _decode_message(line) cleanup_func = _CLEANUP_FUNCS.get(rtype, None) if cleanup_func is None: raise ValueError( @@ -267,6 +377,7 @@ def main(fd): else: raise RuntimeError('unrecognized command %r' % cmd) except Exception: + exit_code = 3 try: sys.excepthook(*sys.exc_info()) except: @@ -276,9 +387,17 @@ def main(fd): for rtype, rtype_cache in cache.items(): if rtype_cache: try: - warnings.warn('resource_tracker: There appear to be %d ' - 'leaked %s objects to clean up at shutdown' % - (len(rtype_cache), rtype)) + exit_code = 1 + if rtype == 'dummy': + # The test 'dummy' resource is expected to leak. + # We skip the warning (and *only* the warning) for it. + pass + else: + warnings.warn( + f'resource_tracker: There appear to be ' + f'{len(rtype_cache)} leaked {rtype} objects to ' + f'clean up at shutdown: {rtype_cache}' + ) except Exception: pass for name in rtype_cache: @@ -289,6 +408,9 @@ def main(fd): try: _CLEANUP_FUNCS[rtype](name) except Exception as e: + exit_code = 2 warnings.warn('resource_tracker: %r: %s' % (name, e)) finally: pass + + sys.exit(exit_code) diff --git a/PythonLib/full/multiprocessing/shared_memory.py b/PythonLib/full/multiprocessing/shared_memory.py index 9a1e5aa1..99a8ce33 100644 --- a/PythonLib/full/multiprocessing/shared_memory.py +++ b/PythonLib/full/multiprocessing/shared_memory.py @@ -71,8 +71,9 @@ class SharedMemory: _flags = os.O_RDWR _mode = 0o600 _prepend_leading_slash = True if _USE_POSIX else False + _track = True - def __init__(self, name=None, create=False, size=0): + def __init__(self, name=None, create=False, size=0, *, track=True): if not size >= 0: raise ValueError("'size' must be a positive integer") if create: @@ -82,6 +83,7 @@ def __init__(self, name=None, create=False, size=0): if name is None and not self._flags & os.O_EXCL: raise ValueError("'name' can only be None if create=True") + self._track = track if _USE_POSIX: # POSIX Shared Memory @@ -116,8 +118,8 @@ def __init__(self, name=None, create=False, size=0): except OSError: self.unlink() raise - - resource_tracker.register(self._name, "shared_memory") + if self._track: + resource_tracker.register(self._name, "shared_memory") else: @@ -236,12 +238,20 @@ def close(self): def unlink(self): """Requests that the underlying shared memory block be destroyed. - In order to ensure proper cleanup of resources, unlink should be - called once (and only once) across all processes which have access - to the shared memory block.""" + Unlink should be called once (and only once) across all handles + which have access to the shared memory block, even if these + handles belong to different processes. Closing and unlinking may + happen in any order, but trying to access data inside a shared + memory block after unlinking may result in memory errors, + depending on platform. + + This method has no effect on Windows, where the only way to + delete a shared memory block is to close all handles.""" + if _USE_POSIX and self._name: _posixshmem.shm_unlink(self._name) - resource_tracker.unregister(self._name, "shared_memory") + if self._track: + resource_tracker.unregister(self._name, "shared_memory") _encoding = "utf8" @@ -529,6 +539,6 @@ def index(self, value): if value == entry: return position else: - raise ValueError(f"{value!r} not in this container") + raise ValueError("ShareableList.index(x): x not in list") __class_getitem__ = classmethod(types.GenericAlias) diff --git a/PythonLib/full/multiprocessing/spawn.py b/PythonLib/full/multiprocessing/spawn.py index daac1ecc..d43864c9 100644 --- a/PythonLib/full/multiprocessing/spawn.py +++ b/PythonLib/full/multiprocessing/spawn.py @@ -184,7 +184,7 @@ def get_preparation_data(name): sys_argv=sys.argv, orig_dir=process.ORIGINAL_DIR, dir=os.getcwd(), - start_method=get_start_method(), + start_method=get_start_method(allow_none=True), ) # Figure out whether to initialise main in the subprocess as a module diff --git a/PythonLib/full/multiprocessing/synchronize.py b/PythonLib/full/multiprocessing/synchronize.py index 870c9134..9188114a 100644 --- a/PythonLib/full/multiprocessing/synchronize.py +++ b/PythonLib/full/multiprocessing/synchronize.py @@ -21,22 +21,21 @@ from . import process from . import util -# Try to import the mp.synchronize module cleanly, if it fails -# raise ImportError for platforms lacking a working sem_open implementation. -# See issue 3770 +# TODO: Do any platforms still lack a functioning sem_open? try: from _multiprocessing import SemLock, sem_unlink -except (ImportError): +except ImportError: raise ImportError("This platform lacks a functioning sem_open" + - " implementation, therefore, the required" + - " synchronization primitives needed will not" + - " function, see issue 3770.") + " implementation. https://github.com/python/cpython/issues/48020.") # # Constants # -RECURSIVE_MUTEX, SEMAPHORE = list(range(2)) +# These match the enum in Modules/_multiprocessing/semaphore.c +RECURSIVE_MUTEX = 0 +SEMAPHORE = 1 + SEM_VALUE_MAX = _multiprocessing.SemLock.SEM_VALUE_MAX # @@ -91,6 +90,9 @@ def _make_methods(self): self.acquire = self._semlock.acquire self.release = self._semlock.release + def locked(self): + return self._semlock._is_zero() + def __enter__(self): return self._semlock.__enter__() @@ -133,11 +135,16 @@ def __init__(self, value=1, *, ctx): SemLock.__init__(self, SEMAPHORE, value, SEM_VALUE_MAX, ctx=ctx) def get_value(self): + '''Returns current value of Semaphore. + + Raises NotImplementedError on Mac OSX + because of broken sem_getvalue(). + ''' return self._semlock._get_value() def __repr__(self): try: - value = self._semlock._get_value() + value = self.get_value() except Exception: value = 'unknown' return '<%s(value=%s)>' % (self.__class__.__name__, value) @@ -153,7 +160,7 @@ def __init__(self, value=1, *, ctx): def __repr__(self): try: - value = self._semlock._get_value() + value = self.get_value() except Exception: value = 'unknown' return '<%s(value=%s, maxvalue=%s)>' % \ @@ -245,8 +252,8 @@ def _make_methods(self): def __repr__(self): try: - num_waiters = (self._sleeping_count._semlock._get_value() - - self._woken_count._semlock._get_value()) + num_waiters = (self._sleeping_count.get_value() - + self._woken_count.get_value()) except Exception: num_waiters = 'unknown' return '<%s(%s, %s)>' % (self.__class__.__name__, self._lock, num_waiters) diff --git a/PythonLib/full/multiprocessing/util.py b/PythonLib/full/multiprocessing/util.py index 79559823..549fb07c 100644 --- a/PythonLib/full/multiprocessing/util.py +++ b/PythonLib/full/multiprocessing/util.py @@ -14,12 +14,12 @@ import atexit import threading # we want threading to install it's # cleanup function before multiprocessing does -from subprocess import _args_from_interpreter_flags +from subprocess import _args_from_interpreter_flags # noqa: F401 from . import process __all__ = [ - 'sub_debug', 'debug', 'info', 'sub_warning', 'get_logger', + 'sub_debug', 'debug', 'info', 'sub_warning', 'warn', 'get_logger', 'log_to_stderr', 'get_temp_dir', 'register_after_fork', 'is_exiting', 'Finalize', 'ForkAwareThreadLock', 'ForkAwareLocal', 'close_all_fds_except', 'SUBDEBUG', 'SUBWARNING', @@ -34,6 +34,7 @@ DEBUG = 10 INFO = 20 SUBWARNING = 25 +WARNING = 30 LOGGER_NAME = 'multiprocessing' DEFAULT_LOGGING_FORMAT = '[%(levelname)s/%(processName)s] %(message)s' @@ -53,6 +54,10 @@ def info(msg, *args): if _logger: _logger.log(INFO, msg, *args, stacklevel=2) +def warn(msg, *args): + if _logger: + _logger.log(WARNING, msg, *args, stacklevel=2) + def sub_warning(msg, *args): if _logger: _logger.log(SUBWARNING, msg, *args, stacklevel=2) @@ -64,8 +69,7 @@ def get_logger(): global _logger import logging - logging._acquireLock() - try: + with logging._lock: if not _logger: _logger = logging.getLogger(LOGGER_NAME) @@ -79,9 +83,6 @@ def get_logger(): atexit._exithandlers.remove((_exit_function, (), {})) atexit._exithandlers.append((_exit_function, (), {})) - finally: - logging._releaseLock() - return _logger def log_to_stderr(level=None): @@ -106,11 +107,7 @@ def log_to_stderr(level=None): # Abstract socket support def _platform_supports_abstract_sockets(): - if sys.platform == "linux": - return True - if hasattr(sys, 'getandroidapilevel'): - return True - return False + return sys.platform in ("linux", "android") def is_abstract_socket_namespace(address): @@ -129,11 +126,25 @@ def is_abstract_socket_namespace(address): # Function returning a temp directory which will be removed on exit # +# Maximum length of a NULL-terminated [1] socket file path is usually +# between 92 and 108 [2], but Linux is known to use a size of 108 [3]. +# BSD-based systems usually use a size of 104 or 108 and Windows does +# not create AF_UNIX sockets. +# +# [1]: https://github.com/python/cpython/issues/140734 +# [2]: https://pubs.opengroup.org/onlinepubs/9799919799/basedefs/sys_un.h.html +# [3]: https://man7.org/linux/man-pages/man7/unix.7.html + +if sys.platform == 'linux': + _SUN_PATH_MAX = 108 +elif sys.platform.startswith(('openbsd', 'freebsd')): + _SUN_PATH_MAX = 104 +else: + # On Windows platforms, we do not create AF_UNIX sockets. + _SUN_PATH_MAX = None if os.name == 'nt' else 92 + def _remove_temp_dir(rmtree, tempdir): - def onerror(func, path, err_info): - if not issubclass(err_info[0], FileNotFoundError): - raise - rmtree(tempdir, onerror=onerror) + rmtree(tempdir) current_process = process.current_process() # current_process() can be None if the finalizer is called @@ -141,12 +152,69 @@ def onerror(func, path, err_info): if current_process is not None: current_process._config['tempdir'] = None +def _get_base_temp_dir(tempfile): + """Get a temporary directory where socket files will be created. + + To prevent additional imports, pass a pre-imported 'tempfile' module. + """ + if os.name == 'nt': + return None + # Most of the time, the default temporary directory is /tmp. Thus, + # listener sockets files "$TMPDIR/pymp-XXXXXXXX/sock-XXXXXXXX" do + # not have a path length exceeding SUN_PATH_MAX. + # + # If users specify their own temporary directory, we may be unable + # to create those files. Therefore, we fall back to the system-wide + # temporary directory /tmp, assumed to exist on POSIX systems. + # + # See https://github.com/python/cpython/issues/132124. + base_tempdir = tempfile.gettempdir() + # Files created in a temporary directory are suffixed by a string + # generated by tempfile._RandomNameSequence, which, by design, + # is 8 characters long. + # + # Thus, the socket file path length (without NULL terminator) will be: + # + # len(base_tempdir + '/pymp-XXXXXXXX' + '/sock-XXXXXXXX') + sun_path_len = len(base_tempdir) + 14 + 14 + # Strict inequality to account for the NULL terminator. + # See https://github.com/python/cpython/issues/140734. + if sun_path_len < _SUN_PATH_MAX: + return base_tempdir + # Fallback to the default system-wide temporary directory. + # This ignores user-defined environment variables. + # + # On POSIX systems, /tmp MUST be writable by any application [1]. + # We however emit a warning if this is not the case to prevent + # obscure errors later in the execution. + # + # On some legacy systems, /var/tmp and /usr/tmp can be present + # and will be used instead. + # + # [1]: https://refspecs.linuxfoundation.org/FHS_3.0/fhs/ch03s18.html + dirlist = ['/tmp', '/var/tmp', '/usr/tmp'] + try: + base_system_tempdir = tempfile._get_default_tempdir(dirlist) + except FileNotFoundError: + warn("Process-wide temporary directory %s will not be usable for " + "creating socket files and no usable system-wide temporary " + "directory was found in %s", base_tempdir, dirlist) + # At this point, the system-wide temporary directory is not usable + # but we may assume that the user-defined one is, even if we will + # not be able to write socket files out there. + return base_tempdir + warn("Ignoring user-defined temporary directory: %s", base_tempdir) + # at most max(map(len, dirlist)) + 14 + 14 = 36 characters + assert len(base_system_tempdir) + 14 + 14 < _SUN_PATH_MAX + return base_system_tempdir + def get_temp_dir(): # get name of a temp directory which will be automatically cleaned up tempdir = process.current_process()._config.get('tempdir') if tempdir is None: import shutil, tempfile - tempdir = tempfile.mkdtemp(prefix='pymp-') + base_tempdir = _get_base_temp_dir(tempfile) + tempdir = tempfile.mkdtemp(prefix='pymp-', dir=base_tempdir) info('created temp directory %s', tempdir) # keep a strong reference to shutil.rmtree(), since the finalizer # can be called late during Python shutdown @@ -449,15 +517,13 @@ def _flush_std_streams(): def spawnv_passfds(path, args, passfds): import _posixsubprocess - import subprocess passfds = tuple(sorted(map(int, passfds))) errpipe_read, errpipe_write = os.pipe() try: return _posixsubprocess.fork_exec( args, [path], True, passfds, None, None, -1, -1, -1, -1, -1, -1, errpipe_read, errpipe_write, - False, False, -1, None, None, None, -1, None, - subprocess._USE_VFORK) + False, False, -1, None, None, None, -1, None) finally: os.close(errpipe_read) os.close(errpipe_write) diff --git a/PythonLib/full/netrc.py b/PythonLib/full/netrc.py index b285fd8e..bd003e80 100644 --- a/PythonLib/full/netrc.py +++ b/PythonLib/full/netrc.py @@ -7,6 +7,19 @@ __all__ = ["netrc", "NetrcParseError"] +def _can_security_check(): + # On WASI, getuid() is indicated as a stub but it may also be missing. + return os.name == 'posix' and hasattr(os, 'getuid') + + +def _getpwuid(uid): + try: + import pwd + return pwd.getpwuid(uid)[0] + except (ImportError, LookupError): + return f'uid {uid}' + + class NetrcParseError(Exception): """Exception raised on syntax errors in the .netrc file.""" def __init__(self, msg, filename=None, lineno=None): @@ -142,18 +155,12 @@ def _parse(self, file, fp, default_netrc): self._security_check(fp, default_netrc, self.hosts[entryname][0]) def _security_check(self, fp, default_netrc, login): - if os.name == 'posix' and default_netrc and login != "anonymous": + if _can_security_check() and default_netrc and login != "anonymous": prop = os.fstat(fp.fileno()) - if prop.st_uid != os.getuid(): - import pwd - try: - fowner = pwd.getpwuid(prop.st_uid)[0] - except KeyError: - fowner = 'uid %s' % prop.st_uid - try: - user = pwd.getpwuid(os.getuid())[0] - except KeyError: - user = 'uid %s' % os.getuid() + current_user_id = os.getuid() + if prop.st_uid != current_user_id: + fowner = _getpwuid(prop.st_uid) + user = _getpwuid(current_user_id) raise NetrcParseError( (f"~/.netrc file owner ({fowner}, {user}) does not match" " current user")) diff --git a/PythonLib/full/nntplib.py b/PythonLib/full/nntplib.py deleted file mode 100644 index dddea059..00000000 --- a/PythonLib/full/nntplib.py +++ /dev/null @@ -1,1093 +0,0 @@ -"""An NNTP client class based on: -- RFC 977: Network News Transfer Protocol -- RFC 2980: Common NNTP Extensions -- RFC 3977: Network News Transfer Protocol (version 2) - -Example: - ->>> from nntplib import NNTP ->>> s = NNTP('news') ->>> resp, count, first, last, name = s.group('comp.lang.python') ->>> print('Group', name, 'has', count, 'articles, range', first, 'to', last) -Group comp.lang.python has 51 articles, range 5770 to 5821 ->>> resp, subs = s.xhdr('subject', '{0}-{1}'.format(first, last)) ->>> resp = s.quit() ->>> - -Here 'resp' is the server response line. -Error responses are turned into exceptions. - -To post an article from a file: ->>> f = open(filename, 'rb') # file containing article, including header ->>> resp = s.post(f) ->>> - -For descriptions of all methods, read the comments in the code below. -Note that all arguments and return values representing article numbers -are strings, not numbers, since they are rarely used for calculations. -""" - -# RFC 977 by Brian Kantor and Phil Lapsley. -# xover, xgtitle, xpath, date methods by Kevan Heydon - -# Incompatible changes from the 2.x nntplib: -# - all commands are encoded as UTF-8 data (using the "surrogateescape" -# error handler), except for raw message data (POST, IHAVE) -# - all responses are decoded as UTF-8 data (using the "surrogateescape" -# error handler), except for raw message data (ARTICLE, HEAD, BODY) -# - the `file` argument to various methods is keyword-only -# -# - NNTP.date() returns a datetime object -# - NNTP.newgroups() and NNTP.newnews() take a datetime (or date) object, -# rather than a pair of (date, time) strings. -# - NNTP.newgroups() and NNTP.list() return a list of GroupInfo named tuples -# - NNTP.descriptions() returns a dict mapping group names to descriptions -# - NNTP.xover() returns a list of dicts mapping field names (header or metadata) -# to field values; each dict representing a message overview. -# - NNTP.article(), NNTP.head() and NNTP.body() return a (response, ArticleInfo) -# tuple. -# - the "internal" methods have been marked private (they now start with -# an underscore) - -# Other changes from the 2.x/3.1 nntplib: -# - automatic querying of capabilities at connect -# - New method NNTP.getcapabilities() -# - New method NNTP.over() -# - New helper function decode_header() -# - NNTP.post() and NNTP.ihave() accept file objects, bytes-like objects and -# arbitrary iterables yielding lines. -# - An extensive test suite :-) - -# TODO: -# - return structured data (GroupInfo etc.) everywhere -# - support HDR - -# Imports -import re -import socket -import collections -import datetime -import sys -import warnings - -try: - import ssl -except ImportError: - _have_ssl = False -else: - _have_ssl = True - -from email.header import decode_header as _email_decode_header -from socket import _GLOBAL_DEFAULT_TIMEOUT - -__all__ = ["NNTP", - "NNTPError", "NNTPReplyError", "NNTPTemporaryError", - "NNTPPermanentError", "NNTPProtocolError", "NNTPDataError", - "decode_header", - ] - -warnings._deprecated(__name__, remove=(3, 13)) - -# maximal line length when calling readline(). This is to prevent -# reading arbitrary length lines. RFC 3977 limits NNTP line length to -# 512 characters, including CRLF. We have selected 2048 just to be on -# the safe side. -_MAXLINE = 2048 - - -# Exceptions raised when an error or invalid response is received -class NNTPError(Exception): - """Base class for all nntplib exceptions""" - def __init__(self, *args): - Exception.__init__(self, *args) - try: - self.response = args[0] - except IndexError: - self.response = 'No response given' - -class NNTPReplyError(NNTPError): - """Unexpected [123]xx reply""" - pass - -class NNTPTemporaryError(NNTPError): - """4xx errors""" - pass - -class NNTPPermanentError(NNTPError): - """5xx errors""" - pass - -class NNTPProtocolError(NNTPError): - """Response does not begin with [1-5]""" - pass - -class NNTPDataError(NNTPError): - """Error in response data""" - pass - - -# Standard port used by NNTP servers -NNTP_PORT = 119 -NNTP_SSL_PORT = 563 - -# Response numbers that are followed by additional text (e.g. article) -_LONGRESP = { - '100', # HELP - '101', # CAPABILITIES - '211', # LISTGROUP (also not multi-line with GROUP) - '215', # LIST - '220', # ARTICLE - '221', # HEAD, XHDR - '222', # BODY - '224', # OVER, XOVER - '225', # HDR - '230', # NEWNEWS - '231', # NEWGROUPS - '282', # XGTITLE -} - -# Default decoded value for LIST OVERVIEW.FMT if not supported -_DEFAULT_OVERVIEW_FMT = [ - "subject", "from", "date", "message-id", "references", ":bytes", ":lines"] - -# Alternative names allowed in LIST OVERVIEW.FMT response -_OVERVIEW_FMT_ALTERNATIVES = { - 'bytes': ':bytes', - 'lines': ':lines', -} - -# Line terminators (we always output CRLF, but accept any of CRLF, CR, LF) -_CRLF = b'\r\n' - -GroupInfo = collections.namedtuple('GroupInfo', - ['group', 'last', 'first', 'flag']) - -ArticleInfo = collections.namedtuple('ArticleInfo', - ['number', 'message_id', 'lines']) - - -# Helper function(s) -def decode_header(header_str): - """Takes a unicode string representing a munged header value - and decodes it as a (possibly non-ASCII) readable value.""" - parts = [] - for v, enc in _email_decode_header(header_str): - if isinstance(v, bytes): - parts.append(v.decode(enc or 'ascii')) - else: - parts.append(v) - return ''.join(parts) - -def _parse_overview_fmt(lines): - """Parse a list of string representing the response to LIST OVERVIEW.FMT - and return a list of header/metadata names. - Raises NNTPDataError if the response is not compliant - (cf. RFC 3977, section 8.4).""" - fmt = [] - for line in lines: - if line[0] == ':': - # Metadata name (e.g. ":bytes") - name, _, suffix = line[1:].partition(':') - name = ':' + name - else: - # Header name (e.g. "Subject:" or "Xref:full") - name, _, suffix = line.partition(':') - name = name.lower() - name = _OVERVIEW_FMT_ALTERNATIVES.get(name, name) - # Should we do something with the suffix? - fmt.append(name) - defaults = _DEFAULT_OVERVIEW_FMT - if len(fmt) < len(defaults): - raise NNTPDataError("LIST OVERVIEW.FMT response too short") - if fmt[:len(defaults)] != defaults: - raise NNTPDataError("LIST OVERVIEW.FMT redefines default fields") - return fmt - -def _parse_overview(lines, fmt, data_process_func=None): - """Parse the response to an OVER or XOVER command according to the - overview format `fmt`.""" - n_defaults = len(_DEFAULT_OVERVIEW_FMT) - overview = [] - for line in lines: - fields = {} - article_number, *tokens = line.split('\t') - article_number = int(article_number) - for i, token in enumerate(tokens): - if i >= len(fmt): - # XXX should we raise an error? Some servers might not - # support LIST OVERVIEW.FMT and still return additional - # headers. - continue - field_name = fmt[i] - is_metadata = field_name.startswith(':') - if i >= n_defaults and not is_metadata: - # Non-default header names are included in full in the response - # (unless the field is totally empty) - h = field_name + ": " - if token and token[:len(h)].lower() != h: - raise NNTPDataError("OVER/XOVER response doesn't include " - "names of additional headers") - token = token[len(h):] if token else None - fields[fmt[i]] = token - overview.append((article_number, fields)) - return overview - -def _parse_datetime(date_str, time_str=None): - """Parse a pair of (date, time) strings, and return a datetime object. - If only the date is given, it is assumed to be date and time - concatenated together (e.g. response to the DATE command). - """ - if time_str is None: - time_str = date_str[-6:] - date_str = date_str[:-6] - hours = int(time_str[:2]) - minutes = int(time_str[2:4]) - seconds = int(time_str[4:]) - year = int(date_str[:-4]) - month = int(date_str[-4:-2]) - day = int(date_str[-2:]) - # RFC 3977 doesn't say how to interpret 2-char years. Assume that - # there are no dates before 1970 on Usenet. - if year < 70: - year += 2000 - elif year < 100: - year += 1900 - return datetime.datetime(year, month, day, hours, minutes, seconds) - -def _unparse_datetime(dt, legacy=False): - """Format a date or datetime object as a pair of (date, time) strings - in the format required by the NEWNEWS and NEWGROUPS commands. If a - date object is passed, the time is assumed to be midnight (00h00). - - The returned representation depends on the legacy flag: - * if legacy is False (the default): - date has the YYYYMMDD format and time the HHMMSS format - * if legacy is True: - date has the YYMMDD format and time the HHMMSS format. - RFC 3977 compliant servers should understand both formats; therefore, - legacy is only needed when talking to old servers. - """ - if not isinstance(dt, datetime.datetime): - time_str = "000000" - else: - time_str = "{0.hour:02d}{0.minute:02d}{0.second:02d}".format(dt) - y = dt.year - if legacy: - y = y % 100 - date_str = "{0:02d}{1.month:02d}{1.day:02d}".format(y, dt) - else: - date_str = "{0:04d}{1.month:02d}{1.day:02d}".format(y, dt) - return date_str, time_str - - -if _have_ssl: - - def _encrypt_on(sock, context, hostname): - """Wrap a socket in SSL/TLS. Arguments: - - sock: Socket to wrap - - context: SSL context to use for the encrypted connection - Returns: - - sock: New, encrypted socket. - """ - # Generate a default SSL context if none was passed. - if context is None: - context = ssl._create_stdlib_context() - return context.wrap_socket(sock, server_hostname=hostname) - - -# The classes themselves -class NNTP: - # UTF-8 is the character set for all NNTP commands and responses: they - # are automatically encoded (when sending) and decoded (and receiving) - # by this class. - # However, some multi-line data blocks can contain arbitrary bytes (for - # example, latin-1 or utf-16 data in the body of a message). Commands - # taking (POST, IHAVE) or returning (HEAD, BODY, ARTICLE) raw message - # data will therefore only accept and produce bytes objects. - # Furthermore, since there could be non-compliant servers out there, - # we use 'surrogateescape' as the error handler for fault tolerance - # and easy round-tripping. This could be useful for some applications - # (e.g. NNTP gateways). - - encoding = 'utf-8' - errors = 'surrogateescape' - - def __init__(self, host, port=NNTP_PORT, user=None, password=None, - readermode=None, usenetrc=False, - timeout=_GLOBAL_DEFAULT_TIMEOUT): - """Initialize an instance. Arguments: - - host: hostname to connect to - - port: port to connect to (default the standard NNTP port) - - user: username to authenticate with - - password: password to use with username - - readermode: if true, send 'mode reader' command after - connecting. - - usenetrc: allow loading username and password from ~/.netrc file - if not specified explicitly - - timeout: timeout (in seconds) used for socket connections - - readermode is sometimes necessary if you are connecting to an - NNTP server on the local machine and intend to call - reader-specific commands, such as `group'. If you get - unexpected NNTPPermanentErrors, you might need to set - readermode. - """ - self.host = host - self.port = port - self.sock = self._create_socket(timeout) - self.file = None - try: - self.file = self.sock.makefile("rwb") - self._base_init(readermode) - if user or usenetrc: - self.login(user, password, usenetrc) - except: - if self.file: - self.file.close() - self.sock.close() - raise - - def _base_init(self, readermode): - """Partial initialization for the NNTP protocol. - This instance method is extracted for supporting the test code. - """ - self.debugging = 0 - self.welcome = self._getresp() - - # Inquire about capabilities (RFC 3977). - self._caps = None - self.getcapabilities() - - # 'MODE READER' is sometimes necessary to enable 'reader' mode. - # However, the order in which 'MODE READER' and 'AUTHINFO' need to - # arrive differs between some NNTP servers. If _setreadermode() fails - # with an authorization failed error, it will set this to True; - # the login() routine will interpret that as a request to try again - # after performing its normal function. - # Enable only if we're not already in READER mode anyway. - self.readermode_afterauth = False - if readermode and 'READER' not in self._caps: - self._setreadermode() - if not self.readermode_afterauth: - # Capabilities might have changed after MODE READER - self._caps = None - self.getcapabilities() - - # RFC 4642 2.2.2: Both the client and the server MUST know if there is - # a TLS session active. A client MUST NOT attempt to start a TLS - # session if a TLS session is already active. - self.tls_on = False - - # Log in and encryption setup order is left to subclasses. - self.authenticated = False - - def __enter__(self): - return self - - def __exit__(self, *args): - is_connected = lambda: hasattr(self, "file") - if is_connected(): - try: - self.quit() - except (OSError, EOFError): - pass - finally: - if is_connected(): - self._close() - - def _create_socket(self, timeout): - if timeout is not None and not timeout: - raise ValueError('Non-blocking socket (timeout=0) is not supported') - sys.audit("nntplib.connect", self, self.host, self.port) - return socket.create_connection((self.host, self.port), timeout) - - def getwelcome(self): - """Get the welcome message from the server - (this is read and squirreled away by __init__()). - If the response code is 200, posting is allowed; - if it 201, posting is not allowed.""" - - if self.debugging: print('*welcome*', repr(self.welcome)) - return self.welcome - - def getcapabilities(self): - """Get the server capabilities, as read by __init__(). - If the CAPABILITIES command is not supported, an empty dict is - returned.""" - if self._caps is None: - self.nntp_version = 1 - self.nntp_implementation = None - try: - resp, caps = self.capabilities() - except (NNTPPermanentError, NNTPTemporaryError): - # Server doesn't support capabilities - self._caps = {} - else: - self._caps = caps - if 'VERSION' in caps: - # The server can advertise several supported versions, - # choose the highest. - self.nntp_version = max(map(int, caps['VERSION'])) - if 'IMPLEMENTATION' in caps: - self.nntp_implementation = ' '.join(caps['IMPLEMENTATION']) - return self._caps - - def set_debuglevel(self, level): - """Set the debugging level. Argument 'level' means: - 0: no debugging output (default) - 1: print commands and responses but not body text etc. - 2: also print raw lines read and sent before stripping CR/LF""" - - self.debugging = level - debug = set_debuglevel - - def _putline(self, line): - """Internal: send one line to the server, appending CRLF. - The `line` must be a bytes-like object.""" - sys.audit("nntplib.putline", self, line) - line = line + _CRLF - if self.debugging > 1: print('*put*', repr(line)) - self.file.write(line) - self.file.flush() - - def _putcmd(self, line): - """Internal: send one command to the server (through _putline()). - The `line` must be a unicode string.""" - if self.debugging: print('*cmd*', repr(line)) - line = line.encode(self.encoding, self.errors) - self._putline(line) - - def _getline(self, strip_crlf=True): - """Internal: return one line from the server, stripping _CRLF. - Raise EOFError if the connection is closed. - Returns a bytes object.""" - line = self.file.readline(_MAXLINE +1) - if len(line) > _MAXLINE: - raise NNTPDataError('line too long') - if self.debugging > 1: - print('*get*', repr(line)) - if not line: raise EOFError - if strip_crlf: - if line[-2:] == _CRLF: - line = line[:-2] - elif line[-1:] in _CRLF: - line = line[:-1] - return line - - def _getresp(self): - """Internal: get a response from the server. - Raise various errors if the response indicates an error. - Returns a unicode string.""" - resp = self._getline() - if self.debugging: print('*resp*', repr(resp)) - resp = resp.decode(self.encoding, self.errors) - c = resp[:1] - if c == '4': - raise NNTPTemporaryError(resp) - if c == '5': - raise NNTPPermanentError(resp) - if c not in '123': - raise NNTPProtocolError(resp) - return resp - - def _getlongresp(self, file=None): - """Internal: get a response plus following text from the server. - Raise various errors if the response indicates an error. - - Returns a (response, lines) tuple where `response` is a unicode - string and `lines` is a list of bytes objects. - If `file` is a file-like object, it must be open in binary mode. - """ - - openedFile = None - try: - # If a string was passed then open a file with that name - if isinstance(file, (str, bytes)): - openedFile = file = open(file, "wb") - - resp = self._getresp() - if resp[:3] not in _LONGRESP: - raise NNTPReplyError(resp) - - lines = [] - if file is not None: - # XXX lines = None instead? - terminators = (b'.' + _CRLF, b'.\n') - while 1: - line = self._getline(False) - if line in terminators: - break - if line.startswith(b'..'): - line = line[1:] - file.write(line) - else: - terminator = b'.' - while 1: - line = self._getline() - if line == terminator: - break - if line.startswith(b'..'): - line = line[1:] - lines.append(line) - finally: - # If this method created the file, then it must close it - if openedFile: - openedFile.close() - - return resp, lines - - def _shortcmd(self, line): - """Internal: send a command and get the response. - Same return value as _getresp().""" - self._putcmd(line) - return self._getresp() - - def _longcmd(self, line, file=None): - """Internal: send a command and get the response plus following text. - Same return value as _getlongresp().""" - self._putcmd(line) - return self._getlongresp(file) - - def _longcmdstring(self, line, file=None): - """Internal: send a command and get the response plus following text. - Same as _longcmd() and _getlongresp(), except that the returned `lines` - are unicode strings rather than bytes objects. - """ - self._putcmd(line) - resp, list = self._getlongresp(file) - return resp, [line.decode(self.encoding, self.errors) - for line in list] - - def _getoverviewfmt(self): - """Internal: get the overview format. Queries the server if not - already done, else returns the cached value.""" - try: - return self._cachedoverviewfmt - except AttributeError: - pass - try: - resp, lines = self._longcmdstring("LIST OVERVIEW.FMT") - except NNTPPermanentError: - # Not supported by server? - fmt = _DEFAULT_OVERVIEW_FMT[:] - else: - fmt = _parse_overview_fmt(lines) - self._cachedoverviewfmt = fmt - return fmt - - def _grouplist(self, lines): - # Parse lines into "group last first flag" - return [GroupInfo(*line.split()) for line in lines] - - def capabilities(self): - """Process a CAPABILITIES command. Not supported by all servers. - Return: - - resp: server response if successful - - caps: a dictionary mapping capability names to lists of tokens - (for example {'VERSION': ['2'], 'OVER': [], LIST: ['ACTIVE', 'HEADERS'] }) - """ - caps = {} - resp, lines = self._longcmdstring("CAPABILITIES") - for line in lines: - name, *tokens = line.split() - caps[name] = tokens - return resp, caps - - def newgroups(self, date, *, file=None): - """Process a NEWGROUPS command. Arguments: - - date: a date or datetime object - Return: - - resp: server response if successful - - list: list of newsgroup names - """ - if not isinstance(date, (datetime.date, datetime.date)): - raise TypeError( - "the date parameter must be a date or datetime object, " - "not '{:40}'".format(date.__class__.__name__)) - date_str, time_str = _unparse_datetime(date, self.nntp_version < 2) - cmd = 'NEWGROUPS {0} {1}'.format(date_str, time_str) - resp, lines = self._longcmdstring(cmd, file) - return resp, self._grouplist(lines) - - def newnews(self, group, date, *, file=None): - """Process a NEWNEWS command. Arguments: - - group: group name or '*' - - date: a date or datetime object - Return: - - resp: server response if successful - - list: list of message ids - """ - if not isinstance(date, (datetime.date, datetime.date)): - raise TypeError( - "the date parameter must be a date or datetime object, " - "not '{:40}'".format(date.__class__.__name__)) - date_str, time_str = _unparse_datetime(date, self.nntp_version < 2) - cmd = 'NEWNEWS {0} {1} {2}'.format(group, date_str, time_str) - return self._longcmdstring(cmd, file) - - def list(self, group_pattern=None, *, file=None): - """Process a LIST or LIST ACTIVE command. Arguments: - - group_pattern: a pattern indicating which groups to query - - file: Filename string or file object to store the result in - Returns: - - resp: server response if successful - - list: list of (group, last, first, flag) (strings) - """ - if group_pattern is not None: - command = 'LIST ACTIVE ' + group_pattern - else: - command = 'LIST' - resp, lines = self._longcmdstring(command, file) - return resp, self._grouplist(lines) - - def _getdescriptions(self, group_pattern, return_all): - line_pat = re.compile('^(?P[^ \t]+)[ \t]+(.*)$') - # Try the more std (acc. to RFC2980) LIST NEWSGROUPS first - resp, lines = self._longcmdstring('LIST NEWSGROUPS ' + group_pattern) - if not resp.startswith('215'): - # Now the deprecated XGTITLE. This either raises an error - # or succeeds with the same output structure as LIST - # NEWSGROUPS. - resp, lines = self._longcmdstring('XGTITLE ' + group_pattern) - groups = {} - for raw_line in lines: - match = line_pat.search(raw_line.strip()) - if match: - name, desc = match.group(1, 2) - if not return_all: - return desc - groups[name] = desc - if return_all: - return resp, groups - else: - # Nothing found - return '' - - def description(self, group): - """Get a description for a single group. If more than one - group matches ('group' is a pattern), return the first. If no - group matches, return an empty string. - - This elides the response code from the server, since it can - only be '215' or '285' (for xgtitle) anyway. If the response - code is needed, use the 'descriptions' method. - - NOTE: This neither checks for a wildcard in 'group' nor does - it check whether the group actually exists.""" - return self._getdescriptions(group, False) - - def descriptions(self, group_pattern): - """Get descriptions for a range of groups.""" - return self._getdescriptions(group_pattern, True) - - def group(self, name): - """Process a GROUP command. Argument: - - group: the group name - Returns: - - resp: server response if successful - - count: number of articles - - first: first article number - - last: last article number - - name: the group name - """ - resp = self._shortcmd('GROUP ' + name) - if not resp.startswith('211'): - raise NNTPReplyError(resp) - words = resp.split() - count = first = last = 0 - n = len(words) - if n > 1: - count = words[1] - if n > 2: - first = words[2] - if n > 3: - last = words[3] - if n > 4: - name = words[4].lower() - return resp, int(count), int(first), int(last), name - - def help(self, *, file=None): - """Process a HELP command. Argument: - - file: Filename string or file object to store the result in - Returns: - - resp: server response if successful - - list: list of strings returned by the server in response to the - HELP command - """ - return self._longcmdstring('HELP', file) - - def _statparse(self, resp): - """Internal: parse the response line of a STAT, NEXT, LAST, - ARTICLE, HEAD or BODY command.""" - if not resp.startswith('22'): - raise NNTPReplyError(resp) - words = resp.split() - art_num = int(words[1]) - message_id = words[2] - return resp, art_num, message_id - - def _statcmd(self, line): - """Internal: process a STAT, NEXT or LAST command.""" - resp = self._shortcmd(line) - return self._statparse(resp) - - def stat(self, message_spec=None): - """Process a STAT command. Argument: - - message_spec: article number or message id (if not specified, - the current article is selected) - Returns: - - resp: server response if successful - - art_num: the article number - - message_id: the message id - """ - if message_spec: - return self._statcmd('STAT {0}'.format(message_spec)) - else: - return self._statcmd('STAT') - - def next(self): - """Process a NEXT command. No arguments. Return as for STAT.""" - return self._statcmd('NEXT') - - def last(self): - """Process a LAST command. No arguments. Return as for STAT.""" - return self._statcmd('LAST') - - def _artcmd(self, line, file=None): - """Internal: process a HEAD, BODY or ARTICLE command.""" - resp, lines = self._longcmd(line, file) - resp, art_num, message_id = self._statparse(resp) - return resp, ArticleInfo(art_num, message_id, lines) - - def head(self, message_spec=None, *, file=None): - """Process a HEAD command. Argument: - - message_spec: article number or message id - - file: filename string or file object to store the headers in - Returns: - - resp: server response if successful - - ArticleInfo: (article number, message id, list of header lines) - """ - if message_spec is not None: - cmd = 'HEAD {0}'.format(message_spec) - else: - cmd = 'HEAD' - return self._artcmd(cmd, file) - - def body(self, message_spec=None, *, file=None): - """Process a BODY command. Argument: - - message_spec: article number or message id - - file: filename string or file object to store the body in - Returns: - - resp: server response if successful - - ArticleInfo: (article number, message id, list of body lines) - """ - if message_spec is not None: - cmd = 'BODY {0}'.format(message_spec) - else: - cmd = 'BODY' - return self._artcmd(cmd, file) - - def article(self, message_spec=None, *, file=None): - """Process an ARTICLE command. Argument: - - message_spec: article number or message id - - file: filename string or file object to store the article in - Returns: - - resp: server response if successful - - ArticleInfo: (article number, message id, list of article lines) - """ - if message_spec is not None: - cmd = 'ARTICLE {0}'.format(message_spec) - else: - cmd = 'ARTICLE' - return self._artcmd(cmd, file) - - def slave(self): - """Process a SLAVE command. Returns: - - resp: server response if successful - """ - return self._shortcmd('SLAVE') - - def xhdr(self, hdr, str, *, file=None): - """Process an XHDR command (optional server extension). Arguments: - - hdr: the header type (e.g. 'subject') - - str: an article nr, a message id, or a range nr1-nr2 - - file: Filename string or file object to store the result in - Returns: - - resp: server response if successful - - list: list of (nr, value) strings - """ - pat = re.compile('^([0-9]+) ?(.*)\n?') - resp, lines = self._longcmdstring('XHDR {0} {1}'.format(hdr, str), file) - def remove_number(line): - m = pat.match(line) - return m.group(1, 2) if m else line - return resp, [remove_number(line) for line in lines] - - def xover(self, start, end, *, file=None): - """Process an XOVER command (optional server extension) Arguments: - - start: start of range - - end: end of range - - file: Filename string or file object to store the result in - Returns: - - resp: server response if successful - - list: list of dicts containing the response fields - """ - resp, lines = self._longcmdstring('XOVER {0}-{1}'.format(start, end), - file) - fmt = self._getoverviewfmt() - return resp, _parse_overview(lines, fmt) - - def over(self, message_spec, *, file=None): - """Process an OVER command. If the command isn't supported, fall - back to XOVER. Arguments: - - message_spec: - - either a message id, indicating the article to fetch - information about - - or a (start, end) tuple, indicating a range of article numbers; - if end is None, information up to the newest message will be - retrieved - - or None, indicating the current article number must be used - - file: Filename string or file object to store the result in - Returns: - - resp: server response if successful - - list: list of dicts containing the response fields - - NOTE: the "message id" form isn't supported by XOVER - """ - cmd = 'OVER' if 'OVER' in self._caps else 'XOVER' - if isinstance(message_spec, (tuple, list)): - start, end = message_spec - cmd += ' {0}-{1}'.format(start, end or '') - elif message_spec is not None: - cmd = cmd + ' ' + message_spec - resp, lines = self._longcmdstring(cmd, file) - fmt = self._getoverviewfmt() - return resp, _parse_overview(lines, fmt) - - def date(self): - """Process the DATE command. - Returns: - - resp: server response if successful - - date: datetime object - """ - resp = self._shortcmd("DATE") - if not resp.startswith('111'): - raise NNTPReplyError(resp) - elem = resp.split() - if len(elem) != 2: - raise NNTPDataError(resp) - date = elem[1] - if len(date) != 14: - raise NNTPDataError(resp) - return resp, _parse_datetime(date, None) - - def _post(self, command, f): - resp = self._shortcmd(command) - # Raises a specific exception if posting is not allowed - if not resp.startswith('3'): - raise NNTPReplyError(resp) - if isinstance(f, (bytes, bytearray)): - f = f.splitlines() - # We don't use _putline() because: - # - we don't want additional CRLF if the file or iterable is already - # in the right format - # - we don't want a spurious flush() after each line is written - for line in f: - if not line.endswith(_CRLF): - line = line.rstrip(b"\r\n") + _CRLF - if line.startswith(b'.'): - line = b'.' + line - self.file.write(line) - self.file.write(b".\r\n") - self.file.flush() - return self._getresp() - - def post(self, data): - """Process a POST command. Arguments: - - data: bytes object, iterable or file containing the article - Returns: - - resp: server response if successful""" - return self._post('POST', data) - - def ihave(self, message_id, data): - """Process an IHAVE command. Arguments: - - message_id: message-id of the article - - data: file containing the article - Returns: - - resp: server response if successful - Note that if the server refuses the article an exception is raised.""" - return self._post('IHAVE {0}'.format(message_id), data) - - def _close(self): - try: - if self.file: - self.file.close() - del self.file - finally: - self.sock.close() - - def quit(self): - """Process a QUIT command and close the socket. Returns: - - resp: server response if successful""" - try: - resp = self._shortcmd('QUIT') - finally: - self._close() - return resp - - def login(self, user=None, password=None, usenetrc=True): - if self.authenticated: - raise ValueError("Already logged in.") - if not user and not usenetrc: - raise ValueError( - "At least one of `user` and `usenetrc` must be specified") - # If no login/password was specified but netrc was requested, - # try to get them from ~/.netrc - # Presume that if .netrc has an entry, NNRP authentication is required. - try: - if usenetrc and not user: - import netrc - credentials = netrc.netrc() - auth = credentials.authenticators(self.host) - if auth: - user = auth[0] - password = auth[2] - except OSError: - pass - # Perform NNTP authentication if needed. - if not user: - return - resp = self._shortcmd('authinfo user ' + user) - if resp.startswith('381'): - if not password: - raise NNTPReplyError(resp) - else: - resp = self._shortcmd('authinfo pass ' + password) - if not resp.startswith('281'): - raise NNTPPermanentError(resp) - # Capabilities might have changed after login - self._caps = None - self.getcapabilities() - # Attempt to send mode reader if it was requested after login. - # Only do so if we're not in reader mode already. - if self.readermode_afterauth and 'READER' not in self._caps: - self._setreadermode() - # Capabilities might have changed after MODE READER - self._caps = None - self.getcapabilities() - - def _setreadermode(self): - try: - self.welcome = self._shortcmd('mode reader') - except NNTPPermanentError: - # Error 5xx, probably 'not implemented' - pass - except NNTPTemporaryError as e: - if e.response.startswith('480'): - # Need authorization before 'mode reader' - self.readermode_afterauth = True - else: - raise - - if _have_ssl: - def starttls(self, context=None): - """Process a STARTTLS command. Arguments: - - context: SSL context to use for the encrypted connection - """ - # Per RFC 4642, STARTTLS MUST NOT be sent after authentication or if - # a TLS session already exists. - if self.tls_on: - raise ValueError("TLS is already enabled.") - if self.authenticated: - raise ValueError("TLS cannot be started after authentication.") - resp = self._shortcmd('STARTTLS') - if resp.startswith('382'): - self.file.close() - self.sock = _encrypt_on(self.sock, context, self.host) - self.file = self.sock.makefile("rwb") - self.tls_on = True - # Capabilities may change after TLS starts up, so ask for them - # again. - self._caps = None - self.getcapabilities() - else: - raise NNTPError("TLS failed to start.") - - -if _have_ssl: - class NNTP_SSL(NNTP): - - def __init__(self, host, port=NNTP_SSL_PORT, - user=None, password=None, ssl_context=None, - readermode=None, usenetrc=False, - timeout=_GLOBAL_DEFAULT_TIMEOUT): - """This works identically to NNTP.__init__, except for the change - in default port and the `ssl_context` argument for SSL connections. - """ - self.ssl_context = ssl_context - super().__init__(host, port, user, password, readermode, - usenetrc, timeout) - - def _create_socket(self, timeout): - sock = super()._create_socket(timeout) - try: - sock = _encrypt_on(sock, self.ssl_context, self.host) - except: - sock.close() - raise - else: - return sock - - __all__.append("NNTP_SSL") - - -# Test retrieval when run as a script. -if __name__ == '__main__': - import argparse - - parser = argparse.ArgumentParser(description="""\ - nntplib built-in demo - display the latest articles in a newsgroup""") - parser.add_argument('-g', '--group', default='gmane.comp.python.general', - help='group to fetch messages from (default: %(default)s)') - parser.add_argument('-s', '--server', default='news.gmane.io', - help='NNTP server hostname (default: %(default)s)') - parser.add_argument('-p', '--port', default=-1, type=int, - help='NNTP port number (default: %s / %s)' % (NNTP_PORT, NNTP_SSL_PORT)) - parser.add_argument('-n', '--nb-articles', default=10, type=int, - help='number of articles to fetch (default: %(default)s)') - parser.add_argument('-S', '--ssl', action='store_true', default=False, - help='use NNTP over SSL') - args = parser.parse_args() - - port = args.port - if not args.ssl: - if port == -1: - port = NNTP_PORT - s = NNTP(host=args.server, port=port) - else: - if port == -1: - port = NNTP_SSL_PORT - s = NNTP_SSL(host=args.server, port=port) - - caps = s.getcapabilities() - if 'STARTTLS' in caps: - s.starttls() - resp, count, first, last, name = s.group(args.group) - print('Group', name, 'has', count, 'articles, range', first, 'to', last) - - def cut(s, lim): - if len(s) > lim: - s = s[:lim - 4] + "..." - return s - - first = str(int(last) - args.nb_articles + 1) - resp, overviews = s.xover(first, last) - for artnum, over in overviews: - author = decode_header(over['from']).split('<', 1)[0] - subject = decode_header(over['subject']) - lines = int(over[':lines']) - print("{:7} {:20} {:42} ({})".format( - artnum, cut(author, 20), cut(subject, 42), lines) - ) - - s.quit() diff --git a/PythonLib/full/ntpath.py b/PythonLib/full/ntpath.py index c05e965f..01f060e7 100644 --- a/PythonLib/full/ntpath.py +++ b/PythonLib/full/ntpath.py @@ -19,18 +19,17 @@ import os import sys -import stat import genericpath from genericpath import * - __all__ = ["normcase","isabs","join","splitdrive","splitroot","split","splitext", "basename","dirname","commonprefix","getsize","getmtime", "getatime","getctime", "islink","exists","lexists","isdir","isfile", - "ismount", "expanduser","expandvars","normpath","abspath", - "curdir","pardir","sep","pathsep","defpath","altsep", + "ismount","isreserved","expanduser","expandvars","normpath", + "abspath","curdir","pardir","sep","pathsep","defpath","altsep", "extsep","devnull","realpath","supports_unicode_filenames","relpath", - "samefile", "sameopenfile", "samestat", "commonpath", "isjunction"] + "samefile", "sameopenfile", "samestat", "commonpath", "isjunction", + "isdevdrive", "ALLOW_MISSING"] def _get_bothseps(path): if isinstance(path, bytes): @@ -78,12 +77,6 @@ def normcase(s): return s.replace('/', '\\').lower() -# Return whether a path is absolute. -# Trivial in Posix, harder on Windows. -# For Windows it is absolute if it starts with a slash or backslash (current -# volume), or if a pathname after the volume-letter-and-colon or UNC-resource -# starts with a slash or backslash. - def isabs(s): """Test whether a path is absolute""" s = os.fspath(s) @@ -91,16 +84,15 @@ def isabs(s): sep = b'\\' altsep = b'/' colon_sep = b':\\' + double_sep = b'\\\\' else: sep = '\\' altsep = '/' colon_sep = ':\\' + double_sep = '\\\\' s = s[:3].replace(altsep, sep) # Absolute: UNC, device, and paths with a drive and root. - # LEGACY BUG: isabs("/x") should be false since the path has no drive. - if s.startswith(sep) or s.startswith(colon_sep, 1): - return True - return False + return s.startswith(colon_sep, 1) or s.startswith(double_sep) # Join two (or more) paths. @@ -109,16 +101,14 @@ def join(path, *paths): if isinstance(path, bytes): sep = b'\\' seps = b'\\/' - colon = b':' + colon_seps = b':\\/' else: sep = '\\' seps = '\\/' - colon = ':' + colon_seps = ':\\/' try: - if not paths: - path[:0] + sep #23780: Ensure compatible data type even if p is null. result_drive, result_root, result_path = splitroot(path) - for p in map(os.fspath, paths): + for p in paths: p_drive, p_root, p_path = splitroot(p) if p_root: # Second path is absolute @@ -142,7 +132,7 @@ def join(path, *paths): result_path = result_path + p_path ## add separator between UNC and non-absolute path if (result_path and not result_root and - result_drive and result_drive[-1:] not in colon + seps): + result_drive and result_drive[-1] not in colon_seps): return result_drive + sep + result_path return result_drive + result_root + result_path except (TypeError, AttributeError, BytesWarning): @@ -176,56 +166,52 @@ def splitdrive(p): return drive, root + tail -def splitroot(p): - """Split a pathname into drive, root and tail. The drive is defined - exactly as in splitdrive(). On Windows, the root may be a single path - separator or an empty string. The tail contains anything after the root. - For example: +try: + from nt import _path_splitroot_ex as splitroot +except ImportError: + def splitroot(p): + """Split a pathname into drive, root and tail. - splitroot('//server/share/') == ('//server/share', '/', '') - splitroot('C:/Users/Barney') == ('C:', '/', 'Users/Barney') - splitroot('C:///spam///ham') == ('C:', '/', '//spam///ham') - splitroot('Windows/notepad') == ('', '', 'Windows/notepad') - """ - p = os.fspath(p) - if isinstance(p, bytes): - sep = b'\\' - altsep = b'/' - colon = b':' - unc_prefix = b'\\\\?\\UNC\\' - empty = b'' - else: - sep = '\\' - altsep = '/' - colon = ':' - unc_prefix = '\\\\?\\UNC\\' - empty = '' - normp = p.replace(altsep, sep) - if normp[:1] == sep: - if normp[1:2] == sep: - # UNC drives, e.g. \\server\share or \\?\UNC\server\share - # Device drives, e.g. \\.\device or \\?\device - start = 8 if normp[:8].upper() == unc_prefix else 2 - index = normp.find(sep, start) - if index == -1: - return p, empty, empty - index2 = normp.find(sep, index + 1) - if index2 == -1: - return p, empty, empty - return p[:index2], p[index2:index2 + 1], p[index2 + 1:] + The tail contains anything after the root.""" + p = os.fspath(p) + if isinstance(p, bytes): + sep = b'\\' + altsep = b'/' + colon = b':' + unc_prefix = b'\\\\?\\UNC\\' + empty = b'' else: - # Relative path with root, e.g. \Windows - return empty, p[:1], p[1:] - elif normp[1:2] == colon: - if normp[2:3] == sep: - # Absolute drive-letter path, e.g. X:\Windows - return p[:2], p[2:3], p[3:] + sep = '\\' + altsep = '/' + colon = ':' + unc_prefix = '\\\\?\\UNC\\' + empty = '' + normp = p.replace(altsep, sep) + if normp[:1] == sep: + if normp[1:2] == sep: + # UNC drives, e.g. \\server\share or \\?\UNC\server\share + # Device drives, e.g. \\.\device or \\?\device + start = 8 if normp[:8].upper() == unc_prefix else 2 + index = normp.find(sep, start) + if index == -1: + return p, empty, empty + index2 = normp.find(sep, index + 1) + if index2 == -1: + return p, empty, empty + return p[:index2], p[index2:index2 + 1], p[index2 + 1:] + else: + # Relative path with root, e.g. \Windows + return empty, p[:1], p[1:] + elif normp[1:2] == colon: + if normp[2:3] == sep: + # Absolute drive-letter path, e.g. X:\Windows + return p[:2], p[2:3], p[3:] + else: + # Relative path with drive, e.g. X:Windows + return p[:2], empty, p[2:] else: - # Relative path with drive, e.g. X:Windows - return p[:2], empty, p[2:] - else: - # Relative path, e.g. Windows - return empty, empty, p + # Relative path, e.g. Windows + return empty, empty, p # Split a path in head (everything up to the last '/') and tail (the @@ -277,33 +263,6 @@ def dirname(p): return split(p)[0] -# Is a path a junction? - -if hasattr(os.stat_result, 'st_reparse_tag'): - def isjunction(path): - """Test whether a path is a junction""" - try: - st = os.lstat(path) - except (OSError, ValueError, AttributeError): - return False - return bool(st.st_reparse_tag == stat.IO_REPARSE_TAG_MOUNT_POINT) -else: - def isjunction(path): - """Test whether a path is a junction""" - os.fspath(path) - return False - - -# Being true for dangling symbolic links is also useful. - -def lexists(path): - """Test whether a path exists. Returns True for broken symbolic links""" - try: - st = os.lstat(path) - except (OSError, ValueError): - return False - return True - # Is a path a mount point? # Any drive letter root (eg c:\) # Any share UNC (eg \\server\share) @@ -338,6 +297,40 @@ def ismount(path): return False +_reserved_chars = frozenset( + {chr(i) for i in range(32)} | + {'"', '*', ':', '<', '>', '?', '|', '/', '\\'} +) + +_reserved_names = frozenset( + {'CON', 'PRN', 'AUX', 'NUL', 'CONIN$', 'CONOUT$'} | + {f'COM{c}' for c in '123456789\xb9\xb2\xb3'} | + {f'LPT{c}' for c in '123456789\xb9\xb2\xb3'} +) + +def isreserved(path): + """Return true if the pathname is reserved by the system.""" + # Refer to "Naming Files, Paths, and Namespaces": + # https://docs.microsoft.com/en-us/windows/win32/fileio/naming-a-file + path = os.fsdecode(splitroot(path)[2]).replace(altsep, sep) + return any(_isreservedname(name) for name in reversed(path.split(sep))) + +def _isreservedname(name): + """Return true if the filename is reserved by the system.""" + # Trailing dots and spaces are reserved. + if name[-1:] in ('.', ' '): + return name not in ('.', '..') + # Wildcards, separators, colon, and pipe (*?"<>/\:|) are reserved. + # ASCII control characters (0-31) are reserved. + # Colon is reserved for file streams (e.g. "name:stream[:type]"). + if _reserved_chars.intersection(name): + return True + # DOS device names are reserved (e.g. "nul" or "nul .txt"). The rules + # are complex and vary across Windows versions. On the side of + # caution, return True for names that may not be reserved. + return name.partition('.')[0].rstrip(' ').upper() in _reserved_names + + # Expand paths beginning with '~' or '~user'. # '~' means $HOME; '~user' means that user's home directory. # If the path doesn't begin with '~', or if the user or $HOME is unknown, @@ -353,24 +346,23 @@ def expanduser(path): If user or $HOME is unknown, do nothing.""" path = os.fspath(path) if isinstance(path, bytes): + seps = b'\\/' tilde = b'~' else: + seps = '\\/' tilde = '~' if not path.startswith(tilde): return path i, n = 1, len(path) - while i < n and path[i] not in _get_bothseps(path): + while i < n and path[i] not in seps: i += 1 if 'USERPROFILE' in os.environ: userhome = os.environ['USERPROFILE'] - elif not 'HOMEPATH' in os.environ: + elif 'HOMEPATH' not in os.environ: return path else: - try: - drive = os.environ['HOMEDRIVE'] - except KeyError: - drive = '' + drive = os.environ.get('HOMEDRIVE', '') userhome = join(drive, os.environ['HOMEPATH']) if i != 1: #~user @@ -408,17 +400,23 @@ def expanduser(path): # XXX With COMMAND.COM you can use any characters in a variable name, # XXX except '^|<>='. +_varpattern = r"'[^']*'?|%(%|[^%]*%?)|\$(\$|[-\w]+|\{[^}]*\}?)" +_varsub = None +_varsubb = None + def expandvars(path): """Expand shell variables of the forms $var, ${var} and %var%. Unknown variables are left unchanged.""" path = os.fspath(path) + global _varsub, _varsubb if isinstance(path, bytes): if b'$' not in path and b'%' not in path: return path - import string - varchars = bytes(string.ascii_letters + string.digits + '_-', 'ascii') - quote = b'\'' + if not _varsubb: + import re + _varsubb = re.compile(_varpattern.encode(), re.ASCII).sub + sub = _varsubb percent = b'%' brace = b'{' rbrace = b'}' @@ -427,94 +425,44 @@ def expandvars(path): else: if '$' not in path and '%' not in path: return path - import string - varchars = string.ascii_letters + string.digits + '_-' - quote = '\'' + if not _varsub: + import re + _varsub = re.compile(_varpattern, re.ASCII).sub + sub = _varsub percent = '%' brace = '{' rbrace = '}' dollar = '$' environ = os.environ - res = path[:0] - index = 0 - pathlen = len(path) - while index < pathlen: - c = path[index:index+1] - if c == quote: # no expansion within single quotes - path = path[index + 1:] - pathlen = len(path) - try: - index = path.index(c) - res += c + path[:index + 1] - except ValueError: - res += c + path - index = pathlen - 1 - elif c == percent: # variable or '%' - if path[index + 1:index + 2] == percent: - res += c - index += 1 - else: - path = path[index+1:] - pathlen = len(path) - try: - index = path.index(percent) - except ValueError: - res += percent + path - index = pathlen - 1 - else: - var = path[:index] - try: - if environ is None: - value = os.fsencode(os.environ[os.fsdecode(var)]) - else: - value = environ[var] - except KeyError: - value = percent + var + percent - res += value - elif c == dollar: # variable or '$$' - if path[index + 1:index + 2] == dollar: - res += c - index += 1 - elif path[index + 1:index + 2] == brace: - path = path[index+2:] - pathlen = len(path) - try: - index = path.index(rbrace) - except ValueError: - res += dollar + brace + path - index = pathlen - 1 - else: - var = path[:index] - try: - if environ is None: - value = os.fsencode(os.environ[os.fsdecode(var)]) - else: - value = environ[var] - except KeyError: - value = dollar + brace + var + rbrace - res += value - else: - var = path[:0] - index += 1 - c = path[index:index + 1] - while c and c in varchars: - var += c - index += 1 - c = path[index:index + 1] - try: - if environ is None: - value = os.fsencode(os.environ[os.fsdecode(var)]) - else: - value = environ[var] - except KeyError: - value = dollar + var - res += value - if c: - index -= 1 + + def repl(m): + lastindex = m.lastindex + if lastindex is None: + return m[0] + name = m[lastindex] + if lastindex == 1: + if name == percent: + return name + if not name.endswith(percent): + return m[0] + name = name[:-1] else: - res += c - index += 1 - return res + if name == dollar: + return name + if name.startswith(brace): + if not name.endswith(rbrace): + return m[0] + name = name[1:-1] + + try: + if environ is None: + return os.fsencode(os.environ[os.fsdecode(name)]) + else: + return environ[name] + except KeyError: + return m[0] + + return sub(repl, path) # Normalize a path, e.g. A//B, A/./B and A/foo/../B all become A\B. @@ -606,12 +554,13 @@ def abspath(path): return normpath(path) try: - from nt import _getfinalpathname, readlink as _nt_readlink + from nt import _findfirstfile, _getfinalpathname, readlink as _nt_readlink except ImportError: # realpath is a no-op on systems without _getfinalpathname support. - realpath = abspath + def realpath(path, *, strict=False): + return abspath(path) else: - def _readlink_deep(path): + def _readlink_deep(path, ignored_error=OSError): # These error codes indicate that we should stop reading links and # return the path we currently have. # 1: ERROR_INVALID_FUNCTION @@ -644,7 +593,7 @@ def _readlink_deep(path): path = old_path break path = normpath(join(dirname(old_path), path)) - except OSError as ex: + except ignored_error as ex: if ex.winerror in allowed_winerror: break raise @@ -653,7 +602,7 @@ def _readlink_deep(path): break return path - def _getfinalpathname_nonstrict(path): + def _getfinalpathname_nonstrict(path, ignored_error=OSError): # These error codes indicate that we should stop resolving the path # and return the value we currently have. # 1: ERROR_INVALID_FUNCTION @@ -669,9 +618,10 @@ def _getfinalpathname_nonstrict(path): # 87: ERROR_INVALID_PARAMETER # 123: ERROR_INVALID_NAME # 161: ERROR_BAD_PATHNAME + # 1005: ERROR_UNRECOGNIZED_VOLUME # 1920: ERROR_CANT_ACCESS_FILE # 1921: ERROR_CANT_RESOLVE_FILENAME (implies unfollowable symlink) - allowed_winerror = 1, 2, 3, 5, 21, 32, 50, 53, 65, 67, 87, 123, 161, 1920, 1921 + allowed_winerror = 1, 2, 3, 5, 21, 32, 50, 53, 65, 67, 87, 123, 161, 1005, 1920, 1921 # Non-strict algorithm is to find as much of the target directory # as we can and join the rest. @@ -680,23 +630,29 @@ def _getfinalpathname_nonstrict(path): try: path = _getfinalpathname(path) return join(path, tail) if tail else path - except OSError as ex: + except ignored_error as ex: if ex.winerror not in allowed_winerror: raise try: # The OS could not resolve this path fully, so we attempt # to follow the link ourselves. If we succeed, join the tail # and return. - new_path = _readlink_deep(path) + new_path = _readlink_deep(path, + ignored_error=ignored_error) if new_path != path: return join(new_path, tail) if tail else new_path - except OSError: + except ignored_error: # If we fail to readlink(), let's keep traversing pass - path, name = split(path) - # TODO (bpo-38186): Request the real file name from the directory - # entry using FindFirstFileW. For now, we will return the path - # as best we have it + # If we get these errors, try to get the real name of the file without accessing it. + if ex.winerror in (1, 5, 32, 50, 87, 1920, 1921): + try: + name = _findfirstfile(path) + path, _ = split(path) + except ignored_error: + path, name = split(path) + else: + path, name = split(path) if path and not name: return path + tail tail = join(name, tail) if tail else name @@ -710,7 +666,8 @@ def realpath(path, *, strict=False): new_unc_prefix = b'\\\\' cwd = os.getcwdb() # bpo-38081: Special case for realpath(b'nul') - if normcase(path) == normcase(os.fsencode(devnull)): + devnull = b'nul' + if normcase(path) == devnull: return b'\\\\.\\NUL' else: prefix = '\\\\?\\' @@ -718,9 +675,19 @@ def realpath(path, *, strict=False): new_unc_prefix = '\\\\' cwd = os.getcwd() # bpo-38081: Special case for realpath('nul') - if normcase(path) == normcase(devnull): + devnull = 'nul' + if normcase(path) == devnull: return '\\\\.\\NUL' had_prefix = path.startswith(prefix) + + if strict is ALLOW_MISSING: + ignored_error = FileNotFoundError + strict = True + elif strict: + ignored_error = () + else: + ignored_error = OSError + if not had_prefix and not isabs(path): path = join(cwd, path) try: @@ -728,17 +695,16 @@ def realpath(path, *, strict=False): initial_winerror = 0 except ValueError as ex: # gh-106242: Raised for embedded null characters - # In strict mode, we convert into an OSError. + # In strict modes, we convert into an OSError. # Non-strict mode returns the path as-is, since we've already # made it absolute. if strict: raise OSError(str(ex)) from None path = normpath(path) - except OSError as ex: - if strict: - raise + except ignored_error as ex: initial_winerror = ex.winerror - path = _getfinalpathname_nonstrict(path) + path = _getfinalpathname_nonstrict(path, + ignored_error=ignored_error) # The path returned by _getfinalpathname will always start with \\?\ - # strip off that prefix unless it was already provided on the original # path. @@ -771,6 +737,9 @@ def realpath(path, *, strict=False): def relpath(path, start=None): """Return a relative version of a path""" path = os.fspath(path) + if not path: + raise ValueError("no path specified") + if isinstance(path, bytes): sep = b'\\' curdir = b'.' @@ -782,22 +751,20 @@ def relpath(path, start=None): if start is None: start = curdir + else: + start = os.fspath(start) - if not path: - raise ValueError("no path specified") - - start = os.fspath(start) try: - start_abs = abspath(normpath(start)) - path_abs = abspath(normpath(path)) + start_abs = abspath(start) + path_abs = abspath(path) start_drive, _, start_rest = splitroot(start_abs) path_drive, _, path_rest = splitroot(path_abs) if normcase(start_drive) != normcase(path_drive): raise ValueError("path is on mount %r, start on mount %r" % ( path_drive, start_drive)) - start_list = [x for x in start_rest.split(sep) if x] - path_list = [x for x in path_rest.split(sep) if x] + start_list = start_rest.split(sep) if start_rest else [] + path_list = path_rest.split(sep) if path_rest else [] # Work out how much of the filepath is shared by start and path. i = 0 for e1, e2 in zip(start_list, path_list): @@ -808,29 +775,28 @@ def relpath(path, start=None): rel_list = [pardir] * (len(start_list)-i) + path_list[i:] if not rel_list: return curdir - return join(*rel_list) + return sep.join(rel_list) except (TypeError, ValueError, AttributeError, BytesWarning, DeprecationWarning): genericpath._check_arg_types('relpath', path, start) raise -# Return the longest common sub-path of the sequence of paths given as input. +# Return the longest common sub-path of the iterable of paths given as input. # The function is case-insensitive and 'separator-insensitive', i.e. if the # only difference between two paths is the use of '\' versus '/' as separator, # they are deemed to be equal. # # However, the returned path will have the standard '\' separator (even if the # given paths had the alternative '/' separator) and will have the case of the -# first path given in the sequence. Additionally, any trailing separator is +# first path given in the iterable. Additionally, any trailing separator is # stripped from the returned path. def commonpath(paths): - """Given a sequence of path names, returns the longest common sub-path.""" - + """Given an iterable of path names, returns the longest common sub-path.""" + paths = tuple(map(os.fspath, paths)) if not paths: - raise ValueError('commonpath() arg is an empty sequence') + raise ValueError('commonpath() arg is an empty iterable') - paths = tuple(map(os.fspath, paths)) if isinstance(paths[0], bytes): sep = b'\\' altsep = b'/' @@ -844,9 +810,6 @@ def commonpath(paths): drivesplits = [splitroot(p.replace(altsep, sep).lower()) for p in paths] split_paths = [p.split(sep) for d, r, p in drivesplits] - if len({r for d, r, p in drivesplits}) != 1: - raise ValueError("Can't mix absolute and relative paths") - # Check that all drive letters or UNC paths match. The check is made only # now otherwise type errors for mixing strings and bytes would not be # caught. @@ -854,6 +817,12 @@ def commonpath(paths): raise ValueError("Paths don't have the same drive") drive, root, path = splitroot(paths[0].replace(altsep, sep)) + if len({r for d, r, p in drivesplits}) != 1: + if drive: + raise ValueError("Can't mix absolute and relative paths") + else: + raise ValueError("Can't mix rooted and not-rooted paths") + common = path.split(sep) common = [c for c in common if c and c != curdir] @@ -874,13 +843,15 @@ def commonpath(paths): try: - # The isdir(), isfile(), islink() and exists() implementations in - # genericpath use os.stat(). This is overkill on Windows. Use simpler + # The isdir(), isfile(), islink(), exists() and lexists() implementations + # in genericpath use os.stat(). This is overkill on Windows. Use simpler # builtin functions if they are available. from nt import _path_isdir as isdir from nt import _path_isfile as isfile from nt import _path_islink as islink + from nt import _path_isjunction as isjunction from nt import _path_exists as exists + from nt import _path_lexists as lexists except ImportError: # Use genericpath.* as imported above pass @@ -888,15 +859,12 @@ def commonpath(paths): try: from nt import _path_isdevdrive -except ImportError: - def isdevdrive(path): - """Determines whether the specified path is on a Windows Dev Drive.""" - # Never a Dev Drive - return False -else: def isdevdrive(path): """Determines whether the specified path is on a Windows Dev Drive.""" try: return _path_isdevdrive(abspath(path)) except OSError: return False +except ImportError: + # Use genericpath.isdevdrive as imported above + pass diff --git a/PythonLib/full/nturl2path.py b/PythonLib/full/nturl2path.py index 757fd01b..57c7858d 100644 --- a/PythonLib/full/nturl2path.py +++ b/PythonLib/full/nturl2path.py @@ -3,7 +3,15 @@ This module only exists to provide OS-specific code for urllib.requests, thus do not use directly. """ -# Testing is done through test_urllib. +# Testing is done through test_nturl2path. + +import warnings + + +warnings._deprecated( + __name__, + message=f"{warnings._DEPRECATED_MSG}; use 'urllib.request' instead", + remove=(3, 19)) def url2pathname(url): """OS-specific conversion from a relative URL of the 'file' scheme @@ -14,7 +22,7 @@ def url2pathname(url): # ///C:/foo/bar/spam.foo # become # C:\foo\bar\spam.foo - import string, urllib.parse + import urllib.parse if url[:3] == '///': # URL has an empty authority section, so the path begins on the third # character. @@ -25,19 +33,14 @@ def url2pathname(url): if url[:3] == '///': # Skip past extra slash before UNC drive in URL path. url = url[1:] - # Windows itself uses ":" even in URLs. - url = url.replace(':', '|') - if not '|' in url: - # No drive specifier, just convert slashes - # make sure not to convert quoted slashes :-) - return urllib.parse.unquote(url.replace('/', '\\')) - comp = url.split('|') - if len(comp) != 2 or comp[0][-1] not in string.ascii_letters: - error = 'Bad URL: ' + url - raise OSError(error) - drive = comp[0][-1].upper() - tail = urllib.parse.unquote(comp[1].replace('/', '\\')) - return drive + ':' + tail + else: + if url[:1] == '/' and url[2:3] in (':', '|'): + # Skip past extra slash before DOS drive in URL path. + url = url[1:] + if url[1:2] == '|': + # Older URLs use a pipe after a drive letter + url = url[:1] + ':' + url[2:] + return urllib.parse.unquote(url.replace('/', '\\')) def pathname2url(p): """OS-specific conversion from a file system path to a relative URL @@ -46,6 +49,7 @@ def pathname2url(p): # C:\foo\bar\spam.foo # becomes # ///C:/foo/bar/spam.foo + import ntpath import urllib.parse # First, clean up some special forms. We are going to sacrifice # the additional information anyway @@ -54,16 +58,17 @@ def pathname2url(p): p = p[4:] if p[:4].upper() == 'UNC/': p = '//' + p[4:] - elif p[1:2] != ':': - raise OSError('Bad path: ' + p) - if not ':' in p: - # No DOS drive specified, just quote the pathname - return urllib.parse.quote(p) - comp = p.split(':', maxsplit=2) - if len(comp) != 2 or len(comp[0]) > 1: - error = 'Bad path: ' + p - raise OSError(error) + drive, root, tail = ntpath.splitroot(p) + if drive: + if drive[1:] == ':': + # DOS drive specified. Add three slashes to the start, producing + # an authority section with a zero-length authority, and a path + # section starting with a single slash. + drive = f'///{drive}' + drive = urllib.parse.quote(drive, safe='/:') + elif root: + # Add explicitly empty authority to path beginning with one slash. + root = f'//{root}' - drive = urllib.parse.quote(comp[0].upper()) - tail = urllib.parse.quote(comp[1]) - return '///' + drive + ':' + tail + tail = urllib.parse.quote(tail) + return drive + root + tail diff --git a/PythonLib/full/numbers.py b/PythonLib/full/numbers.py index a2913e32..37fddb89 100644 --- a/PythonLib/full/numbers.py +++ b/PythonLib/full/numbers.py @@ -290,18 +290,27 @@ def conjugate(self): class Rational(Real): - """.numerator and .denominator should be in lowest terms.""" + """To Real, Rational adds numerator and denominator properties. + + The numerator and denominator values should be in lowest terms, + with a positive denominator. + """ __slots__ = () @property @abstractmethod def numerator(self): + """The numerator of a rational number in lowest terms.""" raise NotImplementedError @property @abstractmethod def denominator(self): + """The denominator of a rational number in lowest terms. + + This denominator should be positive. + """ raise NotImplementedError # Concrete implementation of Real's conversion to float. diff --git a/PythonLib/full/opcode.py b/PythonLib/full/opcode.py index 6bb2f1c1..0e9520b6 100644 --- a/PythonLib/full/opcode.py +++ b/PythonLib/full/opcode.py @@ -4,441 +4,46 @@ operate on bytecodes (e.g. peephole optimizers). """ -__all__ = ["cmp_op", "hasarg", "hasconst", "hasname", "hasjrel", "hasjabs", - "haslocal", "hascompare", "hasfree", "hasexc", "opname", "opmap", - "HAVE_ARGUMENT", "EXTENDED_ARG"] -# It's a chicken-and-egg I'm afraid: -# We're imported before _opcode's made. -# With exception unheeded -# (stack_effect is not needed) -# Both our chickens and eggs are allayed. -# --Larry Hastings, 2013/11/23 +__all__ = ["cmp_op", "stack_effect", "hascompare", "opname", "opmap", + "HAVE_ARGUMENT", "EXTENDED_ARG", "hasarg", "hasconst", "hasname", + "hasjump", "hasjrel", "hasjabs", "hasfree", "haslocal", "hasexc"] -try: - from _opcode import stack_effect - __all__.append('stack_effect') -except ImportError: - pass +import builtins +import _opcode +from _opcode import stack_effect -cmp_op = ('<', '<=', '==', '!=', '>', '>=') - -hasarg = [] -hasconst = [] -hasname = [] -hasjrel = [] -hasjabs = [] -haslocal = [] -hascompare = [] -hasfree = [] -hasexc = [] - - -ENABLE_SPECIALIZATION = True - -def is_pseudo(op): - return op >= MIN_PSEUDO_OPCODE and op <= MAX_PSEUDO_OPCODE - -oplists = [hasarg, hasconst, hasname, hasjrel, hasjabs, - haslocal, hascompare, hasfree, hasexc] - -opmap = {} - -## pseudo opcodes (used in the compiler) mapped to the values -## they can become in the actual code. -_pseudo_ops = {} - -def def_op(name, op): - opmap[name] = op - -def name_op(name, op): - def_op(name, op) - hasname.append(op) - -def jrel_op(name, op): - def_op(name, op) - hasjrel.append(op) - -def jabs_op(name, op): - def_op(name, op) - hasjabs.append(op) - -def pseudo_op(name, op, real_ops): - def_op(name, op) - _pseudo_ops[name] = real_ops - # add the pseudo opcode to the lists its targets are in - for oplist in oplists: - res = [opmap[rop] in oplist for rop in real_ops] - if any(res): - assert all(res) - oplist.append(op) - - -# Instruction opcodes for compiled code -# Blank lines correspond to available opcodes - -def_op('CACHE', 0) -def_op('POP_TOP', 1) -def_op('PUSH_NULL', 2) -def_op('INTERPRETER_EXIT', 3) - -def_op('END_FOR', 4) -def_op('END_SEND', 5) - -def_op('NOP', 9) - -def_op('UNARY_NEGATIVE', 11) -def_op('UNARY_NOT', 12) - -def_op('UNARY_INVERT', 15) - -# We reserve 17 as it is the initial value for the specializing counter -# This helps us catch cases where we attempt to execute a cache. -def_op('RESERVED', 17) - -def_op('BINARY_SUBSCR', 25) -def_op('BINARY_SLICE', 26) -def_op('STORE_SLICE', 27) - -def_op('GET_LEN', 30) -def_op('MATCH_MAPPING', 31) -def_op('MATCH_SEQUENCE', 32) -def_op('MATCH_KEYS', 33) - -def_op('PUSH_EXC_INFO', 35) -def_op('CHECK_EXC_MATCH', 36) -def_op('CHECK_EG_MATCH', 37) - -def_op('WITH_EXCEPT_START', 49) -def_op('GET_AITER', 50) -def_op('GET_ANEXT', 51) -def_op('BEFORE_ASYNC_WITH', 52) -def_op('BEFORE_WITH', 53) -def_op('END_ASYNC_FOR', 54) -def_op('CLEANUP_THROW', 55) - -def_op('STORE_SUBSCR', 60) -def_op('DELETE_SUBSCR', 61) - -def_op('GET_ITER', 68) -def_op('GET_YIELD_FROM_ITER', 69) - -def_op('LOAD_BUILD_CLASS', 71) - -def_op('LOAD_ASSERTION_ERROR', 74) -def_op('RETURN_GENERATOR', 75) - -def_op('RETURN_VALUE', 83) - -def_op('SETUP_ANNOTATIONS', 85) -def_op('LOAD_LOCALS', 87) - -def_op('POP_EXCEPT', 89) - -HAVE_ARGUMENT = 90 # real opcodes from here have an argument: - -name_op('STORE_NAME', 90) # Index in name list -name_op('DELETE_NAME', 91) # "" -def_op('UNPACK_SEQUENCE', 92) # Number of tuple items -jrel_op('FOR_ITER', 93) -def_op('UNPACK_EX', 94) -name_op('STORE_ATTR', 95) # Index in name list -name_op('DELETE_ATTR', 96) # "" -name_op('STORE_GLOBAL', 97) # "" -name_op('DELETE_GLOBAL', 98) # "" -def_op('SWAP', 99) -def_op('LOAD_CONST', 100) # Index in const list -hasconst.append(100) -name_op('LOAD_NAME', 101) # Index in name list -def_op('BUILD_TUPLE', 102) # Number of tuple items -def_op('BUILD_LIST', 103) # Number of list items -def_op('BUILD_SET', 104) # Number of set items -def_op('BUILD_MAP', 105) # Number of dict entries -name_op('LOAD_ATTR', 106) # Index in name list -def_op('COMPARE_OP', 107) # Comparison operator -hascompare.append(107) -name_op('IMPORT_NAME', 108) # Index in name list -name_op('IMPORT_FROM', 109) # Index in name list -jrel_op('JUMP_FORWARD', 110) # Number of words to skip -jrel_op('POP_JUMP_IF_FALSE', 114) -jrel_op('POP_JUMP_IF_TRUE', 115) -name_op('LOAD_GLOBAL', 116) # Index in name list -def_op('IS_OP', 117) -def_op('CONTAINS_OP', 118) -def_op('RERAISE', 119) -def_op('COPY', 120) -def_op('RETURN_CONST', 121) -hasconst.append(121) -def_op('BINARY_OP', 122) -jrel_op('SEND', 123) # Number of words to skip -def_op('LOAD_FAST', 124) # Local variable number, no null check -haslocal.append(124) -def_op('STORE_FAST', 125) # Local variable number -haslocal.append(125) -def_op('DELETE_FAST', 126) # Local variable number -haslocal.append(126) -def_op('LOAD_FAST_CHECK', 127) # Local variable number -haslocal.append(127) -jrel_op('POP_JUMP_IF_NOT_NONE', 128) -jrel_op('POP_JUMP_IF_NONE', 129) -def_op('RAISE_VARARGS', 130) # Number of raise arguments (1, 2, or 3) -def_op('GET_AWAITABLE', 131) -def_op('MAKE_FUNCTION', 132) # Flags -def_op('BUILD_SLICE', 133) # Number of items -jrel_op('JUMP_BACKWARD_NO_INTERRUPT', 134) # Number of words to skip (backwards) -def_op('MAKE_CELL', 135) -hasfree.append(135) -def_op('LOAD_CLOSURE', 136) -hasfree.append(136) -def_op('LOAD_DEREF', 137) -hasfree.append(137) -def_op('STORE_DEREF', 138) -hasfree.append(138) -def_op('DELETE_DEREF', 139) -hasfree.append(139) -jrel_op('JUMP_BACKWARD', 140) # Number of words to skip (backwards) -name_op('LOAD_SUPER_ATTR', 141) -def_op('CALL_FUNCTION_EX', 142) # Flags -def_op('LOAD_FAST_AND_CLEAR', 143) # Local variable number -haslocal.append(143) - -def_op('EXTENDED_ARG', 144) -EXTENDED_ARG = 144 -def_op('LIST_APPEND', 145) -def_op('SET_ADD', 146) -def_op('MAP_ADD', 147) -hasfree.append(148) -def_op('COPY_FREE_VARS', 149) -def_op('YIELD_VALUE', 150) -def_op('RESUME', 151) # This must be kept in sync with deepfreeze.py -def_op('MATCH_CLASS', 152) +from _opcode_metadata import (_specializations, _specialized_opmap, opmap, # noqa: F401 + HAVE_ARGUMENT, MIN_INSTRUMENTED_OPCODE) # noqa: F401 +EXTENDED_ARG = opmap['EXTENDED_ARG'] -def_op('FORMAT_VALUE', 155) -def_op('BUILD_CONST_KEY_MAP', 156) -def_op('BUILD_STRING', 157) - -def_op('LIST_EXTEND', 162) -def_op('SET_UPDATE', 163) -def_op('DICT_MERGE', 164) -def_op('DICT_UPDATE', 165) - -def_op('CALL', 171) -def_op('KW_NAMES', 172) -hasconst.append(172) -def_op('CALL_INTRINSIC_1', 173) -def_op('CALL_INTRINSIC_2', 174) - -name_op('LOAD_FROM_DICT_OR_GLOBALS', 175) -def_op('LOAD_FROM_DICT_OR_DEREF', 176) -hasfree.append(176) - -# Instrumented instructions -MIN_INSTRUMENTED_OPCODE = 237 - -def_op('INSTRUMENTED_LOAD_SUPER_ATTR', 237) -def_op('INSTRUMENTED_POP_JUMP_IF_NONE', 238) -def_op('INSTRUMENTED_POP_JUMP_IF_NOT_NONE', 239) -def_op('INSTRUMENTED_RESUME', 240) -def_op('INSTRUMENTED_CALL', 241) -def_op('INSTRUMENTED_RETURN_VALUE', 242) -def_op('INSTRUMENTED_YIELD_VALUE', 243) -def_op('INSTRUMENTED_CALL_FUNCTION_EX', 244) -def_op('INSTRUMENTED_JUMP_FORWARD', 245) -def_op('INSTRUMENTED_JUMP_BACKWARD', 246) -def_op('INSTRUMENTED_RETURN_CONST', 247) -def_op('INSTRUMENTED_FOR_ITER', 248) -def_op('INSTRUMENTED_POP_JUMP_IF_FALSE', 249) -def_op('INSTRUMENTED_POP_JUMP_IF_TRUE', 250) -def_op('INSTRUMENTED_END_FOR', 251) -def_op('INSTRUMENTED_END_SEND', 252) -def_op('INSTRUMENTED_INSTRUCTION', 253) -def_op('INSTRUMENTED_LINE', 254) -# 255 is reserved - -hasarg.extend([op for op in opmap.values() if op >= HAVE_ARGUMENT]) - -MIN_PSEUDO_OPCODE = 256 - -pseudo_op('SETUP_FINALLY', 256, ['NOP']) -hasexc.append(256) -pseudo_op('SETUP_CLEANUP', 257, ['NOP']) -hasexc.append(257) -pseudo_op('SETUP_WITH', 258, ['NOP']) -hasexc.append(258) -pseudo_op('POP_BLOCK', 259, ['NOP']) - -pseudo_op('JUMP', 260, ['JUMP_FORWARD', 'JUMP_BACKWARD']) -pseudo_op('JUMP_NO_INTERRUPT', 261, ['JUMP_FORWARD', 'JUMP_BACKWARD_NO_INTERRUPT']) - -pseudo_op('LOAD_METHOD', 262, ['LOAD_ATTR']) -pseudo_op('LOAD_SUPER_METHOD', 263, ['LOAD_SUPER_ATTR']) -pseudo_op('LOAD_ZERO_SUPER_METHOD', 264, ['LOAD_SUPER_ATTR']) -pseudo_op('LOAD_ZERO_SUPER_ATTR', 265, ['LOAD_SUPER_ATTR']) - -pseudo_op('STORE_FAST_MAYBE_NULL', 266, ['STORE_FAST']) - -MAX_PSEUDO_OPCODE = MIN_PSEUDO_OPCODE + len(_pseudo_ops) - 1 - -del def_op, name_op, jrel_op, jabs_op, pseudo_op - -opname = ['<%r>' % (op,) for op in range(MAX_PSEUDO_OPCODE + 1)] -for op, i in opmap.items(): - opname[i] = op +opname = ['<%r>' % (op,) for op in range(max(opmap.values()) + 1)] +for m in (opmap, _specialized_opmap): + for op, i in m.items(): + opname[i] = op +cmp_op = ('<', '<=', '==', '!=', '>', '>=') -_nb_ops = [ - ("NB_ADD", "+"), - ("NB_AND", "&"), - ("NB_FLOOR_DIVIDE", "//"), - ("NB_LSHIFT", "<<"), - ("NB_MATRIX_MULTIPLY", "@"), - ("NB_MULTIPLY", "*"), - ("NB_REMAINDER", "%"), - ("NB_OR", "|"), - ("NB_POWER", "**"), - ("NB_RSHIFT", ">>"), - ("NB_SUBTRACT", "-"), - ("NB_TRUE_DIVIDE", "/"), - ("NB_XOR", "^"), - ("NB_INPLACE_ADD", "+="), - ("NB_INPLACE_AND", "&="), - ("NB_INPLACE_FLOOR_DIVIDE", "//="), - ("NB_INPLACE_LSHIFT", "<<="), - ("NB_INPLACE_MATRIX_MULTIPLY", "@="), - ("NB_INPLACE_MULTIPLY", "*="), - ("NB_INPLACE_REMAINDER", "%="), - ("NB_INPLACE_OR", "|="), - ("NB_INPLACE_POWER", "**="), - ("NB_INPLACE_RSHIFT", ">>="), - ("NB_INPLACE_SUBTRACT", "-="), - ("NB_INPLACE_TRUE_DIVIDE", "/="), - ("NB_INPLACE_XOR", "^="), -] +# These lists are documented as part of the dis module's API +hasarg = [op for op in opmap.values() if _opcode.has_arg(op)] +hasconst = [op for op in opmap.values() if _opcode.has_const(op)] +hasname = [op for op in opmap.values() if _opcode.has_name(op)] +hasjump = [op for op in opmap.values() if _opcode.has_jump(op)] +hasjrel = hasjump # for backward compatibility +hasjabs = [] +hasfree = [op for op in opmap.values() if _opcode.has_free(op)] +haslocal = [op for op in opmap.values() if _opcode.has_local(op)] +hasexc = [op for op in opmap.values() if _opcode.has_exc(op)] -_intrinsic_1_descs = [ - "INTRINSIC_1_INVALID", - "INTRINSIC_PRINT", - "INTRINSIC_IMPORT_STAR", - "INTRINSIC_STOPITERATION_ERROR", - "INTRINSIC_ASYNC_GEN_WRAP", - "INTRINSIC_UNARY_POSITIVE", - "INTRINSIC_LIST_TO_TUPLE", - "INTRINSIC_TYPEVAR", - "INTRINSIC_PARAMSPEC", - "INTRINSIC_TYPEVARTUPLE", - "INTRINSIC_SUBSCRIPT_GENERIC", - "INTRINSIC_TYPEALIAS", -] -_intrinsic_2_descs = [ - "INTRINSIC_2_INVALID", - "INTRINSIC_PREP_RERAISE_STAR", - "INTRINSIC_TYPEVAR_WITH_BOUND", - "INTRINSIC_TYPEVAR_WITH_CONSTRAINTS", - "INTRINSIC_SET_FUNCTION_TYPE_PARAMS", -] +_intrinsic_1_descs = _opcode.get_intrinsic1_descs() +_intrinsic_2_descs = _opcode.get_intrinsic2_descs() +_special_method_names = _opcode.get_special_method_names() +_common_constants = [builtins.AssertionError, builtins.NotImplementedError, + builtins.tuple, builtins.all, builtins.any] +_nb_ops = _opcode.get_nb_ops() -_specializations = { - "BINARY_OP": [ - "BINARY_OP_ADD_FLOAT", - "BINARY_OP_ADD_INT", - "BINARY_OP_ADD_UNICODE", - "BINARY_OP_INPLACE_ADD_UNICODE", - "BINARY_OP_MULTIPLY_FLOAT", - "BINARY_OP_MULTIPLY_INT", - "BINARY_OP_SUBTRACT_FLOAT", - "BINARY_OP_SUBTRACT_INT", - ], - "BINARY_SUBSCR": [ - "BINARY_SUBSCR_DICT", - "BINARY_SUBSCR_GETITEM", - "BINARY_SUBSCR_LIST_INT", - "BINARY_SUBSCR_TUPLE_INT", - ], - "CALL": [ - "CALL_PY_EXACT_ARGS", - "CALL_PY_WITH_DEFAULTS", - "CALL_BOUND_METHOD_EXACT_ARGS", - "CALL_BUILTIN_CLASS", - "CALL_BUILTIN_FAST_WITH_KEYWORDS", - "CALL_METHOD_DESCRIPTOR_FAST_WITH_KEYWORDS", - "CALL_NO_KW_BUILTIN_FAST", - "CALL_NO_KW_BUILTIN_O", - "CALL_NO_KW_ISINSTANCE", - "CALL_NO_KW_LEN", - "CALL_NO_KW_LIST_APPEND", - "CALL_NO_KW_METHOD_DESCRIPTOR_FAST", - "CALL_NO_KW_METHOD_DESCRIPTOR_NOARGS", - "CALL_NO_KW_METHOD_DESCRIPTOR_O", - "CALL_NO_KW_STR_1", - "CALL_NO_KW_TUPLE_1", - "CALL_NO_KW_TYPE_1", - ], - "COMPARE_OP": [ - "COMPARE_OP_FLOAT", - "COMPARE_OP_INT", - "COMPARE_OP_STR", - ], - "FOR_ITER": [ - "FOR_ITER_LIST", - "FOR_ITER_TUPLE", - "FOR_ITER_RANGE", - "FOR_ITER_GEN", - ], - "LOAD_SUPER_ATTR": [ - "LOAD_SUPER_ATTR_ATTR", - "LOAD_SUPER_ATTR_METHOD", - ], - "LOAD_ATTR": [ - # These potentially push [NULL, bound method] onto the stack. - "LOAD_ATTR_CLASS", - "LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN", - "LOAD_ATTR_INSTANCE_VALUE", - "LOAD_ATTR_MODULE", - "LOAD_ATTR_PROPERTY", - "LOAD_ATTR_SLOT", - "LOAD_ATTR_WITH_HINT", - # These will always push [unbound method, self] onto the stack. - "LOAD_ATTR_METHOD_LAZY_DICT", - "LOAD_ATTR_METHOD_NO_DICT", - "LOAD_ATTR_METHOD_WITH_VALUES", - ], - "LOAD_CONST": [ - "LOAD_CONST__LOAD_FAST", - ], - "LOAD_FAST": [ - "LOAD_FAST__LOAD_CONST", - "LOAD_FAST__LOAD_FAST", - ], - "LOAD_GLOBAL": [ - "LOAD_GLOBAL_BUILTIN", - "LOAD_GLOBAL_MODULE", - ], - "STORE_ATTR": [ - "STORE_ATTR_INSTANCE_VALUE", - "STORE_ATTR_SLOT", - "STORE_ATTR_WITH_HINT", - ], - "STORE_FAST": [ - "STORE_FAST__LOAD_FAST", - "STORE_FAST__STORE_FAST", - ], - "STORE_SUBSCR": [ - "STORE_SUBSCR_DICT", - "STORE_SUBSCR_LIST_INT", - ], - "UNPACK_SEQUENCE": [ - "UNPACK_SEQUENCE_LIST", - "UNPACK_SEQUENCE_TUPLE", - "UNPACK_SEQUENCE_TWO_TUPLE", - ], - "SEND": [ - "SEND_GEN", - ], -} -_specialized_instructions = [ - opcode for family in _specializations.values() for opcode in family -] +hascompare = [opmap["COMPARE_OP"]] _cache_format = { "LOAD_GLOBAL": { @@ -449,6 +54,7 @@ def pseudo_op(name, op, real_ops): }, "BINARY_OP": { "counter": 1, + "descr": 4, }, "UNPACK_SEQUENCE": { "counter": 1, @@ -456,7 +62,7 @@ def pseudo_op(name, op, real_ops): "COMPARE_OP": { "counter": 1, }, - "BINARY_SUBSCR": { + "CONTAINS_OP": { "counter": 1, }, "FOR_ITER": { @@ -480,14 +86,37 @@ def pseudo_op(name, op, real_ops): "counter": 1, "func_version": 2, }, + "CALL_KW": { + "counter": 1, + "func_version": 2, + }, "STORE_SUBSCR": { "counter": 1, }, "SEND": { "counter": 1, }, + "JUMP_BACKWARD": { + "counter": 1, + }, + "TO_BOOL": { + "counter": 1, + "version": 2, + }, + "POP_JUMP_IF_TRUE": { + "counter": 1, + }, + "POP_JUMP_IF_FALSE": { + "counter": 1, + }, + "POP_JUMP_IF_NONE": { + "counter": 1, + }, + "POP_JUMP_IF_NOT_NONE": { + "counter": 1, + }, } -_inline_cache_entries = [ - sum(_cache_format.get(opname[opcode], {}).values()) for opcode in range(256) -] +_inline_cache_entries = { + name : sum(value.values()) for (name, value) in _cache_format.items() +} diff --git a/PythonLib/full/operator.py b/PythonLib/full/operator.py index 30116c11..1b765522 100644 --- a/PythonLib/full/operator.py +++ b/PythonLib/full/operator.py @@ -14,8 +14,8 @@ 'delitem', 'eq', 'floordiv', 'ge', 'getitem', 'gt', 'iadd', 'iand', 'iconcat', 'ifloordiv', 'ilshift', 'imatmul', 'imod', 'imul', 'index', 'indexOf', 'inv', 'invert', 'ior', 'ipow', 'irshift', - 'is_', 'is_not', 'isub', 'itemgetter', 'itruediv', 'ixor', 'le', - 'length_hint', 'lshift', 'lt', 'matmul', 'methodcaller', 'mod', + 'is_', 'is_none', 'is_not', 'is_not_none', 'isub', 'itemgetter', 'itruediv', + 'ixor', 'le', 'length_hint', 'lshift', 'lt', 'matmul', 'methodcaller', 'mod', 'mul', 'ne', 'neg', 'not_', 'or_', 'pos', 'pow', 'rshift', 'setitem', 'sub', 'truediv', 'truth', 'xor'] @@ -66,6 +66,14 @@ def is_not(a, b): "Same as a is not b." return a is not b +def is_none(a): + "Same as a is None." + return a is None + +def is_not_none(a): + "Same as a is not None." + return a is not None + # Mathematical/Bitwise Operations *********************************************# def abs(a): @@ -239,7 +247,7 @@ class attrgetter: """ __slots__ = ('_attrs', '_call') - def __init__(self, attr, *attrs): + def __init__(self, attr, /, *attrs): if not attrs: if not isinstance(attr, str): raise TypeError('attribute name must be a string') @@ -257,7 +265,7 @@ def func(obj): return tuple(getter(obj) for getter in getters) self._call = func - def __call__(self, obj): + def __call__(self, obj, /): return self._call(obj) def __repr__(self): @@ -276,7 +284,7 @@ class itemgetter: """ __slots__ = ('_items', '_call') - def __init__(self, item, *items): + def __init__(self, item, /, *items): if not items: self._items = (item,) def func(obj): @@ -288,7 +296,7 @@ def func(obj): return tuple(obj[i] for i in items) self._call = func - def __call__(self, obj): + def __call__(self, obj, /): return self._call(obj) def __repr__(self): @@ -315,7 +323,7 @@ def __init__(self, name, /, *args, **kwargs): self._args = args self._kwargs = kwargs - def __call__(self, obj): + def __call__(self, obj, /): return getattr(obj, self._name)(*self._args, **self._kwargs) def __repr__(self): @@ -415,7 +423,7 @@ def ixor(a, b): except ImportError: pass else: - from _operator import __doc__ + from _operator import __doc__ # noqa: F401 # All of these "__func__ = func" assignments have to happen after importing # from _operator to make sure they're set to the right function diff --git a/PythonLib/full/optparse.py b/PythonLib/full/optparse.py index 1c450c6f..38cf16d2 100644 --- a/PythonLib/full/optparse.py +++ b/PythonLib/full/optparse.py @@ -43,7 +43,7 @@ __copyright__ = """ Copyright (c) 2001-2006 Gregory P. Ward. All rights reserved. -Copyright (c) 2002-2006 Python Software Foundation. All rights reserved. +Copyright (c) 2002 Python Software Foundation. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are @@ -74,7 +74,8 @@ """ import sys, os -import textwrap +from gettext import gettext as _, ngettext + def _repr(self): return "<%s at 0x%x: %s>" % (self.__class__.__name__, id(self), self) @@ -86,19 +87,6 @@ def _repr(self): # Id: help.py 527 2006-07-23 15:21:30Z greg # Id: errors.py 509 2006-04-20 00:58:24Z gward -try: - from gettext import gettext, ngettext -except ImportError: - def gettext(message): - return message - - def ngettext(singular, plural, n): - if n == 1: - return singular - return plural - -_ = gettext - class OptParseError (Exception): def __init__(self, msg): @@ -263,6 +251,7 @@ def _format_text(self, text): Format a paragraph of free-form text for inclusion in the help output at the current indentation level. """ + import textwrap text_width = max(self.width - self.current_indent, 11) indent = " "*self.current_indent return textwrap.fill(text, @@ -319,6 +308,7 @@ def format_option(self, option): indent_first = 0 result.append(opts) if option.help: + import textwrap help_text = self.expand_default(option) help_lines = textwrap.wrap(help_text, self.help_width) result.append("%*s%s\n" % (indent_first, "", help_lines[0])) diff --git a/PythonLib/full/os.py b/PythonLib/full/os.py index 465742d3..ac03b416 100644 --- a/PythonLib/full/os.py +++ b/PythonLib/full/os.py @@ -10,7 +10,7 @@ - os.extsep is the extension separator (always '.') - os.altsep is the alternate pathname separator (None or '/') - os.pathsep is the component separator used in $PATH etc - - os.linesep is the line separator in text files ('\r' or '\n' or '\r\n') + - os.linesep is the line separator in text files ('\n' or '\r\n') - os.defpath is the default search path for executables - os.devnull is the file path of the null device ('/dev/null', etc.) @@ -64,6 +64,10 @@ def _get_exports_list(module): from posix import _have_functions except ImportError: pass + try: + from posix import _create_environ + except ImportError: + pass import posix __all__.extend(_get_exports_list(posix)) @@ -88,6 +92,10 @@ def _get_exports_list(module): from nt import _have_functions except ImportError: pass + try: + from nt import _create_environ + except ImportError: + pass else: raise ImportError('no os specific module found') @@ -110,6 +118,7 @@ def _add(str, fn): _add("HAVE_FCHMODAT", "chmod") _add("HAVE_FCHOWNAT", "chown") _add("HAVE_FSTATAT", "stat") + _add("HAVE_LSTAT", "lstat") _add("HAVE_FUTIMESAT", "utime") _add("HAVE_LINKAT", "link") _add("HAVE_MKDIRAT", "mkdir") @@ -131,6 +140,7 @@ def _add(str, fn): _set = set() _add("HAVE_FCHDIR", "chdir") _add("HAVE_FCHMOD", "chmod") + _add("MS_WINDOWS", "chmod") _add("HAVE_FCHOWN", "chown") _add("HAVE_FDOPENDIR", "listdir") _add("HAVE_FDOPENDIR", "scandir") @@ -171,6 +181,7 @@ def _add(str, fn): _add("HAVE_FSTATAT", "stat") _add("HAVE_LCHFLAGS", "chflags") _add("HAVE_LCHMOD", "chmod") + _add("MS_WINDOWS", "chmod") if _exists("lchown"): # mac os x10.3 _add("HAVE_LCHOWN", "chown") _add("HAVE_LINKAT", "link") @@ -363,61 +374,45 @@ def walk(top, topdown=True, onerror=None, followlinks=False): # minor reason when (say) a thousand readable directories are still # left to visit. try: - scandir_it = scandir(top) + with scandir(top) as entries: + for entry in entries: + try: + if followlinks is _walk_symlinks_as_files: + is_dir = entry.is_dir(follow_symlinks=False) and not entry.is_junction() + else: + is_dir = entry.is_dir() + except OSError: + # If is_dir() raises an OSError, consider the entry not to + # be a directory, same behaviour as os.path.isdir(). + is_dir = False + + if is_dir: + dirs.append(entry.name) + else: + nondirs.append(entry.name) + + if not topdown and is_dir: + # Bottom-up: traverse into sub-directory, but exclude + # symlinks to directories if followlinks is False + if followlinks: + walk_into = True + else: + try: + is_symlink = entry.is_symlink() + except OSError: + # If is_symlink() raises an OSError, consider the + # entry not to be a symbolic link, same behaviour + # as os.path.islink(). + is_symlink = False + walk_into = not is_symlink + + if walk_into: + walk_dirs.append(entry.path) except OSError as error: if onerror is not None: onerror(error) continue - cont = False - with scandir_it: - while True: - try: - try: - entry = next(scandir_it) - except StopIteration: - break - except OSError as error: - if onerror is not None: - onerror(error) - cont = True - break - - try: - if followlinks is _walk_symlinks_as_files: - is_dir = entry.is_dir(follow_symlinks=False) and not entry.is_junction() - else: - is_dir = entry.is_dir() - except OSError: - # If is_dir() raises an OSError, consider the entry not to - # be a directory, same behaviour as os.path.isdir(). - is_dir = False - - if is_dir: - dirs.append(entry.name) - else: - nondirs.append(entry.name) - - if not topdown and is_dir: - # Bottom-up: traverse into sub-directory, but exclude - # symlinks to directories if followlinks is False - if followlinks: - walk_into = True - else: - try: - is_symlink = entry.is_symlink() - except OSError: - # If is_symlink() raises an OSError, consider the - # entry not to be a symbolic link, same behaviour - # as os.path.islink(). - is_symlink = False - walk_into = not is_symlink - - if walk_into: - walk_dirs.append(entry.path) - if cont: - continue - if topdown: # Yield before sub-directory traversal if going top down yield top, dirs, nondirs @@ -771,7 +766,7 @@ def __ror__(self, other): new.update(self) return new -def _createenviron(): +def _create_environ_mapping(): if name == 'nt': # Where Env Var Names Must Be UPPERCASE def check_str(value): @@ -801,9 +796,24 @@ def decode(value): encode, decode) # unicode environ -environ = _createenviron() -del _createenviron +environ = _create_environ_mapping() +del _create_environ_mapping + +if _exists("_create_environ"): + def reload_environ(): + data = _create_environ() + if name == 'nt': + encodekey = environ.encodekey + data = {encodekey(key): value + for key, value in data.items()} + + # modify in-place to keep os.environb in sync + env_data = environ._data + env_data.clear() + env_data.update(data) + + __all__.append("reload_environ") def getenv(key, default=None): """Get an environment variable, return None if it doesn't exist. @@ -1090,6 +1100,12 @@ def _fspath(path): else: raise TypeError("expected str, bytes or os.PathLike object, " "not " + path_type.__name__) + except TypeError: + if path_type.__fspath__ is None: + raise TypeError("expected str, bytes or os.PathLike object, " + "not " + path_type.__name__) from None + else: + raise if isinstance(path_repr, (str, bytes)): return path_repr else: @@ -1108,6 +1124,8 @@ class PathLike(abc.ABC): """Abstract base class for implementing the file system path protocol.""" + __slots__ = () + @abc.abstractmethod def __fspath__(self): """Return the file system path representation of the object.""" @@ -1157,3 +1175,17 @@ def add_dll_directory(path): cookie, nt._remove_dll_directory ) + + +if _exists('sched_getaffinity') and sys._get_cpu_count_config() < 0: + def process_cpu_count(): + """ + Get the number of CPUs of the current process. + + Return the number of logical CPUs usable by the calling thread of the + current process. Return None if indeterminable. + """ + return len(sched_getaffinity(0)) +else: + # Just an alias to cpu_count() (same docstring) + process_cpu_count = cpu_count diff --git a/PythonLib/full/pathlib.py b/PythonLib/full/pathlib.py deleted file mode 100644 index 02eb5c25..00000000 --- a/PythonLib/full/pathlib.py +++ /dev/null @@ -1,1435 +0,0 @@ -"""Object-oriented filesystem paths. - -This module provides classes to represent abstract paths and concrete -paths with operations that have semantics appropriate for different -operating systems. -""" - -import fnmatch -import functools -import io -import ntpath -import os -import posixpath -import re -import sys -import warnings -from _collections_abc import Sequence -from errno import ENOENT, ENOTDIR, EBADF, ELOOP -from stat import S_ISDIR, S_ISLNK, S_ISREG, S_ISSOCK, S_ISBLK, S_ISCHR, S_ISFIFO -from urllib.parse import quote_from_bytes as urlquote_from_bytes - - -__all__ = [ - "PurePath", "PurePosixPath", "PureWindowsPath", - "Path", "PosixPath", "WindowsPath", - ] - -# -# Internals -# - -# Reference for Windows paths can be found at -# https://learn.microsoft.com/en-gb/windows/win32/fileio/naming-a-file . -_WIN_RESERVED_NAMES = frozenset( - {'CON', 'PRN', 'AUX', 'NUL', 'CONIN$', 'CONOUT$'} | - {f'COM{c}' for c in '123456789\xb9\xb2\xb3'} | - {f'LPT{c}' for c in '123456789\xb9\xb2\xb3'} -) - -_WINERROR_NOT_READY = 21 # drive exists but is not accessible -_WINERROR_INVALID_NAME = 123 # fix for bpo-35306 -_WINERROR_CANT_RESOLVE_FILENAME = 1921 # broken symlink pointing to itself - -# EBADF - guard against macOS `stat` throwing EBADF -_IGNORED_ERRNOS = (ENOENT, ENOTDIR, EBADF, ELOOP) - -_IGNORED_WINERRORS = ( - _WINERROR_NOT_READY, - _WINERROR_INVALID_NAME, - _WINERROR_CANT_RESOLVE_FILENAME) - -def _ignore_error(exception): - return (getattr(exception, 'errno', None) in _IGNORED_ERRNOS or - getattr(exception, 'winerror', None) in _IGNORED_WINERRORS) - - -@functools.cache -def _is_case_sensitive(flavour): - return flavour.normcase('Aa') == 'Aa' - -# -# Globbing helpers -# - - -# fnmatch.translate() returns a regular expression that includes a prefix and -# a suffix, which enable matching newlines and ensure the end of the string is -# matched, respectively. These features are undesirable for our implementation -# of PurePatch.match(), which represents path separators as newlines and joins -# pattern segments together. As a workaround, we define a slice object that -# can remove the prefix and suffix from any translate() result. See the -# _compile_pattern_lines() function for more details. -_FNMATCH_PREFIX, _FNMATCH_SUFFIX = fnmatch.translate('_').split('_') -_FNMATCH_SLICE = slice(len(_FNMATCH_PREFIX), -len(_FNMATCH_SUFFIX)) -_SWAP_SEP_AND_NEWLINE = { - '/': str.maketrans({'/': '\n', '\n': '/'}), - '\\': str.maketrans({'\\': '\n', '\n': '\\'}), -} - - -@functools.lru_cache() -def _make_selector(pattern_parts, flavour, case_sensitive): - pat = pattern_parts[0] - if not pat: - return _TerminatingSelector() - if pat == '**': - child_parts_idx = 1 - while child_parts_idx < len(pattern_parts) and pattern_parts[child_parts_idx] == '**': - child_parts_idx += 1 - child_parts = pattern_parts[child_parts_idx:] - if '**' in child_parts: - cls = _DoubleRecursiveWildcardSelector - else: - cls = _RecursiveWildcardSelector - else: - child_parts = pattern_parts[1:] - if pat == '..': - cls = _ParentSelector - elif '**' in pat: - raise ValueError("Invalid pattern: '**' can only be an entire path component") - else: - cls = _WildcardSelector - return cls(pat, child_parts, flavour, case_sensitive) - - -@functools.lru_cache(maxsize=256) -def _compile_pattern(pat, case_sensitive): - flags = re.NOFLAG if case_sensitive else re.IGNORECASE - return re.compile(fnmatch.translate(pat), flags).match - - -@functools.lru_cache() -def _compile_pattern_lines(pattern_lines, case_sensitive): - """Compile the given pattern lines to an `re.Pattern` object. - - The *pattern_lines* argument is a glob-style pattern (e.g. '*/*.py') with - its path separators and newlines swapped (e.g. '*\n*.py`). By using - newlines to separate path components, and not setting `re.DOTALL`, we - ensure that the `*` wildcard cannot match path separators. - - The returned `re.Pattern` object may have its `match()` method called to - match a complete pattern, or `search()` to match from the right. The - argument supplied to these methods must also have its path separators and - newlines swapped. - """ - - # Match the start of the path, or just after a path separator - parts = ['^'] - for part in pattern_lines.splitlines(keepends=True): - if part == '*\n': - part = r'.+\n' - elif part == '*': - part = r'.+' - else: - # Any other component: pass to fnmatch.translate(). We slice off - # the common prefix and suffix added by translate() to ensure that - # re.DOTALL is not set, and the end of the string not matched, - # respectively. With DOTALL not set, '*' wildcards will not match - # path separators, because the '.' characters in the pattern will - # not match newlines. - part = fnmatch.translate(part)[_FNMATCH_SLICE] - parts.append(part) - # Match the end of the path, always. - parts.append(r'\Z') - flags = re.MULTILINE - if not case_sensitive: - flags |= re.IGNORECASE - return re.compile(''.join(parts), flags=flags) - - -class _Selector: - """A selector matches a specific glob pattern part against the children - of a given path.""" - - def __init__(self, child_parts, flavour, case_sensitive): - self.child_parts = child_parts - if child_parts: - self.successor = _make_selector(child_parts, flavour, case_sensitive) - self.dironly = True - else: - self.successor = _TerminatingSelector() - self.dironly = False - - def select_from(self, parent_path): - """Iterate over all child paths of `parent_path` matched by this - selector. This can contain parent_path itself.""" - path_cls = type(parent_path) - scandir = path_cls._scandir - if not parent_path.is_dir(): - return iter([]) - return self._select_from(parent_path, scandir) - - -class _TerminatingSelector: - - def _select_from(self, parent_path, scandir): - yield parent_path - - -class _ParentSelector(_Selector): - - def __init__(self, name, child_parts, flavour, case_sensitive): - _Selector.__init__(self, child_parts, flavour, case_sensitive) - - def _select_from(self, parent_path, scandir): - path = parent_path._make_child_relpath('..') - for p in self.successor._select_from(path, scandir): - yield p - - -class _WildcardSelector(_Selector): - - def __init__(self, pat, child_parts, flavour, case_sensitive): - _Selector.__init__(self, child_parts, flavour, case_sensitive) - if case_sensitive is None: - # TODO: evaluate case-sensitivity of each directory in _select_from() - case_sensitive = _is_case_sensitive(flavour) - self.match = _compile_pattern(pat, case_sensitive) - - def _select_from(self, parent_path, scandir): - try: - # We must close the scandir() object before proceeding to - # avoid exhausting file descriptors when globbing deep trees. - with scandir(parent_path) as scandir_it: - entries = list(scandir_it) - except OSError: - pass - else: - for entry in entries: - if self.dironly: - try: - if not entry.is_dir(): - continue - except OSError: - continue - name = entry.name - if self.match(name): - path = parent_path._make_child_relpath(name) - for p in self.successor._select_from(path, scandir): - yield p - - -class _RecursiveWildcardSelector(_Selector): - - def __init__(self, pat, child_parts, flavour, case_sensitive): - _Selector.__init__(self, child_parts, flavour, case_sensitive) - - def _iterate_directories(self, parent_path): - yield parent_path - for dirpath, dirnames, _ in parent_path.walk(): - for dirname in dirnames: - yield dirpath._make_child_relpath(dirname) - - def _select_from(self, parent_path, scandir): - successor_select = self.successor._select_from - for starting_point in self._iterate_directories(parent_path): - for p in successor_select(starting_point, scandir): - yield p - - -class _DoubleRecursiveWildcardSelector(_RecursiveWildcardSelector): - """ - Like _RecursiveWildcardSelector, but also de-duplicates results from - successive selectors. This is necessary if the pattern contains - multiple non-adjacent '**' segments. - """ - - def _select_from(self, parent_path, scandir): - yielded = set() - try: - for p in super()._select_from(parent_path, scandir): - if p not in yielded: - yield p - yielded.add(p) - finally: - yielded.clear() - - -# -# Public API -# - -class _PathParents(Sequence): - """This object provides sequence-like access to the logical ancestors - of a path. Don't try to construct it yourself.""" - __slots__ = ('_path', '_drv', '_root', '_tail') - - def __init__(self, path): - self._path = path - self._drv = path.drive - self._root = path.root - self._tail = path._tail - - def __len__(self): - return len(self._tail) - - def __getitem__(self, idx): - if isinstance(idx, slice): - return tuple(self[i] for i in range(*idx.indices(len(self)))) - - if idx >= len(self) or idx < -len(self): - raise IndexError(idx) - if idx < 0: - idx += len(self) - return self._path._from_parsed_parts(self._drv, self._root, - self._tail[:-idx - 1]) - - def __repr__(self): - return "<{}.parents>".format(type(self._path).__name__) - - -class PurePath(object): - """Base class for manipulating paths without I/O. - - PurePath represents a filesystem path and offers operations which - don't imply any actual filesystem I/O. Depending on your system, - instantiating a PurePath will return either a PurePosixPath or a - PureWindowsPath object. You can also instantiate either of these classes - directly, regardless of your system. - """ - - __slots__ = ( - # The `_raw_paths` slot stores unnormalized string paths. This is set - # in the `__init__()` method. - '_raw_paths', - - # The `_drv`, `_root` and `_tail_cached` slots store parsed and - # normalized parts of the path. They are set when any of the `drive`, - # `root` or `_tail` properties are accessed for the first time. The - # three-part division corresponds to the result of - # `os.path.splitroot()`, except that the tail is further split on path - # separators (i.e. it is a list of strings), and that the root and - # tail are normalized. - '_drv', '_root', '_tail_cached', - - # The `_str` slot stores the string representation of the path, - # computed from the drive, root and tail when `__str__()` is called - # for the first time. It's used to implement `_str_normcase` - '_str', - - # The `_str_normcase_cached` slot stores the string path with - # normalized case. It is set when the `_str_normcase` property is - # accessed for the first time. It's used to implement `__eq__()` - # `__hash__()`, and `_parts_normcase` - '_str_normcase_cached', - - # The `_parts_normcase_cached` slot stores the case-normalized - # string path after splitting on path separators. It's set when the - # `_parts_normcase` property is accessed for the first time. It's used - # to implement comparison methods like `__lt__()`. - '_parts_normcase_cached', - - # The `_lines_cached` slot stores the string path with path separators - # and newlines swapped. This is used to implement `match()`. - '_lines_cached', - - # The `_hash` slot stores the hash of the case-normalized string - # path. It's set when `__hash__()` is called for the first time. - '_hash', - ) - _flavour = os.path - - def __new__(cls, *args, **kwargs): - """Construct a PurePath from one or several strings and or existing - PurePath objects. The strings and path objects are combined so as - to yield a canonicalized path, which is incorporated into the - new PurePath object. - """ - if cls is PurePath: - cls = PureWindowsPath if os.name == 'nt' else PurePosixPath - return object.__new__(cls) - - def __reduce__(self): - # Using the parts tuple helps share interned path parts - # when pickling related paths. - return (self.__class__, self.parts) - - def __init__(self, *args): - paths = [] - for arg in args: - if isinstance(arg, PurePath): - if arg._flavour is not self._flavour: - # GH-103631: Convert separators for backwards compatibility. - paths.append(arg.as_posix()) - else: - paths.extend(arg._raw_paths) - else: - try: - path = os.fspath(arg) - except TypeError: - path = arg - if not isinstance(path, str): - raise TypeError( - "argument should be a str or an os.PathLike " - "object where __fspath__ returns a str, " - f"not {type(path).__name__!r}") - paths.append(path) - self._raw_paths = paths - - def with_segments(self, *pathsegments): - """Construct a new path object from any number of path-like objects. - Subclasses may override this method to customize how new path objects - are created from methods like `iterdir()`. - """ - return type(self)(*pathsegments) - - @classmethod - def _parse_path(cls, path): - if not path: - return '', '', [] - sep = cls._flavour.sep - altsep = cls._flavour.altsep - if altsep: - path = path.replace(altsep, sep) - drv, root, rel = cls._flavour.splitroot(path) - if not root and drv.startswith(sep) and not drv.endswith(sep): - drv_parts = drv.split(sep) - if len(drv_parts) == 4 and drv_parts[2] not in '?.': - # e.g. //server/share - root = sep - elif len(drv_parts) == 6: - # e.g. //?/unc/server/share - root = sep - parsed = [sys.intern(str(x)) for x in rel.split(sep) if x and x != '.'] - return drv, root, parsed - - def _load_parts(self): - paths = self._raw_paths - if len(paths) == 0: - path = '' - elif len(paths) == 1: - path = paths[0] - else: - path = self._flavour.join(*paths) - drv, root, tail = self._parse_path(path) - self._drv = drv - self._root = root - self._tail_cached = tail - - def _from_parsed_parts(self, drv, root, tail): - path_str = self._format_parsed_parts(drv, root, tail) - path = self.with_segments(path_str) - path._str = path_str or '.' - path._drv = drv - path._root = root - path._tail_cached = tail - return path - - @classmethod - def _format_parsed_parts(cls, drv, root, tail): - if drv or root: - return drv + root + cls._flavour.sep.join(tail) - elif tail and cls._flavour.splitdrive(tail[0])[0]: - tail = ['.'] + tail - return cls._flavour.sep.join(tail) - - def __str__(self): - """Return the string representation of the path, suitable for - passing to system calls.""" - try: - return self._str - except AttributeError: - self._str = self._format_parsed_parts(self.drive, self.root, - self._tail) or '.' - return self._str - - def __fspath__(self): - return str(self) - - def as_posix(self): - """Return the string representation of the path with forward (/) - slashes.""" - f = self._flavour - return str(self).replace(f.sep, '/') - - def __bytes__(self): - """Return the bytes representation of the path. This is only - recommended to use under Unix.""" - return os.fsencode(self) - - def __repr__(self): - return "{}({!r})".format(self.__class__.__name__, self.as_posix()) - - def as_uri(self): - """Return the path as a 'file' URI.""" - if not self.is_absolute(): - raise ValueError("relative path can't be expressed as a file URI") - - drive = self.drive - if len(drive) == 2 and drive[1] == ':': - # It's a path on a local drive => 'file:///c:/a/b' - prefix = 'file:///' + drive - path = self.as_posix()[2:] - elif drive: - # It's a path on a network drive => 'file://host/share/a/b' - prefix = 'file:' - path = self.as_posix() - else: - # It's a posix path => 'file:///etc/hosts' - prefix = 'file://' - path = str(self) - return prefix + urlquote_from_bytes(os.fsencode(path)) - - @property - def _str_normcase(self): - # String with normalized case, for hashing and equality checks - try: - return self._str_normcase_cached - except AttributeError: - if _is_case_sensitive(self._flavour): - self._str_normcase_cached = str(self) - else: - self._str_normcase_cached = str(self).lower() - return self._str_normcase_cached - - @property - def _parts_normcase(self): - # Cached parts with normalized case, for comparisons. - try: - return self._parts_normcase_cached - except AttributeError: - self._parts_normcase_cached = self._str_normcase.split(self._flavour.sep) - return self._parts_normcase_cached - - @property - def _lines(self): - # Path with separators and newlines swapped, for pattern matching. - try: - return self._lines_cached - except AttributeError: - path_str = str(self) - if path_str == '.': - self._lines_cached = '' - else: - trans = _SWAP_SEP_AND_NEWLINE[self._flavour.sep] - self._lines_cached = path_str.translate(trans) - return self._lines_cached - - def __eq__(self, other): - if not isinstance(other, PurePath): - return NotImplemented - return self._str_normcase == other._str_normcase and self._flavour is other._flavour - - def __hash__(self): - try: - return self._hash - except AttributeError: - self._hash = hash(self._str_normcase) - return self._hash - - def __lt__(self, other): - if not isinstance(other, PurePath) or self._flavour is not other._flavour: - return NotImplemented - return self._parts_normcase < other._parts_normcase - - def __le__(self, other): - if not isinstance(other, PurePath) or self._flavour is not other._flavour: - return NotImplemented - return self._parts_normcase <= other._parts_normcase - - def __gt__(self, other): - if not isinstance(other, PurePath) or self._flavour is not other._flavour: - return NotImplemented - return self._parts_normcase > other._parts_normcase - - def __ge__(self, other): - if not isinstance(other, PurePath) or self._flavour is not other._flavour: - return NotImplemented - return self._parts_normcase >= other._parts_normcase - - @property - def drive(self): - """The drive prefix (letter or UNC path), if any.""" - try: - return self._drv - except AttributeError: - self._load_parts() - return self._drv - - @property - def root(self): - """The root of the path, if any.""" - try: - return self._root - except AttributeError: - self._load_parts() - return self._root - - @property - def _tail(self): - try: - return self._tail_cached - except AttributeError: - self._load_parts() - return self._tail_cached - - @property - def anchor(self): - """The concatenation of the drive and root, or ''.""" - anchor = self.drive + self.root - return anchor - - @property - def name(self): - """The final path component, if any.""" - tail = self._tail - if not tail: - return '' - return tail[-1] - - @property - def suffix(self): - """ - The final component's last suffix, if any. - - This includes the leading period. For example: '.txt' - """ - name = self.name - i = name.rfind('.') - if 0 < i < len(name) - 1: - return name[i:] - else: - return '' - - @property - def suffixes(self): - """ - A list of the final component's suffixes, if any. - - These include the leading periods. For example: ['.tar', '.gz'] - """ - name = self.name - if name.endswith('.'): - return [] - name = name.lstrip('.') - return ['.' + suffix for suffix in name.split('.')[1:]] - - @property - def stem(self): - """The final path component, minus its last suffix.""" - name = self.name - i = name.rfind('.') - if 0 < i < len(name) - 1: - return name[:i] - else: - return name - - def with_name(self, name): - """Return a new path with the file name changed.""" - if not self.name: - raise ValueError("%r has an empty name" % (self,)) - f = self._flavour - if not name or f.sep in name or (f.altsep and f.altsep in name) or name == '.': - raise ValueError("Invalid name %r" % (name)) - return self._from_parsed_parts(self.drive, self.root, - self._tail[:-1] + [name]) - - def with_stem(self, stem): - """Return a new path with the stem changed.""" - return self.with_name(stem + self.suffix) - - def with_suffix(self, suffix): - """Return a new path with the file suffix changed. If the path - has no suffix, add given suffix. If the given suffix is an empty - string, remove the suffix from the path. - """ - f = self._flavour - if f.sep in suffix or f.altsep and f.altsep in suffix: - raise ValueError("Invalid suffix %r" % (suffix,)) - if suffix and not suffix.startswith('.') or suffix == '.': - raise ValueError("Invalid suffix %r" % (suffix)) - name = self.name - if not name: - raise ValueError("%r has an empty name" % (self,)) - old_suffix = self.suffix - if not old_suffix: - name = name + suffix - else: - name = name[:-len(old_suffix)] + suffix - return self._from_parsed_parts(self.drive, self.root, - self._tail[:-1] + [name]) - - def relative_to(self, other, /, *_deprecated, walk_up=False): - """Return the relative path to another path identified by the passed - arguments. If the operation is not possible (because this is not - related to the other path), raise ValueError. - - The *walk_up* parameter controls whether `..` may be used to resolve - the path. - """ - if _deprecated: - msg = ("support for supplying more than one positional argument " - "to pathlib.PurePath.relative_to() is deprecated and " - "scheduled for removal in Python {remove}") - warnings._deprecated("pathlib.PurePath.relative_to(*args)", msg, - remove=(3, 14)) - other = self.with_segments(other, *_deprecated) - for step, path in enumerate([other] + list(other.parents)): - if self.is_relative_to(path): - break - elif not walk_up: - raise ValueError(f"{str(self)!r} is not in the subpath of {str(other)!r}") - elif path.name == '..': - raise ValueError(f"'..' segment in {str(other)!r} cannot be walked") - else: - raise ValueError(f"{str(self)!r} and {str(other)!r} have different anchors") - parts = ['..'] * step + self._tail[len(path._tail):] - return self.with_segments(*parts) - - def is_relative_to(self, other, /, *_deprecated): - """Return True if the path is relative to another path or False. - """ - if _deprecated: - msg = ("support for supplying more than one argument to " - "pathlib.PurePath.is_relative_to() is deprecated and " - "scheduled for removal in Python {remove}") - warnings._deprecated("pathlib.PurePath.is_relative_to(*args)", - msg, remove=(3, 14)) - other = self.with_segments(other, *_deprecated) - return other == self or other in self.parents - - @property - def parts(self): - """An object providing sequence-like access to the - components in the filesystem path.""" - if self.drive or self.root: - return (self.drive + self.root,) + tuple(self._tail) - else: - return tuple(self._tail) - - def joinpath(self, *pathsegments): - """Combine this path with one or several arguments, and return a - new path representing either a subpath (if all arguments are relative - paths) or a totally different path (if one of the arguments is - anchored). - """ - return self.with_segments(self, *pathsegments) - - def __truediv__(self, key): - try: - return self.joinpath(key) - except TypeError: - return NotImplemented - - def __rtruediv__(self, key): - try: - return self.with_segments(key, self) - except TypeError: - return NotImplemented - - @property - def parent(self): - """The logical parent of the path.""" - drv = self.drive - root = self.root - tail = self._tail - if not tail: - return self - return self._from_parsed_parts(drv, root, tail[:-1]) - - @property - def parents(self): - """A sequence of this path's logical parents.""" - # The value of this property should not be cached on the path object, - # as doing so would introduce a reference cycle. - return _PathParents(self) - - def is_absolute(self): - """True if the path is absolute (has both a root and, if applicable, - a drive).""" - if self._flavour is ntpath: - # ntpath.isabs() is defective - see GH-44626. - return bool(self.drive and self.root) - elif self._flavour is posixpath: - # Optimization: work with raw paths on POSIX. - for path in self._raw_paths: - if path.startswith('/'): - return True - return False - else: - return self._flavour.isabs(str(self)) - - def is_reserved(self): - """Return True if the path contains one of the special names reserved - by the system, if any.""" - if self._flavour is posixpath or not self._tail: - return False - - # NOTE: the rules for reserved names seem somewhat complicated - # (e.g. r"..\NUL" is reserved but not r"foo\NUL" if "foo" does not - # exist). We err on the side of caution and return True for paths - # which are not considered reserved by Windows. - if self.drive.startswith('\\\\'): - # UNC paths are never reserved. - return False - name = self._tail[-1].partition('.')[0].partition(':')[0].rstrip(' ') - return name.upper() in _WIN_RESERVED_NAMES - - def match(self, path_pattern, *, case_sensitive=None): - """ - Return True if this path matches the given pattern. - """ - if not isinstance(path_pattern, PurePath): - path_pattern = self.with_segments(path_pattern) - if case_sensitive is None: - case_sensitive = _is_case_sensitive(self._flavour) - pattern = _compile_pattern_lines(path_pattern._lines, case_sensitive) - if path_pattern.drive or path_pattern.root: - return pattern.match(self._lines) is not None - elif path_pattern._tail: - return pattern.search(self._lines) is not None - else: - raise ValueError("empty pattern") - - -# Can't subclass os.PathLike from PurePath and keep the constructor -# optimizations in PurePath.__slots__. -os.PathLike.register(PurePath) - - -class PurePosixPath(PurePath): - """PurePath subclass for non-Windows systems. - - On a POSIX system, instantiating a PurePath should return this object. - However, you can also instantiate it directly on any system. - """ - _flavour = posixpath - __slots__ = () - - -class PureWindowsPath(PurePath): - """PurePath subclass for Windows systems. - - On a Windows system, instantiating a PurePath should return this object. - However, you can also instantiate it directly on any system. - """ - _flavour = ntpath - __slots__ = () - - -# Filesystem-accessing classes - - -class Path(PurePath): - """PurePath subclass that can make system calls. - - Path represents a filesystem path but unlike PurePath, also offers - methods to do system calls on path objects. Depending on your system, - instantiating a Path will return either a PosixPath or a WindowsPath - object. You can also instantiate a PosixPath or WindowsPath directly, - but cannot instantiate a WindowsPath on a POSIX system or vice versa. - """ - __slots__ = () - - def stat(self, *, follow_symlinks=True): - """ - Return the result of the stat() system call on this path, like - os.stat() does. - """ - return os.stat(self, follow_symlinks=follow_symlinks) - - def lstat(self): - """ - Like stat(), except if the path points to a symlink, the symlink's - status information is returned, rather than its target's. - """ - return self.stat(follow_symlinks=False) - - - # Convenience functions for querying the stat results - - def exists(self, *, follow_symlinks=True): - """ - Whether this path exists. - - This method normally follows symlinks; to check whether a symlink exists, - add the argument follow_symlinks=False. - """ - try: - self.stat(follow_symlinks=follow_symlinks) - except OSError as e: - if not _ignore_error(e): - raise - return False - except ValueError: - # Non-encodable path - return False - return True - - def is_dir(self): - """ - Whether this path is a directory. - """ - try: - return S_ISDIR(self.stat().st_mode) - except OSError as e: - if not _ignore_error(e): - raise - # Path doesn't exist or is a broken symlink - # (see http://web.archive.org/web/20200623061726/https://bitbucket.org/pitrou/pathlib/issues/12/ ) - return False - except ValueError: - # Non-encodable path - return False - - def is_file(self): - """ - Whether this path is a regular file (also True for symlinks pointing - to regular files). - """ - try: - return S_ISREG(self.stat().st_mode) - except OSError as e: - if not _ignore_error(e): - raise - # Path doesn't exist or is a broken symlink - # (see http://web.archive.org/web/20200623061726/https://bitbucket.org/pitrou/pathlib/issues/12/ ) - return False - except ValueError: - # Non-encodable path - return False - - def is_mount(self): - """ - Check if this path is a mount point - """ - return self._flavour.ismount(self) - - def is_symlink(self): - """ - Whether this path is a symbolic link. - """ - try: - return S_ISLNK(self.lstat().st_mode) - except OSError as e: - if not _ignore_error(e): - raise - # Path doesn't exist - return False - except ValueError: - # Non-encodable path - return False - - def is_junction(self): - """ - Whether this path is a junction. - """ - return self._flavour.isjunction(self) - - def is_block_device(self): - """ - Whether this path is a block device. - """ - try: - return S_ISBLK(self.stat().st_mode) - except OSError as e: - if not _ignore_error(e): - raise - # Path doesn't exist or is a broken symlink - # (see http://web.archive.org/web/20200623061726/https://bitbucket.org/pitrou/pathlib/issues/12/ ) - return False - except ValueError: - # Non-encodable path - return False - - def is_char_device(self): - """ - Whether this path is a character device. - """ - try: - return S_ISCHR(self.stat().st_mode) - except OSError as e: - if not _ignore_error(e): - raise - # Path doesn't exist or is a broken symlink - # (see http://web.archive.org/web/20200623061726/https://bitbucket.org/pitrou/pathlib/issues/12/ ) - return False - except ValueError: - # Non-encodable path - return False - - def is_fifo(self): - """ - Whether this path is a FIFO. - """ - try: - return S_ISFIFO(self.stat().st_mode) - except OSError as e: - if not _ignore_error(e): - raise - # Path doesn't exist or is a broken symlink - # (see http://web.archive.org/web/20200623061726/https://bitbucket.org/pitrou/pathlib/issues/12/ ) - return False - except ValueError: - # Non-encodable path - return False - - def is_socket(self): - """ - Whether this path is a socket. - """ - try: - return S_ISSOCK(self.stat().st_mode) - except OSError as e: - if not _ignore_error(e): - raise - # Path doesn't exist or is a broken symlink - # (see http://web.archive.org/web/20200623061726/https://bitbucket.org/pitrou/pathlib/issues/12/ ) - return False - except ValueError: - # Non-encodable path - return False - - def samefile(self, other_path): - """Return whether other_path is the same or not as this file - (as returned by os.path.samefile()). - """ - st = self.stat() - try: - other_st = other_path.stat() - except AttributeError: - other_st = self.with_segments(other_path).stat() - return self._flavour.samestat(st, other_st) - - def open(self, mode='r', buffering=-1, encoding=None, - errors=None, newline=None): - """ - Open the file pointed to by this path and return a file object, as - the built-in open() function does. - """ - if "b" not in mode: - encoding = io.text_encoding(encoding) - return io.open(self, mode, buffering, encoding, errors, newline) - - def read_bytes(self): - """ - Open the file in bytes mode, read it, and close the file. - """ - with self.open(mode='rb') as f: - return f.read() - - def read_text(self, encoding=None, errors=None): - """ - Open the file in text mode, read it, and close the file. - """ - encoding = io.text_encoding(encoding) - with self.open(mode='r', encoding=encoding, errors=errors) as f: - return f.read() - - def write_bytes(self, data): - """ - Open the file in bytes mode, write to it, and close the file. - """ - # type-check for the buffer interface before truncating the file - view = memoryview(data) - with self.open(mode='wb') as f: - return f.write(view) - - def write_text(self, data, encoding=None, errors=None, newline=None): - """ - Open the file in text mode, write to it, and close the file. - """ - if not isinstance(data, str): - raise TypeError('data must be str, not %s' % - data.__class__.__name__) - encoding = io.text_encoding(encoding) - with self.open(mode='w', encoding=encoding, errors=errors, newline=newline) as f: - return f.write(data) - - def iterdir(self): - """Yield path objects of the directory contents. - - The children are yielded in arbitrary order, and the - special entries '.' and '..' are not included. - """ - for name in os.listdir(self): - yield self._make_child_relpath(name) - - def _scandir(self): - # bpo-24132: a future version of pathlib will support subclassing of - # pathlib.Path to customize how the filesystem is accessed. This - # includes scandir(), which is used to implement glob(). - return os.scandir(self) - - def _make_child_relpath(self, name): - path_str = str(self) - tail = self._tail - if tail: - path_str = f'{path_str}{self._flavour.sep}{name}' - elif path_str != '.': - path_str = f'{path_str}{name}' - else: - path_str = name - path = self.with_segments(path_str) - path._str = path_str - path._drv = self.drive - path._root = self.root - path._tail_cached = tail + [name] - return path - - def glob(self, pattern, *, case_sensitive=None): - """Iterate over this subtree and yield all existing files (of any - kind, including directories) matching the given relative pattern. - """ - sys.audit("pathlib.Path.glob", self, pattern) - if not pattern: - raise ValueError("Unacceptable pattern: {!r}".format(pattern)) - drv, root, pattern_parts = self._parse_path(pattern) - if drv or root: - raise NotImplementedError("Non-relative patterns are unsupported") - if pattern[-1] in (self._flavour.sep, self._flavour.altsep): - pattern_parts.append('') - selector = _make_selector(tuple(pattern_parts), self._flavour, case_sensitive) - for p in selector.select_from(self): - yield p - - def rglob(self, pattern, *, case_sensitive=None): - """Recursively yield all existing files (of any kind, including - directories) matching the given relative pattern, anywhere in - this subtree. - """ - sys.audit("pathlib.Path.rglob", self, pattern) - drv, root, pattern_parts = self._parse_path(pattern) - if drv or root: - raise NotImplementedError("Non-relative patterns are unsupported") - if pattern and pattern[-1] in (self._flavour.sep, self._flavour.altsep): - pattern_parts.append('') - selector = _make_selector(("**",) + tuple(pattern_parts), self._flavour, case_sensitive) - for p in selector.select_from(self): - yield p - - def walk(self, top_down=True, on_error=None, follow_symlinks=False): - """Walk the directory tree from this directory, similar to os.walk().""" - sys.audit("pathlib.Path.walk", self, on_error, follow_symlinks) - paths = [self] - - while paths: - path = paths.pop() - if isinstance(path, tuple): - yield path - continue - - # We may not have read permission for self, in which case we can't - # get a list of the files the directory contains. os.walk() - # always suppressed the exception in that instance, rather than - # blow up for a minor reason when (say) a thousand readable - # directories are still left to visit. That logic is copied here. - try: - scandir_it = path._scandir() - except OSError as error: - if on_error is not None: - on_error(error) - continue - - with scandir_it: - dirnames = [] - filenames = [] - for entry in scandir_it: - try: - is_dir = entry.is_dir(follow_symlinks=follow_symlinks) - except OSError: - # Carried over from os.path.isdir(). - is_dir = False - - if is_dir: - dirnames.append(entry.name) - else: - filenames.append(entry.name) - - if top_down: - yield path, dirnames, filenames - else: - paths.append((path, dirnames, filenames)) - - paths += [path._make_child_relpath(d) for d in reversed(dirnames)] - - def __init__(self, *args, **kwargs): - if kwargs: - msg = ("support for supplying keyword arguments to pathlib.PurePath " - "is deprecated and scheduled for removal in Python {remove}") - warnings._deprecated("pathlib.PurePath(**kwargs)", msg, remove=(3, 14)) - super().__init__(*args) - - def __new__(cls, *args, **kwargs): - if cls is Path: - cls = WindowsPath if os.name == 'nt' else PosixPath - return object.__new__(cls) - - def __enter__(self): - # In previous versions of pathlib, __exit__() marked this path as - # closed; subsequent attempts to perform I/O would raise an IOError. - # This functionality was never documented, and had the effect of - # making Path objects mutable, contrary to PEP 428. - # In Python 3.9 __exit__() was made a no-op. - # In Python 3.11 __enter__() began emitting DeprecationWarning. - # In Python 3.13 __enter__() and __exit__() should be removed. - warnings.warn("pathlib.Path.__enter__() is deprecated and scheduled " - "for removal in Python 3.13; Path objects as a context " - "manager is a no-op", - DeprecationWarning, stacklevel=2) - return self - - def __exit__(self, t, v, tb): - pass - - # Public API - - @classmethod - def cwd(cls): - """Return a new path pointing to the current working directory.""" - # We call 'absolute()' rather than using 'os.getcwd()' directly to - # enable users to replace the implementation of 'absolute()' in a - # subclass and benefit from the new behaviour here. This works because - # os.path.abspath('.') == os.getcwd(). - return cls().absolute() - - @classmethod - def home(cls): - """Return a new path pointing to the user's home directory (as - returned by os.path.expanduser('~')). - """ - return cls("~").expanduser() - - def absolute(self): - """Return an absolute version of this path by prepending the current - working directory. No normalization or symlink resolution is performed. - - Use resolve() to get the canonical path to a file. - """ - if self.is_absolute(): - return self - elif self.drive: - # There is a CWD on each drive-letter drive. - cwd = self._flavour.abspath(self.drive) - else: - cwd = os.getcwd() - # Fast path for "empty" paths, e.g. Path("."), Path("") or Path(). - # We pass only one argument to with_segments() to avoid the cost - # of joining, and we exploit the fact that getcwd() returns a - # fully-normalized string by storing it in _str. This is used to - # implement Path.cwd(). - if not self.root and not self._tail: - result = self.with_segments(cwd) - result._str = cwd - return result - return self.with_segments(cwd, self) - - def resolve(self, strict=False): - """ - Make the path absolute, resolving all symlinks on the way and also - normalizing it. - """ - - def check_eloop(e): - winerror = getattr(e, 'winerror', 0) - if e.errno == ELOOP or winerror == _WINERROR_CANT_RESOLVE_FILENAME: - raise RuntimeError("Symlink loop from %r" % e.filename) - - try: - s = self._flavour.realpath(self, strict=strict) - except OSError as e: - check_eloop(e) - raise - p = self.with_segments(s) - - # In non-strict mode, realpath() doesn't raise on symlink loops. - # Ensure we get an exception by calling stat() - if not strict: - try: - p.stat() - except OSError as e: - check_eloop(e) - return p - - def owner(self): - """ - Return the login name of the file owner. - """ - try: - import pwd - return pwd.getpwuid(self.stat().st_uid).pw_name - except ImportError: - raise NotImplementedError("Path.owner() is unsupported on this system") - - def group(self): - """ - Return the group name of the file gid. - """ - - try: - import grp - return grp.getgrgid(self.stat().st_gid).gr_name - except ImportError: - raise NotImplementedError("Path.group() is unsupported on this system") - - def readlink(self): - """ - Return the path to which the symbolic link points. - """ - if not hasattr(os, "readlink"): - raise NotImplementedError("os.readlink() not available on this system") - return self.with_segments(os.readlink(self)) - - def touch(self, mode=0o666, exist_ok=True): - """ - Create this file with the given access mode, if it doesn't exist. - """ - - if exist_ok: - # First try to bump modification time - # Implementation note: GNU touch uses the UTIME_NOW option of - # the utimensat() / futimens() functions. - try: - os.utime(self, None) - except OSError: - # Avoid exception chaining - pass - else: - return - flags = os.O_CREAT | os.O_WRONLY - if not exist_ok: - flags |= os.O_EXCL - fd = os.open(self, flags, mode) - os.close(fd) - - def mkdir(self, mode=0o777, parents=False, exist_ok=False): - """ - Create a new directory at this given path. - """ - try: - os.mkdir(self, mode) - except FileNotFoundError: - if not parents or self.parent == self: - raise - self.parent.mkdir(parents=True, exist_ok=True) - self.mkdir(mode, parents=False, exist_ok=exist_ok) - except OSError: - # Cannot rely on checking for EEXIST, since the operating system - # could give priority to other errors like EACCES or EROFS - if not exist_ok or not self.is_dir(): - raise - - def chmod(self, mode, *, follow_symlinks=True): - """ - Change the permissions of the path, like os.chmod(). - """ - os.chmod(self, mode, follow_symlinks=follow_symlinks) - - def lchmod(self, mode): - """ - Like chmod(), except if the path points to a symlink, the symlink's - permissions are changed, rather than its target's. - """ - self.chmod(mode, follow_symlinks=False) - - def unlink(self, missing_ok=False): - """ - Remove this file or link. - If the path is a directory, use rmdir() instead. - """ - try: - os.unlink(self) - except FileNotFoundError: - if not missing_ok: - raise - - def rmdir(self): - """ - Remove this directory. The directory must be empty. - """ - os.rmdir(self) - - def rename(self, target): - """ - Rename this path to the target path. - - The target path may be absolute or relative. Relative paths are - interpreted relative to the current working directory, *not* the - directory of the Path object. - - Returns the new Path instance pointing to the target path. - """ - os.rename(self, target) - return self.with_segments(target) - - def replace(self, target): - """ - Rename this path to the target path, overwriting if that path exists. - - The target path may be absolute or relative. Relative paths are - interpreted relative to the current working directory, *not* the - directory of the Path object. - - Returns the new Path instance pointing to the target path. - """ - os.replace(self, target) - return self.with_segments(target) - - def symlink_to(self, target, target_is_directory=False): - """ - Make this path a symlink pointing to the target path. - Note the order of arguments (link, target) is the reverse of os.symlink. - """ - if not hasattr(os, "symlink"): - raise NotImplementedError("os.symlink() not available on this system") - os.symlink(target, self, target_is_directory) - - def hardlink_to(self, target): - """ - Make this path a hard link pointing to the same file as *target*. - - Note the order of arguments (self, target) is the reverse of os.link's. - """ - if not hasattr(os, "link"): - raise NotImplementedError("os.link() not available on this system") - os.link(target, self) - - def expanduser(self): - """ Return a new path with expanded ~ and ~user constructs - (as returned by os.path.expanduser) - """ - if (not (self.drive or self.root) and - self._tail and self._tail[0][:1] == '~'): - homedir = self._flavour.expanduser(self._tail[0]) - if homedir[:1] == "~": - raise RuntimeError("Could not determine home directory.") - drv, root, tail = self._parse_path(homedir) - return self._from_parsed_parts(drv, root, tail + self._tail[1:]) - - return self - - -class PosixPath(Path, PurePosixPath): - """Path subclass for non-Windows systems. - - On a POSIX system, instantiating a Path should return this object. - """ - __slots__ = () - - if os.name == 'nt': - def __new__(cls, *args, **kwargs): - raise NotImplementedError( - f"cannot instantiate {cls.__name__!r} on your system") - -class WindowsPath(Path, PureWindowsPath): - """Path subclass for Windows systems. - - On a Windows system, instantiating a Path should return this object. - """ - __slots__ = () - - if os.name != 'nt': - def __new__(cls, *args, **kwargs): - raise NotImplementedError( - f"cannot instantiate {cls.__name__!r} on your system") diff --git a/PythonLib/full/pathlib/__init__.py b/PythonLib/full/pathlib/__init__.py new file mode 100644 index 00000000..0d763d1f --- /dev/null +++ b/PythonLib/full/pathlib/__init__.py @@ -0,0 +1,1307 @@ +"""Object-oriented filesystem paths. + +This module provides classes to represent abstract paths and concrete +paths with operations that have semantics appropriate for different +operating systems. +""" + +import io +import ntpath +import operator +import os +import posixpath +import sys +from errno import * +from glob import _StringGlobber, _no_recurse_symlinks +from itertools import chain +from stat import S_ISDIR, S_ISREG, S_ISSOCK, S_ISBLK, S_ISCHR, S_ISFIFO +from _collections_abc import Sequence + +try: + import pwd +except ImportError: + pwd = None +try: + import grp +except ImportError: + grp = None + +from pathlib._os import ( + PathInfo, DirEntryInfo, + ensure_different_files, ensure_distinct_paths, + copyfile2, copyfileobj, magic_open, copy_info, +) + + +__all__ = [ + "UnsupportedOperation", + "PurePath", "PurePosixPath", "PureWindowsPath", + "Path", "PosixPath", "WindowsPath", + ] + + +class UnsupportedOperation(NotImplementedError): + """An exception that is raised when an unsupported operation is attempted. + """ + pass + + +class _PathParents(Sequence): + """This object provides sequence-like access to the logical ancestors + of a path. Don't try to construct it yourself.""" + __slots__ = ('_path', '_drv', '_root', '_tail') + + def __init__(self, path): + self._path = path + self._drv = path.drive + self._root = path.root + self._tail = path._tail + + def __len__(self): + return len(self._tail) + + def __getitem__(self, idx): + if isinstance(idx, slice): + return tuple(self[i] for i in range(*idx.indices(len(self)))) + + if idx >= len(self) or idx < -len(self): + raise IndexError(idx) + if idx < 0: + idx += len(self) + return self._path._from_parsed_parts(self._drv, self._root, + self._tail[:-idx - 1]) + + def __repr__(self): + return "<{}.parents>".format(type(self._path).__name__) + + +class PurePath: + """Base class for manipulating paths without I/O. + + PurePath represents a filesystem path and offers operations which + don't imply any actual filesystem I/O. Depending on your system, + instantiating a PurePath will return either a PurePosixPath or a + PureWindowsPath object. You can also instantiate either of these classes + directly, regardless of your system. + """ + + __slots__ = ( + # The `_raw_paths` slot stores unjoined string paths. This is set in + # the `__init__()` method. + '_raw_paths', + + # The `_drv`, `_root` and `_tail_cached` slots store parsed and + # normalized parts of the path. They are set when any of the `drive`, + # `root` or `_tail` properties are accessed for the first time. The + # three-part division corresponds to the result of + # `os.path.splitroot()`, except that the tail is further split on path + # separators (i.e. it is a list of strings), and that the root and + # tail are normalized. + '_drv', '_root', '_tail_cached', + + # The `_str` slot stores the string representation of the path, + # computed from the drive, root and tail when `__str__()` is called + # for the first time. It's used to implement `_str_normcase` + '_str', + + # The `_str_normcase_cached` slot stores the string path with + # normalized case. It is set when the `_str_normcase` property is + # accessed for the first time. It's used to implement `__eq__()` + # `__hash__()`, and `_parts_normcase` + '_str_normcase_cached', + + # The `_parts_normcase_cached` slot stores the case-normalized + # string path after splitting on path separators. It's set when the + # `_parts_normcase` property is accessed for the first time. It's used + # to implement comparison methods like `__lt__()`. + '_parts_normcase_cached', + + # The `_hash` slot stores the hash of the case-normalized string + # path. It's set when `__hash__()` is called for the first time. + '_hash', + ) + parser = os.path + + def __new__(cls, *args, **kwargs): + """Construct a PurePath from one or several strings and or existing + PurePath objects. The strings and path objects are combined so as + to yield a canonicalized path, which is incorporated into the + new PurePath object. + """ + if cls is PurePath: + cls = PureWindowsPath if os.name == 'nt' else PurePosixPath + return object.__new__(cls) + + def __init__(self, *args): + paths = [] + for arg in args: + if isinstance(arg, PurePath): + if arg.parser is not self.parser: + # GH-103631: Convert separators for backwards compatibility. + paths.append(arg.as_posix()) + else: + paths.extend(arg._raw_paths) + else: + try: + path = os.fspath(arg) + except TypeError: + path = arg + if not isinstance(path, str): + raise TypeError( + "argument should be a str or an os.PathLike " + "object where __fspath__ returns a str, " + f"not {type(path).__name__!r}") + paths.append(path) + self._raw_paths = paths + + def with_segments(self, *pathsegments): + """Construct a new path object from any number of path-like objects. + Subclasses may override this method to customize how new path objects + are created from methods like `iterdir()`. + """ + return type(self)(*pathsegments) + + def joinpath(self, *pathsegments): + """Combine this path with one or several arguments, and return a + new path representing either a subpath (if all arguments are relative + paths) or a totally different path (if one of the arguments is + anchored). + """ + return self.with_segments(self, *pathsegments) + + def __truediv__(self, key): + try: + return self.with_segments(self, key) + except TypeError: + return NotImplemented + + def __rtruediv__(self, key): + try: + return self.with_segments(key, self) + except TypeError: + return NotImplemented + + def __reduce__(self): + return self.__class__, tuple(self._raw_paths) + + def __repr__(self): + return "{}({!r})".format(self.__class__.__name__, self.as_posix()) + + def __fspath__(self): + return str(self) + + def __bytes__(self): + """Return the bytes representation of the path. This is only + recommended to use under Unix.""" + return os.fsencode(self) + + @property + def _str_normcase(self): + # String with normalized case, for hashing and equality checks + try: + return self._str_normcase_cached + except AttributeError: + if self.parser is posixpath: + self._str_normcase_cached = str(self) + else: + self._str_normcase_cached = str(self).lower() + return self._str_normcase_cached + + def __hash__(self): + try: + return self._hash + except AttributeError: + self._hash = hash(self._str_normcase) + return self._hash + + def __eq__(self, other): + if not isinstance(other, PurePath): + return NotImplemented + return self._str_normcase == other._str_normcase and self.parser is other.parser + + @property + def _parts_normcase(self): + # Cached parts with normalized case, for comparisons. + try: + return self._parts_normcase_cached + except AttributeError: + self._parts_normcase_cached = self._str_normcase.split(self.parser.sep) + return self._parts_normcase_cached + + def __lt__(self, other): + if not isinstance(other, PurePath) or self.parser is not other.parser: + return NotImplemented + return self._parts_normcase < other._parts_normcase + + def __le__(self, other): + if not isinstance(other, PurePath) or self.parser is not other.parser: + return NotImplemented + return self._parts_normcase <= other._parts_normcase + + def __gt__(self, other): + if not isinstance(other, PurePath) or self.parser is not other.parser: + return NotImplemented + return self._parts_normcase > other._parts_normcase + + def __ge__(self, other): + if not isinstance(other, PurePath) or self.parser is not other.parser: + return NotImplemented + return self._parts_normcase >= other._parts_normcase + + def __str__(self): + """Return the string representation of the path, suitable for + passing to system calls.""" + try: + return self._str + except AttributeError: + self._str = self._format_parsed_parts(self.drive, self.root, + self._tail) or '.' + return self._str + + @classmethod + def _format_parsed_parts(cls, drv, root, tail): + if drv or root: + return drv + root + cls.parser.sep.join(tail) + elif tail and cls.parser.splitdrive(tail[0])[0]: + tail = ['.'] + tail + return cls.parser.sep.join(tail) + + def _from_parsed_parts(self, drv, root, tail): + path = self._from_parsed_string(self._format_parsed_parts(drv, root, tail)) + path._drv = drv + path._root = root + path._tail_cached = tail + return path + + def _from_parsed_string(self, path_str): + path = self.with_segments(path_str) + path._str = path_str or '.' + return path + + @classmethod + def _parse_path(cls, path): + if not path: + return '', '', [] + sep = cls.parser.sep + altsep = cls.parser.altsep + if altsep: + path = path.replace(altsep, sep) + drv, root, rel = cls.parser.splitroot(path) + if not root and drv.startswith(sep) and not drv.endswith(sep): + drv_parts = drv.split(sep) + if len(drv_parts) == 4 and drv_parts[2] not in '?.': + # e.g. //server/share + root = sep + elif len(drv_parts) == 6: + # e.g. //?/unc/server/share + root = sep + return drv, root, [x for x in rel.split(sep) if x and x != '.'] + + @classmethod + def _parse_pattern(cls, pattern): + """Parse a glob pattern to a list of parts. This is much like + _parse_path, except: + + - Rather than normalizing and returning the drive and root, we raise + NotImplementedError if either are present. + - If the path has no real parts, we raise ValueError. + - If the path ends in a slash, then a final empty part is added. + """ + drv, root, rel = cls.parser.splitroot(pattern) + if root or drv: + raise NotImplementedError("Non-relative patterns are unsupported") + sep = cls.parser.sep + altsep = cls.parser.altsep + if altsep: + rel = rel.replace(altsep, sep) + parts = [x for x in rel.split(sep) if x and x != '.'] + if not parts: + raise ValueError(f"Unacceptable pattern: {str(pattern)!r}") + elif rel.endswith(sep): + # GH-65238: preserve trailing slash in glob patterns. + parts.append('') + return parts + + def as_posix(self): + """Return the string representation of the path with forward (/) + slashes.""" + return str(self).replace(self.parser.sep, '/') + + @property + def _raw_path(self): + paths = self._raw_paths + if len(paths) == 1: + return paths[0] + elif paths: + # Join path segments from the initializer. + return self.parser.join(*paths) + else: + return '' + + @property + def drive(self): + """The drive prefix (letter or UNC path), if any.""" + try: + return self._drv + except AttributeError: + self._drv, self._root, self._tail_cached = self._parse_path(self._raw_path) + return self._drv + + @property + def root(self): + """The root of the path, if any.""" + try: + return self._root + except AttributeError: + self._drv, self._root, self._tail_cached = self._parse_path(self._raw_path) + return self._root + + @property + def _tail(self): + try: + return self._tail_cached + except AttributeError: + self._drv, self._root, self._tail_cached = self._parse_path(self._raw_path) + return self._tail_cached + + @property + def anchor(self): + """The concatenation of the drive and root, or ''.""" + return self.drive + self.root + + @property + def parts(self): + """An object providing sequence-like access to the + components in the filesystem path.""" + if self.drive or self.root: + return (self.drive + self.root,) + tuple(self._tail) + else: + return tuple(self._tail) + + @property + def parent(self): + """The logical parent of the path.""" + drv = self.drive + root = self.root + tail = self._tail + if not tail: + return self + return self._from_parsed_parts(drv, root, tail[:-1]) + + @property + def parents(self): + """A sequence of this path's logical parents.""" + # The value of this property should not be cached on the path object, + # as doing so would introduce a reference cycle. + return _PathParents(self) + + @property + def name(self): + """The final path component, if any.""" + tail = self._tail + if not tail: + return '' + return tail[-1] + + def with_name(self, name): + """Return a new path with the file name changed.""" + p = self.parser + if not name or p.sep in name or (p.altsep and p.altsep in name) or name == '.': + raise ValueError(f"Invalid name {name!r}") + tail = self._tail.copy() + if not tail: + raise ValueError(f"{self!r} has an empty name") + tail[-1] = name + return self._from_parsed_parts(self.drive, self.root, tail) + + def with_stem(self, stem): + """Return a new path with the stem changed.""" + suffix = self.suffix + if not suffix: + return self.with_name(stem) + elif not stem: + # If the suffix is non-empty, we can't make the stem empty. + raise ValueError(f"{self!r} has a non-empty suffix") + else: + return self.with_name(stem + suffix) + + def with_suffix(self, suffix): + """Return a new path with the file suffix changed. If the path + has no suffix, add given suffix. If the given suffix is an empty + string, remove the suffix from the path. + """ + stem = self.stem + if not stem: + # If the stem is empty, we can't make the suffix non-empty. + raise ValueError(f"{self!r} has an empty name") + elif suffix and not suffix.startswith('.'): + raise ValueError(f"Invalid suffix {suffix!r}") + else: + return self.with_name(stem + suffix) + + @property + def stem(self): + """The final path component, minus its last suffix.""" + name = self.name + i = name.rfind('.') + if i != -1: + stem = name[:i] + # Stem must contain at least one non-dot character. + if stem.lstrip('.'): + return stem + return name + + @property + def suffix(self): + """ + The final component's last suffix, if any. + + This includes the leading period. For example: '.txt' + """ + name = self.name.lstrip('.') + i = name.rfind('.') + if i != -1: + return name[i:] + return '' + + @property + def suffixes(self): + """ + A list of the final component's suffixes, if any. + + These include the leading periods. For example: ['.tar', '.gz'] + """ + return ['.' + ext for ext in self.name.lstrip('.').split('.')[1:]] + + def relative_to(self, other, *, walk_up=False): + """Return the relative path to another path identified by the passed + arguments. If the operation is not possible (because this is not + related to the other path), raise ValueError. + + The *walk_up* parameter controls whether `..` may be used to resolve + the path. + """ + if not hasattr(other, 'with_segments'): + other = self.with_segments(other) + for step, path in enumerate(chain([other], other.parents)): + if path == self or path in self.parents: + break + elif not walk_up: + raise ValueError(f"{str(self)!r} is not in the subpath of {str(other)!r}") + elif path.name == '..': + raise ValueError(f"'..' segment in {str(other)!r} cannot be walked") + else: + raise ValueError(f"{str(self)!r} and {str(other)!r} have different anchors") + parts = ['..'] * step + self._tail[len(path._tail):] + return self._from_parsed_parts('', '', parts) + + def is_relative_to(self, other): + """Return True if the path is relative to another path or False. + """ + if not hasattr(other, 'with_segments'): + other = self.with_segments(other) + return other == self or other in self.parents + + def is_absolute(self): + """True if the path is absolute (has both a root and, if applicable, + a drive).""" + if self.parser is posixpath: + # Optimization: work with raw paths on POSIX. + for path in self._raw_paths: + if path.startswith('/'): + return True + return False + return self.parser.isabs(self) + + def is_reserved(self): + """Return True if the path contains one of the special names reserved + by the system, if any.""" + import warnings + msg = ("pathlib.PurePath.is_reserved() is deprecated and scheduled " + "for removal in Python 3.15. Use os.path.isreserved() to " + "detect reserved paths on Windows.") + warnings._deprecated("pathlib.PurePath.is_reserved", msg, remove=(3, 15)) + if self.parser is ntpath: + return self.parser.isreserved(self) + return False + + def as_uri(self): + """Return the path as a URI.""" + import warnings + msg = ("pathlib.PurePath.as_uri() is deprecated and scheduled " + "for removal in Python 3.19. Use pathlib.Path.as_uri().") + warnings._deprecated("pathlib.PurePath.as_uri", msg, remove=(3, 19)) + if not self.is_absolute(): + raise ValueError("relative path can't be expressed as a file URI") + + drive = self.drive + if len(drive) == 2 and drive[1] == ':': + # It's a path on a local drive => 'file:///c:/a/b' + prefix = 'file:///' + drive + path = self.as_posix()[2:] + elif drive: + # It's a path on a network drive => 'file://host/share/a/b' + prefix = 'file:' + path = self.as_posix() + else: + # It's a posix path => 'file:///etc/hosts' + prefix = 'file://' + path = str(self) + from urllib.parse import quote_from_bytes + return prefix + quote_from_bytes(os.fsencode(path)) + + def full_match(self, pattern, *, case_sensitive=None): + """ + Return True if this path matches the given glob-style pattern. The + pattern is matched against the entire path. + """ + if not hasattr(pattern, 'with_segments'): + pattern = self.with_segments(pattern) + if case_sensitive is None: + case_sensitive = self.parser is posixpath + + # The string representation of an empty path is a single dot ('.'). Empty + # paths shouldn't match wildcards, so we change it to the empty string. + path = str(self) if self.parts else '' + pattern = str(pattern) if pattern.parts else '' + globber = _StringGlobber(self.parser.sep, case_sensitive, recursive=True) + return globber.compile(pattern)(path) is not None + + def match(self, path_pattern, *, case_sensitive=None): + """ + Return True if this path matches the given pattern. If the pattern is + relative, matching is done from the right; otherwise, the entire path + is matched. The recursive wildcard '**' is *not* supported by this + method. + """ + if not hasattr(path_pattern, 'with_segments'): + path_pattern = self.with_segments(path_pattern) + if case_sensitive is None: + case_sensitive = self.parser is posixpath + path_parts = self.parts[::-1] + pattern_parts = path_pattern.parts[::-1] + if not pattern_parts: + raise ValueError("empty pattern") + if len(path_parts) < len(pattern_parts): + return False + if len(path_parts) > len(pattern_parts) and path_pattern.anchor: + return False + globber = _StringGlobber(self.parser.sep, case_sensitive) + for path_part, pattern_part in zip(path_parts, pattern_parts): + match = globber.compile(pattern_part) + if match(path_part) is None: + return False + return True + +# Subclassing os.PathLike makes isinstance() checks slower, +# which in turn makes Path construction slower. Register instead! +os.PathLike.register(PurePath) + + +class PurePosixPath(PurePath): + """PurePath subclass for non-Windows systems. + + On a POSIX system, instantiating a PurePath should return this object. + However, you can also instantiate it directly on any system. + """ + parser = posixpath + __slots__ = () + + +class PureWindowsPath(PurePath): + """PurePath subclass for Windows systems. + + On a Windows system, instantiating a PurePath should return this object. + However, you can also instantiate it directly on any system. + """ + parser = ntpath + __slots__ = () + + +class Path(PurePath): + """PurePath subclass that can make system calls. + + Path represents a filesystem path but unlike PurePath, also offers + methods to do system calls on path objects. Depending on your system, + instantiating a Path will return either a PosixPath or a WindowsPath + object. You can also instantiate a PosixPath or WindowsPath directly, + but cannot instantiate a WindowsPath on a POSIX system or vice versa. + """ + __slots__ = ('_info',) + + def __new__(cls, *args, **kwargs): + if cls is Path: + cls = WindowsPath if os.name == 'nt' else PosixPath + return object.__new__(cls) + + @property + def info(self): + """ + A PathInfo object that exposes the file type and other file attributes + of this path. + """ + try: + return self._info + except AttributeError: + self._info = PathInfo(self) + return self._info + + def stat(self, *, follow_symlinks=True): + """ + Return the result of the stat() system call on this path, like + os.stat() does. + """ + return os.stat(self, follow_symlinks=follow_symlinks) + + def lstat(self): + """ + Like stat(), except if the path points to a symlink, the symlink's + status information is returned, rather than its target's. + """ + return os.lstat(self) + + def exists(self, *, follow_symlinks=True): + """ + Whether this path exists. + + This method normally follows symlinks; to check whether a symlink exists, + add the argument follow_symlinks=False. + """ + if follow_symlinks: + return os.path.exists(self) + return os.path.lexists(self) + + def is_dir(self, *, follow_symlinks=True): + """ + Whether this path is a directory. + """ + if follow_symlinks: + return os.path.isdir(self) + try: + return S_ISDIR(self.stat(follow_symlinks=follow_symlinks).st_mode) + except (OSError, ValueError): + return False + + def is_file(self, *, follow_symlinks=True): + """ + Whether this path is a regular file (also True for symlinks pointing + to regular files). + """ + if follow_symlinks: + return os.path.isfile(self) + try: + return S_ISREG(self.stat(follow_symlinks=follow_symlinks).st_mode) + except (OSError, ValueError): + return False + + def is_mount(self): + """ + Check if this path is a mount point + """ + return os.path.ismount(self) + + def is_symlink(self): + """ + Whether this path is a symbolic link. + """ + return os.path.islink(self) + + def is_junction(self): + """ + Whether this path is a junction. + """ + return os.path.isjunction(self) + + def is_block_device(self): + """ + Whether this path is a block device. + """ + try: + return S_ISBLK(self.stat().st_mode) + except (OSError, ValueError): + return False + + def is_char_device(self): + """ + Whether this path is a character device. + """ + try: + return S_ISCHR(self.stat().st_mode) + except (OSError, ValueError): + return False + + def is_fifo(self): + """ + Whether this path is a FIFO. + """ + try: + return S_ISFIFO(self.stat().st_mode) + except (OSError, ValueError): + return False + + def is_socket(self): + """ + Whether this path is a socket. + """ + try: + return S_ISSOCK(self.stat().st_mode) + except (OSError, ValueError): + return False + + def samefile(self, other_path): + """Return whether other_path is the same or not as this file + (as returned by os.path.samefile()). + """ + st = self.stat() + try: + other_st = other_path.stat() + except AttributeError: + other_st = self.with_segments(other_path).stat() + return (st.st_ino == other_st.st_ino and + st.st_dev == other_st.st_dev) + + def open(self, mode='r', buffering=-1, encoding=None, + errors=None, newline=None): + """ + Open the file pointed to by this path and return a file object, as + the built-in open() function does. + """ + if "b" not in mode: + encoding = io.text_encoding(encoding) + return io.open(self, mode, buffering, encoding, errors, newline) + + def read_bytes(self): + """ + Open the file in bytes mode, read it, and close the file. + """ + with self.open(mode='rb', buffering=0) as f: + return f.read() + + def read_text(self, encoding=None, errors=None, newline=None): + """ + Open the file in text mode, read it, and close the file. + """ + # Call io.text_encoding() here to ensure any warning is raised at an + # appropriate stack level. + encoding = io.text_encoding(encoding) + with self.open(mode='r', encoding=encoding, errors=errors, newline=newline) as f: + return f.read() + + def write_bytes(self, data): + """ + Open the file in bytes mode, write to it, and close the file. + """ + # type-check for the buffer interface before truncating the file + view = memoryview(data) + with self.open(mode='wb') as f: + return f.write(view) + + def write_text(self, data, encoding=None, errors=None, newline=None): + """ + Open the file in text mode, write to it, and close the file. + """ + # Call io.text_encoding() here to ensure any warning is raised at an + # appropriate stack level. + encoding = io.text_encoding(encoding) + if not isinstance(data, str): + raise TypeError('data must be str, not %s' % + data.__class__.__name__) + with self.open(mode='w', encoding=encoding, errors=errors, newline=newline) as f: + return f.write(data) + + _remove_leading_dot = operator.itemgetter(slice(2, None)) + _remove_trailing_slash = operator.itemgetter(slice(-1)) + + def _filter_trailing_slash(self, paths): + sep = self.parser.sep + anchor_len = len(self.anchor) + for path_str in paths: + if len(path_str) > anchor_len and path_str[-1] == sep: + path_str = path_str[:-1] + yield path_str + + def _from_dir_entry(self, dir_entry, path_str): + path = self.with_segments(path_str) + path._str = path_str + path._info = DirEntryInfo(dir_entry) + return path + + def iterdir(self): + """Yield path objects of the directory contents. + + The children are yielded in arbitrary order, and the + special entries '.' and '..' are not included. + """ + root_dir = str(self) + with os.scandir(root_dir) as scandir_it: + entries = list(scandir_it) + if root_dir == '.': + return (self._from_dir_entry(e, e.name) for e in entries) + else: + return (self._from_dir_entry(e, e.path) for e in entries) + + def glob(self, pattern, *, case_sensitive=None, recurse_symlinks=False): + """Iterate over this subtree and yield all existing files (of any + kind, including directories) matching the given relative pattern. + """ + sys.audit("pathlib.Path.glob", self, pattern) + if case_sensitive is None: + case_sensitive = self.parser is posixpath + case_pedantic = False + else: + # The user has expressed a case sensitivity choice, but we don't + # know the case sensitivity of the underlying filesystem, so we + # must use scandir() for everything, including non-wildcard parts. + case_pedantic = True + parts = self._parse_pattern(pattern) + recursive = True if recurse_symlinks else _no_recurse_symlinks + globber = _StringGlobber(self.parser.sep, case_sensitive, case_pedantic, recursive) + select = globber.selector(parts[::-1]) + root = str(self) + paths = select(self.parser.join(root, '')) + + # Normalize results + if root == '.': + paths = map(self._remove_leading_dot, paths) + if parts[-1] == '': + paths = map(self._remove_trailing_slash, paths) + elif parts[-1] == '**': + paths = self._filter_trailing_slash(paths) + paths = map(self._from_parsed_string, paths) + return paths + + def rglob(self, pattern, *, case_sensitive=None, recurse_symlinks=False): + """Recursively yield all existing files (of any kind, including + directories) matching the given relative pattern, anywhere in + this subtree. + """ + sys.audit("pathlib.Path.rglob", self, pattern) + pattern = self.parser.join('**', pattern) + return self.glob(pattern, case_sensitive=case_sensitive, recurse_symlinks=recurse_symlinks) + + def walk(self, top_down=True, on_error=None, follow_symlinks=False): + """Walk the directory tree from this directory, similar to os.walk().""" + sys.audit("pathlib.Path.walk", self, on_error, follow_symlinks) + root_dir = str(self) + if not follow_symlinks: + follow_symlinks = os._walk_symlinks_as_files + results = os.walk(root_dir, top_down, on_error, follow_symlinks) + for path_str, dirnames, filenames in results: + if root_dir == '.': + path_str = path_str[2:] + yield self._from_parsed_string(path_str), dirnames, filenames + + def absolute(self): + """Return an absolute version of this path + No normalization or symlink resolution is performed. + + Use resolve() to resolve symlinks and remove '..' segments. + """ + if self.is_absolute(): + return self + if self.root: + drive = os.path.splitroot(os.getcwd())[0] + return self._from_parsed_parts(drive, self.root, self._tail) + if self.drive: + # There is a CWD on each drive-letter drive. + cwd = os.path.abspath(self.drive) + else: + cwd = os.getcwd() + if not self._tail: + # Fast path for "empty" paths, e.g. Path("."), Path("") or Path(). + # We pass only one argument to with_segments() to avoid the cost + # of joining, and we exploit the fact that getcwd() returns a + # fully-normalized string by storing it in _str. This is used to + # implement Path.cwd(). + return self._from_parsed_string(cwd) + drive, root, rel = os.path.splitroot(cwd) + if not rel: + return self._from_parsed_parts(drive, root, self._tail) + tail = rel.split(self.parser.sep) + tail.extend(self._tail) + return self._from_parsed_parts(drive, root, tail) + + @classmethod + def cwd(cls): + """Return a new path pointing to the current working directory.""" + cwd = os.getcwd() + path = cls(cwd) + path._str = cwd # getcwd() returns a normalized path + return path + + def resolve(self, strict=False): + """ + Make the path absolute, resolving all symlinks on the way and also + normalizing it. + """ + + return self.with_segments(os.path.realpath(self, strict=strict)) + + if pwd: + def owner(self, *, follow_symlinks=True): + """ + Return the login name of the file owner. + """ + uid = self.stat(follow_symlinks=follow_symlinks).st_uid + return pwd.getpwuid(uid).pw_name + else: + def owner(self, *, follow_symlinks=True): + """ + Return the login name of the file owner. + """ + f = f"{type(self).__name__}.owner()" + raise UnsupportedOperation(f"{f} is unsupported on this system") + + if grp: + def group(self, *, follow_symlinks=True): + """ + Return the group name of the file gid. + """ + gid = self.stat(follow_symlinks=follow_symlinks).st_gid + return grp.getgrgid(gid).gr_name + else: + def group(self, *, follow_symlinks=True): + """ + Return the group name of the file gid. + """ + f = f"{type(self).__name__}.group()" + raise UnsupportedOperation(f"{f} is unsupported on this system") + + if hasattr(os, "readlink"): + def readlink(self): + """ + Return the path to which the symbolic link points. + """ + return self.with_segments(os.readlink(self)) + else: + def readlink(self): + """ + Return the path to which the symbolic link points. + """ + f = f"{type(self).__name__}.readlink()" + raise UnsupportedOperation(f"{f} is unsupported on this system") + + def touch(self, mode=0o666, exist_ok=True): + """ + Create this file with the given access mode, if it doesn't exist. + """ + + if exist_ok: + # First try to bump modification time + # Implementation note: GNU touch uses the UTIME_NOW option of + # the utimensat() / futimens() functions. + try: + os.utime(self, None) + except OSError: + # Avoid exception chaining + pass + else: + return + flags = os.O_CREAT | os.O_WRONLY + if not exist_ok: + flags |= os.O_EXCL + fd = os.open(self, flags, mode) + os.close(fd) + + def mkdir(self, mode=0o777, parents=False, exist_ok=False): + """ + Create a new directory at this given path. + """ + try: + os.mkdir(self, mode) + except FileNotFoundError: + if not parents or self.parent == self: + raise + self.parent.mkdir(parents=True, exist_ok=True) + self.mkdir(mode, parents=False, exist_ok=exist_ok) + except OSError: + # Cannot rely on checking for EEXIST, since the operating system + # could give priority to other errors like EACCES or EROFS + if not exist_ok or not self.is_dir(): + raise + + def chmod(self, mode, *, follow_symlinks=True): + """ + Change the permissions of the path, like os.chmod(). + """ + os.chmod(self, mode, follow_symlinks=follow_symlinks) + + def lchmod(self, mode): + """ + Like chmod(), except if the path points to a symlink, the symlink's + permissions are changed, rather than its target's. + """ + self.chmod(mode, follow_symlinks=False) + + def unlink(self, missing_ok=False): + """ + Remove this file or link. + If the path is a directory, use rmdir() instead. + """ + try: + os.unlink(self) + except FileNotFoundError: + if not missing_ok: + raise + + def rmdir(self): + """ + Remove this directory. The directory must be empty. + """ + os.rmdir(self) + + def _delete(self): + """ + Delete this file or directory (including all sub-directories). + """ + if self.is_symlink() or self.is_junction(): + self.unlink() + elif self.is_dir(): + # Lazy import to improve module import time + import shutil + shutil.rmtree(self) + else: + self.unlink() + + def rename(self, target): + """ + Rename this path to the target path. + + The target path may be absolute or relative. Relative paths are + interpreted relative to the current working directory, *not* the + directory of the Path object. + + Returns the new Path instance pointing to the target path. + """ + os.rename(self, target) + if not hasattr(target, 'with_segments'): + target = self.with_segments(target) + return target + + def replace(self, target): + """ + Rename this path to the target path, overwriting if that path exists. + + The target path may be absolute or relative. Relative paths are + interpreted relative to the current working directory, *not* the + directory of the Path object. + + Returns the new Path instance pointing to the target path. + """ + os.replace(self, target) + if not hasattr(target, 'with_segments'): + target = self.with_segments(target) + return target + + def copy(self, target, **kwargs): + """ + Recursively copy this file or directory tree to the given destination. + """ + if not hasattr(target, 'with_segments'): + target = self.with_segments(target) + ensure_distinct_paths(self, target) + target._copy_from(self, **kwargs) + return target.joinpath() # Empty join to ensure fresh metadata. + + def copy_into(self, target_dir, **kwargs): + """ + Copy this file or directory tree into the given existing directory. + """ + name = self.name + if not name: + raise ValueError(f"{self!r} has an empty name") + elif hasattr(target_dir, 'with_segments'): + target = target_dir / name + else: + target = self.with_segments(target_dir, name) + return self.copy(target, **kwargs) + + def _copy_from(self, source, follow_symlinks=True, preserve_metadata=False): + """ + Recursively copy the given path to this path. + """ + if not follow_symlinks and source.info.is_symlink(): + self._copy_from_symlink(source, preserve_metadata) + elif source.info.is_dir(): + children = source.iterdir() + os.mkdir(self) + for child in children: + self.joinpath(child.name)._copy_from( + child, follow_symlinks, preserve_metadata) + if preserve_metadata: + copy_info(source.info, self) + else: + self._copy_from_file(source, preserve_metadata) + + def _copy_from_file(self, source, preserve_metadata=False): + ensure_different_files(source, self) + with magic_open(source, 'rb') as source_f: + with open(self, 'wb') as target_f: + copyfileobj(source_f, target_f) + if preserve_metadata: + copy_info(source.info, self) + + if copyfile2: + # Use fast OS routine for local file copying where available. + _copy_from_file_fallback = _copy_from_file + def _copy_from_file(self, source, preserve_metadata=False): + try: + source = os.fspath(source) + except TypeError: + pass + else: + copyfile2(source, str(self)) + return + self._copy_from_file_fallback(source, preserve_metadata) + + if os.name == 'nt': + # If a directory-symlink is copied *before* its target, then + # os.symlink() incorrectly creates a file-symlink on Windows. Avoid + # this by passing *target_is_dir* to os.symlink() on Windows. + def _copy_from_symlink(self, source, preserve_metadata=False): + os.symlink(str(source.readlink()), self, source.info.is_dir()) + if preserve_metadata: + copy_info(source.info, self, follow_symlinks=False) + else: + def _copy_from_symlink(self, source, preserve_metadata=False): + os.symlink(str(source.readlink()), self) + if preserve_metadata: + copy_info(source.info, self, follow_symlinks=False) + + def move(self, target): + """ + Recursively move this file or directory tree to the given destination. + """ + # Use os.replace() if the target is os.PathLike and on the same FS. + try: + target = self.with_segments(target) + except TypeError: + pass + else: + ensure_different_files(self, target) + try: + os.replace(self, target) + except OSError as err: + if err.errno != EXDEV: + raise + else: + return target.joinpath() # Empty join to ensure fresh metadata. + # Fall back to copy+delete. + target = self.copy(target, follow_symlinks=False, preserve_metadata=True) + self._delete() + return target + + def move_into(self, target_dir): + """ + Move this file or directory tree into the given existing directory. + """ + name = self.name + if not name: + raise ValueError(f"{self!r} has an empty name") + elif hasattr(target_dir, 'with_segments'): + target = target_dir / name + else: + target = self.with_segments(target_dir, name) + return self.move(target) + + if hasattr(os, "symlink"): + def symlink_to(self, target, target_is_directory=False): + """ + Make this path a symlink pointing to the target path. + Note the order of arguments (link, target) is the reverse of os.symlink. + """ + os.symlink(target, self, target_is_directory) + else: + def symlink_to(self, target, target_is_directory=False): + """ + Make this path a symlink pointing to the target path. + Note the order of arguments (link, target) is the reverse of os.symlink. + """ + f = f"{type(self).__name__}.symlink_to()" + raise UnsupportedOperation(f"{f} is unsupported on this system") + + if hasattr(os, "link"): + def hardlink_to(self, target): + """ + Make this path a hard link pointing to the same file as *target*. + + Note the order of arguments (self, target) is the reverse of os.link's. + """ + os.link(target, self) + else: + def hardlink_to(self, target): + """ + Make this path a hard link pointing to the same file as *target*. + + Note the order of arguments (self, target) is the reverse of os.link's. + """ + f = f"{type(self).__name__}.hardlink_to()" + raise UnsupportedOperation(f"{f} is unsupported on this system") + + def expanduser(self): + """ Return a new path with expanded ~ and ~user constructs + (as returned by os.path.expanduser) + """ + if (not (self.drive or self.root) and + self._tail and self._tail[0][:1] == '~'): + homedir = os.path.expanduser(self._tail[0]) + if homedir[:1] == "~": + raise RuntimeError("Could not determine home directory.") + drv, root, tail = self._parse_path(homedir) + return self._from_parsed_parts(drv, root, tail + self._tail[1:]) + + return self + + @classmethod + def home(cls): + """Return a new path pointing to expanduser('~'). + """ + homedir = os.path.expanduser("~") + if homedir == "~": + raise RuntimeError("Could not determine home directory.") + return cls(homedir) + + def as_uri(self): + """Return the path as a URI.""" + if not self.is_absolute(): + raise ValueError("relative paths can't be expressed as file URIs") + from urllib.request import pathname2url + return pathname2url(str(self), add_scheme=True) + + @classmethod + def from_uri(cls, uri): + """Return a new path from the given 'file' URI.""" + from urllib.error import URLError + from urllib.request import url2pathname + try: + path = cls(url2pathname(uri, require_scheme=True)) + except URLError as exc: + raise ValueError(exc.reason) from None + if not path.is_absolute(): + raise ValueError(f"URI is not absolute: {uri!r}") + return path + + +class PosixPath(Path, PurePosixPath): + """Path subclass for non-Windows systems. + + On a POSIX system, instantiating a Path should return this object. + """ + __slots__ = () + + if os.name == 'nt': + def __new__(cls, *args, **kwargs): + raise UnsupportedOperation( + f"cannot instantiate {cls.__name__!r} on your system") + +class WindowsPath(Path, PureWindowsPath): + """Path subclass for Windows systems. + + On a Windows system, instantiating a Path should return this object. + """ + __slots__ = () + + if os.name != 'nt': + def __new__(cls, *args, **kwargs): + raise UnsupportedOperation( + f"cannot instantiate {cls.__name__!r} on your system") diff --git a/PythonLib/full/pathlib/_local.py b/PythonLib/full/pathlib/_local.py new file mode 100644 index 00000000..58e137f2 --- /dev/null +++ b/PythonLib/full/pathlib/_local.py @@ -0,0 +1,12 @@ +""" +This module exists so that pathlib objects pickled under Python 3.13 can be +unpickled in 3.14+. +""" + +from pathlib import * + +__all__ = [ + "UnsupportedOperation", + "PurePath", "PurePosixPath", "PureWindowsPath", + "Path", "PosixPath", "WindowsPath", +] diff --git a/PythonLib/full/pathlib/_os.py b/PythonLib/full/pathlib/_os.py new file mode 100644 index 00000000..03983694 --- /dev/null +++ b/PythonLib/full/pathlib/_os.py @@ -0,0 +1,530 @@ +""" +Low-level OS functionality wrappers used by pathlib. +""" + +from errno import * +from io import TextIOWrapper, text_encoding +from stat import S_ISDIR, S_ISREG, S_ISLNK, S_IMODE +import os +import sys +try: + import fcntl +except ImportError: + fcntl = None +try: + import posix +except ImportError: + posix = None +try: + import _winapi +except ImportError: + _winapi = None + + +def _get_copy_blocksize(infd): + """Determine blocksize for fastcopying on Linux. + Hopefully the whole file will be copied in a single call. + The copying itself should be performed in a loop 'till EOF is + reached (0 return) so a blocksize smaller or bigger than the actual + file size should not make any difference, also in case the file + content changes while being copied. + """ + try: + blocksize = max(os.fstat(infd).st_size, 2 ** 23) # min 8 MiB + except OSError: + blocksize = 2 ** 27 # 128 MiB + # On 32-bit architectures truncate to 1 GiB to avoid OverflowError, + # see gh-82500. + if sys.maxsize < 2 ** 32: + blocksize = min(blocksize, 2 ** 30) + return blocksize + + +if fcntl and hasattr(fcntl, 'FICLONE'): + def _ficlone(source_fd, target_fd): + """ + Perform a lightweight copy of two files, where the data blocks are + copied only when modified. This is known as Copy on Write (CoW), + instantaneous copy or reflink. + """ + fcntl.ioctl(target_fd, fcntl.FICLONE, source_fd) +else: + _ficlone = None + + +if posix and hasattr(posix, '_fcopyfile'): + def _fcopyfile(source_fd, target_fd): + """ + Copy a regular file content using high-performance fcopyfile(3) + syscall (macOS). + """ + posix._fcopyfile(source_fd, target_fd, posix._COPYFILE_DATA) +else: + _fcopyfile = None + + +if hasattr(os, 'copy_file_range'): + def _copy_file_range(source_fd, target_fd): + """ + Copy data from one regular mmap-like fd to another by using a + high-performance copy_file_range(2) syscall that gives filesystems + an opportunity to implement the use of reflinks or server-side + copy. + This should work on Linux >= 4.5 only. + """ + blocksize = _get_copy_blocksize(source_fd) + offset = 0 + while True: + sent = os.copy_file_range(source_fd, target_fd, blocksize, + offset_dst=offset) + if sent == 0: + break # EOF + offset += sent +else: + _copy_file_range = None + + +if hasattr(os, 'sendfile'): + def _sendfile(source_fd, target_fd): + """Copy data from one regular mmap-like fd to another by using + high-performance sendfile(2) syscall. + This should work on Linux >= 2.6.33 only. + """ + blocksize = _get_copy_blocksize(source_fd) + offset = 0 + while True: + sent = os.sendfile(target_fd, source_fd, offset, blocksize) + if sent == 0: + break # EOF + offset += sent +else: + _sendfile = None + + +if _winapi and hasattr(_winapi, 'CopyFile2'): + def copyfile2(source, target): + """ + Copy from one file to another using CopyFile2 (Windows only). + """ + _winapi.CopyFile2(source, target, 0) +else: + copyfile2 = None + + +def copyfileobj(source_f, target_f): + """ + Copy data from file-like object source_f to file-like object target_f. + """ + try: + source_fd = source_f.fileno() + target_fd = target_f.fileno() + except Exception: + pass # Fall through to generic code. + else: + try: + # Use OS copy-on-write where available. + if _ficlone: + try: + _ficlone(source_fd, target_fd) + return + except OSError as err: + if err.errno not in (EBADF, EOPNOTSUPP, ETXTBSY, EXDEV): + raise err + + # Use OS copy where available. + if _fcopyfile: + try: + _fcopyfile(source_fd, target_fd) + return + except OSError as err: + if err.errno not in (EINVAL, ENOTSUP): + raise err + if _copy_file_range: + try: + _copy_file_range(source_fd, target_fd) + return + except OSError as err: + if err.errno not in (ETXTBSY, EXDEV): + raise err + if _sendfile: + try: + _sendfile(source_fd, target_fd) + return + except OSError as err: + if err.errno != ENOTSOCK: + raise err + except OSError as err: + # Produce more useful error messages. + err.filename = source_f.name + err.filename2 = target_f.name + raise err + + # Last resort: copy with fileobj read() and write(). + read_source = source_f.read + write_target = target_f.write + while buf := read_source(1024 * 1024): + write_target(buf) + + +def magic_open(path, mode='r', buffering=-1, encoding=None, errors=None, + newline=None): + """ + Open the file pointed to by this path and return a file object, as + the built-in open() function does. + """ + text = 'b' not in mode + if text: + # Call io.text_encoding() here to ensure any warning is raised at an + # appropriate stack level. + encoding = text_encoding(encoding) + try: + return open(path, mode, buffering, encoding, errors, newline) + except TypeError: + pass + cls = type(path) + mode = ''.join(sorted(c for c in mode if c not in 'bt')) + if text: + try: + attr = getattr(cls, f'__open_{mode}__') + except AttributeError: + pass + else: + return attr(path, buffering, encoding, errors, newline) + elif encoding is not None: + raise ValueError("binary mode doesn't take an encoding argument") + elif errors is not None: + raise ValueError("binary mode doesn't take an errors argument") + elif newline is not None: + raise ValueError("binary mode doesn't take a newline argument") + + try: + attr = getattr(cls, f'__open_{mode}b__') + except AttributeError: + pass + else: + stream = attr(path, buffering) + if text: + stream = TextIOWrapper(stream, encoding, errors, newline) + return stream + + raise TypeError(f"{cls.__name__} can't be opened with mode {mode!r}") + + +def ensure_distinct_paths(source, target): + """ + Raise OSError(EINVAL) if the other path is within this path. + """ + # Note: there is no straightforward, foolproof algorithm to determine + # if one directory is within another (a particularly perverse example + # would be a single network share mounted in one location via NFS, and + # in another location via CIFS), so we simply checks whether the + # other path is lexically equal to, or within, this path. + if source == target: + err = OSError(EINVAL, "Source and target are the same path") + elif source in target.parents: + err = OSError(EINVAL, "Source path is a parent of target path") + else: + return + err.filename = str(source) + err.filename2 = str(target) + raise err + + +def ensure_different_files(source, target): + """ + Raise OSError(EINVAL) if both paths refer to the same file. + """ + try: + source_file_id = source.info._file_id + target_file_id = target.info._file_id + except AttributeError: + if source != target: + return + else: + try: + if source_file_id() != target_file_id(): + return + except (OSError, ValueError): + return + err = OSError(EINVAL, "Source and target are the same file") + err.filename = str(source) + err.filename2 = str(target) + raise err + + +def copy_info(info, target, follow_symlinks=True): + """Copy metadata from the given PathInfo to the given local path.""" + copy_times_ns = ( + hasattr(info, '_access_time_ns') and + hasattr(info, '_mod_time_ns') and + (follow_symlinks or os.utime in os.supports_follow_symlinks)) + if copy_times_ns: + t0 = info._access_time_ns(follow_symlinks=follow_symlinks) + t1 = info._mod_time_ns(follow_symlinks=follow_symlinks) + os.utime(target, ns=(t0, t1), follow_symlinks=follow_symlinks) + + # We must copy extended attributes before the file is (potentially) + # chmod()'ed read-only, otherwise setxattr() will error with -EACCES. + copy_xattrs = ( + hasattr(info, '_xattrs') and + hasattr(os, 'setxattr') and + (follow_symlinks or os.setxattr in os.supports_follow_symlinks)) + if copy_xattrs: + xattrs = info._xattrs(follow_symlinks=follow_symlinks) + for attr, value in xattrs: + try: + os.setxattr(target, attr, value, follow_symlinks=follow_symlinks) + except OSError as e: + if e.errno not in (EPERM, ENOTSUP, ENODATA, EINVAL, EACCES): + raise + + copy_posix_permissions = ( + hasattr(info, '_posix_permissions') and + (follow_symlinks or os.chmod in os.supports_follow_symlinks)) + if copy_posix_permissions: + posix_permissions = info._posix_permissions(follow_symlinks=follow_symlinks) + try: + os.chmod(target, posix_permissions, follow_symlinks=follow_symlinks) + except NotImplementedError: + # if we got a NotImplementedError, it's because + # * follow_symlinks=False, + # * lchown() is unavailable, and + # * either + # * fchownat() is unavailable or + # * fchownat() doesn't implement AT_SYMLINK_NOFOLLOW. + # (it returned ENOSUP.) + # therefore we're out of options--we simply cannot chown the + # symlink. give up, suppress the error. + # (which is what shutil always did in this circumstance.) + pass + + copy_bsd_flags = ( + hasattr(info, '_bsd_flags') and + hasattr(os, 'chflags') and + (follow_symlinks or os.chflags in os.supports_follow_symlinks)) + if copy_bsd_flags: + bsd_flags = info._bsd_flags(follow_symlinks=follow_symlinks) + try: + os.chflags(target, bsd_flags, follow_symlinks=follow_symlinks) + except OSError as why: + if why.errno not in (EOPNOTSUPP, ENOTSUP): + raise + + +class _PathInfoBase: + __slots__ = ('_path', '_stat_result', '_lstat_result') + + def __init__(self, path): + self._path = str(path) + + def __repr__(self): + path_type = "WindowsPath" if os.name == "nt" else "PosixPath" + return f"<{path_type}.info>" + + def _stat(self, *, follow_symlinks=True, ignore_errors=False): + """Return the status as an os.stat_result, or None if stat() fails and + ignore_errors is true.""" + if follow_symlinks: + try: + result = self._stat_result + except AttributeError: + pass + else: + if ignore_errors or result is not None: + return result + try: + self._stat_result = os.stat(self._path) + except (OSError, ValueError): + self._stat_result = None + if not ignore_errors: + raise + return self._stat_result + else: + try: + result = self._lstat_result + except AttributeError: + pass + else: + if ignore_errors or result is not None: + return result + try: + self._lstat_result = os.lstat(self._path) + except (OSError, ValueError): + self._lstat_result = None + if not ignore_errors: + raise + return self._lstat_result + + def _posix_permissions(self, *, follow_symlinks=True): + """Return the POSIX file permissions.""" + return S_IMODE(self._stat(follow_symlinks=follow_symlinks).st_mode) + + def _file_id(self, *, follow_symlinks=True): + """Returns the identifier of the file.""" + st = self._stat(follow_symlinks=follow_symlinks) + return st.st_dev, st.st_ino + + def _access_time_ns(self, *, follow_symlinks=True): + """Return the access time in nanoseconds.""" + return self._stat(follow_symlinks=follow_symlinks).st_atime_ns + + def _mod_time_ns(self, *, follow_symlinks=True): + """Return the modify time in nanoseconds.""" + return self._stat(follow_symlinks=follow_symlinks).st_mtime_ns + + if hasattr(os.stat_result, 'st_flags'): + def _bsd_flags(self, *, follow_symlinks=True): + """Return the flags.""" + return self._stat(follow_symlinks=follow_symlinks).st_flags + + if hasattr(os, 'listxattr'): + def _xattrs(self, *, follow_symlinks=True): + """Return the xattrs as a list of (attr, value) pairs, or an empty + list if extended attributes aren't supported.""" + try: + return [ + (attr, os.getxattr(self._path, attr, follow_symlinks=follow_symlinks)) + for attr in os.listxattr(self._path, follow_symlinks=follow_symlinks)] + except OSError as err: + if err.errno not in (EPERM, ENOTSUP, ENODATA, EINVAL, EACCES): + raise + return [] + + +class _WindowsPathInfo(_PathInfoBase): + """Implementation of pathlib.types.PathInfo that provides status + information for Windows paths. Don't try to construct it yourself.""" + __slots__ = ('_exists', '_is_dir', '_is_file', '_is_symlink') + + def exists(self, *, follow_symlinks=True): + """Whether this path exists.""" + if not follow_symlinks and self.is_symlink(): + return True + try: + return self._exists + except AttributeError: + if os.path.exists(self._path): + self._exists = True + return True + else: + self._exists = self._is_dir = self._is_file = False + return False + + def is_dir(self, *, follow_symlinks=True): + """Whether this path is a directory.""" + if not follow_symlinks and self.is_symlink(): + return False + try: + return self._is_dir + except AttributeError: + if os.path.isdir(self._path): + self._is_dir = self._exists = True + return True + else: + self._is_dir = False + return False + + def is_file(self, *, follow_symlinks=True): + """Whether this path is a regular file.""" + if not follow_symlinks and self.is_symlink(): + return False + try: + return self._is_file + except AttributeError: + if os.path.isfile(self._path): + self._is_file = self._exists = True + return True + else: + self._is_file = False + return False + + def is_symlink(self): + """Whether this path is a symbolic link.""" + try: + return self._is_symlink + except AttributeError: + self._is_symlink = os.path.islink(self._path) + return self._is_symlink + + +class _PosixPathInfo(_PathInfoBase): + """Implementation of pathlib.types.PathInfo that provides status + information for POSIX paths. Don't try to construct it yourself.""" + __slots__ = () + + def exists(self, *, follow_symlinks=True): + """Whether this path exists.""" + st = self._stat(follow_symlinks=follow_symlinks, ignore_errors=True) + if st is None: + return False + return True + + def is_dir(self, *, follow_symlinks=True): + """Whether this path is a directory.""" + st = self._stat(follow_symlinks=follow_symlinks, ignore_errors=True) + if st is None: + return False + return S_ISDIR(st.st_mode) + + def is_file(self, *, follow_symlinks=True): + """Whether this path is a regular file.""" + st = self._stat(follow_symlinks=follow_symlinks, ignore_errors=True) + if st is None: + return False + return S_ISREG(st.st_mode) + + def is_symlink(self): + """Whether this path is a symbolic link.""" + st = self._stat(follow_symlinks=False, ignore_errors=True) + if st is None: + return False + return S_ISLNK(st.st_mode) + + +PathInfo = _WindowsPathInfo if os.name == 'nt' else _PosixPathInfo + + +class DirEntryInfo(_PathInfoBase): + """Implementation of pathlib.types.PathInfo that provides status + information by querying a wrapped os.DirEntry object. Don't try to + construct it yourself.""" + __slots__ = ('_entry',) + + def __init__(self, entry): + super().__init__(entry.path) + self._entry = entry + + def _stat(self, *, follow_symlinks=True, ignore_errors=False): + try: + return self._entry.stat(follow_symlinks=follow_symlinks) + except OSError: + if not ignore_errors: + raise + return None + + def exists(self, *, follow_symlinks=True): + """Whether this path exists.""" + if not follow_symlinks: + return True + return self._stat(ignore_errors=True) is not None + + def is_dir(self, *, follow_symlinks=True): + """Whether this path is a directory.""" + try: + return self._entry.is_dir(follow_symlinks=follow_symlinks) + except OSError: + return False + + def is_file(self, *, follow_symlinks=True): + """Whether this path is a regular file.""" + try: + return self._entry.is_file(follow_symlinks=follow_symlinks) + except OSError: + return False + + def is_symlink(self): + """Whether this path is a symbolic link.""" + try: + return self._entry.is_symlink() + except OSError: + return False diff --git a/PythonLib/full/pathlib/types.py b/PythonLib/full/pathlib/types.py new file mode 100644 index 00000000..d8f5c34a --- /dev/null +++ b/PythonLib/full/pathlib/types.py @@ -0,0 +1,430 @@ +""" +Protocols for supporting classes in pathlib. +""" + +# This module also provides abstract base classes for rich path objects. +# These ABCs are a *private* part of the Python standard library, but they're +# made available as a PyPI package called "pathlib-abc". It's possible they'll +# become an official part of the standard library in future. +# +# Three ABCs are provided -- _JoinablePath, _ReadablePath and _WritablePath + + +from abc import ABC, abstractmethod +from glob import _PathGlobber +from io import text_encoding +from pathlib._os import magic_open, ensure_distinct_paths, ensure_different_files, copyfileobj +from pathlib import PurePath, Path +from typing import Optional, Protocol, runtime_checkable + + +def _explode_path(path, split): + """ + Split the path into a 2-tuple (anchor, parts), where *anchor* is the + uppermost parent of the path (equivalent to path.parents[-1]), and + *parts* is a reversed list of parts following the anchor. + """ + parent, name = split(path) + names = [] + while path != parent: + names.append(name) + path = parent + parent, name = split(path) + return path, names + + +@runtime_checkable +class _PathParser(Protocol): + """Protocol for path parsers, which do low-level path manipulation. + + Path parsers provide a subset of the os.path API, specifically those + functions needed to provide JoinablePath functionality. Each JoinablePath + subclass references its path parser via a 'parser' class attribute. + """ + + sep: str + altsep: Optional[str] + def split(self, path: str) -> tuple[str, str]: ... + def splitext(self, path: str) -> tuple[str, str]: ... + def normcase(self, path: str) -> str: ... + + +@runtime_checkable +class PathInfo(Protocol): + """Protocol for path info objects, which support querying the file type. + Methods may return cached results. + """ + def exists(self, *, follow_symlinks: bool = True) -> bool: ... + def is_dir(self, *, follow_symlinks: bool = True) -> bool: ... + def is_file(self, *, follow_symlinks: bool = True) -> bool: ... + def is_symlink(self) -> bool: ... + + +class _JoinablePath(ABC): + """Abstract base class for pure path objects. + + This class *does not* provide several magic methods that are defined in + its implementation PurePath. They are: __init__, __fspath__, __bytes__, + __reduce__, __hash__, __eq__, __lt__, __le__, __gt__, __ge__. + """ + __slots__ = () + + @property + @abstractmethod + def parser(self): + """Implementation of pathlib._types.Parser used for low-level path + parsing and manipulation. + """ + raise NotImplementedError + + @abstractmethod + def with_segments(self, *pathsegments): + """Construct a new path object from any number of path-like objects. + Subclasses may override this method to customize how new path objects + are created from methods like `iterdir()`. + """ + raise NotImplementedError + + @abstractmethod + def __str__(self): + """Return the string representation of the path, suitable for + passing to system calls.""" + raise NotImplementedError + + @property + def anchor(self): + """The concatenation of the drive and root, or ''.""" + return _explode_path(str(self), self.parser.split)[0] + + @property + def name(self): + """The final path component, if any.""" + return self.parser.split(str(self))[1] + + @property + def suffix(self): + """ + The final component's last suffix, if any. + + This includes the leading period. For example: '.txt' + """ + return self.parser.splitext(self.name)[1] + + @property + def suffixes(self): + """ + A list of the final component's suffixes, if any. + + These include the leading periods. For example: ['.tar', '.gz'] + """ + split = self.parser.splitext + stem, suffix = split(self.name) + suffixes = [] + while suffix: + suffixes.append(suffix) + stem, suffix = split(stem) + return suffixes[::-1] + + @property + def stem(self): + """The final path component, minus its last suffix.""" + return self.parser.splitext(self.name)[0] + + def with_name(self, name): + """Return a new path with the file name changed.""" + split = self.parser.split + if split(name)[0]: + raise ValueError(f"Invalid name {name!r}") + path = str(self) + path = path.removesuffix(split(path)[1]) + name + return self.with_segments(path) + + def with_stem(self, stem): + """Return a new path with the stem changed.""" + suffix = self.suffix + if not suffix: + return self.with_name(stem) + elif not stem: + # If the suffix is non-empty, we can't make the stem empty. + raise ValueError(f"{self!r} has a non-empty suffix") + else: + return self.with_name(stem + suffix) + + def with_suffix(self, suffix): + """Return a new path with the file suffix changed. If the path + has no suffix, add given suffix. If the given suffix is an empty + string, remove the suffix from the path. + """ + stem = self.stem + if not stem: + # If the stem is empty, we can't make the suffix non-empty. + raise ValueError(f"{self!r} has an empty name") + elif suffix and not suffix.startswith('.'): + raise ValueError(f"Invalid suffix {suffix!r}") + else: + return self.with_name(stem + suffix) + + @property + def parts(self): + """An object providing sequence-like access to the + components in the filesystem path.""" + anchor, parts = _explode_path(str(self), self.parser.split) + if anchor: + parts.append(anchor) + return tuple(reversed(parts)) + + def joinpath(self, *pathsegments): + """Combine this path with one or several arguments, and return a + new path representing either a subpath (if all arguments are relative + paths) or a totally different path (if one of the arguments is + anchored). + """ + return self.with_segments(str(self), *pathsegments) + + def __truediv__(self, key): + try: + return self.with_segments(str(self), key) + except TypeError: + return NotImplemented + + def __rtruediv__(self, key): + try: + return self.with_segments(key, str(self)) + except TypeError: + return NotImplemented + + @property + def parent(self): + """The logical parent of the path.""" + path = str(self) + parent = self.parser.split(path)[0] + if path != parent: + return self.with_segments(parent) + return self + + @property + def parents(self): + """A sequence of this path's logical parents.""" + split = self.parser.split + path = str(self) + parent = split(path)[0] + parents = [] + while path != parent: + parents.append(self.with_segments(parent)) + path = parent + parent = split(path)[0] + return tuple(parents) + + def full_match(self, pattern): + """ + Return True if this path matches the given glob-style pattern. The + pattern is matched against the entire path. + """ + case_sensitive = self.parser.normcase('Aa') == 'Aa' + globber = _PathGlobber(self.parser.sep, case_sensitive, recursive=True) + match = globber.compile(pattern, altsep=self.parser.altsep) + return match(str(self)) is not None + + +class _ReadablePath(_JoinablePath): + """Abstract base class for readable path objects. + + The Path class implements this ABC for local filesystem paths. Users may + create subclasses to implement readable virtual filesystem paths, such as + paths in archive files or on remote storage systems. + """ + __slots__ = () + + @property + @abstractmethod + def info(self): + """ + A PathInfo object that exposes the file type and other file attributes + of this path. + """ + raise NotImplementedError + + @abstractmethod + def __open_rb__(self, buffering=-1): + """ + Open the file pointed to by this path for reading in binary mode and + return a file object, like open(mode='rb'). + """ + raise NotImplementedError + + def read_bytes(self): + """ + Open the file in bytes mode, read it, and close the file. + """ + with magic_open(self, mode='rb', buffering=0) as f: + return f.read() + + def read_text(self, encoding=None, errors=None, newline=None): + """ + Open the file in text mode, read it, and close the file. + """ + # Call io.text_encoding() here to ensure any warning is raised at an + # appropriate stack level. + encoding = text_encoding(encoding) + with magic_open(self, mode='r', encoding=encoding, errors=errors, newline=newline) as f: + return f.read() + + @abstractmethod + def iterdir(self): + """Yield path objects of the directory contents. + + The children are yielded in arbitrary order, and the + special entries '.' and '..' are not included. + """ + raise NotImplementedError + + def glob(self, pattern, *, recurse_symlinks=True): + """Iterate over this subtree and yield all existing files (of any + kind, including directories) matching the given relative pattern. + """ + anchor, parts = _explode_path(pattern, self.parser.split) + if anchor: + raise NotImplementedError("Non-relative patterns are unsupported") + elif not parts: + raise ValueError(f"Unacceptable pattern: {pattern!r}") + elif not recurse_symlinks: + raise NotImplementedError("recurse_symlinks=False is unsupported") + case_sensitive = self.parser.normcase('Aa') == 'Aa' + globber = _PathGlobber(self.parser.sep, case_sensitive, recursive=True) + select = globber.selector(parts) + return select(self.joinpath('')) + + def walk(self, top_down=True, on_error=None, follow_symlinks=False): + """Walk the directory tree from this directory, similar to os.walk().""" + paths = [self] + while paths: + path = paths.pop() + if isinstance(path, tuple): + yield path + continue + dirnames = [] + filenames = [] + if not top_down: + paths.append((path, dirnames, filenames)) + try: + for child in path.iterdir(): + if child.info.is_dir(follow_symlinks=follow_symlinks): + if not top_down: + paths.append(child) + dirnames.append(child.name) + else: + filenames.append(child.name) + except OSError as error: + if on_error is not None: + on_error(error) + if not top_down: + while not isinstance(paths.pop(), tuple): + pass + continue + if top_down: + yield path, dirnames, filenames + paths += [path.joinpath(d) for d in reversed(dirnames)] + + @abstractmethod + def readlink(self): + """ + Return the path to which the symbolic link points. + """ + raise NotImplementedError + + def copy(self, target, **kwargs): + """ + Recursively copy this file or directory tree to the given destination. + """ + ensure_distinct_paths(self, target) + target._copy_from(self, **kwargs) + return target.joinpath() # Empty join to ensure fresh metadata. + + def copy_into(self, target_dir, **kwargs): + """ + Copy this file or directory tree into the given existing directory. + """ + name = self.name + if not name: + raise ValueError(f"{self!r} has an empty name") + return self.copy(target_dir / name, **kwargs) + + +class _WritablePath(_JoinablePath): + """Abstract base class for writable path objects. + + The Path class implements this ABC for local filesystem paths. Users may + create subclasses to implement writable virtual filesystem paths, such as + paths in archive files or on remote storage systems. + """ + __slots__ = () + + @abstractmethod + def symlink_to(self, target, target_is_directory=False): + """ + Make this path a symlink pointing to the target path. + Note the order of arguments (link, target) is the reverse of os.symlink. + """ + raise NotImplementedError + + @abstractmethod + def mkdir(self): + """ + Create a new directory at this given path. + """ + raise NotImplementedError + + @abstractmethod + def __open_wb__(self, buffering=-1): + """ + Open the file pointed to by this path for writing in binary mode and + return a file object, like open(mode='wb'). + """ + raise NotImplementedError + + def write_bytes(self, data): + """ + Open the file in bytes mode, write to it, and close the file. + """ + # type-check for the buffer interface before truncating the file + view = memoryview(data) + with magic_open(self, mode='wb') as f: + return f.write(view) + + def write_text(self, data, encoding=None, errors=None, newline=None): + """ + Open the file in text mode, write to it, and close the file. + """ + # Call io.text_encoding() here to ensure any warning is raised at an + # appropriate stack level. + encoding = text_encoding(encoding) + if not isinstance(data, str): + raise TypeError('data must be str, not %s' % + data.__class__.__name__) + with magic_open(self, mode='w', encoding=encoding, errors=errors, newline=newline) as f: + return f.write(data) + + def _copy_from(self, source, follow_symlinks=True): + """ + Recursively copy the given path to this path. + """ + stack = [(source, self)] + while stack: + src, dst = stack.pop() + if not follow_symlinks and src.info.is_symlink(): + dst.symlink_to(str(src.readlink()), src.info.is_dir()) + elif src.info.is_dir(): + children = src.iterdir() + dst.mkdir() + for child in children: + stack.append((child, dst.joinpath(child.name))) + else: + ensure_different_files(src, dst) + with magic_open(src, 'rb') as source_f: + with magic_open(dst, 'wb') as target_f: + copyfileobj(source_f, target_f) + + +_JoinablePath.register(PurePath) +_ReadablePath.register(Path) +_WritablePath.register(Path) diff --git a/PythonLib/full/pdb.py b/PythonLib/full/pdb.py index 2a6e994d..903baeb8 100644 --- a/PythonLib/full/pdb.py +++ b/PythonLib/full/pdb.py @@ -1,5 +1,3 @@ -#! /usr/bin/env python3 - """ The Python Debugger Pdb ======================= @@ -76,16 +74,34 @@ import dis import code import glob +import json +import stat import token +import types +import atexit +import codeop import pprint import signal +import socket +import typing +import asyncio import inspect +import weakref +import builtins +import tempfile +import textwrap import tokenize -import functools +import itertools import traceback import linecache +import selectors +import threading +import _colorize +import _pyrepl.utils -from typing import Union +from contextlib import ExitStack, closing, contextmanager +from types import CodeType +from warnings import deprecated class Restart(Exception): @@ -93,19 +109,55 @@ class Restart(Exception): pass __all__ = ["run", "pm", "Pdb", "runeval", "runctx", "runcall", "set_trace", - "post_mortem", "help"] + "post_mortem", "set_default_backend", "get_default_backend", "help"] + + +def find_first_executable_line(code): + """ Try to find the first executable line of the code object. + + Equivalently, find the line number of the instruction that's + after RESUME + + Return code.co_firstlineno if no executable line is found. + """ + prev = None + for instr in dis.get_instructions(code): + if prev is not None and prev.opname == 'RESUME': + if instr.positions.lineno is not None: + return instr.positions.lineno + return code.co_firstlineno + prev = instr + return code.co_firstlineno def find_function(funcname, filename): cre = re.compile(r'def\s+%s(\s*\[.+\])?\s*[(]' % re.escape(funcname)) try: fp = tokenize.open(filename) except OSError: - return None + lines = linecache.getlines(filename) + if not lines: + return None + fp = io.StringIO(''.join(lines)) + funcdef = "" + funcstart = 0 # consumer of this info expects the first line to be 1 with fp: for lineno, line in enumerate(fp, start=1): if cre.match(line): - return funcname, filename, lineno + funcstart, funcdef = lineno, line + elif funcdef: + funcdef += line + + if funcdef: + try: + code = compile(funcdef, filename, 'exec') + except SyntaxError: + continue + # We should always be able to find the code object here + funccode = next(c for c in code.co_consts if + isinstance(c, CodeType) and c.co_name == funcname) + lineno_offset = find_first_executable_line(funccode) + return funcname, filename, funcstart + lineno_offset - 1 return None def lasti2lineno(code, lasti): @@ -123,50 +175,76 @@ def __repr__(self): return self -class _ScriptTarget(str): - def __new__(cls, val): - # Mutate self to be the "real path". - res = super().__new__(cls, os.path.realpath(val)) +class _ExecutableTarget: + filename: str + code: CodeType | str + namespace: dict + - # Store the original path for error reporting. - res.orig = val +class _ScriptTarget(_ExecutableTarget): + def __init__(self, target): + self._check(target) + self._target = self._safe_realpath(target) - return res + # If PYTHONSAFEPATH (-P) is not set, sys.path[0] is the directory + # of pdb, and we should replace it with the directory of the script + if not sys.flags.safe_path: + sys.path[0] = os.path.dirname(self._target) - def check(self): - if not os.path.exists(self): - print('Error:', self.orig, 'does not exist') + @staticmethod + def _check(target): + """ + Check that target is plausibly a script. + """ + if not os.path.exists(target): + print(f'Error: {target} does not exist') sys.exit(1) - if os.path.isdir(self): - print('Error:', self.orig, 'is a directory') + if os.path.isdir(target): + print(f'Error: {target} is a directory') sys.exit(1) - # Replace pdb's dir with script's dir in front of module search path. - sys.path[0] = os.path.dirname(self) + @staticmethod + def _safe_realpath(path): + """ + Return the canonical path (realpath) if it is accessible from the userspace. + Otherwise (for example, if the path is a symlink to an anonymous pipe), + return the original path. + + See GH-142315. + """ + realpath = os.path.realpath(path) + return realpath if os.path.exists(realpath) else path + + def __repr__(self): + return self._target @property def filename(self): - return self + return self._target + + @property + def code(self): + # Open the file each time because the file may be modified + with io.open_code(self._target) as fp: + return f"exec(compile({fp.read()!r}, {self._target!r}, 'exec'))" @property def namespace(self): return dict( __name__='__main__', - __file__=self, + __file__=self._target, __builtins__=__builtins__, __spec__=None, ) - @property - def code(self): - with io.open_code(self) as fp: - return f"exec(compile({fp.read()!r}, {self!r}, 'exec'))" +class _ModuleTarget(_ExecutableTarget): + def __init__(self, target): + self._target = target -class _ModuleTarget(str): - def check(self): + import runpy try: - self._details + _, self._spec, self._code = runpy._get_module_details(self._target) except ImportError as e: print(f"ImportError: {e}") sys.exit(1) @@ -174,24 +252,54 @@ def check(self): traceback.print_exc() sys.exit(1) - @functools.cached_property - def _details(self): - import runpy - return runpy._get_module_details(self) + def __repr__(self): + return self._target @property def filename(self): - return self.code.co_filename + return self._code.co_filename @property def code(self): - name, spec, code = self._details - return code + return self._code @property - def _spec(self): - name, spec, code = self._details - return spec + def namespace(self): + return dict( + __name__='__main__', + __file__=os.path.normcase(os.path.abspath(self.filename)), + __package__=self._spec.parent, + __loader__=self._spec.loader, + __spec__=self._spec, + __builtins__=__builtins__, + ) + + +class _ZipTarget(_ExecutableTarget): + def __init__(self, target): + import runpy + + self._target = os.path.realpath(target) + sys.path.insert(0, self._target) + try: + _, self._spec, self._code = runpy._get_main_module_details() + except ImportError as e: + print(f"ImportError: {e}") + sys.exit(1) + except Exception: + traceback.print_exc() + sys.exit(1) + + def __repr__(self): + return self._target + + @property + def filename(self): + return self._code.co_filename + + @property + def code(self): + return self._code @property def namespace(self): @@ -205,6 +313,15 @@ def namespace(self): ) +class _PdbInteractiveConsole(code.InteractiveConsole): + def __init__(self, ns, message): + self._message = message + super().__init__(locals=ns, local_exit=True) + + def write(self, data): + self._message(data, end='') + + # Interaction prompt line will separate file and call info from code # text using value of line_prefix string. A newline and arrow may # be to your liking. You can set it once pdb is imported using the @@ -212,13 +329,39 @@ def namespace(self): # line_prefix = ': ' # Use this to get the old situation back line_prefix = '\n-> ' # Probably a better default -class Pdb(bdb.Bdb, cmd.Cmd): +# The default backend to use for Pdb instances if not specified +# Should be either 'settrace' or 'monitoring' +_default_backend = 'settrace' + + +def set_default_backend(backend): + """Set the default backend to use for Pdb instances.""" + global _default_backend + if backend not in ('settrace', 'monitoring'): + raise ValueError("Invalid backend: %s" % backend) + _default_backend = backend + + +def get_default_backend(): + """Get the default backend to use for Pdb instances.""" + return _default_backend + + +class Pdb(bdb.Bdb, cmd.Cmd): _previous_sigint_handler = None + # Limit the maximum depth of chained exceptions, we should be handling cycles, + # but in case there are recursions, we stop at 999. + MAX_CHAINED_EXCEPTION_DEPTH = 999 + + _file_mtime_table = {} + + _last_pdb_instance = None + def __init__(self, completekey='tab', stdin=None, stdout=None, skip=None, - nosigint=False, readrc=True): - bdb.Bdb.__init__(self, skip=skip) + nosigint=False, readrc=True, mode=None, backend=None, colorize=False): + bdb.Bdb.__init__(self, skip=skip, backend=backend if backend else get_default_backend()) cmd.Cmd.__init__(self, completekey, stdin, stdout) sys.audit("pdb.Pdb") if stdout: @@ -229,15 +372,21 @@ def __init__(self, completekey='tab', stdin=None, stdout=None, skip=None, self.mainpyfile = '' self._wait_for_mainpyfile = False self.tb_lineno = {} + self.mode = mode + self.colorize = colorize and _colorize.can_colorize(file=stdout or sys.stdout) # Try to load readline if it exists try: import readline # remove some common file name delimiters - readline.set_completer_delims(' \t\n`@#$%^&*()=+[{]}\\|;:\'",<>?') + readline.set_completer_delims(' \t\n`@#%^&*()=+[{]}\\|;:\'",<>?') except ImportError: pass + self.allow_kbdint = False self.nosigint = nosigint + # Consider these characters as part of the command so when the users type + # c.a or c['a'], it won't be recognized as a c(ontinue) command + self.identchars = cmd.Cmd.identchars + '=.[](),"\'+-*/%@&|<>~^' # Read ~/.pdbrc and ./.pdbrc self.rcLines = [] @@ -254,15 +403,80 @@ def __init__(self, completekey='tab', stdin=None, stdout=None, skip=None, pass self.commands = {} # associates a command list to breakpoint numbers - self.commands_doprompt = {} # for each bp num, tells if the prompt - # must be disp. after execing the cmd list - self.commands_silent = {} # for each bp num, tells if the stack trace - # must be disp. after execing the cmd list self.commands_defining = False # True while in the process of defining # a command list self.commands_bnum = None # The breakpoint number for which we are # defining a list + self.async_shim_frame = None + self.async_awaitable = None + + self._chained_exceptions = tuple() + self._chained_exception_index = 0 + + self._current_task = None + + def set_trace(self, frame=None, *, commands=None): + Pdb._last_pdb_instance = self + if frame is None: + frame = sys._getframe().f_back + + if commands is not None: + self.rcLines.extend(commands) + + super().set_trace(frame) + + async def set_trace_async(self, frame=None, *, commands=None): + if self.async_awaitable is not None: + # We are already in a set_trace_async call, do not mess with it + return + + if frame is None: + frame = sys._getframe().f_back + + # We need set_trace to set up the basics, however, this will call + # set_stepinstr() will we need to compensate for, because we don't + # want to trigger on calls + self.set_trace(frame, commands=commands) + # Changing the stopframe will disable trace dispatch on calls + self.stopframe = frame + # We need to stop tracing because we don't have the privilege to avoid + # triggering tracing functions as normal, as we are not already in + # tracing functions + self.stop_trace() + + self.async_shim_frame = sys._getframe() + self.async_awaitable = None + + while True: + self.async_awaitable = None + # Simulate a trace event + # This should bring up pdb and make pdb believe it's debugging the + # caller frame + self.trace_dispatch(frame, "opcode", None) + if self.async_awaitable is not None: + try: + if self.breaks: + with self.set_enterframe(frame): + # set_continue requires enterframe to work + self.set_continue() + self.start_trace() + await self.async_awaitable + except Exception: + self._error_exc() + else: + break + + self.async_shim_frame = None + + # start the trace (the actual command is already set by set_* calls) + if self.returnframe is None and self.stoplineno == -1 and not self.breaks: + # This means we did a continue without any breakpoints, we should not + # start the trace + return + + self.start_trace() + def sigint_handler(self, signum, frame): if self.allow_kbdint: raise KeyboardInterrupt @@ -281,7 +495,6 @@ def forget(self): if hasattr(self, 'curframe') and self.curframe: self.curframe.f_globals.pop('__pdb_convenience_variables', None) self.curframe = None - self.curframe_locals = {} self.tb_lineno.clear() def setup(self, f, tb): @@ -295,11 +508,17 @@ def setup(self, f, tb): self.tb_lineno[tb.tb_frame] = lineno tb = tb.tb_next self.curframe = self.stack[self.curindex][0] - # The f_locals dictionary is updated from the actual frame - # locals whenever the .f_locals accessor is called, so we - # cache it here to ensure that modifications are not overwritten. - self.curframe_locals = self.curframe.f_locals self.set_convenience_variable(self.curframe, '_frame', self.curframe) + if self._current_task: + self.set_convenience_variable(self.curframe, '_asynctask', self._current_task) + self._save_initial_file_mtime(self.curframe) + + if self._chained_exceptions: + self.set_convenience_variable( + self.curframe, + '_exception', + self._chained_exceptions[self._chained_exception_index], + ) if self.rcLines: self.cmdqueue = [ @@ -308,6 +527,16 @@ def setup(self, f, tb): ] self.rcLines = [] + @property + @deprecated("The frame locals reference is no longer cached. Use 'curframe.f_locals' instead.") + def curframe_locals(self): + return self.curframe.f_locals + + @curframe_locals.setter + @deprecated("Setting 'curframe_locals' no longer has any effect. Update the contents of 'curframe.f_locals' instead.") + def curframe_locals(self, value): + pass + # Override Bdb methods def user_call(self, frame, argument_list): @@ -325,8 +554,17 @@ def user_line(self, frame): if (self.mainpyfile != self.canonic(frame.f_code.co_filename)): return self._wait_for_mainpyfile = False - if self.bp_commands(frame): - self.interaction(frame, None) + if self.trace_opcodes: + # GH-127321 + # We want to avoid stopping at an opcode that does not have + # an associated line number because pdb does not like it + if frame.f_lineno is None: + self.set_stepinstr() + return + self.bp_commands(frame) + self.interaction(frame, None) + + user_opcode = user_line def bp_commands(self, frame): """Call every command that was set for the current active breakpoint @@ -339,18 +577,9 @@ def bp_commands(self, frame): self.currentbp in self.commands: currentbp = self.currentbp self.currentbp = 0 - lastcmd_back = self.lastcmd - self.setup(frame, None) for line in self.commands[currentbp]: - self.onecmd(line) - self.lastcmd = lastcmd_back - if not self.commands_silent[currentbp]: - self.print_stack_entry(self.stack[self.curindex]) - if self.commands_doprompt[currentbp]: - self._cmdloop() - self.forget() - return - return 1 + self.cmdqueue.append(line) + self.cmdqueue.append(f'_pdbcmd_restore_lastcmd {self.lastcmd}') def user_return(self, frame, return_value): """This function is called when a return trap is set here.""" @@ -393,6 +622,32 @@ def _cmdloop(self): except KeyboardInterrupt: self.message('--KeyboardInterrupt--') + def _save_initial_file_mtime(self, frame): + """save the mtime of the all the files in the frame stack in the file mtime table + if they haven't been saved yet.""" + while frame: + filename = frame.f_code.co_filename + if filename not in self._file_mtime_table: + try: + self._file_mtime_table[filename] = os.path.getmtime(filename) + except Exception: + pass + frame = frame.f_back + + def _validate_file_mtime(self): + """Check if the source file of the current frame has been modified. + If so, give a warning and reset the modify time to current.""" + try: + filename = self.curframe.f_code.co_filename + mtime = os.path.getmtime(filename) + except Exception: + return + if (filename in self._file_mtime_table and + mtime != self._file_mtime_table[filename]): + self.message(f"*** WARNING: file '{filename}' was edited, " + "running stale code until the program is rerun") + self._file_mtime_table[filename] = mtime + # Called before loop, handles display expressions # Set up convenience variable containers def _show_display(self): @@ -409,7 +664,71 @@ def _show_display(self): (expr, self._safe_repr(newvalue, expr), self._safe_repr(oldvalue, expr))) - def interaction(self, frame, traceback): + def _get_tb_and_exceptions(self, tb_or_exc): + """ + Given a tracecack or an exception, return a tuple of chained exceptions + and current traceback to inspect. + + This will deal with selecting the right ``__cause__`` or ``__context__`` + as well as handling cycles, and return a flattened list of exceptions we + can jump to with do_exceptions. + + """ + _exceptions = [] + if isinstance(tb_or_exc, BaseException): + traceback, current = tb_or_exc.__traceback__, tb_or_exc + + while current is not None: + if current in _exceptions: + break + _exceptions.append(current) + if current.__cause__ is not None: + current = current.__cause__ + elif ( + current.__context__ is not None and not current.__suppress_context__ + ): + current = current.__context__ + + if len(_exceptions) >= self.MAX_CHAINED_EXCEPTION_DEPTH: + self.message( + f"More than {self.MAX_CHAINED_EXCEPTION_DEPTH}" + " chained exceptions found, not all exceptions" + "will be browsable with `exceptions`." + ) + break + else: + traceback = tb_or_exc + return tuple(reversed(_exceptions)), traceback + + @contextmanager + def _hold_exceptions(self, exceptions): + """ + Context manager to ensure proper cleaning of exceptions references + + When given a chained exception instead of a traceback, + pdb may hold references to many objects which may leak memory. + + We use this context manager to make sure everything is properly cleaned + + """ + try: + self._chained_exceptions = exceptions + self._chained_exception_index = len(exceptions) - 1 + yield + finally: + # we can't put those in forget as otherwise they would + # be cleared on exception change + self._chained_exceptions = tuple() + self._chained_exception_index = 0 + + def _get_asyncio_task(self): + try: + task = asyncio.current_task() + except RuntimeError: + task = None + return task + + def interaction(self, frame, tb_or_exc): # Restore the previous signal handler at the Pdb prompt. if Pdb._previous_sigint_handler: try: @@ -418,18 +737,25 @@ def interaction(self, frame, traceback): pass else: Pdb._previous_sigint_handler = None - self.setup(frame, traceback) - # We should print the stack entry if and only if the user input - # is expected, and we should print it right before the user input. - # We achieve this by appending _pdbcmd_print_frame_status to the - # command queue. If cmdqueue is not exausted, the user input is - # not expected and we will not print the stack entry. - self.cmdqueue.append('_pdbcmd_print_frame_status') - self._cmdloop() - # If _pdbcmd_print_frame_status is not used, pop it out - if self.cmdqueue and self.cmdqueue[-1] == '_pdbcmd_print_frame_status': - self.cmdqueue.pop() - self.forget() + + self._current_task = self._get_asyncio_task() + + _chained_exceptions, tb = self._get_tb_and_exceptions(tb_or_exc) + if isinstance(tb_or_exc, BaseException): + assert tb is not None, "main exception must have a traceback" + with self._hold_exceptions(_chained_exceptions): + self.setup(frame, tb) + # We should print the stack entry if and only if the user input + # is expected, and we should print it right before the user input. + # We achieve this by appending _pdbcmd_print_frame_status to the + # command queue. If cmdqueue is not exhausted, the user input is + # not expected and we will not print the stack entry. + self.cmdqueue.append('_pdbcmd_print_frame_status') + self._cmdloop() + # If _pdbcmd_print_frame_status is not used, pop it out + if self.cmdqueue and self.cmdqueue[-1] == '_pdbcmd_print_frame_status': + self.cmdqueue.pop() + self.forget() def displayhook(self, obj): """Custom displayhook for the exec in default(), which prevents @@ -439,12 +765,191 @@ def displayhook(self, obj): if obj is not None: self.message(repr(obj)) + @contextmanager + def _enable_multiline_input(self): + try: + import readline + except ImportError: + yield + return + + def input_auto_indent(): + last_index = readline.get_current_history_length() + last_line = readline.get_history_item(last_index) + if last_line: + if last_line.isspace(): + # If the last line is empty, we don't need to indent + return + + last_line = last_line.rstrip('\r\n') + indent = len(last_line) - len(last_line.lstrip()) + if last_line.endswith(":"): + indent += 4 + readline.insert_text(' ' * indent) + + completenames = self.completenames + try: + self.completenames = self.complete_multiline_names + readline.set_startup_hook(input_auto_indent) + yield + finally: + readline.set_startup_hook() + self.completenames = completenames + return + + def _exec_in_closure(self, source, globals, locals): + """ Run source code in closure so code object created within source + can find variables in locals correctly + + returns True if the source is executed, False otherwise + """ + + # Determine if the source should be executed in closure. Only when the + # source compiled to multiple code objects, we should use this feature. + # Otherwise, we can just raise an exception and normal exec will be used. + + code = compile(source, "", "exec") + if not any(isinstance(const, CodeType) for const in code.co_consts): + return False + + # locals could be a proxy which does not support pop + # copy it first to avoid modifying the original locals + locals_copy = dict(locals) + + locals_copy["__pdb_eval__"] = { + "result": None, + "write_back": {} + } + + # If the source is an expression, we need to print its value + try: + compile(source, "", "eval") + except SyntaxError: + pass + else: + source = "__pdb_eval__['result'] = " + source + + # Add write-back to update the locals + source = ("try:\n" + + textwrap.indent(source, " ") + "\n" + + "finally:\n" + + " __pdb_eval__['write_back'] = locals()") + + # Build a closure source code with freevars from locals like: + # def __pdb_outer(): + # var = None + # def __pdb_scope(): # This is the code object we want to execute + # nonlocal var + # + # return __pdb_scope.__code__ + source_with_closure = ("def __pdb_outer():\n" + + "\n".join(f" {var} = None" for var in locals_copy) + "\n" + + " def __pdb_scope():\n" + + "\n".join(f" nonlocal {var}" for var in locals_copy) + "\n" + + textwrap.indent(source, " ") + "\n" + + " return __pdb_scope.__code__" + ) + + # Get the code object of __pdb_scope() + # The exec fills locals_copy with the __pdb_outer() function and we can call + # that to get the code object of __pdb_scope() + ns = {} + try: + exec(source_with_closure, {}, ns) + except Exception: + return False + code = ns["__pdb_outer"]() + + cells = tuple(types.CellType(locals_copy.get(var)) for var in code.co_freevars) + + try: + exec(code, globals, locals_copy, closure=cells) + except Exception: + return False + + # get the data we need from the statement + pdb_eval = locals_copy["__pdb_eval__"] + + # __pdb_eval__ should not be updated back to locals + pdb_eval["write_back"].pop("__pdb_eval__") + + # Write all local variables back to locals + locals.update(pdb_eval["write_back"]) + eval_result = pdb_eval["result"] + if eval_result is not None: + self.message(repr(eval_result)) + + return True + + def _exec_await(self, source, globals, locals): + """ Run source code that contains await by playing with async shim frame""" + # Put the source in an async function + source_async = ( + "async def __pdb_await():\n" + + textwrap.indent(source, " ") + '\n' + + " __pdb_locals.update(locals())" + ) + ns = globals | locals + # We use __pdb_locals to do write back + ns["__pdb_locals"] = locals + exec(source_async, ns) + self.async_awaitable = ns["__pdb_await"]() + + def _read_code(self, line): + buffer = line + is_await_code = False + code = None + try: + if (code := codeop.compile_command(line + '\n', '', 'single')) is None: + # Multi-line mode + with self._enable_multiline_input(): + buffer = line + continue_prompt = "... " + while (code := codeop.compile_command(buffer, '', 'single')) is None: + if self.use_rawinput: + try: + line = input(continue_prompt) + except (EOFError, KeyboardInterrupt): + self.lastcmd = "" + print('\n') + return None, None, False + else: + self.stdout.write(continue_prompt) + self.stdout.flush() + line = self.stdin.readline() + if not len(line): + self.lastcmd = "" + self.stdout.write('\n') + self.stdout.flush() + return None, None, False + else: + line = line.rstrip('\r\n') + if line.isspace(): + # empty line, just continue + buffer += '\n' + else: + buffer += '\n' + line + self.lastcmd = buffer + except SyntaxError as e: + # Maybe it's an await expression/statement + if ( + self.async_shim_frame is not None + and e.msg == "'await' outside function" + ): + is_await_code = True + else: + raise + + return code, buffer, is_await_code + def default(self, line): if line[:1] == '!': line = line[1:].strip() - locals = self.curframe_locals + locals = self.curframe.f_locals globals = self.curframe.f_globals try: - code = compile(line + '\n', '', 'single') + code, buffer, is_await_code = self._read_code(line) + if buffer is None: + return save_stdout = sys.stdout save_stdin = sys.stdin save_displayhook = sys.displayhook @@ -452,7 +957,12 @@ def default(self, line): sys.stdin = self.stdin sys.stdout = self.stdout sys.displayhook = self.displayhook - exec(code, globals, locals) + if is_await_code: + self._exec_await(buffer, globals, locals) + return True + else: + if not self._exec_in_closure(buffer, globals, locals): + exec(code, globals, locals) finally: sys.stdout = save_stdout sys.stdin = save_stdin @@ -468,7 +978,7 @@ def _replace_convenience_variables(self, line): if "$" not in line: return line - dollar_start = dollar_end = -1 + dollar_start = dollar_end = (-1, -1) replace_variables = [] try: for t in tokenize.generate_tokens(io.StringIO(line).readline): @@ -500,11 +1010,20 @@ def precmd(self, line): args = line.split() while args[0] in self.aliases: line = self.aliases[args[0]] - ii = 1 - for tmpArg in args[1:]: - line = line.replace("%" + str(ii), - tmpArg) - ii += 1 + for idx in range(1, 10): + if f'%{idx}' in line: + if idx >= len(args): + self.error(f"Not enough arguments for alias '{args[0]}'") + # This is a no-op + return "!" + line = line.replace(f'%{idx}', args[idx]) + elif '%*' not in line: + if idx < len(args): + self.error(f"Too many arguments for alias '{args[0]}'") + # This is a no-op + return "!" + break + line = line.replace("%*", ' '.join(args[1:])) args = line.split() # split into ';;' separated commands @@ -543,12 +1062,15 @@ def handle_command_def(self, line): cmd, arg, line = self.parseline(line) if not cmd: return False - if cmd == 'silent': - self.commands_silent[self.commands_bnum] = True - return False # continue to handle other cmd def in the cmd list - elif cmd == 'end': + if cmd == 'end': + return True # end of cmd list + elif cmd == 'EOF': + self.message('') return True # end of cmd list cmdlist = self.commands[self.commands_bnum] + if cmd == 'silent': + cmdlist.append('_pdbcmd_silence_frame_status') + return False # continue to handle other cmd def in the cmd list if arg: cmdlist.append(cmd+' '+arg) else: @@ -560,14 +1082,20 @@ def handle_command_def(self, line): func = self.default # one of the resuming commands if func.__name__ in self.commands_resuming: - self.commands_doprompt[self.commands_bnum] = False return True return False + def _colorize_code(self, code): + if self.colorize: + colors = list(_pyrepl.utils.gen_colors(code)) + chars, _ = _pyrepl.utils.disp_str(code, colors=colors, force_color=True) + code = "".join(chars) + return code + # interface abstraction functions - def message(self, msg): - print(msg, file=self.stdout) + def message(self, msg, end='\n'): + print(msg, end=end, file=self.stdout) def error(self, msg): print('***', msg, file=self.stdout) @@ -582,6 +1110,46 @@ def set_convenience_variable(self, frame, name, value): # Generic completion functions. Individual complete_foo methods can be # assigned below to one of these functions. + @property + def rlcompleter(self): + """Return the `Completer` class from `rlcompleter`, while avoiding the + side effects of changing the completer from `import rlcompleter`. + + This is a compromise between GH-138860 and GH-139289. If GH-139289 is + fixed, then we don't need this and we can just `import rlcompleter` in + `Pdb.__init__`. + """ + if not hasattr(self, "_rlcompleter"): + try: + import readline + except ImportError: + # readline is not available, just get the Completer + from rlcompleter import Completer + self._rlcompleter = Completer + else: + # importing rlcompleter could have side effect of changing + # the current completer, we need to restore it + prev_completer = readline.get_completer() + from rlcompleter import Completer + self._rlcompleter = Completer + readline.set_completer(prev_completer) + return self._rlcompleter + + def completenames(self, text, line, begidx, endidx): + # Overwrite completenames() of cmd so for the command completion, + # if no current command matches, check for expressions as well + commands = super().completenames(text, line, begidx, endidx) + for alias in self.aliases: + if alias.startswith(text): + commands.append(alias) + if commands: + return commands + else: + expressions = self._complete_expression(text, line, begidx, endidx) + if expressions: + return expressions + return self.completedefault(text, line, begidx, endidx) + def _complete_location(self, text, line, begidx, endidx): # Complete a file/module/function location for break/tbreak/clear. if line.strip().endswith((':', ',')): @@ -615,14 +1183,17 @@ def _complete_expression(self, text, line, begidx, endidx): # Collect globals and locals. It is usually not really sensible to also # complete builtins, and they clutter the namespace quite heavily, so we # leave them out. - ns = {**self.curframe.f_globals, **self.curframe_locals} + ns = {**self.curframe.f_globals, **self.curframe.f_locals} if '.' in text: # Walk an attribute chain up to the last part, similar to what # rlcompleter does. This will bail if any of the parts are not # simple attribute access, which is what we want. dotted = text.split('.') try: - obj = ns[dotted[0]] + if dotted[0].startswith('$'): + obj = self.curframe.f_globals['__pdb_convenience_variables'][dotted[0][1:]] + else: + obj = ns[dotted[0]] for part in dotted[1:-1]: obj = getattr(obj, part) except (KeyError, AttributeError): @@ -630,15 +1201,72 @@ def _complete_expression(self, text, line, begidx, endidx): prefix = '.'.join(dotted[:-1]) + '.' return [prefix + n for n in dir(obj) if n.startswith(dotted[-1])] else: + if text.startswith("$"): + # Complete convenience variables + conv_vars = self.curframe.f_globals.get('__pdb_convenience_variables', {}) + return [f"${name}" for name in conv_vars if name.startswith(text[1:])] # Complete a simple name. return [n for n in ns.keys() if n.startswith(text)] + def _complete_indentation(self, text, line, begidx, endidx): + try: + import readline + except ImportError: + return [] + # Fill in spaces to form a 4-space indent + return [' ' * (4 - readline.get_begidx() % 4)] + + def complete_multiline_names(self, text, line, begidx, endidx): + # If text is space-only, the user entered before any text. + # That normally means they want to indent the current line. + if not text.strip(): + return self._complete_indentation(text, line, begidx, endidx) + return self.completedefault(text, line, begidx, endidx) + + def completedefault(self, text, line, begidx, endidx): + if text.startswith("$"): + # Complete convenience variables + conv_vars = self.curframe.f_globals.get('__pdb_convenience_variables', {}) + return [f"${name}" for name in conv_vars if name.startswith(text[1:])] + + state = 0 + matches = [] + completer = self.rlcompleter(self.curframe.f_globals | self.curframe.f_locals) + while (match := completer.complete(text, state)) is not None: + matches.append(match) + state += 1 + return matches + + @contextmanager + def _enable_rlcompleter(self, ns): + try: + import readline + except ImportError: + yield + return + + try: + completer = self.rlcompleter(ns) + old_completer = readline.get_completer() + readline.set_completer(completer.complete) + yield + finally: + readline.set_completer(old_completer) + # Pdb meta commands, only intended to be used internally by pdb def _pdbcmd_print_frame_status(self, arg): - self.print_stack_entry(self.stack[self.curindex]) + self.print_stack_trace(0) + self._validate_file_mtime() self._show_display() + def _pdbcmd_silence_frame_status(self, arg): + if self.cmdqueue and self.cmdqueue[-1] == '_pdbcmd_print_frame_status': + self.cmdqueue.pop() + + def _pdbcmd_restore_lastcmd(self, arg): + self.lastcmd = arg + # Command definitions, called by cmdloop() # The argument is the remaining string on the command line # Return true to exit from the command loop @@ -686,7 +1314,7 @@ def do_commands(self, arg): try: bnum = int(arg) except: - self.error("Usage: commands [bnum]\n ...\n end") + self._print_invalid_arg(arg) return try: self.get_bpbynumber(bnum) @@ -697,14 +1325,10 @@ def do_commands(self, arg): self.commands_bnum = bnum # Save old definitions for the case of a keyboard interrupt. if bnum in self.commands: - old_command_defs = (self.commands[bnum], - self.commands_doprompt[bnum], - self.commands_silent[bnum]) + old_commands = self.commands[bnum] else: - old_command_defs = None + old_commands = None self.commands[bnum] = [] - self.commands_doprompt[bnum] = True - self.commands_silent[bnum] = False prompt_back = self.prompt self.prompt = '(com) ' @@ -713,14 +1337,10 @@ def do_commands(self, arg): self.cmdloop() except KeyboardInterrupt: # Restore old definitions. - if old_command_defs: - self.commands[bnum] = old_command_defs[0] - self.commands_doprompt[bnum] = old_command_defs[1] - self.commands_silent[bnum] = old_command_defs[2] + if old_commands: + self.commands[bnum] = old_commands else: del self.commands[bnum] - del self.commands_doprompt[bnum] - del self.commands_silent[bnum] self.error('command definition aborted, old commands restored') finally: self.commands_defining = False @@ -728,7 +1348,7 @@ def do_commands(self, arg): complete_commands = _complete_bpnumber - def do_break(self, arg, temporary = 0): + def do_break(self, arg, temporary=False): """b(reak) [ ([filename:]lineno | function) [, condition] ] Without argument, list all breaks. @@ -756,6 +1376,7 @@ def do_break(self, arg, temporary = 0): filename = None lineno = None cond = None + module_globals = None comma = arg.find(',') if comma > 0: # parse stuff after comma: "condition" @@ -789,7 +1410,7 @@ def do_break(self, arg, temporary = 0): try: func = eval(arg, self.curframe.f_globals, - self.curframe_locals) + self.curframe.f_locals) except: func = arg try: @@ -799,8 +1420,9 @@ def do_break(self, arg, temporary = 0): #use co_name to identify the bkpt (function names #could be aliased, but co_name is invariant) funcname = code.co_name - lineno = code.co_firstlineno + lineno = find_first_executable_line(code) filename = code.co_filename + module_globals = func.__globals__ except: # last thing to try (ok, filename, ln) = self.lineinfo(arg) @@ -812,8 +1434,9 @@ def do_break(self, arg, temporary = 0): lineno = int(ln) if not filename: filename = self.defaultFile() + filename = self.canonic(filename) # Check for reasonable breakpoint - line = self.checkline(filename, lineno) + line = self.checkline(filename, lineno, module_globals) if line: # now set the break point err = self.set_break(filename, line, temporary, cond, funcname) @@ -843,7 +1466,7 @@ def do_tbreak(self, arg): Same arguments as break, but sets a temporary breakpoint: it is automatically deleted when first hit. """ - self.do_break(arg, 1) + self.do_break(arg, True) complete_tbreak = _complete_location @@ -876,11 +1499,13 @@ def lineinfo(self, identifier): f = self.lookupmodule(parts[0]) if f: fname = f - item = parts[1] - answer = find_function(item, fname) + item = parts[1] + else: + return failed + answer = find_function(item, self.canonic(fname)) return answer or failed - def checkline(self, filename, lineno): + def checkline(self, filename, lineno, module_globals=None): """Check whether specified line seems to be executable. Return `lineno` if it is, 0 if not (e.g. a docstring, comment, blank @@ -889,8 +1514,9 @@ def checkline(self, filename, lineno): # this method should be callable before starting debugging, so default # to "no globals" if there is no current frame frame = getattr(self, 'curframe', None) - globs = frame.f_globals if frame else None - line = linecache.getline(filename, lineno, globs) + if module_globals is None: + module_globals = frame.f_globals if frame else None + line = linecache.getline(filename, lineno, module_globals) if not line: self.message('End of file') return 0 @@ -908,6 +1534,9 @@ def do_enable(self, arg): Enables the breakpoints given as a space separated list of breakpoint numbers. """ + if not arg: + self._print_invalid_arg(arg) + return args = arg.split() for i in args: try: @@ -929,6 +1558,9 @@ def do_disable(self, arg): breakpoint, it remains in the list of breakpoints and can be (re-)enabled. """ + if not arg: + self._print_invalid_arg(arg) + return args = arg.split() for i in args: try: @@ -949,6 +1581,9 @@ def do_condition(self, arg): condition is absent, any existing condition is removed; i.e., the breakpoint is made unconditional. """ + if not arg: + self._print_invalid_arg(arg) + return args = arg.split(' ', 1) try: cond = args[1] @@ -982,15 +1617,26 @@ def do_ignore(self, arg): and the breakpoint is not disabled and any associated condition evaluates to true. """ + if not arg: + self._print_invalid_arg(arg) + return args = arg.split() - try: - count = int(args[1].strip()) - except: + if not args: + self.error('Breakpoint number expected') + return + if len(args) == 1: count = 0 + elif len(args) == 2: + try: + count = int(args[1]) + except ValueError: + self._print_invalid_arg(arg) + return + else: + self._print_invalid_arg(arg) + return try: bp = self.get_bpbynumber(args[0].strip()) - except IndexError: - self.error('Breakpoint number expected') except ValueError as err: self.error(err) else: @@ -1008,6 +1654,13 @@ def do_ignore(self, arg): complete_ignore = _complete_bpnumber + def _prompt_for_confirmation(self, prompt, default): + try: + reply = input(prompt) + except EOFError: + reply = default + return reply.strip().lower() + def do_clear(self, arg): """cl(ear) [filename:lineno | bpnumber ...] @@ -1017,11 +1670,10 @@ def do_clear(self, arg): clear all breaks at that line in that file. """ if not arg: - try: - reply = input('Clear all breaks? ') - except EOFError: - reply = 'no' - reply = reply.strip().lower() + reply = self._prompt_for_confirmation( + 'Clear all breaks? ', + default='no', + ) if reply in ('y', 'yes'): bplist = [bp for bp in bdb.Breakpoint.bpbynumber if bp] self.clear_all_breaks() @@ -1061,13 +1713,24 @@ def do_clear(self, arg): complete_cl = _complete_location def do_where(self, arg): - """w(here) + """w(here) [count] - Print a stack trace, with the most recent frame at the bottom. + Print a stack trace. If count is not specified, print the full stack. + If count is 0, print the current frame entry. If count is positive, + print count entries from the most recent frame. If count is negative, + print -count entries from the least recent frame. An arrow indicates the "current frame", which determines the context of most commands. 'bt' is an alias for this command. """ - self.print_stack_trace() + if not arg: + count = None + else: + try: + count = int(arg) + except ValueError: + self.error('Invalid count (%s)' % arg) + return + self.print_stack_trace(count) do_w = do_where do_bt = do_where @@ -1075,11 +1738,57 @@ def _select_frame(self, number): assert 0 <= number < len(self.stack) self.curindex = number self.curframe = self.stack[self.curindex][0] - self.curframe_locals = self.curframe.f_locals self.set_convenience_variable(self.curframe, '_frame', self.curframe) self.print_stack_entry(self.stack[self.curindex]) self.lineno = None + def do_exceptions(self, arg): + """exceptions [number] + + List or change current exception in an exception chain. + + Without arguments, list all the current exception in the exception + chain. Exceptions will be numbered, with the current exception indicated + with an arrow. + + If given an integer as argument, switch to the exception at that index. + """ + if not self._chained_exceptions: + self.message( + "Did not find chained exceptions. To move between" + " exceptions, pdb/post_mortem must be given an exception" + " object rather than a traceback." + ) + return + if not arg: + for ix, exc in enumerate(self._chained_exceptions): + prompt = ">" if ix == self._chained_exception_index else " " + rep = repr(exc) + if len(rep) > 80: + rep = rep[:77] + "..." + indicator = ( + " -" + if self._chained_exceptions[ix].__traceback__ is None + else f"{ix:>3}" + ) + self.message(f"{prompt} {indicator} {rep}") + else: + try: + number = int(arg) + except ValueError: + self.error("Argument must be an integer") + return + if 0 <= number < len(self._chained_exceptions): + if self._chained_exceptions[number].__traceback__ is None: + self.error("This exception does not have a traceback, cannot jump to it") + return + + self._chained_exception_index = number + self.setup(None, self._chained_exceptions[number].__traceback__) + self.print_stack_entry(self.stack[self.curindex]) + else: + self.error("No exception with that number") + def do_up(self, arg): """u(p) [count] @@ -1154,6 +1863,9 @@ def do_step(self, arg): (either in a function that is called or in the current function). """ + if arg: + self._print_invalid_arg(arg) + return self.set_step() return 1 do_s = do_step @@ -1164,6 +1876,9 @@ def do_next(self, arg): Continue execution until the next line in the current function is reached or it returns. """ + if arg: + self._print_invalid_arg(arg) + return self.set_next(self.curframe) return 1 do_n = do_next @@ -1176,6 +1891,11 @@ def do_run(self, arg): sys.argv. History, breakpoints, actions and debugger options are preserved. "restart" is an alias for "run". """ + if self.mode == 'inline': + self.error('run/restart command is disabled when pdb is running in inline mode.\n' + 'Use the command line interface to enable restarting your program\n' + 'e.g. "python -m pdb myscript.py"') + return if arg: import shlex argv0 = sys.argv[0:1] @@ -1195,6 +1915,9 @@ def do_return(self, arg): Continue execution until the current function returns. """ + if arg: + self._print_invalid_arg(arg) + return self.set_return(self.curframe) return 1 do_r = do_return @@ -1204,6 +1927,9 @@ def do_continue(self, arg): Continue execution, only stop when a breakpoint is encountered. """ + if arg: + self._print_invalid_arg(arg) + return if not self.nosigint: try: Pdb._previous_sigint_handler = \ @@ -1230,6 +1956,9 @@ def do_jump(self, arg): instance it is not possible to jump into the middle of a for loop or out of a finally clause. """ + if not arg: + self._print_invalid_arg(arg) + return if self.curindex + 1 != len(self.stack): self.error('You can only jump within the bottom frame') return @@ -1248,6 +1977,9 @@ def do_jump(self, arg): self.error('Jump failed: %s' % e) do_j = do_jump + def _create_recursive_debugger(self): + return Pdb(self.completekey, self.stdin, self.stdout) + def do_debug(self, arg): """debug code @@ -1255,10 +1987,13 @@ def do_debug(self, arg): argument (which is an arbitrary expression or statement to be executed in the current environment). """ - sys.settrace(None) + if not arg: + self._print_invalid_arg(arg) + return + self.stop_trace() globals = self.curframe.f_globals - locals = self.curframe_locals - p = Pdb(self.completekey, self.stdin, self.stdout) + locals = self.curframe.f_locals + p = self._create_recursive_debugger() p.prompt = "(%s) " % self.prompt.strip() self.message("ENTERING RECURSIVE DEBUGGER") try: @@ -1266,7 +2001,7 @@ def do_debug(self, arg): except Exception: self._error_exc() self.message("LEAVING RECURSIVE DEBUGGER") - sys.settrace(self.trace_dispatch) + self.start_trace() self.lastcmd = p.lastcmd complete_debug = _complete_expression @@ -1276,6 +2011,22 @@ def do_quit(self, arg): Quit from the debugger. The program being executed is aborted. """ + # Show prompt to kill process when in 'inline' mode and if pdb was not + # started from an interactive console. The attribute sys.ps1 is only + # defined if the interpreter is in interactive mode. + if self.mode == 'inline' and not hasattr(sys, 'ps1'): + while True: + try: + reply = input('Quitting pdb will kill the process. Quit anyway? [y/n] ') + reply = reply.lower().strip() + except EOFError: + reply = 'y' + self.message('') + if reply == 'y' or reply == '': + sys.exit(1) + elif reply.lower() == 'n': + return + self._user_requested_quit = True self.set_quit() return 1 @@ -1289,17 +2040,18 @@ def do_EOF(self, arg): Handles the receipt of EOF as a command. """ self.message('') - self._user_requested_quit = True - self.set_quit() - return 1 + return self.do_quit(arg) def do_args(self, arg): """a(rgs) Print the argument list of the current function. """ + if arg: + self._print_invalid_arg(arg) + return co = self.curframe.f_code - dict = self.curframe_locals + dict = self.curframe.f_locals n = co.co_argcount + co.co_kwonlyargcount if co.co_flags & inspect.CO_VARARGS: n = n+1 if co.co_flags & inspect.CO_VARKEYWORDS: n = n+1 @@ -1316,15 +2068,18 @@ def do_retval(self, arg): Print the return value for the last return of a function. """ - if '__return__' in self.curframe_locals: - self.message(self._safe_repr(self.curframe_locals['__return__'], "retval")) + if arg: + self._print_invalid_arg(arg) + return + if '__return__' in self.curframe.f_locals: + self.message(self._safe_repr(self.curframe.f_locals['__return__'], "retval")) else: self.error('Not yet returned!') do_rv = do_retval def _getval(self, arg): try: - return eval(arg, self.curframe.f_globals, self.curframe_locals) + return eval(arg, self.curframe.f_globals, self.curframe.f_locals) except: self._error_exc() raise @@ -1332,7 +2087,7 @@ def _getval(self, arg): def _getval_except(self, arg, frame=None): try: if frame is None: - return eval(arg, self.curframe.f_globals, self.curframe_locals) + return eval(arg, self.curframe.f_globals, self.curframe.f_locals) else: return eval(arg, frame.f_globals, frame.f_locals) except BaseException as exc: @@ -1363,6 +2118,9 @@ def do_p(self, arg): Print the value of the expression. """ + if not arg: + self._print_invalid_arg(arg) + return self._msg_val_func(arg, repr) def do_pp(self, arg): @@ -1370,6 +2128,9 @@ def do_pp(self, arg): Pretty-print the value of the expression. """ + if not arg: + self._print_invalid_arg(arg) + return self._msg_val_func(arg, pprint.pformat) complete_print = _complete_expression @@ -1415,12 +2176,6 @@ def do_list(self, arg): if last is None: last = first + 10 filename = self.curframe.f_code.co_filename - # gh-93696: stdlib frozen modules provide a useful __file__ - # this workaround can be removed with the closure of gh-89815 - if filename.startswith(" 0, prints count most recent frame entries + + def print_stack_trace(self, count=None): + if count is None: + stack_to_print = self.stack + elif count == 0: + stack_to_print = [self.stack[self.curindex]] + elif count < 0: + stack_to_print = self.stack[:-count] + else: + stack_to_print = self.stack[-count:] try: - for frame_lineno in self.stack: + for frame_lineno in stack_to_print: self.print_stack_entry(frame_lineno) except KeyboardInterrupt: pass @@ -1653,8 +2449,14 @@ def print_stack_entry(self, frame_lineno, prompt_prefix=line_prefix): prefix = '> ' else: prefix = ' ' - self.message(prefix + - self.format_stack_entry(frame_lineno, prompt_prefix)) + stack_entry = self.format_stack_entry(frame_lineno, prompt_prefix) + if self.colorize: + lines = stack_entry.split(prompt_prefix, 1) + if len(lines) > 1: + # We have some code to display + lines[1] = self._colorize_code(lines[1]) + stack_entry = prompt_prefix.join(lines) + self.message(prefix + stack_entry) # Provide help @@ -1714,17 +2516,23 @@ def lookupmodule(self, filename): lookupmodule() translates (possibly incomplete) file or module name into an absolute file name. + + filename could be in format of: + * an absolute path like '/path/to/file.py' + * a relative path like 'file.py' or 'dir/file.py' + * a module name like 'module' or 'package.module' + + files and modules will be searched in sys.path. """ - if os.path.isabs(filename) and os.path.exists(filename): - return filename - f = os.path.join(sys.path[0], filename) - if os.path.exists(f) and self.canonic(f) == self.mainpyfile: - return f - root, ext = os.path.splitext(filename) - if ext == '': - filename = filename + '.py' + if not filename.endswith('.py'): + # A module is passed in so convert it to equivalent file + filename = filename.replace('.', os.sep) + '.py' + if os.path.isabs(filename): - return filename + if os.path.exists(filename): + return filename + return None + for dirname in sys.path: while os.path.islink(dirname): dirname = os.readlink(dirname) @@ -1733,7 +2541,7 @@ def lookupmodule(self, filename): return fullname return None - def _run(self, target: Union[_ModuleTarget, _ScriptTarget]): + def _run(self, target: _ExecutableTarget): # When bdb sets tracing, a number of call and line events happen # BEFORE debugger even reaches user's code (and the exact sequence of # events depends on python version). Take special measures to @@ -1751,6 +2559,10 @@ def _run(self, target: Union[_ModuleTarget, _ScriptTarget]): __main__.__dict__.clear() __main__.__dict__.update(target.namespace) + # Clear the mtime table for program reruns, assume all the files + # are up to date. + self._file_mtime_table.clear() + self.run(target.code) def _format_exc(self, exc: BaseException): @@ -1774,7 +2586,7 @@ def _getsourcelines(self, obj): lineno = max(1, lineno) return lines, lineno - def _help_message_from_doc(self, doc): + def _help_message_from_doc(self, doc, usage_only=False): lines = [line.strip() for line in doc.rstrip().splitlines()] if not lines: return "No help message found." @@ -1790,10 +2602,27 @@ def _help_message_from_doc(self, doc): elif i < usage_end: prefix = " " else: + if usage_only: + break prefix = "" formatted.append(indent + prefix + line) return "\n".join(formatted) + def _print_invalid_arg(self, arg): + """Return the usage string for a function.""" + + if not arg: + self.error("Argument is required for this command") + else: + self.error(f"Invalid argument: {arg}") + + # Yes it's a bit hacky. Get the caller name, get the method based on + # that name, and get the docstring from that method. + # This should NOT fail if the caller is a method of this class. + doc = inspect.getdoc(getattr(self, sys._getframe(1).f_code.co_name)) + if doc is not None: + self.message(self._help_message_from_doc(doc, usage_only=True)) + # Collect all command help into docstring, if not run with -OO if __doc__ is not None: @@ -1853,27 +2682,826 @@ def runcall(*args, **kwds): """ return Pdb().runcall(*args, **kwds) -def set_trace(*, header=None): +def set_trace(*, header=None, commands=None): """Enter the debugger at the calling stack frame. This is useful to hard-code a breakpoint at a given point in a program, even if the code is not otherwise being debugged (e.g. when an assertion fails). If given, *header* is printed to the console - just before debugging begins. + just before debugging begins. *commands* is an optional list of + pdb commands to run when the debugger starts. + """ + if Pdb._last_pdb_instance is not None: + pdb = Pdb._last_pdb_instance + else: + pdb = Pdb(mode='inline', backend='monitoring', colorize=True) + if header is not None: + pdb.message(header) + pdb.set_trace(sys._getframe().f_back, commands=commands) + +async def set_trace_async(*, header=None, commands=None): + """Enter the debugger at the calling stack frame, but in async mode. + + This should be used as await pdb.set_trace_async(). Users can do await + if they enter the debugger with this function. Otherwise it's the same + as set_trace(). """ - pdb = Pdb() + if Pdb._last_pdb_instance is not None: + pdb = Pdb._last_pdb_instance + else: + pdb = Pdb(mode='inline', backend='monitoring', colorize=True) if header is not None: pdb.message(header) - pdb.set_trace(sys._getframe().f_back) + await pdb.set_trace_async(sys._getframe().f_back, commands=commands) + +# Remote PDB + +class _PdbServer(Pdb): + def __init__( + self, + sockfile, + signal_server=None, + owns_sockfile=True, + colorize=False, + **kwargs, + ): + self._owns_sockfile = owns_sockfile + self._interact_state = None + self._sockfile = sockfile + self._command_name_cache = [] + self._write_failed = False + if signal_server: + # Only started by the top level _PdbServer, not recursive ones. + self._start_signal_listener(signal_server) + # Override the `colorize` attribute set by the parent constructor, + # because it checks the server's stdout, rather than the client's. + super().__init__(colorize=False, **kwargs) + self.colorize = colorize + + @staticmethod + def protocol_version(): + # By default, assume a client and server are compatible if they run + # the same Python major.minor version. We'll try to keep backwards + # compatibility between patch versions of a minor version if possible. + # If we do need to change the protocol in a patch version, we'll change + # `revision` to the patch version where the protocol changed. + # We can ignore compatibility for pre-release versions; sys.remote_exec + # can't attach to a pre-release version except from that same version. + v = sys.version_info + revision = 0 + return int(f"{v.major:02X}{v.minor:02X}{revision:02X}F0", 16) + + def _ensure_valid_message(self, msg): + # Ensure the message conforms to our protocol. + # If anything needs to be changed here for a patch release of Python, + # the 'revision' in protocol_version() should be updated. + match msg: + case {"message": str(), "type": str()}: + # Have the client show a message. The client chooses how to + # format the message based on its type. The currently defined + # types are "info" and "error". If a message has a type the + # client doesn't recognize, it must be treated as "info". + pass + case {"help": str()}: + # Have the client show the help for a given argument. + pass + case {"prompt": str(), "state": str()}: + # Have the client display the given prompt and wait for a reply + # from the user. If the client recognizes the state it may + # enable mode-specific features like multi-line editing. + # If it doesn't recognize the state it must prompt for a single + # line only and send it directly to the server. A server won't + # progress until it gets a "reply" or "signal" message, but can + # process "complete" requests while waiting for the reply. + pass + case { + "completions": list(completions) + } if all(isinstance(c, str) for c in completions): + # Return valid completions for a client's "complete" request. + pass + case { + "command_list": list(command_list) + } if all(isinstance(c, str) for c in command_list): + # Report the list of legal PDB commands to the client. + # Due to aliases this list is not static, but the client + # needs to know it for multi-line editing. + pass + case _: + raise AssertionError( + f"PDB message doesn't follow the schema! {msg}" + ) + + @classmethod + def _start_signal_listener(cls, address): + def listener(sock): + with closing(sock): + # Check if the interpreter is finalizing every quarter of a second. + # Clean up and exit if so. + sock.settimeout(0.25) + sock.shutdown(socket.SHUT_WR) + while not shut_down.is_set(): + try: + data = sock.recv(1024) + except socket.timeout: + continue + if data == b"": + return # EOF + signal.raise_signal(signal.SIGINT) + + def stop_thread(): + shut_down.set() + thread.join() + + # Use a daemon thread so that we don't detach until after all non-daemon + # threads are done. Use an atexit handler to stop gracefully at that point, + # so that our thread is stopped before the interpreter is torn down. + shut_down = threading.Event() + thread = threading.Thread( + target=listener, + args=[socket.create_connection(address, timeout=5)], + daemon=True, + ) + atexit.register(stop_thread) + thread.start() + + def _send(self, **kwargs): + self._ensure_valid_message(kwargs) + json_payload = json.dumps(kwargs) + try: + self._sockfile.write(json_payload.encode() + b"\n") + self._sockfile.flush() + except (OSError, ValueError): + # We get an OSError if the network connection has dropped, and a + # ValueError if detach() if the sockfile has been closed. We'll + # handle this the next time we try to read from the client instead + # of trying to handle it from everywhere _send() may be called. + # Track this with a flag rather than assuming readline() will ever + # return an empty string because the socket may be half-closed. + self._write_failed = True + + @typing.override + def message(self, msg, end="\n"): + self._send(message=str(msg) + end, type="info") + + @typing.override + def error(self, msg): + self._send(message=str(msg), type="error") + + def _get_input(self, prompt, state) -> str: + # Before displaying a (Pdb) prompt, send the list of PDB commands + # unless we've already sent an up-to-date list. + if state == "pdb" and not self._command_name_cache: + self._command_name_cache = self.completenames("", "", 0, 0) + self._send(command_list=self._command_name_cache) + self._send(prompt=prompt, state=state) + return self._read_reply() + + def _read_reply(self): + # Loop until we get a 'reply' or 'signal' from the client, + # processing out-of-band 'complete' requests as they arrive. + while True: + if self._write_failed: + raise EOFError + + msg = self._sockfile.readline() + if not msg: + raise EOFError + + try: + payload = json.loads(msg) + except json.JSONDecodeError: + self.error(f"Disconnecting: client sent invalid JSON {msg!r}") + raise EOFError + + match payload: + case {"reply": str(reply)}: + return reply + case {"signal": str(signal)}: + if signal == "INT": + raise KeyboardInterrupt + elif signal == "EOF": + raise EOFError + else: + self.error( + f"Received unrecognized signal: {signal}" + ) + # Our best hope of recovering is to pretend we + # got an EOF to exit whatever mode we're in. + raise EOFError + case { + "complete": { + "text": str(text), + "line": str(line), + "begidx": int(begidx), + "endidx": int(endidx), + } + }: + items = self._complete_any(text, line, begidx, endidx) + self._send(completions=items) + continue + # Valid JSON, but doesn't meet the schema. + self.error(f"Ignoring invalid message from client: {msg}") + + def _complete_any(self, text, line, begidx, endidx): + # If we're in 'interact' mode, we need to use the default completer + if self._interact_state: + compfunc = self.completedefault + else: + if begidx == 0: + return self.completenames(text, line, begidx, endidx) + + cmd = self.parseline(line)[0] + if cmd: + compfunc = getattr(self, "complete_" + cmd, self.completedefault) + else: + compfunc = self.completedefault + return compfunc(text, line, begidx, endidx) + + def cmdloop(self, intro=None): + self.preloop() + if intro is not None: + self.intro = intro + if self.intro: + self.message(str(self.intro)) + stop = None + while not stop: + if self._interact_state is not None: + try: + reply = self._get_input(prompt=">>> ", state="interact") + except KeyboardInterrupt: + # Match how KeyboardInterrupt is handled in a REPL + self.message("\nKeyboardInterrupt") + except EOFError: + self.message("\n*exit from pdb interact command*") + self._interact_state = None + else: + self._run_in_python_repl(reply) + continue + + if not self.cmdqueue: + try: + state = "commands" if self.commands_defining else "pdb" + reply = self._get_input(prompt=self.prompt, state=state) + except EOFError: + reply = "EOF" + + self.cmdqueue.append(reply) + + line = self.cmdqueue.pop(0) + line = self.precmd(line) + stop = self.onecmd(line) + stop = self.postcmd(stop, line) + self.postloop() + + def postloop(self): + super().postloop() + if self.quitting: + self.detach() + + def detach(self): + # Detach the debugger and close the socket without raising BdbQuit + self.quitting = False + if self._owns_sockfile: + # Don't try to reuse this instance, it's not valid anymore. + Pdb._last_pdb_instance = None + try: + self._sockfile.close() + except OSError: + # close() can fail if the connection was broken unexpectedly. + pass + + def do_debug(self, arg): + # Clear our cached list of valid commands; the recursive debugger might + # send its own differing list, and so ours needs to be re-sent. + self._command_name_cache = [] + return super().do_debug(arg) + + def do_alias(self, arg): + # Clear our cached list of valid commands; one might be added. + self._command_name_cache = [] + return super().do_alias(arg) + + def do_unalias(self, arg): + # Clear our cached list of valid commands; one might be removed. + self._command_name_cache = [] + return super().do_unalias(arg) + + def do_help(self, arg): + # Tell the client to render the help, since it might need a pager. + self._send(help=arg) + + do_h = do_help + + def _interact_displayhook(self, obj): + # Like the default `sys.displayhook` except sending a socket message. + if obj is not None: + self.message(repr(obj)) + builtins._ = obj + + def _run_in_python_repl(self, lines): + # Run one 'interact' mode code block against an existing namespace. + assert self._interact_state + save_displayhook = sys.displayhook + try: + sys.displayhook = self._interact_displayhook + code_obj = self._interact_state["compiler"](lines + "\n") + if code_obj is None: + raise SyntaxError("Incomplete command") + exec(code_obj, self._interact_state["ns"]) + except: + self._error_exc() + finally: + sys.displayhook = save_displayhook + + def do_interact(self, arg): + # Prepare to run 'interact' mode code blocks, and trigger the client + # to start treating all input as Python commands, not PDB ones. + self.message("*pdb interact start*") + self._interact_state = dict( + compiler=codeop.CommandCompiler(), + ns={**self.curframe.f_globals, **self.curframe.f_locals}, + ) + + @typing.override + def _create_recursive_debugger(self): + return _PdbServer( + self._sockfile, + owns_sockfile=False, + colorize=self.colorize, + ) + + @typing.override + def _prompt_for_confirmation(self, prompt, default): + try: + return self._get_input(prompt=prompt, state="confirm") + except (EOFError, KeyboardInterrupt): + return default + + def do_run(self, arg): + self.error("remote PDB cannot restart the program") + + do_restart = do_run + + def _error_exc(self): + if self._interact_state and isinstance(sys.exception(), SystemExit): + # If we get a SystemExit in 'interact' mode, exit the REPL. + self._interact_state = None + ret = super()._error_exc() + self.message("*exit from pdb interact command*") + return ret + else: + return super()._error_exc() + + def default(self, line): + # Unlike Pdb, don't prompt for more lines of a multi-line command. + # The remote needs to send us the whole block in one go. + try: + candidate = line.removeprefix("!") + "\n" + if codeop.compile_command(candidate, "", "single") is None: + raise SyntaxError("Incomplete command") + return super().default(candidate) + except: + self._error_exc() + + +class _PdbClient: + def __init__(self, pid, server_socket, interrupt_sock): + self.pid = pid + self.read_buf = b"" + self.signal_read = None + self.signal_write = None + self.sigint_received = False + self.raise_on_sigint = False + self.server_socket = server_socket + self.interrupt_sock = interrupt_sock + self.pdb_instance = Pdb() + self.pdb_commands = set() + self.completion_matches = [] + self.state = "dumb" + self.write_failed = False + self.multiline_block = False + + def _ensure_valid_message(self, msg): + # Ensure the message conforms to our protocol. + # If anything needs to be changed here for a patch release of Python, + # the 'revision' in protocol_version() should be updated. + match msg: + case {"reply": str()}: + # Send input typed by a user at a prompt to the remote PDB. + pass + case {"signal": "EOF"}: + # Tell the remote PDB that the user pressed ^D at a prompt. + pass + case {"signal": "INT"}: + # Tell the remote PDB that the user pressed ^C at a prompt. + pass + case { + "complete": { + "text": str(), + "line": str(), + "begidx": int(), + "endidx": int(), + } + }: + # Ask the remote PDB what completions are valid for the given + # parameters, using readline's completion protocol. + pass + case _: + raise AssertionError( + f"PDB message doesn't follow the schema! {msg}" + ) + + def _send(self, **kwargs): + self._ensure_valid_message(kwargs) + json_payload = json.dumps(kwargs) + try: + self.server_socket.sendall(json_payload.encode() + b"\n") + except OSError: + # This means that the client has abruptly disconnected, but we'll + # handle that the next time we try to read from the client instead + # of trying to handle it from everywhere _send() may be called. + # Track this with a flag rather than assuming readline() will ever + # return an empty string because the socket may be half-closed. + self.write_failed = True + + def _readline(self): + if self.sigint_received: + # There's a pending unhandled SIGINT. Handle it now. + self.sigint_received = False + raise KeyboardInterrupt + + # Wait for either a SIGINT or a line or EOF from the PDB server. + selector = selectors.DefaultSelector() + selector.register(self.signal_read, selectors.EVENT_READ) + selector.register(self.server_socket, selectors.EVENT_READ) + + while b"\n" not in self.read_buf: + for key, _ in selector.select(): + if key.fileobj == self.signal_read: + self.signal_read.recv(1024) + if self.sigint_received: + # If not, we're reading wakeup events for sigints that + # we've previously handled, and can ignore them. + self.sigint_received = False + raise KeyboardInterrupt + elif key.fileobj == self.server_socket: + data = self.server_socket.recv(16 * 1024) + self.read_buf += data + if not data and b"\n" not in self.read_buf: + # EOF without a full final line. Drop the partial line. + self.read_buf = b"" + return b"" + + ret, sep, self.read_buf = self.read_buf.partition(b"\n") + return ret + sep + + def read_input(self, prompt, multiline_block): + self.multiline_block = multiline_block + with self._sigint_raises_keyboard_interrupt(): + return input(prompt) + + def read_command(self, prompt): + reply = self.read_input(prompt, multiline_block=False) + if self.state == "dumb": + # No logic applied whatsoever, just pass the raw reply back. + return reply + + prefix = "" + if self.state == "pdb": + # PDB command entry mode + cmd = self.pdb_instance.parseline(reply)[0] + if cmd in self.pdb_commands or reply.strip() == "": + # Recognized PDB command, or blank line repeating last command + return reply + + # Otherwise, explicit or implicit exec command + if reply.startswith("!"): + prefix = "!" + reply = reply.removeprefix(prefix).lstrip() + + if codeop.compile_command(reply + "\n", "", "single") is not None: + # Valid single-line statement + return prefix + reply + + # Otherwise, valid first line of a multi-line statement + more_prompt = "...".ljust(len(prompt)) + while codeop.compile_command(reply, "", "single") is None: + reply += "\n" + self.read_input(more_prompt, multiline_block=True) + + return prefix + reply + + @contextmanager + def readline_completion(self, completer): + try: + import readline + except ImportError: + yield + return + + old_completer = readline.get_completer() + try: + readline.set_completer(completer) + if readline.backend == "editline": + # libedit uses "^I" instead of "tab" + command_string = "bind ^I rl_complete" + else: + command_string = "tab: complete" + readline.parse_and_bind(command_string) + yield + finally: + readline.set_completer(old_completer) + + @contextmanager + def _sigint_handler(self): + # Signal handling strategy: + # - When we call input() we want a SIGINT to raise KeyboardInterrupt + # - Otherwise we want to write to the wakeup FD and set a flag. + # We'll break out of select() when the wakeup FD is written to, + # and we'll check the flag whenever we're about to accept input. + def handler(signum, frame): + self.sigint_received = True + if self.raise_on_sigint: + # One-shot; don't raise again until the flag is set again. + self.raise_on_sigint = False + self.sigint_received = False + raise KeyboardInterrupt + + sentinel = object() + old_handler = sentinel + old_wakeup_fd = sentinel + + self.signal_read, self.signal_write = socket.socketpair() + with (closing(self.signal_read), closing(self.signal_write)): + self.signal_read.setblocking(False) + self.signal_write.setblocking(False) + + try: + old_handler = signal.signal(signal.SIGINT, handler) + + try: + old_wakeup_fd = signal.set_wakeup_fd( + self.signal_write.fileno(), + warn_on_full_buffer=False, + ) + yield + finally: + # Restore the old wakeup fd if we installed a new one + if old_wakeup_fd is not sentinel: + signal.set_wakeup_fd(old_wakeup_fd) + finally: + self.signal_read = self.signal_write = None + if old_handler is not sentinel: + # Restore the old handler if we installed a new one + signal.signal(signal.SIGINT, old_handler) + + @contextmanager + def _sigint_raises_keyboard_interrupt(self): + if self.sigint_received: + # There's a pending unhandled SIGINT. Handle it now. + self.sigint_received = False + raise KeyboardInterrupt + + try: + self.raise_on_sigint = True + yield + finally: + self.raise_on_sigint = False + + def cmdloop(self): + with ( + self._sigint_handler(), + self.readline_completion(self.complete), + ): + while not self.write_failed: + try: + if not (payload_bytes := self._readline()): + break + except KeyboardInterrupt: + self.send_interrupt() + continue + + try: + payload = json.loads(payload_bytes) + except json.JSONDecodeError: + print( + f"*** Invalid JSON from remote: {payload_bytes!r}", + flush=True, + ) + continue + + self.process_payload(payload) + + def send_interrupt(self): + if self.interrupt_sock is not None: + # Write to a socket that the PDB server listens on. This triggers + # the remote to raise a SIGINT for itself. We do this because + # Windows doesn't allow triggering SIGINT remotely. + # See https://stackoverflow.com/a/35792192 for many more details. + self.interrupt_sock.sendall(signal.SIGINT.to_bytes()) + else: + # On Unix we can just send a SIGINT to the remote process. + # This is preferable to using the signal thread approach that we + # use on Windows because it can interrupt IO in the main thread. + os.kill(self.pid, signal.SIGINT) + + def process_payload(self, payload): + match payload: + case { + "command_list": command_list + } if all(isinstance(c, str) for c in command_list): + self.pdb_commands = set(command_list) + case {"message": str(msg), "type": str(msg_type)}: + if msg_type == "error": + print("***", msg, flush=True) + else: + print(msg, end="", flush=True) + case {"help": str(arg)}: + self.pdb_instance.do_help(arg) + case {"prompt": str(prompt), "state": str(state)}: + if state not in ("pdb", "interact"): + state = "dumb" + self.state = state + self.prompt_for_reply(prompt) + case _: + raise RuntimeError(f"Unrecognized payload {payload}") + + def prompt_for_reply(self, prompt): + while True: + try: + payload = {"reply": self.read_command(prompt)} + except EOFError: + payload = {"signal": "EOF"} + except KeyboardInterrupt: + payload = {"signal": "INT"} + except Exception as exc: + msg = traceback.format_exception_only(exc)[-1].strip() + print("***", msg, flush=True) + continue + + self._send(**payload) + return + + def complete(self, text, state): + import readline + + if state == 0: + self.completion_matches = [] + if self.state not in ("pdb", "interact"): + return None + + origline = readline.get_line_buffer() + line = origline.lstrip() + if self.multiline_block: + # We're completing a line contained in a multi-line block. + # Force the remote to treat it as a Python expression. + line = "! " + line + offset = len(origline) - len(line) + begidx = readline.get_begidx() - offset + endidx = readline.get_endidx() - offset + + msg = { + "complete": { + "text": text, + "line": line, + "begidx": begidx, + "endidx": endidx, + } + } + + self._send(**msg) + if self.write_failed: + return None + + payload = self._readline() + if not payload: + return None + + payload = json.loads(payload) + if "completions" not in payload: + raise RuntimeError( + f"Failed to get valid completions. Got: {payload}" + ) + + self.completion_matches = payload["completions"] + try: + return self.completion_matches[state] + except IndexError: + return None + + +def _connect( + *, + host, + port, + frame, + commands, + version, + signal_raising_thread, + colorize, +): + with closing(socket.create_connection((host, port))) as conn: + sockfile = conn.makefile("rwb") + + # The client requests this thread on Windows but not on Unix. + # Most tests don't request this thread, to keep them simpler. + if signal_raising_thread: + signal_server = (host, port) + else: + signal_server = None + + remote_pdb = _PdbServer( + sockfile, + signal_server=signal_server, + colorize=colorize, + ) + weakref.finalize(remote_pdb, sockfile.close) + + if Pdb._last_pdb_instance is not None: + remote_pdb.error("Another PDB instance is already attached.") + elif version != remote_pdb.protocol_version(): + target_ver = f"0x{remote_pdb.protocol_version():08X}" + attach_ver = f"0x{version:08X}" + remote_pdb.error( + f"The target process is running a Python version that is" + f" incompatible with this PDB module." + f"\nTarget process pdb protocol version: {target_ver}" + f"\nLocal pdb module's protocol version: {attach_ver}" + ) + else: + remote_pdb.rcLines.extend(commands.splitlines()) + remote_pdb.set_trace(frame=frame) + + +def attach(pid, commands=()): + """Attach to a running process with the given PID.""" + with ExitStack() as stack: + server = stack.enter_context( + closing(socket.create_server(("localhost", 0))) + ) + port = server.getsockname()[1] + + connect_script = stack.enter_context( + tempfile.NamedTemporaryFile("w", delete_on_close=False) + ) + + use_signal_thread = sys.platform == "win32" + colorize = _colorize.can_colorize() + + connect_script.write( + textwrap.dedent( + f""" + import pdb, sys + pdb._connect( + host="localhost", + port={port}, + frame=sys._getframe(1), + commands={json.dumps("\n".join(commands))}, + version={_PdbServer.protocol_version()}, + signal_raising_thread={use_signal_thread!r}, + colorize={colorize!r}, + ) + """ + ) + ) + connect_script.close() + orig_mode = os.stat(connect_script.name).st_mode + os.chmod(connect_script.name, orig_mode | stat.S_IROTH | stat.S_IRGRP) + sys.remote_exec(pid, connect_script.name) + + # TODO Add a timeout? Or don't bother since the user can ^C? + client_sock, _ = server.accept() + stack.enter_context(closing(client_sock)) + + if use_signal_thread: + interrupt_sock, _ = server.accept() + stack.enter_context(closing(interrupt_sock)) + interrupt_sock.setblocking(False) + else: + interrupt_sock = None + + _PdbClient(pid, client_sock, interrupt_sock).cmdloop() + # Post-Mortem interface def post_mortem(t=None): - """Enter post-mortem debugging of the given *traceback* object. + """Enter post-mortem debugging of the given *traceback*, or *exception* + object. If no traceback is given, it uses the one of the exception that is currently being handled (an exception must be being handled if the default is to be used). + + If `t` is an exception object, the `exceptions` command makes it possible to + list and inspect its chained exceptions (if any). + """ + return _post_mortem(t, Pdb()) + + +def _post_mortem(t, pdb_instance): + """ + Private version of post_mortem, which allow to pass a pdb instance + for testing purposes. """ # handling the default if t is None: @@ -1881,21 +3509,17 @@ def post_mortem(t=None): if exc is not None: t = exc.__traceback__ - if t is None: + if t is None or (isinstance(t, BaseException) and t.__traceback__ is None): raise ValueError("A valid traceback must be passed if no " "exception is being handled") - p = Pdb() - p.reset() - p.interaction(None, t) + pdb_instance.reset() + pdb_instance.interaction(None, t) + def pm(): - """Enter post-mortem debugging of the traceback found in sys.last_traceback.""" - if hasattr(sys, 'last_exc'): - tb = sys.last_exc.__traceback__ - else: - tb = sys.last_traceback - post_mortem(tb) + """Enter post-mortem debugging of the traceback found in sys.last_exc.""" + post_mortem(sys.last_exc) # Main program for testing @@ -1911,11 +3535,10 @@ def help(): pydoc.pager(__doc__) _usage = """\ -usage: pdb.py [-c command] ... [-m module | pyfile] [arg] ... - Debug the Python program given by pyfile. Alternatively, an executable module or package to debug can be specified using -the -m switch. +the -m switch. You can also attach to a running Python process +using the -p option with its PID. Initial commands are read from .pdbrc files in your home directory and in the current directory, if they exist. Commands supplied with @@ -1926,41 +3549,107 @@ def help(): "-c 'until X'".""" -def main(): - import getopt - - opts, args = getopt.getopt(sys.argv[1:], 'mhc:', ['help', 'command=']) +def exit_with_permission_help_text(): + """ + Prints a message pointing to platform-specific permission help text and exits the program. + This function is called when a PermissionError is encountered while trying + to attach to a process. + """ + print( + "Error: The specified process cannot be attached to due to insufficient permissions.\n" + "See the Python documentation for details on required privileges and troubleshooting:\n" + "https://docs.python.org/3.14/howto/remote_debugging.html#permission-requirements\n" + ) + sys.exit(1) + + +def parse_args(): + # We want pdb to be as intuitive as possible to users, so we need to do some + # heuristic parsing to deal with ambiguity. + # For example: + # "python -m pdb -m foo -p 1" should pass "-p 1" to "foo". + # "python -m pdb foo.py -m bar" should pass "-m bar" to "foo.py". + # "python -m pdb -m foo -m bar" should pass "-m bar" to "foo". + # This require some customized parsing logic to find the actual debug target. + + import argparse + + parser = argparse.ArgumentParser( + usage="%(prog)s [-h] [-c command] (-m module | -p pid | pyfile) [args ...]", + description=_usage, + formatter_class=argparse.RawDescriptionHelpFormatter, + allow_abbrev=False, + color=True, + ) + + # Get all the commands out first. For backwards compatibility, we allow + # -c commands to be after the target. + parser.add_argument('-c', '--command', action='append', default=[], metavar='command', dest='commands', + help='pdb commands to execute as if given in a .pdbrc file') + + opts, args = parser.parse_known_args() if not args: - print(_usage) + # If no arguments were given (python -m pdb), print the whole help message. + # Without this check, argparse would only complain about missing required arguments. + # We need to add the arguments definitions here to get a proper help message. + parser.add_argument('-m', metavar='module', dest='module') + parser.add_argument('-p', '--pid', type=int, help="attach to the specified PID", default=None) + parser.print_help() sys.exit(2) + elif args[0] == '-p' or args[0] == '--pid': + # Attach to a pid + parser.add_argument('-p', '--pid', type=int, help="attach to the specified PID", default=None) + opts, args = parser.parse_known_args() + if args: + # For --pid, any extra arguments are invalid. + parser.error(f"unrecognized arguments: {' '.join(args)}") + elif args[0] == '-m': + # Debug a module, we only need the first -m module argument. + # The rest is passed to the module itself. + parser.add_argument('-m', metavar='module', dest='module') + opt_module = parser.parse_args(args[:2]) + opts.module = opt_module.module + args = args[2:] + elif args[0].startswith('-'): + # Invalid argument before the script name. + invalid_args = list(itertools.takewhile(lambda a: a.startswith('-'), args)) + parser.error(f"unrecognized arguments: {' '.join(invalid_args)}") + + # Otherwise it's debugging a script and we already parsed all -c commands. + + return opts, args - if any(opt in ['-h', '--help'] for opt, optarg in opts): - print(_usage) - sys.exit() - - commands = [optarg for opt, optarg in opts if opt in ['-c', '--command']] - - module_indicated = any(opt in ['-m'] for opt, optarg in opts) - cls = _ModuleTarget if module_indicated else _ScriptTarget - target = cls(args[0]) +def main(): + opts, args = parse_args() - target.check() + if getattr(opts, 'pid', None) is not None: + try: + attach(opts.pid, opts.commands) + except PermissionError as e: + exit_with_permission_help_text() + return + elif getattr(opts, 'module', None) is not None: + file = opts.module + target = _ModuleTarget(file) + else: + file = args.pop(0) + if file.endswith('.pyz'): + target = _ZipTarget(file) + else: + target = _ScriptTarget(file) - sys.argv[:] = args # Hide "pdb.py" and pdb options from argument list + sys.argv[:] = [file] + args # Hide "pdb.py" and pdb options from argument list # Note on saving/restoring sys.argv: it's a good idea when sys.argv was # modified by the script being debugged. It's a bad idea when it was # changed by the user from the command line. There is a "restart" command # which allows explicit specification of command line arguments. - pdb = Pdb() - pdb.rcLines.extend(commands) + pdb = Pdb(mode='cli', backend='monitoring', colorize=True) + pdb.rcLines.extend(opts.commands) while True: try: pdb._run(target) - if pdb._user_requested_quit: - break - print("The program finished and will be restarted") except Restart: print("Restarting", target, "with arguments:") print("\t" + " ".join(sys.argv[1:])) @@ -1968,17 +3657,19 @@ def main(): # In most cases SystemExit does not warrant a post-mortem session. print("The program exited via sys.exit(). Exit status:", end=' ') print(e) - except SyntaxError: - traceback.print_exc() - sys.exit(1) except BaseException as e: - traceback.print_exc() + traceback.print_exception(e, colorize=_colorize.can_colorize()) print("Uncaught exception. Entering post mortem debugging") print("Running 'cont' or 'step' will restart the program") - t = e.__traceback__ - pdb.interaction(None, t) - print("Post mortem debugger finished. The " + target + - " will be restarted") + try: + pdb.interaction(None, e) + except Restart: + print("Restarting", target, "with arguments:") + print("\t" + " ".join(sys.argv[1:])) + continue + if pdb._user_requested_quit: + break + print("The program finished and will be restarted") # When invoked as main program, invoke the debugger on a script diff --git a/PythonLib/full/pickle.py b/PythonLib/full/pickle.py index ea5f1c5d..beaefae0 100644 --- a/PythonLib/full/pickle.py +++ b/PythonLib/full/pickle.py @@ -26,12 +26,11 @@ from types import FunctionType from copyreg import dispatch_table from copyreg import _extension_registry, _inverted_registry, _extension_cache -from itertools import islice +from itertools import batched from functools import partial import sys from sys import maxsize from struct import pack, unpack -import re import io import codecs import _compat_pickle @@ -51,7 +50,7 @@ bytes_types = (bytes, bytearray) # These are purely informational; no code uses these. -format_version = "4.0" # File format version we write +format_version = "5.0" # File format version we write compatible_formats = ["1.0", # Original protocol 0 "1.1", # Protocol 0 with INST added "1.2", # Original protocol 1 @@ -68,7 +67,7 @@ # The protocol we write by default. May be less than HIGHEST_PROTOCOL. # Only bump this if the oldest still supported version of Python already # includes it. -DEFAULT_PROTOCOL = 4 +DEFAULT_PROTOCOL = 5 class PickleError(Exception): """A common base class for the other pickling exceptions.""" @@ -188,7 +187,7 @@ def __init__(self, value): NEXT_BUFFER = b'\x97' # push next out-of-band buffer READONLY_BUFFER = b'\x98' # make top of stack readonly -__all__.extend([x for x in dir() if re.match("[A-Z][A-Z0-9_]+$", x)]) +__all__.extend(x for x in dir() if x.isupper() and not x.startswith('_')) class _Framer: @@ -313,38 +312,46 @@ def load_frame(self, frame_size): # Tools used for pickling. -def _getattribute(obj, name): - top = obj - for subpath in name.split('.'): - if subpath == '': - raise AttributeError("Can't get local attribute {!r} on {!r}" - .format(name, top)) - try: - parent = obj - obj = getattr(obj, subpath) - except AttributeError: - raise AttributeError("Can't get attribute {!r} on {!r}" - .format(name, top)) from None - return obj, parent +def _getattribute(obj, dotted_path): + for subpath in dotted_path: + obj = getattr(obj, subpath) + return obj def whichmodule(obj, name): """Find the module an object belong to.""" + dotted_path = name.split('.') module_name = getattr(obj, '__module__', None) - if module_name is not None: - return module_name - # Protect the iteration by using a list copy of sys.modules against dynamic - # modules that trigger imports of other modules upon calls to getattr. - for module_name, module in sys.modules.copy().items(): - if (module_name == '__main__' - or module_name == '__mp_main__' # bpo-42406 - or module is None): - continue - try: - if _getattribute(module, name)[0] is obj: - return module_name - except AttributeError: - pass - return '__main__' + if '' in dotted_path: + raise PicklingError(f"Can't pickle local object {obj!r}") + if module_name is None: + # Protect the iteration by using a list copy of sys.modules against dynamic + # modules that trigger imports of other modules upon calls to getattr. + for module_name, module in sys.modules.copy().items(): + if (module_name == '__main__' + or module_name == '__mp_main__' # bpo-42406 + or module is None): + continue + try: + if _getattribute(module, dotted_path) is obj: + return module_name + except AttributeError: + pass + module_name = '__main__' + + try: + __import__(module_name, level=0) + module = sys.modules[module_name] + except (ImportError, ValueError, KeyError) as exc: + raise PicklingError(f"Can't pickle {obj!r}: {exc!s}") + try: + if _getattribute(module, dotted_path) is obj: + return module_name + except AttributeError: + raise PicklingError(f"Can't pickle {obj!r}: " + f"it's not found as {module_name}.{name}") + + raise PicklingError( + f"Can't pickle {obj!r}: it's not the same object as {module_name}.{name}") def encode_long(x): r"""Encode a long to a two's complement little-endian binary string. @@ -396,6 +403,13 @@ def decode_long(data): """ return int.from_bytes(data, byteorder='little', signed=True) +def _T(obj): + cls = type(obj) + module = cls.__module__ + if module in (None, 'builtins', '__main__'): + return cls.__qualname__ + return f'{module}.{cls.__qualname__}' + _NoValue = object() @@ -409,7 +423,7 @@ def __init__(self, file, protocol=None, *, fix_imports=True, The optional *protocol* argument tells the pickler to use the given protocol; supported protocols are 0, 1, 2, 3, 4 and 5. - The default protocol is 4. It was introduced in Python 3.4, and + The default protocol is 5. It was introduced in Python 3.8, and is incompatible with previous versions. Specifying a negative protocol version selects the highest @@ -546,8 +560,8 @@ def save(self, obj, save_persistent_id=True): return rv = NotImplemented - reduce = getattr(self, "reducer_override", None) - if reduce is not None: + reduce = getattr(self, "reducer_override", _NoValue) + if reduce is not _NoValue: rv = reduce(obj) if rv is NotImplemented: @@ -560,8 +574,8 @@ def save(self, obj, save_persistent_id=True): # Check private dispatch table if any, or else # copyreg.dispatch_table - reduce = getattr(self, 'dispatch_table', dispatch_table).get(t) - if reduce is not None: + reduce = getattr(self, 'dispatch_table', dispatch_table).get(t, _NoValue) + if reduce is not _NoValue: rv = reduce(obj) else: # Check for a class with a custom metaclass; treat as regular @@ -571,34 +585,37 @@ def save(self, obj, save_persistent_id=True): return # Check for a __reduce_ex__ method, fall back to __reduce__ - reduce = getattr(obj, "__reduce_ex__", None) - if reduce is not None: + reduce = getattr(obj, "__reduce_ex__", _NoValue) + if reduce is not _NoValue: rv = reduce(self.proto) else: - reduce = getattr(obj, "__reduce__", None) - if reduce is not None: + reduce = getattr(obj, "__reduce__", _NoValue) + if reduce is not _NoValue: rv = reduce() else: - raise PicklingError("Can't pickle %r object: %r" % - (t.__name__, obj)) + raise PicklingError(f"Can't pickle {_T(t)} object") # Check for string returned by reduce(), meaning "save as global" if isinstance(rv, str): self.save_global(obj, rv) return - # Assert that reduce() returned a tuple - if not isinstance(rv, tuple): - raise PicklingError("%s must return string or tuple" % reduce) - - # Assert that it returned an appropriately sized tuple - l = len(rv) - if not (2 <= l <= 6): - raise PicklingError("Tuple returned by %s must have " - "two to six elements" % reduce) - - # Save the reduce() output and finally memoize the object - self.save_reduce(obj=obj, *rv) + try: + # Assert that reduce() returned a tuple + if not isinstance(rv, tuple): + raise PicklingError(f'__reduce__ must return a string or tuple, not {_T(rv)}') + + # Assert that it returned an appropriately sized tuple + l = len(rv) + if not (2 <= l <= 6): + raise PicklingError("tuple returned by __reduce__ " + "must contain 2 through 6 elements") + + # Save the reduce() output and finally memoize the object + self.save_reduce(obj=obj, *rv) + except BaseException as exc: + exc.add_note(f'when serializing {_T(obj)} object') + raise def persistent_id(self, obj): # This exists so a subclass can override it @@ -620,10 +637,12 @@ def save_reduce(self, func, args, state=None, listitems=None, dictitems=None, state_setter=None, *, obj=None): # This API is called by some subclasses - if not isinstance(args, tuple): - raise PicklingError("args from save_reduce() must be a tuple") if not callable(func): - raise PicklingError("func from save_reduce() must be callable") + raise PicklingError(f"first item of the tuple returned by __reduce__ " + f"must be callable, not {_T(func)}") + if not isinstance(args, tuple): + raise PicklingError(f"second item of the tuple returned by __reduce__ " + f"must be a tuple, not {_T(args)}") save = self.save write = self.write @@ -632,19 +651,30 @@ def save_reduce(self, func, args, state=None, listitems=None, if self.proto >= 2 and func_name == "__newobj_ex__": cls, args, kwargs = args if not hasattr(cls, "__new__"): - raise PicklingError("args[0] from {} args has no __new__" - .format(func_name)) + raise PicklingError("first argument to __newobj_ex__() has no __new__") if obj is not None and cls is not obj.__class__: - raise PicklingError("args[0] from {} args has the wrong class" - .format(func_name)) + raise PicklingError(f"first argument to __newobj_ex__() " + f"must be {obj.__class__!r}, not {cls!r}") if self.proto >= 4: - save(cls) - save(args) - save(kwargs) + try: + save(cls) + except BaseException as exc: + exc.add_note(f'when serializing {_T(obj)} class') + raise + try: + save(args) + save(kwargs) + except BaseException as exc: + exc.add_note(f'when serializing {_T(obj)} __new__ arguments') + raise write(NEWOBJ_EX) else: func = partial(cls.__new__, cls, *args, **kwargs) - save(func) + try: + save(func) + except BaseException as exc: + exc.add_note(f'when serializing {_T(obj)} reconstructor') + raise save(()) write(REDUCE) elif self.proto >= 2 and func_name == "__newobj__": @@ -676,18 +706,33 @@ def save_reduce(self, func, args, state=None, listitems=None, # Python 2.2). cls = args[0] if not hasattr(cls, "__new__"): - raise PicklingError( - "args[0] from __newobj__ args has no __new__") + raise PicklingError("first argument to __newobj__() has no __new__") if obj is not None and cls is not obj.__class__: - raise PicklingError( - "args[0] from __newobj__ args has the wrong class") + raise PicklingError(f"first argument to __newobj__() " + f"must be {obj.__class__!r}, not {cls!r}") args = args[1:] - save(cls) - save(args) + try: + save(cls) + except BaseException as exc: + exc.add_note(f'when serializing {_T(obj)} class') + raise + try: + save(args) + except BaseException as exc: + exc.add_note(f'when serializing {_T(obj)} __new__ arguments') + raise write(NEWOBJ) else: - save(func) - save(args) + try: + save(func) + except BaseException as exc: + exc.add_note(f'when serializing {_T(obj)} reconstructor') + raise + try: + save(args) + except BaseException as exc: + exc.add_note(f'when serializing {_T(obj)} reconstructor arguments') + raise write(REDUCE) if obj is not None: @@ -705,23 +750,35 @@ def save_reduce(self, func, args, state=None, listitems=None, # items and dict items (as (key, value) tuples), or None. if listitems is not None: - self._batch_appends(listitems) + self._batch_appends(listitems, obj) if dictitems is not None: - self._batch_setitems(dictitems) + self._batch_setitems(dictitems, obj) if state is not None: if state_setter is None: - save(state) + try: + save(state) + except BaseException as exc: + exc.add_note(f'when serializing {_T(obj)} state') + raise write(BUILD) else: # If a state_setter is specified, call it instead of load_build # to update obj's with its previous state. # First, push state_setter and its tuple of expected arguments # (obj, state) onto the stack. - save(state_setter) + try: + save(state_setter) + except BaseException as exc: + exc.add_note(f'when serializing {_T(obj)} state setter') + raise save(obj) # simple BINGET opcode as obj is already memoized. - save(state) + try: + save(state) + except BaseException as exc: + exc.add_note(f'when serializing {_T(obj)} state') + raise write(TUPLE2) # Trigger a state_setter(obj, state) function call. write(REDUCE) @@ -901,8 +958,12 @@ def save_tuple(self, obj): save = self.save memo = self.memo if n <= 3 and self.proto >= 2: - for element in obj: - save(element) + for i, element in enumerate(obj): + try: + save(element) + except BaseException as exc: + exc.add_note(f'when serializing {_T(obj)} item {i}') + raise # Subtle. Same as in the big comment below. if id(obj) in memo: get = self.get(memo[id(obj)][0]) @@ -916,8 +977,12 @@ def save_tuple(self, obj): # has more than 3 elements. write = self.write write(MARK) - for element in obj: - save(element) + for i, element in enumerate(obj): + try: + save(element) + except BaseException as exc: + exc.add_note(f'when serializing {_T(obj)} item {i}') + raise if id(obj) in memo: # Subtle. d was not in memo when we entered save_tuple(), so @@ -947,38 +1012,47 @@ def save_list(self, obj): self.write(MARK + LIST) self.memoize(obj) - self._batch_appends(obj) + self._batch_appends(obj, obj) dispatch[list] = save_list _BATCHSIZE = 1000 - def _batch_appends(self, items): + def _batch_appends(self, items, obj): # Helper to batch up APPENDS sequences save = self.save write = self.write if not self.bin: - for x in items: - save(x) + for i, x in enumerate(items): + try: + save(x) + except BaseException as exc: + exc.add_note(f'when serializing {_T(obj)} item {i}') + raise write(APPEND) return - it = iter(items) - while True: - tmp = list(islice(it, self._BATCHSIZE)) - n = len(tmp) - if n > 1: + start = 0 + for batch in batched(items, self._BATCHSIZE): + batch_len = len(batch) + if batch_len != 1: write(MARK) - for x in tmp: - save(x) + for i, x in enumerate(batch, start): + try: + save(x) + except BaseException as exc: + exc.add_note(f'when serializing {_T(obj)} item {i}') + raise write(APPENDS) - elif n: - save(tmp[0]) + else: + try: + save(batch[0]) + except BaseException as exc: + exc.add_note(f'when serializing {_T(obj)} item {start}') + raise write(APPEND) - # else tmp is empty, and we're done - if n < self._BATCHSIZE: - return + start += batch_len def save_dict(self, obj): if self.bin: @@ -987,11 +1061,11 @@ def save_dict(self, obj): self.write(MARK + DICT) self.memoize(obj) - self._batch_setitems(obj.items()) + self._batch_setitems(obj.items(), obj) dispatch[dict] = save_dict - def _batch_setitems(self, items): + def _batch_setitems(self, items, obj): # Helper to batch up SETITEMS sequences; proto >= 1 only save = self.save write = self.write @@ -999,28 +1073,34 @@ def _batch_setitems(self, items): if not self.bin: for k, v in items: save(k) - save(v) + try: + save(v) + except BaseException as exc: + exc.add_note(f'when serializing {_T(obj)} item {k!r}') + raise write(SETITEM) return - it = iter(items) - while True: - tmp = list(islice(it, self._BATCHSIZE)) - n = len(tmp) - if n > 1: + for batch in batched(items, self._BATCHSIZE): + if len(batch) != 1: write(MARK) - for k, v in tmp: + for k, v in batch: save(k) - save(v) + try: + save(v) + except BaseException as exc: + exc.add_note(f'when serializing {_T(obj)} item {k!r}') + raise write(SETITEMS) - elif n: - k, v = tmp[0] + else: + k, v = batch[0] save(k) - save(v) + try: + save(v) + except BaseException as exc: + exc.add_note(f'when serializing {_T(obj)} item {k!r}') + raise write(SETITEM) - # else tmp is empty, and we're done - if n < self._BATCHSIZE: - return def save_set(self, obj): save = self.save @@ -1033,17 +1113,15 @@ def save_set(self, obj): write(EMPTY_SET) self.memoize(obj) - it = iter(obj) - while True: - batch = list(islice(it, self._BATCHSIZE)) - n = len(batch) - if n > 0: - write(MARK) + for batch in batched(obj, self._BATCHSIZE): + write(MARK) + try: for item in batch: save(item) - write(ADDITEMS) - if n < self._BATCHSIZE: - return + except BaseException as exc: + exc.add_note(f'when serializing {_T(obj)} element') + raise + write(ADDITEMS) dispatch[set] = save_set def save_frozenset(self, obj): @@ -1055,8 +1133,12 @@ def save_frozenset(self, obj): return write(MARK) - for item in obj: - save(item) + try: + for item in obj: + save(item) + except BaseException as exc: + exc.add_note(f'when serializing {_T(obj)} element') + raise if id(obj) in self.memo: # If the object is already in the memo, this means it is @@ -1075,24 +1157,10 @@ def save_global(self, obj, name=None): if name is None: name = getattr(obj, '__qualname__', None) - if name is None: - name = obj.__name__ + if name is None: + name = obj.__name__ module_name = whichmodule(obj, name) - try: - __import__(module_name, level=0) - module = sys.modules[module_name] - obj2, parent = _getattribute(module, name) - except (ImportError, KeyError, AttributeError): - raise PicklingError( - "Can't pickle %r: it's not found as %s.%s" % - (obj, module_name, name)) from None - else: - if obj2 is not obj: - raise PicklingError( - "Can't pickle %r: it's not the same object as %s.%s" % - (obj, module_name, name)) - if self.proto >= 2: code = _extension_registry.get((module_name, name), _NoValue) if code is not _NoValue: @@ -1109,10 +1177,7 @@ def save_global(self, obj, name=None): else: write(EXT4 + pack("= 3. + if self.proto >= 4: self.save(module_name) self.save(name) @@ -1144,8 +1209,7 @@ def save_global(self, obj, name=None): def _save_toplevel_by_name(self, module_name, name): if self.proto >= 3: # Non-ASCII identifiers are supported only with protocols >= 3. - self.write(GLOBAL + bytes(module_name, "utf-8") + b'\n' + - bytes(name, "utf-8") + b'\n') + encoding = "utf-8" else: if self.fix_imports: r_name_mapping = _compat_pickle.REVERSE_NAME_MAPPING @@ -1154,13 +1218,19 @@ def _save_toplevel_by_name(self, module_name, name): module_name, name = r_name_mapping[(module_name, name)] elif module_name in r_import_mapping: module_name = r_import_mapping[module_name] - try: - self.write(GLOBAL + bytes(module_name, "ascii") + b'\n' + - bytes(name, "ascii") + b'\n') - except UnicodeEncodeError: - raise PicklingError( - "can't pickle global identifier '%s.%s' using " - "pickle protocol %i" % (module_name, name, self.proto)) from None + encoding = "ascii" + try: + self.write(GLOBAL + bytes(module_name, encoding) + b'\n') + except UnicodeEncodeError: + raise PicklingError( + f"can't pickle module identifier {module_name!r} using " + f"pickle protocol {self.proto}") + try: + self.write(bytes(name, encoding) + b'\n') + except UnicodeEncodeError: + raise PicklingError( + f"can't pickle global identifier {name!r} using " + f"pickle protocol {self.proto}") def save_type(self, obj): if obj is type(None): @@ -1316,7 +1386,7 @@ def load_int(self): elif data == TRUE[1:]: val = True else: - val = int(data, 0) + val = int(data) self.append(val) dispatch[INT[0]] = load_int @@ -1336,7 +1406,7 @@ def load_long(self): val = self.readline()[:-1] if val and val[-1] == b'L'[0]: val = val[:-1] - self.append(int(val, 0)) + self.append(int(val)) dispatch[LONG[0]] = load_long def load_long1(self): @@ -1620,8 +1690,13 @@ def find_class(self, module, name): elif module in _compat_pickle.IMPORT_MAPPING: module = _compat_pickle.IMPORT_MAPPING[module] __import__(module, level=0) - if self.proto >= 4: - return _getattribute(sys.modules[module], name)[0] + if self.proto >= 4 and '.' in name: + dotted_path = name.split('.') + try: + return _getattribute(sys.modules[module], dotted_path) + except AttributeError: + raise AttributeError( + f"Can't resolve path {name!r} on module {module!r}") else: return getattr(sys.modules[module], name) @@ -1755,8 +1830,8 @@ def load_build(self): stack = self.stack state = stack.pop() inst = stack[-1] - setstate = getattr(inst, "__setstate__", None) - if setstate is not None: + setstate = getattr(inst, "__setstate__", _NoValue) + if setstate is not _NoValue: setstate(state) return slotstate = None @@ -1831,36 +1906,26 @@ def _loads(s, /, *, fix_imports=True, encoding="ASCII", errors="strict", Pickler, Unpickler = _Pickler, _Unpickler dump, dumps, load, loads = _dump, _dumps, _load, _loads -# Doctest -def _test(): - import doctest - return doctest.testmod() -if __name__ == "__main__": +def _main(args=None): import argparse + import pprint parser = argparse.ArgumentParser( - description='display contents of the pickle files') + description='display contents of the pickle files', + color=True, + ) parser.add_argument( 'pickle_file', - nargs='*', help='the pickle file') - parser.add_argument( - '-t', '--test', action='store_true', - help='run self-test suite') - parser.add_argument( - '-v', action='store_true', - help='run verbosely; only affects self-test run') - args = parser.parse_args() - if args.test: - _test() - else: - if not args.pickle_file: - parser.print_help() + nargs='+', help='the pickle file') + args = parser.parse_args(args) + for fn in args.pickle_file: + if fn == '-': + obj = load(sys.stdin.buffer) else: - import pprint - for fn in args.pickle_file: - if fn == '-': - obj = load(sys.stdin.buffer) - else: - with open(fn, 'rb') as f: - obj = load(f) - pprint.pprint(obj) + with open(fn, 'rb') as f: + obj = load(f) + pprint.pprint(obj) + + +if __name__ == "__main__": + _main() diff --git a/PythonLib/full/pickletools.py b/PythonLib/full/pickletools.py index 33a51492..254b6c7f 100644 --- a/PythonLib/full/pickletools.py +++ b/PythonLib/full/pickletools.py @@ -348,7 +348,7 @@ def read_stringnl(f, decode=True, stripquotes=True, *, encoding='latin-1'): for q in (b'"', b"'"): if data.startswith(q): if not data.endswith(q): - raise ValueError("strinq quote %r not found at both " + raise ValueError("string quote %r not found at both " "ends of %r" % (q, data)) data = data[1:-1] break @@ -2429,8 +2429,6 @@ def dis(pickle, out=None, memo=None, indentlevel=4, annotate=0): + A memo entry isn't referenced before it's defined. + The markobject isn't stored in the memo. - - + A memo entry isn't redefined. """ # Most of the hair here is for sanity checks, but most of it is needed @@ -2484,7 +2482,7 @@ def dis(pickle, out=None, memo=None, indentlevel=4, annotate=0): assert opcode.name == "POP" numtopop = 0 else: - errormsg = markmsg = "no MARK exists on stack" + errormsg = "no MARK exists on stack" # Check for correct memo usage. if opcode.name in ("PUT", "BINPUT", "LONG_BINPUT", "MEMOIZE"): @@ -2494,9 +2492,7 @@ def dis(pickle, out=None, memo=None, indentlevel=4, annotate=0): else: assert arg is not None memo_idx = arg - if memo_idx in memo: - errormsg = "memo key %r already defined" % arg - elif not stack: + if not stack: errormsg = "stack is empty -- can't store into memo" elif stack[-1] is markobject: errormsg = "can't store markobject in the memo" @@ -2842,17 +2838,16 @@ def __init__(self, value): 'disassembler_memo_test': _memo_test, } -def _test(): - import doctest - return doctest.testmod() if __name__ == "__main__": import argparse parser = argparse.ArgumentParser( - description='disassemble one or more pickle files') + description='disassemble one or more pickle files', + color=True, + ) parser.add_argument( 'pickle_file', - nargs='*', help='the pickle file') + nargs='+', help='the pickle file') parser.add_argument( '-o', '--output', help='the file where the output should be written') @@ -2869,36 +2864,24 @@ def _test(): '-p', '--preamble', default="==> {name} <==", help='if more than one pickle file is specified, print this before' ' each disassembly') - parser.add_argument( - '-t', '--test', action='store_true', - help='run self-test suite') - parser.add_argument( - '-v', action='store_true', - help='run verbosely; only affects self-test run') args = parser.parse_args() - if args.test: - _test() + annotate = 30 if args.annotate else 0 + memo = {} if args.memo else None + if args.output is None: + output = sys.stdout else: - if not args.pickle_file: - parser.print_help() - else: - annotate = 30 if args.annotate else 0 - memo = {} if args.memo else None - if args.output is None: - output = sys.stdout + output = open(args.output, 'w') + try: + for arg in args.pickle_file: + if len(args.pickle_file) > 1: + name = '' if arg == '-' else arg + preamble = args.preamble.format(name=name) + output.write(preamble + '\n') + if arg == '-': + dis(sys.stdin.buffer, output, memo, args.indentlevel, annotate) else: - output = open(args.output, 'w') - try: - for arg in args.pickle_file: - if len(args.pickle_file) > 1: - name = '' if arg == '-' else arg - preamble = args.preamble.format(name=name) - output.write(preamble + '\n') - if arg == '-': - dis(sys.stdin.buffer, output, memo, args.indentlevel, annotate) - else: - with open(arg, 'rb') as f: - dis(f, output, memo, args.indentlevel, annotate) - finally: - if output is not sys.stdout: - output.close() + with open(arg, 'rb') as f: + dis(f, output, memo, args.indentlevel, annotate) + finally: + if output is not sys.stdout: + output.close() diff --git a/PythonLib/full/pipes.py b/PythonLib/full/pipes.py deleted file mode 100644 index 61d63b48..00000000 --- a/PythonLib/full/pipes.py +++ /dev/null @@ -1,250 +0,0 @@ -"""Conversion pipeline templates. - -The problem: ------------- - -Suppose you have some data that you want to convert to another format, -such as from GIF image format to PPM image format. Maybe the -conversion involves several steps (e.g. piping it through compress or -uuencode). Some of the conversion steps may require that their input -is a disk file, others may be able to read standard input; similar for -their output. The input to the entire conversion may also be read -from a disk file or from an open file, and similar for its output. - -The module lets you construct a pipeline template by sticking one or -more conversion steps together. It will take care of creating and -removing temporary files if they are necessary to hold intermediate -data. You can then use the template to do conversions from many -different sources to many different destinations. The temporary -file names used are different each time the template is used. - -The templates are objects so you can create templates for many -different conversion steps and store them in a dictionary, for -instance. - - -Directions: ------------ - -To create a template: - t = Template() - -To add a conversion step to a template: - t.append(command, kind) -where kind is a string of two characters: the first is '-' if the -command reads its standard input or 'f' if it requires a file; the -second likewise for the output. The command must be valid /bin/sh -syntax. If input or output files are required, they are passed as -$IN and $OUT; otherwise, it must be possible to use the command in -a pipeline. - -To add a conversion step at the beginning: - t.prepend(command, kind) - -To convert a file to another file using a template: - sts = t.copy(infile, outfile) -If infile or outfile are the empty string, standard input is read or -standard output is written, respectively. The return value is the -exit status of the conversion pipeline. - -To open a file for reading or writing through a conversion pipeline: - fp = t.open(file, mode) -where mode is 'r' to read the file, or 'w' to write it -- just like -for the built-in function open() or for os.popen(). - -To create a new template object initialized to a given one: - t2 = t.clone() -""" # ' - - -import re -import os -import tempfile -import warnings -# we import the quote function rather than the module for backward compat -# (quote used to be an undocumented but used function in pipes) -from shlex import quote - -warnings._deprecated(__name__, remove=(3, 13)) - -__all__ = ["Template"] - -# Conversion step kinds - -FILEIN_FILEOUT = 'ff' # Must read & write real files -STDIN_FILEOUT = '-f' # Must write a real file -FILEIN_STDOUT = 'f-' # Must read a real file -STDIN_STDOUT = '--' # Normal pipeline element -SOURCE = '.-' # Must be first, writes stdout -SINK = '-.' # Must be last, reads stdin - -stepkinds = [FILEIN_FILEOUT, STDIN_FILEOUT, FILEIN_STDOUT, STDIN_STDOUT, \ - SOURCE, SINK] - - -class Template: - """Class representing a pipeline template.""" - - def __init__(self): - """Template() returns a fresh pipeline template.""" - self.debugging = 0 - self.reset() - - def __repr__(self): - """t.__repr__() implements repr(t).""" - return '