diff --git a/third_party/bigframes_vendored/ibis/backends/bigquery/__init__.py b/third_party/bigframes_vendored/ibis/backends/bigquery/__init__.py index b342c7e4a99..1d3f63d216d 100644 --- a/third_party/bigframes_vendored/ibis/backends/bigquery/__init__.py +++ b/third_party/bigframes_vendored/ibis/backends/bigquery/__init__.py @@ -21,9 +21,6 @@ schema_from_bigquery_table, ) from bigframes_vendored.ibis.backends.bigquery.datatypes import BigQuerySchema -from bigframes_vendored.ibis.backends.bigquery.udf.core import ( - PythonToJavaScriptTranslator, -) from bigframes_vendored.ibis.backends.sql import SQLBackend from bigframes_vendored.ibis.backends.sql.compilers import BigQueryCompiler from bigframes_vendored.ibis.backends.sql.datatypes import BigQueryType @@ -731,15 +728,7 @@ def compile( ): """Compile an Ibis expression to a SQL string.""" query = self._to_sqlglot(expr, limit=limit, params=params, **kwargs) - udf_sources = [] - for udf_node in expr.op().find(ops.ScalarUDF): - compile_func = getattr( - self, f"_compile_{udf_node.__input_type__.name.lower()}_udf" - ) - if sql := compile_func(udf_node): - udf_sources.append(sql.sql(self.name, pretty=True)) - - sql = ";\n".join([*udf_sources, query.sql(dialect=self.name, pretty=True)]) + sql = query.sql(dialect=self.name, pretty=True) self._log(sql) return sql @@ -1186,68 +1175,6 @@ def _clean_up_cached_table(self, name): force=True, ) - def _get_udf_source(self, udf_node: ops.ScalarUDF): - name = type(udf_node).__name__ - type_mapper = self.compiler.udf_type_mapper - - body = PythonToJavaScriptTranslator(udf_node.__func__).compile() - config = udf_node.__config__ - libraries = config.get("libraries", []) - - signature = [ - sge.ColumnDef( - this=sg.to_identifier(name, quoted=self.compiler.quoted), - kind=type_mapper.from_ibis(param.annotation.pattern.dtype), - ) - for name, param in udf_node.__signature__.parameters.items() - ] - - lines = ['"""'] - - if config.get("strict", True): - lines.append('"use strict";') - - lines += [ - body, - "", - f"return {udf_node.__func_name__}({', '.join(udf_node.argnames)});", - '"""', - ] - - func = sge.Create( - kind="FUNCTION", - this=sge.UserDefinedFunction( - this=sg.to_identifier(name), expressions=signature, wrapped=True - ), - # not exactly what I had in mind, but it works - # - # quoting is too simplistic to handle multiline strings - expression=sge.Var(this="\n".join(lines)), - exists=False, - properties=sge.Properties( - expressions=[ - sge.TemporaryProperty(), - sge.ReturnsProperty(this=type_mapper.from_ibis(udf_node.dtype)), - sge.StabilityProperty( - this="IMMUTABLE" if config.get("determinism") else "VOLATILE" - ), - sge.LanguageProperty(this=sg.to_identifier("js")), - ] - + [ - sge.Property( - this=sg.to_identifier("library"), - value=self.compiler.f.array(*libraries), - ) - ] - * bool(libraries) - ), - ) - - return func - - def _compile_python_udf(self, udf_node: ops.ScalarUDF) -> None: - return self._get_udf_source(udf_node) - def _register_udfs(self, expr: ir.Expr) -> None: """No op because UDFs made with CREATE TEMPORARY FUNCTION must be followed by a query.""" diff --git a/third_party/bigframes_vendored/ibis/backends/bigquery/udf/__init__.py b/third_party/bigframes_vendored/ibis/backends/bigquery/udf/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/third_party/bigframes_vendored/ibis/backends/bigquery/udf/core.py b/third_party/bigframes_vendored/ibis/backends/bigquery/udf/core.py deleted file mode 100644 index 6f59a2becd7..00000000000 --- a/third_party/bigframes_vendored/ibis/backends/bigquery/udf/core.py +++ /dev/null @@ -1,604 +0,0 @@ -# Contains code from https://github.com/ibis-project/ibis/blob/9.2.0/ibis/backends/bigquery/udf/core.py - -"""Translate a Python AST to JavaScript.""" - -from __future__ import annotations - -import ast -from collections import ChainMap -import contextlib -import functools -import inspect -import textwrap -from typing import TYPE_CHECKING - -from bigframes_vendored.ibis.backends.bigquery.udf.find import find_names -from bigframes_vendored.ibis.backends.bigquery.udf.rewrite import rewrite - -if TYPE_CHECKING: - from collections.abc import Callable - - -class SymbolTable(ChainMap): - """ChainMap subclass implementing scope for the translator. - - Notes - ----- - JavaScript requires declarations in strict mode, so to implement this we - shove a "let" at the beginning of every variable name if it doesn't already - exist in the current scope. - - """ - - def __getitem__(self, key): - if key not in self: - self[key] = key - return f"let {key}" - return key - - -def indent(lines, spaces=4): - """Indent `lines` by `spaces` spaces. - - Parameters - ---------- - lines : Union[str, List[str]] - A string or list of strings to indent - spaces : int - The number of spaces to indent `lines` - - Returns - ------- - indented_lines : str - - """ - if isinstance(lines, str): - text = [lines] - text = "\n".join(lines) - return textwrap.indent(text, " " * spaces) - - -def semicolon(f: Callable) -> Callable: - """Add a semicolon to the result of a `visit_*` call.""" - - @functools.wraps(f) - def wrapper(*args, **kwargs): - return f(*args, **kwargs) + ";" - - return wrapper - - -@rewrite.register(ast.Call(func=ast.Name(id="print"))) -def rewrite_print(node): - return ast.Call( - func=ast.Attribute( - value=ast.Name(id="console", ctx=ast.Load()), - attr="log", - ctx=ast.Load(), - ), - args=node.args, - keywords=node.keywords, - ) - - -@rewrite.register(ast.Call(func=ast.Name(id="len"))) -def rewrite_len(node): - assert len(node.args) == 1 - return ast.Attribute(value=node.args[0], attr="length", ctx=ast.Load()) - - -@rewrite.register(ast.Call(func=ast.Attribute(attr="append"))) -def rewrite_append(node): - return ast.Call( - func=ast.Attribute(value=node.func.value, attr="push", ctx=ast.Load()), - args=node.args, - keywords=node.keywords, - ) - - -@rewrite.register( - ast.Call(func=ast.Attribute(value=ast.Name(id="Array"), attr="from_")) -) -def rewrite_array_from(node): - return ast.Call( - func=ast.Attribute(value=node.func.value, attr="from"), - args=node.args, - keywords=node.keywords, - ) - - -class PythonToJavaScriptTranslator: - constructor_map = { - "list": "Array", - "Array": "Array", - "Date": "Date", - "dict": "Object", - "Map": "Map", - "WeakMap": "WeakMap", - "str": "String", - "String": "String", - "set": "Set", - "Set": "Set", - "WeakSet": "WeakSet", - } - - def __init__(self, function): - self.function = function - self.source = textwrap.dedent(inspect.getsource(function)) - self.ast = ast.parse(self.source) - self.scope = SymbolTable() - self.current_function = None - self.current_class = None - self.is_generator = False - self.is_nested_definition = False - - def compile(self): - return self.visit(self.ast) - - def visit(self, node): - node = rewrite(node) - typename = node.__class__.__name__ - method_name = f"visit_{typename}" - method = getattr(self, method_name, None) - if method is None: - raise NotImplementedError(f"{method_name!r} nodes not yet implemented") - assert callable(method) - - result = method(node) - return result - - def visit_Name(self, node): - if self.current_class is not None and node.id == "self": - return "this" - return node.id - - def visit_Yield(self, node): - self.is_generator = True - return f"yield {self.visit(node.value)}" - - def visit_YieldFrom(self, node): - self.is_generator = True - return f"yield* {self.visit(node.value)}" - - @semicolon - def visit_Assign(self, node): - try: - (target,) = node.targets - except ValueError: - raise NotImplementedError("Only single assignment supported for now") - - if not isinstance(target, (ast.Name, ast.Subscript, ast.Attribute)): - raise NotImplementedError( - "Only index, attribute, and variable name assignment " - f"supported, got {type(target).__name__}" - ) - - is_name = isinstance(target, ast.Name) - compiled_target = self.visit(target) - if not is_name or ( - self.current_class is not None and compiled_target.startswith("this.") - ): - self.scope[compiled_target] = compiled_target - return f"{self.scope[compiled_target]} = {self.visit(node.value)}" - - def translate_special_method(self, name): - return {"__init__": "constructor"}.get(name, name) - - def visit_FunctionDef(self, node): - self.current_function = node - - is_property_getter = any( - getattr(dec, "id", None) == "property" for dec in node.decorator_list - ) - - if self.current_class is None: # not a method - if is_property_getter: - raise TypeError("Functions cannot be properties, only methods can") - prefix = "function" - else: - if is_property_getter and self.is_generator: - raise TypeError("generator methods cannot be properties") - prefix = "get " * is_property_getter - - with self.local_scope(): - body = indent(map(self.visit, node.body)) - - if self.is_generator: - prefix += "* " - else: - prefix += " " * (self.current_class is None) - - lines = [ - prefix - + self.translate_special_method(node.name) - + f"({self.visit(node.args)}) {{", - body, - "}", - ] - - self.current_function = None - self.is_generator = False - return "\n".join(lines) - - @semicolon - def visit_Return(self, node): - return f"return {self.visit(node.value)}" - - def visit_Add(self, node): - return "+" - - def visit_Sub(self, node): - return "-" - - def visit_Mult(self, node): - return "*" - - def visit_Div(self, node): - return "/" - - def visit_FloorDiv(self, node): - raise AssertionError("should never reach FloorDiv") - - def visit_Pow(self, node): - raise AssertionError("should never reach Pow") - - def visit_UnaryOp(self, node): - return f"({self.visit(node.op)}{self.visit(node.operand)})" - - def visit_USub(self, node): - return "-" - - def visit_UAdd(self, node): - return "+" - - def visit_BinOp(self, node): - left, op, right = node.left, node.op, node.right - - if isinstance(op, ast.Pow): - return f"Math.pow({self.visit(left)}, {self.visit(right)})" - elif isinstance(op, ast.FloorDiv): - return f"Math.floor({self.visit(left)} / {self.visit(right)})" - return f"({self.visit(left)} {self.visit(op)} {self.visit(right)})" - - def visit_Constant(self, node): - value = node.value - if value is None: - return "null" - if isinstance(value, bool): - return "true" if value else "false" - if isinstance(value, (int, float, str)): - return repr(value) - raise NotImplementedError( - f"{value.__class__.__name__!r} constants not yet implemented" - ) - - def visit_NameConstant(self, node): - value = node.value - if value is True: - return "true" - elif value is False: - return "false" - assert ( - value is None - ), f"value is not True and is not False, must be None, got {value}" - return "null" - - def visit_Str(self, node): - return repr(node.s) - - def visit_Num(self, node): - return repr(node.n) - - def visit_List(self, node): - return "[{}]".format(", ".join(map(self.visit, node.elts))) - - def visit_Tuple(self, node): - # tuples becomes lists in javascript - return "[{}]".format(", ".join(map(self.visit, node.elts))) - - def visit_Dict(self, node): - return "{{{}}}".format( - ", ".join( - f"[{self.visit(key)}]: {self.visit(value)}" - for key, value in zip(node.keys, node.values) - ) - ) - - @semicolon - def visit_Expr(self, node): - return self.visit(node.value) - - def visit_Starred(self, node): - return f"...{self.visit(node.value)}" - - def visit_Call(self, node): - thing_to_call = self.visit(node.func) - constructors = self.__class__.constructor_map - args = ", ".join(map(self.visit, node.args)) - try: - thing_to_call = constructors[thing_to_call] - except KeyError: - format_string = "{}({})" - else: - format_string = "(new {}({}))" - return format_string.format(thing_to_call, args) - - def visit_Attribute(self, node): - return f"{self.visit(node.value)}.{node.attr}" - - def visit_For(self, node): - lines = [f"for (let {self.visit(node.target)} of {self.visit(node.iter)}) {{"] - with self.local_scope(): - lines.append(indent(map(self.visit, node.body))) - lines.append("}") - return "\n".join(lines) - - def visit_While(self, node): - lines = [f"while ({self.visit(node.test)}) {{"] - with self.local_scope(): - lines.append(indent(map(self.visit, node.body))) - lines.append("}") - return "\n".join(lines) - - @semicolon - def visit_Break(self, node): - return "break" - - @semicolon - def visit_Continue(self, node): - return "continue" - - def visit_Eq(self, node): - return "===" - - def visit_NotEq(self, node): - return "!==" - - def visit_Or(self, node): - return "||" - - def visit_And(self, node): - return "&&" - - def visit_BoolOp(self, node): - return "({})".format( - f" {self.visit(node.op)} ".join(map(self.visit, node.values)) - ) - - def visit_Lt(self, node): - return "<" - - def visit_LtE(self, node): - return "<=" - - def visit_Gt(self, node): - return ">" - - def visit_GtE(self, node): - return ">=" - - def visit_Compare(self, node): - rights = node.comparators - ops = node.ops - - left = node.left - comparisons = [] - for op, right in zip(ops, rights): - comparisons.append( - f"({self.visit(left)} {self.visit(op)} {self.visit(right)})" - ) - left = right - return " && ".join(comparisons) - - @semicolon - def visit_AugAssign(self, node): - target = self.visit(node.target) - op = self.visit(node.op) - value = self.visit(node.value) - return f"{target} {op}= {value}" - - def visit_Module(self, node): - return "\n\n".join(map(self.visit, node.body)) - - def visit_arg(self, node): - if self.current_class is not None and node.arg == "self": - return "" - return node.arg - - def visit_arguments(self, node): - args = list(filter(None, map(self.visit, node.args[:]))) - vararg = node.vararg - if vararg is not None: - args.append(f"...{vararg.arg}") - return ", ".join(args) - - def visit_Lambda(self, node): - args = node.args - generated_args = self.visit(args) - return f"(({generated_args}) => {self.visit(node.body)})" - - @contextlib.contextmanager - def local_scope(self): - """Assign symbols to local variables.""" - self.scope = self.scope.new_child() - try: - yield self.scope - finally: - self.scope = self.scope.parents - - def visit_If(self, node): - lines = [f"if ({self.visit(node.test)}) {{"] - - with self.local_scope(): - lines.append(indent(map(self.visit, node.body))) - lines.append("}") - - if node.orelse: - lines[-1] += " else {" - with self.local_scope(): - lines.append(indent(map(self.visit, node.orelse))) - lines.append("}") - return "\n".join(lines) - - def visit_IfExp(self, node): - test = self.visit(node.test) - body = self.visit(node.body) - orelse = self.visit(node.orelse) - return f"({test} ? {body} : {orelse})" - - def visit_Index(self, node): - return self.visit(node.value) - - def visit_Subscript(self, node): - return f"{self.visit(node.value)}[{self.visit(node.slice)}]" - - def visit_ClassDef(self, node): - self.current_class = node - bases = node.bases - - lines = [f"class {node.name}"] - if bases: - lines[-1] += " extends {}".format(", ".join(map(self.visit, bases))) - lines[-1] += " {" - lines.append(indent(map(self.visit, node.body))) - lines.append("}") - self.current_class = None - self.__class__.constructor_map[node.name] = node.name - return "\n".join(lines) - - def visit_Not(self, node): - return "!" - - def visit_ListComp(self, node): - """Generate a curried lambda function. - - [x + y for x, y in [[1, 4], [2, 5], [3, 6]]] - - becomes - - [[1, 4], [2, 5], [3, 6]]].map(([x, y]) => x + y) - """ - try: - (generator,) = node.generators - except ValueError: - raise NotImplementedError("Only single loop comprehensions are allowed") - - names = find_names(generator.target) - argslist = [ast.arg(arg=name.id, annotation=None) for name in names] - if len(names) <= 1: - signature = ast.arguments( - args=argslist, - vararg=None, - kwonlyargs=[], - kw_defaults=[], - kwarg=None, - defaults=[], - ) - else: - signature = ast.List(elts=argslist, ctx=ast.Load()) - - array = generator.iter - lam_sig = functools.partial(ast.Lambda, args=signature) - - filters = generator.ifs - if filters: - filt = ast.BoolOp(op=ast.And(), values=filters) - # array.filter - method = ast.Attribute(value=array, attr="filter", ctx=ast.Load()) - # array.filter(func) - array = ast.Call(func=method, args=[lam_sig(body=filt)], keywords=[]) - - method = ast.Attribute(value=array, attr="map", ctx=ast.Load()) - mapped = ast.Call(func=method, args=[lam_sig(body=node.elt)], keywords=[]) - result = self.visit(mapped) - return result - - def visit_Delete(self, node): - return "\n".join(f"delete {self.visit(target)};" for target in node.targets) - - -if __name__ == "__main__": - import bigframes_vendored.ibis - from bigframes_vendored.ibis import udf - - @udf.scalar.python(strict=False) - def my_func(a: float, b: float, n: float) -> list[float]: - class Rectangle: - def __init__(self, width, height): - self.width = width - self.height = height - - @property - def area(self): - return self.width * self.height - - @property - def perimeter(self): - return self.width * 2 + self.height * 2 - - def foobar(self, n): - yield from range(n) - - def sum(values): - result = 0 - for value in values: - result += value - console.log(result) # noqa: F821 - return values.reduce(lambda a, b: a + b, 0) - - def range(n): - i = 0 - while i < n: - yield i - i += 1 - - some_stuff = [x + y for x, y in [[1, 4], [2, 5], [3, 6]] if 2 < x < 3] - some_stuff1 = [range(x) for x in [1, 2, 3]] - some_stuff2 = [x + y for x, y in [(1, 4), (2, 5), (3, 6)]] - print(some_stuff) # noqa: T201 - print(some_stuff1) # noqa: T201 - print(some_stuff2) # noqa: T201 - - x = 1 - y = 2 - x = 3 - values = [] - for i in range(10): - values.append(i) - - i = 0 - foo = 2 - bar = lambda x: x # noqa: E731 - bazel = lambda x: y # noqa: E731 - while i < n: - foo = bar(bazel(10)) - i += 1 - console.log(i) # noqa: F821 - - foo = 2 - - if i == 10 and (y < 2 or i != 42): - y += 2 - else: - y -= 2 - - z = 42.0 - w = 3 - w = not False - yyz = None - print(yyz) # noqa: T201 - foobar = x < y < z < w # x < y and y < z and z < w - foobar = 1 - baz = foobar // 3 - console.log(baz) # noqa: F821 - - my_obj = {"a": 1, "b": 2} # noqa: F841 - - z = (x if y else b) + 2 + foobar - foo = Rectangle(1, 2) - nnn = len(values) - return [sum(values) - a + b * y**-x, z, foo.width, nnn] - - print( - bigframes_vendored.ibis.bigquery.compile(my_func(42.7, 13.2, 1)) - ) # noqa: T201 diff --git a/third_party/bigframes_vendored/ibis/backends/bigquery/udf/find.py b/third_party/bigframes_vendored/ibis/backends/bigquery/udf/find.py deleted file mode 100644 index b1f353ae4f1..00000000000 --- a/third_party/bigframes_vendored/ibis/backends/bigquery/udf/find.py +++ /dev/null @@ -1,64 +0,0 @@ -# Contains code from https://github.com/ibis-project/ibis/blob/9.2.0/ibis/backends/bigquery/udf/find.py - -from __future__ import annotations - -import ast - -import toolz - - -class NameFinder: - """Helper class to find the unique names in an AST.""" - - __slots__ = () - - def find(self, node): - typename = type(node).__name__ - method = getattr(self, f"find_{typename}", None) - if method is None: - fields = getattr(node, "_fields", None) - if fields is None: - return - for field in fields: - value = getattr(node, field) - yield from self.find(value) - else: - yield from method(node) - - def find_Name(self, node): - # TODO not sure if this is robust to scope changes - yield node - - def find_list(self, node): - return list(toolz.concat(map(self.find, node))) - - def find_Call(self, node): - if not isinstance(node.func, ast.Name): - fields = node._fields - else: - fields = [field for field in node._fields if field != "func"] - return toolz.concat(map(self.find, (getattr(node, field) for field in fields))) - - -def find_names(node: ast.AST) -> list[ast.Name]: - """Return the unique `ast.Name` instances in an AST. - - Examples - -------- - >>> import ast - >>> node = ast.parse("a + b") - >>> names = find_names(node) - >>> names - [<....Name object at 0x...>, <....Name object at 0x...>] - >>> names[0].id - 'a' - >>> names[1].id - 'b' - - """ - return list( - toolz.unique( - filter(None, NameFinder().find(node)), - key=lambda node: (node.id, type(node.ctx)), - ) - ) diff --git a/third_party/bigframes_vendored/ibis/backends/bigquery/udf/rewrite.py b/third_party/bigframes_vendored/ibis/backends/bigquery/udf/rewrite.py deleted file mode 100644 index 6d2b0df7cdf..00000000000 --- a/third_party/bigframes_vendored/ibis/backends/bigquery/udf/rewrite.py +++ /dev/null @@ -1,54 +0,0 @@ -# Contains code from https://github.com/ibis-project/ibis/blob/9.2.0/ibis/backends/bigquery/udf/rewrite.py - -from __future__ import annotations - -import ast -from typing import TYPE_CHECKING - -if TYPE_CHECKING: - from collections.abc import Callable - - -def matches(value: ast.AST, pattern: ast.AST) -> bool: - """Check whether `value` matches `pattern`.""" - # types must match exactly - if type(value) is not type(pattern): - return False - - # primitive value, such as None, True, False etc - if not isinstance(value, ast.AST) and not isinstance(pattern, ast.AST): - return value == pattern - - fields = [ - (field, getattr(pattern, field)) - for field in pattern._fields - if hasattr(pattern, field) - ] - return all( - matches(getattr(value, field_name), field_value) - for field_name, field_value in fields - ) - - -class Rewriter: - """AST pattern matcher to enable rewrite rules.""" - - def __init__(self): - self.funcs: list[tuple[ast.AST, Callable[[ast.expr], ast.expr]]] = [] - - def register(self, pattern): - def wrapper(f): - self.funcs.append((pattern, f)) - return f - - return wrapper - - def __call__(self, node): - # TODO: more efficient way of doing this? - for pattern, func in self.funcs: - if matches(node, pattern): - return func(node) - return node - - -rewrite = Rewriter()