"""Generate static DuckDB function namespace classes.""" from __future__ import annotations from collections import defaultdict import keyword from pathlib import Path from textwrap import indent import duckdb HEADER = """Auto-generated DuckDB function namespaces. This file is generated by ``scripts/generate_function_namespaces.py``. Do not edit by hand; regenerate to pull in new function definitions. """ CATALOG_VERSION = "1.3" SCHEMA_PRIORITY = {"main": 0, "duckdb": 1, "pg_catalog": 2} RETURN_CATEGORY_DOCS = { "numeric": "numeric", "boolean": "boolean", "varchar": "string", "blob": "binary", "generic": "generic", } CATEGORY_TO_EXPRESSION = { "numeric": "NumericExpression", "boolean": "BooleanExpression", "varchar": "VarcharExpression", "blob": "BlobExpression", "generic": "TypedExpression", } STUB_VALID_TYPE_IGNORE = {"list"} def _categorise_return_type(return_type: str | None) -> str: if return_type is None: return "generic" normalized = return_type.upper() if any( normalized.startswith(prefix) for prefix in ( "TINYINT", "SMALLINT", "INTEGER", "BIGINT", "HUGEINT", "UTINYINT", "USMALLINT", "UINTEGER", "UBIGINT", "FLOAT", "DOUBLE", "DECIMAL", "REAL", "INTERVAL", ) ): return "numeric" if normalized.startswith("BOOLEAN"): return "boolean" if any( normalized.startswith(prefix) for prefix in ("VARCHAR", "STRING", "TEXT", "JSON", "UUID") ): return "varchar" if normalized.startswith("BLOB"): return "blob" return "generic" def _definition_sort_key( definition: tuple[ str, str, str | None, tuple[str, ...], tuple[str, ...], str | None, str | None, str | None, str | None, ] ) -> tuple[int, int]: schema, _name, _rtype, parameter_types, _parameter_names, _varargs, *_ = definition schema_priority = SCHEMA_PRIORITY.get(schema, 10) arity = len(parameter_types) return (schema_priority, arity) def _quote_identifier(identifier: str) -> str: return f'"{identifier.replace("\"", "\"\"")}"' def _infer_macro_return_type( connection: duckdb.DuckDBPyConnection, schema_name: str, 
function_name: str, parameter_count: int, ) -> str | None: qualified_name = f"{_quote_identifier(schema_name)}.{_quote_identifier(function_name)}" arguments = ", ".join("NULL" for _ in range(parameter_count)) call = f"{qualified_name}({arguments})" if parameter_count else f"{qualified_name}()" try: result = connection.execute(f"SELECT typeof({call})").fetchone() except duckdb.Error: return None if not result: return None inferred = result[0] if isinstance(inferred, str) and inferred: return inferred.upper() return None def _load_definitions() -> dict[ str, dict[ str, dict[ str, list[ tuple[ str, str, str | None, tuple[str, ...], tuple[str, ...], str | None, str | None, str | None, str | None, ] ], ], ], ]: connection = duckdb.connect() processed_rows: list[ tuple[ str, str, str, str | None, tuple[str, ...], tuple[str, ...], str | None, str | None, str | None, str | None, ] ] = [] try: rows = connection.execute( """ SELECT schema_name, function_name, function_type, return_type, parameters, parameter_types, varargs, description, comment, macro_definition FROM duckdb_functions() WHERE function_type IN ('scalar', 'aggregate', 'window', 'macro') """ ).fetchall() for ( schema_name, function_name, function_type, return_type, parameters, parameter_types, varargs, description, comment, macro_definition, ) in rows: parameter_names_tuple = tuple(parameters or ()) parameter_types_tuple = tuple(parameter_types or ()) if function_type == "macro": function_type = "scalar" if return_type is None: inferred_type = _infer_macro_return_type( connection, schema_name, function_name, len(parameter_names_tuple), ) if inferred_type is not None: return_type = inferred_type processed_rows.append( ( schema_name, function_name, function_type, return_type, parameter_types_tuple, parameter_names_tuple, varargs, description, comment, macro_definition, ) ) finally: connection.close() index: dict[str, dict[str, dict[str, list[tuple[str, str, str | None, tuple[str, ...], tuple[str, ...], str | None, 
str | None, str | None, str | None]]]]] = defaultdict( lambda: defaultdict(dict) ) for ( schema_name, function_name, function_type, return_type, parameter_types, parameter_names, varargs, description, comment, macro_definition, ) in processed_rows: category = _categorise_return_type(return_type) type_bucket = index[function_type] category_bucket = type_bucket.setdefault(category, defaultdict(list)) category_bucket[function_name].append( ( schema_name, function_name, return_type, parameter_types, parameter_names, varargs, description, comment, macro_definition, ) ) for type_bucket in index.values(): for category_bucket in type_bucket.values(): for overloads in category_bucket.values(): overloads.sort(key=_definition_sort_key) return index def _format_type(type_spec: str | None) -> str: if type_spec is None: return "None" return f"parse_type({type_spec!r})" def _format_parameters(parameters: tuple[str, ...]) -> str: if not parameters: return "()" formatted = ", ".join(_format_type(parameter) for parameter in parameters) if len(parameters) == 1: formatted += "," return f"({formatted})" def _format_parameter_names(names: tuple[str, ...]) -> str: if not names: return "()" formatted = ", ".join(repr(name) for name in names) if len(names) == 1: formatted += "," return f"({formatted})" def _format_definition( schema: str, name: str, return_type: str | None, parameters: tuple[str, ...], parameter_names: tuple[str, ...], varargs: str | None, description: str | None, comment: str | None, macro_definition: str | None, ) -> str: return "\n".join( [ " DuckDBFunctionDefinition(", f" schema_name={schema!r},", f" function_name={name!r},", " function_type=function_type,", f" return_type={_format_type(return_type)},", f" parameter_types={_format_parameters(parameters)},", f" parameters={_format_parameter_names(parameter_names)},", f" varargs={_format_type(varargs)},", f" description={description!r},", f" comment={comment!r},", f" macro_definition={macro_definition!r},", " ),", ] ) 
# One overload record: (schema, name, return type, parameter types,
# parameter names, varargs, description, comment, macro definition).
# Optional fields are quoted so the alias also evaluates on interpreters
# without runtime support for ``X | None``.
_Overload = tuple[
    str,
    str,
    "str | None",
    tuple[str, ...],
    tuple[str, ...],
    "str | None",
    "str | None",
    "str | None",
    "str | None",
]

# Keyword-only parameters appended to every generated aggregate method.
_AGGREGATE_RUNTIME_KEYWORDS = (
    "order_by: Iterable[object] | object | None = None,"
    " within_group: Iterable[object] | object | None = None,"
    " partition_by: Iterable[object] | object | None = None,"
    " over_order_by: Iterable[object] | object | None = None,"
    " frame: str | None = None"
)

# The same parameters as they appear in the ``.pyi`` stub.
_AGGREGATE_STUB_KEYWORDS = (
    "order_by: Iterable[object] | object | None = ..., "
    "within_group: Iterable[object] | object | None = ..., "
    "partition_by: Iterable[object] | object | None = ..., "
    "over_order_by: Iterable[object] | object | None = ..., "
    "frame: str | None = ..."
)

# Lines that forward those keywords inside a generated aggregate method body.
_AGGREGATE_FORWARDED_ARGUMENTS = (
    "            order_by=order_by,",
    "            within_group=within_group,",
    "            partition_by=partition_by,",
    "            over_order_by=over_order_by,",
    "            frame=frame,",
)


def _signature_constant_name(name: str) -> str:
    """Return the class-constant name holding the overloads of *name*.

    Identifier names are upper-cased; any other name is spelled as its
    character code points in four-digit hex joined by underscores.
    """
    if name.isidentifier():
        normalized = name.upper()
    else:
        normalized = "_".join(f"{ord(char):04x}" for char in name)
    return f"_{normalized}_SIGNATURES"


def _symbol_method_name(symbol: str) -> str:
    """Return the method name used for an operator-like function name."""
    codes = "_".join(f"{ord(char):04x}" for char in symbol)
    return f"symbol_{codes}"


def _format_overload_summary(
    schema: str,
    name: str,
    return_type: str | None,
    parameter_types: tuple[str, ...],
    parameter_names: tuple[str, ...],
    varargs: str | None,
) -> str:
    """Render one overload as a ``- schema.name(args) -> type`` bullet.

    Missing parameter names fall back to ``argN``; missing types and a
    missing return type are shown as ``ANY``.
    """
    components: list[str] = []
    for position, parameter_type in enumerate(parameter_types):
        has_name = position < len(parameter_names) and parameter_names[position]
        parameter_name = parameter_names[position] if has_name else f"arg{position}"
        components.append(f"{parameter_type or 'ANY'} {parameter_name}")
    if varargs:
        components.append(f"{varargs} ...")
    arguments = ", ".join(components)
    return f"- {schema}.{name}({arguments}) -> {return_type or 'ANY'}"


def _escape_docstring_text(text: str) -> str:
    """Escape *text* for use inside a non-raw triple-double-quoted literal."""
    # Backslashes first, so the escapes added for the quotes stay intact.
    return text.replace("\\", "\\\\").replace('"""', '\\"\\"\\"')


def _docstring_for_function(
    name: str,
    overloads: list[_Overload],
    *,
    filter_variant: bool = False,
    symbol: str | None = None,
) -> str:
    """Render the docstring source of a generated method.

    Args:
        name: Function name shown in the summary when *symbol* is not given.
        overloads: Overload records listed under ``Overloads:``; the first
            non-empty description among them becomes the description.
        filter_variant: Mention ``FILTER`` in the summary line.
        symbol: Operator spelling shown instead of *name*.

    Returns:
        The docstring lines, indented for a method body and joined with
        newlines. Catalog text is escaped so that backslashes and triple
        quotes in it cannot corrupt or terminate the generated literal.
    """
    target = symbol or name
    header = f"Call DuckDB function ``{target}``"
    if filter_variant:
        header += " with ``FILTER``"
    header += "."
    description = next((entry[6] for entry in overloads if entry[6]), None)

    content = [header]
    if description:
        content += ["", description]
    content += ["", "Overloads:"]
    content += [_format_overload_summary(*overload[:6]) for overload in overloads]
    escaped = [_escape_docstring_text(line) for line in content]

    lines = [f'        """{escaped[0]}']
    # Blank separator lines stay empty rather than carrying trailing spaces.
    lines += [f"        {line}" if line else "" for line in escaped[1:]]
    lines.append('        """')
    return "\n".join(lines)


def _render_signature_constant(constant_name: str, overloads: list[_Overload]) -> str:
    """Render the class-level tuple constant listing *overloads*."""
    lines = [f"    {constant_name}: ClassVar[tuple[DuckDBFunctionDefinition, ...]] = ("]
    lines.extend(indent(_format_definition(*overload), "    ") for overload in overloads)
    lines.append("    )")
    return "\n".join(lines)


def _render_method(
    method_name: str,
    *,
    expression: str,
    constant_name: str,
    overloads: list[_Overload],
    function_type: str,
    filter_variant: bool = False,
    symbol: str | None = None,
    original_name: str | None = None,
    registered_names: tuple[str, ...] | None = None,
) -> str:
    """Render the source of one generated namespace method.

    Args:
        method_name: Name of the emitted method.
        expression: Expression class name used as the return annotation.
        constant_name: Class constant holding the overload definitions.
        overloads: Overload records, used for the docstring.
        function_type: ``"aggregate"`` adds the aggregate keyword parameters
            and forwards them to the call helper.
        filter_variant: Emit the variant that takes a leading ``predicate``
            and calls ``call_duckdb_filter_function``.
        symbol: Operator spelling passed to the decorator as ``symbols=``.
        original_name: Name shown in the docstring instead of *method_name*.
        registered_names: Names passed positionally to the decorator.

    Returns:
        The decorator, signature, docstring and body joined with newlines.
    """
    is_aggregate = function_type == "aggregate"
    signature = "predicate: object, *operands: object" if filter_variant else "*operands: object"
    if is_aggregate:
        signature += ", " + _AGGREGATE_RUNTIME_KEYWORDS

    decorator_parts: list[str] = []
    if registered_names:
        decorator_parts.append(", ".join(repr(name) for name in registered_names))
    if symbol is not None:
        decorator_parts.append(f"symbols=({symbol!r},)")
    decorator_call = ", ".join(decorator_parts)

    lines = [
        f"    @duckdb_function({decorator_call})",
        f"    def {method_name}(self, {signature}) -> {expression}:",
        _docstring_for_function(
            original_name or method_name,
            overloads,
            filter_variant=filter_variant,
            symbol=symbol,
        ),
    ]
    if filter_variant:
        lines += ["        return call_duckdb_filter_function(", "            predicate,"]
    else:
        lines.append("        return call_duckdb_function(")
    lines += [
        f"            self.{constant_name},",
        "            return_category=self.return_category,",
        "            operands=operands,",
    ]
    if is_aggregate:
        lines.extend(_AGGREGATE_FORWARDED_ARGUMENTS)
    lines.append("        )")
    return "\n".join(lines)


def _render_registry(attribute: str, registry: dict[str, str]) -> list[str]:
    """Render a ``name -> method name`` class-level dict as source lines."""
    annotation = f"    {attribute}: ClassVar[dict[str, str]] = "
    if not registry:
        return [annotation + "{}"]
    lines = ["", annotation + "{"]
    lines.extend(f"        {alias!r}: {method_name!r}," for alias, method_name in registry.items())
    lines.append("    }")
    return lines


def _render_namespace(
    *,
    function_type: str,
    category: str,
    identifiers: dict[str, list[_Overload]],
    symbols: dict[str, list[_Overload]],
) -> str:
    """Render one namespace class for a function type and return category.

    Args:
        function_type: Function type, e.g. ``"scalar"`` or ``"aggregate"``.
        category: Return category; must be a key of ``CATEGORY_TO_EXPRESSION``.
        identifiers: Overloads of functions that get a method named after
            them. Aggregates additionally get a ``<name>_filter`` method.
        symbols: Overloads of functions exposed through ``symbol_<codes>``
            methods instead.

    Returns:
        The class source: signature constants, methods and the two
        name-to-method registries.
    """
    class_name = f"{function_type.title()}{category.title()}Functions"
    doc_category = RETURN_CATEGORY_DOCS.get(category, category)
    doc = f"DuckDB {function_type} functions returning {doc_category} results."
    expression = CATEGORY_TO_EXPRESSION[category]
    identifier_registry: dict[str, str] = {}
    symbol_registry: dict[str, str] = {}

    lines: list[str] = [
        f"class {class_name}(_StaticFunctionNamespace):",
        f'    """{doc}"""',
        "    __slots__ = ()",
        "    function_type: ClassVar[str] = " + repr(function_type),
        "    return_category: ClassVar[str] = " + repr(category),
    ]

    for function_name in sorted(identifiers):
        overloads = identifiers[function_name]
        constant_name = _signature_constant_name(function_name)
        lines.append(_render_signature_constant(constant_name, overloads))
        lines.append(
            _render_method(
                function_name,
                expression=expression,
                constant_name=constant_name,
                overloads=overloads,
                function_type=function_type,
                registered_names=(function_name,),
            )
        )
        identifier_registry[function_name] = function_name
        if function_type == "aggregate":
            filter_name = f"{function_name}_filter"
            lines.append(
                _render_method(
                    filter_name,
                    expression=expression,
                    constant_name=constant_name,
                    overloads=overloads,
                    function_type=function_type,
                    filter_variant=True,
                    original_name=function_name,
                    registered_names=(filter_name,),
                )
            )
            identifier_registry[filter_name] = filter_name

    for symbol_name in sorted(symbols):
        overloads = symbols[symbol_name]
        constant_name = _signature_constant_name(symbol_name)
        method_name = _symbol_method_name(symbol_name)
        lines.append(_render_signature_constant(constant_name, overloads))
        lines.append(
            _render_method(
                method_name,
                expression=expression,
                constant_name=constant_name,
                overloads=overloads,
                function_type=function_type,
                symbol=symbol_name,
                registered_names=(),
            )
        )
        symbol_registry[symbol_name] = method_name

    lines.extend(_render_registry("_IDENTIFIER_FUNCTIONS", identifier_registry))
    lines.extend(_render_registry("_SYMBOLIC_FUNCTIONS", symbol_registry))
    return "\n".join(lines)


def _render_stub_namespace(
    *,
    function_type: str,
    category: str,
    identifiers: dict[str, list[_Overload]],
    symbols: dict[str, list[_Overload]],
) -> str:
    """Render the ``.pyi`` stub class matching ``_render_namespace``.

    Only the keys of *identifiers* and *symbols* are used; every method is
    emitted as a one-line ``...`` signature.
    """
    class_name = f"{function_type.title()}{category.title()}Functions"
    expression = CATEGORY_TO_EXPRESSION[category]
    lines = [f"class {class_name}(_StaticFunctionNamespace[{expression}]):"]
    if not identifiers and not symbols:
        lines.append("    ...")
        return "\n".join(lines)

    is_aggregate = function_type == "aggregate"
    operands = "*operands: object"
    if is_aggregate:
        operands += ", " + _AGGREGATE_STUB_KEYWORDS

    for function_name in sorted(identifiers):
        lines.append(f"    def {function_name}(self, {operands}) -> {expression}: ...")
        if is_aggregate:
            lines.append(
                f"    def {function_name}_filter(self, predicate: object, {operands})"
                f" -> {expression}: ..."
            )
    for symbol_name in sorted(symbols):
        method_name = _symbol_method_name(symbol_name)
        lines.append(f"    def {method_name}(self, {operands}) -> {expression}: ...")
    return "\n".join(lines)


def _render_type_namespace(function_type: str, categories: dict[str, str]) -> str:
    """Render the class grouping the category namespaces of a function type.

    Args:
        function_type: Function type used for the class name and docstring.
        categories: Mapping of category name to namespace class name; each
            becomes a title-cased class attribute holding an instance.
    """
    class_name = f"{function_type.title()}FunctionNamespace"
    lines = [
        f"class {class_name}:",
        f'    """DuckDB {function_type} function categories."""',
        "    __slots__ = ()",
    ]
    for category_name, cls_name in categories.items():
        lines.append(f"    {category_name.title()}: {cls_name} = {cls_name}()  # noqa: N802")
    lines.append("    def __dir__(self) -> list[str]:")
    lines.append(
        "        return sorted(name for name in self.__class__.__dict__ if not name.startswith('_'))"
    )
    return "\n".join(lines)


def _render_stub_type_namespace(function_type: str, categories: dict[str, str]) -> str:
    """Render the ``.pyi`` stub matching ``_render_type_namespace``."""
    class_name = f"{function_type.title()}FunctionNamespace"
    lines = [f"class {class_name}:", "    ..."]
    for category_name, cls_name in categories.items():
        lines.append(f"    {category_name.title()}: {cls_name}")
    lines.append("    def __dir__(self) -> list[str]: ...")
    return "\n".join(lines)


def _render_catalog_markdown(
    catalog: dict[str, dict[str, dict[str, set[str]]]],
) -> str:
    """Render the markdown catalog of exposed functions.

    Args:
        catalog: Mapping ``function type -> category -> {"identifiers": ...,
            "symbols": ...}`` of function-name sets.

    Returns:
        Markdown with one section per function type (scalar, aggregate,
        window) and one subsection per category, ending in a single newline.
    """
    lines = [
        f"# DuckDB typed function catalog (DuckPlus {CATALOG_VERSION})",
        "",
        "This catalog enumerates every DuckDB scalar, aggregate, and window function",
        "exposed through DuckPlus' static typed API. Entries are grouped by return",
        "category to match the generated namespaces.",
        "",
        "Aggregate helpers also expose ``_filter`` variants that render ``FILTER",
        "(WHERE ...)`` with a typed boolean predicate as the first argument.",
        "",
        "The file is auto-generated by ``scripts/generate_function_namespaces.py``;",
        "run the script after upgrading DuckDB to refresh the catalog.",
        "",
    ]
    for function_type in ("scalar", "aggregate", "window"):
        type_bucket = catalog.get(function_type, {})
        lines.append(f"## {function_type.title()} functions")
        lines.append("")
        if not type_bucket:
            lines.append("(No functions exposed in this release.)")
            lines.append("")
            continue
        for category, entries in sorted(type_bucket.items()):
            lines.append(f"### {category.title()} results")
            identifiers = sorted(entries["identifiers"])
            symbols = sorted(entries["symbols"])
            if identifiers:
                lines.extend(f"- ``{name}``" for name in identifiers)
            else:
                lines.append("- *(no identifier functions)*")
            if symbols:
                lines.append("")
                lines.append("Symbolic operators:")
                lines.extend(f"- ``{symbol}``" for symbol in symbols)
            lines.append("")
        lines.append("")
    return "\n".join(lines).rstrip() + "\n"


def main() -> None:
    """Generate the namespace module, its stub and the markdown catalog.

    Loads the catalog with ``_load_definitions`` and writes three files
    relative to the parent of this script's directory:
    ``duckplus/static_typed/_generated_function_namespaces.py``, the ``.pyi``
    next to it, and
    ``docs/versions/<CATALOG_VERSION>/api/typed/function_catalog.md``.
    """
    index = _load_definitions()
    # Make sure every type/category pair gets a class, even when empty.
    for function_type in ("aggregate", "scalar", "window"):
        type_bucket = index.setdefault(function_type, {})
        for category in ("blob", "boolean", "generic", "numeric", "varchar"):
            type_bucket.setdefault(category, {})

    catalog: dict[
        str,
        dict[str, dict[str, set[str]]],
    ] = defaultdict(lambda: defaultdict(lambda: {"identifiers": set(), "symbols": set()}))

    output_lines: list[str] = [
        "from __future__ import annotations",
        "",
        "from typing import ClassVar, Iterable",
        "",
        "from .functions import (",
        "    DuckDBFunctionDefinition,",
        "    DuckDBFunctionSignature,",
        "    _StaticFunctionNamespace,",
        "    duckdb_function,",
        "    call_duckdb_filter_function,",
        "    call_duckdb_function,",
        ")",
        "from .types import parse_type",
        "",
        "",
    ]
    stub_lines: list[str] = [
        "from __future__ import annotations",
        "",
        "from typing import Callable, ClassVar, Generic, Iterable, Mapping, Tuple, TypeVar",
        "",
        "from .expression import BlobExpression, BooleanExpression, GenericExpression, NumericExpression, TypedExpression, VarcharExpression",
        "",
        "class DuckDBFunctionDefinition: ...",
        "class DuckDBFunctionSignature: ...",
        "_NamespaceExprT = TypeVar('_NamespaceExprT', bound=TypedExpression)",
        "",
        "class _StaticFunctionNamespace(Generic[_NamespaceExprT]):",
        "    function_type: ClassVar[str]",
        "    return_category: ClassVar[str]",
        "    _IDENTIFIER_FUNCTIONS: Mapping[str, str]",
        "    _SYMBOLIC_FUNCTIONS: Mapping[str, str]",
        "    def __getitem__(self, name: str) -> Callable[..., _NamespaceExprT]: ...",
        "    def get(",
        "        self,",
        "        name: str,",
        "        default: Callable[..., _NamespaceExprT] | None = ...,",
        "    ) -> Callable[..., _NamespaceExprT] | None: ...",
        "    def __contains__(self, name: object) -> bool: ...",
        "    @property",
        "    def symbols(self) -> Mapping[str, Callable[..., _NamespaceExprT]]: ...",
        "    def __dir__(self) -> list[str]: ...",
        "",
    ]

    for function_type, categories in sorted(index.items()):
        for category, functions in sorted(categories.items()):
            identifiers: dict[str, list[_Overload]] = {}
            symbols: dict[str, list[_Overload]] = {}
            for name, overloads in functions.items():
                catalog_entry = catalog[function_type][category]
                # Names that cannot be spelled as a method (operators, Python
                # keywords) are exposed as symbols instead.
                if name.isidentifier() and not keyword.iskeyword(name):
                    identifiers[name] = overloads
                    catalog_entry["identifiers"].add(name)
                else:
                    symbols[name] = overloads
                    catalog_entry["symbols"].add(name)
            output_lines.append(
                _render_namespace(
                    function_type=function_type,
                    category=category,
                    identifiers=identifiers,
                    symbols=symbols,
                )
            )
            output_lines.append("")
            stub_lines.append(
                _render_stub_namespace(
                    function_type=function_type,
                    category=category,
                    identifiers=identifiers,
                    symbols=symbols,
                )
            )
            stub_lines.append("")
        class_names = {
            category: f"{function_type.title()}{category.title()}Functions"
            for category in sorted(categories)
        }
        output_lines.append(_render_type_namespace(function_type, class_names))
        output_lines.append("")
        stub_lines.append(_render_stub_type_namespace(function_type, class_names))
        stub_lines.append("")

    output_lines.extend(
        [
            "class DuckDBFunctionNamespace:",
            '    """Aggregate entry point for DuckDB typed function namespaces."""',
            "    __slots__ = ()",
            "    Scalar: ScalarFunctionNamespace = ScalarFunctionNamespace()",  # noqa: N802
            "    Aggregate: AggregateFunctionNamespace = AggregateFunctionNamespace()",  # noqa: N802
            "    Window: WindowFunctionNamespace = WindowFunctionNamespace()",  # noqa: N802
            "    def __dir__(self) -> list[str]:  # pragma: no cover - interactive helper",
            "        return ['Scalar', 'Aggregate', 'Window']",
            "",
            "SCALAR_FUNCTIONS = ScalarFunctionNamespace()",
            "AGGREGATE_FUNCTIONS = AggregateFunctionNamespace()",
            "WINDOW_FUNCTIONS = WindowFunctionNamespace()",
            "",
            "__all__ = [",
            "    'DuckDBFunctionDefinition',",
            "    'DuckDBFunctionNamespace',",
            "    'DuckDBFunctionSignature',",
            "    'SCALAR_FUNCTIONS',",
            "    'AGGREGATE_FUNCTIONS',",
            "    'WINDOW_FUNCTIONS',",
            "]",
            "",
        ]
    )
    stub_lines.extend(
        [
            "class DuckDBFunctionNamespace:",
            "    ...",
            "    Scalar: ScalarFunctionNamespace",
            "    Aggregate: AggregateFunctionNamespace",
            "    Window: WindowFunctionNamespace",
            "    def __dir__(self) -> list[str]: ...",
            "",
            "SCALAR_FUNCTIONS: ScalarFunctionNamespace",
            "AGGREGATE_FUNCTIONS: AggregateFunctionNamespace",
            "WINDOW_FUNCTIONS: WindowFunctionNamespace",
            "",
            "__all__ = [",
            "    'DuckDBFunctionDefinition',",
            "    'DuckDBFunctionNamespace',",
            "    'DuckDBFunctionSignature',",
            "    'SCALAR_FUNCTIONS',",
            "    'AGGREGATE_FUNCTIONS',",
            "    'WINDOW_FUNCTIONS',",
            "]",
        ]
    )

    project_root = Path(__file__).resolve().parent.parent

    output_path = project_root / "duckplus" / "static_typed" / "_generated_function_namespaces.py"
    output = "\n".join(output_lines).strip() + "\n"
    prologue = "# pylint: skip-file\n# mypy: ignore-errors\n\n" + f'"""{HEADER}\n"""\n\n'
    output_path.write_text(prologue + output, encoding="utf-8")
    print(f"Wrote {output_path}")

    stub_path = output_path.with_suffix(".pyi")
    stub_body = "\n".join(stub_lines).strip() + "\n"
    stub_prologue = f'"""{HEADER}\n"""\n\n'
    stub_path.write_text(stub_prologue + stub_body, encoding="utf-8")
    print(f"Wrote {stub_path}")

    catalog_path = (
        project_root
        / "docs"
        / "versions"
        / CATALOG_VERSION
        / "api"
        / "typed"
        / "function_catalog.md"
    )
    catalog_path.parent.mkdir(parents=True, exist_ok=True)
    catalog_body = _render_catalog_markdown(catalog)
    catalog_path.write_text(catalog_body, encoding="utf-8")
    print(f"Wrote {catalog_path}")


if __name__ == "__main__":
    main()