From 612415516e1d06b9dd33b6ed592279c7bb1f3e97 Mon Sep 17 00:00:00 2001 From: Siew Kam Onn Date: Thu, 18 Jun 2026 16:38:17 +0800 Subject: [PATCH 1/4] feat: add deprecation warning for Expr passed to literal-only args - Introduced shared `_warn_if_expr_for_literal_arg` in `functions/__init__.py` - Added `DeprecationWarning` for the following methods when `Expr` is passed as argument: - `encode(..., encoding=Expr)` - `decode(..., encoding=Expr)` - `digest(..., method=Expr)` - `arrow_cast(..., data_type=Expr)` - `arrow_try_cast(..., data_type=Expr)` - `arrow_metadata(..., key=Expr)` test: update tests to check for warnings - Implemented tests in `test_functions.py` to ensure: - Warning is raised for `Expr` form - No warning for native literal form --- python/datafusion/functions/__init__.py | 13 ++++++ python/tests/test_functions.py | 59 +++++++++++++++++++++++++ 2 files changed, 72 insertions(+) diff --git a/python/datafusion/functions/__init__.py b/python/datafusion/functions/__init__.py index 54783f086..fb6a607eb 100644 --- a/python/datafusion/functions/__init__.py +++ b/python/datafusion/functions/__init__.py @@ -73,6 +73,13 @@ def _warn_expr_for_literal_arg(function_name: str, arg_name: str) -> None: ) +def _warn_if_expr_for_literal_arg( + value: Any, function_name: str, arg_name: str +) -> None: + if isinstance(value, Expr): + _warn_expr_for_literal_arg(function_name, arg_name) + + __all__ = [ "abs", "acos", @@ -437,6 +444,7 @@ def encode(expr: Expr, encoding: Expr | str) -> Expr: >>> result.collect_column("enc")[0].as_py() 'aGVsbG8' """ + _warn_if_expr_for_literal_arg(encoding, "encode", "encoding") encoding = coerce_to_expr(encoding) return Expr(f.encode(expr.expr, encoding.expr)) @@ -452,6 +460,7 @@ def decode(expr: Expr, encoding: Expr | str) -> Expr: >>> result.collect_column("dec")[0].as_py() b'hello' """ + _warn_if_expr_for_literal_arg(encoding, "decode", "encoding") encoding = coerce_to_expr(encoding) return Expr(f.decode(expr.expr, encoding.expr)) @@ -742,6 +751,7 @@ def digest(value: Expr, method: Expr | str) -> Expr: >>> len(result.collect_column("d")[0].as_py()) > 0 True """ + _warn_if_expr_for_literal_arg(method, "digest", "method") method = coerce_to_expr(method) return Expr(f.digest(value.expr, method.expr)) @@ -3096,6 +3106,7 @@ def arrow_cast(expr: Expr, data_type: Expr | str | pa.DataType) -> Expr: >>> result.collect_column("c")[0].as_py() 1.0 """ + _warn_if_expr_for_literal_arg(data_type, "arrow_cast", "data_type") if isinstance(data_type, pa.DataType): return expr.cast(data_type) if isinstance(data_type, str): @@ -3128,6 +3139,7 @@ def arrow_try_cast(expr: Expr, data_type: Expr | str | pa.DataType) -> Expr: >>> result.collect_column("c")[0].as_py() is None True """ + _warn_if_expr_for_literal_arg(data_type, "arrow_try_cast", "data_type") if isinstance(data_type, pa.DataType): return expr.try_cast(data_type) if isinstance(data_type, str): @@ -3235,6 +3247,7 @@ def arrow_metadata(expr: Expr, key: Expr | str | None = None) -> Expr: """ if key is None: return Expr(f.arrow_metadata(expr.expr)) + _warn_if_expr_for_literal_arg(key, "arrow_metadata", "key") if isinstance(key, str): key = Expr.string_literal(key) return Expr(f.arrow_metadata(expr.expr, key.expr)) diff --git a/python/tests/test_functions.py b/python/tests/test_functions.py index 43dd70660..557ea107b 100644 --- a/python/tests/test_functions.py +++ b/python/tests/test_functions.py @@ -2355,6 +2355,65 @@ def test_regexp_replace_native(self): ).collect() assert result[0].column(0)[0].as_py() == "aX bX cX" + @pytest.mark.parametrize( + ("func", "arg_name", "expr"), + [ + pytest.param( + f.encode, + "encoding", + lambda: f.encode(column("a"), literal("base64")), + id="encode-encoding", + ), + pytest.param( + f.decode, + "encoding", + lambda: f.decode(column("a"), literal("base64")), + id="decode-encoding", + ), + pytest.param( + f.digest, + "method", + lambda: f.digest(column("a"), literal("sha256")), + id="digest-method", + ), + pytest.param( + f.arrow_cast, + "data_type", + lambda: f.arrow_cast(column("a"), literal("Float64")), + id="arrow-cast-data-type", + ), + pytest.param( + f.arrow_try_cast, + "data_type", + lambda: f.arrow_try_cast(column("a"), literal("Float64")), + id="arrow-try-cast-data-type", + ), + pytest.param( + f.arrow_metadata, + "key", + lambda: f.arrow_metadata(column("a"), literal("k")), + id="arrow-metadata-key", + ), + ], + ) + def test_literal_only_expr_args_warn_deprecated(self, func, arg_name, expr): + with pytest.warns( + DeprecationWarning, + match=rf"Passing Expr for {func.__name__}\(\) argument '{arg_name}' is deprecated", + ): + result = expr() + assert result is not None + + def test_literal_only_native_args_do_not_warn(self): + with warnings.catch_warnings(): + warnings.simplefilter("error", DeprecationWarning) + assert f.encode(column("a"), "base64") is not None + assert f.decode(column("a"), "base64") is not None + assert f.digest(column("a"), "sha256") is not None + assert f.arrow_cast(column("a"), "Float64") is not None + assert f.arrow_try_cast(column("a"), pa.float64()) is not None + assert f.arrow_metadata(column("a"), "k") is not None + def test_backward_compat_with_lit(self): """Verify that existing code using lit() still works.""" ctx = SessionContext() From 529f05d5273482c2961ea5c793dcdd647dc18e0f Mon Sep 17 00:00:00 2001 From: Siew Kam Onn Date: Thu, 18 Jun 2026 16:56:22 +0800 Subject: [PATCH 2/4] fix(tests): resolve E501 line length issue in test_functions.py --- python/tests/test_functions.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/python/tests/test_functions.py b/python/tests/test_functions.py index 557ea107b..3561fc947 100644 --- a/python/tests/test_functions.py +++ b/python/tests/test_functions.py @@ -2399,7 +2399,10 @@ def test_regexp_replace_native(self): def test_literal_only_expr_args_warn_deprecated(self, func, arg_name, expr): with pytest.warns( DeprecationWarning, - match=rf"Passing Expr for {func.__name__}\(\) argument '{arg_name}' is deprecated", + match=( + rf"Passing Expr for {func.__name__}\(\) argument " + rf"'{arg_name}' is deprecated" + ), ): result = expr() assert result is not None From d0a72738f1c95414901860bc86d678e83ba8bf22 Mon Sep 17 00:00:00 2001 From: Siew Kam Onn Date: Mon, 22 Jun 2026 15:52:22 +0800 Subject: [PATCH 3/4] feat: consolidate warning helpers and update temporal function calls - Collapsed warning helpers into a single function `_warn_if_expr_for_literal_arg` in `python/datafusion/functions/__init__.py`. - Updated callers of temporal functions `_date_part` and `_date_trunc` in `python/datafusion/functions/__init__.py`. - Modified the digest behavior test in `python/tests/test_functions.py` to use native method strings. - Updated the encode/decode behavior test to use native "base64" in `python/tests/test_functions.py`. --- python/datafusion/functions/__init__.py | 22 ++++++++-------------- python/tests/test_functions.py | 10 ++++------ 2 files changed, 12 insertions(+), 20 deletions(-) diff --git a/python/datafusion/functions/__init__.py b/python/datafusion/functions/__init__.py index fb6a607eb..9fb20dc15 100644 --- a/python/datafusion/functions/__init__.py +++ b/python/datafusion/functions/__init__.py @@ -64,20 +64,16 @@ from datafusion.functions import spark -def _warn_expr_for_literal_arg(function_name: str, arg_name: str) -> None: - warnings.warn( - f"Passing Expr for {function_name}() argument {arg_name!r} is deprecated; " - "pass a Python literal instead.", - DeprecationWarning, - stacklevel=4, - ) - - def _warn_if_expr_for_literal_arg( value: Any, function_name: str, arg_name: str ) -> None: if isinstance(value, Expr): - _warn_expr_for_literal_arg(function_name, arg_name) + warnings.warn( + f"Passing Expr for {function_name}() argument {arg_name!r} is deprecated; " + "pass a Python literal instead.", + DeprecationWarning, + stacklevel=3, + ) __all__ = [ @@ -2733,8 +2729,7 @@ def date_part(part: Expr | str, date: Expr) -> Expr: def _date_part(part: Expr | str, date: Expr, function_name: str) -> Expr: - if isinstance(part, Expr): - _warn_expr_for_literal_arg(function_name, "part") + _warn_if_expr_for_literal_arg(part, function_name, "part") part = coerce_to_expr(part) return Expr(f.date_part(part.expr, date.expr)) @@ -2770,8 +2765,7 @@ def date_trunc(part: Expr | str, date: Expr) -> Expr: def _date_trunc(part: Expr | str, date: Expr, function_name: str) -> Expr: - if isinstance(part, Expr): - _warn_expr_for_literal_arg(function_name, "part") + _warn_if_expr_for_literal_arg(part, function_name, "part") part = coerce_to_expr(part) return Expr(f.date_trunc(part.expr, date.expr)) diff --git a/python/tests/test_functions.py b/python/tests/test_functions.py index 3561fc947..894d81fcf 100644 --- a/python/tests/test_functions.py +++ b/python/tests/test_functions.py @@ -1014,7 +1014,7 @@ def test_string_functions(df, function, expected_result): def test_hash_functions(df): exprs = [ - f.digest(column("a"), literal(m)) + f.digest(column("a"), m) for m in ( "md5", "sha224", @@ -1602,12 +1602,10 @@ def test_regr_funcs_df(func, expected): def test_binary_string_functions(df): df = df.select( - f.encode(column("a").cast(pa.string()), literal("base64").cast(pa.string())), + f.encode(column("a").cast(pa.string()), "base64"), f.decode( - f.encode( - column("a").cast(pa.string()), literal("base64").cast(pa.string()) - ), - literal("base64").cast(pa.string()), + f.encode(column("a").cast(pa.string()), "base64"), + "base64", ), ) result = df.collect() From 7307042a8e9496cae9c034c338d2763eaec5209d Mon Sep 17 00:00:00 2001 From: Siew Kam Onn Date: Mon, 22 Jun 2026 15:57:43 +0800 Subject: [PATCH 4/4] docs: update function examples in common-operations user guide - Replaced `string_literal("Float64")` with `"Float64"` in examples. - Replaced `str_lit("Int32")` with `"Int32"` in examples. - Removed unused `string_literal` and `str_lit` imports. --- docs/source/user-guide/common-operations/functions.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/source/user-guide/common-operations/functions.md b/docs/source/user-guide/common-operations/functions.md index 50411825d..9939e2f00 100644 --- a/docs/source/user-guide/common-operations/functions.md +++ b/docs/source/user-guide/common-operations/functions.md @@ -52,7 +52,7 @@ df = ctx.table("pokemon") DataFusion offers mathematical functions such as {py:func}`~datafusion.functions.pow` or {py:func}`~datafusion.functions.log` ```{code-cell} ipython3 -from datafusion import col, literal, string_literal, str_lit +from datafusion import col, literal from datafusion import functions as f df.select( @@ -122,8 +122,8 @@ Casting expressions to different data types using {py:func}`~datafusion.function ```{code-cell} ipython3 df.select( - f.arrow_cast(col('"Total"'), string_literal("Float64")).alias("total_as_float"), - f.arrow_cast(col('"Total"'), str_lit("Int32")).alias("total_as_int") + f.arrow_cast(col('"Total"'), "Float64").alias("total_as_float"), + f.arrow_cast(col('"Total"'), "Int32").alias("total_as_int") ) ```