2 changes: 1 addition & 1 deletion .github/workflows/daily_modin_precommit_py39_py310.yml
@@ -135,7 +135,7 @@ jobs:
- name: Install tox
run: python -m pip install tox
- name: Run tests
run: tox -e modin_previous_version-snowparkpandasdailynotdoctest-ci
run: tox -e modin_previous_version-snowparkpandasdailynotdoctest-modin-ci

test:
name: Test modin-${{ matrix.os.download_name }}-${{ matrix.python-version }}-${{ matrix.cloud-provider }}
35 changes: 21 additions & 14 deletions CHANGELOG.md
@@ -1,6 +1,6 @@
# Release History

## 1.33.0 (YYYY-MM-DD)
## 1.33.0 (2025-06-19)

### Snowpark Python API Updates

@@ -11,9 +11,23 @@
- Added support for Databricks in `DataFrameWriter.dbapi` (PrPr) for UDTF-based ingestion.
- Added support to `DataFrameReader` to enable use of `PATTERN` when reading files with `INFER_SCHEMA` enabled.
- Added support for the following AI-powered functions in `functions.py`:
- `ai_complete`
- `ai_similarity`
- `ai_summarize_agg` (originally `summarize_agg`)
- different config options for `ai_classify`
- Added support for more options when reading XML files with a row tag using `rowTag` option:
- Added support for removing namespace prefixes from col names using `ignoreNamespace` option.
- Added support for specifying the prefix for the attribute column in the result table using `attributePrefix` option.
- Added support for excluding attributes from the XML element using `excludeAttributes` option.
- Added support for specifying the column name for the value when there are attributes in an element that has no child elements using `valueTag` option.
- Added support for specifying the value to treat as a ``null`` value using `nullValue` option.
- Added support for specifying the character encoding of the XML file using `charset` option.
- Added support for ignoring surrounding whitespace in the XML element using `ignoreSurroundingWhitespace` option.
- Added support for parameter `return_dataframe` in `Session.call`, which can be used to set the return type of the functions to a `DataFrame` object.
- Added a new argument to `DataFrame.describe` called `strings_include_math_stats` that triggers `stddev` and `mean` to be calculated for String columns.
- Added support for retrieving `Edge.properties` when retrieving lineage from `DGQL` in `DataFrame.lineage.trace`.
- Added a parameter `table_exists` to `DataFrameWriter.save_as_table` that allows specifying if a table already exists. This allows skipping a table lookup that can be expensive.


#### Bug Fixes

@@ -26,21 +40,10 @@

#### Improvements

- Added support for more options when reading XML files with a row tag using `rowTag` option:
- Added support for removing namespace prefixes from col names using `ignoreNamespace` option.
- Added support for specifying the prefix for the attribute column in the result table using `attributePrefix` option.
- Added support for excluding attributes from the XML element using `excludeAttributes` option.
- Added support for specifying the column name for the value when there are attributes in an element that has no child elements using `valueTag` option.
- Added support for specifying the value to treat as a ``null`` value using `nullValue` option.
- Added support for specifying the character encoding of the XML file using `charset` option.
- Added support for ignoring surrounding whitespace in the XML element using `ignoreSurroundingWhitespace` option.
- Added support for parameter `return_dataframe` in `Session.call`, which can be used to set the return type of the functions to a `DataFrame` object.
- Added a new argument to `Dataframe.describe` called `strings_include_math_stats` that triggers `stddev` and `mean` to be calculated for String columns.
- Improved the error message for `Session.write_pandas()` and `Session.create_dataframe()` when the input pandas DataFrame does not have a column.
- Added support for retrieving `Edge.properties` when retrieving lineage from `DGQL` in `DataFrame.lineage.trace`.
- Added a parameter `table_exists` to `DataFrameWriter.save_as_table` that allows specifying if a table already exists. This allows skipping a table lookup that can be expensive.
- Improved `DataFrame.select` when the arguments contain a table function with output columns that collide with columns of current dataframe. With the improvement, if user provides non-colliding columns in `df.select("col1", "col2", table_func(...))` as string arguments, then the query generated by snowpark client will not raise ambiguous column error.
- Improved `DataFrameReader.dbapi` (PrPr) to use in-memory Parquet-based ingestion for better performance and security.
- Improved `DataFrameReader.dbapi` (PrPr) to use `MATCH_BY_COLUMN_NAME=CASE_SENSITIVE` in copy into table operation.

### Snowpark Local Testing Updates

@@ -62,13 +65,17 @@
#### New Features
- Added support for **Hybrid Execution (PrPr)**. By running `from modin.config import AutoSwitchBackend; AutoSwitchBackend.enable()`, Snowpark pandas will automatically choose whether to run certain pandas operations locally or on Snowflake. This feature is disabled by default.


#### Improvements

- Set the default value of the `index` parameter to `False` for `DataFrame.to_view`, `Series.to_view`, `DataFrame.to_dynamic_table`, and `Series.to_dynamic_table`.
- Added `iceberg_version` option to table creation functions.
- Reduced query count for many operations, including `insert`, `repr`, and `groupby`, that previously issued a query to retrieve the input data's size.

#### Bug Fixes

- Fixed a bug in `Series.where` when the `other` parameter is an unnamed `Series`.


## 1.32.0 (2025-05-15)

### Snowpark Python API Updates
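As a quick, hedged illustration of two of the new Snowpark Python parameters listed in the changelog above (the connection setup, procedure, and table names here are placeholders, not part of the changelog):

```python
from snowflake.snowpark import Session

# Placeholder connection; configure the builder with real credentials in practice.
session = Session.builder.getOrCreate()

# return_dataframe=True asks Session.call to return the stored procedure's
# result as a DataFrame instead of a scalar value.
result_df = session.call("my_db.my_schema.my_proc", 42, return_dataframe=True)

# table_exists=True tells save_as_table that the target table already exists,
# skipping the potentially expensive table lookup.
result_df.write.save_as_table(
    "my_db.my_schema.existing_table", mode="append", table_exists=True
)
```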
1 change: 1 addition & 0 deletions docs/source/snowpark/functions.rst
@@ -24,6 +24,7 @@ Functions
add_months
ai_agg
ai_classify
ai_complete
ai_filter
ai_similarity
ai_summarize_agg
15 changes: 8 additions & 7 deletions recipe/meta.yaml
@@ -1,6 +1,7 @@
{% set name = "snowflake-snowpark-python" %}
{% set version = "1.32.0" %}
{% set version = "1.33.0" %}
{% set noarch_build = (os.environ.get('SNOWFLAKE_SNOWPARK_PYTHON_NOARCH_BUILD', 'false')) == 'true' %}
{% set build_number = os.environ.get('SNOWFLAKE_SNOWPARK_PYTHON_BUILD_NUMBER', 0) %}

package:
name: {{ name|lower }}
@@ -11,17 +12,17 @@ source:
path: ../

build:
number: {{ os.environ.get('SNOWFLAKE_SNOWPARK_PYTHON_BUILD_NUMBER', 0) }}
number: {{ build_number }}
skip: True # [py<38 or win32 or s390x]
script: {{ PYTHON }} -m pip install . --no-deps -vvv
script_env:
- SNOWFLAKE_IS_PYTHON_RUNTIME_TEST=1
{% if noarch_build %}
noarch: python
string: "py39_0" # [py==39]
string: "py310_0" # [py==310]
string: "py311_0" # [py==311]
string: "py312_0" # [py==312]
string: "py39_{{ build_number }}" # [py==39]
string: "py310_{{ build_number }}" # [py==310]
string: "py311_{{ build_number }}" # [py==311]
string: "py312_{{ build_number }}" # [py==312]
{% endif %}

{% if noarch_build and py not in [39, 310, 311, 312] %}
@@ -62,7 +63,7 @@ requirements:
- tzlocal
run_constrained:
# Snowpark pandas
- modin==0.32.0
- modin >=0.32.0,<0.34.0
{% endif %}

test:
1 change: 1 addition & 0 deletions setup.py
@@ -210,6 +210,7 @@ def run(self):
# TODO(SNOW-1938831): Test snowflake-ml-python on python 3.12 once
# snowflake-ml-python is available on python 3.12.
"snowflake-ml-python>=1.8.0; python_version<'3.12'",
"s3fs", # Used in tests that read CSV files from s3
],
"localtest": [
"pandas",
17 changes: 17 additions & 0 deletions src/snowflake/snowpark/_internal/analyzer/analyzer.py
@@ -61,6 +61,7 @@
to_sql,
)
from snowflake.snowpark._internal.analyzer.expression import (
NamedFunctionExpression,
Attribute,
CaseWhen,
Collate,
@@ -431,6 +432,22 @@ def analyze(
expr.is_distinct,
)

if isinstance(expr, NamedFunctionExpression):
if expr.api_call_source is not None:
self.session._conn._telemetry_client.send_function_usage_telemetry(
expr.api_call_source, TelemetryField.FUNC_CAT_USAGE.value
)
func_name = expr.name.upper() if parse_local_name else expr.name
return named_arguments_function(
func_name,
{
key: self.to_sql_try_avoid_cast(
value, df_aliased_col_name_to_real_col_name
)
for key, value in expr.named_arguments.items()
},
)

if isinstance(expr, Star):
if expr.df_alias:
# This is only hit by col(<df_alias>)
32 changes: 32 additions & 0 deletions src/snowflake/snowpark/_internal/analyzer/expression.py
@@ -588,6 +588,38 @@ def plan_node_category(self) -> PlanNodeCategory:
return PlanNodeCategory.FUNCTION


class NamedFunctionExpression(Expression):
def __init__(
self,
name: str,
named_arguments: Dict[str, Expression],
api_call_source: Optional[str] = None,
) -> None:
super().__init__()
self.name = name
self.named_arguments = named_arguments
self.children = list(named_arguments.values())
self.api_call_source = api_call_source

@property
def pretty_name(self) -> str:
return self.name

@property
def sql(self) -> str:
return f"{self.pretty_name}({', '.join([f'{k} => {v.sql}' for k, v in self.named_arguments.items()])})"

def dependent_column_names(self) -> Optional[AbstractSet[str]]:
return derive_dependent_columns(*self.children)

def dependent_column_names_with_duplication(self) -> List[str]:
return derive_dependent_columns_with_duplication(*self.children)

@property
def plan_node_category(self) -> PlanNodeCategory:
return PlanNodeCategory.FUNCTION


class WithinGroup(Expression):
def __init__(self, expr: Expression, order_by_cols: List[Expression]) -> None:
super().__init__(expr)
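The `sql` property of the new class renders each named argument as `key => value`. A minimal standalone sketch of that rendering (not the Snowpark internals themselves; the function and argument names below are hypothetical):

```python
# Illustration of how a named-argument function call is rendered to SQL,
# mirroring NamedFunctionExpression.sql above.
def render_named_function(name: str, named_arguments: dict) -> str:
    # named_arguments maps parameter names to already-rendered SQL fragments.
    args = ", ".join(f"{key} => {value}" for key, value in named_arguments.items())
    return f"{name}({args})"

print(render_named_function("AI_COMPLETE", {"model": "'llama3.1-8b'", "prompt": "'Hello'"}))
# AI_COMPLETE(model => 'llama3.1-8b', prompt => 'Hello')
```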
@@ -23,6 +23,7 @@
IntegerType,
BinaryType,
DateType,
BooleanType,
)
import snowflake.snowpark
import logging
@@ -184,10 +185,17 @@ def data_source_data_to_pandas_df(
if isinstance(x, (datetime.datetime, datetime.date))
else x
)
# The astype calls below address a COPY INTO failure when a column contains only None values:
# pandas would infer the wrong type for such a column, so we convert it to the corresponding type explicitly.
elif isinstance(field.datatype, BinaryType):
df[name] = df[name].map(
lambda x: x.hex() if isinstance(x, (bytearray, bytes)) else x
# we convert all binary to hex, so it is safe to astype to string
df[name] = (
df[name]
.map(lambda x: x.hex() if isinstance(x, (bytearray, bytes)) else x)
.astype("string")
)
elif isinstance(field.datatype, BooleanType):
df[name] = df[name].astype("boolean")
return df

@staticmethod
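The comment in the new code refers to pandas type inference on all-NULL columns. A short plain-pandas sketch (not Snowpark code) of the failure mode the explicit `astype` calls guard against:

```python
import pandas as pd

# A column containing only None is inferred as "object", which can make the later
# Parquet upload / COPY INTO produce the wrong column type or fail outright.
df = pd.DataFrame({"flag": [None, None]})
print(df["flag"].dtype)                    # object

# Casting to the nullable extension dtypes preserves the intended logical type
# even when every value is NULL, mirroring astype("string") / astype("boolean") above.
print(df["flag"].astype("boolean").dtype)  # boolean
```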
2 changes: 1 addition & 1 deletion src/snowflake/snowpark/_internal/data_source/utils.py
@@ -228,7 +228,7 @@ def _upload_and_copy_into_table(
copy_into_table_query = f"""
COPY INTO {snowflake_table_name} FROM @{snowflake_stage_name}/{parquet_id}
FILE_FORMAT = (TYPE = PARQUET USE_VECTORIZED_SCANNER=TRUE)
MATCH_BY_COLUMN_NAME=CASE_INSENSITIVE
MATCH_BY_COLUMN_NAME=CASE_SENSITIVE
PURGE=TRUE
ON_ERROR={on_error}
{DATA_SOURCE_SQL_COMMENT}
8 changes: 6 additions & 2 deletions src/snowflake/snowpark/dataframe_reader.py
@@ -47,6 +47,7 @@
)
from snowflake.snowpark._internal.udf_utils import get_types_from_type_hints
from snowflake.snowpark._internal.utils import (
SNOWURL_PREFIX,
STAGE_PREFIX,
XML_ROW_TAG_STRING,
XML_ROW_DATA_COLUMN_NAME,
@@ -975,10 +976,13 @@ def _infer_schema_for_file_format(

# When pattern is set we should only consider files that match the pattern during schema inference
# If no files match fallback to trying to read all files.
# snow:// paths are not yet supported
infer_path = path
if (
pattern := self._cur_options.get("PATTERN", None)
) and "FILES" not in infer_schema_options:
(pattern := self._cur_options.get("PATTERN", None))
and "FILES" not in infer_schema_options
and not path.startswith(SNOWURL_PREFIX)
):
# matches has schema (name, size, md5, last_modified)
# Name is fully qualified with stage path
matches = self._session._conn.run_query(
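A condensed sketch of the guard added above; the `SNOWURL_PREFIX` value is assumed here for illustration, and the helper name is hypothetical:

```python
SNOWURL_PREFIX = "snow://"  # assumed value of the constant imported from _internal.utils

def should_narrow_inference_by_pattern(
    options: dict, infer_schema_options: dict, path: str
) -> bool:
    # Mirrors the new condition: a PATTERN is set, the caller did not pass an
    # explicit FILES list, and the path is not a snow:// URL (pattern-based
    # listing is not yet supported for snow:// paths).
    pattern = options.get("PATTERN")
    return (
        bool(pattern)
        and "FILES" not in infer_schema_options
        and not path.startswith(SNOWURL_PREFIX)
    )
```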