15 changes: 3 additions & 12 deletions CHANGELOG.md
@@ -1,6 +1,6 @@
# Release History

## 1.36.0 (YYYY-MM-DD)
## 1.36.0 (2025-08-05)

### Snowpark Python API Updates

@@ -19,24 +19,15 @@

#### New Features

- Added support for creating permanent and immutable UDFs/UDTFs with `DataFrame/Series/GroupBy.apply`, `map`, and `transform` by passing the `snowflake_udf_params` keyword argument. See documentation for details.

#### Improvements

- Hybrid execution row estimate improvements and a reduction of eager calls.
- Improved performance by deferring row position computation.
  - The following operations are currently supported and can benefit from the optimization: `read_snowflake`, `repr`, `loc`, `reset_index`, `merge`, and binary operations.
  - If a lazy object (e.g., DataFrame or Series) depends on a mix of supported and unsupported operations, the optimization will not be used.
- Add a new configuration variable to control transfer costs out of Snowflake when using hybrid execution.
- Added support for creating permanent and immutable UDFs/UDTFs with `DataFrame/Series/GroupBy.apply`, `map`, and `transform` by passing the `snowflake_udf_params` keyword argument. See documentation for details.

#### Bug Fixes

- Fixed an issue where Snowpark pandas plugin would unconditionally disable `AutoSwitchBackend` even when users had explicitly configured it via environment variables or programmatically.

### Snowpark pandas API Updates

#### Improvements
- Add a new configuration variable to control transfer costs out of Snowflake when using hybrid execution. Lower the default to 100k from 10M.

## 1.35.0 (2025-07-24)

### Snowpark Python API Updates
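A minimal usage sketch of the `snowflake_udf_params` feature noted in the 1.36.0 changelog above, assuming an active Snowpark pandas session. The dictionary keys shown (`name`, `stage_location`, `immutable`) are illustrative assumptions drawn from typical Snowflake UDF registration options, not confirmed API; the changelog only states that the keyword exists, so consult the documentation it points to for the real schema.

```python
import modin.pandas as pd
import snowflake.snowpark.modin.plugin  # noqa: F401  # registers the Snowflake backend

df = pd.DataFrame({"a": [1, 2, 3]})

# Hypothetical keys below: the changelog says permanent/immutable UDFs can be
# requested through snowflake_udf_params; the exact schema is in the docs.
result = df["a"].apply(
    lambda x: x + 1,
    snowflake_udf_params={
        "name": "MY_PERMANENT_UDF",     # assumed: register under a stable name
        "stage_location": "@my_stage",  # assumed: stage that makes the UDF permanent
        "immutable": True,              # assumed: mark the UDF immutable
    },
)
```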
2 changes: 1 addition & 1 deletion recipe/meta.yaml
@@ -1,5 +1,5 @@
{% set name = "snowflake-snowpark-python" %}
{% set version = "1.35.0" %}
{% set version = "1.36.0" %}
{% set noarch_build = (os.environ.get('SNOWFLAKE_SNOWPARK_PYTHON_NOARCH_BUILD', 'false')) == 'true' %}
{% set build_number = os.environ.get('SNOWFLAKE_SNOWPARK_PYTHON_BUILD_NUMBER', 0) %}

2 changes: 1 addition & 1 deletion src/snowflake/snowpark/dataframe.py
@@ -4987,7 +4987,7 @@ def cell_to_str(cell: Any, datatype: DataType) -> str:
"{"
+ ", ".join(
[
f"{cell_to_str(k, datatype.key_type or StringType())} -> {cell_to_str(v, datatype.key_type or StringType())}"
f"{cell_to_str(k, datatype.key_type or StringType())} -> {cell_to_str(v, datatype.value_type or StringType())}"
for k, v in sorted(cell.items())
]
)
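The one-line change above fixes a copy-paste bug: map values were rendered with the key's type instead of the value's. A standalone sketch of the corrected behavior, using simplified stand-ins for Snowpark's `MapType`/`StringType` (illustrative only, not the real classes):

```python
from typing import Any


class StringType:
    """Simplified stand-in for snowflake.snowpark.types.StringType."""


class MapType:
    """Simplified stand-in; the real class carries key_type and value_type."""

    def __init__(self, key_type=None, value_type=None):
        self.key_type = key_type
        self.value_type = value_type


def cell_to_str(cell: Any, datatype: Any) -> str:
    # Only the map branch is sketched; the real function dispatches on many types.
    if isinstance(datatype, MapType) and isinstance(cell, dict):
        return "{" + ", ".join(
            # keys use key_type, values use value_type (the bug used key_type twice)
            f"{cell_to_str(k, datatype.key_type or StringType())} -> "
            f"{cell_to_str(v, datatype.value_type or StringType())}"
            for k, v in sorted(cell.items())
        ) + "}"
    return str(cell)


print(cell_to_str({"a": 1, "b": 2}, MapType()))  # {a -> 1, b -> 2}
```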
5 changes: 1 addition & 4 deletions src/snowflake/snowpark/modin/plugin/__init__.py
@@ -170,10 +170,7 @@
)
from modin.config import AutoSwitchBackend # isort: skip # noqa: E402

if (
AutoSwitchBackend.get() is not AutoSwitchBackend.default
and AutoSwitchBackend.get_value_source() is not ValueSource.DEFAULT
):
if AutoSwitchBackend.get_value_source() is ValueSource.DEFAULT:
AutoSwitchBackend.disable()

# Hybrid Mode Registration
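The simplified condition above means hybrid auto-switching is now disabled only when the user never touched the setting (value source is `ValueSource.DEFAULT`). A sketch of the behavior the fix preserves, assuming `ValueSource` is importable from `modin.config` as the plugin code suggests; `AutoSwitchBackend.enable()` is assumed symmetric to the `.disable()` call visible in the diff:

```python
from modin.config import AutoSwitchBackend, ValueSource

# Explicit opt-in before the plugin loads; .enable() is assumed to exist as the
# counterpart of the .disable() call shown in the diff.
AutoSwitchBackend.enable()

import snowflake.snowpark.modin.plugin  # noqa: F401, E402

# With the fix, the plugin sees a non-DEFAULT value source and leaves the user's
# choice alone instead of disabling it unconditionally.
assert AutoSwitchBackend.get_value_source() is not ValueSource.DEFAULT
assert AutoSwitchBackend.get() is True
```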
@@ -336,9 +336,7 @@ def _select_columns(
)


def add_global_ordering_columns(
frame: InternalFrame, position: int, dummy_row_pos_mode: bool = False
) -> InternalFrame:
def add_global_ordering_columns(frame: InternalFrame, position: int) -> InternalFrame:
"""
To create global ordering for the concat (axis=0) operation, we first ensure a
row position column for local ordering within the frame. Then add another
@@ -353,7 +351,7 @@ def add_global_ordering_columns(
A new frame with updated ordering columns.

"""
frame = frame.ensure_row_position_column(dummy_row_pos_mode)
frame = frame.ensure_row_position_column()
ordered_dataframe = frame.ordered_dataframe.sort(
[OrderingColumn(frame.row_position_snowflake_quoted_identifier)]
)
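Setting aside the removed `dummy_row_pos_mode` flag, the docstring's scheme is: a row position column gives local order within each frame, and a per-frame position literal makes the combined order global. A plain-Snowpark sketch of that idea, with hypothetical column names (`"a"` stands in for the frame's real internal ordering columns):

```python
from snowflake.snowpark.functions import lit, row_number
from snowflake.snowpark.window import Window


def with_global_order(df, position: int):
    # Local order: number rows within this frame (the real code orders by the
    # frame's internal ordering columns; "a" is a hypothetical stand-in).
    w = Window.order_by("a")
    return df.with_column("_row_pos", row_number().over(w)).with_column(
        "_frame_pos", lit(position)
    )


# Usage sketch: union the frames, then sort on (_frame_pos, _row_pos) so rows of
# frame 0 precede rows of frame 1, each in its local order.
# combined = with_global_order(df1, 0).union_all(with_global_order(df2, 1)).sort(
#     "_frame_pos", "_row_pos"
# )
```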
@@ -152,7 +152,6 @@ def compute_bin_indices(
cuts_frame: InternalFrame,
n_cuts: int,
right: bool = True,
dummy_row_pos_mode: bool = False,
) -> InternalFrame:
"""
Given a frame of cuts, i.e., borders of bins (strictly increasing), compute for the data in values_frame the index of the bin each value falls into.
@@ -184,7 +183,7 @@
# within OrderedDataFrame yet, we use the Snowpark layer directly. This should have no negative
# consequences when it comes to building lazy graphs, as both cut and qcut are materializing operations.

cuts_frame = cuts_frame.ensure_row_position_column(dummy_row_pos_mode)
cuts_frame = cuts_frame.ensure_row_position_column()
# perform an asof join to find the closest cut for the frame data.
asof_result = join(
values_frame,
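For reference, the semantics `compute_bin_indices` implements distributively can be expressed locally as a bisect over the strictly increasing cut borders. This plain-Python sketch mirrors `pandas.cut`'s `right` flag and is not the asof-join implementation itself:

```python
import bisect


def bin_index(value, cuts, right=True):
    """Return the index of the bin `value` falls into, or None when out of range.

    `cuts` are strictly increasing bin borders; right=True means bins are closed
    on the right, (cuts[i], cuts[i+1]], matching pandas.cut's default.
    """
    locate = bisect.bisect_left if right else bisect.bisect_right
    i = locate(cuts, value) - 1
    if i < 0 or i >= len(cuts) - 1:
        return None  # outside all bins, analogous to NaN from pandas.cut
    return i


assert bin_index(5, [0, 5, 10]) == 0               # 5 falls in (0, 5]
assert bin_index(5, [0, 5, 10], right=False) == 1  # 5 falls in [5, 10)
assert bin_index(0, [0, 5, 10]) is None            # left border excluded when right=True
```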
19 changes: 5 additions & 14 deletions src/snowflake/snowpark/modin/plugin/_internal/frame.py
@@ -888,19 +888,15 @@ def to_pandas(
###########################################################################
# START: Internal Frame mutation APIs.
# APIs that creates a new InternalFrame instance, should only be added below
def ensure_row_position_column(
self, dummy_row_pos_mode: bool = False
) -> "InternalFrame":
def ensure_row_position_column(self) -> "InternalFrame":
"""
Ensure the row position column is computed for the given internal frame.

Returns:
A new InternalFrame instance with computed virtual index.
"""
return InternalFrame.create(
ordered_dataframe=self.ordered_dataframe.ensure_row_position_column(
dummy_row_pos_mode
),
ordered_dataframe=self.ordered_dataframe.ensure_row_position_column(),
data_column_pandas_labels=self.data_column_pandas_labels,
data_column_snowflake_quoted_identifiers=self.data_column_snowflake_quoted_identifiers,
data_column_pandas_index_names=self.data_column_pandas_index_names,
@@ -1354,9 +1350,7 @@ def select_active_columns(self) -> "InternalFrame":
)

def strip_duplicates(
self: "InternalFrame",
quoted_identifiers: list[str],
dummy_row_pos_mode: bool = False,
self: "InternalFrame", quoted_identifiers: list[str]
) -> "InternalFrame":
"""
When assigning frames via index operations, for duplicates only the last entry is used, as entries are repeatedly overwritten.
@@ -1370,7 +1364,7 @@ def strip_duplicates(
new internal frame with unique index.
"""

frame = self.ensure_row_position_column(dummy_row_pos_mode)
frame = self.ensure_row_position_column()

# To remove the duplicates, first compute via windowing over index columns the value of the last row position.
# with this join then select only the relevant rows. Note that an EXISTS subquery doesn't work here because
@@ -1406,15 +1400,12 @@ def strip_duplicates(
left_on_cols=[frame.row_position_snowflake_quoted_identifier],
right_on_cols=[relevant_last_value_row_positions_quoted_identifier],
how="inner",
dummy_row_pos_mode=dummy_row_pos_mode,
)

# Because we reuse row position to select the relevant columns, we need to
# generate a new row position column here so locational indexing after this operation
# continues to work correctly.
new_ordered_dataframe = joined_ordered_dataframe.ensure_row_position_column(
dummy_row_pos_mode
)
new_ordered_dataframe = joined_ordered_dataframe.ensure_row_position_column()
return InternalFrame.create(
ordered_dataframe=new_ordered_dataframe,
data_column_pandas_labels=frame.data_column_pandas_labels,
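The comment above describes computing, per index value, the last row position via a window and joining the result back. A simplified plain-Snowpark sketch of the keep-last idea, using a window plus filter rather than the join the real code uses, with hypothetical column names:

```python
from snowflake.snowpark.functions import col, max as max_
from snowflake.snowpark.window import Window


def keep_last_per_key(df, key_col: str, row_pos_col: str):
    # For each duplicated key, find the largest row position, then keep only the
    # rows at that position -- later assignments win, as the docstring describes.
    w = Window.partition_by(key_col)
    return (
        df.with_column("_last_pos", max_(col(row_pos_col)).over(w))
        .filter(col(row_pos_col) == col("_last_pos"))
        .drop("_last_pos")
    )
```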
@@ -79,7 +79,6 @@ def generate_regular_range(

def _create_qc_from_snowpark_dataframe(
sp_df: DataFrame,
dummy_row_pos_mode: bool = False,
) -> "snowflake_query_compiler.SnowflakeQueryCompiler":
"""
Create a Snowflake query compiler from a Snowpark DataFrame, assuming the DataFrame only contains one column.
@@ -90,9 +89,7 @@
Returns:
A Snowflake query compiler
"""
odf = OrderedDataFrame(DataFrameReference(sp_df)).ensure_row_position_column(
dummy_row_pos_mode
)
odf = OrderedDataFrame(DataFrameReference(sp_df)).ensure_row_position_column()

from snowflake.snowpark.modin.plugin.compiler.snowflake_query_compiler import (
SnowflakeQueryCompiler,
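Judging by the function names, `generate_regular_range` and `_create_qc_from_snowpark_dataframe` back `pd.date_range` on the Snowflake backend (an inference, not confirmed by the diff). A usage-level sketch, assuming an active Snowpark session:

```python
import modin.pandas as pd
import snowflake.snowpark.modin.plugin  # noqa: F401

# date_range produces a one-column frame of timestamps; on the Snowflake backend a
# helper like _create_qc_from_snowpark_dataframe wraps the underlying Snowpark
# DataFrame and ensures a row position column so the result has a stable order.
idx = pd.date_range(start="2025-01-01", periods=5, freq="D")
print(idx)
```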
@@ -184,7 +184,6 @@ def get_dummies_helper(
columns: list[Hashable],
prefixes: list[Hashable],
prefix_sep: str,
dummy_row_pos_mode: bool = False,
) -> InternalFrame:
"""
Helper function for get_dummies to perform encoding on the given columns
@@ -223,9 +222,9 @@
)

# append a lit true column as value column for pivot
new_internal_frame = internal_frame.ensure_row_position_column(
dummy_row_pos_mode
).append_column(LIT_TRUE_COLUMN_PANDAS_LABEL, pandas_lit(True))
new_internal_frame = internal_frame.ensure_row_position_column().append_column(
LIT_TRUE_COLUMN_PANDAS_LABEL, pandas_lit(True)
)
# the dummy column is appended as the last data column of the new_internal_frame
row_position_column_snowflake_quoted_identifier = (
new_internal_frame.row_position_snowflake_quoted_identifier
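The helper's strategy, append `lit(True)` as the pivot value and then pivot the encoded column, looks like this in plain Snowpark. Column and category names are hypothetical, and the real code additionally manages row positions and pandas column labels:

```python
from snowflake.snowpark.functions import lit


def sketch_get_dummies(df, column: str, categories: list):
    # Append a literal True column to serve as the pivot's value column, as the
    # helper above does with LIT_TRUE_COLUMN_PANDAS_LABEL.
    flagged = df.with_column("_flag", lit(True))
    # One output column per category; max(_flag) is True where the row had that
    # category and NULL otherwise (the real code maps this to True/False dummies).
    return flagged.pivot(column, categories).max("_flag")
```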